from IPython.display import display, HTML
from datetime import datetime
import networkx as nx
import numpy as np
import os
import requests
import seaborn as sns
from retentioneering.analysis.utils import _check_folder
from retentioneering.utils.export import export_tracks
def _save_graph(graph, graph_name, settings, plot_name=None):
    """
    Write a plot to a PNG file inside the export folder.

    The file is named ``<graph_name>_<plot_name>.png``. When ``plot_name`` is empty,
    the current timestamp (microsecond resolution) is used in its place.

    :param graph: figure to save, or axes whose parent figure is saved
    :param graph_name: prefix of the output file name
    :param settings: experiment config; passed through ``_check_folder`` and then read for ``export_folder``
    :param plot_name: suffix of the output file name
    :type graph_name: str
    :type settings: dict
    :type plot_name: str
    :return: None
    """
    target_dir = _check_folder(settings)['export_folder']
    suffix = plot_name or datetime.now().strftime('%Y-%m-%dT%H_%M_%S_%f')
    target_file = os.path.join(target_dir, '{}_{}.png'.format(graph_name, suffix))
    # Axes objects cannot be saved directly; go through the figure that owns them.
    figure = graph.get_figure() if isinstance(graph, sns.mpl.axes.Axes) else graph
    figure.savefig(target_file)
def _scale_edge_widths(weights):
    """Convert raw edge weights into line widths suitable for drawing."""
    width = np.array(weights)
    if width.size == 0:
        # Graph without edges: nothing to scale (min()/mean() would raise on empty input).
        return width
    if len(np.unique(width)) == 1:
        # All weights are equal, so every edge gets the same width. Returning the
        # constant directly also avoids 0/0 when the shared weight is zero.
        return np.full(width.shape, 3.0)
    # Normalise relative to the mean (an average edge ends up with width 2) and
    # keep the result in a readable [2, 15] range.
    width = (width - width.min()) / (np.mean(width) - width.min())
    width *= 2
    return np.clip(width, 2, 15)


def plot_graph(df_agg, agg_type, settings, layout=nx.random_layout, save=True, figsize=(20, 10), plot_name=None):
    """
    Visualize trajectories from aggregated tables (with python)

    Builds a directed graph from the ``event_name`` -> ``next_event`` pairs of ``df_agg``,
    weights every edge by the ``agg_type`` column and draws it with matplotlib.

    :param df_agg: table with aggregates (from retentioneering.analysis.get_all_agg function);
        must contain the columns ``event_name``, ``next_event`` and ``agg_type``
    :param agg_type: name of col for weighting graph edges (column name from df_agg)
    :param settings: experiment config (can be empty dict here)
    :param layout: function that return dictionary of positions keyed by node for NetworkX graph
    :param save: True if the graph should be saved
    :param figsize: width, height of the figure in inches
    :param plot_name: name of file with graph plot

    :type df_agg: pd.DataFrame
    :type agg_type: str
    :type settings: dict
    :type layout: func
    :type save: bool
    :type figsize: tuple
    :type plot_name: str
    :return: None
    """
    edges = df_agg.loc[:, ['event_name', 'next_event', agg_type]]
    G = nx.DiGraph()
    G.add_weighted_edges_from(edges.values)

    width = _scale_edge_widths([G.get_edge_data(i, j)['weight'] for i, j in G.edges()])

    pos = layout(G)
    f = sns.mpl.pyplot.figure(figsize=figsize)
    nx.draw_networkx_edges(G, pos, edge_color='b', alpha=0.2, width=width)
    nx.draw_networkx_nodes(G, pos, node_color='b', alpha=0.3)
    # Lift the labels slightly so they do not sit on top of the node markers.
    label_pos = {node: [xy[0], xy[1] + 0.03] for node, xy in pos.items()}
    # `node_color` is not a parameter of draw_networkx_labels: older networkx ignored it,
    # newer releases reject unknown keywords, so it is not passed here.
    nx.draw_networkx_labels(G, label_pos, font_size=16)
    sns.mpl.pyplot.axis('off')
    if save:
        _save_graph(f, 'graphvis', settings, plot_name)
def plot_graph_api(df, settings, users='all', task='lost', order='all', threshold=0.5,
                   start_event=None, end_event=None):
    """
    Visualize trajectories from event clickstream (with Mathematica)

    Exports the tracks with ``export_tracks``, sends the exported files to the plotting
    service via ``_api_plot`` and displays a link to ``graph_plot.pdf`` in the export folder.

    :param df: data from BQ or your own (clickstream). Should have at least three columns: `event_name`,
        `event_timestamp` and `user_pseudo_id`
    :param settings: experiment config (can be empty dict here)
    :param users: `all` or list of user ids to plot specific group
    :param task: type of task for different visualization (can be `lost` or `prunned_welcome`)
    :param order: depth in sessions for filtering
    :param threshold: threshold for session splitting
    :param start_event: name of start event in trajectory
    :param end_event: name of last event in trajectory

    :type df: pd.DataFrame
    :type settings: dict
    :type users: str or list
    :type task: str
    :type order: int or str
    :type threshold: float
    :type start_event: str
    :type end_event: str
    :return: None
    """
    exported = export_tracks(df, settings, users, task, order, threshold, start_event, end_event)
    export_folder, graph_name, set_name = exported
    _api_plot(export_folder, graph_name, set_name, plot_type=task)

    pdf_path = os.path.join(export_folder, 'graph_plot.pdf')
    link_markup = "<a href='{href}'> {href} </a>".format(href=pdf_path)
    display(HTML(link_markup))
    print("Please check on path behind")
def _api_plot(export_folder, graph_name, set_name, plot_type='lost', download_path=None):
    """
    Upload exported graph files to the plotting service and store the returned PDF.

    The response body is written to ``graph_plot.pdf`` inside ``download_path`` unless the
    service answers with its failure marker, in which case a message is printed instead.

    :param export_folder: folder that contains the exported graph and settings files
    :param graph_name: file name of the exported graph inside ``export_folder``
    :param set_name: file name of the exported settings inside ``export_folder``
    :param plot_type: value sent to the service in the ``plot_type`` header
    :param download_path: folder for the resulting PDF; defaults to ``export_folder``
    :type export_folder: str
    :type graph_name: str
    :type set_name: str
    :type plot_type: str
    :type download_path: str
    :return: None
    """
    if not download_path:
        download_path = export_folder
    # NOTE(review): the endpoint and the credentials below are hard-coded and sent over
    # plain HTTP; consider moving them into configuration.
    url = 'http://35.230.23.217:5001/'
    graph_path = os.path.join(export_folder, graph_name)
    settings_path = os.path.join(export_folder, set_name)
    # Context managers guarantee both uploads are closed even if the request fails.
    with open(graph_path, 'rb') as graph_file, open(settings_path, 'rb') as settings_file:
        files = {
            'graph': ('graph.csv', graph_file, 'multipart/form-data'),
            'settings': ('settings.json', settings_file, 'multipart/form-data')}
        r = requests.post(url, files=files, headers={'plot_type': plot_type}, auth=('admin', 'admin'))
    # r.content is bytes; comparing it with a str never matches on Python 3, which would
    # store the server's error message as if it were the PDF.
    if r.content == b'File was not proceed':
        print("Can't plot graph for this data")
    else:
        with open(os.path.join(download_path, 'graph_plot.pdf'), 'wb') as f:
            f.write(r.content)
def bars(x, y, settings=dict(), figsize=(8, 5), save=True, plot_name=None):
    """
    Plot bar graph

    :param x: bars names
    :param y: bars values
    :param settings: experiment config (can be empty dict here)
    :param figsize: width, height of the figure in inches
    :param save: True if the graph should be saved
    :param plot_name: name of file with graph plot

    :type x: list
    :type y: list
    :type settings: dict
    :type figsize: tuple
    :type save: bool
    :type plot_name: str
    :return: None
    """
    # NOTE(review): the `settings` default is one dict shared between calls; it is kept
    # as is because callers may rely on it - confirm before changing.
    sns.mpl.pyplot.figure(figsize=figsize)
    # Pass the vectors by keyword: recent seaborn releases no longer accept x and y
    # positionally, while the keyword form works on old and new versions alike.
    bar = sns.barplot(x=x, y=y, palette='YlGnBu')
    # Rotate the tick labels so long bar names do not overlap.
    bar.set_xticklabels(bar.get_xticklabels(), rotation=90)
    if save:
        _save_graph(bar, 'bar', settings, plot_name)
def heatmap(x, labels=None, settings=dict(), figsize=(10, 15), save=True, plot_name=None):
    """
    Plot heatmap graph

    :param x: data to visualize
    :param labels: labels for x ticks; the ticks are left untouched when it is None
    :param settings: experiment config (can be empty dict here)
    :param figsize: width, height of the figure in inches
    :param save: True if the graph should be saved
    :param plot_name: name of file with graph plot

    :type x: list[list]
    :type labels: list
    :type settings: dict
    :type figsize: tuple
    :type save: bool
    :type plot_name: str
    :return: None
    """
    sns.mpl.pyplot.figure(figsize=figsize)
    axes = sns.heatmap(x, cmap="YlGnBu")
    if labels is not None:
        axes.set_xticklabels(labels, rotation=90)
    if not save:
        return
    _save_graph(axes, 'countmap', settings, plot_name)
def cluster_stats(data, labels=None, settings=dict(), plot_count=2, figsize=(10, 5), save=True, plot_name=None):
    """
    Plot one pie chart per group, arranged in a grid that is two plots wide

    At most ``plot_count`` groups (and never more than ``len(data)``) are drawn; each chart
    is titled ``Class <index>``. When an odd number of charts (three or more) leaves the
    last grid cell unused, that cell is removed from the figure.

    :param data: list of lists with size of each group
    :param labels: list of labels for each group
    :param settings: experiment config (can be empty dict here)
    :param plot_count: number of plots to visualize
    :param figsize: width, height of the figure in inches
    :param save: True if the graph should be saved
    :param plot_name: name of file with graph plot

    :type data: list
    :type labels: list or tuple
    :type settings: dict
    :type plot_count: int
    :type figsize: tuple
    :type save: bool
    :type plot_name: str
    :return: None
    """
    plot_count = min(plot_count, len(data))
    n_rows = 1 if plot_count <= 2 else (plot_count - 1) // 2 + 1
    n_cols = 2 if plot_count > 1 else 1
    fig, ax = sns.mpl.pyplot.subplots(n_rows, n_cols)
    fig.set_size_inches(*figsize)

    # `subplots` returns a single axes, a 1-D array or a 2-D array depending on the grid
    # shape, so the lookup differs for one, two and more plots.
    idx = 0
    for idx, group_sizes in enumerate(data[:plot_count]):
        row, col = divmod(idx, 2)
        if plot_count == 1:
            target = ax
        elif plot_count == 2:
            target = ax[idx]
        else:
            target = ax[row][col]
        target.pie(group_sizes, labels=labels, autopct='%1.1f%%')
        target.set_title('Class {}'.format(idx))

    # The last chart landed in the left column: drop the empty right-hand cell.
    if plot_count > 1 and idx % 2 == 0:
        fig.delaxes(ax[idx // 2, 1])
    if save:
        _save_graph(fig, 'clusters', settings, plot_name)