from npf.types.dataset import convert_to_xyeb
from npf.variable import is_numeric, get_numeric
import numpy as np

class Graph:
    """
    This is a structure holder for data to build a graph.

    Instances are populated field-by-field by the code that builds them
    (e.g. series_to_graph / extract_variable_to_series), which attaches:
    key, do_sort, vars_all, vars_values, series, title/subtitle.
    """
    def __init__(self, grapher:'Grapher'):
        # Back-reference to the owning Grapher, used to read configuration
        # (graph_max_series, graph_series_sort, ...) in dataset().
        self.grapher = grapher
        self.subtitle = None
        # Cached result of dataset(); stays None until the first conversion.
        self.data_types = None

    def statics(self):
        """Return {variable: value} for every variable that has exactly one value."""
        return dict([(var,list(values)[0]) for var,values in self.vars_values.items() if len(values) == 1])

    def dyns(self):
        """Return the list of variables that take more than one value (dynamic variables)."""
        return [var for var,values in self.vars_values.items() if len(values) > 1]

    #Convert the series into the XYEB format (see types.dataset)
    def dataset(self, kind=None):
        # The conversion is lazy and cached: only the first call (per Graph)
        # actually runs convert_to_xyeb.
        if not self.data_types:

            self.data_types = convert_to_xyeb(
                datasets = self.series,
                run_list = self.vars_all,
                key = self.key,
                max_series=self.grapher.config('graph_max_series'),
                do_x_sort=self.do_sort,
                series_sort=self.grapher.config('graph_series_sort'),
                options=self.grapher.options,
                statics=self.statics(),
                y_group=self.grapher.configdict('graph_y_group'),
                color=[get_numeric(v) for v in self.grapher.configlist('graph_color')],
                kind=kind
            )

        return self.data_types

    # Divide all series by the first one, making a percentage of difference
    @staticmethod
    def series_prop(series, prop, exclusions = []):
        """Divide every serie by the first one.

        :param series: list of (script, build, results) tuples; series[0] is the baseline
        :param prop: scaling factor; non-numeric values fall back to 1.
                     A negative prop shifts the ratio by prop (relative difference).
        :param exclusions: result types left untouched (only truncated/arrayified)
        :return: new list of (script, build, results) for series[1:]
        NOTE(review): `exclusions=[]` is a mutable default; it is only read here,
        so it is harmless, but a tuple default would be safer.
        """
        if len(series) == 1:
            raise Exception("Cannot make proportional series with only one serie !")
        newseries = []
        if not is_numeric(prop):
            prop=1
        if len(series[0]) < 3:
            raise Exception("Malformed serie !")
        base_results=series[0][2]
        for i, (script, build, all_results) in enumerate(series[1:]):
            new_results={}
            for run,run_results in all_results.items():
                # Runs absent from the baseline cannot be divided; skip them.
                if not run in base_results:
                    print(run,"FIXME is not in base")
                    continue

                for result_type, results in run_results.items():
                    # Result types missing from the baseline are nulled out
                    # (the key already exists, so this does not grow the dict
                    # while iterating).
                    if not result_type in base_results[run]:
                        run_results[result_type] = None
                        print(result_type, "not in base for %s" % run)
                        continue
                    base = base_results[run][result_type]
                    # Truncate both vectors to their common length before dividing.
                    if len(base) > len(results):
                        base = base[:len(results)]
                    elif len(results) > len(base):
                        results = results[:len(base)]
                    base = np.array(base)
                    results = np.array(results)
                    if result_type not in exclusions:
                        # Only divide where the baseline is non-zero to avoid
                        # division by zero; zero-baseline points are dropped.
                        f = np.nonzero(base)
                        results = (results[f] / base[f] * float(abs(prop)) + (prop if prop < 0 else 0))
                    run_results[result_type] = results
                new_results[run] = run_results
            # Rename the serie to show what it was divided by.
            build._pretty_name = build._pretty_name + " / " + series[0][1]._pretty_name
            newseries.append((script, build, new_results))
        return newseries
def decide_graph_type(config, n_values, vars_values, key, result_type, ndyn, isubplot):
    """Choose which kind of plot fits the data (bar, line, boxplot, ...).

    :param config: callable (key, default) -> user configuration value
    :param n_values: number of distinct X combinations
    :param vars_values: mapping of variable -> set of seen values
    :param key: the variable used as X axis
    :param result_type: name of the plotted result
    :param ndyn: number of dynamic variables
    :param isubplot: index of the current subplot (selects per-subplot types)
    :return: the chosen graph type as a string
    """
    # Heuristic default, refined below by the user configuration.
    if ndyn == 0:
        guess = "boxplot" if n_values == 1 else "simple_bar"
    elif ndyn == 1 and n_values > 2 and npf.all_num(vars_values[key]):
        guess = "line"
    else:
        guess = "bar"

    user_types = config("graph_type", [])

    if len(user_types) > 0 and (type(user_types[0]) is tuple or type(user_types) is tuple):
        # Per-result-type mapping: a tuple or a list of (result, type) pairs.
        mapping = dict([user_types]) if type(user_types) is tuple else dict(user_types)
        for candidate in (result_type, "default", "result"):
            if candidate in mapping:
                guess = mapping[candidate]
                break
        else:
            guess = "line"
    else:
        # Flat list (or single string): one entry per subplot, padded with the
        # heuristic default and "line", clamped to the last entry.
        if type(user_types) is str:
            user_types = [user_types]
        user_types.extend([guess, "line"])
        idx = isubplot if isubplot < len(user_types) else len(user_types) - 1
        guess = user_types[idx]

    # A line plot needs at least one dynamic variable to act as X.
    if ndyn == 0 and guess == "line":
        print("WARNING: Cannot graph", guess,
              "as a line without dynamic variables")
        guess = "simple_bar"

    return guess
holder for data to build a graph - """ - def __init__(self, grapher:'Grapher'): - self.grapher = grapher - self.subtitle = None - self.data_types = None - - def statics(self): - return dict([(var,list(values)[0]) for var,values in self.vars_values.items() if len(values) == 1]) - - def dyns(self): - return [var for var,values in self.vars_values.items() if len(values) > 1] - - #Convert the series into the XYEB format (see types.dataset) - def dataset(self,kind=None): - if not self.data_types: - - self.data_types = dataset.convert_to_xyeb( - datasets = self.series, - run_list = self.vars_all, - key = self.key, - max_series=self.grapher.config('graph_max_series'), - do_x_sort=self.do_sort, - series_sort=self.grapher.config('graph_series_sort'), - options=self.grapher.options, - statics=self.statics(), - y_group=self.grapher.configdict('graph_y_group'), - color=[get_numeric(v) for v in self.grapher.configlist('graph_color')], - kind=kind - ) - - return self.data_types - - def split_for_series(self): - '''Make a sub-graph per serie''' - sg = [] - for script,build,all_results in self.series: - subgraph = self.grapher.series_to_graph([(script,build,all_results)], self.dyns(), self.vars_values.copy(), self.vars_all.copy()) - subgraph.subtitle = ((self.title + " : ") if self.title else '') + build.pretty_name() - subgraph.title = self.title - sg.append(subgraph) - return sg - - # Divide all series by the first one, making a percentage of difference - @staticmethod - def series_prop(series, prop, exclusions = []): - if len(series) == 1: - raise Exception("Cannot make proportional series with only one serie !") - newseries = [] - if not is_numeric(prop): - prop=1 - if len(series[0]) < 3: - raise Exception("Malformed serie !") - base_results=series[0][2] - for i, (script, build, all_results) in enumerate(series[1:]): - new_results={} - for run,run_results in all_results.items(): - if not run in base_results: - print(run,"FIXME is not in base") - continue - - for result_type, 
results in run_results.items(): - if not result_type in base_results[run]: - run_results[result_type] = None - print(result_type, "not in base for %s" % run) - continue - base = base_results[run][result_type] - if len(base) > len(results): - base = base[:len(results)] - elif len(results) > len(base): - results = results[:len(base)] - base = np.array(base) - results = np.array(results) - if result_type not in exclusions: - f = np.nonzero(base) - results = (results[f] / base[f] * float(abs(prop)) + (prop if prop < 0 else 0)) - run_results[result_type] = results - new_results[run] = run_results - build._pretty_name = build._pretty_name + " / " + series[0][1]._pretty_name - newseries.append((script, build, new_results)) - return newseries - - class Grapher: def __init__(self): self.scripts = set() @@ -691,7 +609,7 @@ def graph(self, filename, options, fileprefix=None, graph_variables: List[Run] = return # Add series to a pandas dataframe - if options.pandas_filename is not None or options.web is not None: + if options.pandas_filename is not None or options.web is not None or options.notebook_path is not None: all_results_df=pd.DataFrame() # Empty dataframe for test, build, all_results in series: for i, (x) in enumerate(all_results): @@ -700,13 +618,14 @@ def graph(self, filename, options, fileprefix=None, graph_variables: List[Run] = try: labels = [k[1] if type(k) is tuple else k for k,v in x.variables.items()] - x_vars = [[v[1] if type(v) is tuple else v for k,v in x.variables.items()]] + x_vars = [[(v[1] if type(v) is tuple else v) for k,v in x.variables.items()]] x_vars=pd.DataFrame(x_vars,index=[0],columns=labels) x_vars=pd.concat([pd.DataFrame({'build' :build.pretty_name()},index=[0]), pd.DataFrame({'test_index' :i},index=[0]), x_vars],axis=1) + vals = all_results[x] if not vals: continue - x_data=pd.DataFrame.from_dict(vals,orient='index').transpose() #Use orient='index' to handle lists with different lengths + x_data=pd.DataFrame.from_dict( {"y_"+k: v for k, v 
in vals.items()},orient='index').transpose() #Use orient='index' to handle lists with different lengths if len(x_data) == 0: continue x_data['run_index']=x_data.index @@ -792,7 +711,7 @@ def graph(self, filename, options, fileprefix=None, graph_variables: List[Run] = if self.options.graph_select_max: results = np.sort(results)[-self.options.graph_select_max:] - ydiv = dataset.var_divider(test, "result", result_type) + ydiv = var_divider(test, "result", result_type) if np.all(np.isnan(results)): results=np.asarray([0]) new_results.setdefault(run.copy(), OrderedDict())[result_type] = results / ydiv @@ -921,6 +840,10 @@ def graph(self, filename, options, fileprefix=None, graph_variables: List[Run] = if options.web is not None: prepare_web_export(series, all_results_df, options.web) + # Export to Jupyter notebook + if options.notebook_path is not None: + prepare_notebook_export(series, all_results_df, self.options, self.config) + def graph_group(self, series, vars_values, filename, fileprefix, title): if len(series) == 0: @@ -1030,6 +953,9 @@ def lam(x): versions = [] + """Vars_all is the set of all variable combination that have some value. Taking the iperf case, it will be + [ZERO_COPY=0, PARALLEL=1], [ZERO_COPY=0, PARALLEL=2], ... 
[ZERO_COPY=1, PARALLEL=8], + """ vars_all = OrderedSet() for i, (test, build, all_results) in enumerate(series): versions.append(build.pretty_name()) @@ -1040,13 +966,12 @@ def lam(x): dyns = [] for k, v in vars_values.items(): - if len(v) > 1: - dyns.append(k) + if len(v) <= 0: + print("ERROR: Variable %s has no values" % k) + elif len(v) == 1: + statics[k] = list(v)[0] else: - if len(v) > 0: - statics[k] = list(v)[0] - else: - print("ERROR: Variable %s has no values" % k) + dyns.append(k) #Divide a serie by another prop = self.config('graph_series_prop') @@ -1067,9 +992,9 @@ def lam(x): title=SectionVariable.replace_variables(v, title) if sv: #Only one supported for now - graphs = [ None for v in vars_values[sv] ] + graphs = [ None for _ in vars_values[sv] ] for j,(script, build, all_results) in enumerate(series): - graph = self.extract_variable_to_series(sv, vars_values.copy(), all_results, dyns.copy(), build, script) + graph = extract_variable_to_series(self, sv, vars_values.copy(), all_results, dyns.copy(), build, script) self.glob_legend_title = title #This variable has been extracted, the legend should not be the variable name in this case @@ -1098,7 +1023,7 @@ def lam(x): del dyns del vars_values else: - graph = self.series_to_graph(series, dyns, vars_values, vars_all) + graph = series_to_graph(self, series, dyns, vars_values, vars_all) graph.title = title graphs.append(graph) @@ -1239,11 +1164,9 @@ def plot_graphs(self, graphs, filename, fileprefix): return ret - - #Generate the plot of data_types at the given i/i_subplot position over n_cols/n_lines - def generate_plot_for_graph(self, i, i_subplot, figure, n_cols, n_lines, vars_values, data_types, dyns, vars_all, key, title, ret, subplot_legend_titles): - ndyn=len(dyns) + def generate_plot_for_graph(self, i, i_subplot, figure, n_cols, n_lines, vars_values, data_types, dyns, VARS_ALL, key, title, ret, subplot_legend_titles): + NDYN=len(dyns) subplot_type=self.config("graph_subplot_type") subplot_handles=[] 
axiseis = [] @@ -1273,7 +1196,7 @@ def generate_plot_for_graph(self, i, i_subplot, figure, n_cols, n_lines, vars_va brokenaxesY = [ b if len(b) == 3 else b + [None] for b in brokenaxesY ] brokenaxesX = [ b if len(b) == 3 else b + [None] for b in brokenaxesX ] - isubplot = int(i_subplot * len(figure) + i_s_subplot) + ISUBPLOT = int(i_subplot * len(figure) + i_s_subplot) if result_type in cross_reference: cross_key = cross_reference[result_type] @@ -1323,23 +1246,23 @@ def generate_plot_for_graph(self, i, i_subplot, figure, n_cols, n_lines, vars_va # plt.setp(axiseis[0].get_xticklabels(), visible=False) #axiseis[0].set_xlabel("") axis = plt.subplot(n_lines * nbrokenY, n_cols * nbrokenX, - isubplot + 1 + ibrokenY, + ISUBPLOT + 1 + ibrokenY, sharex = axiseis[0] if ibrokenY > 0 and nbrokenY > 1 else None, sharey = axiseis[0] if ibrokenX > 0 and nbrokenX > 1 else None) ihandle = 0 shift = 0 else: #subplot_type=="axis" for dual axis - if isubplot == 0: + if ISUBPLOT == 0: fix,axis=plt.subplots(nbrokenY * nbrokenX) ihandle = 0 - elif isubplot == len(figure) - 1: + elif ISUBPLOT == len(figure) - 1: axis=axis.twinx() ihandle = 1 else: axis=axiseis[0] ihandle = 0 if len(figure) > 1: - shift = isubplot + 1 + shift = ISUBPLOT + 1 else: shift = 0 @@ -1358,7 +1281,7 @@ def generate_plot_for_graph(self, i, i_subplot, figure, n_cols, n_lines, vars_va s = build._color_index tot = [build._color_index for x,y,e,build in data].count(s) elif gcolor: - s=gcolor[(i + isubplot*len(data)) % len(gcolor)] + s=gcolor[(i + ISUBPLOT*len(data)) % len(gcolor)] tot = gcolor.count(s) else: s=shift @@ -1384,87 +1307,54 @@ def generate_plot_for_graph(self, i, i_subplot, figure, n_cols, n_lines, vars_va build._color=cserie[f % len(cserie)] gi[s]+=1 - axis.tick_params(**tick_params) #This is the heart of the logic to find which kind of graph to use for the data - - graph_type = False - default_doleg = True - if ndyn == 0: - default_doleg = False - if len(vars_all) == 1: - graph_type = "boxplot" - else: 
- graph_type = "simple_bar" - elif ndyn == 1 and len(vars_all) > 2 and npf.all_num(vars_values[key]): - graph_type = "line" - graph_types = self.config("graph_type",[]) - - - if len(graph_types) > 0 and (type(graph_types[0]) is tuple or type(graph_types) is tuple): - if type(graph_types) is tuple: - graph_types = dict([graph_types]) - else: - graph_types = dict(graph_types) - if result_type in graph_types: - graph_type = graph_types[result_type] - elif "default" in graph_types: - graph_type = graph_types["default"] - elif "result" in graph_types: - graph_type = graph_types["result"] - else: - graph_type = "line" - - else: - if type(graph_types) is str: - graph_types = [graph_types] - graph_types.extend([graph_type, "line"]) - graph_type = graph_types[isubplot if isubplot < len(graph_types) else len(graph_types) - 1] - if ndyn == 0 and graph_type == "line": - print("WARNING: Cannot graph %s as a line without dynamic variables" % graph_type) - graph_type = "simple_bar" barplot = False horizontal = False + default_add_legend = True + + graph_type = decide_graph_type(self.config, len(VARS_ALL), vars_values, key, result_type, NDYN, ISUBPLOT) + try: if graph_type == "simple_bar": """No dynamic variables : do a barplot X=version""" - r, ndata = self.do_simple_barplot(axis, result_type, data, shift, isubplot) + r, ndata = self.do_simple_barplot(axis, result_type, data, shift, ISUBPLOT) barplot = True elif graph_type == "line" or graph_type == "lines": """One dynamic variable used as X, series are version line plots""" - r, ndata = self.do_line_plot(axis, key, result_type, data, data_types, shift, isubplot, xmin, xmax, xdata) + r, ndata = self.do_line_plot(axis, key, result_type, data, data_types, shift, ISUBPLOT, xmin, xmax, xdata) elif graph_type == "boxplot": """A box plot, with multiple X values and series in color""" - r, ndata = self.do_box_plot(axis, key, result_type, data, xdata, shift, isubplot) + r, ndata = self.do_box_plot(axis, key, result_type, data, xdata, 
shift, ISUBPLOT) barplot = True #It's like a barplot, no formatting elif graph_type == "cdf": """CDF""" - r, ndata = self.do_cdf(axis, key, result_type, data, xdata, shift, isubplot) - default_doleg = True + r, ndata = self.do_cdf(axis, key, result_type, data, xdata, shift, ISUBPLOT) + default_add_legend = True ymin = 0 ymax = 100 xname=self.var_name(result_type) elif graph_type == "heatmap": """Heatmap""" - r, ndata = self.do_heatmap(axis, key, result_type, data, xdata, vars_values, shift, isubplot, sparse = False) - default_doleg = False + r, ndata = self.do_heatmap(axis, key, result_type, data, xdata, vars_values, shift, ISUBPLOT, sparse = False) + default_add_legend = False barplot = True elif graph_type == "sparse_heatmap": """sparse Heatmap""" - r, ndata = self.do_heatmap(axis, key, result_type, data, xdata, vars_values, shift, isubplot, sparse = True) - default_doleg = False + r, ndata = self.do_heatmap(axis, key, result_type, data, xdata, vars_values, shift, ISUBPLOT, sparse = True) + default_add_legend = False barplot = True elif graph_type == "barh" or graph_type=="horizontal_bar": - r, ndata= self.do_barplot(axis,vars_all, dyns, result_type, data, shift, ibrokenY==0, horizontal=True, data_types=data_types) + r, ndata= self.do_barplot(axis,VARS_ALL, dyns, result_type, data, shift, ibrokenY==0, horizontal=True, data_types=data_types) barplot = True horizontal = True else: """Barplot. 
X is all seen variables combination, series are version""" - r, ndata= self.do_barplot(axis,vars_all, dyns, result_type, data, shift, ibrokenY==0, data_types=data_types) + r, ndata= self.do_barplot(axis,VARS_ALL, dyns, result_type, data, shift, ibrokenY==0, data_types=data_types) barplot = True except Exception as e: print("ERROR : could not graph %s" % result_type) @@ -1512,7 +1402,7 @@ def generate_plot_for_graph(self, i, i_subplot, figure, n_cols, n_lines, vars_va if len(figure) == 1 or subplot_type=="subplot": sl = 0 elif gcolor: - sl = gcolor[(isubplot * len(data)) % len(gcolor)] % len(legendcolors) + sl = gcolor[(ISUBPLOT * len(data)) % len(gcolor)] % len(legendcolors) else: sl = shift % len(legendcolors) @@ -1740,7 +1630,7 @@ def generate_plot_for_graph(self, i, i_subplot, figure, n_cols, n_lines, vars_va print("INFO: Legend not shown as there is only one serie with a default name (local, version). Set --config graph_legend=1 to force printing a legend. See the documentation at https://npf.readthedocs.io/en/latest/graph.html to see how to change the legend.") else: doleg = True - if (default_doleg or doleg) and doleg is not False: + if (default_add_legend or doleg) and doleg is not False: loc = self.config("legend_loc") if type(loc) is dict or type(loc) is list: loc = self.scriptconfig("legend_loc",key="result",result_type=result_type) @@ -1761,10 +1651,7 @@ def generate_plot_for_graph(self, i, i_subplot, figure, n_cols, n_lines, vars_va if self.configlist("subplot_legend_loc"): loc=self.configlist("subplot_legend_loc")[ilegend] else: - if ilegend == 0: - loc = 'upper left' - else: - loc = 'lower right' + loc = 'upper left' if ilegend == 0 else 'lower right' else: if ilegend > 0: diff --git a/npf/npf.py b/npf/npf.py index 0d2f7a4..37b6038 100755 --- a/npf/npf.py +++ b/npf/npf.py @@ -67,7 +67,24 @@ def add_verbosity_options(parser: ArgumentParser): def add_graph_options(parser: ArgumentParser): o = parser.add_argument_group('Output data') 
o.add_argument('--web', - help='Generate interactive graphs in a .html file format') + help='Generate interactive graphs in a *.html file format') + o.add_argument('--notebook', '--nb', dest='notebook_path', + help='Generate a Jupyter Notebook that plots the data (*.ipynb file format).') + o.add_argument('--template-nb', dest='template_nb_path', + help='Use a custom Jupyter Notebook as template.', + default="npf/types/notebook/template.ipynb") + o.add_argument('--nb-kernel', dest='nb_kernel', + help='Specify which kernel to use for executing the Jupyter Notebook.', + default="python3") + o.add_argument('--update-nb', + help='If the notebook already exists, try to update the experiment data (cell containing "data = ").', action='store_true', + default=False) + o.add_argument('--force-nb', + help='If the notebook already exists, overwrite it (the previous data and code will be lost).', action='store_true', + default=False) + o.add_argument('--disable-nb-exec', dest='execute_nb', + help='By default the output notebook is executed, this option disables that feature.', action='store_false', + default=True) o.add_argument('--output', help='Output data to CSV files, one per result type. By default it prints the variable value as first column and the second column is the mean of all runs. 
from npf import npf
from npf.graph import Graph
from npf.variable_to_series import extract_variable_to_series

# Convert a list of series to a graph object
# if the list has a unique item and there are dynamic variables, one
# dynamic variable will be extracted to make a list of serie
def series_to_graph(grapher, series, dyns, vars_values, vars_all):
    """Build a Graph from a list of series.

    :param grapher: the owning Grapher (configuration and options source)
    :param series: list of (script, build, results) tuples
    :param dyns: names of the dynamic variables
    :param vars_values: mapping of variable -> set of seen values
    :param vars_all: ordered set of all seen variable combinations
    :return: a Graph object ready to be plotted
    """
    nseries = len(series)

    ndyn = len(dyns)
    if grapher.options.do_transform and (nseries == 1 and ndyn > 0 and not grapher.options.graph_no_series and not (
            ndyn == 1 and npf.all_num(vars_values[dyns[0]]) and len(vars_values[dyns[0]]) > 2) and dyns[0] != "time"):
        """Only one serie: expand one dynamic variable as serie, but not if it was plotable as a line"""
        script, build, all_results = series[0]
        if grapher.config("var_serie") and grapher.config("var_serie") in dyns:
            key = grapher.config("var_serie")
        else:
            key = None
            # First pass : use the non-numerical variable with the most points, but limited to 10
            n_val = 0
            nonums = []
            for i in range(ndyn):
                k = dyns[i]
                if k == 'time':
                    continue
                if not npf.all_num(vars_values[k]):
                    nonums.append(k)
                    if len(vars_values[k]) > n_val and len(vars_values[k]) < 10:
                        key = k
                        n_val = len(vars_values[k])
            if key is None:
                # Second pass if that missed, use the numerical variable with the less point if dyn==2 (->lineplot) else the most points
                # NOTE(review): 999 acts as "infinity" for the minimum search;
                # a variable with >= 999 values would never be selected here.
                n_val = 0 if ndyn > 2 else 999
                for i in range(ndyn):
                    k = dyns[i]
                    if k == 'time':
                        continue
                    if (ndyn > 2 and len(vars_values[k]) > n_val) or (ndyn <= 2 and len(vars_values[k]) < n_val):
                        key = k
                        n_val = len(vars_values[k])

        # Key is found, now the extraction itself
        if not key:
            key = 'time'
        if key:
            # key is always truthy at this point (falls back to 'time' above),
            # so this branch always runs.
            graph = extract_variable_to_series(grapher, key, vars_values, all_results, dyns, build, script)

    else:
        grapher.glob_legend_title = None
        # No extraction: the X axis is the version, the single dynamic
        # variable, or the combination of all variables.
        if ndyn == 0:
            key = "version"
            do_sort = False
        elif ndyn == 1:
            key = dyns[0]
            do_sort = True
        else:
            key = "Variables"
            do_sort = False
        graph = Graph(grapher)
        graph.key = key
        graph.do_sort = do_sort
        graph.vars_all = vars_all
        graph.vars_values = vars_values
        graph.series = series
    return graph
from typing import List
import nbformat as nbf
from nbconvert.preprocessors import ExecutePreprocessor, CellExecutionError
from jupyter_client.kernelspec import NoSuchKernel
from jinja2 import Template

from json import dumps
import re
import os
import time

import pandas as pd
from npf.graph_choice import decide_graph_type

INDENT_DATA = False
TIMEOUT = 60  # seconds, per-notebook execution budget


def prepare_notebook_export(datasets: List[tuple], all_results_df: pd.DataFrame, options, config):
    """Export the experiment results as a Jupyter notebook.

    Renders the template notebook (options.template_nb_path) with jinja2,
    keeping only the cells tagged with the chosen graph type, embeds the
    data as JSON, then (unless --disable-nb-exec) executes the notebook
    before writing it to options.notebook_path.

    May prompt the user on stdin when the target notebook already exists
    and neither --update-nb nor --force-nb was given.
    """
    path = options.notebook_path

    # Only the first dataset is exported for now.
    dataset = datasets[0]
    test, _build, _runs = dataset
    var_names = dict(test.config["var_names"])

    x_vars = list(test.variables.dynamics().keys())

    # Result columns were prefixed with "y_" when the dataframe was built.
    y_vars = list(filter(lambda x: x.startswith("y_"), all_results_df.columns))
    # BUGFIX: str.lstrip("y_") strips any run of leading 'y'/'_' CHARACTERS,
    # so a metric named "y_yield" would become "ield". removeprefix removes
    # only the exact "y_" marker (and leaves non-prefixed columns alone).
    y_vars = [y.removeprefix('y_') for y in y_vars]
    data = all_results_df.rename(columns=lambda c: c.removeprefix("y_"))

    # variables that get replaced in the template notebook
    variables = {
        "name": test.get_title(),
        "var_names": var_names,
        "x_vars": x_vars,
        "x_names": get_name(x_vars, var_names),
        "y_vars": y_vars,
        "y_names": get_name(y_vars, var_names),
        "data": dumps(data.to_dict(orient="records"), indent=4 if INDENT_DATA else None),
        "dir_name": os.path.dirname(path),
        # NOTE(review): these assume "/" separators and a single extension;
        # os.path.splitext/basename would be more portable — TODO confirm.
        "file_path": ".".join(path.split(".")[:-1]),  # drops the extension
        "file_name": ".".join(path.split("/")[-1].split(".")[:-1]),
    }

    # both update and force options are not allowed
    if options.update_nb and options.force_nb:
        print("\nBoth --update-nb and --force-nb options are not allowed together.")
        user_input = input("Do you want to overwrite the notebook? (yes/no): ")
        if user_input.lower() != 'yes':
            options.force_nb = False

    # check if the notebook already exists and ask the user if they want to overwrite it
    if os.path.exists(path) and not options.force_nb:
        if options.update_nb:
            update_notebook(path, variables.get("data"), options)
            return
        else:  # no update or force is specified
            print("\nNotebook already exists at the provided path. Use --update-nb to try to update the data or --force-nb to overwrite the notebook.")
            user_input = input("Do you want to overwrite it? (yes/no): ")
            if user_input.lower() != 'yes':
                print("Cancelled notebook export.")
                return

    # TODO: Select a suitable key when there are multiple values
    # (raises IndexError when the test has no dynamic variables).
    key = x_vars[0]

    # TODO: there might be many result types
    result_type = y_vars[0]

    n_values = len(all_results_df[x_vars].value_counts())
    ndyn = len(x_vars)
    if ndyn > 1:
        # One dynamic variable is consumed as the legend/serie dimension.
        ndyn -= 1

    graph_type = decide_graph_type(config, n_values, all_results_df, key, result_type, ndyn, isubplot=0)

    # read template notebook
    with open(options.template_nb_path) as f:
        nb = nbf.read(f, as_version=4)

    # keep only cells with the specified tag
    nb.cells = [cell for cell in nb.cells if has_tag(cell, graph_type)]

    # remove cell tags except for the "data" tag (kept so --update-nb can
    # locate and refresh the data cell later)
    for cell in nb.cells:
        if "data" in cell.metadata.get("tags", []):
            cell.metadata.tags = ["data"]
        else:
            cell.metadata.pop("tags", None)

    # render cells by replacing variables in the template using jinja2
    for cell in nb.cells:
        cell_template = Template(cell.source)
        cell.source = cell_template.render(variables)

    if options.execute_nb:
        exec_and_export_nb(nb, path, options)
    else:
        export_nb(nb, path)


def export_nb(nb, path: str):
    """Exports the notebook to the specified path."""
    with open(path, 'w') as f:
        nbf.write(nb, f)  # write notebook to file
    print("Notebook exported to", path)


def exec_and_export_nb(nb, path: str, options):
    """Execute the notebook in-place, then write it out (even on failure)."""
    try:
        ep = ExecutePreprocessor(timeout=TIMEOUT, kernel_name=options.nb_kernel)
        start_time = time.time()
        ep.preprocess(nb, {'metadata': {'path': os.path.dirname(path)}})
        print("Notebook executed in %.2f seconds." %
              (time.time() - start_time))
    except CellExecutionError:
        print("Notebook execution failed.")
        raise
    except NoSuchKernel:
        print("\n[ERROR] No such kernel. Try the following to fix this issue:")
        print("\tList kernels with `jupyter kernelspec list` and specify another kernel.")
        print("\tIf no kernel exists, install one with `python3 -m pip install ipykernel` and `python3 -m ipykernel install --user`.\n")
        raise
    finally:
        # Always export, so a partially-executed notebook is still inspectable.
        export_nb(nb, path)


def update_notebook(path: str, new_data: str, options):
    """Replace the data in the existing notebook and execute it."""
    # read the existing notebook
    with open(path) as f:
        nb = nbf.read(f, as_version=4)

    # find the cell with the data tag and replace the data
    updated = 0
    for cell in nb.cells:
        if has_tag(cell, "data"):
            updated += 1
            cell.source = "data = " + new_data

    if updated >= 1:
        print("Data updated in the notebook.")
    else:
        print("No cell with the 'data' tag found in the notebook.")

    if options.execute_nb:
        exec_and_export_nb(nb, path, options)


def has_tag(cell, tag: str) -> bool:
    """Returns True if the cell has the specified tag or "all"."""
    tags = cell.metadata.get("tags", [])
    return tag in tags or "all" in tags


def get_name(var, var_names: dict[str, str]):
    """Returns the name associated with a variable or a list of variables."""
    if isinstance(var, list):
        return [get_name(v, var_names) for v in var]
    return var_names[var] if var in var_names else var
var_names[var] if var in var_names else var diff --git a/npf/types/notebook/template.ipynb b/npf/types/notebook/template.ipynb new file mode 100644 index 0000000..866cfd3 --- /dev/null +++ b/npf/types/notebook/template.ipynb @@ -0,0 +1,322 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "tags": [ + "all" + ] + }, + "source": [ + "# Results of {{name}}\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "all", + "data" + ] + }, + "outputs": [], + "source": [ + "data = {{data}}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "all" + ] + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "\n", + "df = pd.DataFrame(data)\n", + "\n", + "var_names = {{var_names}}\n", + "x_vars = {{x_vars}}\n", + "y_vars = {{y_vars}}\n", + "\n", + "display(df)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "all" + ] + }, + "outputs": [], + "source": [ + "def get_name(var):\n", + " return var_names[var] if var in var_names else var" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "bar" + ] + }, + "outputs": [], + "source": [ + "# BAR\n", + "\n", + "for y_var in y_vars:\n", + " values = df.groupby({{x_vars}})[y_var]\n", + "\n", + " {%- if x_vars|length > 1 %}\n", + " values.mean().unstack().plot.bar(yerr=values.std().unstack())\n", + " {% else %}\n", + " values.mean().plot.bar(yerr=values.std())\n", + " {% endif %}\n", + "\n", + " plt.xlabel('{{x_names[0]}}')\n", + " plt.ylabel(get_name(y_var))\n", + " plt.legend(title='{{x_names[1]}}')\n", + " plt.title('{{name}}')\n", + " plt.grid(alpha=0.3)\n", + "\n", + " # plt.savefig(f'{{file_name}}_{y_var}.pdf')\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "barh" + ] + }, + "outputs": [], + "source": [ + "# 
BARH (HORIZONTAL BAR)\n", + "\n", + "for y_var in y_vars:\n", + " values = df.groupby({{x_vars}})[y_var]\n", + "\n", + " {%- if x_vars|length > 1 %}\n", + " values.mean().unstack().plot.barh(xerr=values.std().unstack())\n", + " {% else %}\n", + " values.mean().plot.barh(xerr=values.std())\n", + " {% endif %}\n", + "\n", + " plt.xlabel(get_name(y_var))\n", + " plt.ylabel('{{x_names[0]}}')\n", + " plt.legend(title='{{x_names[1]}}')\n", + " plt.title('{{name}}')\n", + " plt.grid(alpha=0.3)\n", + "\n", + " # plt.savefig(f'{{file_name}}_{y_var}.pdf')\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "line" + ] + }, + "outputs": [], + "source": [ + "# LINE(S)\n", + "\n", + "for y_var in y_vars:\n", + " values = df.groupby({{x_vars}})[y_var]\n", + "\n", + " {%- if x_vars|length > 1 %}\n", + " values.mean().unstack().plot.line(yerr=values.std().unstack())\n", + " {% else %}\n", + " values.mean().plot.line(yerr=values.std())\n", + " {% endif %}\n", + "\n", + " plt.xlabel('{{x_names[0]}}')\n", + " plt.ylabel(get_name(y_var))\n", + " plt.legend(title='{{x_names[1]}}')\n", + " plt.title('{{name}}')\n", + " plt.grid(alpha=0.3)\n", + "\n", + " # plt.savefig(f'{{file_name}}_{y_var}.pdf')\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "boxplot" + ] + }, + "outputs": [], + "source": [ + "# BOXPLOT\n", + "\n", + "{%- if x_vars|length == 1 %}\n", + "\n", + "for y_var in y_vars:\n", + " sns.boxplot(x='{{x_vars[0]}}', y=y_var, data=df)\n", + "\n", + " plt.xlabel('{{x_names[0]}}')\n", + " plt.ylabel(get_name(y_var))\n", + " plt.title('{{name}}')\n", + " plt.grid(alpha=0.3)\n", + "\n", + " # plt.savefig(f'{{file_name}}_{y_var}.pdf')\n", + " plt.show()\n", + "\n", + "{%- else %}\n", + "\n", + "# select the variable with the fewest unique values for the legend\n", + "legend_x_var = df[x_vars].nunique().idxmin()\n", + "\n", + "# for each y_var, plot all x_var 
combinations (except the one used for the legend)\n", + "for yv in y_vars:\n", + " for xv in [var for var in x_vars if var != legend_x_var]:\n", + " sns.boxplot(x=xv, y=yv, hue=legend_x_var, data=df)\n", + " plt.xlabel(get_name(xv))\n", + " plt.ylabel(get_name(yv))\n", + " plt.legend(title=get_name(legend_x_var))\n", + " plt.title('{{name}}')\n", + " plt.grid(alpha=0.3)\n", + "\n", + " # plt.savefig(f'{{file_name}}_{y_var}.pdf')\n", + " plt.show()\n", + "\n", + "{%- endif %}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "cdf" + ] + }, + "outputs": [], + "source": [ + "# CDF\n", + "\n", + "{%- if x_vars|length == 1 %}\n", + "\n", + "for y_var in y_vars:\n", + " for x_var, x_var_values in df.groupby({{x_vars}}):\n", + " sns.kdeplot(x_var_values[y_var], cumulative=True, label=x_var)\n", + "\n", + " plt.xlabel(get_name(y_var))\n", + " plt.ylabel(f'Cumulative distribution of {get_name(y_var)}')\n", + " plt.legend(title=get_name(x_var))\n", + " plt.title('{{name}}')\n", + " plt.grid(alpha=0.3)\n", + "\n", + " # plt.savefig(f'{{file_name}}_{y_var}.pdf')\n", + " plt.show()\n", + "\n", + "{%- else %}\n", + "\n", + "# select the variable with the fewest unique values for the legend\n", + "legend_x_var = df[x_vars].nunique().idxmin()\n", + "\n", + "# for each y_var, plot all x_var combinations (except the one used for the legend)\n", + "for yv in y_vars:\n", + " for xv in [var for var in x_vars if var != legend_x_var]:\n", + " for x_var, x_var_values in df.groupby({{x_vars}}):\n", + " sns.kdeplot(x_var_values[yv], cumulative=True, label=x_var)\n", + "\n", + " plt.xlabel(get_name(yv))\n", + " plt.ylabel(f'Cumulative distribution of {get_name(yv)}')\n", + " plt.legend(title=get_name(legend_x_var))\n", + " plt.title('{{name}}')\n", + " plt.grid(alpha=0.3)\n", + "\n", + " # plt.savefig(f'{{file_name}}_{y_var}.pdf')\n", + " plt.show()\n", + "\n", + "{%- endif %}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"metadata": { + "tags": [ + "heatmap" + ] + }, + "outputs": [], + "source": [ + "# HEATMAP\n", + "\n", + "{%- if x_vars|length == 2 %}\n", + "\n", + "for y_var in y_vars:\n", + " sns.heatmap(df.pivot_table(index=x_vars[0], columns=x_vars[1], values=y_var))\n", + "\n", + " plt.xlabel(get_name(x_vars[1]))\n", + " plt.ylabel(get_name(x_vars[0]))\n", + " plt.title('{{name}}')\n", + " plt.grid(alpha=0.3)\n", + "\n", + " # plt.savefig(f'{{file_name}}_{y_var}.pdf')\n", + " plt.show()\n", + "\n", + "{%- else %}\n", + "\n", + "# select the variable with the fewest unique values for the legend\n", + "legend_x_var = df[x_vars].nunique().idxmin()\n", + "\n", + "# for each y_var, plot all x_var combinations (except the one used for the legend)\n", + "for yv in y_vars:\n", + " for xv in [var for var in x_vars if var != legend_x_var]:\n", + " sns.heatmap(df.pivot_table(index=xv, columns=legend_x_var, values=yv))\n", + "\n", + " plt.xlabel(get_name(legend_x_var))\n", + " plt.ylabel(get_name(xv))\n", + " plt.title('{{name}}')\n", + " plt.grid(alpha=0.3)\n", + "\n", + " # plt.savefig(f'{{file_name}}_{y_var}.pdf')\n", + " plt.show()\n", + "\n", + "{%- endif %}" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/npf/types/web/web.py b/npf/types/web/web.py index 141af15..a1b971e 100644 --- a/npf/types/web/web.py +++ b/npf/types/web/web.py @@ -3,7 +3,7 @@ from npf_web_extension import app def prepare_web_export(datasets, all_results_df, path): - + # TODO: (error) this code loops but doesn't save the different datasets. 
# Getting parameters and measurements name = "undefined" parameters = [] diff --git a/npf/variable_to_series.py b/npf/variable_to_series.py new file mode 100644 index 0000000..ec293eb --- /dev/null +++ b/npf/variable_to_series.py @@ -0,0 +1,64 @@ +import sys +from npf.graph import Graph +if sys.version_info < (3, 7): + from orderedset import OrderedSet +else: + from ordered_set import OrderedSet +from collections import OrderedDict + +# Extract the variable key so it becomes a series +def extract_variable_to_series(grapher, key, vars_values, all_results, dyns, build, script) -> Graph: + if not key in dyns: + raise ValueError("Cannot extract %s because it is not a dynamic variable (%s are)" % (key, ', '.join(dyns))) + dyns.remove(key) + series = [] + versions = [] + values = list(vars_values[key]) + del vars_values[key] + try: + #values.sort() + pass + except TypeError: + print("ERROR : Cannot sort the following values :", values) + return + new_varsall = OrderedSet() + for i, value in enumerate(values): + newserie = OrderedDict() + for run, run_results in all_results.items(): + # if (graph_variables and not run in graph_variables): + # continue + if run.variables[key] == value: + newrun = run.copy() + del newrun.variables[key] + newserie[newrun] = run_results + new_varsall.add(newrun) + + if type(value) is tuple: + value = value[1] + versions.append(value) + nb = build.copy() + nb._pretty_name = str(value) + if len(grapher.graphmarkers) > 0: + nb._marker = grapher.graphmarkers[i % len(grapher.graphmarkers)] + series.append((script, nb, newserie)) + grapher.glob_legend_title = grapher.var_name(key) + vars_all = list(new_varsall) + + if len(dyns) == 1: + key = dyns[0] + do_sort = True + elif len(dyns) == 0: + do_sort = True + else: + key = "Variables" + do_sort = False + do_sort = grapher.config_bool_or_in('graph_x_sort', key, default=do_sort) + if (do_sort): + vars_all.sort() + graph = Graph(grapher) + graph.do_sort = do_sort + graph.key = key + graph.vars_all =
vars_all + graph.vars_values = vars_values + graph.series = series + return graph diff --git a/requirements.txt b/requirements.txt index 1832bfe..6ee58f6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,2 @@ . -pytest \ No newline at end of file +pytest diff --git a/setup.py b/setup.py index dea8438..2177f3a 100644 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ with open("README.md", "r") as fh: long_description = fh.read() -install_requires=[ +install_requires = [ 'require-python-3', 'pandas', 'numpy', @@ -29,6 +29,8 @@ 'packaging', 'importlib_metadata', 'npf-web-extension >= 0.6.4', + 'nbformat', + 'nbconvert', 'jinja2', 'spellwise', 'seaborn', @@ -47,23 +49,23 @@ url="https://github.com/tbarbette/npf", packages=setuptools.find_packages(), package_data={'': ['*.repo', '*.npf']}, - py_modules=['npf_run','npf_compare','npf_watch'], + py_modules=['npf_run', 'npf_compare', 'npf_watch'], classifiers=[ "Programming Language :: Python :: 3", "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", ], python_requires='>=3.6', - entry_points = { - 'console_scripts': [ - 'npf=npf_compare:main', - 'npf-regress=npf_run:main', - 'npf-run=npf_run:main', - 'npf-compare=npf_compare:main', - 'npf-watch=npf_watch:main', - 'npf-run.py=npf_run:main', - 'npf-compare.py=npf_compare:main', - 'npf-watch.py=npf_watch:main', - ], - }, + entry_points={ + 'console_scripts': [ + 'npf=npf_compare:main', + 'npf-regress=npf_run:main', + 'npf-run=npf_run:main', + 'npf-compare=npf_compare:main', + 'npf-watch=npf_watch:main', + 'npf-run.py=npf_run:main', + 'npf-compare.py=npf_compare:main', + 'npf-watch.py=npf_watch:main', + ], + }, ) diff --git a/tests/examples/3in-3out.npf b/tests/examples/3in-3out.npf new file mode 100644 index 0000000..6c342c4 --- /dev/null +++ b/tests/examples/3in-3out.npf @@ -0,0 +1,9 @@ +%variables +N=[1*64] +A={1, 2, 3} +B={0,100} + +%script +echo "RESULT-N $(($N * $A + $B))" +echo "RESULT-LOG $(( log($N * $A + $B) ))" +echo "RESULT-EXP 
$(( pow(2,$N * $A + $B) ))" diff --git a/tests/tcp/01-iperf.npf b/tests/tcp/01-iperf.npf index d19d43d..cf1776f 100644 --- a/tests/tcp/01-iperf.npf +++ b/tests/tcp/01-iperf.npf @@ -12,7 +12,7 @@ graph_background=7 %variables PARALLEL=[1-8] -WINDOW={16,512} +WINDOW={64,512} TIME=2 %script@server diff --git a/tests/tcp/04-iperf.npf b/tests/tcp/04-iperf.npf new file mode 100644 index 0000000..2f43039 --- /dev/null +++ b/tests/tcp/04-iperf.npf @@ -0,0 +1,33 @@ +%info +IPerf 2 Throughput & Transfer Experiment + +This version extracts the number of transferred bytes, and not only the throughput. + +%config +n_runs=5 +var_names={PARALLEL:Number of parallel connections,WINDOW:Window size (kB),THROUGHPUT:Throughput,TRANSFER:Transfer} +timeout=25 +default_repo=iperf2 +graph_background=7 + +%import graph-beautiful + +%variables +PARALLEL=[1-8] +WINDOW={64,512} +TIME=2 + +%script@server +iperf -s + +%script@client delay=1 +//Launch the program, copy the output to a log +iperf -c ${server:0:ip} -w ${WINDOW}k -t $TIME -P $PARALLEL 2>&1 | tee iperf.log + +//Parse the log to find the throughput & transfer rate +throughput=$(cat iperf.log | grep -ioE "[0-9.]+ [kmg]?bits" | tail -n 1) +transfer=$(cat iperf.log | grep -ioE "[0-9.]+ [kmg]?Bytes" | tail -n 1) + +//Give the throughput to NPF through stdout +echo "RESULT-THROUGHPUT $throughput" +echo "RESULT-TRANSFER $transfer"