diff --git a/lux/_config/config.py b/lux/_config/config.py index 3ec8ffcb..a3f24c49 100644 --- a/lux/_config/config.py +++ b/lux/_config/config.py @@ -32,6 +32,8 @@ def __init__(self): self._pandas_fallback = True self._interestingness_fallback = True self.heatmap_bin_size = 40 + self._streaming = True + ##################################### #### Optimization Configurations #### ##################################### diff --git a/lux/action/correlation.py b/lux/action/correlation.py index 05a300cf..6c9cfcfb 100644 --- a/lux/action/correlation.py +++ b/lux/action/correlation.py @@ -60,10 +60,6 @@ def correlation(ldf: LuxDataFrame, ignore_transpose: bool = True): {examples}. The visualizations are ranked from most to least linearly correlated based on \ their Pearson’s correlation score.", } - ignore_rec_flag = False - # Doesn't make sense to compute correlation if less than 4 data values - if len(ldf) < 5: - ignore_rec_flag = True # Then use the data populated in the vis list to compute score for vis in vlist: measures = vis.get_attr_by_data_model("measure") @@ -81,9 +77,6 @@ def correlation(ldf: LuxDataFrame, ignore_transpose: bool = True): vis.score = interestingness(vis, ldf) else: vis.score = -1 - if ignore_rec_flag: - recommendation["collection"] = [] - return recommendation vlist.sort() vlist = vlist.showK() recommendation["collection"] = vlist diff --git a/lux/action/custom.py b/lux/action/custom.py index 1fe01efc..f9161c77 100644 --- a/lux/action/custom.py +++ b/lux/action/custom.py @@ -45,36 +45,67 @@ def custom(ldf): lux.config.executor.execute(vlist, ldf) for vis in vlist: vis.score = interestingness(vis, ldf) - # ldf.clear_intent() vlist.sort(remove_invalid=True) return recommendation -def custom_actions(ldf): +def custom_action(ldf, action): """ - Generates user-defined vis based on globally defined actions. + Computing initial custom_action for lazy streaming of the rest of the actions Parameters ---------- ldf : lux.core.frame LuxDataFrame with underspecified intent. + action: action_name as string + e.g "Correlation" + Returns ------- - recommendations : Dict[str,obj] - object with a collection of visualizations that were previously registered. + One recommendation + """ + recommendation = None + display_condition = lux.config.actions[action].display_condition + if display_condition is None or (display_condition is not None and display_condition(ldf)): + args = lux.config.actions[action].args + if args: + recommendation = lux.config.actions[action].action(ldf, args) + else: + recommendation = lux.config.actions[action].action(ldf) + return recommendation + + +def filter_keys(ldf, loading_bar=None): """ - if len(lux.config.actions) > 0 and (len(ldf) > 0 or lux.config.executor.name != "PandasExecutor"): - recommendations = [] + Filters out actions before beginning computations so we know which tabs to display. 
+ Logic to filter out actions in lux/action/default.py + """ + + keys = [] + data_types = set(ldf._data_type.values()) + progress = 0 + if loading_bar is not None: + loading_bar.max = len(lux.config.actions.keys()) + if len(ldf) > 0 or lux.config.executor.name != "PandasExecutor": for action_name in lux.config.actions.keys(): display_condition = lux.config.actions[action_name].display_condition if display_condition is None or (display_condition is not None and display_condition(ldf)): - args = lux.config.actions[action_name].args - if args: - recommendation = lux.config.actions[action_name].action(ldf, args) - else: - recommendation = lux.config.actions[action_name].action(ldf) - recommendations.append(recommendation) - return recommendations - else: - return [] + if lux.config.actions[action_name].args: + if not lux.config.actions[action_name].args[0] in data_types: + continue + keys.append(action_name) + progress += 1 + if loading_bar is not None: + loading_bar.value = progress + + # # Pushing back correlation and geographical actions for performance reasons + if "correlation" in keys: + keys.pop(keys.index("correlation")) + keys.append("correlation") + + if "geographical" in keys: + keys.pop(keys.index("geographical")) + keys.append("geographical") + + return keys diff --git a/lux/action/default.py b/lux/action/default.py index 30b61644..5f6e8200 100644 --- a/lux/action/default.py +++ b/lux/action/default.py @@ -1,12 +1,17 @@ +import lux +from lux.action.custom import custom +from lux.action.correlation import correlation +from lux.action.univariate import univariate +from lux.action.enhance import enhance +from lux.action.filter import add_filter +from lux.action.generalize import generalize +from lux.action.temporal import temporal, create_temporal_vis +from lux.utils import utils +from lux.vis.VisList import VisList +from lux.interestingness.interestingness import interestingness + + def register_default_actions(): - import lux - from lux.action.custom import custom - from lux.action.correlation import correlation - from lux.action.univariate import univariate - from lux.action.enhance import enhance - from lux.action.filter import add_filter - from lux.action.generalize import generalize - from lux.action.temporal import temporal # display conditions for default actions no_vis = lambda ldf: (ldf.current_vis is None) or ( @@ -16,14 +21,108 @@ def register_default_actions(): multiple_current_vis = lambda ldf: ldf.current_vis is not None and len(ldf.current_vis) > 1 # globally register default actions - lux.config.register_action("correlation", correlation, no_vis) - lux.config.register_action("distribution", univariate, no_vis, "quantitative") - lux.config.register_action("occurrence", univariate, no_vis, "nominal") - lux.config.register_action("temporal", temporal, no_vis) + lux.config.register_action("correlation", correlation, correlation_check) + lux.config.register_action("distribution", univariate, distribution_check, "quantitative") + lux.config.register_action("occurrence", univariate, occurence_check, "nominal") + lux.config.register_action("temporal", temporal, temporal_check) lux.config.register_action("geographical", univariate, no_vis, "geographical") - lux.config.register_action("Enhance", enhance, one_current_vis) + lux.config.register_action("Enhance", enhance, enhance_check) lux.config.register_action("Filter", add_filter, one_current_vis) - lux.config.register_action("Generalize", generalize, one_current_vis) + lux.config.register_action("Generalize", generalize, 
generalize_check) lux.config.register_action("Custom", custom, multiple_current_vis) + + +def generalize_check(ldf): + filters = utils.get_filter_specs(ldf._intent) + attributes = list(filter(lambda x: x.value == "" and x.attribute != "Record", ldf._intent)) + if (len(attributes) <= 1 or len(attributes) > 4) and len(filters) == 0: + return False + else: + return ldf.current_vis is not None and len(ldf.current_vis) == 1 + + +def correlation_check(ldf): + if len(ldf) < 5: + return False + else: + if (ldf.current_vis is None) or (ldf.current_vis is not None and len(ldf.current_vis) == 0): + filter_specs = utils.get_filter_specs(ldf._intent) + intent = [ + lux.Clause("?", data_model="measure"), + lux.Clause("?", data_model="measure"), + ] + intent.extend(filter_specs) + vlist = VisList(intent, ldf) + if len(vlist) < 1: + return False + return True + else: + if len(ldf.columns) == 2: + return True + return False + + +def occurence_check(ldf): + filter_specs = utils.get_filter_specs(ldf._intent) + intent = [lux.Clause("?", data_type="nominal")] + intent.extend(filter_specs) + vlist = VisList(intent, ldf) + for vis in vlist: + vis.score = interestingness(vis, ldf) + vlist.sort() + + if len(vlist) < 1: + return False + else: + return (ldf.current_vis is None) or (ldf.current_vis is not None and len(ldf.current_vis) == 0) + + +def distribution_check(ldf): + filter_specs = utils.get_filter_specs(ldf._intent) + possible_attributes = [ + c + for c in ldf.columns + if ldf.data_type[c] == "quantitative" and ldf.cardinality[c] > 5 and c != "Number of Records" + ] + intent = [lux.Clause(possible_attributes)] + intent.extend(filter_specs) + vlist = VisList(intent, ldf) + if len(vlist) < 1: + return False + else: + return (ldf.current_vis is None) or (ldf.current_vis is not None and len(ldf.current_vis) == 0) + + +def temporal_check(ldf): + # Doesn't make sense to generate a line chart if there is less than 3 datapoints (pre-aggregated) + if len(ldf) < 3: + return False + + for c in ldf.columns: + if ldf.data_type[c] == "temporal": + return True + + filter_specs = utils.get_filter_specs(ldf._intent) + intent = [lux.Clause("?", data_type="temporal")] + intent.extend(filter_specs) + vlist = VisList(intent, ldf) + for vis in vlist: + vis.score = interestingness(vis, ldf) + vlist.sort() + + if len(vlist) < 1: + return False + else: + return (ldf.current_vis is None) or (ldf.current_vis is not None and len(ldf.current_vis) == 0) + + +def enhance_check(ldf): + filters = utils.get_filter_specs(ldf._intent) + intent = ldf._intent.copy() + attr_specs = list(filter(lambda x: x.value == "" and x.attribute != "Record", ldf._intent)) + if len(attr_specs) > 2: + return False + else: + return ldf.current_vis is not None and len(ldf.current_vis) == 1 diff --git a/lux/action/generalize.py b/lux/action/generalize.py index 385891d4..1c186b3b 100644 --- a/lux/action/generalize.py +++ b/lux/action/generalize.py @@ -51,10 +51,6 @@ def generalize(ldf): "long_description": f"Remove one aspect of the Current Vis. We can either remove an attribute or filter from {intended_attrs}.", } # to observe a more general trend - # if we do no have enough column attributes or too many, return no vis. 
- if len(attributes) < 1 or len(attributes) > 4: - recommendation["collection"] = [] - return recommendation # for each column specification, create a copy of the ldf's vis and remove the column specification # then append the vis to the output if len(attributes) > 1: diff --git a/lux/action/temporal.py b/lux/action/temporal.py index 6bf831ce..6a44ee25 100644 --- a/lux/action/temporal.py +++ b/lux/action/temporal.py @@ -61,10 +61,6 @@ def temporal(ldf): + " followed by trends across other timescales (e.g., year, month, week, day)." ) - # Doesn't make sense to generate a line chart if there is less than 3 datapoints (pre-aggregated) - if len(ldf) < 3: - recommendation["collection"] = [] - return recommendation vlist.sort() recommendation["collection"] = vlist return recommendation diff --git a/lux/action/univariate.py b/lux/action/univariate.py index 1e6d357e..1092a3e9 100644 --- a/lux/action/univariate.py +++ b/lux/action/univariate.py @@ -58,9 +58,6 @@ def univariate(ldf, *args): "description": "Show univariate histograms of
quantitative
attributes.", "long_description": f"Distribution displays univariate histogram distributions of all quantitative attributes{examples}. Visualizations are ranked from most to least skewed.", } - # Doesn't make sense to generate a histogram if there is less than 5 datapoints (pre-aggregated) - if len(ldf) < 5: - ignore_rec_flag = True elif data_type_constraint == "nominal": possible_attributes = [ c for c in ldf.columns if ldf.data_type[c] == "nominal" and c != "Number of Records" @@ -89,6 +86,14 @@ def univariate(ldf, *args): "description": "Show choropleth maps of
geographic
attributes", "long_description": f"Occurence displays choropleths of averages for some geographic attribute{examples}. Visualizations are ranked by diversity of the geographic attribute.", } + elif data_type_constraint == "temporal": + intent = [lux.Clause("?", data_type="temporal")] + intent.extend(filter_specs) + recommendation = { + "action": "Temporal", + "description": "Show trends over
time-related
attributes.", + "long_description": "Temporal displays line charts for all attributes related to datetimes in the dataframe.", + } if ignore_rec_flag: recommendation["collection"] = [] return recommendation diff --git a/lux/core/frame.py b/lux/core/frame.py index 2a1359ab..dd728bbc 100644 --- a/lux/core/frame.py +++ b/lux/core/frame.py @@ -90,6 +90,7 @@ def __init__(self, *args, **kw): self._min_max = None self.pre_aggregated = None self._type_override = {} + self.loading_bar = None warnings.formatwarning = lux.warning_format @property @@ -324,6 +325,7 @@ def recommendation(self): self.maintain_metadata() self.current_vis = Compiler.compile_intent(self, self._intent) self.maintain_recs() + self.compute_remaining_actions() return self._recommendation @recommendation.setter @@ -426,12 +428,32 @@ def maintain_recs(self, is_series="DataFrame"): elif not (len(rec_df) < 5 and not rec_df.pre_aggregated and not is_sql_tbl) and not ( self.index.nlevels >= 2 or self.columns.nlevels >= 2 ): - from lux.action.custom import custom_actions + from lux.action.custom import custom_action, filter_keys + + self.action_keys = filter_keys(rec_df, self.loading_bar) + + if lux.config._streaming: + # Compute one tab to display on initial widget + if len(self.action_keys) > 0: + rec = custom_action(rec_df, self.action_keys[0]) + rec_df._append_rec(rec_infolist, rec) + self.action_keys.pop(0) + + # Fill the rest of the tabs with empty (non-clickable) tabs + for action_name in self.action_keys: + rec = { + "action": action_name.capitalize(), + "description": "", + "long_description": "", + "collection": [], + } + rec_infolist.append(rec) + else: + for action_name in self.action_keys: + rec = custom_action(rec_df, action_name) + rec_df._append_rec(rec_infolist, rec) + self.action_keys = [] - # generate vis from globally registered actions and append to dataframe - custom_action_collection = custom_actions(rec_df) - for rec in custom_action_collection: - rec_df._append_rec(rec_infolist, rec) lux.config.update_actions["flag"] = False # Store _rec_info into a more user-friendly dictionary form @@ -439,17 +461,22 @@ def maintain_recs(self, is_series="DataFrame"): for rec_info in rec_infolist: action_type = rec_info["action"] vlist = rec_info["collection"] - if len(vlist) > 0: - rec_df._recommendation[action_type] = vlist + rec_df._recommendation[action_type] = vlist rec_df._rec_info = rec_infolist rec_df.show_all_column_vis() - if lux.config.render_widget: - self._widget = rec_df.render_widget() + + self._widget = rec_df.render_widget( + pandasHtml=rec_df.to_html(max_rows=10, classes="pandasStyle") + ) + # re-render widget for the current dataframe if previous rec is not recomputed elif show_prev: rec_df.show_all_column_vis() if lux.config.render_widget: - self._widget = rec_df.render_widget() + self._widget = rec_df.render_widget( + pandasHtml=rec_df.to_html(max_rows=10, classes="pandasStyle") + ) + self._recs_fresh = True ####################################################### @@ -551,10 +578,8 @@ def set_intent_on_click(self, change): intent_action = list(self._widget.selectedIntentIndex.keys())[0] vis = self._recommendation[intent_action][self._widget.selectedIntentIndex[intent_action][0]] self.set_intent_as_vis(vis) - - self.maintain_metadata() - self.current_vis = Compiler.compile_intent(self, self._intent) self.maintain_recs() + self.compute_remaining_actions() with self.output: clear_output() @@ -572,53 +597,51 @@ def _ipython_display_(self): if self._pandas_only: display(self.display_pandas()) self._pandas_only 
= False - else: - if not self.index.nlevels >= 2 or self.columns.nlevels >= 2: - self.maintain_metadata() - if self._intent != [] and (not hasattr(self, "_compiled") or not self._compiled): - from lux.processor.Compiler import Compiler + if not self.index.nlevels >= 2 or self.columns.nlevels >= 2: + self.maintain_metadata() - self.current_vis = Compiler.compile_intent(self, self._intent) + if self._intent != [] and (not hasattr(self, "_compiled") or not self._compiled): + from lux.processor.Compiler import Compiler - if lux.config.default_display == "lux": - self._toggle_pandas_display = False - else: - self._toggle_pandas_display = True + self.current_vis = Compiler.compile_intent(self, self._intent) + + self.output = widgets.Output() + display(self.output) - # df_to_display.maintain_recs() # compute the recommendations (TODO: This can be rendered in another thread in the background to populate self._widget) - self.maintain_recs() + # Initialized view before actions are computed + self.loading_bar = widgets.IntProgress( + value=0, + min=0, + max=10, + description="Loading:", + bar_style="info", + style={"bar_color": "#add8e6"}, + orientation="horizontal", + ) + with self.output: + display(self.loading_bar) + + # df_to_display.maintain_recs() # compute the recommendations (TODO: This can be rendered in another thread in the background to populate self._widget) + self.maintain_recs() - # Observers(callback_function, listen_to_this_variable) - self._widget.observe(self.remove_deleted_recs, names="deletedIndices") - self._widget.observe(self.set_intent_on_click, names="selectedIntentIndex") + with self.output: + clear_output() + display(self._widget) - button = widgets.Button( - description="Toggle Pandas/Lux", - layout=widgets.Layout(width="140px", top="5px"), - ) - self.output = widgets.Output() - display(button, self.output) - - def on_button_clicked(b): - with self.output: - if b: - self._toggle_pandas_display = not self._toggle_pandas_display - clear_output() - if self._toggle_pandas_display: - display(self.display_pandas()) - else: - # b.layout.display = "none" - display(self._widget) - # b.layout.display = "inline-block" - - button.on_click(on_button_clicked) - on_button_clicked(None) + # Observers(callback_function, listen_to_this_variable) + self._widget.observe(self.remove_deleted_recs, names="deletedIndices") + self._widget.observe(self.set_intent_on_click, names="selectedIntentIndex") + + if len(self._recommendation) > 0: + if hasattr(self, "action_keys"): + self.compute_remaining_actions() except (KeyboardInterrupt, SystemExit): raise except Exception: if lux.config.pandas_fallback: + clear_output() warnings.warn( "\nUnexpected error in rendering Lux widget and recommendations. 
" "Falling back to Pandas display.\n" @@ -630,10 +653,33 @@ def on_button_clicked(b): else: raise + def compute_remaining_actions(self): + # Lazily load the rest of the tabs + from lux.action.custom import custom_action + + i = 1 + for action_name in self.action_keys: + rec = custom_action(self, action_name) + if rec is not None and (len(rec["collection"])) > 0: + self._rec_info.pop(i) + self._rec_info.insert(i, rec) + + vlist = self._rec_info[i]["collection"] + if len(vlist) > 0: + self._recommendation[rec["action"]] = vlist + + new_widget = self.render_widget() + self._widget.recommendations = new_widget.recommendations + self._widget.loadNewTab = action_name.capitalize() + + i += 1 + + self.action_keys = [] + def display_pandas(self): return self.to_pandas() - def render_widget(self, renderer: str = "altair", input_current_vis=""): + def render_widget(self, renderer: str = "altair", input_current_vis="", pandasHtml=""): """ Generate a LuxWidget based on the LuxDataFrame @@ -675,11 +721,15 @@ def render_widget(self, renderer: str = "altair", input_current_vis=""): import luxwidget widgetJSON = self.to_JSON(self._rec_info, input_current_vis=input_current_vis) + if pandasHtml is None: + pandasHtml = "" + return luxwidget.LuxWidget( currentVis=widgetJSON["current_vis"], recommendations=widgetJSON["recommendation"], intent=LuxDataFrame.intent_to_string(self._intent), message=self._message.to_html(), + pandasHtml=pandasHtml, config={"plottingScale": lux.config.plotting_scale}, ) @@ -739,14 +789,14 @@ def rec_to_JSON(recs): rec_copy = copy.deepcopy(recs) for idx, rec in enumerate(rec_copy): - if len(rec["collection"]) > 0: - rec["vspec"] = [] - for vis in rec["collection"]: - chart = vis.to_code(language=lux.config.plotting_backend, prettyOutput=False) - rec["vspec"].append(chart) - rec_lst.append(rec) - # delete since not JSON serializable - del rec_lst[idx]["collection"] + rec["vspec"] = [] + for vis in rec["collection"]: + chart = vis.to_code(language=lux.config.plotting_backend, prettyOutput=False) + rec["vspec"].append(chart) + rec_lst.append(rec) + # delete since not JSON serializable + del rec_lst[idx]["collection"] + return rec_lst def save_as_html(self, filename: str = "export.html", output=False): @@ -762,6 +812,7 @@ def save_as_html(self, filename: str = "export.html", output=False): if self.widget is None: self.maintain_metadata() self.maintain_recs() + self.compute_remaining_actions() from ipywidgets.embed import embed_data diff --git a/lux/core/series.py b/lux/core/series.py index 75e7602d..cbd82a27 100644 --- a/lux/core/series.py +++ b/lux/core/series.py @@ -152,49 +152,36 @@ def _ipython_display_(self): print(series_repr) ldf._pandas_only = False else: - if not self.index.nlevels >= 2: - ldf.maintain_metadata() - if lux.config.default_display == "lux": - self._toggle_pandas_display = False - else: - self._toggle_pandas_display = True + # Initialized view before actions are computed + self.loadingBar = widgets.IntProgress( + value=0, + min=0, + max=10, + description="Loading:", + bar_style="info", + style={"bar_color": "#add8e6"}, + orientation="horizontal", + ) + display(self.loadingBar) + + if ldf._intent != [] and (not hasattr(ldf, "_compiled") or not ldf._compiled): + from lux.processor.Compiler import Compiler + + ldf.current_vis = Compiler.compile_intent(ldf, ldf._intent) # df_to_display.maintain_recs() # compute the recommendations (TODO: This can be rendered in another thread in the background to populate self._widget) ldf.maintain_recs(is_series="Series") + 
clear_output() + display(self._widget) + # Observers(callback_function, listen_to_this_variable) ldf._widget.observe(ldf.remove_deleted_recs, names="deletedIndices") ldf._widget.observe(ldf.set_intent_on_click, names="selectedIntentIndex") - self._widget = ldf._widget - self._recommendation = ldf._recommendation - - # box = widgets.Box(layout=widgets.Layout(display='inline')) - button = widgets.Button( - description="Toggle Pandas/Lux", - layout=widgets.Layout(width="140px", top="5px"), - ) - ldf.output = widgets.Output() - # box.children = [button,output] - # output.children = [button] - # display(box) - display(button, ldf.output) - - def on_button_clicked(b): - with ldf.output: - if b: - self._toggle_pandas_display = not self._toggle_pandas_display - clear_output() - if self._toggle_pandas_display: - print(series_repr) - else: - # b.layout.display = "none" - display(ldf._widget) - # b.layout.display = "inline-block" - - button.on_click(on_button_clicked) - on_button_clicked(None) + if len(ldf._widget.recommendations) <= 1 and hasattr(ldf, "action_keys"): + ldf.compute_remaining_actions() except (KeyboardInterrupt, SystemExit): raise diff --git a/lux/vis/Vis.py b/lux/vis/Vis.py index ea5eb9f7..3c090f73 100644 --- a/lux/vis/Vis.py +++ b/lux/vis/Vis.py @@ -131,6 +131,7 @@ def _ipython_display_(self): recommendations=[], intent="", message="", + pandasHtml="", config={"plottingScale": lux.config.plotting_scale}, ) display(widget) diff --git a/lux/vis/VisList.py b/lux/vis/VisList.py index 97e51eb3..db77fa58 100644 --- a/lux/vis/VisList.py +++ b/lux/vis/VisList.py @@ -277,6 +277,7 @@ def _ipython_display_(self): recommendations=recJSON, intent="", message="", + pandasHtml="", config={"plottingScale": lux.config.plotting_scale}, ) display(self._widget) diff --git a/tests/test_action.py b/tests/test_action.py index b1d5ddbd..cccd0dd6 100644 --- a/tests/test_action.py +++ b/tests/test_action.py @@ -56,7 +56,7 @@ def test_temporal_action(global_var): test_data_vis_count = [4, 4, 2, 1, 1] for entry in zip(test_data, test_data_vis_count): df, num_vis = entry[0], entry[1] - df._repr_html_() + df._ipython_display_() assert ("Temporal" in df.recommendation, "Temporal visualizations should be generated.") recommended = df.recommendation["Temporal"] assert (len(recommended) == num_vis, "Incorrect number of temporal visualizations generated.") @@ -220,8 +220,10 @@ def test_custom_aggregation(global_var): df = pytest.college_df df.set_intent(["HighestDegree", lux.Clause("AverageCost", aggregation=np.ptp)]) + df._ipython_display_() assert list(df.recommendation.keys()) == ["Enhance", "Filter", "Generalize"] + df.clear_intent() diff --git a/tests/test_columns.py b/tests/test_columns.py index 37361831..8fe2007c 100644 --- a/tests/test_columns.py +++ b/tests/test_columns.py @@ -126,22 +126,24 @@ def test_abbrev_agg(): def test_int_columns(global_var): df = pd.read_csv("lux/data/college.csv") df.columns = range(len(df.columns)) - assert list(df.recommendation.keys()) == ["Correlation", "Distribution", "Occurrence"] + assert set(df.recommendation.keys()) == set(["Correlation", "Distribution", "Occurrence"]) df.intent = [8, 3] - assert list(df.recommendation.keys()) == ["Enhance", "Filter", "Generalize"] + assert set(df.recommendation.keys()) == set(["Enhance", "Filter", "Generalize"]) df.intent = [0] - assert list(df.recommendation.keys()) == ["Enhance", "Filter"] + assert set(df.recommendation.keys()) == set(["Enhance", "Filter"]) def test_name_column(global_var): df = pd.read_csv("lux/data/car.csv") new_df = 
df.rename(columns={"Name": "name"}) - assert list(new_df.recommendation.keys()) == [ - "Correlation", - "Distribution", - "Occurrence", - "Temporal", - ] + assert set(new_df.recommendation.keys()) == set( + [ + "Correlation", + "Distribution", + "Occurrence", + "Temporal", + ] + ) assert len(new_df.recommendation["Correlation"]) assert new_df["name"][0] != None assert (new_df["name"].unique() != None)[0] diff --git a/tests/test_config.py b/tests/test_config.py index 701b4c6e..b93f543c 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -229,6 +229,7 @@ def test_sampling_flag_config(): N = int(1.1 * lux.config.sampling_cap) df = pd.DataFrame({"col1": np.random.rand(N), "col2": np.random.rand(N)}) df.maintain_recs() + df.compute_remaining_actions() assert len(df.recommendation["Correlation"][0].data) == lux.config.sampling_cap lux.config.sampling = True lux.config.heatmap = True diff --git a/tests/test_pandas.py b/tests/test_pandas.py index 6f3dea8c..f306b794 100644 --- a/tests/test_pandas.py +++ b/tests/test_pandas.py @@ -53,4 +53,4 @@ def test_convert_dtype(global_var): df = pytest.college_df cdf = df.convert_dtypes() cdf._ipython_display_() - assert list(cdf.recommendation.keys()) == ["Correlation", "Distribution", "Occurrence"] + assert set(cdf.recommendation.keys()) == set(["Correlation", "Distribution", "Occurrence"]) diff --git a/tests/test_pandas_coverage.py b/tests/test_pandas_coverage.py index 7e7cd8c4..d2c0d4de 100644 --- a/tests/test_pandas_coverage.py +++ b/tests/test_pandas_coverage.py @@ -132,12 +132,15 @@ def test_rename3(global_var): "col10", ] df._ipython_display_() - assert list(df.recommendation.keys()) == [ - "Correlation", - "Distribution", - "Occurrence", - "Temporal", - ] + assert set(df.recommendation.keys()) == set( + [ + "Correlation", + "Distribution", + "Occurrence", + "Temporal", + ] + ) + assert len(df.cardinality) == 10 assert "col2" in list(df.cardinality.keys()) @@ -147,12 +150,16 @@ def test_concat(global_var): df = pd.read_csv("lux/data/car.csv") df["Year"] = pd.to_datetime(df["Year"], format="%Y") new_df = pd.concat([df.loc[:, "Name":"Cylinders"], df.loc[:, "Year":"Origin"]], axis="columns") + new_df._ipython_display_() - assert list(new_df.recommendation.keys()) == [ - "Distribution", - "Occurrence", - "Temporal", - ] + assert set(new_df.recommendation.keys()) == set( + [ + "Distribution", + "Occurrence", + "Temporal", + ] + ) + assert len(new_df.cardinality) == 5 @@ -220,7 +227,7 @@ def test_groupby_agg_very_small(global_var): # df["Year"] = pd.to_datetime(df["Year"], format='%Y') # new_df = df.groupby(["Year", "Cylinders"]).agg(sum).stack().reset_index() # new_df._ipython_display_() -# assert list(new_df.recommendation.keys() ) == ['Column Groups'] # TODO +# assert set(new_df.recommendation.keys() ) == set(['Column Groups']) # TODO # assert len(new_df.cardinality) == 7 # TODO @@ -229,12 +236,15 @@ def test_query(global_var): df["Year"] = pd.to_datetime(df["Year"], format="%Y") new_df = df.query("Weight > 3000") new_df._ipython_display_() - assert list(new_df.recommendation.keys()) == [ - "Correlation", - "Distribution", - "Occurrence", - "Temporal", - ] + assert set(new_df.recommendation.keys()) == set( + [ + "Correlation", + "Distribution", + "Occurrence", + "Temporal", + ] + ) + assert len(new_df.cardinality) == 10 @@ -243,12 +253,14 @@ def test_pop(global_var): df["Year"] = pd.to_datetime(df["Year"], format="%Y") df.pop("Weight") df._ipython_display_() - assert list(df.recommendation.keys()) == [ - "Correlation", - "Distribution", - 
"Occurrence", - "Temporal", - ] + assert set(df.recommendation.keys()) == set( + [ + "Correlation", + "Distribution", + "Occurrence", + "Temporal", + ] + ) assert len(df.cardinality) == 9 @@ -257,7 +269,7 @@ def test_transform(global_var): df["Year"] = pd.to_datetime(df["Year"], format="%Y") new_df = df.iloc[:, 1:].groupby("Origin").transform(sum) new_df._ipython_display_() - assert list(new_df.recommendation.keys()) == ["Occurrence"] + assert set(new_df.recommendation.keys()) == set(["Occurrence"]) assert len(new_df.cardinality) == 7 @@ -267,12 +279,16 @@ def test_get_group(global_var): gbobj = df.groupby("Origin") new_df = gbobj.get_group("Japan") new_df._ipython_display_() - assert list(new_df.recommendation.keys()) == [ - "Correlation", - "Distribution", - "Occurrence", - "Temporal", - ] + new_df._ipython_display_() + assert set(new_df.recommendation.keys()) == set( + [ + "Correlation", + "Distribution", + "Occurrence", + "Temporal", + ] + ) + assert len(new_df.cardinality) == 10 @@ -282,12 +298,14 @@ def test_applymap(global_var): mapping = {"USA": 0, "Europe": 1, "Japan": 2} df["Origin"] = df[["Origin"]].applymap(mapping.get) df._ipython_display_() - assert list(df.recommendation.keys()) == [ - "Correlation", - "Distribution", - "Occurrence", - "Temporal", - ] + assert set(df.recommendation.keys()) == set( + [ + "Correlation", + "Distribution", + "Occurrence", + "Temporal", + ] + ) assert len(df.cardinality) == 10 @@ -296,12 +314,14 @@ def test_strcat(global_var): df["Year"] = pd.to_datetime(df["Year"], format="%Y") df["combined"] = df["Origin"].str.cat(df["Brand"], sep=", ") df._ipython_display_() - assert list(df.recommendation.keys()) == [ - "Correlation", - "Distribution", - "Occurrence", - "Temporal", - ] + assert set(df.recommendation.keys()) == set( + [ + "Correlation", + "Distribution", + "Occurrence", + "Temporal", + ] + ) assert len(df.cardinality) == 11 @@ -314,7 +334,7 @@ def test_named_agg(global_var): mean_displacement=("Displacement", "mean"), ) new_df._ipython_display_() - assert list(new_df.recommendation.keys()) == ["Column Groups"] + assert set(new_df.recommendation.keys()) == set(["Column Groups"]) assert len(new_df.cardinality) == 4 @@ -323,12 +343,14 @@ def test_change_dtype(global_var): df["Year"] = pd.to_datetime(df["Year"], format="%Y") df["Cylinders"] = pd.Series(df["Cylinders"], dtype="Int64") df._ipython_display_() - assert list(df.recommendation.keys()) == [ - "Correlation", - "Distribution", - "Occurrence", - "Temporal", - ] + assert set(df.recommendation.keys()) == set( + [ + "Correlation", + "Distribution", + "Occurrence", + "Temporal", + ] + ) assert len(df.data_type) == 10 @@ -337,12 +359,14 @@ def test_get_dummies(global_var): df["Year"] = pd.to_datetime(df["Year"], format="%Y") new_df = pd.get_dummies(df) new_df._ipython_display_() - assert list(new_df.recommendation.keys()) == [ - "Correlation", - "Distribution", - "Occurrence", - "Temporal", - ] + assert set(new_df.recommendation.keys()) == set( + [ + "Correlation", + "Distribution", + "Occurrence", + "Temporal", + ] + ) assert len(new_df.data_type) == 339 @@ -352,12 +376,15 @@ def test_drop(global_var): new_df = df.drop([0, 1, 2], axis="rows") new_df2 = new_df.drop(["Name", "MilesPerGal", "Cylinders"], axis="columns") new_df2._ipython_display_() - assert list(new_df2.recommendation.keys()) == [ - "Correlation", - "Distribution", - "Occurrence", - "Temporal", - ] + assert set(new_df2.recommendation.keys()) == set( + [ + "Correlation", + "Distribution", + "Occurrence", + "Temporal", + ] + ) + 
assert len(new_df2.cardinality) == 7 @@ -367,12 +394,15 @@ def test_merge(global_var): new_df = df.drop([0, 1, 2], axis="rows") new_df2 = pd.merge(df, new_df, how="left", indicator=True) new_df2._ipython_display_() - assert list(new_df2.recommendation.keys()) == [ - "Correlation", - "Distribution", - "Occurrence", - "Temporal", - ] # TODO once bug is fixed + assert set(new_df2.recommendation.keys()) == set( + [ + "Correlation", + "Distribution", + "Occurrence", + "Temporal", + ] + ) # TODO once bug is fixed + assert len(new_df2.cardinality) == 11 @@ -381,12 +411,15 @@ def test_prefix(global_var): df["Year"] = pd.to_datetime(df["Year"], format="%Y") new_df = df.add_prefix("1_") new_df._ipython_display_() - assert list(new_df.recommendation.keys()) == [ - "Correlation", - "Distribution", - "Occurrence", - "Temporal", - ] + assert set(new_df.recommendation.keys()) == set( + [ + "Correlation", + "Distribution", + "Occurrence", + "Temporal", + ] + ) + assert len(new_df.cardinality) == 10 assert new_df.cardinality["1_Name"] == 300 @@ -396,32 +429,36 @@ def test_loc(global_var): df["Year"] = pd.to_datetime(df["Year"], format="%Y") new_df = df.loc[:, "Displacement":"Origin"] new_df._ipython_display_() - assert list(new_df.recommendation.keys()) == [ - "Correlation", - "Distribution", - "Occurrence", - "Temporal", - ] + assert set(new_df.recommendation.keys()) == set( + [ + "Correlation", + "Distribution", + "Occurrence", + "Temporal", + ] + ) assert len(new_df.cardinality) == 6 new_df = df.loc[0:10, "Displacement":"Origin"] new_df._ipython_display_() - assert list(new_df.recommendation.keys()) == [ - "Correlation", - "Distribution", - "Occurrence", - "Temporal", - ] + assert set(new_df.recommendation.keys()) == set( + [ + "Correlation", + "Distribution", + "Occurrence", + "Temporal", + ] + ) assert len(new_df.cardinality) == 6 new_df = df.loc[0:10, "Displacement":"Horsepower"] new_df._ipython_display_() - assert list(new_df.recommendation.keys()) == ["Correlation", "Distribution"] + assert set(new_df.recommendation.keys()) == set(["Correlation", "Distribution"]) assert len(new_df.cardinality) == 2 import numpy as np inter_df = df.groupby("Brand")[["Acceleration", "Weight", "Horsepower"]].agg(np.mean) new_df = inter_df.loc["chevrolet":"fiat", "Acceleration":"Weight"] new_df._ipython_display_() - assert list(new_df.recommendation.keys()) == ["Column Groups"] + assert set(new_df.recommendation.keys()) == set(["Column Groups"]) assert len(new_df.cardinality) == 3 @@ -430,32 +467,36 @@ def test_iloc(global_var): df["Year"] = pd.to_datetime(df["Year"], format="%Y") new_df = df.iloc[:, 3:9] new_df._ipython_display_() - assert list(new_df.recommendation.keys()) == [ - "Correlation", - "Distribution", - "Occurrence", - "Temporal", - ] + assert set(new_df.recommendation.keys()) == set( + [ + "Correlation", + "Distribution", + "Occurrence", + "Temporal", + ] + ) assert len(new_df.cardinality) == 6 new_df = df.iloc[0:11, 3:9] new_df._ipython_display_() - assert list(new_df.recommendation.keys()) == [ - "Correlation", - "Distribution", - "Occurrence", - "Temporal", - ] + assert set(new_df.recommendation.keys()) == set( + [ + "Correlation", + "Distribution", + "Occurrence", + "Temporal", + ] + ) assert len(new_df.cardinality) == 6 new_df = df.iloc[0:11, 3:5] new_df._ipython_display_() - assert list(new_df.recommendation.keys()) == ["Correlation", "Distribution"] + assert set(new_df.recommendation.keys()) == set(["Correlation", "Distribution"]) assert len(new_df.cardinality) == 2 import numpy as np inter_df = 
df.groupby("Brand")[["Acceleration", "Weight", "Horsepower"]].agg(np.mean) new_df = inter_df.iloc[5:10, 0:2] new_df._ipython_display_() - assert list(new_df.recommendation.keys()) == ["Column Groups"] + assert set(new_df.recommendation.keys()) == set(["Column Groups"]) assert len(new_df.cardinality) == 3 @@ -638,12 +679,14 @@ def test_read_json(global_var): url = "https://raw.githubusercontent.com/lux-org/lux-datasets/master/data/car.json" df = pd.read_json(url) df._ipython_display_() - assert list(df.recommendation.keys()) == [ - "Correlation", - "Distribution", - "Occurrence", - "Temporal", - ] + assert set(df.recommendation.keys()) == set( + [ + "Correlation", + "Distribution", + "Occurrence", + "Temporal", + ] + ) assert len(df.data_type) == 10 @@ -651,7 +694,7 @@ def test_read_sas(global_var): url = "https://github.com/lux-org/lux-datasets/blob/master/data/airline.sas7bdat?raw=true" df = pd.read_sas(url, format="sas7bdat") df._ipython_display_() - assert list(df.recommendation.keys()) == ["Correlation", "Distribution", "Temporal"] + assert set(df.recommendation.keys()) == set(["Correlation", "Distribution", "Temporal"]) assert len(df.data_type) == 6 diff --git a/tests/test_performance.py b/tests/test_performance.py index a1d65122..4634688d 100644 --- a/tests/test_performance.py +++ b/tests/test_performance.py @@ -25,9 +25,11 @@ def test_lazy_maintain_performance_census(global_var): df = pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/census.csv?raw=true") tic = time.perf_counter() df.maintain_recs() + df.compute_remaining_actions() toc = time.perf_counter() delta = toc - tic df.maintain_recs() + df.compute_remaining_actions() toc2 = time.perf_counter() delta2 = toc2 - toc print(f"1st display Performance: {delta:0.4f} seconds") @@ -42,9 +44,11 @@ def test_lazy_maintain_performance_census(global_var): lux.config.lazy_maintain = False tic = time.perf_counter() df.maintain_recs() + df.compute_remaining_actions() toc = time.perf_counter() delta = toc - tic df.maintain_recs() + df.compute_remaining_actions() toc2 = time.perf_counter() delta2 = toc2 - toc print(f"1st display Performance: {delta:0.4f} seconds") diff --git a/tests/test_type.py b/tests/test_type.py index 5395c661..953457c5 100644 --- a/tests/test_type.py +++ b/tests/test_type.py @@ -230,8 +230,8 @@ def test_set_data_type(): ) with pytest.warns(UserWarning) as w: df._ipython_display_() - assert "starter template that you can use" in str(w[-1].message) - assert "df.set_data_type" in str(w[-1].message) + assert "df.set_data_type" in str(w[0].message) + assert "starter template that you can use" in str(w[0].message) df.set_data_type({"Month": "nominal", "Year": "nominal"}) assert df.data_type["Month"] == "nominal"