Skip to content

Commit

Permalink
FIX-#431: fix bug with caching sampled df
Browse files Browse the repository at this point in the history
Signed-off-by: Kunal Agarwal <[email protected]>
  • Loading branch information
westernguy2 committed Jan 31, 2022
1 parent 222037a commit c696b2b
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 3 deletions.
13 changes: 11 additions & 2 deletions lux/executor/PandasExecutor.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@ def execute_sampling(ldf: LuxDataFrame):
if SAMPLE_FLAG and len(ldf) > SAMPLE_THRESH:
if ldf._sampled is None: # memoize unfiltered sample df
final_df = ldf.sample(n=SAMPLE_THRESH, random_state=1)
ldf._sampled = final_df
else:
final_df = ldf._sampled
ldf._message.add_unique(
f"Large dataframe detected: Lux is only visualizing a sample of {SAMPLE_THRESH} rows.",
priority=99,
Expand Down Expand Up @@ -105,7 +108,6 @@ def execute(vislist: VisList, ldf: LuxDataFrame, approx=False):
-------
None
"""

for vis in vislist:
# The vis data starts off being original or sampled dataframe
vis._source = ldf
Expand Down Expand Up @@ -375,7 +377,7 @@ def execute_2D_binning(vis: Vis) -> None:
with pd.option_context("mode.chained_assignment", None):
x_attr = vis.get_attr_by_channel("x")[0].attribute
y_attr = vis.get_attr_by_channel("y")[0].attribute

vis._vis_data = vis._vis_data.copy()
vis._vis_data["xBin"] = pd.cut(vis._vis_data[x_attr], bins=lux.config.heatmap_bin_size)
vis._vis_data["yBin"] = pd.cut(vis._vis_data[y_attr], bins=lux.config.heatmap_bin_size)

Expand Down Expand Up @@ -560,3 +562,10 @@ def compute_stats(self, ldf: LuxDataFrame):
index_column_name = ldf_sampled.index.name
ldf.unique_values[index_column_name] = list(ldf_sampled.index)
ldf.cardinality[index_column_name] = len(ldf_sampled.index)

# Propagate the computed stats to the memoized sampled df
if ldf._sampled is not None:
ldf._sampled.unique_values = ldf.unique_values
ldf._sampled._min_max = ldf._min_max
ldf._sampled.cardinality = ldf.cardinality
ldf._sampled._length = ldf._length
2 changes: 1 addition & 1 deletion tests/test_action.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def test_temporal_action(global_var):
test_data_vis_count = [4, 4, 2, 1, 1]
for entry in zip(test_data, test_data_vis_count):
df, num_vis = entry[0], entry[1]
df._repr_html_()
df._ipython_display_()
assert ("Temporal" in df.recommendation, "Temporal visualizations should be generated.")
recommended = df.recommendation["Temporal"]
assert (len(recommended) == num_vis, "Incorrect number of temporal visualizations generated.")
Expand Down

0 comments on commit c696b2b

Please sign in to comment.