Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Lazy Streaming of Tabs #291

Open
wants to merge 52 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
52 commits
Select commit Hold shift + click to select a range
b2563e6
changed button to tab and lazily load widget
Feb 16, 2021
381b129
adding tabbing
Feb 17, 2021
f851096
removed comments
Feb 21, 2021
bcb8917
Merge branch 'master' into lazy_stream
Feb 21, 2021
b024cfa
some progress on lazy streaming
Feb 22, 2021
f90d5b5
added datetime check for temporal action before processing
Feb 23, 2021
2c2a59f
reversed order of actions to put correlation at the back
Feb 23, 2021
83a19d2
changed to pushing out tabs as they are done
Mar 2, 2021
4c59f84
Merge branch 'master' into lazy_stream
Mar 2, 2021
54083b7
debugging intent
Mar 2, 2021
bd27bd9
fixed intent bug
Mar 2, 2021
693252d
changed tests to use set and fixed bugs
Mar 2, 2021
0687fd7
added back if statement
Mar 2, 2021
f6378ea
updated test due to ordering of tabs
Mar 2, 2021
a1e56fd
fixed merge conflicts
Mar 3, 2021
bda3c4b
fixed merge conflicts
Mar 4, 2021
1d2555e
Merge branch 'master' into lazy_stream
Mar 7, 2021
a420a64
added config to turn off streaming, support for greyed tabs
Mar 11, 2021
9398896
action logic
Mar 14, 2021
1b5c923
changed to pre-displaying tabs
Mar 14, 2021
4d2be94
override css
Mar 14, 2021
3fe6e68
Merge branch 'master' into lazy_stream
Mar 14, 2021
e60ef84
reformatted with black
Mar 14, 2021
86d6c91
code cleanup
Mar 14, 2021
e61bda7
changed back to repr_html
Mar 14, 2021
f772fc7
fixed html
Mar 14, 2021
c485d2d
reformatted with black
Mar 14, 2021
129d7c8
fixed merge conflicts
Apr 28, 2021
593e6b0
some updates
Apr 29, 2021
7dd15b2
loadingbar added
Apr 29, 2021
a6d05ba
merged into master
Apr 29, 2021
a70a9e7
latest commit
Apr 29, 2021
b2f272f
Delete communities.csv
jakeatgalileo Apr 29, 2021
c60cfc0
Fixed a couple merge conflicts
Apr 29, 2021
cc4129b
added in css for pandas
Apr 30, 2021
e6c5720
merged into master
May 1, 2021
f226cca
formatted with black
May 1, 2021
3ef1f7b
fixed some tests
May 3, 2021
0c34bb5
updated display
May 3, 2021
aef50f8
progress on temporal
May 4, 2021
4d32f1f
fixed pandas coverage tests
May 4, 2021
45f8330
Merge branch 'master' into lazy_stream
May 4, 2021
8493b91
progress
May 5, 2021
00bdf3d
Merge branch 'master' into lazy_stream
May 5, 2021
07c47ba
fixed last tests
May 5, 2021
9ce4681
readded output widget for intent button
May 5, 2021
998086a
updated per comments
May 6, 2021
5aea05b
reformatted with black
May 6, 2021
16d2880
fixed merge conflicts
Jun 28, 2021
60aab30
fixed loading bar bug
Jun 30, 2021
c74b1aa
Merge branch 'master' into lazy_stream
Jul 1, 2021
5f0f9c2
ran black
Jul 3, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions lux/_config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ def __init__(self):
self._pandas_fallback = True
self._interestingness_fallback = True
self.heatmap_bin_size = 40
self._streaming = True

#####################################
#### Optimization Configurations ####
#####################################
Expand Down
7 changes: 0 additions & 7 deletions lux/action/correlation.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,6 @@ def correlation(ldf: LuxDataFrame, ignore_transpose: bool = True):
{examples}. The visualizations are ranked from most to least linearly correlated based on \
their Pearson’s correlation score.",
}
ignore_rec_flag = False
# Doesn't make sense to compute correlation if less than 4 data values
if len(ldf) < 5:
ignore_rec_flag = True
# Then use the data populated in the vis list to compute score
for vis in vlist:
measures = vis.get_attr_by_data_model("measure")
Expand All @@ -81,9 +77,6 @@ def correlation(ldf: LuxDataFrame, ignore_transpose: bool = True):
vis.score = interestingness(vis, ldf)
else:
vis.score = -1
if ignore_rec_flag:
recommendation["collection"] = []
return recommendation
vlist.sort()
vlist = vlist.showK()
recommendation["collection"] = vlist
Expand Down
63 changes: 47 additions & 16 deletions lux/action/custom.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,36 +45,67 @@ def custom(ldf):
lux.config.executor.execute(vlist, ldf)
for vis in vlist:
vis.score = interestingness(vis, ldf)
# ldf.clear_intent()
vlist.sort(remove_invalid=True)
return recommendation


def custom_actions(ldf):
def custom_action(ldf, action):
"""
Generates user-defined vis based on globally defined actions.
Computing initial custom_action for lazy streaming of the rest of the actions

Parameters
----------
ldf : lux.core.frame
LuxDataFrame with underspecified intent.

action: action_name as string
e.g "Correlation"

Returns
-------
recommendations : Dict[str,obj]
object with a collection of visualizations that were previously registered.
One recommendation
"""
recommendation = None
display_condition = lux.config.actions[action].display_condition
if display_condition is None or (display_condition is not None and display_condition(ldf)):
args = lux.config.actions[action].args
if args:
recommendation = lux.config.actions[action].action(ldf, args)
else:
recommendation = lux.config.actions[action].action(ldf)
return recommendation


def filter_keys(ldf, loading_bar=None):
"""
if len(lux.config.actions) > 0 and (len(ldf) > 0 or lux.config.executor.name != "PandasExecutor"):
recommendations = []
Filters out actions before beginning computations so we know which tabs to display.
Logic to filter out actions in lux/action/default.py
"""

keys = []
data_types = set(ldf._data_type.values())
progress = 0
if loading_bar is not None:
loading_bar.max = len(lux.config.actions.keys())
if len(ldf) > 0 or lux.config.executor.name != "PandasExecutor":
for action_name in lux.config.actions.keys():
display_condition = lux.config.actions[action_name].display_condition
if display_condition is None or (display_condition is not None and display_condition(ldf)):
args = lux.config.actions[action_name].args
if args:
recommendation = lux.config.actions[action_name].action(ldf, args)
else:
recommendation = lux.config.actions[action_name].action(ldf)
recommendations.append(recommendation)
return recommendations
else:
return []
if lux.config.actions[action_name].args:
if not lux.config.actions[action_name].args[0] in data_types:
continue
keys.append(action_name)
progress += 1
if loading_bar is not None:
loading_bar.value = progress

# # Pushing back correlation and geographical actions for performance reasons
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How do we currently determine what to compute first vs. what to compute lazily? Or, for now, are we explicitly saying that "correlation" and "geographical" will be computed later?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right now, actions are checked in alphabetical order (with correlation being first). I manually moved correlation and geographical to the end because I noticed those two take the longest to compute/render. The order in which we check/compute is very flexible, as we can just move the action names around in the list.

if "correlation" in keys:
keys.pop(keys.index("correlation"))
keys.append("correlation")

if "geographical" in keys:
keys.pop(keys.index("geographical"))
keys.append("geographical")

return keys
127 changes: 113 additions & 14 deletions lux/action/default.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
import lux
from lux.action.custom import custom
from lux.action.correlation import correlation
from lux.action.univariate import univariate
from lux.action.enhance import enhance
from lux.action.filter import add_filter
from lux.action.generalize import generalize
from lux.action.temporal import temporal, create_temporal_vis
from lux.utils import utils
from lux.vis.VisList import VisList
from lux.interestingness.interestingness import interestingness


def register_default_actions():
import lux
from lux.action.custom import custom
from lux.action.correlation import correlation
from lux.action.univariate import univariate
from lux.action.enhance import enhance
from lux.action.filter import add_filter
from lux.action.generalize import generalize
from lux.action.temporal import temporal

# display conditions for default actions
no_vis = lambda ldf: (ldf.current_vis is None) or (
Expand All @@ -16,14 +21,108 @@ def register_default_actions():
multiple_current_vis = lambda ldf: ldf.current_vis is not None and len(ldf.current_vis) > 1

# globally register default actions
lux.config.register_action("correlation", correlation, no_vis)
lux.config.register_action("distribution", univariate, no_vis, "quantitative")
lux.config.register_action("occurrence", univariate, no_vis, "nominal")
lux.config.register_action("temporal", temporal, no_vis)
lux.config.register_action("correlation", correlation, correlation_check)
lux.config.register_action("distribution", univariate, distribution_check, "quantitative")
lux.config.register_action("occurrence", univariate, occurence_check, "nominal")
lux.config.register_action("temporal", temporal, temporal_check)
lux.config.register_action("geographical", univariate, no_vis, "geographical")

lux.config.register_action("Enhance", enhance, one_current_vis)
lux.config.register_action("Enhance", enhance, enhance_check)
lux.config.register_action("Filter", add_filter, one_current_vis)
lux.config.register_action("Generalize", generalize, one_current_vis)
lux.config.register_action("Generalize", generalize, generalize_check)

lux.config.register_action("Custom", custom, multiple_current_vis)


def generalize_check(ldf):
    """
    Display condition registered for the "Generalize" action.

    Parameters
    ----------
    ldf : lux.core.frame
        LuxDataFrame whose intent and current vis are inspected.

    Returns
    -------
    bool
        False when the intent carries no filter specs and the number of
        attribute clauses (empty ``value``, attribute other than "Record")
        is at most 1 or greater than 4. Otherwise True only if exactly one
        current vis exists.
    """
    filter_specs = utils.get_filter_specs(ldf._intent)
    attr_clauses = [
        clause for clause in ldf._intent if clause.value == "" and clause.attribute != "Record"
    ]
    attr_count = len(attr_clauses)
    # Without filters, generalizing needs between 2 and 4 attribute clauses
    if (not 2 <= attr_count <= 4) and len(filter_specs) == 0:
        return False
    return ldf.current_vis is not None and len(ldf.current_vis) == 1


def correlation_check(ldf):
    """
    Display condition registered for the "correlation" action.

    Parameters
    ----------
    ldf : lux.core.frame
        LuxDataFrame whose length, columns, intent and current vis are inspected.

    Returns
    -------
    bool
        False when ``len(ldf)`` is below 5. With no current vis, True only if a
        VisList built from two wildcard measure clauses (plus the intent's
        filter specs) is non-empty. With a current vis, True only if the
        dataframe has exactly two columns.
    """
    # Too few data values to make a correlation meaningful
    if len(ldf) < 5:
        return False

    no_current_vis = ldf.current_vis is None or len(ldf.current_vis) == 0
    if not no_current_vis:
        # A current vis exists: only a two-column dataframe qualifies
        return len(ldf.columns) == 2

    filter_specs = utils.get_filter_specs(ldf._intent)
    measure_pair = [
        lux.Clause("?", data_model="measure"),
        lux.Clause("?", data_model="measure"),
    ]
    measure_pair.extend(filter_specs)
    candidates = VisList(measure_pair, ldf)
    return len(candidates) >= 1


def occurence_check(ldf):
    """
    Display condition registered for the "occurrence" action.

    Parameters
    ----------
    ldf : lux.core.frame
        LuxDataFrame whose intent and current vis are inspected.

    Returns
    -------
    bool
        False when the scored and sorted VisList of nominal attributes is
        empty; otherwise True only if there is no current vis.
    """
    filter_specs = utils.get_filter_specs(ldf._intent)
    nominal_intent = [lux.Clause("?", data_type="nominal"), *filter_specs]
    candidates = VisList(nominal_intent, ldf)
    for candidate in candidates:
        candidate.score = interestingness(candidate, ldf)
    # NOTE(review): sort() runs before the length test; presumably it can drop
    # invalid vis and so change len(candidates) -- confirm against VisList.sort.
    candidates.sort()

    if len(candidates) < 1:
        return False
    return ldf.current_vis is None or len(ldf.current_vis) == 0


def distribution_check(ldf):
    """
    Display condition registered for the "distribution" action.

    Parameters
    ----------
    ldf : lux.core.frame
        LuxDataFrame whose columns, data types, cardinalities, intent and
        current vis are inspected.

    Returns
    -------
    bool
        False when no vis can be built from the quantitative columns with
        cardinality above 5 (excluding "Number of Records"); otherwise True
        only if there is no current vis.
    """
    filter_specs = utils.get_filter_specs(ldf._intent)

    quantitative_columns = []
    for column in ldf.columns:
        if ldf.data_type[column] != "quantitative":
            continue
        if not ldf.cardinality[column] > 5:
            continue
        if column == "Number of Records":
            continue
        quantitative_columns.append(column)

    histogram_intent = [lux.Clause(quantitative_columns), *filter_specs]
    candidates = VisList(histogram_intent, ldf)
    if len(candidates) < 1:
        return False
    return ldf.current_vis is None or len(ldf.current_vis) == 0


def temporal_check(ldf):
    """
    Display condition registered for the "temporal" action.

    Parameters
    ----------
    ldf : lux.core.frame
        LuxDataFrame whose length, column data types, intent and current vis
        are inspected.

    Returns
    -------
    bool
        False when ``len(ldf)`` is below 3. True as soon as any column has the
        "temporal" data type. Otherwise False when the scored and sorted
        VisList of temporal attributes is empty, else True only if there is no
        current vis.
    """
    # A line chart needs at least 3 datapoints (pre-aggregated) to be meaningful
    if len(ldf) < 3:
        return False

    # NOTE(review): this shortcut returns True without consulting current_vis,
    # unlike the final return below -- confirm that this is intended.
    if any(ldf.data_type[column] == "temporal" for column in ldf.columns):
        return True

    filter_specs = utils.get_filter_specs(ldf._intent)
    temporal_intent = [lux.Clause("?", data_type="temporal"), *filter_specs]
    candidates = VisList(temporal_intent, ldf)
    for candidate in candidates:
        candidate.score = interestingness(candidate, ldf)
    candidates.sort()

    if len(candidates) < 1:
        return False
    return ldf.current_vis is None or len(ldf.current_vis) == 0


def enhance_check(ldf):
    """
    Display condition registered for the "Enhance" action.

    Parameters
    ----------
    ldf : lux.core.frame
        LuxDataFrame whose intent and current vis are inspected.

    Returns
    -------
    bool
        False when the intent holds more than two attribute clauses (empty
        ``value``, attribute other than "Record"); otherwise True only if
        exactly one current vis exists.
    """
    # Only attribute clauses matter here; the filter specs and the intent copy
    # that used to be computed were never read, so they are no longer built.
    attr_specs = [
        clause for clause in ldf._intent if clause.value == "" and clause.attribute != "Record"
    ]
    if len(attr_specs) > 2:
        return False
    return ldf.current_vis is not None and len(ldf.current_vis) == 1
4 changes: 0 additions & 4 deletions lux/action/generalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,6 @@ def generalize(ldf):
"long_description": f"Remove one aspect of the Current Vis. We can either remove an attribute or filter from {intended_attrs}.",
}
# to observe a more general trend
# if we do no have enough column attributes or too many, return no vis.
if len(attributes) < 1 or len(attributes) > 4:
recommendation["collection"] = []
return recommendation
# for each column specification, create a copy of the ldf's vis and remove the column specification
# then append the vis to the output
if len(attributes) > 1:
Expand Down
4 changes: 0 additions & 4 deletions lux/action/temporal.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,6 @@ def temporal(ldf):
+ " followed by trends across other timescales (e.g., year, month, week, day)."
)

# Doesn't make sense to generate a line chart if there is less than 3 datapoints (pre-aggregated)
if len(ldf) < 3:
recommendation["collection"] = []
return recommendation
vlist.sort()
recommendation["collection"] = vlist
return recommendation
Expand Down
11 changes: 8 additions & 3 deletions lux/action/univariate.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,6 @@ def univariate(ldf, *args):
"description": "Show univariate histograms of <p class='highlight-descriptor'>quantitative</p> attributes.",
"long_description": f"Distribution displays univariate histogram distributions of all quantitative attributes{examples}. Visualizations are ranked from most to least skewed.",
}
# Doesn't make sense to generate a histogram if there is less than 5 datapoints (pre-aggregated)
if len(ldf) < 5:
ignore_rec_flag = True
elif data_type_constraint == "nominal":
possible_attributes = [
c for c in ldf.columns if ldf.data_type[c] == "nominal" and c != "Number of Records"
Expand Down Expand Up @@ -89,6 +86,14 @@ def univariate(ldf, *args):
"description": "Show choropleth maps of <p class='highlight-descriptor'>geographic</p> attributes",
"long_description": f"Occurence displays choropleths of averages for some geographic attribute{examples}. Visualizations are ranked by diversity of the geographic attribute.",
}
elif data_type_constraint == "temporal":
intent = [lux.Clause("?", data_type="temporal")]
intent.extend(filter_specs)
recommendation = {
"action": "Temporal",
"description": "Show trends over <p class='highlight-descriptor'>time-related</p> attributes.",
"long_description": "Temporal displays line charts for all attributes related to datetimes in the dataframe.",
}
if ignore_rec_flag:
recommendation["collection"] = []
return recommendation
Expand Down
Loading