Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(experiments): Support for data warehouse experiments #26247

Closed
wants to merge 39 commits into from
Closed
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
4f96688
Treat data_warehouse like actions and events on the frontend
danielbachhuber Nov 18, 2024
6edcb53
Merge branch 'master' into experiments/data-warehouse-support-v2
danielbachhuber Nov 18, 2024
2f9861d
Merge branch 'master' into experiments/data-warehouse-support-v2
danielbachhuber Nov 19, 2024
5729a90
Some todos
danielbachhuber Nov 19, 2024
84351e2
One more note
danielbachhuber Nov 19, 2024
351df62
Merge branch 'master' into experiments/data-warehouse-support-v2
danielbachhuber Nov 19, 2024
ad7abc7
Switch to named types so it's more obvious what the values can be
danielbachhuber Nov 19, 2024
0938f95
Named types are no bueno here
danielbachhuber Nov 19, 2024
a1a915d
First pass at joining to the data warehouse table
danielbachhuber Nov 19, 2024
c2a427d
Disable this for now
danielbachhuber Nov 19, 2024
f2b59c3
Split a multi-part breakdown field
danielbachhuber Nov 19, 2024
71b8514
Restore properties
danielbachhuber Nov 19, 2024
ba7e369
Bail early if `breakdown_key` isn't present
danielbachhuber Nov 19, 2024
a6785ad
add test
EDsCODE Nov 20, 2024
8a05eb3
Merge branch 'master' into experiments/data-warehouse-support-v2
danielbachhuber Nov 20, 2024
daedb67
Remove backticks because keys are split by the period
danielbachhuber Nov 20, 2024
f4d2e43
Merge branch 'master' into experiments/data-warehouse-support-v2
danielbachhuber Nov 20, 2024
d711274
Merge branch 'master' into experiments/data-warehouse-support-v2
danielbachhuber Nov 20, 2024
48e4537
Fix parsing data warehouse properties with nested values
danielbachhuber Nov 20, 2024
8e1993f
Merge branch 'hogql/fix-dw-properties' into experiments/data-warehous…
danielbachhuber Nov 20, 2024
01c6dae
Abstract
danielbachhuber Nov 20, 2024
becfb7f
Revert "Abstract"
danielbachhuber Nov 20, 2024
1d496ba
Revert "Revert "Abstract""
danielbachhuber Nov 20, 2024
cc53bb6
Consolidated implementation
danielbachhuber Nov 20, 2024
8508e3b
Fix linting issues
danielbachhuber Nov 20, 2024
f187c56
Merge branch 'hogql/fix-dw-properties' into experiments/data-warehous…
danielbachhuber Nov 20, 2024
6d55621
Merge branch 'master' into experiments/data-warehouse-support-v2
danielbachhuber Nov 21, 2024
4db4021
Merge branch 'master' into experiments/data-warehouse-support-v2
danielbachhuber Nov 21, 2024
bb3dd2e
Allow data warehouse properties in TrendsQuery
danielbachhuber Nov 21, 2024
5057208
Use explicit type check
danielbachhuber Nov 21, 2024
d8531a0
Merge branch 'hogql/allow-dw-properties' into experiments/data-wareho…
danielbachhuber Nov 21, 2024
d31e887
Add a better test
danielbachhuber Nov 21, 2024
68a5d72
Revert changes to `test_trends_breakdown_with_property`
danielbachhuber Nov 21, 2024
45eff94
Properties are now considered for data warehouse series
danielbachhuber Nov 21, 2024
459a5ed
Merge branch 'hogql/allow-dw-properties' into experiments/data-wareho…
danielbachhuber Nov 21, 2024
80d42d0
Merge branch 'master' into experiments/data-warehouse-support-v2
danielbachhuber Nov 21, 2024
230a127
Update query snapshots
github-actions[bot] Nov 21, 2024
207cb7d
Update UI snapshots for `chromium` (1)
github-actions[bot] Nov 21, 2024
58bc669
Update UI snapshots for `chromium` (1)
github-actions[bot] Nov 21, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ee/clickhouse/views/experiments.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,7 @@ def update(self, instance: Experiment, validated_data: dict, *args: Any, **kwarg
# if (
# not instance.filters.get("events")
# and not instance.filters.get("actions")
# and not instance.filters.get("data_warehouse")
# and validated_data.get("start_date")
# and not validated_data.get("filters")
# ):
Expand Down
8 changes: 7 additions & 1 deletion frontend/src/scenes/experiments/ExperimentView/Goal.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,13 @@ export function MetricDisplayOld({ filters }: { filters?: FilterType }): JSX.Ele

return (
<>
{([...(filters?.events || []), ...(filters?.actions || [])] as ActionFilter[])
{(
[
...(filters?.events || []),
...(filters?.actions || []),
...(filters?.data_warehouse || []),
] as ActionFilter[]
)
.sort((a, b) => (a.order || 0) - (b.order || 0))
.map((event: ActionFilter, idx: number) => (
<div key={idx} className="mb-2">
Expand Down
19 changes: 14 additions & 5 deletions frontend/src/scenes/experiments/experimentLogic.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -652,7 +652,8 @@ export const experimentLogic = kea<experimentLogicType>([
setExperiment: async ({ experiment }) => {
const experimentEntitiesChanged =
(experiment.filters?.events && experiment.filters.events.length > 0) ||
(experiment.filters?.actions && experiment.filters.actions.length > 0)
(experiment.filters?.actions && experiment.filters.actions.length > 0) ||
(experiment.filters?.data_warehouse && experiment.filters.data_warehouse.length > 0)

if (!experiment.filters || Object.keys(experiment.filters).length === 0) {
return
Expand All @@ -667,7 +668,9 @@ export const experimentLogic = kea<experimentLogicType>([

if (name === 'filters') {
const experimentEntitiesChanged =
(value?.events && value.events.length > 0) || (value?.actions && value.actions.length > 0)
(value?.events && value.events.length > 0) ||
(value?.actions && value.actions.length > 0) ||
(value?.data_warehouse && value.data_warehouse.length > 0)

if (!value || Object.keys(value).length === 0) {
return
Expand All @@ -685,7 +688,8 @@ export const experimentLogic = kea<experimentLogicType>([

const experimentEntitiesChanged =
(experiment.filters?.events && experiment.filters.events.length > 0) ||
(experiment.filters?.actions && experiment.filters.actions.length > 0)
(experiment.filters?.actions && experiment.filters.actions.length > 0) ||
(experiment.filters?.data_warehouse && experiment.filters.data_warehouse.length > 0)

if (!experiment.filters || Object.keys(experiment.filters).length === 0) {
return
Expand All @@ -699,7 +703,8 @@ export const experimentLogic = kea<experimentLogicType>([
const experiment = values.experiment
const experimentEntitiesChanged =
(experiment.filters?.events && experiment.filters.events.length > 0) ||
(experiment.filters?.actions && experiment.filters.actions.length > 0)
(experiment.filters?.actions && experiment.filters.actions.length > 0) ||
(experiment.filters?.data_warehouse && experiment.filters.data_warehouse.length > 0)

if (!experiment.filters || Object.keys(experiment.filters).length === 0) {
return
Expand Down Expand Up @@ -1045,7 +1050,11 @@ export const experimentLogic = kea<experimentLogicType>([
if (!filters) {
return undefined
}
entities = [...(filters?.events || []), ...(filters?.actions || [])] as ActionFilterType[]
entities = [
...(filters?.events || []),
...(filters?.actions || []),
...(filters?.data_warehouse || []),
] as ActionFilterType[]
}

// Find out if we're using count per actor math aggregates averages per user
Expand Down
129 changes: 106 additions & 23 deletions posthog/hogql_queries/experiments/experiment_trends_query_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
from django.conf import settings
from posthog.constants import ExperimentNoResultsErrorKeys
from posthog.hogql import ast
from posthog.hogql.database.database import create_hogql_database
from posthog.hogql.database.models import LazyJoin
from posthog.hogql.database.schema.events import EventsTable
from posthog.hogql_queries.experiments import CONTROL_VARIANT_KEY
from posthog.hogql_queries.experiments.trends_statistics import (
are_results_significant,
Expand All @@ -19,6 +22,8 @@
BreakdownFilter,
CachedExperimentTrendsQueryResponse,
ChartDisplayType,
DataWarehouseNode,
DataWarehousePropertyFilter,
EventPropertyFilter,
EventsNode,
ExperimentSignificanceCode,
Expand All @@ -27,6 +32,7 @@
ExperimentVariantTrendsBaseStats,
InsightDateRange,
PropertyMathType,
PropertyOperator,
TrendsFilter,
TrendsQuery,
TrendsQueryResponse,
Expand Down Expand Up @@ -89,12 +95,18 @@
explicitDate=True,
)

def _get_breakdown_filter(self) -> BreakdownFilter:
def _get_event_breakdown_filter(self) -> BreakdownFilter:
return BreakdownFilter(
breakdown=self.breakdown_key,
breakdown_type="event",
)

def _get_data_warehouse_breakdown_filter(self, column_name: str) -> BreakdownFilter:
return BreakdownFilter(
breakdown=f"{column_name}.properties.`{self.breakdown_key}`",
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jurajmajerik As it turns out, this syntax isn't supported yet. See some conversation here: https://posthog.slack.com/archives/C019RAX2XBN/p1732050126051829

breakdown_type="data_warehouse",
)

def _prepare_count_query(self) -> TrendsQuery:
"""
This method takes the raw trend query and adapts it
Expand All @@ -109,6 +121,10 @@

uses_math_aggregation = self._uses_math_aggregation_by_user_or_property_value(prepared_count_query)

# TODO:
# - Apply the column name to the breakdown filter and transform the breakdown type to datawarehouse_property
# - Apply the column name to the properties filter and transform the property type to datawarehouse property

# :TRICKY: for `avg` aggregation, use `sum` data as an approximation
if prepared_count_query.series[0].math == PropertyMathType.AVG:
prepared_count_query.series[0].math = PropertyMathType.SUM
Expand All @@ -118,15 +134,35 @@

prepared_count_query.trendsFilter = TrendsFilter(display=ChartDisplayType.ACTIONS_LINE_GRAPH_CUMULATIVE)
prepared_count_query.dateRange = self._get_insight_date_range()
prepared_count_query.breakdownFilter = self._get_breakdown_filter()
prepared_count_query.properties = [
EventPropertyFilter(
key=self.breakdown_key,
value=self.variants,
operator="exact",
type="event",
)
]
if self._is_data_warehouse_query(prepared_count_query):
column_name = self._get_data_warehouse_events_column_name(prepared_count_query)
if not column_name:
raise ValueError("Data warehouse table is expected to have a lazy join to the events table")
prepared_count_query.breakdownFilter = self._get_data_warehouse_breakdown_filter(column_name)
prepared_count_query.properties = [
DataWarehousePropertyFilter(
key=f"{column_name}.event",
value="$feature_flag_called",
operator=PropertyOperator.EXACT,
type="data_warehouse",
),
DataWarehousePropertyFilter(
key=f"{column_name}.properties.`{self.breakdown_key}`",
value=self.variants,
operator=PropertyOperator.EXACT,
type="data_warehouse",
),
]
else:
prepared_count_query.breakdownFilter = self._get_event_breakdown_filter()
prepared_count_query.properties = [
EventPropertyFilter(
key=self.breakdown_key,
value=self.variants,
operator=PropertyOperator.EXACT,
type="event",
)
]

return prepared_count_query

Expand All @@ -152,7 +188,7 @@

if hasattr(count_event, "event"):
prepared_exposure_query.dateRange = self._get_insight_date_range()
prepared_exposure_query.breakdownFilter = self._get_breakdown_filter()
prepared_exposure_query.breakdownFilter = self._get_event_breakdown_filter()
prepared_exposure_query.trendsFilter = TrendsFilter(
display=ChartDisplayType.ACTIONS_LINE_GRAPH_CUMULATIVE
)
Expand All @@ -166,7 +202,7 @@
EventPropertyFilter(
key=self.breakdown_key,
value=self.variants,
operator="exact",
operator=PropertyOperator.EXACT,
type="event",
)
]
Expand All @@ -177,16 +213,30 @@
elif self.query.exposure_query:
prepared_exposure_query = TrendsQuery(**self.query.exposure_query.model_dump())
prepared_exposure_query.dateRange = self._get_insight_date_range()
prepared_exposure_query.breakdownFilter = self._get_breakdown_filter()
prepared_exposure_query.trendsFilter = TrendsFilter(display=ChartDisplayType.ACTIONS_LINE_GRAPH_CUMULATIVE)
prepared_exposure_query.properties = [
EventPropertyFilter(
key=self.breakdown_key,
value=self.variants,
operator="exact",
type="event",
)
]
if self._is_data_warehouse_query(prepared_exposure_query):
column_name = self._get_data_warehouse_events_column_name(prepared_exposure_query)
if not column_name:
raise ValueError("Data warehouse table is expected to have a lazy join to the events table")
prepared_exposure_query.breakdownFilter = self._get_data_warehouse_breakdown_filter(column_name)
prepared_exposure_query.properties = [
DataWarehousePropertyFilter(
key=f"properties.`{self.breakdown_key}`",
value=self.variants,
operator=PropertyOperator.EXACT,
type="data_warehouse",
)
]
else:
prepared_exposure_query.breakdownFilter = self._get_event_breakdown_filter()
prepared_exposure_query.properties = [
EventPropertyFilter(
key=self.breakdown_key,
value=self.variants,
operator=PropertyOperator.EXACT,
type="event",
)
]
# 3. Otherwise, we construct a default exposure query: unique users for the $feature_flag_called event
else:
prepared_exposure_query = TrendsQuery(
Expand All @@ -206,13 +256,13 @@
EventPropertyFilter(
key="$feature_flag_response",
value=self.variants,
operator="exact",
operator=PropertyOperator.EXACT,
type="event",
),
EventPropertyFilter(
key="$feature_flag",
value=[self.feature_flag.key],
operator="exact",
operator=PropertyOperator.EXACT,
type="event",
),
],
Expand Down Expand Up @@ -362,5 +412,38 @@
if has_errors:
raise ValidationError(detail=json.dumps(errors))

def _is_data_warehouse_query(self, query: TrendsQuery) -> bool:
return any(isinstance(series, DataWarehouseNode) for series in query.series)

def _get_data_warehouse_events_column_name(self, query: TrendsQuery) -> Optional[str]:
if hasattr(self, "data_warehouse_events_column_name"):
return self.data_warehouse_events_column_name

Check failure on line 420 in posthog/hogql_queries/experiments/experiment_trends_query_runner.py

View workflow job for this annotation

GitHub Actions / Python code quality checks

Cannot determine type of "data_warehouse_events_column_name"

self.data_warehouse_events_column_name = None

try:
if not query.series:
raise ValueError("Query series is empty")

database = create_hogql_database(self.team.pk)
if not database:
raise ValueError("Failed to create HogQL database")

table_name = query.series[0].table_name

Check failure on line 432 in posthog/hogql_queries/experiments/experiment_trends_query_runner.py

View workflow job for this annotation

GitHub Actions / Python code quality checks

Item "EventsNode" of "EventsNode | ActionsNode | DataWarehouseNode" has no attribute "table_name"

Check failure on line 432 in posthog/hogql_queries/experiments/experiment_trends_query_runner.py

View workflow job for this annotation

GitHub Actions / Python code quality checks

Item "ActionsNode" of "EventsNode | ActionsNode | DataWarehouseNode" has no attribute "table_name"
if table_name not in database.model_extra:

Check failure on line 433 in posthog/hogql_queries/experiments/experiment_trends_query_runner.py

View workflow job for this annotation

GitHub Actions / Python code quality checks

Unsupported right operand type for in ("dict[str, Any] | None")
raise ValueError(f"Table '{table_name}' not found in database model")

table_fields = database.model_extra[table_name].fields

Check failure on line 436 in posthog/hogql_queries/experiments/experiment_trends_query_runner.py

View workflow job for this annotation

GitHub Actions / Python code quality checks

Value of type "dict[str, Any] | None" is not indexable
for field_name, field in table_fields.items():
if isinstance(field, LazyJoin) and isinstance(field.join_table, EventsTable):
self.data_warehouse_events_column_name = field_name
break

return self.data_warehouse_events_column_name

except Exception as e:
# You might want to log the error here
raise ValueError(f"Failed to get data warehouse events column name: {str(e)}") from e

def to_query(self) -> ast.SelectQuery:
raise ValueError(f"Cannot convert source query of type {self.query.count_query.kind} to query")
11 changes: 10 additions & 1 deletion posthog/hogql_queries/insights/trends/trends_query_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -888,7 +888,16 @@ def _is_breakdown_filter_field_boolean(self):
if not table_or_view:
raise ValueError(f"Table {series.table_name} not found")

field_type = dict(table_or_view.columns)[self.query.breakdownFilter.breakdown]["clickhouse"]
breakdown_key = (
self.query.breakdownFilter.breakdown[0]
if isinstance(self.query.breakdownFilter.breakdown, list)
else self.query.breakdownFilter.breakdown
)

if breakdown_key not in dict(table_or_view.columns):
return False

field_type = dict(table_or_view.columns)[breakdown_key]["clickhouse"]

if field_type.startswith("Nullable("):
field_type = field_type.replace("Nullable(", "")[:-1]
Expand Down
2 changes: 1 addition & 1 deletion posthog/hogql_queries/insights/trends/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def get_properties_chain(
raise Exception("group_type_index missing from params")

if breakdown_type == "data_warehouse":
return [breakdown_field]
return [*breakdown_field.split(".")]

if breakdown_type == "data_warehouse_person_property":
return ["person", *breakdown_field.split(".")]
Expand Down
Loading