From 61113ddc38ae18d293fe71e77743e6a931cee33f Mon Sep 17 00:00:00 2001 From: Kraiem Taha Yassine Date: Tue, 12 Nov 2024 18:21:18 +0100 Subject: [PATCH] Dev (#2737) * fix(chalice): fixed Math-operators validation refactor(chalice): search for sessions that have events for heatmaps * refactor(chalice): search for sessions that have at least 1 location event for heatmaps * fix(chalice): fixed Math-operators validation refactor(chalice): search for sessions that have events for heatmaps * refactor(chalice): search for sessions that have at least 1 location event for heatmaps * feat(chalice): autocomplete return top 10 with stats * fix(chalice): fixed autocomplete top 10 meta-filters * refactor(chalice): refactored sessions search exp refactor(chalice): support funnels exp --- api/auth/auth_project.py | 8 +- api/chalicelib/core/custom_metrics.py | 88 +++---- api/chalicelib/core/funnels.py | 4 +- api/chalicelib/core/significance.py | 6 +- api/routers/subs/metrics.py | 4 +- api/schemas/schemas.py | 4 +- ee/api/auth/auth_project.py | 8 +- ee/api/chalicelib/core/custom_metrics.py | 99 ++++---- ee/api/chalicelib/core/sessions_exp.py | 30 +-- ee/api/chalicelib/core/significance_exp.py | 262 ++++++++++++++++++++- ee/api/routers/subs/metrics.py | 6 +- 11 files changed, 386 insertions(+), 133 deletions(-) diff --git a/api/auth/auth_project.py b/api/auth/auth_project.py index a3ccc04e5..821dc695b 100644 --- a/api/auth/auth_project.py +++ b/api/auth/auth_project.py @@ -31,8 +31,8 @@ class ProjectAuthorizer: logger.debug(f"unauthorized project {self.project_identifier}:{value}") raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="project not found.") else: - current_project = schemas.CurrentProjectContext(projectId=current_project["projectId"], - projectKey=current_project["projectKey"], - platform=current_project["platform"], - name=current_project["name"]) + current_project = schemas.ProjectContext(projectId=current_project["projectId"], + 
projectKey=current_project["projectKey"], + platform=current_project["platform"], + name=current_project["name"]) request.state.currentContext.project = current_project diff --git a/api/chalicelib/core/custom_metrics.py b/api/chalicelib/core/custom_metrics.py index c8a91028b..cf485e637 100644 --- a/api/chalicelib/core/custom_metrics.py +++ b/api/chalicelib/core/custom_metrics.py @@ -10,7 +10,6 @@ from chalicelib.utils import helper, pg_client from chalicelib.utils.TimeUTC import TimeUTC logger = logging.getLogger(__name__) -PIE_CHART_GROUP = 5 # TODO: refactor this to split @@ -37,7 +36,7 @@ def __get_table_of_series(project_id, data: schemas.CardSchema): return results -def __get_funnel_chart(project_id: int, data: schemas.CardFunnel, user_id: int = None): +def __get_funnel_chart(project: schemas.ProjectContext, data: schemas.CardFunnel, user_id: int = None): if len(data.series) == 0: return { "stages": [], @@ -47,53 +46,54 @@ def __get_funnel_chart(project_id: int, data: schemas.CardFunnel, user_id: int = # return funnels.get_top_insights_on_the_fly_widget(project_id=project_id, # data=data.series[0].filter, # metric_format=data.metric_format) - return funnels.get_simple_funnel(project_id=project_id, + return funnels.get_simple_funnel(project=project, data=data.series[0].filter, metric_format=data.metric_format) -def __get_errors_list(project_id, user_id, data: schemas.CardSchema): +def __get_errors_list(project: schemas.ProjectContext, user_id, data: schemas.CardSchema): if len(data.series) == 0: return { "total": 0, "errors": [] } - return errors.search(data.series[0].filter, project_id=project_id, user_id=user_id) + return errors.search(data.series[0].filter, project_id=project.project_id, user_id=user_id) -def __get_sessions_list(project_id, user_id, data: schemas.CardSchema): +def __get_sessions_list(project: schemas.ProjectContext, user_id, data: schemas.CardSchema): if len(data.series) == 0: logger.debug("empty series") return { "total": 0, "sessions": [] 
} - return sessions.search_sessions(data=data.series[0].filter, project_id=project_id, user_id=user_id) + return sessions.search_sessions(data=data.series[0].filter, project_id=project.project_id, user_id=user_id) -def __get_heat_map_chart(project_id, user_id, data: schemas.CardHeatMap, include_mobs: bool = True): +def __get_heat_map_chart(project: schemas.ProjectContext, user_id, data: schemas.CardHeatMap, + include_mobs: bool = True): if len(data.series) == 0: return None data.series[0].filter.filters += data.series[0].filter.events data.series[0].filter.events = [] - return heatmaps.search_short_session(project_id=project_id, user_id=user_id, + return heatmaps.search_short_session(project_id=project.project_id, user_id=user_id, data=schemas.HeatMapSessionsSearch( **data.series[0].filter.model_dump()), include_mobs=include_mobs) -def __get_path_analysis_chart(project_id: int, user_id: int, data: schemas.CardPathAnalysis): +def __get_path_analysis_chart(project: schemas.ProjectContext, user_id: int, data: schemas.CardPathAnalysis): if len(data.series) == 0: data.series.append( schemas.CardPathAnalysisSeriesSchema(startTimestamp=data.startTimestamp, endTimestamp=data.endTimestamp)) elif not isinstance(data.series[0].filter, schemas.PathAnalysisSchema): data.series[0].filter = schemas.PathAnalysisSchema() - return product_analytics.path_analysis(project_id=project_id, data=data) + return product_analytics.path_analysis(project_id=project.project_id, data=data) -def __get_timeseries_chart(project_id: int, data: schemas.CardTimeSeries, user_id: int = None): - series_charts = __try_live(project_id=project_id, data=data) +def __get_timeseries_chart(project: schemas.ProjectContext, data: schemas.CardTimeSeries, user_id: int = None): + series_charts = __try_live(project_id=project.project_id, data=data) results = [{}] * len(series_charts[0]) for i in range(len(results)): for j, series_chart in enumerate(series_charts): @@ -106,47 +106,47 @@ def not_supported(**args): 
raise Exception("not supported") -def __get_table_of_user_ids(project_id: int, data: schemas.CardTable, user_id: int = None): - return __get_table_of_series(project_id=project_id, data=data) +def __get_table_of_user_ids(project: schemas.ProjectContext, data: schemas.CardTable, user_id: int = None): + return __get_table_of_series(project_id=project.project_id, data=data) -def __get_table_of_sessions(project_id: int, data: schemas.CardTable, user_id): - return __get_sessions_list(project_id=project_id, user_id=user_id, data=data) +def __get_table_of_sessions(project: schemas.ProjectContext, data: schemas.CardTable, user_id): + return __get_sessions_list(project=project, user_id=user_id, data=data) -def __get_table_of_errors(project_id: int, data: schemas.CardTable, user_id: int): - return __get_errors_list(project_id=project_id, user_id=user_id, data=data) +def __get_table_of_errors(project: schemas.ProjectContext, data: schemas.CardTable, user_id: int): + return __get_errors_list(project=project, user_id=user_id, data=data) -def __get_table_of_issues(project_id: int, data: schemas.CardTable, user_id: int = None): - return __get_table_of_series(project_id=project_id, data=data) +def __get_table_of_issues(project: schemas.ProjectContext, data: schemas.CardTable, user_id: int = None): + return __get_table_of_series(project_id=project.project_id, data=data) -def __get_table_of_browsers(project_id: int, data: schemas.CardTable, user_id: int = None): - return __get_table_of_series(project_id=project_id, data=data) +def __get_table_of_browsers(project: schemas.ProjectContext, data: schemas.CardTable, user_id: int = None): + return __get_table_of_series(project_id=project.project_id, data=data) -def __get_table_of_devises(project_id: int, data: schemas.CardTable, user_id: int = None): - return __get_table_of_series(project_id=project_id, data=data) +def __get_table_of_devises(project: schemas.ProjectContext, data: schemas.CardTable, user_id: int = None): + return 
__get_table_of_series(project_id=project.project_id, data=data) -def __get_table_of_countries(project_id: int, data: schemas.CardTable, user_id: int = None): - return __get_table_of_series(project_id=project_id, data=data) +def __get_table_of_countries(project: schemas.ProjectContext, data: schemas.CardTable, user_id: int = None): + return __get_table_of_series(project_id=project.project_id, data=data) -def __get_table_of_urls(project_id: int, data: schemas.CardTable, user_id: int = None): - return __get_table_of_series(project_id=project_id, data=data) +def __get_table_of_urls(project: schemas.ProjectContext, data: schemas.CardTable, user_id: int = None): + return __get_table_of_series(project_id=project.project_id, data=data) -def __get_table_of_referrers(project_id: int, data: schemas.CardTable, user_id: int = None): - return __get_table_of_series(project_id=project_id, data=data) +def __get_table_of_referrers(project: schemas.ProjectContext, data: schemas.CardTable, user_id: int = None): + return __get_table_of_series(project_id=project.project_id, data=data) -def __get_table_of_requests(project_id: int, data: schemas.CardTable, user_id: int = None): - return __get_table_of_series(project_id=project_id, data=data) +def __get_table_of_requests(project: schemas.ProjectContext, data: schemas.CardTable, user_id: int = None): + return __get_table_of_series(project_id=project.project_id, data=data) -def __get_table_chart(project_id: int, data: schemas.CardTable, user_id: int): +def __get_table_chart(project: schemas.ProjectContext, data: schemas.CardTable, user_id: int): supported = { schemas.MetricOfTable.SESSIONS: __get_table_of_sessions, schemas.MetricOfTable.ERRORS: __get_table_of_errors, @@ -159,13 +159,13 @@ def __get_table_chart(project_id: int, data: schemas.CardTable, user_id: int): schemas.MetricOfTable.REFERRER: __get_table_of_referrers, schemas.MetricOfTable.FETCH: __get_table_of_requests } - return supported.get(data.metric_of, 
not_supported)(project_id=project_id, data=data, user_id=user_id) + return supported.get(data.metric_of, not_supported)(project=project, data=data, user_id=user_id) -def get_chart(project_id: int, data: schemas.CardSchema, user_id: int): +def get_chart(project: schemas.ProjectContext, data: schemas.CardSchema, user_id: int): if data.is_predefined: return custom_metrics_predefined.get_metric(key=data.metric_of, - project_id=project_id, + project_id=project.project_id, data=data.model_dump()) supported = { @@ -176,7 +176,7 @@ def get_chart(project_id: int, data: schemas.CardSchema, user_id: int): schemas.MetricType.INSIGHTS: not_supported, schemas.MetricType.PATH_ANALYSIS: __get_path_analysis_chart } - return supported.get(data.metric_type, not_supported)(project_id=project_id, data=data, user_id=user_id) + return supported.get(data.metric_type, not_supported)(project=project, data=data, user_id=user_id) def get_sessions_by_card_id(project_id, user_id, metric_id, data: schemas.CardSessionsSchema): @@ -628,8 +628,8 @@ def get_funnel_sessions_by_issue(user_id, project_id, metric_id, issue_id, "issue": issue} -def make_chart_from_card(project_id, user_id, metric_id, data: schemas.CardSessionsSchema): - raw_metric: dict = get_card(metric_id=metric_id, project_id=project_id, user_id=user_id, include_data=True) +def make_chart_from_card(project: schemas.ProjectContext, user_id, metric_id, data: schemas.CardSessionsSchema): + raw_metric: dict = get_card(metric_id=metric_id, project_id=project.project_id, user_id=user_id, include_data=True) if raw_metric is None: raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="card not found") @@ -641,18 +641,18 @@ def make_chart_from_card(project_id, user_id, metric_id, data: schemas.CardSessi if metric.is_predefined: return custom_metrics_predefined.get_metric(key=metric.metric_of, - project_id=project_id, + project_id=project.project_id, data=data.model_dump()) elif metric.metric_type == schemas.MetricType.HEAT_MAP: if 
raw_metric["data"] and raw_metric["data"].get("sessionId"): - return heatmaps.get_selected_session(project_id=project_id, + return heatmaps.get_selected_session(project_id=project.project_id, session_id=raw_metric["data"]["sessionId"]) else: - return heatmaps.search_short_session(project_id=project_id, + return heatmaps.search_short_session(project_id=project.project_id, data=schemas.HeatMapSessionsSearch(**metric.model_dump()), user_id=user_id) - return get_chart(project_id=project_id, data=metric, user_id=user_id) + return get_chart(project=project, data=metric, user_id=user_id) def card_exists(metric_id, project_id, user_id) -> bool: diff --git a/api/chalicelib/core/funnels.py b/api/chalicelib/core/funnels.py index cba19417a..ee45342b0 100644 --- a/api/chalicelib/core/funnels.py +++ b/api/chalicelib/core/funnels.py @@ -69,14 +69,14 @@ def get_issues_on_the_fly_widget(project_id, data: schemas.CardSeriesFilterSchem last_stage=len(data.events)))} -def get_simple_funnel(project_id, data: schemas.CardSeriesFilterSchema, +def get_simple_funnel(project:schemas.ProjectContext, data: schemas.CardSeriesFilterSchema, metric_format: schemas.MetricExtendedFormatType): data.events = filter_stages(__parse_events(data.events)) data.events = __fix_stages(data.events) if len(data.events) == 0: return {"stages": [], "totalDropDueToIssues": 0} insights = significance.get_simple_funnel(filter_d=data, - project_id=project_id, + project=project, metric_format=metric_format) return {"stages": insights, "totalDropDueToIssues": 0} diff --git a/api/chalicelib/core/significance.py b/api/chalicelib/core/significance.py index 6ea9b6ad9..fd8a3af17 100644 --- a/api/chalicelib/core/significance.py +++ b/api/chalicelib/core/significance.py @@ -238,7 +238,7 @@ def get_stages_and_events(filter_d: schemas.CardSeriesFilterSchema, project_id) return rows -def get_simple_funnel(filter_d: schemas.CardSeriesFilterSchema, project_id: int, +def get_simple_funnel(filter_d: schemas.CardSeriesFilterSchema, 
project: schemas.ProjectContext, metric_format: schemas.MetricExtendedFormatType) -> List[RealDictRow]: """ Add minimal timestamp @@ -307,7 +307,7 @@ def get_simple_funnel(filter_d: schemas.CardSeriesFilterSchema, project_id: int, sh.multi_conditions(f"p.base_referrer {op} %({f_k})s", f.value, is_not=is_not, value_key=f_k)) elif filter_type == events.EventType.METADATA.ui_type: if meta_keys is None: - meta_keys = metadata.get(project_id=project_id) + meta_keys = metadata.get(project_id=project.project_id) meta_keys = {m["key"]: m["index"] for m in meta_keys} if f.source in meta_keys.keys(): first_stage_extra_constraints.append( @@ -418,7 +418,7 @@ def get_simple_funnel(filter_d: schemas.CardSeriesFilterSchema, project_id: int, FROM {n_stages_query}; """ - params = {"project_id": project_id, "startTimestamp": filter_d.startTimestamp, + params = {"project_id": project.project_id, "startTimestamp": filter_d.startTimestamp, "endTimestamp": filter_d.endTimestamp, **values} with pg_client.PostgresClient() as cur: query = cur.mogrify(n_stages_query, params) diff --git a/api/routers/subs/metrics.py b/api/routers/subs/metrics.py index 26060b667..0ed28175b 100644 --- a/api/routers/subs/metrics.py +++ b/api/routers/subs/metrics.py @@ -150,9 +150,9 @@ def get_metric_funnel_issue_sessions(projectId: int, metric_id: int, issueId: st @app.post('/{projectId}/cards/{metric_id}/chart', tags=["card"]) -def get_card_chart(projectId: int, metric_id: int, request: Request, data: schemas.CardSessionsSchema = Body(...), +def get_card_chart(projectId: int, metric_id: int, data: schemas.CardSessionsSchema = Body(...), context: schemas.CurrentContext = Depends(OR_context)): - data = custom_metrics.make_chart_from_card(project_id=projectId, user_id=context.user_id, metric_id=metric_id, + data = custom_metrics.make_chart_from_card(project=context.project, user_id=context.user_id, metric_id=metric_id, data=data) return {"data": data} diff --git a/api/schemas/schemas.py b/api/schemas/schemas.py 
index 8ef431904..f158e9df6 100644 --- a/api/schemas/schemas.py +++ b/api/schemas/schemas.py @@ -110,7 +110,7 @@ class CreateProjectSchema(BaseModel): _transform_name = field_validator('name', mode='before')(remove_whitespace) -class CurrentProjectContext(BaseModel): +class ProjectContext(BaseModel): project_id: int = Field(..., gt=0) project_key: str = Field(...) name: str = Field(...) @@ -119,7 +119,7 @@ class CurrentProjectContext(BaseModel): class CurrentAPIContext(BaseModel): tenant_id: int = Field(...) - project: Optional[CurrentProjectContext] = Field(default=None) + project: Optional[ProjectContext] = Field(default=None) class CurrentContext(CurrentAPIContext): diff --git a/ee/api/auth/auth_project.py b/ee/api/auth/auth_project.py index 2ddb8b32b..cc9c5ed39 100644 --- a/ee/api/auth/auth_project.py +++ b/ee/api/auth/auth_project.py @@ -40,8 +40,8 @@ class ProjectAuthorizer: logger.debug(f"unauthorized project {self.project_identifier}:{value}") raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="unauthorized project.") else: - current_project = schemas.CurrentProjectContext(projectId=current_project["projectId"], - projectKey=current_project["projectKey"], - platform=current_project["platform"], - name=current_project["name"]) + current_project = schemas.ProjectContext(projectId=current_project["projectId"], + projectKey=current_project["projectKey"], + platform=current_project["platform"], + name=current_project["name"]) request.state.currentContext.project = current_project diff --git a/ee/api/chalicelib/core/custom_metrics.py b/ee/api/chalicelib/core/custom_metrics.py index e5d11ad24..ad413997d 100644 --- a/ee/api/chalicelib/core/custom_metrics.py +++ b/ee/api/chalicelib/core/custom_metrics.py @@ -23,7 +23,6 @@ else: from chalicelib.core import sessions_legacy as sessions logger = logging.getLogger(__name__) -PIE_CHART_GROUP = 5 # TODO: refactor this to split @@ -50,69 +49,73 @@ def __get_table_of_series(project_id, data: 
schemas.CardSchema): return results -def __get_funnel_chart(project_id: int, data: schemas.CardFunnel, user_id: int = None): +def __get_funnel_chart(project: schemas.ProjectContext, data: schemas.CardFunnel, user_id: int = None): if len(data.series) == 0: return { "stages": [], "totalDropDueToIssues": 0 } - return funnels.get_top_insights_on_the_fly_widget(project_id=project_id, - data=data.series[0].filter, - metric_format=data.metric_format) + # return funnels.get_top_insights_on_the_fly_widget(project_id=project_id, + # data=data.series[0].filter, + # metric_format=data.metric_format) + return funnels.get_simple_funnel(project=project, + data=data.series[0].filter, + metric_format=data.metric_format) -def __get_errors_list(project_id, user_id, data: schemas.CardSchema): +def __get_errors_list(project: schemas.ProjectContext, user_id, data: schemas.CardSchema): if len(data.series) == 0: return { "total": 0, "errors": [] } - return errors.search(data.series[0].filter, project_id=project_id, user_id=user_id) + return errors.search(data.series[0].filter, project_id=project.project_id, user_id=user_id) -def __get_sessions_list(project_id, user_id, data: schemas.CardSchema): +def __get_sessions_list(project: schemas.ProjectContext, user_id, data: schemas.CardSchema): if len(data.series) == 0: logger.debug("empty series") return { "total": 0, "sessions": [] } - return sessions.search_sessions(data=data.series[0].filter, project_id=project_id, user_id=user_id) + return sessions.search_sessions(data=data.series[0].filter, project_id=project.project_id, user_id=user_id) -def __get_heat_map_chart(project_id, user_id, data: schemas.CardHeatMap, include_mobs: bool = True): +def __get_heat_map_chart(project: schemas.ProjectContext, user_id, data: schemas.CardHeatMap, + include_mobs: bool = True): if len(data.series) == 0: return None data.series[0].filter.filters += data.series[0].filter.events data.series[0].filter.events = [] - return 
heatmaps.search_short_session(project_id=project_id, user_id=user_id, + return heatmaps.search_short_session(project_id=project.project_id, user_id=user_id, data=schemas.HeatMapSessionsSearch( **data.series[0].filter.model_dump()), include_mobs=include_mobs) # EE only -def __get_insights_chart(project_id: int, data: schemas.CardInsights, user_id: int = None): - return sessions_insights.fetch_selected(project_id=project_id, +def __get_insights_chart(project: schemas.ProjectContext, data: schemas.CardInsights, user_id: int = None): + return sessions_insights.fetch_selected(project_id=project.project_id, data=schemas.GetInsightsSchema(startTimestamp=data.startTimestamp, endTimestamp=data.endTimestamp, metricValue=data.metric_value, series=data.series)) -def __get_path_analysis_chart(project_id: int, user_id: int, data: schemas.CardPathAnalysis): +def __get_path_analysis_chart(project: schemas.ProjectContext, user_id: int, data: schemas.CardPathAnalysis): if len(data.series) == 0: data.series.append( schemas.CardPathAnalysisSeriesSchema(startTimestamp=data.startTimestamp, endTimestamp=data.endTimestamp)) elif not isinstance(data.series[0].filter, schemas.PathAnalysisSchema): data.series[0].filter = schemas.PathAnalysisSchema() - return product_analytics.path_analysis(project_id=project_id, data=data) + return product_analytics.path_analysis(project_id=project.project_id, data=data) -def __get_timeseries_chart(project_id: int, data: schemas.CardTimeSeries, user_id: int = None): - series_charts = __try_live(project_id=project_id, data=data) +def __get_timeseries_chart(project: schemas.ProjectContext, data: schemas.CardTimeSeries, user_id: int = None): + series_charts = __try_live(project_id=project.project_id, data=data) results = [{}] * len(series_charts[0]) for i in range(len(results)): for j, series_chart in enumerate(series_charts): @@ -125,47 +128,47 @@ def not_supported(**args): raise Exception("not supported") -def __get_table_of_user_ids(project_id: int, data: 
schemas.CardTable, user_id: int = None): - return __get_table_of_series(project_id=project_id, data=data) +def __get_table_of_user_ids(project: schemas.ProjectContext, data: schemas.CardTable, user_id: int = None): + return __get_table_of_series(project_id=project.project_id, data=data) -def __get_table_of_sessions(project_id: int, data: schemas.CardTable, user_id): - return __get_sessions_list(project_id=project_id, user_id=user_id, data=data) +def __get_table_of_sessions(project: schemas.ProjectContext, data: schemas.CardTable, user_id): + return __get_sessions_list(project=project, user_id=user_id, data=data) -def __get_table_of_errors(project_id: int, data: schemas.CardTable, user_id: int): - return __get_errors_list(project_id=project_id, user_id=user_id, data=data) +def __get_table_of_errors(project: schemas.ProjectContext, data: schemas.CardTable, user_id: int): + return __get_errors_list(project=project, user_id=user_id, data=data) -def __get_table_of_issues(project_id: int, data: schemas.CardTable, user_id: int = None): - return __get_table_of_series(project_id=project_id, data=data) +def __get_table_of_issues(project: schemas.ProjectContext, data: schemas.CardTable, user_id: int = None): + return __get_table_of_series(project_id=project.project_id, data=data) -def __get_table_of_browsers(project_id: int, data: schemas.CardTable, user_id: int = None): - return __get_table_of_series(project_id=project_id, data=data) +def __get_table_of_browsers(project: schemas.ProjectContext, data: schemas.CardTable, user_id: int = None): + return __get_table_of_series(project_id=project.project_id, data=data) -def __get_table_of_devises(project_id: int, data: schemas.CardTable, user_id: int = None): - return __get_table_of_series(project_id=project_id, data=data) +def __get_table_of_devises(project: schemas.ProjectContext, data: schemas.CardTable, user_id: int = None): + return __get_table_of_series(project_id=project.project_id, data=data) -def 
__get_table_of_countries(project_id: int, data: schemas.CardTable, user_id: int = None): - return __get_table_of_series(project_id=project_id, data=data) +def __get_table_of_countries(project: schemas.ProjectContext, data: schemas.CardTable, user_id: int = None): + return __get_table_of_series(project_id=project.project_id, data=data) -def __get_table_of_urls(project_id: int, data: schemas.CardTable, user_id: int = None): - return __get_table_of_series(project_id=project_id, data=data) +def __get_table_of_urls(project: schemas.ProjectContext, data: schemas.CardTable, user_id: int = None): + return __get_table_of_series(project_id=project.project_id, data=data) -def __get_table_of_referrers(project_id: int, data: schemas.CardTable, user_id: int = None): - return __get_table_of_series(project_id=project_id, data=data) +def __get_table_of_referrers(project: schemas.ProjectContext, data: schemas.CardTable, user_id: int = None): + return __get_table_of_series(project_id=project.project_id, data=data) -def __get_table_of_requests(project_id: int, data: schemas.CardTable, user_id: int = None): - return __get_table_of_series(project_id=project_id, data=data) +def __get_table_of_requests(project: schemas.ProjectContext, data: schemas.CardTable, user_id: int = None): + return __get_table_of_series(project_id=project.project_id, data=data) -def __get_table_chart(project_id: int, data: schemas.CardTable, user_id: int): +def __get_table_chart(project: schemas.ProjectContext, data: schemas.CardTable, user_id: int): supported = { schemas.MetricOfTable.SESSIONS: __get_table_of_sessions, schemas.MetricOfTable.ERRORS: __get_table_of_errors, @@ -178,13 +181,13 @@ def __get_table_chart(project_id: int, data: schemas.CardTable, user_id: int): schemas.MetricOfTable.REFERRER: __get_table_of_referrers, schemas.MetricOfTable.FETCH: __get_table_of_requests } - return supported.get(data.metric_of, not_supported)(project_id=project_id, data=data, user_id=user_id) + return 
supported.get(data.metric_of, not_supported)(project=project, data=data, user_id=user_id) -def get_chart(project_id: int, data: schemas.CardSchema, user_id: int): +def get_chart(project: schemas.ProjectContext, data: schemas.CardSchema, user_id: int): if data.is_predefined: return custom_metrics_predefined.get_metric(key=data.metric_of, - project_id=project_id, + project_id=project.project_id, data=data.model_dump()) supported = { @@ -195,7 +198,7 @@ def get_chart(project_id: int, data: schemas.CardSchema, user_id: int): schemas.MetricType.INSIGHTS: __get_insights_chart, schemas.MetricType.PATH_ANALYSIS: __get_path_analysis_chart } - return supported.get(data.metric_type, not_supported)(project_id=project_id, data=data, user_id=user_id) + return supported.get(data.metric_type, not_supported)(project=project, data=data, user_id=user_id) def get_sessions_by_card_id(project_id, user_id, metric_id, data: schemas.CardSessionsSchema): @@ -675,8 +678,8 @@ def get_funnel_sessions_by_issue(user_id, project_id, metric_id, issue_id, "issue": issue} -def make_chart_from_card(project_id, user_id, metric_id, data: schemas.CardSessionsSchema): - raw_metric: dict = get_card(metric_id=metric_id, project_id=project_id, user_id=user_id, include_data=True) +def make_chart_from_card(project: schemas.ProjectContext, user_id, metric_id, data: schemas.CardSessionsSchema): + raw_metric: dict = get_card(metric_id=metric_id, project_id=project.project_id, user_id=user_id, include_data=True) if raw_metric is None: raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="card not found") @@ -688,18 +691,18 @@ def make_chart_from_card(project_id, user_id, metric_id, data: schemas.CardSessi if metric.is_predefined: return custom_metrics_predefined.get_metric(key=metric.metric_of, - project_id=project_id, + project_id=project.project_id, data=data.model_dump()) elif metric.metric_type == schemas.MetricType.HEAT_MAP: if raw_metric["data"] and raw_metric["data"].get("sessionId"): - return 
heatmaps.get_selected_session(project_id=project_id, + return heatmaps.get_selected_session(project_id=project.project_id, session_id=raw_metric["data"]["sessionId"]) else: - return heatmaps.search_short_session(project_id=project_id, + return heatmaps.search_short_session(project_id=project.project_id, data=schemas.HeatMapSessionsSearch(**metric.model_dump()), user_id=user_id) - return get_chart(project_id=project_id, data=metric, user_id=user_id) + return get_chart(project=project, data=metric, user_id=user_id) def card_exists(metric_id, project_id, user_id) -> bool: diff --git a/ee/api/chalicelib/core/sessions_exp.py b/ee/api/chalicelib/core/sessions_exp.py index 85ad9a4a3..a2f69314a 100644 --- a/ee/api/chalicelib/core/sessions_exp.py +++ b/ee/api/chalicelib/core/sessions_exp.py @@ -5,7 +5,7 @@ from typing import List, Union import schemas from chalicelib.core import events, metadata, projects, performance_event, metrics, sessions_favorite, sessions_legacy from chalicelib.utils import pg_client, helper, metrics_helper, ch_client, exp_ch_helper -from chalicelib.utils.sql_helper import get_sql_operator +from chalicelib.utils import sql_helper as sh logger = logging.getLogger(__name__) SESSION_PROJECTION_COLS_CH = """\ @@ -57,16 +57,6 @@ SESSION_PROJECTION_COLS_CH_MAP = """\ """ -def __is_negation_operator(op: schemas.SearchEventOperator): - return op in [schemas.SearchEventOperator.IS_NOT, - schemas.SearchEventOperator.NOT_ON, - schemas.SearchEventOperator.NOT_CONTAINS] - - -def __reverse_sql_operator(op): - return "=" if op == "!=" else "!=" if op == "=" else "ILIKE" if op == "NOT ILIKE" else "NOT ILIKE" - - def _multiple_conditions(condition, values, value_key="value", is_not=False): query = [] for i in range(len(values)): @@ -599,14 +589,14 @@ def search_query_parts_ch(data: schemas.SessionsSearchPayloadSchema, error_statu f.value = helper.values_for_operator(value=f.value, op=f.operator) f_k = f"f_value{i}" full_args = {**full_args, f_k: f.value, 
**_multiple_values(f.value, value_key=f_k)} - op = get_sql_operator(f.operator) \ + op = sh.get_sql_operator(f.operator) \ if filter_type not in [schemas.FilterType.EVENTS_COUNT] else f.operator.value is_any = _isAny_opreator(f.operator) is_undefined = _isUndefined_operator(f.operator) if not is_any and not is_undefined and len(f.value) == 0: continue is_not = False - if __is_negation_operator(f.operator): + if sh.is_negation_operator(f.operator): is_not = True if filter_type == schemas.FilterType.USER_BROWSER: if is_any: @@ -796,7 +786,7 @@ def search_query_parts_ch(data: schemas.SessionsSearchPayloadSchema, error_statu _multiple_conditions(f"ms.rev_id {op} toString(%({f_k})s)", f.value, is_not=is_not, value_key=f_k)) elif filter_type == schemas.FilterType.PLATFORM: - # op = get_sql_operator(f.operator) + # op = sh.get_sql_operator(f.operator) extra_constraints.append( _multiple_conditions(f"s.user_device_type {op} %({f_k})s", f.value, is_not=is_not, value_key=f_k)) @@ -857,11 +847,11 @@ def search_query_parts_ch(data: schemas.SessionsSearchPayloadSchema, error_statu event.value = [event.value] if not __is_valid_event(is_any=is_any, event=event): continue - op = get_sql_operator(event.operator) + op = sh.get_sql_operator(event.operator) is_not = False - if __is_negation_operator(event.operator): + if sh.is_negation_operator(event.operator): is_not = True - op = __reverse_sql_operator(op) + op = sh.reverse_sql_operator(op) # if event_index == 0 or or_events: # event_from = f"%s INNER JOIN {MAIN_SESSIONS_TABLE} AS ms USING (session_id)" event_from = "%s" @@ -1255,7 +1245,7 @@ def search_query_parts_ch(data: schemas.SessionsSearchPayloadSchema, error_statu if is_any or len(f.value) == 0: continue f.value = helper.values_for_operator(value=f.value, op=f.operator) - op = get_sql_operator(f.operator) + op = sh.get_sql_operator(f.operator) e_k_f = e_k + f"_fetch{j}" full_args = {**full_args, **_multiple_values(f.value, value_key=e_k_f)} if f.type == 
# --- ee/api/chalicelib/core/significance_exp.py ---
from chalicelib.utils import ch_client
from chalicelib.utils import exp_ch_helper
from .significance import *

logger = logging.getLogger(__name__)


def get_simple_funnel(filter_d: schemas.CardSeriesFilterSchema, project: schemas.ProjectContext,
                      metric_format: schemas.MetricExtendedFormatType) -> List[RealDictRow]:
    """Compute per-stage counts of a funnel with a single ClickHouse query.

    Each usable event in ``filter_d.events`` becomes one funnel stage. Stage 1 is
    counted with ``anyIf`` and every later stage with ``sequenceMatch`` over the
    conditions of all stages up to and including it, grouped by session
    (``SESSION_COUNT``) or by ``s.user_id`` (any other metric format).

    :param filter_d: series filter; reads ``events``, ``filters``, ``startTimestamp``
        and ``endTimestamp``. ``operator``/``value`` of events and ``value`` of filters
        are normalised in place.
    :param project: project context; reads ``project_id`` and ``platform``.
    :param metric_format: ``SESSION_COUNT`` groups by session, anything else by user.
    :return: one dict per accepted stage with the keys ``value``, ``type``,
        ``operator``, ``dropPct`` (``None`` for the first stage) and ``count``;
        an empty list when no stage is usable.
    :raises Exception: whatever the ClickHouse client raises; the query and the
        payload are logged before the exception is re-raised.
    """
    stages: List[schemas.SessionSearchEventSchema2] = filter_d.events
    filters: List[schemas.SessionSearchFilterSchema] = filter_d.filters
    platform = project.platform
    constraints = ["e.project_id = %(project_id)s",
                   "e.datetime >= toDateTime(%(startTimestamp)s/1000)",
                   "e.datetime <= toDateTime(%(endTimestamp)s/1000)",
                   "e.event_type IN %(eventTypes)s"]

    full_args = {"project_id": project.project_id, "startTimestamp": filter_d.startTimestamp,
                 "endTimestamp": filter_d.endTimestamp}

    MAIN_EVENTS_TABLE = exp_ch_helper.get_main_events_table(timestamp=filter_d.startTimestamp,
                                                            platform=platform)
    MAIN_SESSIONS_TABLE = exp_ch_helper.get_main_sessions_table(filter_d.startTimestamp)

    full_args["MAIN_EVENTS_TABLE"] = MAIN_EVENTS_TABLE
    full_args["MAIN_SESSIONS_TABLE"] = MAIN_SESSIONS_TABLE

    if metric_format == schemas.MetricExtendedFormatType.SESSION_COUNT:
        group_by = 'e.session_id'
        needs_sessions_join = False
    else:
        constraints.append("isNotNull(s.user_id)")
        group_by = 's.user_id'
        # The user-based metric references the `s` alias, so the sessions table
        # has to be joined even when the payload carries no filter at all.
        needs_sessions_join = True

    n_stages_query = []
    n_stages_query_not = []
    event_types = []
    values = {}

    # ---- session-level filters -------------------------------------------------
    meta_keys = None
    for i, f in enumerate(filters):
        if len(f.value) == 0:
            continue

        # Any non-empty filter triggers the join, even if its type is not handled below.
        needs_sessions_join = True
        f.value = helper.values_for_operator(value=f.value, op=f.operator)

        op = sh.get_sql_operator(f.operator)

        filter_type = f.type
        f_k = f"f_value{i}"
        values.update(sh.multi_values(f.value, value_key=f_k))
        is_not = sh.is_negation_operator(f.operator)

        if filter_type == schemas.FilterType.USER_BROWSER:
            constraints.append(
                sh.multi_conditions(f's.user_browser {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k))

        elif filter_type in [schemas.FilterType.USER_OS, schemas.FilterType.USER_OS_MOBILE]:
            constraints.append(
                sh.multi_conditions(f's.user_os {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k))

        elif filter_type in [schemas.FilterType.USER_DEVICE, schemas.FilterType.USER_DEVICE_MOBILE]:
            constraints.append(
                sh.multi_conditions(f's.user_device {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k))

        elif filter_type in [schemas.FilterType.USER_COUNTRY, schemas.FilterType.USER_COUNTRY_MOBILE]:
            constraints.append(
                sh.multi_conditions(f's.user_country {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k))
        elif filter_type == schemas.FilterType.DURATION:
            if len(f.value) > 0 and f.value[0] is not None:
                constraints.append('s.duration >= %(minDuration)s')
                values["minDuration"] = f.value[0]
            # `f` is a schema object read through attributes everywhere else;
            # the original `f["value"]` subscript would raise a TypeError here.
            if len(f.value) > 1 and f.value[1] is not None and int(f.value[1]) > 0:
                constraints.append('s.duration <= %(maxDuration)s')
                values["maxDuration"] = f.value[1]
        elif filter_type == schemas.FilterType.REFERRER:
            constraints.append(
                sh.multi_conditions(f"s.base_referrer {op} %({f_k})s", f.value, is_not=is_not, value_key=f_k))
        elif filter_type == events.EventType.METADATA.ui_type:
            # Metadata keys are fetched once, on the first metadata filter.
            if meta_keys is None:
                meta_keys = metadata.get(project_id=project.project_id)
                meta_keys = {m["key"]: m["index"] for m in meta_keys}
            if f.source in meta_keys:
                constraints.append(
                    sh.multi_conditions(
                        f's.{metadata.index_to_colname(meta_keys[f.source])} {op} %({f_k})s', f.value,
                        is_not=is_not, value_key=f_k))
        elif filter_type in [schemas.FilterType.USER_ID, schemas.FilterType.USER_ID_MOBILE]:
            constraints.append(
                sh.multi_conditions(f's.user_id {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k))
        elif filter_type in [schemas.FilterType.USER_ANONYMOUS_ID,
                             schemas.FilterType.USER_ANONYMOUS_ID_MOBILE]:
            constraints.append(
                sh.multi_conditions(f's.user_anonymous_id {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k))
        elif filter_type in [schemas.FilterType.REV_ID, schemas.FilterType.REV_ID_MOBILE]:
            constraints.append(
                sh.multi_conditions(f's.rev_id {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k))

    # ---- funnel stages -----------------------------------------------------------
    # Stages that actually made it into the query, in query order. The result is
    # built from this list so that `stage{n}` always maps to the right event even
    # when some input events were skipped.
    accepted_stages = []
    for i, s in enumerate(stages):
        if s.operator is None:
            s.operator = schemas.SearchEventOperator.IS

        if not isinstance(s.value, list):
            s.value = [s.value]
        is_any = sh.isAny_opreator(s.operator)
        if not is_any and len(s.value) == 0:
            continue

        op = sh.get_sql_operator(s.operator)
        is_not = False
        if sh.is_negation_operator(s.operator):
            is_not = True
            op = sh.reverse_sql_operator(op)

        specific_condition = None
        e_k = f"e_value{i}"
        event_type = s.type
        next_event_type = exp_ch_helper.get_event_type(event_type, platform=platform)
        if event_type == events.EventType.CLICK.ui_type:
            if platform == "web":
                next_col_name = events.EventType.CLICK.column
                if not is_any and schemas.ClickEventExtraOperator.has_value(s.operator):
                    specific_condition = sh.multi_conditions(f"selector {op} %({e_k})s", s.value, value_key=e_k)
            else:
                next_col_name = events.EventType.CLICK_MOBILE.column
        elif event_type == events.EventType.INPUT.ui_type:
            next_col_name = events.EventType.INPUT.column
        elif event_type == events.EventType.LOCATION.ui_type:
            next_col_name = 'url_path'
        elif event_type == events.EventType.CUSTOM.ui_type:
            next_col_name = events.EventType.CUSTOM.column
        # IOS --------------
        elif event_type == events.EventType.CLICK_MOBILE.ui_type:
            next_col_name = events.EventType.CLICK_MOBILE.column
        elif event_type == events.EventType.INPUT_MOBILE.ui_type:
            next_col_name = events.EventType.INPUT_MOBILE.column
        elif event_type == events.EventType.VIEW_MOBILE.ui_type:
            next_col_name = events.EventType.VIEW_MOBILE.column
        elif event_type == events.EventType.CUSTOM_MOBILE.ui_type:
            next_col_name = events.EventType.CUSTOM_MOBILE.column
        else:
            logger.warning(f"=================UNDEFINED:{event_type}")
            continue

        values.update(sh.multi_values(helper.values_for_operator(value=s.value, op=s.operator), value_key=e_k))

        # Only the IN-list of event types is de-duplicated; every accepted stage
        # still gets its own condition so two stages of the same type stay distinct.
        if next_event_type not in event_types:
            event_types.append(next_event_type)
        full_args[f"event_type_{i}"] = next_event_type
        stage_query = f"event_type=%(event_type_{i})s"
        if is_not or not is_any:
            value_condition = specific_condition or sh.multi_conditions(
                f' {next_col_name} {op} %({e_k})s', s.value, is_not=is_not, value_key=e_k)
            if is_not:
                # Negated stages are excluded through an anti-join below; the
                # stage itself only matches on the event type.
                n_stages_query_not.append(f"{stage_query} AND {value_condition}")
            else:
                stage_query = f"{stage_query} AND {value_condition}"
        n_stages_query.append(stage_query)
        accepted_stages.append(s)

    if not n_stages_query:
        return []
    full_args = {"eventTypes": tuple(event_types), **full_args, **values}

    # ---- FROM / JOIN clauses -------------------------------------------------------
    joins = []
    if needs_sessions_join:
        joins.append(f"INNER JOIN {MAIN_SESSIONS_TABLE} AS s ON (e.session_id=s.session_id)")
        constraints += ["s.project_id = %(project_id)s",
                        "s.datetime >= toDateTime(%(startTimestamp)s/1000)",
                        "s.datetime <= toDateTime(%(endTimestamp)s/1000)"]

    if n_stages_query_not:
        not_base_conditions = ["project_id = %(project_id)s",
                               "datetime >= toDateTime(%(startTimestamp)s/1000)",
                               "datetime <= toDateTime(%(endTimestamp)s/1000)"]
        # Two templates can differ in placeholder names and still render to the
        # same SQL; de-duplicate on the rendered text but keep the template.
        rendered_seen = set()
        not_conditions = []
        for template in n_stages_query_not:
            rendered = template % full_args
            if rendered not in rendered_seen:
                rendered_seen.add(rendered)
                not_conditions.append(template)

        not_base_sql = " AND ".join(not_base_conditions)
        not_any_sql = " OR ".join(not_conditions)
        joins.append(f"""LEFT ANTI JOIN (SELECT DISTINCT sub.session_id
                                         FROM {MAIN_EVENTS_TABLE} AS sub
                                         WHERE {not_base_sql}
                                           AND ({not_any_sql})
                                        ) AS sub ON(e.session_id=sub.session_id)""")
    extra_from = "\n".join(joins)

    # ---- projections ---------------------------------------------------------------
    sequences = []
    projections = []
    for i, stage_condition in enumerate(n_stages_query):
        projections.append(f"SUM(T{i + 1}) AS stage{i + 1}")
        if i == 0:
            sequences.append(f"anyIf(1,{stage_condition}) AS T1")
        else:
            # Stage i+1 is reached when stages 1..i+1 happened in that order.
            pattern = "".join(f"(?{j + 1})" for j in range(i + 1))
            conditions = ",".join(n_stages_query[:i + 1])
            sequences.append(f"sequenceMatch('{pattern}')(e.datetime, {conditions}) AS T{i + 1}")

    projections_sql = ",".join(projections)
    sequences_sql = ",".join(sequences)
    constraints_sql = " AND ".join(constraints)
    funnel_query = f"""
    SELECT {projections_sql}
    FROM (SELECT {sequences_sql}
          FROM {MAIN_EVENTS_TABLE} AS e {extra_from}
          WHERE {constraints_sql}
          GROUP BY {group_by}) AS raw;
    """

    with ch_client.ClickHouseClient() as cur:
        query = cur.format(funnel_query, full_args)
        logger.debug("---------------------------------------------------")
        logger.debug(query)
        logger.debug("---------------------------------------------------")
        try:
            rows = cur.execute(query)
        except Exception:
            logger.warning("--------- SIMPLE FUNNEL SEARCH QUERY EXCEPTION CH-----------")
            logger.warning(query)
            logger.warning("--------- PAYLOAD -----------")
            logger.warning(filter_d.model_dump_json())
            logger.warning("--------------------")
            raise

    row = rows[0]
    stages_list = []
    for i, stage in enumerate(accepted_stages):
        count = row[f"stage{i + 1}"]
        drop = None
        if i != 0:
            base_count = row[f"stage{i}"]
            if base_count == 0:
                drop = 0
            elif base_count > 0:
                drop = int(100 * (base_count - count) / base_count)

        stages_list.append(
            {"value": stage.value,
             "type": stage.type,
             "operator": stage.operator,
             "dropPct": drop,
             "count": count
             }
        )

    return stages_list


# --- ee/api/routers/subs/metrics.py ---
@app.post('/{projectId}/cards/try', tags=["cards"])
def try_card(projectId: int, data: schemas.CardSchema = Body(...),
             context: schemas.CurrentContext = Depends(OR_context)):
    """Return the chart of an unsaved card, wrapped as ``{"data": ...}``.

    The project is taken from ``context.project``; ``projectId`` is only the
    path parameter of the route and is not read in the body.
    """
    return {"data": custom_metrics.get_chart(project=context.project, data=data, user_id=context.user_id)}


@app.post('/{projectId}/cards/{metric_id}/chart', tags=["card"])
def get_card_chart(projectId: int, metric_id: int, data: schemas.CardSessionsSchema = Body(...),
                   context: schemas.CurrentContext = Depends(OR_context)):
    """Return the chart of the stored card ``metric_id``, wrapped as ``{"data": ...}``.

    The project is taken from ``context.project``; ``projectId`` is only the
    path parameter of the route and is not read in the body.
    """
    data = custom_metrics.make_chart_from_card(project=context.project, user_id=context.user_id, metric_id=metric_id,
                                               data=data)
    return {"data": data}