From 09176692d081cf44ec63f75624a29b6c6d2c2b54 Mon Sep 17 00:00:00 2001 From: Taha Yassine Kraiem Date: Tue, 21 Sep 2021 20:32:52 +0200 Subject: [PATCH] feat(api): insights feature retention --- api/chalicelib/blueprints/subs/bp_insights.py | 13 ++- api/chalicelib/core/insights.py | 95 ++++++++++++++++++- 2 files changed, 102 insertions(+), 6 deletions(-) diff --git a/api/chalicelib/blueprints/subs/bp_insights.py b/api/chalicelib/blueprints/subs/bp_insights.py index 8c79e2663..cdb207c21 100644 --- a/api/chalicelib/blueprints/subs/bp_insights.py +++ b/api/chalicelib/blueprints/subs/bp_insights.py @@ -37,7 +37,18 @@ def get_users_retention(projectId, context): params = app.current_request.query_params args = dashboard.dashboard_args(params) - return {"data": insights.get_retention(project_id=projectId, **{**data, **args})} + return {"data": insights.get_users_retention(project_id=projectId, **{**data, **args})} + + +@app.route('/{projectId}/insights/feature_retention', methods=['GET', 'POST']) +def get_feature_retention(projectId, context): + data = app.current_request.json_body + if data is None: + data = {} + params = app.current_request.query_params + args = dashboard.dashboard_args(params) + + return {"data": insights.get_feature_retention(project_id=projectId, **{**data, **args})} # # diff --git a/api/chalicelib/core/insights.py b/api/chalicelib/core/insights.py index c39fb5cea..1df47eda2 100644 --- a/api/chalicelib/core/insights.py +++ b/api/chalicelib/core/insights.py @@ -167,13 +167,13 @@ def __complete_retention(rows, start_date, end_date=None): @dev.timed -def get_retention(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), filters=[], - **args): +def get_users_retention(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), filters=[], + **args): startTimestamp = TimeUTC.trunc_week(startTimestamp) endTimestamp = startTimestamp + 10 * TimeUTC.MS_WEEK pg_sub_query = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions", time_constraint=True) - + pg_sub_query.append("user_id IS NOT NULL") with pg_client.PostgresClient() as cur: pg_query = f"""SELECT EXTRACT(EPOCH FROM first_connexion_week::date)::bigint*1000 AS first_connexion_week, FLOOR(DATE_PART('day', connexion_week - first_connexion_week) / 7)::integer AS week, @@ -181,8 +181,7 @@ def get_retention(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTim ARRAY_AGG(DISTINCT connexions_list.user_id) AS connected_users FROM (SELECT DISTINCT user_id, MIN(DATE_TRUNC('week', to_timestamp(start_ts / 1000))) AS first_connexion_week FROM sessions - WHERE {" AND ".join(pg_sub_query)} - AND user_id IS NOT NULL + WHERE {" AND ".join(pg_sub_query)} AND NOT EXISTS((SELECT 1 FROM sessions AS bsess WHERE bsess.start_ts= %(startTimestamp)s") + pg_sub_query.append("feature.timestamp < %(endTimestamp)s") + event_table = JOURNEY_TYPES["PAGES"]["table"] + event_column = JOURNEY_TYPES["PAGES"]["column"] + extra_values = {"value": "/"} + default = True + for f in filters: + if f["type"] == "EVENT_TYPE" and JOURNEY_TYPES.get(f["value"]): + event_table = JOURNEY_TYPES[f["value"]]["table"] + event_column = JOURNEY_TYPES[f["value"]]["column"] + elif f["type"] in [sessions_metas.meta_type.USERID, sessions_metas.meta_type.USERID_IOS]: + pg_sub_query.append(f"sessions.user_id = %(user_id)s") + extra_values["user_id"] = f["value"] + # TODO: This will change later when the search is clear + default = False + extra_values["value"] = f["value"] + pg_sub_query.append(f"feature.{event_column} = %(value)s") + + with pg_client.PostgresClient() as cur: + if default: + # get most used value + pg_query = f"""SELECT {event_column} AS value, COUNT(*) AS count + FROM {event_table} AS feature INNER JOIN public.sessions USING (session_id) + WHERE {" AND ".join(pg_sub_query[:-1])} + AND length({event_column}) > 2 + GROUP BY value + ORDER BY count DESC + LIMIT 1;""" + params = {"project_id": project_id, "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values} + # print(cur.mogrify(pg_query, params)) + cur.execute(cur.mogrify(pg_query, params)) + row = cur.fetchone() + if row is not None: + extra_values["value"] = row["value"] + + pg_query = f"""SELECT EXTRACT(EPOCH FROM first_connexion_week::date)::bigint*1000 AS first_connexion_week, + FLOOR(DATE_PART('day', connexion_week - first_connexion_week) / 7)::integer AS week, + COUNT(DISTINCT connexions_list.user_id) AS users_count, + ARRAY_AGG(DISTINCT connexions_list.user_id) AS connected_users + FROM (SELECT DISTINCT user_id, MIN(DATE_TRUNC('week', to_timestamp(start_ts / 1000))) AS first_connexion_week + FROM sessions INNER JOIN {event_table} AS feature USING (session_id) + WHERE {" AND ".join(pg_sub_query)} + AND user_id IS NOT NULL + AND NOT EXISTS((SELECT 1 + FROM sessions AS bsess INNER JOIN {event_table} AS bfeature USING (session_id) + WHERE bsess.start_ts= %(startTimestamp)s + AND feature.timestamp < %(endTimestamp)s + AND feature.{event_column} = %(value)s + GROUP BY connexion_week, user_id) AS connexions_list ON (TRUE) + GROUP BY first_connexion_week, week + ORDER BY first_connexion_week, week;""" + + params = {"project_id": project_id, "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values} + # print(cur.mogrify(pg_query, params)) + cur.execute(cur.mogrify(pg_query, params)) + rows = cur.fetchall() + rows = __compute_retention_percentage(helper.list_to_camel_case(rows)) + return __complete_retention(rows=rows, start_date=startTimestamp, end_date=TimeUTC.now())