diff --git a/api/chalicelib/core/errors/errors_ch.py b/api/chalicelib/core/errors/errors_ch.py
index 1f7805aea..178b33701 100644
--- a/api/chalicelib/core/errors/errors_ch.py
+++ b/api/chalicelib/core/errors/errors_ch.py
@@ -1,7 +1,6 @@
 import schemas
 from chalicelib.core import metadata
 from chalicelib.core.errors.modules import sessions
-from chalicelib.core.metrics import metrics
 from chalicelib.utils import ch_client, exp_ch_helper
 from chalicelib.utils import helper, metrics_helper
 from chalicelib.utils.TimeUTC import TimeUTC
@@ -385,10 +384,13 @@ def search(data: schemas.SearchErrorsSchema, project: schemas.ProjectContext, us
                             ON details.error_id=time_details.error_id
                    INNER JOIN (SELECT error_id, groupArray([timestamp, count]) AS chart
                               FROM (SELECT JSONExtractString(toString(`$properties`), 'error_id') AS error_id,
-                                           toUnixTimestamp(toStartOfInterval(created_at, INTERVAL %(step_size)s second)) * 1000 AS timestamp,
+                                           gs.generate_series AS timestamp,
                                            COUNT(DISTINCT session_id) AS count
-                                    FROM {MAIN_EVENTS_TABLE}
+                                    FROM generate_series(%(startDate)s, %(endDate)s, %(step_size)s) AS gs
+                                    LEFT JOIN {MAIN_EVENTS_TABLE} ON(TRUE)
                                     WHERE {" AND ".join(ch_sub_query)}
+                                      AND created_at >= toDateTime(timestamp / 1000)
+                                      AND created_at < toDateTime((timestamp + %(step_size)s) / 1000)
                                     GROUP BY error_id, timestamp
                                     ORDER BY timestamp) AS sub_table
                               GROUP BY error_id) AS chart_details ON details.error_id=chart_details.error_id;"""
@@ -405,9 +407,7 @@ def search(data: schemas.SearchErrorsSchema, project: schemas.ProjectContext, us
             r["chart"] = list(r["chart"])
             for i in range(len(r["chart"])):
                 r["chart"][i] = {"timestamp": r["chart"][i][0], "count": r["chart"][i][1]}
-            r["chart"] = metrics.__complete_missing_steps(rows=r["chart"], start_time=data.startTimestamp,
-                                                          end_time=data.endTimestamp,
-                                                          density=data.density, neutral={"count": 0})
+
     return {
         'total': total,
         'errors': helper.list_to_camel_case(rows)
diff --git a/api/chalicelib/core/metrics/__init__.py b/api/chalicelib/core/metrics/__init__.py
index 49f94d989..9906cb8bc 100644
--- a/api/chalicelib/core/metrics/__init__.py
+++ b/api/chalicelib/core/metrics/__init__.py
@@ -7,9 +7,7 @@ logger = logging.getLogger(__name__)
 if config("EXP_METRICS", cast=bool, default=False):
     logger.info(">>> Using experimental metrics")
     from chalicelib.core.metrics import heatmaps_ch as heatmaps
-    from chalicelib.core.metrics import metrics_ch as metrics
    from chalicelib.core.metrics import product_analytics_ch as product_analytics
 else:
     from chalicelib.core.metrics import heatmaps
-    from chalicelib.core.metrics import metrics
     from chalicelib.core.metrics import product_analytics
diff --git a/api/chalicelib/core/metrics/custom_metrics.py b/api/chalicelib/core/metrics/custom_metrics.py
index 18b2cf91b..f5255017f 100644
--- a/api/chalicelib/core/metrics/custom_metrics.py
+++ b/api/chalicelib/core/metrics/custom_metrics.py
@@ -6,7 +6,7 @@ from fastapi import HTTPException, status
 import schemas
 from chalicelib.core import issues
 from chalicelib.core.errors import errors
-from chalicelib.core.metrics import heatmaps, product_analytics, funnels, custom_metrics_predefined
+from chalicelib.core.metrics import heatmaps, product_analytics, funnels
 from chalicelib.core.sessions import sessions, sessions_search
 from chalicelib.utils import helper, pg_client
 from chalicelib.utils.TimeUTC import TimeUTC
@@ -153,11 +153,6 @@ def __get_table_chart(project: schemas.ProjectContext, data: schemas.CardTable,
 
 
 def get_chart(project: schemas.ProjectContext, data: schemas.CardSchema, user_id: int):
-    if data.is_predefined:
-        return custom_metrics_predefined.get_metric(key=data.metric_of,
-                                                    project_id=project.project_id,
-                                                    data=data.model_dump())
-
     supported = {
         schemas.MetricType.TIMESERIES: __get_timeseries_chart,
         schemas.MetricType.TABLE: __get_table_chart,
@@ -195,8 +190,6 @@ def get_sessions(project: schemas.ProjectContext, user_id, data: schemas.CardSes
 
 
 def get_issues(project: schemas.ProjectContext, user_id: int, data: schemas.CardSchema):
-    if data.is_predefined:
-        return not_supported()
     if data.metric_of == schemas.MetricOfTable.ISSUES:
         return __get_table_of_issues(project=project, user_id=user_id, data=data)
     supported = {
@@ -598,11 +591,7 @@ def make_chart_from_card(project: schemas.ProjectContext, user_id, metric_id, da
     raw_metric["density"] = data.density
     metric: schemas.CardSchema = schemas.CardSchema(**raw_metric)
 
-    if metric.is_predefined:
-        return custom_metrics_predefined.get_metric(key=metric.metric_of,
-                                                    project_id=project.project_id,
-                                                    data=data.model_dump())
-    elif metric.metric_type == schemas.MetricType.HEAT_MAP:
+    if metric.metric_type == schemas.MetricType.HEAT_MAP:
         if raw_metric["data"] and raw_metric["data"].get("sessionId"):
             return heatmaps.get_selected_session(project_id=project.project_id,
                                                  session_id=raw_metric["data"]["sessionId"])
diff --git a/api/chalicelib/core/metrics/custom_metrics_predefined.py b/api/chalicelib/core/metrics/custom_metrics_predefined.py
deleted file mode 100644
index a087b7fa9..000000000
--- a/api/chalicelib/core/metrics/custom_metrics_predefined.py
+++ /dev/null
@@ -1,15 +0,0 @@
-import logging
-
-import schemas
-from chalicelib.core.metrics import metrics
-
-logger = logging.getLogger(__name__)
-
-
-def get_metric(key: schemas.MetricOfWebVitals, project_id: int, data: dict):
-    supported = {
-        schemas.MetricOfWebVitals.AVG_VISITED_PAGES: metrics.get_user_activity_avg_visited_pages,
-        schemas.MetricOfWebVitals.COUNT_USERS: metrics.get_unique_users
-    }
-
-    return supported.get(key, lambda *args: None)(project_id=project_id, **data)
diff --git a/api/chalicelib/core/metrics/metrics.py b/api/chalicelib/core/metrics/metrics.py
deleted file mode 100644
index 1835eec4d..000000000
--- a/api/chalicelib/core/metrics/metrics.py
+++ /dev/null
@@ -1,219 +0,0 @@
-import logging
-
-import schemas
-from chalicelib.core import metadata
-from chalicelib.utils import helper
-from chalicelib.utils import pg_client
-from chalicelib.utils.TimeUTC import TimeUTC
-from chalicelib.utils.metrics_helper import get_step_size
-
-logger = logging.getLogger(__name__)
-
-
-def __get_constraints(project_id, time_constraint=True, chart=False, duration=True, project=True,
-                      project_identifier="project_id",
-                      main_table="sessions", time_column="start_ts", data={}):
-    pg_sub_query = []
-    main_table = main_table + "." if main_table is not None and len(main_table) > 0 else ""
-    if project:
-        pg_sub_query.append(f"{main_table}{project_identifier} =%({project_identifier})s")
-    if duration:
-        pg_sub_query.append(f"{main_table}duration>0")
-    if time_constraint:
-        pg_sub_query.append(f"{main_table}{time_column} >= %(startTimestamp)s")
-        pg_sub_query.append(f"{main_table}{time_column} < %(endTimestamp)s")
-    if chart:
-        pg_sub_query.append(f"{main_table}{time_column} >= generated_timestamp")
-        pg_sub_query.append(f"{main_table}{time_column} < generated_timestamp + %(step_size)s")
-    return pg_sub_query + __get_meta_constraint(project_id=project_id, data=data)
-
-
-def __merge_charts(list1, list2, time_key="timestamp"):
-    if len(list1) != len(list2):
-        raise Exception("cannot merge unequal lists")
-    result = []
-    for i in range(len(list1)):
-        timestamp = min(list1[i][time_key], list2[i][time_key])
-        result.append({**list1[i], **list2[i], time_key: timestamp})
-    return result
-
-
-def __get_constraint_values(data):
-    params = {}
-    for i, f in enumerate(data.get("filters", [])):
-        params[f"{f['key']}_{i}"] = f["value"]
-    return params
-
-
-def __get_meta_constraint(project_id, data):
-    if len(data.get("filters", [])) == 0:
-        return []
-    constraints = []
-    meta_keys = metadata.get(project_id=project_id)
-    meta_keys = {m["key"]: m["index"] for m in meta_keys}
-
-    for i, f in enumerate(data.get("filters", [])):
-        if f["key"] in meta_keys.keys():
-            key = f"sessions.metadata_{meta_keys[f['key']]})"
-            if f["value"] in ["*", ""]:
-                constraints.append(f"{key} IS NOT NULL")
-            else:
-                constraints.append(f"{key} = %({f['key']}_{i})s")
-        else:
-            filter_type = f["key"].upper()
-            filter_type = [filter_type, "USER" + filter_type, filter_type[4:]]
-            if any(item in [schemas.FilterType.USER_BROWSER] \
-                   for item in filter_type):
-                constraints.append(f"sessions.user_browser = %({f['key']}_{i})s")
-            elif any(item in [schemas.FilterType.USER_OS, schemas.FilterType.USER_OS_MOBILE] \
-                     for item in filter_type):
-                constraints.append(f"sessions.user_os = %({f['key']}_{i})s")
-            elif any(item in [schemas.FilterType.USER_DEVICE, schemas.FilterType.USER_DEVICE_MOBILE] \
-                     for item in filter_type):
-                constraints.append(f"sessions.user_device = %({f['key']}_{i})s")
-            elif any(item in [schemas.FilterType.USER_COUNTRY, schemas.FilterType.USER_COUNTRY_MOBILE] \
-                     for item in filter_type):
-                constraints.append(f"sessions.user_country = %({f['key']}_{i})s")
-            elif any(item in [schemas.FilterType.USER_ID, schemas.FilterType.USER_ID_MOBILE] \
-                     for item in filter_type):
-                constraints.append(f"sessions.user_id = %({f['key']}_{i})s")
-            elif any(item in [schemas.FilterType.USER_ANONYMOUS_ID, schemas.FilterType.USER_ANONYMOUS_ID_MOBILE] \
-                     for item in filter_type):
-                constraints.append(f"sessions.user_anonymous_id = %({f['key']}_{i})s")
-            elif any(item in [schemas.FilterType.REV_ID, schemas.FilterType.REV_ID_MOBILE] \
-                     for item in filter_type):
-                constraints.append(f"sessions.rev_id = %({f['key']}_{i})s")
-    return constraints
-
-
-def __get_neutral(rows, add_All_if_empty=True):
-    neutral = {l: 0 for l in [i for k in [list(v.keys()) for v in rows] for i in k]}
-    if add_All_if_empty and len(neutral.keys()) <= 1:
-        neutral = {"All": 0}
-    return neutral
-
-
-def __merge_rows_with_neutral(rows, neutral):
-    for i in range(len(rows)):
-        rows[i] = {**neutral, **rows[i]}
-    return rows
-
-
-def __nested_array_to_dict_array(rows, key="url_host", value="count"):
-    for r in rows:
-        for i in range(len(r["keys"])):
-            r[r["keys"][i][key]] = r["keys"][i][value]
-        r.pop("keys")
-    return rows
-
-
-def get_user_activity_avg_visited_pages(project_id, startTimestamp=TimeUTC.now(delta_days=-1),
-                                        endTimestamp=TimeUTC.now(), **args):
-    with pg_client.PostgresClient() as cur:
-        row = __get_user_activity_avg_visited_pages(cur, project_id, startTimestamp, endTimestamp, **args)
-        results = helper.dict_to_camel_case(row)
-        results["chart"] = __get_user_activity_avg_visited_pages_chart(cur, project_id, startTimestamp,
-                                                                       endTimestamp, **args)
-
-        diff = endTimestamp - startTimestamp
-        endTimestamp = startTimestamp
-        startTimestamp = endTimestamp - diff
-        row = __get_user_activity_avg_visited_pages(cur, project_id, startTimestamp, endTimestamp, **args)
-
-        previous = helper.dict_to_camel_case(row)
-        results["progress"] = helper.__progress(old_val=previous["value"], new_val=results["value"])
-    results["unit"] = schemas.TemplatePredefinedUnits.COUNT
-    return results
-
-
-def __get_user_activity_avg_visited_pages(cur, project_id, startTimestamp, endTimestamp, **args):
-    pg_sub_query = __get_constraints(project_id=project_id, data=args)
-    pg_sub_query.append("sessions.pages_count>0")
-    pg_query = f"""SELECT COALESCE(CEIL(AVG(sessions.pages_count)),0) AS value
-                   FROM public.sessions
-                   WHERE {" AND ".join(pg_sub_query)};"""
-    params = {"project_id": project_id, "startTimestamp": startTimestamp, "endTimestamp": endTimestamp,
-              **__get_constraint_values(args)}
-
-    cur.execute(cur.mogrify(pg_query, params))
-    row = cur.fetchone()
-    return row
-
-
-def __get_user_activity_avg_visited_pages_chart(cur, project_id, startTimestamp, endTimestamp, density=20, **args):
-    step_size = get_step_size(endTimestamp=endTimestamp, startTimestamp=startTimestamp, density=density, factor=1)
-    params = {"step_size": step_size, "project_id": project_id, "startTimestamp": startTimestamp,
-              "endTimestamp": endTimestamp}
-    pg_sub_query_subset = __get_constraints(project_id=project_id, time_constraint=True,
-                                            chart=False, data=args)
-    pg_sub_query_chart = __get_constraints(project_id=project_id, time_constraint=False, project=False,
-                                           chart=True, data=args, main_table="sessions", time_column="start_ts",
-                                           duration=False)
-    pg_sub_query_subset.append("sessions.duration IS NOT NULL")
-
-    pg_query = f"""WITH sessions AS(SELECT sessions.pages_count, sessions.start_ts
-                                    FROM public.sessions
-                                    WHERE {" AND ".join(pg_sub_query_subset)}
-                   )
-                   SELECT generated_timestamp AS timestamp,
-                          COALESCE(AVG(sessions.pages_count),0) AS value
-                   FROM generate_series(%(startTimestamp)s, %(endTimestamp)s, %(step_size)s) AS generated_timestamp
-                        LEFT JOIN LATERAL (
-                            SELECT sessions.pages_count
-                            FROM sessions
-                            WHERE {" AND ".join(pg_sub_query_chart)}
-                        ) AS sessions ON (TRUE)
-                   GROUP BY generated_timestamp
-                   ORDER BY generated_timestamp;"""
-    cur.execute(cur.mogrify(pg_query, {**params, **__get_constraint_values(args)}))
-    rows = cur.fetchall()
-    return rows
-
-
-def get_unique_users(project_id, startTimestamp=TimeUTC.now(delta_days=-1),
-                     endTimestamp=TimeUTC.now(),
-                     density=7, **args):
-    step_size = get_step_size(startTimestamp, endTimestamp, density, factor=1)
-    pg_sub_query = __get_constraints(project_id=project_id, data=args)
-    pg_sub_query_chart = __get_constraints(project_id=project_id, time_constraint=True,
-                                           chart=True, data=args)
-    pg_sub_query.append("user_id IS NOT NULL")
-    pg_sub_query.append("user_id != ''")
-    pg_sub_query_chart.append("user_id IS NOT NULL")
-    pg_sub_query_chart.append("user_id != ''")
-    with pg_client.PostgresClient() as cur:
-        pg_query = f"""SELECT generated_timestamp AS timestamp,
-                              COALESCE(COUNT(sessions), 0) AS value
-                       FROM generate_series(%(startTimestamp)s, %(endTimestamp)s, %(step_size)s) AS generated_timestamp
-                            LEFT JOIN LATERAL ( SELECT DISTINCT user_id
-                                                FROM public.sessions
-                                                WHERE {" AND ".join(pg_sub_query_chart)}
-                            ) AS sessions ON (TRUE)
-                       GROUP BY generated_timestamp
-                       ORDER BY generated_timestamp;"""
-        params = {"step_size": step_size, "project_id": project_id, "startTimestamp": startTimestamp,
-                  "endTimestamp": endTimestamp, **__get_constraint_values(args)}
-        cur.execute(cur.mogrify(pg_query, params))
-        rows = cur.fetchall()
-        results = {
-            "value": sum([r["value"] for r in rows]),
-            "chart": rows
-        }
-
-        diff = endTimestamp - startTimestamp
-        endTimestamp = startTimestamp
-        startTimestamp = endTimestamp - diff
-
-        pg_query = f"""SELECT COUNT(DISTINCT sessions.user_id) AS count
-                       FROM public.sessions
-                       WHERE {" AND ".join(pg_sub_query)};"""
-        params = {"project_id": project_id, "startTimestamp": startTimestamp, "endTimestamp": endTimestamp,
-                  **__get_constraint_values(args)}
-
-        cur.execute(cur.mogrify(pg_query, params))
-
-        count = cur.fetchone()["count"]
-
-        results["progress"] = helper.__progress(old_val=count, new_val=results["value"])
-    results["unit"] = schemas.TemplatePredefinedUnits.COUNT
-    return results
diff --git a/api/chalicelib/core/metrics/metrics_ch.py b/api/chalicelib/core/metrics/metrics_ch.py
deleted file mode 100644
index c809719cc..000000000
--- a/api/chalicelib/core/metrics/metrics_ch.py
+++ /dev/null
@@ -1,278 +0,0 @@
-import logging
-from math import isnan
-
-import schemas
-from chalicelib.utils import ch_client
-from chalicelib.utils import exp_ch_helper
-from chalicelib.utils import helper
-from chalicelib.utils.TimeUTC import TimeUTC
-from chalicelib.utils.metrics_helper import get_step_size
-
-logger = logging.getLogger(__name__)
-
-
-def __get_basic_constraints(table_name=None, time_constraint=True, round_start=False, data={}, identifier="project_id"):
-    if table_name:
-        table_name += "."
-    else:
-        table_name = ""
-    ch_sub_query = [f"{table_name}{identifier} =toUInt16(%({identifier})s)"]
-    if time_constraint:
-        if round_start:
-            ch_sub_query.append(
-                f"toStartOfInterval({table_name}datetime, INTERVAL %(step_size)s second) >= toDateTime(%(startTimestamp)s/1000)")
-        else:
-            ch_sub_query.append(f"{table_name}datetime >= toDateTime(%(startTimestamp)s/1000)")
-        ch_sub_query.append(f"{table_name}datetime < toDateTime(%(endTimestamp)s/1000)")
-    return ch_sub_query + __get_generic_constraint(data=data, table_name=table_name)
-
-
-def __get_basic_constraints_events(table_name=None, time_constraint=True, round_start=False, data={},
-                                   identifier="project_id"):
-    if table_name:
-        table_name += "."
-    else:
-        table_name = ""
-    ch_sub_query = [f"{table_name}{identifier} =toUInt16(%({identifier})s)"]
-    if time_constraint:
-        if round_start:
-            ch_sub_query.append(
-                f"toStartOfInterval({table_name}created_at, INTERVAL %(step_size)s second) >= toDateTime(%(startTimestamp)s/1000)")
-        else:
-            ch_sub_query.append(f"{table_name}created_at >= toDateTime(%(startTimestamp)s/1000)")
-        ch_sub_query.append(f"{table_name}created_at < toDateTime(%(endTimestamp)s/1000)")
-    return ch_sub_query + __get_generic_constraint(data=data, table_name=table_name)
-
-
-def __frange(start, stop, step):
-    result = []
-    i = start
-    while i < stop:
-        result.append(i)
-        i += step
-    return result
-
-
-def __add_missing_keys(original, complete):
-    for missing in [key for key in complete.keys() if key not in original.keys()]:
-        original[missing] = complete[missing]
-    return original
-
-
-def __complete_missing_steps(start_time, end_time, density, neutral, rows, time_key="timestamp", time_coefficient=1000):
-    if len(rows) == density:
-        return rows
-    step = get_step_size(start_time, end_time, density, decimal=True)
-    optimal = [(int(i * time_coefficient), int((i + step) * time_coefficient)) for i in
-               __frange(start_time // time_coefficient, end_time // time_coefficient, step)]
-    result = []
-    r = 0
-    o = 0
-    for i in range(density):
-        neutral_clone = dict(neutral)
-        for k in neutral_clone.keys():
-            if callable(neutral_clone[k]):
-                neutral_clone[k] = neutral_clone[k]()
-        if r < len(rows) and len(result) + len(rows) - r == density:
-            result += rows[r:]
-            break
-        if r < len(rows) and o < len(optimal) and rows[r][time_key] < optimal[o][0]:
-            # complete missing keys in original object
-            rows[r] = __add_missing_keys(original=rows[r], complete=neutral_clone)
-            result.append(rows[r])
-            r += 1
-        elif r < len(rows) and o < len(optimal) and optimal[o][0] <= rows[r][time_key] < optimal[o][1]:
-            # complete missing keys in original object
-            rows[r] = __add_missing_keys(original=rows[r], complete=neutral_clone)
-            result.append(rows[r])
-            r += 1
-            o += 1
-        else:
-            neutral_clone[time_key] = optimal[o][0]
-            result.append(neutral_clone)
-            o += 1
-    return result
-
-
-def __get_constraint(data, fields, table_name):
-    constraints = []
-    # for k in fields.keys():
-    for i, f in enumerate(data.get("filters", [])):
-        if f["key"] in fields.keys():
-            if f["value"] in ["*", ""]:
-                constraints.append(f"isNotNull({table_name}{fields[f['key']]})")
-            else:
-                constraints.append(f"{table_name}{fields[f['key']]} = %({f['key']}_{i})s")
-    # TODO: remove this in next release
-    offset = len(data.get("filters", []))
-    for i, f in enumerate(data.keys()):
-        if f in fields.keys():
-            if data[f] in ["*", ""]:
-                constraints.append(f"isNotNull({table_name}{fields[f]})")
-            else:
-                constraints.append(f"{table_name}{fields[f]} = %({f}_{i + offset})s")
-    return constraints
-
-
-def __get_constraint_values(data):
-    params = {}
-    for i, f in enumerate(data.get("filters", [])):
-        params[f"{f['key']}_{i}"] = f["value"]
-
-    # TODO: remove this in next release
-    offset = len(data.get("filters", []))
-    for i, f in enumerate(data.keys()):
-        params[f"{f}_{i + offset}"] = data[f]
-    return params
-
-
-METADATA_FIELDS = {"userId": "user_id",
-                   "userAnonymousId": "user_anonymous_id",
-                   "metadata1": "metadata_1",
-                   "metadata2": "metadata_2",
-                   "metadata3": "metadata_3",
-                   "metadata4": "metadata_4",
-                   "metadata5": "metadata_5",
-                   "metadata6": "metadata_6",
-                   "metadata7": "metadata_7",
-                   "metadata8": "metadata_8",
-                   "metadata9": "metadata_9",
-                   "metadata10": "metadata_10"}
-
-
-def __get_meta_constraint(data):
-    return __get_constraint(data=data, fields=METADATA_FIELDS, table_name="sessions_metadata.")
-
-
-SESSIONS_META_FIELDS = {"revId": "rev_id",
-                        "country": "user_country",
-                        "os": "user_os",
-                        "platform": "user_device_type",
-                        "device": "user_device",
-                        "browser": "user_browser"}
-
-
-def __get_generic_constraint(data, table_name):
-    return __get_constraint(data=data, fields=SESSIONS_META_FIELDS, table_name=table_name)
-
-
-def get_user_activity_avg_visited_pages(project_id, startTimestamp=TimeUTC.now(delta_days=-1),
-                                        endTimestamp=TimeUTC.now(), **args):
-    results = {}
-
-    with ch_client.ClickHouseClient() as ch:
-        rows = __get_user_activity_avg_visited_pages(ch, project_id, startTimestamp, endTimestamp, **args)
-        if len(rows) > 0:
-            results = helper.dict_to_camel_case(rows[0])
-            for key in results:
-                if isnan(results[key]):
-                    results[key] = 0
-        results["chart"] = __get_user_activity_avg_visited_pages_chart(ch, project_id, startTimestamp,
-                                                                       endTimestamp, **args)
-
-        diff = endTimestamp - startTimestamp
-        endTimestamp = startTimestamp
-        startTimestamp = endTimestamp - diff
-        rows = __get_user_activity_avg_visited_pages(ch, project_id, startTimestamp, endTimestamp, **args)
-
-        if len(rows) > 0:
-            previous = helper.dict_to_camel_case(rows[0])
-            results["progress"] = helper.__progress(old_val=previous["value"], new_val=results["value"])
-    results["unit"] = schemas.TemplatePredefinedUnits.COUNT
-    return results
-
-
-def __get_user_activity_avg_visited_pages(ch, project_id, startTimestamp, endTimestamp, **args):
-    ch_sub_query = __get_basic_constraints(table_name="pages", data=args)
-    ch_sub_query.append("pages.event_type='LOCATION'")
-    meta_condition = __get_meta_constraint(args)
-    ch_sub_query += meta_condition
-
-    ch_query = f"""SELECT COALESCE(CEIL(avgOrNull(count)),0) AS value
-                   FROM (SELECT COUNT(1) AS count
-                         FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS pages
-                         WHERE {" AND ".join(ch_sub_query)}
-                         GROUP BY session_id) AS groupped_data
-                   WHERE count>0;"""
-    params = {"project_id": project_id, "startTimestamp": startTimestamp, "endTimestamp": endTimestamp,
-              **__get_constraint_values(args)}
-
-    rows = ch.execute(query=ch_query, parameters=params)
-
-    return rows
-
-
-def __get_user_activity_avg_visited_pages_chart(ch, project_id, startTimestamp, endTimestamp, density=20, **args):
-    step_size = get_step_size(endTimestamp=endTimestamp, startTimestamp=startTimestamp, density=density)
-    ch_sub_query_chart = __get_basic_constraints(table_name="pages", round_start=True, data=args)
-    ch_sub_query_chart.append("pages.event_type='LOCATION'")
-    meta_condition = __get_meta_constraint(args)
-    ch_sub_query_chart += meta_condition
-
-    params = {"step_size": step_size, "project_id": project_id, "startTimestamp": startTimestamp,
-              "endTimestamp": endTimestamp, **__get_constraint_values(args)}
-    ch_query = f"""SELECT timestamp, COALESCE(avgOrNull(count), 0) AS value
-                   FROM (SELECT toUnixTimestamp(toStartOfInterval(pages.datetime, INTERVAL %(step_size)s second ))*1000 AS timestamp,
-                                session_id, COUNT(1) AS count
-                         FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS pages
-                         WHERE {" AND ".join(ch_sub_query_chart)}
-                         GROUP BY timestamp,session_id
-                         ORDER BY timestamp) AS groupped_data
-                   WHERE count>0
-                   GROUP BY timestamp
-                   ORDER BY timestamp;"""
-    rows = ch.execute(query=ch_query, parameters=params)
-    rows = __complete_missing_steps(rows=rows, start_time=startTimestamp,
-                                    end_time=endTimestamp,
-                                    density=density, neutral={"value": 0})
-    return rows
-
-
-def get_unique_users(project_id, startTimestamp=TimeUTC.now(delta_days=-1),
-                     endTimestamp=TimeUTC.now(), density=7, **args):
-    step_size = get_step_size(startTimestamp, endTimestamp, density)
-    ch_sub_query = __get_basic_constraints(table_name="sessions", data=args)
-    ch_sub_query_chart = __get_basic_constraints(table_name="sessions", round_start=True, data=args)
-    meta_condition = __get_meta_constraint(args)
-    ch_sub_query += meta_condition
-    ch_sub_query_chart += meta_condition
-    ch_sub_query_chart.append("isNotNull(sessions.user_id)")
-    ch_sub_query_chart.append("sessions.user_id!=''")
-    with ch_client.ClickHouseClient() as ch:
-        ch_query = f"""\
-        SELECT toUnixTimestamp(toStartOfInterval(sessions.datetime, INTERVAL %(step_size)s second)) * 1000 AS timestamp,
-               COUNT(DISTINCT sessions.user_id) AS value
-        FROM {exp_ch_helper.get_main_sessions_table(startTimestamp)} AS sessions
-        WHERE {" AND ".join(ch_sub_query_chart)}
-        GROUP BY timestamp
-        ORDER BY timestamp;\
-        """
-        params = {"step_size": step_size, "project_id": project_id, "startTimestamp": startTimestamp,
-                  "endTimestamp": endTimestamp, **__get_constraint_values(args)}
-
-        rows = ch.execute(query=ch_query, parameters=params)
-
-        results = {
-            "value": sum([r["value"] for r in rows]),
-            "chart": __complete_missing_steps(rows=rows, start_time=startTimestamp, end_time=endTimestamp,
-                                              density=density,
-                                              neutral={"value": 0})
-        }
-
-        diff = endTimestamp - startTimestamp
-        endTimestamp = startTimestamp
-        startTimestamp = endTimestamp - diff
-
-        ch_query = f""" SELECT COUNT(DISTINCT user_id) AS count
-                        FROM {exp_ch_helper.get_main_sessions_table(startTimestamp)} AS sessions
-                        WHERE {" AND ".join(ch_sub_query)};"""
-        params = {"project_id": project_id, "startTimestamp": startTimestamp, "endTimestamp": endTimestamp,
-                  **__get_constraint_values(args)}
-
-        count = ch.execute(query=ch_query, parameters=params)
-
-        count = count[0]["count"]
-
-        results["progress"] = helper.__progress(old_val=count, new_val=results["value"])
-    results["unit"] = schemas.TemplatePredefinedUnits.COUNT
-    return results
diff --git a/api/chalicelib/core/metrics/product_analytics_ch.py b/api/chalicelib/core/metrics/product_analytics_ch.py
index c4f429f08..0ea70ce7e 100644
--- a/api/chalicelib/core/metrics/product_analytics_ch.py
+++ b/api/chalicelib/core/metrics/product_analytics_ch.py
@@ -1,18 +1,14 @@
-from typing import List
-
-import schemas
-from chalicelib.core.metrics.metrics_ch import __get_basic_constraints, __get_meta_constraint, \
-    __get_basic_constraints_events
-from chalicelib.core.metrics.metrics_ch import __get_constraint_values, __complete_missing_steps
-from chalicelib.utils import ch_client, exp_ch_helper
-from chalicelib.utils import helper, dev
-from chalicelib.utils.TimeUTC import TimeUTC
-from chalicelib.utils import sql_helper as sh
-from chalicelib.core import metadata
+import logging
 from time import time
-import logging
+import schemas
+from chalicelib.core import metadata
+from chalicelib.core.metrics.product_analytics import __transform_journey
+from chalicelib.utils import ch_client, exp_ch_helper
+from chalicelib.utils import helper
+from chalicelib.utils import sql_helper as sh
+from chalicelib.utils.TimeUTC import TimeUTC
+from chalicelib.utils.metrics_helper import get_step_size
 
 logger = logging.getLogger(__name__)
@@ -24,6 +20,67 @@ JOURNEY_TYPES = {
 }
 
 
+def __get_basic_constraints_events(table_name=None, identifier="project_id"):
+    if table_name:
+        table_name += "."
+    else:
+        table_name = ""
+    ch_sub_query = [f"{table_name}{identifier} =toUInt16(%({identifier})s)"]
+    ch_sub_query.append(f"{table_name}created_at >= toDateTime(%(startTimestamp)s/1000)")
+    ch_sub_query.append(f"{table_name}created_at < toDateTime(%(endTimestamp)s/1000)")
+    return ch_sub_query
+
+
+def __frange(start, stop, step):
+    result = []
+    i = start
+    while i < stop:
+        result.append(i)
+        i += step
+    return result
+
+
+def __add_missing_keys(original, complete):
+    for missing in [key for key in complete.keys() if key not in original.keys()]:
+        original[missing] = complete[missing]
+    return original
+
+
+def __complete_missing_steps(start_time, end_time, density, neutral, rows, time_key="timestamp", time_coefficient=1000):
+    if len(rows) == density:
+        return rows
+    step = get_step_size(start_time, end_time, density, decimal=True)
+    optimal = [(int(i * time_coefficient), int((i + step) * time_coefficient)) for i in
+               __frange(start_time // time_coefficient, end_time // time_coefficient, step)]
+    result = []
+    r = 0
+    o = 0
+    for i in range(density):
+        neutral_clone = dict(neutral)
+        for k in neutral_clone.keys():
+            if callable(neutral_clone[k]):
+                neutral_clone[k] = neutral_clone[k]()
+        if r < len(rows) and len(result) + len(rows) - r == density:
+            result += rows[r:]
+            break
+        if r < len(rows) and o < len(optimal) and rows[r][time_key] < optimal[o][0]:
+            # complete missing keys in original object
+            rows[r] = __add_missing_keys(original=rows[r], complete=neutral_clone)
+            result.append(rows[r])
+            r += 1
+        elif r < len(rows) and o < len(optimal) and optimal[o][0] <= rows[r][time_key] < optimal[o][1]:
+            # complete missing keys in original object
+            rows[r] = __add_missing_keys(original=rows[r], complete=neutral_clone)
+            result.append(rows[r])
+            r += 1
+            o += 1
+        else:
+            neutral_clone[time_key] = optimal[o][0]
+            result.append(neutral_clone)
+            o += 1
+    return result
+
+
 # startPoints are computed before ranked_events to reduce the number of window functions over rows
 # compute avg_time_from_previous at the same level as sessions_count (this was removed in v1.22)
 # if start-point is selected, the selected event is ranked n°1
@@ -50,8 +107,8 @@ def path_analysis(project_id: int, data: schemas.CardPathAnalysis):
             sub_events.append({"column": JOURNEY_TYPES[s.type]["column"],
                                "eventType": JOURNEY_TYPES[s.type]["eventType"]})
             step_1_post_conditions.append(
-                f"(`$event_name`='{JOURNEY_TYPES[s.type]["eventType"]}' AND event_number_in_session = 1 \
-                    OR `$event_name`!='{JOURNEY_TYPES[s.type]["eventType"]}' AND event_number_in_session > 1)")
+                f"(`$event_name`='{JOURNEY_TYPES[s.type]['eventType']}' AND event_number_in_session = 1 \
+                    OR `$event_name`!='{JOURNEY_TYPES[s.type]['eventType']}' AND event_number_in_session > 1)")
             extra_metric_values.append(s.type)
             if not q2_extra_col:
                 # This is used in case start event has different type of the visible event,
@@ -453,7 +510,7 @@ WITH pre_ranked_events AS (SELECT *
                            FROM start_points INNER JOIN pre_ranked_events USING (session_id))
 SELECT *
 FROM ranked_events
-{q2_extra_condition if q2_extra_condition else ""}"""
+{q2_extra_condition if q2_extra_condition else ""};"""
     logger.debug("---------Q2-----------")
     ch.execute(query=ch_query2, parameters=params)
     if time() - _now > 2:
@@ -465,9 +522,9 @@ FROM ranked_events
         sub_cte = ""
         if data.hide_excess:
             sub_cte = f""",
-     top_n AS ({"\nUNION ALL\n".join(top_query)}),
-     top_n_with_next AS ({"\nUNION ALL\n".join(top_with_next_query)}),
-     others_n AS ({"\nUNION ALL\n".join(other_query)})"""
+     top_n AS ({" UNION ALL ".join(top_query)}),
+     top_n_with_next AS ({" UNION ALL ".join(top_with_next_query)}),
+     others_n AS ({" UNION ALL ".join(other_query)})"""
         projection_query = """\
 -- Top to Top: valid
 SELECT top_n_with_next.*
@@ -549,13 +606,13 @@ FROM ranked_events
                                        next_value,
                                        sessions_count
                                 FROM drop_n""")
-    projection_query = "\nUNION ALL\n".join(projection_query)
+    projection_query = " UNION ALL ".join(projection_query)
 
     ch_query3 = f"""\
 WITH ranked_events AS (SELECT *
                        FROM ranked_events_{time_key}),
-     {",\n".join(steps_query)},
-     drop_n AS ({"\nUNION ALL\n".join(drop_query)})
+     {", ".join(steps_query)},
+     drop_n AS ({" UNION ALL ".join(drop_query)})
 {sub_cte}
 SELECT event_number_in_session,
        `$event_name` AS event_type,
diff --git a/api/chalicelib/core/sessions/sessions_ch.py b/api/chalicelib/core/sessions/sessions_ch.py
index 916346a73..a4de2fc48 100644
--- a/api/chalicelib/core/sessions/sessions_ch.py
+++ b/api/chalicelib/core/sessions/sessions_ch.py
@@ -1,12 +1,8 @@
 import logging
 from typing import List, Union
 
-import logging
-from typing import List, Union
-
 import schemas
 from chalicelib.core import events, metadata
-from chalicelib.core.metrics import metrics
 from chalicelib.core.sessions import performance_event, sessions_legacy
 from chalicelib.utils import pg_client, helper, metrics_helper, ch_client, exp_ch_helper
 from chalicelib.utils import sql_helper as sh
@@ -17,8 +13,8 @@ def search2_series(data: schemas.SessionsSearchPayloadSchema, project_id: int, d
                    density: int, metric_type: schemas.MetricType,
                    metric_of: schemas.MetricOfTimeseries | schemas.MetricOfTable, metric_value: List):
-    step_size = int(metrics_helper.get_step_size(endTimestamp=data.endTimestamp, startTimestamp=data.startTimestamp,
-                                                 density=density))
+    step_size = metrics_helper.get_step_size(endTimestamp=data.endTimestamp, startTimestamp=data.startTimestamp,
+                                             density=density, factor=1)
 
     extra_event = None
     if metric_of == schemas.MetricOfTable.VISITED_URL:
         extra_event = f"""SELECT DISTINCT ev.session_id, ev.url_path
@@ -38,25 +34,27 @@ def search2_series(data: schemas.SessionsSearchPayloadSchema, project_id: int, d
     with ch_client.ClickHouseClient() as cur:
         if metric_type == schemas.MetricType.TIMESERIES:
             if metric_of == schemas.MetricOfTimeseries.SESSION_COUNT:
-                query = f"""SELECT toUnixTimestamp(
-                                   toStartOfInterval(processed_sessions.datetime, INTERVAL %(step_size)s second)
-                               ) * 1000 AS timestamp,
-                               COUNT(processed_sessions.session_id) AS count
-                            FROM (SELECT s.session_id AS session_id,
-                                         s.datetime AS datetime
-                                  {query_part}) AS processed_sessions
+                query = f"""SELECT gs.generate_series AS timestamp,
+                               COALESCE(COUNT(DISTINCT processed_sessions.session_id),0) AS count
+                            FROM generate_series(%(startDate)s, %(endDate)s, %(step_size)s) AS gs
+                            LEFT JOIN (SELECT s.session_id AS session_id,
+                                              s.datetime AS datetime
+                                       {query_part}) AS processed_sessions ON(TRUE)
+                            WHERE processed_sessions.datetime >= toDateTime(timestamp / 1000)
+                              AND processed_sessions.datetime < toDateTime((timestamp + %(step_size)s) / 1000)
                             GROUP BY timestamp
                             ORDER BY timestamp;"""
             elif metric_of == schemas.MetricOfTimeseries.USER_COUNT:
-                query = f"""SELECT toUnixTimestamp(
-                                   toStartOfInterval(processed_sessions.datetime, INTERVAL %(step_size)s second)
-                               ) * 1000 AS timestamp,
-                               COUNT(DISTINCT processed_sessions.user_id) AS count
-                            FROM (SELECT s.user_id AS user_id,
-                                         s.datetime AS datetime
-                                  {query_part}
-                                  WHERE isNotNull(s.user_id)
-                                    AND s.user_id != '') AS processed_sessions
+                query = f"""SELECT gs.generate_series AS timestamp,
+                               COALESCE(COUNT(DISTINCT processed_sessions.user_id),0) AS count
+                            FROM generate_series(%(startDate)s, %(endDate)s, %(step_size)s) AS gs
+                            LEFT JOIN (SELECT s.user_id AS user_id,
+                                              s.datetime AS datetime
+                                       {query_part}
+                                       WHERE isNotNull(s.user_id)
+                                         AND s.user_id != '') AS processed_sessions ON(TRUE)
+                            WHERE processed_sessions.datetime >= toDateTime(timestamp / 1000)
+                              AND processed_sessions.datetime < toDateTime((timestamp + %(step_size)s) / 1000)
                             GROUP BY timestamp
                            ORDER BY timestamp;"""
             else:
@@ -67,8 +65,6 @@ def search2_series(data: schemas.SessionsSearchPayloadSchema, project_id: int, d
             logging.debug(main_query)
             logging.debug("--------------------")
             sessions = cur.execute(main_query)
-            sessions = metrics.__complete_missing_steps(start_time=data.startTimestamp, end_time=data.endTimestamp,
-                                                        density=density, neutral={"count": 0}, rows=sessions)
 
         elif metric_type == schemas.MetricType.TABLE:
             full_args["limit_s"] = 0
diff --git a/api/chalicelib/utils/metrics_helper.py b/api/chalicelib/utils/metrics_helper.py
index b31363453..fa9bca6a6 100644
--- a/api/chalicelib/utils/metrics_helper.py
+++ b/api/chalicelib/utils/metrics_helper.py
@@ -4,4 +4,4 @@ def get_step_size(startTimestamp, endTimestamp, density, decimal=False, factor=1
         return step_size
     if decimal:
         return step_size / density
-    return step_size // (density - 1)
+    return step_size // density
diff --git a/api/schemas/schemas.py b/api/schemas/schemas.py
index d6bd7338b..51d8660e1 100644
--- a/api/schemas/schemas.py
+++ b/api/schemas/schemas.py
@@ -870,19 +870,12 @@ class MetricType(str, Enum):
     TIMESERIES = "timeseries"
     TABLE = "table"
     FUNNEL = "funnel"
-    ERRORS = "errors"
-    PERFORMANCE = "performance"
-    RESOURCES = "resources"
-    WEB_VITAL = "webVitals"
     PATH_ANALYSIS = "pathAnalysis"
     RETENTION = "retention"
     STICKINESS = "stickiness"
     HEAT_MAP = "heatMap"
 
 
-class MetricOfWebVitals(str, Enum):
-    AVG_VISITED_PAGES = "avgVisitedPages"
-    COUNT_USERS = "userCount"
 
 
 class MetricOfTable(str, Enum):
@@ -1035,12 +1028,6 @@ class __CardSchema(CardSessionsSchema):
     # This is used to specify the number of top values for PathAnalysis
     rows: int = Field(default=3, ge=1, le=10)
 
-    @computed_field
-    @property
-    def is_predefined(self) -> bool:
-        return self.metric_type in [MetricType.ERRORS, MetricType.PERFORMANCE,
-                                    MetricType.RESOURCES, MetricType.WEB_VITAL]
-
 
 class CardTimeSeries(__CardSchema):
     metric_type: Literal[MetricType.TIMESERIES]
@@ -1110,23 +1097,6 @@ class CardFunnel(__CardSchema):
         return self
 
 
-class CardWebVital(__CardSchema):
-    metric_type: Literal[MetricType.WEB_VITAL]
-    metric_of: MetricOfWebVitals = Field(default=MetricOfWebVitals.AVG_VISITED_PAGES)
-    view_type: MetricOtherViewType = Field(...)
-
-    @model_validator(mode="before")
-    @classmethod
-    def __enforce_default(cls, values):
-        values["series"] = []
-        return values
-
-    @model_validator(mode="after")
-    def __transform(self):
-        self.metric_of = MetricOfWebVitals(self.metric_of)
-        return self
-
-
 class CardHeatMap(__CardSchema):
     metric_type: Literal[MetricType.HEAT_MAP]
     metric_of: MetricOfHeatMap = Field(default=MetricOfHeatMap.HEAT_MAP_URL)
@@ -1216,8 +1186,7 @@ class CardPathAnalysis(__CardSchema):
 
 
 # Union of cards-schemas that doesn't change between FOSS and EE
 __cards_union_base = Union[
-    CardTimeSeries, CardTable, CardFunnel,
-    CardWebVital, CardHeatMap, CardPathAnalysis]
+    CardTimeSeries, CardTable, CardFunnel, CardHeatMap, CardPathAnalysis]
 
 CardSchema = ORUnion(__cards_union_base, discriminator='metric_type')
diff --git a/ee/api/.gitignore b/ee/api/.gitignore
index e7c89b57d..ae8b4fca0 100644
--- a/ee/api/.gitignore
+++ b/ee/api/.gitignore
@@ -192,14 +192,11 @@ Pipfile.lock
 /chalicelib/core/canvas.py
 /chalicelib/core/collaborations/*
 /chalicelib/core/countries.py
-/chalicelib/core/metrics/metrics.py
 /chalicelib/core/metrics/custom_metrics.py
-/chalicelib/core/metrics/custom_metrics_predefined.py
 /chalicelib/core/metrics/dashboards.py
 /chalicelib/core/metrics/funnels.py
 /chalicelib/core/metrics/heatmaps.py
 /chalicelib/core/metrics/heatmaps_ch.py
-/chalicelib/core/metrics/metrics_ch.py
 /chalicelib/core/metrics/product_analytics.py
 /chalicelib/core/metrics/product_analytics_ch.py
 /chalicelib/core/metrics/product_anaytics2.py
diff --git a/ee/api/clean-dev.sh b/ee/api/clean-dev.sh
index bc37582b4..ba6628328 100755
--- a/ee/api/clean-dev.sh
+++ b/ee/api/clean-dev.sh
@@ -12,14 +12,11 @@ rm -rf ./chalicelib/core/authorizers.py
 rm -rf ./chalicelib/core/autocomplete/autocomplete.py
 rm -rf ./chalicelib/core/collaborations
 rm -rf ./chalicelib/core/countries.py
-rm -rf ./chalicelib/core/metrics/metrics.py
 rm -rf ./chalicelib/core/metrics/custom_metrics.py
-rm -rf ./chalicelib/core/metrics/custom_metrics_predefined.py
 rm -rf ./chalicelib/core/metrics/funnels.py
 rm -rf ./chalicelib/core/metrics/dashboards.py
 rm -rf ./chalicelib/core/metrics/heatmaps.py
 rm -rf ./chalicelib/core/metrics/heatmaps_ch.py
-rm -rf ./chalicelib/core/metrics/metrics_ch.py
 rm -rf ./chalicelib/core/metrics/product_analytics.py
 rm -rf ./chalicelib/core/metrics/product_analytics_ch.py
 rm -rf ./chalicelib/core/metrics/product_anaytics2.py
diff --git a/ee/scripts/schema/db/init_dbs/postgresql/1.22.0/1.22.0.sql b/ee/scripts/schema/db/init_dbs/postgresql/1.22.0/1.22.0.sql
index 785fd8478..d7ed6c5ba 100644
--- a/ee/scripts/schema/db/init_dbs/postgresql/1.22.0/1.22.0.sql
+++ b/ee/scripts/schema/db/init_dbs/postgresql/1.22.0/1.22.0.sql
@@ -38,7 +38,8 @@ FROM public.metrics
 WHERE metric_of IN ('domainsErrors4xx', 'domainsErrors5xx', 'countSessions', 'countRequests',
                     'errorsPerDomains', 'errorsPerType', 'impactedSessionsByJsErrors', 'resourcesByParty', 'userOs',
-                    'speedLocation');
+                    'speedLocation', 'avgVisitedPages')
+   OR metric_type IN ('webVitals', 'errors', 'performance', 'resources');
 
 COMMIT;
diff --git a/scripts/schema/db/init_dbs/postgresql/1.22.0/1.22.0.sql b/scripts/schema/db/init_dbs/postgresql/1.22.0/1.22.0.sql
index 8bd56a918..340929e68 100644
--- a/scripts/schema/db/init_dbs/postgresql/1.22.0/1.22.0.sql
+++ b/scripts/schema/db/init_dbs/postgresql/1.22.0/1.22.0.sql
@@ -38,7 +38,8 @@ FROM public.metrics
 WHERE metric_of IN ('domainsErrors4xx', 'domainsErrors5xx', 'countSessions', 'countRequests',
                     'errorsPerDomains', 'errorsPerType', 'impactedSessionsByJsErrors', 'resourcesByParty', 'userOs',
-                    'speedLocation');
+                    'speedLocation', 'avgVisitedPages')
+   OR metric_type IN ('webVitals', 'errors', 'performance', 'resources');
 
 COMMIT;
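
A note on the recurring pattern in this change set (illustration only, not part of the patch): the Python-side gap filling (__complete_missing_steps and the metrics/metrics_ch helpers) is replaced by time buckets built in SQL with generate_series, which is why those modules and the predefined webVitals/errors/performance/resources card types can be dropped. A minimal ClickHouse sketch of the bucketing shape follows; the table and column names (events, created_at, session_id) and the hard-coded millisecond bounds are placeholders, not the project's schema or parameters:

SELECT gs.generate_series AS timestamp,
       COUNT(DISTINCT e.session_id) AS count
FROM generate_series(1735689600000, 1735696800000, 600000) AS gs
LEFT JOIN events AS e ON (TRUE)
WHERE e.created_at >= toDateTime(timestamp / 1000)
  AND e.created_at < toDateTime((timestamp + 600000) / 1000)
GROUP BY timestamp
ORDER BY timestamp;

One caveat worth keeping in mind when reviewing: with the range predicates in the WHERE clause the LEFT JOIN effectively behaves like an inner join, so buckets with no matching rows are dropped rather than returned as zero-count points; whether that matters depends on how the chart consumer handles missing steps now that the Python padding is gone.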