def calculate_paths(self, filter: PathFilter, team: Team):
    """Compute the paths graph (source -> target edges with counts) for a team.

    Builds one big ClickHouse query from PATHS_QUERY_FINAL, parameterised by
    the filter's date range, property filters, path type and optional start
    point, then returns edge dicts sorted by descending traversal count.
    """
    parsed_date_from, parsed_date_to, _ = parse_timestamps(filter=filter, team_id=team.pk)
    # event is None for the generic "custom events" path type; path_type is the
    # SQL expression selecting the per-row path value, start_comparator matches
    # rows against the configured start point.
    event, path_type, start_comparator = self._determine_path_type(filter.path_type if filter else None)
    prop_filters, prop_filter_params = parse_prop_clauses(
        filter.properties, team.pk, filter_test_accounts=filter.filter_test_accounts)

    # Step 0. Event culling subexpression for step 1.
    # Make an expression that removes events in a session that are definitely unused.
    # For example the 4th, 5th, etc row after a "new_session = 1" or
    # "marked_session_start = 1" row gets removed.
    excess_row_filter = "("
    for i in range(4):
        if i > 0:
            excess_row_filter += " or "
        # neighbor(..., -i) looks i rows back within the sorted result set.
        excess_row_filter += "neighbor(new_session, {}, 0) = 1".format(-i)
        if filter and filter.start_point:
            excess_row_filter += " or neighbor(marked_session_start, {}, 0) = 1".format(-i)
    excess_row_filter += ")"

    paths_query = PATHS_QUERY_FINAL.format(
        # With a concrete event, match it exactly; otherwise fall back to
        # "any custom event" by excluding the built-in event names.
        event_query="event = %(event)s"
        if event
        else "event NOT IN ('$autocapture', '$pageview', '$identify', '$pageleave', '$screen')",
        path_type=path_type,
        parsed_date_from=parsed_date_from,
        parsed_date_to=parsed_date_to,
        filters=prop_filters,
        marked_session_start="{} = %(start_point)s".format(start_comparator)
        if filter and filter.start_point
        else "new_session",
        excess_row_filter=excess_row_filter,
        # elements_chain is only needed to label $autocapture path nodes.
        select_elements_chain=", events.elements_chain as elements_chain" if event == AUTOCAPTURE_EVENT else "",
        group_by_elements_chain=", events.elements_chain" if event == AUTOCAPTURE_EVENT else "",
    )

    params: Dict = {
        "team_id": team.pk,
        "property": "$current_url",
        "event": event,
        "start_point": filter.start_point,
    }
    params = {**params, **prop_filter_params}

    rows = sync_execute(paths_query, params)

    # Each row is (source, source_id, target, target_id, value) — see
    # PATHS_QUERY_FINAL for the column order.
    resp: List[Dict[str, str]] = []
    for row in rows:
        resp.append({
            "source": row[0],
            "source_id": row[1],
            "target": row[2],
            "target_id": row[3],
            "value": row[4],
        })
    # Most-travelled edges first.
    resp = sorted(resp, key=lambda x: x["value"], reverse=True)
    return resp
def list(self, request: Request, *args: Any, **kwargs: Any) -> Response:
    """List events for the current team with cursor-style pagination.

    Fetches 101 rows but serializes only 100; the presence of a 101st row
    signals another page, and the 100th row's timestamp becomes the cursor
    in the returned ``next`` URL.
    """
    team = self.team
    filter = Filter(request=request)
    # "after"/"before" override the filter's date window for cursor paging.
    if request.GET.get("after"):
        filter._date_from = request.GET["after"]
    if request.GET.get("before"):
        filter._date_to = request.GET["before"]
    limit = "LIMIT 101"
    conditions, condition_params = determine_event_conditions(request.GET.dict())
    prop_filters, prop_filter_params = parse_prop_clauses(filter.properties, team.pk)

    if request.GET.get("action_id"):
        action = Action.objects.get(pk=request.GET["action_id"])
        # An action with no steps can never match any event.
        if action.steps.count() == 0:
            return Response({"next": False, "results": []})
        action_query, params = format_action_filter(action)
        prop_filters += " AND {}".format(action_query)
        prop_filter_params = {**prop_filter_params, **params}

    if prop_filters != "":
        query_result = sync_execute(
            SELECT_EVENT_WITH_PROP_SQL.format(conditions=conditions, limit=limit, filters=prop_filters),
            {"team_id": team.pk, **condition_params, **prop_filter_params},
        )
    else:
        query_result = sync_execute(
            SELECT_EVENT_WITH_ARRAY_PROPS_SQL.format(conditions=conditions, limit=limit),
            {"team_id": team.pk, **condition_params},
        )

    result = ClickhouseEventSerializer(
        query_result[0:100],
        many=True,
        context={
            "people": self._get_people(query_result, team),
        },
    ).data

    if len(query_result) > 100:
        path = request.get_full_path()
        # Ascending order means the cursor should move forward ("after").
        reverse = request.GET.get("orderBy", "-timestamp") != "-timestamp"
        next_url: Optional[str] = request.build_absolute_uri(
            "{}{}{}={}".format(
                path,
                "&" if "?" in path else "?",
                "after" if reverse else "before",
                # NOTE(review): row index 3 looks like the timestamp column —
                # confirm against the SELECT_EVENT_* SQL column order.
                query_result[99][3].strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
            ))
    else:
        next_url = None
    return Response({"next": next_url, "results": result})
def _serialize_lifecycle(self, entity: Entity, filter: Filter, team_id: int) -> List[Dict[str, Any]]:
    """Run the lifecycle query for one entity and parse rows into trend results.

    Each returned dict carries a "label" of the form "<entity> - <status>" and
    the lifecycle "status" taken from the row's third column.
    """
    date_from = filter.date_from
    if not date_from:
        date_from = get_earliest_timestamp(team_id)
    interval = filter.interval or "day"
    num_intervals, seconds_in_interval = get_time_diff(interval, filter.date_from, filter.date_to, team_id)
    interval_increment, interval_string, sub_interval_string = self.get_interval(interval)
    trunc_func = get_trunc_func_ch(interval)
    event_query = ""
    event_params: Dict[str, Any] = {}
    # Entity-level properties are applied on top of the global filter's.
    props_to_filter = [*filter.properties, *entity.properties]
    prop_filters, prop_filter_params = parse_prop_clauses(props_to_filter, team_id)
    _, _, date_params = parse_timestamps(filter=filter, team_id=team_id)
    if entity.type == TREND_FILTER_TYPE_ACTIONS:
        try:
            action = Action.objects.get(pk=entity.id)
            event_query, event_params = format_action_filter(action)
        # NOTE(review): bare except — a missing/broken action yields an empty
        # series rather than an error; any other failure is swallowed too.
        except:
            return []
    else:
        event_query = "event = %(event)s"
        event_params = {"event": entity.id}
    result = sync_execute(
        LIFECYCLE_SQL.format(
            interval=interval_string,
            trunc_func=trunc_func,
            event_query=event_query,
            filters=prop_filters,
            sub_interval=sub_interval_string,
        ),
        {
            "team_id": team_id,
            # One interval before date_from, so "returning" users can be told
            # apart from "new" ones in the first bucket.
            "prev_date_from": (date_from - interval_increment).strftime(
                "%Y-%m-%d{}".format(
                    " %H:%M:%S" if filter.interval == "hour" or filter.interval == "minute" else " 00:00:00")),
            "num_intervals": num_intervals,
            "seconds_in_interval": seconds_in_interval,
            **event_params,
            **date_params,
            **prop_filter_params,
        },
    )
    res = []
    for val in result:
        # val[2] is the lifecycle status (e.g. new/returning/resurrecting/dormant).
        label = "{} - {}".format(entity.name, val[2])
        additional_values = {"label": label, "status": val[2]}
        parsed_result = parse_response(val, filter, additional_values)
        res.append(parsed_result)
    return res
def _retrieve_people_in_period(self, filter: RetentionFilter, team: Team):
    """Fetch the people behind one selected retention interval, per period.

    Builds reference-event SQL (first-time vs. recurring retention), shifts
    the date window by the selected interval, runs the per-period people
    query, then hydrates Person rows from Postgres into serialized dicts.
    """
    period = filter.period
    is_first_time_retention = filter.retention_type == RETENTION_FIRST_TIME
    trunc_func = get_trunc_func_ch(period)
    prop_filters, prop_filter_params = parse_prop_clauses(filter.properties, team.pk)

    # Interval 0 is the cohort itself, so the "returning" condition is the
    # target entity in that case.
    returning_entity = filter.returning_entity if filter.selected_interval > 0 else filter.target_entity
    target_query, target_params = self._get_condition(filter.target_entity, table="e")
    target_query_formatted = "AND {target_query}".format(target_query=target_query)
    return_query, return_params = self._get_condition(returning_entity, table="e", prepend="returning")
    return_query_formatted = "AND {return_query}".format(return_query=return_query)

    first_event_sql = (REFERENCE_EVENT_UNIQUE_PEOPLE_PER_PERIOD_SQL
                       if is_first_time_retention
                       else REFERENCE_EVENT_PEOPLE_PER_PERIOD_SQL).format(
        target_query=target_query_formatted,
        filters=prop_filters,
        trunc_func=trunc_func,
    )
    default_event_query = (
        DEFAULT_REFERENCE_EVENT_UNIQUE_PEOPLE_PER_PERIOD_SQL
        if is_first_time_retention
        else DEFAULT_REFERENCE_EVENT_PEOPLE_PER_PERIOD_SQL).format(
        target_query=target_query_formatted,
        filters=prop_filters,
        trunc_func=trunc_func,
    )

    # Shift the window forward to the selected interval's cohort.
    date_from = filter.date_from + filter.selected_interval * filter.period_increment
    date_to = filter.date_to

    # Rebuild the filter with the remaining number of intervals.
    # NOTE(review): filter._data is mutated in place here — presumably safe
    # because the filter is request-scoped; confirm.
    new_data = filter._data
    new_data.update({"total_intervals": filter.total_intervals - filter.selected_interval})
    filter = RetentionFilter(data=new_data)

    query_result = sync_execute(
        RETENTION_PEOPLE_PER_PERIOD_SQL.format(
            returning_query=return_query_formatted,
            filters=prop_filters,
            first_event_sql=first_event_sql,
            first_event_default_sql=default_event_query,
            trunc_func=trunc_func,
        ),
        {
            "team_id": team.pk,
            # Hourly periods keep the time component; others truncate to midnight.
            "start_date": date_from.strftime(
                "%Y-%m-%d{}".format(" %H:%M:%S" if filter.period == "Hour" else " 00:00:00")),
            "end_date": date_to.strftime(
                "%Y-%m-%d{}".format(" %H:%M:%S" if filter.period == "Hour" else " 00:00:00")),
            "offset": filter.offset,
            "limit": 100,
            "period": period,
            **target_params,
            **return_params,
            **prop_filter_params,
        },
    )
    people_dict = {}

    # Local import to avoid a circular dependency with posthog.api.
    from posthog.api.person import PersonSerializer

    for person in Person.objects.filter(team_id=team.pk, uuid__in=[val[0] for val in query_result]):
        people_dict.update({str(person.uuid): PersonSerializer(person).data})

    result = self.process_people_in_period(filter, query_result, people_dict)
    return result
def execute_query(query: str) -> Any:
    """Run a raw ClickHouse query and return the driver's result rows."""
    from ee.clickhouse.client import sync_execute

    result = sync_execute(query)
    return result
def get_property_values_for_key(key: str, team: Team, value: Optional[str] = None):
    """Return distinct values recorded for a property key on this team.

    When ``value`` is given, results are narrowed with a substring match.
    """
    base_params = {"team_id": team.pk, "key": key}
    if not value:
        return sync_execute(SELECT_PROP_VALUES_SQL, base_params)
    return sync_execute(
        SELECT_PROP_VALUES_SQL_WITH_FILTER,
        {**base_params, "value": "%{}%".format(value)},
    )
def run(self, *args, **kwargs) -> SessionRecordingQueryResult:
    """Build the recordings query, execute it, shape the rows, and paginate."""
    sql, bound_params = self.get_query()
    raw_rows = sync_execute(sql, bound_params)
    recordings = self._data_to_return(raw_rows)
    return self._paginate_results(recordings)
def get_persons(team_id: int):
    """Fetch every person row for a team, serialized for the API."""
    rows = sync_execute(GET_PERSON_SQL, {"team_id": team_id})
    serializer = ClickhousePersonSerializer(rows, many=True)
    return serializer.data
def get_person_distinct_ids(team_id: int):
    """Fetch every distinct-id mapping for a team, serialized for the API."""
    rows = sync_execute(GET_DISTINCT_IDS_SQL, {"team_id": team_id})
    serializer = ClickhousePersonDistinctIdSerializer(rows, many=True)
    return serializer.data
def _exec_query(self) -> List[Tuple]:
    """Execute this query object's SQL with its bound parameters."""
    sql = self.get_query()
    rows = sync_execute(sql, self.params)
    return rows
def distinct_ids_exist(team_id: int, ids: List[str]) -> bool:
    """Return True if any of the given distinct ids exist for the team.

    The existence SQL template takes the id list via str.format; the query
    itself returns a single 0/1 cell which is coerced to bool.
    """
    # Renamed the loop variable from `id` — it shadowed the builtin.
    formatted_ids = [str(distinct_id) for distinct_id in ids]
    result = sync_execute(
        PERSON_DISTINCT_ID_EXISTS_SQL.format(formatted_ids),
        {"team_id": team_id},
    )
    return bool(result[0][0])
def _team_ids(self):
    """Return all distinct team ids present in person_distinct_id, ascending.

    ``sorted`` already produces a list, so the original ``list(sorted(...))``
    wrapper was redundant and has been dropped.
    """
    rows = sync_execute("SELECT DISTINCT team_id FROM person_distinct_id")
    return sorted(row[0] for row in rows)
def _get_people():
    """Load every row from the person table wrapped as Person objects."""
    rows = sync_execute("select * from person")
    return list(map(Person, rows))
def _get_events():
    """Fetch every row from the events table."""
    rows = sync_execute("select * from events")
    return rows
def perform_query(self):
    """Build the configured SQL, run it, and return the summarized data."""
    statement = self._configure_sql()
    raw_results = sync_execute(statement, self.params)
    return self._summarize_data(raw_results)
def update_person_properties(team_id: int, id: str, properties: Dict) -> None:
    """Persist a person's properties blob (JSON-encoded) via the update statement."""
    query_params = {
        "team_id": team_id,
        "id": id,
        "properties": json.dumps(properties),
    }
    sync_execute(UPDATE_PERSON_PROPERTIES, query_params)
def _run_formula_query(self, filter: Filter, team_id: int):
    """Evaluate a user-supplied formula (e.g. "A / B") across trend series.

    Each entity gets its own subquery (sub_A, sub_B, ...); the subqueries are
    joined (FULL OUTER on breakdown value, otherwise CROSS) and arrayMap
    applies the formula element-wise to the per-interval data arrays.
    """
    # One single-letter alias per entity: A, B, C, ... (65 == ord("A")).
    letters = [chr(65 + i) for i in range(0, len(filter.entities))]
    queries = []
    params: Dict[str, Any] = {}
    for idx, entity in enumerate(filter.entities):
        sql, entity_params, _ = self._get_sql_for_entity(filter, entity, team_id)  # type: ignore
        # Prefix every %(name)s placeholder and its param key with the entity
        # index so parameters from different subqueries cannot collide.
        sql = sql.replace("%(", "%({}_".format(idx))
        entity_params = {"{}_{}".format(idx, key): value for key, value in entity_params.items()}
        queries.append(sql)
        params = {**params, **entity_params}

    # Cohort breakdown values are plain; other breakdown values carry JSON
    # quoting that must be trimmed.
    breakdown_value = (", sub_A.breakdown_value"
                       if filter.breakdown_type == "cohort"
                       else ", trim(BOTH '\"' FROM sub_A.breakdown_value)")
    is_aggregate = filter.display in [TRENDS_TABLE, TRENDS_PIE]

    sql = """SELECT {date_select} arrayMap(({letters_select}) -> {formula}, {selects}) {breakdown_value} FROM ({first_query}) as sub_A {queries} """.format(
        date_select="'' as date," if is_aggregate else "sub_A.date,",
        letters_select=", ".join(letters),
        formula=filter.formula,  # formula is properly escaped in the filter
        # Need to wrap aggregates in arrays so we can still use arrayMap
        selects=", ".join([
            ("[sub_{}.data]" if is_aggregate else "sub_{}.data").format(letters[i])
            for i in range(0, len(filter.entities))
        ]),
        breakdown_value=breakdown_value if filter.breakdown else "",
        first_query=queries[0],
        queries="".join([
            "FULL OUTER JOIN ({query}) as sub_{letter} ON sub_A.breakdown_value = sub_{letter}.breakdown_value "
            .format(query=query, letter=letters[i + 1])
            for i, query in enumerate(queries[1:])
        ]) if filter.breakdown else "".join([
            " CROSS JOIN ({}) as sub_{}".format(query, letters[i + 1])
            for i, query in enumerate(queries[1:])
        ]),
    )
    result = sync_execute(sql, params)
    response = []
    for item in result:
        additional_values: Dict[str, Any] = {
            "label": self._label(filter, item, team_id),
        }
        if is_aggregate:
            additional_values["data"] = []
            additional_values["aggregated_value"] = item[1][0]
        else:
            # Formula output may contain NaN/inf (e.g. division by zero);
            # normalize those to 0.0 and round the rest for display.
            additional_values["data"] = [
                round(number, 2) if not math.isnan(number) and not math.isinf(number)
                else 0.0 for number in item[1]
            ]
            if filter.display == TRENDS_CUMULATIVE:
                additional_values["data"] = list(accumulate(additional_values["data"]))
        additional_values["count"] = float(sum(additional_values["data"]))
        response.append(parse_response(item, filter, additional_values))
    return response
def get_events():
    """Return all events, serialized without element or person context."""
    rows = sync_execute(GET_EVENTS_SQL)
    serializer = ClickhouseEventSerializer(
        rows, many=True, context={"elements": None, "people": None}
    )
    return serializer.data
def test_person_properties_filter(self):
    """Filter-level and entity-level property filters must produce the same SQL."""
    # Case 1: properties attached to the global filter.
    filter = Filter(
        data={
            "date_from": "2021-05-01 00:00:00",
            "date_to": "2021-05-07 00:00:00",
            "events": [
                {"id": "viewed", "order": 0},
            ],
            "properties": [
                {"key": "email", "value": "@posthog.com", "operator": "not_icontains", "type": "person"},
                {"key": "key", "value": "val"},
            ],
        })
    entity = Entity({"id": "viewed", "type": "events"})
    global_prop_query, global_prop_query_params = TrendsEventQuery(
        filter=filter, entity=entity, team_id=self.team.pk).get_query()
    # Smoke-check: the generated SQL must at least execute.
    sync_execute(global_prop_query, global_prop_query_params)

    # Case 2: the same properties attached to the entity instead.
    filter = Filter(
        data={
            "date_from": "2021-05-01 00:00:00",
            "date_to": "2021-05-07 00:00:00",
            "events": [
                {"id": "viewed", "order": 0},
            ],
        })
    entity = Entity({
        "id": "viewed",
        "type": "events",
        "properties": [
            {"key": "email", "value": "@posthog.com", "operator": "not_icontains", "type": "person"},
            {"key": "key", "value": "val"},
        ],
    })
    entity_prop_query, entity_prop_query_params = TrendsEventQuery(
        filter=filter, entity=entity, team_id=self.team.pk).get_query()
    # global queries and entity queries should be the same
    self.assertEqual(sqlparse.format(global_prop_query, reindent=True),
                     sqlparse.format(entity_prop_query, reindent=True))
    sync_execute(entity_prop_query, entity_prop_query_params)
def get_events_by_team(team_id: Union[str, int]):
    """Return one team's events, serialized without element or person context."""
    rows = sync_execute(GET_EVENTS_BY_TEAM_SQL, {"team_id": str(team_id)})
    serializer = ClickhouseEventSerializer(
        rows, many=True, context={"elements": None, "people": None}
    )
    return serializer.data
def _retrieve_people(self, filter: RetentionFilter, team: Team):
    """Fetch the people behind one cell of the retention table.

    The reference window is the first period; the selected window is shifted
    by the chosen interval. Matching person rows are hydrated from Postgres
    and serialized.
    """
    period = filter.period
    is_first_time_retention = filter.retention_type == RETENTION_FIRST_TIME
    trunc_func = get_trunc_func_ch(period)
    prop_filters, prop_filter_params = parse_prop_clauses(filter.properties, team.pk)

    # Interval 0 is the cohort itself, so the "returning" condition is the
    # target entity in that case.
    returning_entity = filter.returning_entity if filter.selected_interval > 0 else filter.target_entity
    target_query, target_params = self._get_condition(filter.target_entity, table="e")
    target_query_formatted = "AND {target_query}".format(target_query=target_query)
    return_query, return_params = self._get_condition(returning_entity, table="e", prepend="returning")
    return_query_formatted = "AND {return_query}".format(return_query=return_query)

    reference_event_query = (REFERENCE_EVENT_UNIQUE_SQL
                             if is_first_time_retention
                             else REFERENCE_EVENT_SQL).format(
        target_query=target_query_formatted,
        filters=prop_filters,
        trunc_func=trunc_func,
    )
    # The cohort-defining window is the very first period of the range.
    reference_date_from = filter.date_from
    reference_date_to = filter.date_from + filter.period_increment
    # The selected cell's window is offset by the chosen interval.
    date_from = filter.date_from + filter.selected_interval * filter.period_increment
    date_to = date_from + filter.period_increment

    result = sync_execute(
        RETENTION_PEOPLE_SQL.format(
            reference_event_query=reference_event_query,
            target_query=return_query_formatted,
            filters=prop_filters),
        {
            "team_id": team.pk,
            # Hourly periods keep the time component; others truncate to midnight.
            "start_date": date_from.strftime(
                "%Y-%m-%d{}".format(" %H:%M:%S" if filter.period == "Hour" else " 00:00:00")),
            "end_date": date_to.strftime(
                "%Y-%m-%d{}".format(" %H:%M:%S" if filter.period == "Hour" else " 00:00:00")),
            "reference_start_date": reference_date_from.strftime(
                "%Y-%m-%d{}".format(" %H:%M:%S" if filter.period == "Hour" else " 00:00:00")),
            "reference_end_date": reference_date_to.strftime(
                "%Y-%m-%d{}".format(" %H:%M:%S" if filter.period == "Hour" else " 00:00:00")),
            "offset": filter.offset,
            **target_params,
            **return_params,
            **prop_filter_params,
        },
    )
    people = Person.objects.filter(team_id=team.pk, uuid__in=[val[0] for val in result])

    # Local import to avoid a circular dependency with posthog.api.
    from posthog.api.person import PersonSerializer

    return PersonSerializer(people, many=True).data
def _format_breakdown_query(self, entity: Entity, filter: Filter, team: Team) -> List[Dict[str, Any]]:
    """Build and run the trends query for one entity broken down by a dimension.

    Three breakdown modes are supported: cohort (including the special "all"
    pseudo-cohort), person property, and event property. Each mode assembles
    a different join/filter SQL fragment, then parses rows into labelled
    series dicts.
    """
    params = {"team_id": team.pk}
    interval_annotation = get_interval_annotation_ch(filter.interval)
    num_intervals, seconds_in_interval = get_time_diff(
        filter.interval or "day", filter.date_from, filter.date_to)
    parsed_date_from, parsed_date_to = parse_timestamps(filter=filter)
    props_to_filter = [*filter.properties, *entity.properties]
    prop_filters, prop_filter_params = parse_prop_clauses("uuid", props_to_filter, team)
    aggregate_operation, join_condition, math_params = self._process_math(entity)

    action_query = ""
    action_params: Dict = {}
    if entity.type == TREND_FILTER_TYPE_ACTIONS:
        action = Action.objects.get(pk=entity.id)
        action_query, action_params = format_action_filter(action)

    # null_sql fills empty intervals with zero rows so every series has the
    # same number of points.
    null_sql = NULL_BREAKDOWN_SQL.format(
        interval=interval_annotation,
        seconds_in_interval=seconds_in_interval,
        num_intervals=num_intervals,
        date_to=((filter.date_to or timezone.now())).strftime("%Y-%m-%d %H:%M:%S"),
    )

    params = {**params, **math_params, **prop_filter_params}
    top_elements_array = []

    if filter.breakdown_type == "cohort":
        breakdown = filter.breakdown if filter.breakdown and isinstance(filter.breakdown, list) else []
        if "all" in breakdown:
            # "all" is a pseudo-cohort meaning no cohort restriction at all.
            params = {**params, "event": entity.id, **action_params}
            null_sql = NULL_SQL.format(
                interval=interval_annotation,
                seconds_in_interval=seconds_in_interval,
                num_intervals=num_intervals,
                date_to=((filter.date_to or timezone.now())).strftime("%Y-%m-%d %H:%M:%S"),
            )
            conditions = BREAKDOWN_CONDITIONS_SQL.format(
                parsed_date_from=parsed_date_from,
                parsed_date_to=parsed_date_to,
                actions_query="AND uuid IN ({})".format(action_query) if action_query else "",
                event_filter="AND event = %(event)s" if not action_query else "",
                filters="{filters}".format(filters=prop_filters) if props_to_filter else "",
            )
            breakdown_query = BREAKDOWN_DEFAULT_SQL.format(
                null_sql=null_sql,
                conditions=conditions,
                event_join=join_condition,
                aggregate_operation=aggregate_operation,
                interval_annotation=interval_annotation,
            )
        else:
            # One subquery per selected cohort, joined on person id.
            cohort_queries, cohort_ids, cohort_params = self._format_breakdown_cohort_join_query(breakdown, team)
            params = {**params, "values": cohort_ids, "event": entity.id, **action_params, **cohort_params}
            breakdown_filter = BREAKDOWN_COHORT_JOIN_SQL.format(
                cohort_queries=cohort_queries,
                parsed_date_from=parsed_date_from,
                parsed_date_to=parsed_date_to,
                actions_query="AND uuid IN ({})".format(action_query) if action_query else "",
                event_filter="AND event = %(event)s" if not action_query else "",
                filters="{filters}".format(filters=prop_filters) if props_to_filter else "",
            )
            breakdown_query = BREAKDOWN_QUERY_SQL.format(
                null_sql=null_sql,
                breakdown_filter=breakdown_filter,
                event_join=join_condition,
                aggregate_operation=aggregate_operation,
                interval_annotation=interval_annotation,
            )
    elif filter.breakdown_type == "person":
        # Limit the breakdown to the most frequent person-property values.
        top_elements_array = self._get_top_elements(
            TOP_PERSON_PROPS_ARRAY_OF_KEY_SQL, filter, parsed_date_from, parsed_date_to, team)
        params = {
            **params,
            "values": top_elements_array,
            "key": filter.breakdown,
            "event": entity.id,
            **action_params,
        }
        breakdown_filter = BREAKDOWN_PERSON_PROP_JOIN_SQL.format(
            parsed_date_from=parsed_date_from,
            parsed_date_to=parsed_date_to,
            actions_query="AND uuid IN ({})".format(action_query) if action_query else "",
            event_filter="AND event = %(event)s" if not action_query else "",
        )
        breakdown_query = BREAKDOWN_QUERY_SQL.format(
            null_sql=null_sql,
            breakdown_filter=breakdown_filter,
            event_join=join_condition,
            aggregate_operation=aggregate_operation,
            interval_annotation=interval_annotation,
        )
    else:
        # Default: break down by an event property, again limited to the top values.
        top_elements_array = self._get_top_elements(
            TOP_ELEMENTS_ARRAY_OF_KEY_SQL, filter, parsed_date_from, parsed_date_to, team)
        params = {
            **params,
            "values": top_elements_array,
            "key": filter.breakdown,
            "event": entity.id,
            **action_params,
        }
        breakdown_filter = BREAKDOWN_PROP_JOIN_SQL.format(
            parsed_date_from=parsed_date_from,
            parsed_date_to=parsed_date_to,
            actions_query="AND uuid IN ({})".format(action_query) if action_query else "",
            event_filter="AND event = %(event)s" if not action_query else "",
            filters="{filters}".format(filters=prop_filters) if props_to_filter else "",
        )
        breakdown_query = BREAKDOWN_QUERY_SQL.format(
            null_sql=null_sql,
            breakdown_filter=breakdown_filter,
            event_join=join_condition,
            aggregate_operation=aggregate_operation,
            interval_annotation=interval_annotation,
        )

    # NOTE(review): bare except — any query failure silently yields an empty
    # result set.
    try:
        result = sync_execute(breakdown_query, params)
    except:
        result = []

    parsed_results = []
    for idx, stats in enumerate(result):
        # Cohort breakdowns carry no per-row value; labels come from the index.
        breakdown_value = stats[2] if not filter.breakdown_type == "cohort" else ""
        # Property values arrive JSON-quoted; strip the surrounding quotes.
        stripped_value = breakdown_value.strip('"') if isinstance(breakdown_value, str) else breakdown_value
        extra_label = self._determine_breakdown_label(
            idx, filter.breakdown_type, filter.breakdown, stripped_value)
        label = "{} - {}".format(entity.name, extra_label)
        additional_values = {
            "label": label,
            "breakdown_value": filter.breakdown[idx]
            if isinstance(filter.breakdown, list)
            else filter.breakdown if filter.breakdown_type == "cohort" else stripped_value,
        }
        parsed_result = self._parse_response(stats, filter, additional_values)
        parsed_results.append(parsed_result)
    return parsed_results
def _execute_sql(
    self,
    filter: RetentionFilter,
    team: Team,
) -> Dict[Tuple[int, int], Dict[str, Any]]:
    """Run the retention matrix queries and index counts by (interval, period).

    Two queries are issued: RETENTION_SQL for returning counts per cell, and
    INITIAL_INTERVAL_SQL for the size of each starting cohort (period 0).
    """
    period = filter.period
    prop_filters, prop_filter_params = parse_prop_clauses(filter.properties, team.pk)
    target_entity = filter.target_entity
    returning_entity = filter.returning_entity
    is_first_time_retention = filter.retention_type == RETENTION_FIRST_TIME
    date_from = filter.date_from
    date_to = filter.date_to

    target_query = ""
    target_params: Dict = {}
    trunc_func = get_trunc_func_ch(period)

    target_query, target_params = self._get_condition(target_entity, table="e")
    returning_query, returning_params = self._get_condition(returning_entity, table="e", prepend="returning")
    target_query_formatted = "AND {target_query}".format(target_query=target_query)
    returning_query_formatted = "AND {returning_query}".format(returning_query=returning_query)

    reference_event_sql = (REFERENCE_EVENT_UNIQUE_SQL
                           if is_first_time_retention
                           else REFERENCE_EVENT_SQL).format(
        target_query=target_query_formatted,
        filters=prop_filters,
        trunc_func=trunc_func,
    )

    target_condition, _ = self._get_condition(target_entity, table="reference_event")
    if is_first_time_retention:
        # First-time retention aggregates to min_* columns in the reference
        # subquery; rewrite the condition's column references to match.
        target_condition = target_condition.replace("reference_event.uuid", "reference_event.min_uuid")
        target_condition = target_condition.replace("reference_event.event", "reference_event.min_event")
    returning_condition, _ = self._get_condition(returning_entity, table="event", prepend="returning")
    result = sync_execute(
        RETENTION_SQL.format(
            target_query=target_query_formatted,
            returning_query=returning_query_formatted,
            filters=prop_filters,
            trunc_func=trunc_func,
            extra_union="UNION ALL {} ".format(reference_event_sql),
            reference_event_sql=reference_event_sql,
            target_condition=target_condition,
            returning_condition=returning_condition,
        ),
        {
            "team_id": team.pk,
            # Hourly periods keep the time component; others truncate to midnight.
            "start_date": date_from.strftime(
                "%Y-%m-%d{}".format(" %H:%M:%S" if filter.period == "Hour" else " 00:00:00")),
            "end_date": date_to.strftime(
                "%Y-%m-%d{}".format(" %H:%M:%S" if filter.period == "Hour" else " 00:00:00")),
            "reference_start_date": date_from.strftime(
                "%Y-%m-%d{}".format(" %H:%M:%S" if filter.period == "Hour" else " 00:00:00")),
            # Line-graph display restricts the reference cohort to the first
            # period only; table display spans the full range.
            "reference_end_date": ((date_from + filter.period_increment)
                                   if filter.display == TRENDS_LINEAR
                                   else date_to).strftime(
                "%Y-%m-%d{}".format(" %H:%M:%S" if filter.period == "Hour" else " 00:00:00")),
            **prop_filter_params,
            **target_params,
            **returning_params,
            "period": period,
        },
    )

    initial_interval_result = sync_execute(
        INITIAL_INTERVAL_SQL.format(reference_event_sql=reference_event_sql),
        {
            "team_id": team.pk,
            "start_date": date_from.strftime(
                "%Y-%m-%d{}".format(" %H:%M:%S" if filter.period == "Hour" else " 00:00:00")),
            "end_date": date_to.strftime(
                "%Y-%m-%d{}".format(" %H:%M:%S" if filter.period == "Hour" else " 00:00:00")),
            "reference_start_date": date_from.strftime(
                "%Y-%m-%d{}".format(" %H:%M:%S" if filter.period == "Hour" else " 00:00:00")),
            "reference_end_date": ((date_from + filter.period_increment)
                                   if filter.display == TRENDS_LINEAR
                                   else date_to).strftime(
                "%Y-%m-%d{}".format(" %H:%M:%S" if filter.period == "Hour" else " 00:00:00")),
            **prop_filter_params,
            **target_params,
            **returning_params,
            "period": period,
        },
    )

    # Index cohort sizes at period 0 and returning counts at (interval, period).
    result_dict = {}
    for initial_res in initial_interval_result:
        result_dict.update({(initial_res[0], 0): {"count": initial_res[1], "people": []}})
    for res in result:
        result_dict.update({(res[0], res[1]): {"count": res[2], "people": []}})
    return result_dict
def _format_normal_query(self, entity: Entity, filter: Filter, team: Team) -> List[Dict[str, Any]]:
    """Build and run the non-breakdown trends query for one entity.

    Chooses the actions or events volume SQL, pads empty intervals via
    NULL_SQL, and parses each result row into a series dict.
    """
    interval_annotation = get_interval_annotation_ch(filter.interval)
    num_intervals, seconds_in_interval = get_time_diff(
        filter.interval or "day", filter.date_from, filter.date_to)
    parsed_date_from, parsed_date_to = parse_timestamps(filter=filter)
    props_to_filter = [*filter.properties, *entity.properties]
    prop_filters, prop_filter_params = parse_prop_clauses("uuid", props_to_filter, team)
    aggregate_operation, join_condition, math_params = self._process_math(entity)

    params: Dict = {"team_id": team.pk}
    params = {**params, **prop_filter_params, **math_params}

    if entity.type == TREND_FILTER_TYPE_ACTIONS:
        try:
            action = Action.objects.get(pk=entity.id)
            action_query, action_params = format_action_filter(action)
            params = {**params, **action_params}
            content_sql = VOLUME_ACTIONS_SQL.format(
                interval=interval_annotation,
                timestamp="timestamp",
                team_id=team.pk,
                actions_query=action_query,
                parsed_date_from=(parsed_date_from or ""),
                parsed_date_to=(parsed_date_to or ""),
                filters="{filters}".format(filters=prop_filters) if props_to_filter else "",
                event_join=join_condition,
                aggregate_operation=aggregate_operation,
            )
        # NOTE(review): bare except — a missing action yields an empty series,
        # but any other failure is swallowed too.
        except:
            return []
    else:
        content_sql = VOLUME_SQL.format(
            interval=interval_annotation,
            timestamp="timestamp",
            team_id=team.pk,
            parsed_date_from=(parsed_date_from or ""),
            parsed_date_to=(parsed_date_to or ""),
            filters="{filters}".format(filters=prop_filters) if props_to_filter else "",
            event_join=join_condition,
            aggregate_operation=aggregate_operation,
        )
        params = {**params, "event": entity.id}
    # Zero-filled rows so every interval in the range appears in the output.
    null_sql = NULL_SQL.format(
        interval=interval_annotation,
        seconds_in_interval=seconds_in_interval,
        num_intervals=num_intervals,
        date_to=((filter.date_to or timezone.now())).strftime("%Y-%m-%d %H:%M:%S"),
    )
    final_query = AGGREGATE_SQL.format(null_sql=null_sql, content_sql=content_sql)
    # NOTE(review): bare except — query failures silently produce no results.
    try:
        result = sync_execute(final_query, params)
    except:
        result = []
    parsed_results = []
    for _, stats in enumerate(result):
        parsed_result = self._parse_response(stats, filter)
        parsed_results.append(parsed_result)
    return parsed_results
def _format_normal_query(self, entity: Entity, filter: Filter, team_id: int) -> List[Dict[str, Any]]:
    """Build and run the non-breakdown trends query for one entity.

    For table/pie displays a single aggregate value is returned; otherwise a
    per-interval series is produced, zero-padded via NULL_SQL.
    """
    interval_annotation = get_trunc_func_ch(filter.interval)
    num_intervals, seconds_in_interval = get_time_diff(
        filter.interval or "day", filter.date_from, filter.date_to, team_id=team_id)
    parsed_date_from, parsed_date_to, _ = parse_timestamps(filter=filter, team_id=team_id)
    props_to_filter = [*filter.properties, *entity.properties]
    prop_filters, prop_filter_params = parse_prop_clauses(props_to_filter, team_id)
    aggregate_operation, join_condition, math_params = process_math(entity)

    params: Dict = {"team_id": team_id}
    params = {**params, **prop_filter_params, **math_params}
    # Shared str.format kwargs for whichever SQL template is chosen below.
    content_sql_params = {
        "interval": interval_annotation,
        "timestamp": "timestamp",
        "team_id": team_id,
        "parsed_date_from": parsed_date_from,
        "parsed_date_to": parsed_date_to,
        "filters": prop_filters,
        "event_join": join_condition,
        "aggregate_operation": aggregate_operation,
    }
    entity_params, entity_format_params = self._populate_entity_params(entity)
    params = {**params, **entity_params}
    content_sql_params = {**content_sql_params, **entity_format_params}

    if filter.display == TRENDS_TABLE or filter.display == TRENDS_PIE:
        # Single-number displays: run the aggregate query and return one value.
        agg_query = self._determine_single_aggregate_query(filter, entity)
        content_sql = agg_query.format(**content_sql_params)
        # NOTE(review): bare except — query failures fall back to empty result.
        try:
            result = sync_execute(content_sql, params)
        except:
            result = []
        return [{"aggregated_value": result[0][0] if result and len(result) else 0}]
    else:
        content_sql = self._determine_trend_aggregate_query(filter, entity)
        content_sql = content_sql.format(**content_sql_params)
        # Zero-filled rows so every interval in the range appears in the output.
        null_sql = NULL_SQL.format(
            interval=interval_annotation,
            seconds_in_interval=seconds_in_interval,
            num_intervals=num_intervals,
            date_to=filter.date_to.strftime("%Y-%m-%d %H:%M:%S"),
        )
        final_query = AGGREGATE_SQL.format(null_sql=null_sql, content_sql=content_sql)
        # NOTE(review): bare except — query failures fall back to empty result.
        try:
            result = sync_execute(final_query, params)
        except:
            result = []
        parsed_results = []
        for _, stats in enumerate(result):
            parsed_result = parse_response(stats, filter)
            parsed_results.append(parsed_result)
        return parsed_results
def get_properties(self, request: Request):
    """List person property names with usage counts for the current team."""
    rows = sync_execute(GET_PERSON_PROPERTIES_COUNT, {"team_id": self.team.pk})
    results = []
    for name, count in rows:
        results.append({"name": name, "count": count})
    return results
def get_people(
    self,
    filter: Filter,
    team_id: int,
    target_date: datetime,
    lifecycle_type: str,
    limit: int = 100,
):
    """Fetch the people in one lifecycle bucket (e.g. "new") on a target date.

    Only the first entity of the filter is considered. Matching person rows
    are hydrated from Postgres with their distinct ids prefetched, then
    serialized.
    """
    entity = filter.entities[0]
    date_from = filter.date_from
    if not date_from:
        date_from = get_earliest_timestamp(team_id)
    interval = filter.interval or "day"
    num_intervals, seconds_in_interval = get_time_diff(
        interval, filter.date_from, filter.date_to, team_id=team_id)
    interval_increment, interval_string, sub_interval_string = self.get_interval(interval)
    trunc_func = get_trunc_func_ch(interval)
    event_query = ""
    event_params: Dict[str, Any] = {}
    _, _, date_params = parse_timestamps(filter=filter, team_id=team_id)
    if entity.type == TREND_FILTER_TYPE_ACTIONS:
        try:
            action = Action.objects.get(pk=entity.id)
            event_query, event_params = format_action_filter(action)
        # NOTE(review): bare except — a missing/broken action yields an empty
        # people list; any other failure is swallowed too.
        except:
            return []
    else:
        event_query = "event = %(event)s"
        event_params = {"event": entity.id}
    props_to_filter = [*filter.properties, *entity.properties]
    prop_filters, prop_filter_params = parse_prop_clauses(props_to_filter, team_id)
    result = sync_execute(
        LIFECYCLE_PEOPLE_SQL.format(
            interval=interval_string,
            trunc_func=trunc_func,
            event_query=event_query,
            filters=prop_filters,
            sub_interval=sub_interval_string,
        ),
        {
            "team_id": team_id,
            # One interval before date_from, so "returning" users can be told
            # apart from "new" ones in the first bucket.
            "prev_date_from": (date_from - interval_increment).strftime(
                "%Y-%m-%d{}".format(
                    " %H:%M:%S" if filter.interval == "hour" or filter.interval == "minute" else " 00:00:00")),
            "num_intervals": num_intervals,
            "seconds_in_interval": seconds_in_interval,
            **event_params,
            **date_params,
            **prop_filter_params,
            "status": lifecycle_type,
            "target_date": target_date.strftime(
                "%Y-%m-%d{}".format(
                    " %H:%M:%S" if filter.interval == "hour" or filter.interval == "minute" else " 00:00:00")),
            "offset": filter.offset,
            "limit": limit,
        },
    )
    people = get_persons_by_uuids(team_id=team_id, uuids=[p[0] for p in result])
    # Prefetch distinct ids so the serializer does not issue one query per person.
    people = people.prefetch_related(Prefetch("persondistinctid_set", to_attr="distinct_ids_cache"))

    # Local import to avoid a circular dependency with posthog.api.
    from posthog.api.person import PersonSerializer

    return PersonSerializer(people, many=True).data
def test_create_trends_cohort(self, _insert_cohort_from_query):
    """Creating a static cohort from a trends insight dispatches the async
    insert task with the expected filter/entity payload, and running the
    insert puts exactly the matching person into the cohort."""
    _create_person(team_id=self.team.pk, distinct_ids=["blabla"])
    # Two pageviews on different wall-clock days, but both with an event
    # timestamp inside the queried 2021-01-01 window.
    with freeze_time("2021-01-01 00:06:34"):
        _create_event(
            team=self.team,
            event="$pageview",
            distinct_id="blabla",
            properties={"$math_prop": 1},
            timestamp="2021-01-01T12:00:00Z",
        )
    with freeze_time("2021-01-02 00:06:34"):
        _create_event(
            team=self.team,
            event="$pageview",
            distinct_id="blabla",
            properties={"$math_prop": 4},
            timestamp="2021-01-01T12:00:00Z",
        )
    # URL-encoded trends params: $pageview events, day interval, 2021-01-01 only.
    response = self.client.post(
        f"/api/projects/{self.team.id}/cohorts/?interval=day&display=ActionsLineGraph&events=%5B%7B%22id%22%3A%22%24pageview%22%2C%22name%22%3A%22%24pageview%22%2C%22type%22%3A%22events%22%2C%22order%22%3A0%7D%5D&properties=%5B%5D&entity_id=%24pageview&entity_type=events&date_from=2021-01-01&date_to=2021-01-01&label=%24pageview",
        {"name": "test", "is_static": True},
    ).json()
    cohort_id = response["id"]
    # The endpoint itself only schedules the insert (mocked here); assert the
    # exact payload it was scheduled with.
    _insert_cohort_from_query.assert_called_once_with(
        cohort_id,
        "TRENDS",
        {
            "date_from": "2021-01-01",
            "date_to": "2021-01-01",
            "display": "ActionsLineGraph",
            "events": [
                {
                    "id": "$pageview",
                    "type": "events",
                    "order": 0,
                    "name": "$pageview",
                    "custom_name": None,
                    "math": None,
                    "math_property": None,
                    "math_group_type_index": None,
                    "properties": [],
                }
            ],
            "entity_id": "$pageview",
            "entity_type": "events",
            "insight": "TRENDS",
            "interval": "day",
        },
        entity_data={
            "id": "$pageview",
            "type": "events",
            "order": None,
            "name": "$pageview",
            "custom_name": None,
            "math": None,
            "math_property": None,
            "math_group_type_index": None,
            "properties": [],
        },
    )
    # Now run the real insert synchronously to verify its effect.
    insert_cohort_from_query(
        cohort_id,
        "TRENDS",
        {
            "date_from": "2021-01-01",
            "date_to": "2021-01-01",
            "display": "ActionsLineGraph",
            "events": [
                {
                    "id": "$pageview",
                    "type": "events",
                    "order": 0,
                    "name": "$pageview",
                    "math": None,
                    "math_property": None,
                    "math_group_type_index": None,
                    "properties": [],
                }
            ],
            "entity_id": "$pageview",
            "entity_type": "events",
            "insight": "TRENDS",
            "interval": "day",
        },
        entity_data={
            "id": "$pageview",
            "type": "events",
            "order": 0,
            "name": "$pageview",
            "math": None,
            "math_property": None,
            "math_group_type_index": None,
            "properties": [],
        },
    )
    cohort = Cohort.objects.get(pk=cohort_id)
    people = Person.objects.filter(cohort__id=cohort.pk)
    self.assertEqual(cohort.errors_calculating, 0)
    # On failure, dump the raw and FINAL-collapsed static-cohort rows to aid debugging.
    self.assertEqual(
        len(people),
        1,
        {
            "a": sync_execute(
                "select person_id from person_static_cohort where team_id = {} and cohort_id = {} ".format(
                    self.team.id, cohort.pk
                )
            ),
            "b": sync_execute(
                "select person_id from person_static_cohort FINAL where team_id = {} and cohort_id = {} ".format(
                    self.team.id, cohort.pk
                )
            ),
        },
    )
def test_prop_cohort_basic_event_days(self):
    """Cohorts defined by "performed event in last N days" must respect N.

    One person viewed 1 day ago, another 5 days ago (relative to the frozen
    2020-01-10 clock): a 1-day cohort matches one event row, a 7-day cohort
    matches both.
    """
    _create_person(distinct_ids=["some_other_id"], team_id=self.team.pk,
                   properties={"$some_prop": "something"})
    _create_person(
        distinct_ids=["some_id"],
        team_id=self.team.pk,
        properties={
            "$some_prop": "something",
            "$another_prop": "something"
        },
    )
    _create_event(
        event="$pageview",
        team=self.team,
        distinct_id="some_id",
        properties={"attr": "some_val"},
        timestamp=datetime(2020, 1, 9, 12, 0, 1),
    )
    _create_event(
        event="$pageview",
        team=self.team,
        distinct_id="some_other_id",
        properties={"attr": "some_val"},
        timestamp=datetime(2020, 1, 5, 12, 0, 1),
    )
    with freeze_time("2020-01-10"):
        cohort1 = Cohort.objects.create(
            team=self.team,
            groups=[{"event_id": "$pageview", "days": 1}],
            name="cohort1",
        )
        filter = Filter(
            data={
                "properties": [{"key": "id", "value": cohort1.pk, "type": "cohort"}],
            })
        query, params = parse_prop_clauses(filter.properties, self.team.pk)
        final_query = "SELECT uuid FROM events WHERE team_id = %(team_id)s {}".format(query)
        result = sync_execute(final_query, {**params, "team_id": self.team.pk})
        # Only the 2020-01-09 event falls inside the 1-day window.
        self.assertEqual(len(result), 1)

        cohort2 = Cohort.objects.create(
            team=self.team,
            groups=[{"event_id": "$pageview", "days": 7}],
            name="cohort2",
        )
        filter = Filter(
            data={
                "properties": [{"key": "id", "value": cohort2.pk, "type": "cohort"}],
            })
        query, params = parse_prop_clauses(filter.properties, self.team.pk)
        final_query = "SELECT uuid FROM events WHERE team_id = %(team_id)s {}".format(query)
        result = sync_execute(final_query, {**params, "team_id": self.team.pk})
        # Both events fall inside the 7-day window.
        self.assertEqual(len(result), 2)
def get_elements(event_id: Union[int, UUID]) -> List[Element]:
    """Look up one event's elements_chain by uuid and parse it into Elements."""
    rows = sync_execute(
        "select elements_chain from events where uuid = %(id)s", {"id": event_id}
    )
    chain = rows[0][0]
    return chain_to_elements(chain)