def run(self, *args, **kwargs) -> List[Dict[str, Any]]:
    """Execute the funnel query and return its serialized rows."""
    with connection.cursor() as cursor:
        # Render the composed query against the live connection so the
        # driver can resolve identifiers and placeholders correctly.
        rendered_sql = self._build_query(self._gen_lateral_bodies()).as_string(cursor.connection)
        cursor.execute(rendered_sql)
        rows = namedtuplefetchall(cursor)
    return self.data_to_return(rows)
def query_sessions_in_range(
    team: Team, start_time: datetime.datetime, end_time: datetime.datetime, filter: SessionsFilter
) -> List[dict]:
    """Return sessions for `team` between `start_time` and `end_time`,
    optionally restricted by the filter's recording-duration predicate."""
    duration_filter = filter.recording_duration_filter
    if duration_filter:
        # The operator symbol comes from the OPERATORS lookup; the duration
        # value itself is passed as a bound query parameter.
        filter_query = f"AND duration {OPERATORS[duration_filter.operator]} INTERVAL '%(min_recording_duration)s seconds'"
        filter_params = {
            "min_recording_duration": duration_filter.value,
        }
    else:
        filter_query, filter_params = "", {}
    query_params = {
        "team_id": team.id,
        "start_time": start_time,
        "end_time": end_time,
        **filter_params,
    }
    with connection.cursor() as cursor:
        cursor.execute(SESSIONS_IN_RANGE_QUERY.format(filter_query=filter_query), query_params)
        sessions = namedtuplefetchall(cursor)
    return [session._asdict() for session in sessions]
def _get_trends(self) -> List[Dict[str, Any]]:
    """Build the funnel conversion-over-time series.

    For each date bucket, computes the percentage of people who entered
    step 0 that reached the last step, zero-filling buckets with no data.

    Returns:
        A single-element list holding a dict with keys
        "count", "data", "days", "labels".
    """
    serialized: Dict[str, Any] = {"count": 0, "data": [], "days": [], "labels": []}
    with connection.cursor() as cursor:
        # Render the trends SQL against the live connection, then run it.
        qstring = self._build_trends_query(self._filter).as_string(cursor.connection)
        cursor.execute(qstring)
        steps_at_dates = namedtuplefetchall(cursor)
    # When no explicit date_from is set, fall back to the earliest result's
    # date. NOTE(review): this indexes steps_at_dates[0] — presumably the
    # query always returns at least one row; confirm.
    date_range = get_daterange(
        self._filter.date_from or steps_at_dates[0].date, self._filter.date_to, frequency=self._filter.interval
    )
    # Conversion % = people at last step / people at step 0, rounded to int.
    data_array = [
        {"date": step.date, "count": round(self._get_last_step_attr(step) / step.step_0_count * 100)}
        for step in steps_at_dates
    ]
    if self._filter.interval == "week":
        # Snap each date back to the start of its week
        # (weekday() is Monday=0, so +1 lands on the preceding Sunday).
        for df in data_array:
            df["date"] -= timedelta(days=df["date"].weekday() + 1)
    elif self._filter.interval == "month":
        # Snap each date to the first day of its month.
        for df in data_array:
            df["date"] = df["date"].replace(day=1)
    # Normalize all bucket dates to UTC ISO strings for keying below.
    for df in data_array:
        df["date"] = df["date"].replace(tzinfo=pytz.utc).isoformat()
    datewise_data = {d["date"]: d["count"] for d in data_array}
    # Zero-fill every bucket in the requested range that has no data.
    values = [(key, datewise_data.get(key.isoformat(), 0)) for key in date_range]
    for item in values:
        serialized["days"].append(item[0])
        serialized["data"].append(item[1])
        serialized["labels"].append(format_label_date(item[0], self._filter.interval))
    return [serialized]
def _execute_sql(self, filter: RetentionFilter, team: Team,) -> Dict[Tuple[int, int], Dict[str, Any]]:
    """Run the retention cohort query.

    Returns:
        A dict keyed by (first_date, date) bucket pairs, each mapping to
        {"count": <distinct person count>}.
    """
    format_fields, params = self._determine_query_params(filter, team)
    # NOTE(review): SQL AND binds tighter than OR, so this WHERE parses as
    # ((event_date >= first_date AND target AND return)
    #   OR (target AND event_date = first_date)).
    # The sibling retention query in this file has the same shape, so it is
    # presumably intentional — but confirm the grouping.
    final_query = """
        SELECT
            {fields}
            COUNT(DISTINCT "events"."person_id"),
            array_agg(DISTINCT "events"."person_id") as people
        FROM ({event_query}) events
        LEFT JOIN ({reference_event_query}) first_event_date
            ON (events.person_id = first_event_date.person_id)
        WHERE event_date >= first_date
            AND {target_condition} AND {return_condition}
            OR ({target_condition} AND event_date = first_date)
        GROUP BY date, first_date
    """.format(
        **format_fields
    )
    with connection.cursor() as cursor:
        cursor.execute(final_query, params)
        data = namedtuplefetchall(cursor)
    by_dates = {}
    # Bucket keys are cast to int; rows carry numeric date indices here.
    for row in data:
        by_dates.update({(int(row.first_date), int(row.date)): {"count": row.count}})
    return by_dates
def run(self, *args, **kwargs) -> SessionRecordingQueryResult:
    """Fetch matching session recordings and return a paginated result."""
    query, query_params = self._build_query()
    with connection.cursor() as cursor:
        cursor.execute(query, query_params)
        rows = namedtuplefetchall(cursor)
    recordings = self._data_to_return(rows)
    return self._paginate_results(recordings)
def run(self, *args, **kwargs) -> List[Dict[str, Any]]:
    """
    Query for all persons that completed the funnel steps defined by
    `self._filter.entities`, in order.

    Entities might be, e.g.:

        1. event with event name "user signed up"
        2. event with event name "user looked at report"

    A person matches only by going through every entity in sequence; only
    the earliest such chain per person is returned.
    """
    # Nothing to query when no steps are defined.
    if not self._filter.entities:
        return []
    # Linear display is served by the trends code path instead.
    if self._filter.display == TRENDS_LINEAR:
        return self._get_trends()
    with connection.cursor() as cursor:
        # Build the ordered-steps query and run it.
        funnel_sql = self._build_query(within_time=None)
        cursor.execute(funnel_sql)
        rows = namedtuplefetchall(cursor)
    return self.data_to_return(rows)
def run(self, *args, **kwargs) -> List[Dict[str, Any]]:
    """Execute the funnel query, or delegate to trends for linear display."""
    # No steps defined means there is nothing to query.
    if not self._filter.entities:
        return []
    if self._filter.display == TRENDS_LINEAR:
        return self._get_trends()
    with connection.cursor() as cursor:
        # Render the composed query against the live connection before running.
        rendered_sql = self._build_query(self._gen_lateral_bodies()).as_string(cursor.connection)
        cursor.execute(rendered_sql)
        rows = namedtuplefetchall(cursor)
    return self.data_to_return(rows)
def fetch_sql(sql_: str, params: Tuple[Any, ...]) -> List[Any]:
    """Execute a raw SQL statement with bound params; rows come back as named tuples."""
    with connection.cursor() as cursor:
        cursor.execute(sql.SQL(sql_), params)
        rows = namedtuplefetchall(cursor)
    return rows
def status_report() -> None:
    """Compile a weekly instance status report and send it to PostHog analytics.

    Gathers per-team event/person counts for the previous week plus
    breakdowns by client library and event name, then captures the report
    with a temporary opt-in override.
    """
    period_start, period_end = get_previous_week()
    report: Dict[str, Any] = {
        "posthog_version": VERSION,
        "period": {
            "start_inclusive": period_start.isoformat(),
            "end_inclusive": period_end.isoformat(),
        },
    }

    # Users active in the period; anonymized users expose only id/distinct_id.
    report["users_who_logged_in"] = [
        {"id": user.id, "distinct_id": user.distinct_id}
        if user.anonymize_data
        else {
            "id": user.id,
            "distinct_id": user.distinct_id,
            "first_name": user.first_name,
            "email": user.email,
        }
        for user in User.objects.filter(last_login__gte=period_start)
    ]

    report["teams"] = {}
    for team in Team.objects.all():
        team_report: Dict[str, Any] = {}
        events_considered_total = Event.objects.filter(team_id=team.id)
        events_considered_new_in_period = events_considered_total.filter(
            created_at__gte=period_start, created_at__lte=period_end,
        )
        # NOTE(review): these "persons" querysets are built from Event, not a
        # Person model, so persons_count_* actually count events — confirm
        # whether this is intentional or a copy-paste slip.
        persons_considered_total = Event.objects.filter(team_id=team.id)
        persons_considered_total_new_in_period = persons_considered_total.filter(
            created_at__gte=period_start, created_at__lte=period_end,
        )
        team_report["events_count_total"] = events_considered_total.count()
        team_report["events_count_new_in_period"] = events_considered_new_in_period.count()
        team_report["persons_count_total"] = persons_considered_total.count()
        team_report["persons_count_new_in_period"] = persons_considered_total_new_in_period.count()

        with connection.cursor() as cursor:
            # BUG FIX: the period dict defines "end_inclusive", but these
            # queries previously read report["period"]["end_exclusive"],
            # which raised KeyError on every run.
            period_params = (
                team.id,
                report["period"]["start_inclusive"],
                report["period"]["end_inclusive"],
            )
            cursor.execute(
                sql.SQL(
                    """
                    SELECT COUNT(DISTINCT person_id) as persons_count
                    FROM posthog_event JOIN posthog_persondistinctid
                        ON (posthog_event.distinct_id = posthog_persondistinctid.distinct_id)
                    WHERE posthog_event.team_id = %s
                        AND posthog_event.created_at >= %s
                        AND posthog_event.created_at < %s
                    """
                ),
                period_params,
            )
            team_report["persons_count_active_in_period"] = cursor.fetchone()[0]
            cursor.execute(
                sql.SQL(
                    """
                    SELECT properties->>'$lib' as lib, COUNT(*) as count
                    FROM posthog_event
                    WHERE team_id = %s AND created_at >= %s AND created_at < %s
                    GROUP BY lib
                    """
                ),
                period_params,
            )
            team_report["events_count_by_lib"] = {
                result.lib: result.count for result in namedtuplefetchall(cursor)
            }
            cursor.execute(
                sql.SQL(
                    """
                    SELECT event as name, COUNT(*) as count
                    FROM posthog_event
                    WHERE team_id = %s AND created_at >= %s AND created_at < %s
                    GROUP BY name
                    """
                ),
                period_params,
            )
            team_report["events_count_by_name"] = {
                result.name: result.count for result in namedtuplefetchall(cursor)
            }
        report["teams"][team.id] = team_report

    # Report to the PostHog cloud project, temporarily overriding any local
    # opt-out, then restore the previous setting.
    posthoganalytics.api_key = "sTMFPsFhdP1Ssg"
    disabled = posthoganalytics.disabled
    posthoganalytics.disabled = False
    posthoganalytics.capture(get_machine_id(), "instance status report", report)
    posthoganalytics.disabled = disabled
def _execute_sql(
    self, filter: RetentionFilter, team: Team,
) -> Dict[Tuple[int, int], Dict[str, Any]]:
    """Run the retention cohort query with per-cohort people lists.

    Builds Django ORM querysets for the "first event" and "returning event"
    sides, compiles them to SQL, embeds them in a raw aggregate query, and
    returns a dict keyed by (first_date, date) bucket pairs with counts,
    a top-100 people preview, and a cache key for paging the remainder.
    """
    period = filter.period
    is_first_time_retention = filter.retention_type == RETENTION_FIRST_TIME
    events: QuerySet = QuerySet()
    # Entity conditions come back both as ORM Q-objects and as SQL
    # fragments for the raw query below.
    entity_condition, entity_condition_strigified = self.get_entity_condition(
        filter.target_entity, "first_event_date")
    returning_condition, returning_condition_stringified = self.get_entity_condition(
        filter.returning_entity, "events")
    events = Event.objects.filter(team_id=team.pk).add_person_id(
        team.pk).annotate(event_date=F("timestamp"))
    trunc, fields = self._get_trunc_func("timestamp", period)
    if is_first_time_retention:
        # First-time retention: cohort by each person's earliest qualifying
        # event; the custom date filter applies to that first date.
        filtered_events = events.filter(
            filter.properties_to_Q(team_id=team.pk))
        first_date = (filtered_events.filter(entity_condition).values(
            "person_id", "event",
            "action").annotate(first_date=Min(trunc)).filter(
                filter.custom_date_filter_Q("first_date")).distinct())
        final_query = (filtered_events.filter(
            filter.date_filter_Q).filter(returning_condition).values_list(
                "person_id", "event_date", "event", "action").union(
                    first_date.values_list("first_date", "person_id",
                                           "event", "action")))
    else:
        # Recurring retention: cohort by every truncated event date in range.
        filtered_events = events.filter(filter.date_filter_Q).filter(
            filter.properties_to_Q(team_id=team.pk))
        first_date = (filtered_events.filter(entity_condition).annotate(
            first_date=trunc).values("first_date", "person_id", "event",
                                     "action").distinct())
        final_query = (
            filtered_events.filter(returning_condition).values_list(
                "person_id", "event_date", "event", "action").union(
                    first_date.values_list("first_date", "person_id",
                                           "event", "action")))
    # Compile both querysets to SQL plus their bound parameters; ordering of
    # the parameter tuples below must match the placeholder order in the
    # assembled raw query.
    event_query, events_query_params = final_query.query.sql_with_params()
    reference_event_query, first_date_params = first_date.query.sql_with_params(
    )
    # NOTE(review): SQL AND binds tighter than OR, so this WHERE parses as
    # ((event_date >= first_date AND target AND return)
    #   OR (target AND event_date = first_date)) — confirm the grouping.
    final_query = """
        SELECT
            {fields}
            COUNT(DISTINCT "events"."person_id"),
            array_agg(DISTINCT "events"."person_id") as people
        FROM ({event_query}) events
        LEFT JOIN ({reference_event_query}) first_event_date
            ON (events.person_id = first_event_date.person_id)
        WHERE event_date >= first_date
            AND {target_condition} AND {return_condition}
            OR ({target_condition} AND event_date = first_date)
        GROUP BY date, first_date
    """.format(
        event_query=event_query,
        reference_event_query=reference_event_query,
        fields=fields,
        return_condition=returning_condition_stringified,
        target_condition=entity_condition_strigified,
    )
    event_params = (filter.target_entity.id, filter.returning_entity.id,
                    filter.target_entity.id)
    # Month/Hour periods need the start date bound twice — presumably the
    # trunc expression references it twice; verify against _get_trunc_func.
    start_params = ((filter.date_from, filter.date_from) if period == "Month"
                    or period == "Hour" else (filter.date_from, ))
    with connection.cursor() as cursor:
        cursor.execute(
            final_query,
            start_params + events_query_params + first_date_params +
            event_params,
        )
        data = namedtuplefetchall(cursor)
        # scores[first_date][person] = number of cohort rows the person
        # appears in; used below to rank people by activity.
        scores: dict = {}
        for datum in data:
            key = round(datum.first_date, 1)
            if not scores.get(key, None):
                scores.update({key: {}})
            for person in datum.people:
                if not scores[key].get(person, None):
                    scores[key].update({person: 1})
                else:
                    scores[key][person] += 1
    by_dates = {}
    for row in data:
        # Most-active people first within each cohort cell.
        people = sorted(
            row.people,
            key=lambda p: scores[round(row.first_date, 1)][int(p)],
            reverse=True,
        )
        # Cache the full ranked people list under a random key so the API
        # can page past the first 100 entries.
        random_key = "".join(
            random.SystemRandom().choice(string.ascii_uppercase +
                                         string.digits) for _ in range(10))
        cache_key = generate_cache_key("{}{}{}".format(
            random_key, str(round(row.first_date, 0)), str(team.pk)))
        cache.set(
            cache_key,
            people,
            600,  # cache the ranked list for 10 minutes
        )
        by_dates.update({
            (int(row.first_date), int(row.date)): {
                "count": row.count,
                "people": people[0:100],
                "offset": 100,
                "next": cache_key if len(people) > 100 else None,
            }
        })
    return by_dates