def find_trending_event_type(
        self,
        mysql: MySQLHelper,
        from_date: Optional[datetime],
        to_date: Optional[datetime],
        days_back: Optional[int] = None) -> List[TrendingFollowEvent]:
    """Find destination users that received the same follow event type more than once.

    Rows of ``follow_events`` inside the requested time window are grouped by
    destination user name and follow type. Only groups holding more than one
    event are kept, and they are returned ordered by event count, highest first.

    :param mysql: Helper used to run the query
    :param from_date: from_date to query from
    :param to_date: to_date to query
    :param days_back: Forwarded to ``build_ts_filter`` together with
        from_date/to_date (per the original documentation: if given,
        from_date/to_date are ignored)
    :return: One TrendingFollowEvent per (destination user, follow type) group
    """
    where_clause, query_params = self.build_ts_filter(from_date, to_date, days_back)

    # Adjacent literals keep the statement text identical while staying readable.
    statement = (
        " select dst_user_name, follow_type_id,"
        " min(ts) as first_ts, max(ts) as last_ts,"
        " count(*) cnt, group_concat(src_user_name) as users"
        " from follow_events"
        " where {ts_filter}"
        " group by dst_user_name, follow_type_id"
        " having count(*) > 1"
        " order by cnt desc; "
    ).format(ts_filter=where_clause)

    trending = []
    for record in mysql.query(statement, query_params):
        # group_concat yields the source user names as one comma-separated string.
        source_users = record.users.split(',')
        trending.append(
            TrendingFollowEvent(
                source_users,
                record.dst_user_name,
                record.first_ts,
                record.last_ts,
                record.follow_type_id,
                record.cnt))
    return trending
def find_mutual_event_type(
        self,
        mysql: MySQLHelper,
        from_date: Optional[datetime],
        to_date: Optional[datetime],
        mutual_event_timeframe_days: int,
        days_back: Optional[int] = None) -> Set[MutualFollowEvent]:
    """Find pairs of users that performed the same follow event type on each other.

    ``follow_events`` is joined with itself so that each row is matched with
    the reverse-direction row (source and destination swapped) of the same
    follow type. Both rows must fall inside the requested time window and be
    fewer than ``mutual_event_timeframe_days`` days apart.

    :param mysql: Helper used to run the query
    :param from_date: from_date to query from
    :param to_date: to_date to query
    :param mutual_event_timeframe_days: Exclusive upper bound, in days, on the
        distance between the two events for them to count as mutual. For
        example, a mutual unfollow only counts if both users unfollowed each
        other within this many days.
    :param days_back: Forwarded to ``build_ts_filter`` together with
        from_date/to_date (per the original documentation: if given,
        from_date/to_date are ignored)
    :return: Set of MutualFollowEvent objects built from the matching row pairs
    """
    # The time window has to hold for both sides of the self-join.
    first_filter, first_params = self.build_ts_filter(
        from_date, to_date, days_back, ts_col="fe1.ts")
    second_filter, second_params = self.build_ts_filter(
        from_date, to_date, days_back, ts_col="fe2.ts")
    window_filter = "({}) and ({})".format(first_filter, second_filter)

    # Parameter order follows placeholder order: fe1 window, fe2 window, day limit.
    query_params = first_params + second_params
    query_params.append(mutual_event_timeframe_days)

    statement = (
        " select fe1.src_user_name as user_name_1, fe1.src_user_id as user_id_1,"
        " fe2.src_user_name as user_name_2, fe2.src_user_id as user_id_2,"
        " fe1.ts as user_1_event_ts, fe2.ts as user_2_event_ts,"
        " fe1.follow_type_id as follow_type_id,"
        " abs(timestampdiff(day, fe1.ts, fe2.ts)) as day_diff"
        " from follow_events fe1"
        " join follow_events fe2"
        " on fe1.dst_user_id = fe2.src_user_id"
        " and fe1.src_user_id = fe2.dst_user_id"
        " and fe1.follow_type_id = fe2.follow_type_id"
        " where {ts_filter}"
        " and abs(timestampdiff(day, fe1.ts, fe2.ts)) < ? "
    ).format(ts_filter=window_filter)

    def to_mutual_event(record):
        """Build the MutualFollowEvent described by one joined row."""
        first_side = UserEvent(
            InstaUser(record.user_id_1, record.user_name_1),
            record.user_1_event_ts,
            record.follow_type_id)
        second_side = UserEvent(
            InstaUser(record.user_id_2, record.user_name_2),
            record.user_2_event_ts,
            record.follow_type_id)
        return MutualFollowEvent(first_side, second_side)

    # NOTE(review): the join matches every pair in both directions; whether the
    # set collapses those presumably depends on MutualFollowEvent equality - confirm.
    return {to_mutual_event(record) for record in mysql.query(statement, query_params)}
def get_new_media(
        self,
        mysql: MySQLHelper,
        from_date: Optional[datetime],
        to_date: Optional[datetime],
        days_back: Optional[int] = None):
    """Fetch media records whose ``taken_at_ts`` falls inside the requested window.

    Results are ordered by ``scrape_ts`` descending, then ``taken_at_ts``
    ascending.

    :param mysql: Helper used to run the query
    :param from_date: from_date to query from
    :param to_date: to_date to query
    :param days_back: Forwarded to ``build_ts_filter`` together with
        from_date/to_date
    :return: List of MediaRecord objects, one per returned row
    :raises ValueError: If from_date, to_date and days_back are all None
    """
    # Explicit check instead of ``assert``: asserts are removed under ``python -O``,
    # which would let the query run without any time bound being supplied.
    if from_date is None and to_date is None and days_back is None:
        raise ValueError(
            "At least one of from_date, to_date or days_back must be given")

    ts_filter, ts_params = self.build_ts_filter(
        from_date, to_date, days_back, ts_col="taken_at_ts")
    query = (
        " select * from media"
        " where {ts_filter}"
        " order by scrape_ts desc, taken_at_ts asc "
    ).format(ts_filter=ts_filter)

    records = mysql.query(query, ts_params)
    return [MediaRecord.from_row(record) for record in records]
def get_users(self, group_name: str, mysql: MySQLHelper, limit: Optional[int] = None) -> List[InstaUser]:
    """Get the users of a group whose media objects should be scraped.

    Runs ``self.GET_USERS_QUERY`` with the group name as its only parameter.
    Per the original documentation, that query orders by ascending
    last_scrape_ts, so users that have not been scraped lately come first.

    :param group_name: Name of the group to fetch users for
    :param mysql: Helper used to run the query
    :param limit: Optional maximum number of users to return
    :return: List of InstaUser objects, one per returned row
    :raises ValueError: If limit is not None and is not a valid integer value
    :raises TypeError: If limit is of a type that cannot be converted to int
    """
    self.logger.debug("Getting users for group %s", group_name)

    query = self.GET_USERS_QUERY
    if limit is not None:
        # The limit is spliced into the SQL text, so force it to an integer
        # first; anything non-numeric is rejected instead of reaching the database.
        query += " limit {}".format(int(limit))

    res = mysql.query(query, [group_name])
    users = [InstaUser(row.user_id, row.user_name) for row in res]
    self.logger.debug("Done querying users")
    return users
def get_current_follows(
        self,
        mysql: MySQLHelper,
        user: str,
        cursor: Optional[Cursor] = None) -> Optional[UserFollows]:
    """Load the stored follow relations of a single source user.

    Reads every row of ``self.FOLLOWS_TABLE`` whose ``src_user_name`` equals
    ``user``. A row's destination user is added to the followers set when
    ``dst_follows`` is truthy and to the follows set when ``src_follows`` is
    truthy.

    :param mysql: Helper used to run the query
    :param user: Source user name to look up
    :param cursor: Optional cursor, passed through to ``mysql.query``
    :return: UserFollows for the user, or None when no rows were found
    """
    statement = "select * from {0} where src_user_name = ?".format(self.FOLLOWS_TABLE)
    rows = mysql.query(statement, [user], cursor)
    if not rows:
        return None

    followers = set()
    follows = set()
    for row in rows:
        if row.dst_follows:
            followers.add(InstaUser(row.dst_user_id, row.dst_user_name))
        if row.src_follows:
            follows.add(InstaUser(row.dst_user_id, row.dst_user_name))

    # Every row shares the same source user, so the first one identifies it.
    # NOTE(review): src_user_name is passed as both the second and third
    # argument of InstaUser - confirm that is what the third argument expects.
    source_user = InstaUser(
        rows[0].src_user_id, rows[0].src_user_name, rows[0].src_user_name)
    return UserFollows(source_user, followers, follows)