def find_trending_event_type(
            self,
            mysql: MySQLHelper,
            from_date: Optional[datetime],
            to_date: Optional[datetime],
            days_back: Optional[int] = None) -> List[TrendingFollowEvent]:
        """Find destination users that received the same follow event type repeatedly.

        Rows of ``follow_events`` matching the time filter are grouped by
        ``(dst_user_name, follow_type_id)``. Only groups with more than one
        event are kept, ordered by descending event count.

        :param mysql: helper used to execute the query
        :param from_date: start of the time window, forwarded to ``build_ts_filter``
        :param to_date: end of the time window, forwarded to ``build_ts_filter``
        :param days_back: forwarded to ``build_ts_filter``; expected to override
                          from_date/to_date when given (confirm in that helper)
        :return: one TrendingFollowEvent per group, in the order the query returns them
        """
        where_clause, query_params = self.build_ts_filter(
            from_date, to_date, days_back)
        query_template = """
            select dst_user_name,
                   follow_type_id,
                   min(ts)                     as first_ts,
                   max(ts)                     as last_ts,
                   count(*)                       cnt,
                   group_concat(src_user_name) as users
            from follow_events
            where {ts_filter}
            group by dst_user_name, follow_type_id
            having count(*) > 1
            order by cnt desc;
        """
        rows = mysql.query(query_template.format(ts_filter=where_clause),
                           query_params)

        trending = []
        for row in rows:
            # NOTE(review): ``users`` is a comma-joined group_concat value; MySQL
            # caps its length (group_concat_max_len), so very large groups may be
            # truncated -- confirm the server setting.
            source_users = row.users.split(',')
            trending.append(
                TrendingFollowEvent(source_users, row.dst_user_name,
                                    row.first_ts, row.last_ts,
                                    row.follow_type_id, row.cnt))
        return trending
    def find_mutual_event_type(
            self,
            mysql: MySQLHelper,
            from_date: Optional[datetime],
            to_date: Optional[datetime],
            mutual_event_timeframe_days: int,
            days_back: Optional[int] = None) -> Set[MutualFollowEvent]:
        """Find pairs of users that performed the same follow event type on each other.

        ``follow_events`` is joined with itself so that one event's source is
        the other event's destination (and vice versa) and both events share
        the same ``follow_type_id``. The time filter is applied to both sides
        of the join.

        :param mysql: helper used to execute the query
        :param from_date: start of the time window, forwarded to ``build_ts_filter``
        :param to_date: end of the time window, forwarded to ``build_ts_filter``
        :param mutual_event_timeframe_days: the two events must be strictly fewer than
                                            this many days apart (``timestampdiff(day, ...)``)
                                            to count as mutual. For example, with 2 a mutual
                                            unfollow requires both unfollows to be less than
                                            2 days apart.
        :param days_back: forwarded to ``build_ts_filter``; expected to override
                          from_date/to_date when given (confirm in that helper)
        :return: set of MutualFollowEvent, one per row returned by the query
        """
        fe1_filter, fe1_params = self.build_ts_filter(
            from_date, to_date, days_back, ts_col="fe1.ts")
        fe2_filter, fe2_params = self.build_ts_filter(
            from_date, to_date, days_back, ts_col="fe2.ts")
        both_sides_filter = " and ".join(
            "({})".format(side) for side in (fe1_filter, fe2_filter))

        # Placeholder order: fe1 window, fe2 window, then the day-difference limit.
        query_params = fe1_params + fe2_params
        query_params.append(mutual_event_timeframe_days)

        # ``day_diff`` is selected for readability of the raw result only; it is
        # not read below.
        query_template = """
                select fe1.src_user_name as user_name_1,
                       fe1.src_user_id as user_id_1,
                       fe2.src_user_name as user_name_2,
                       fe2.src_user_id as user_id_2,
                       fe1.ts as user_1_event_ts,
                       fe2.ts as user_2_event_ts,
                       fe1.follow_type_id as follow_type_id,
                       abs(timestampdiff(day, fe1.ts, fe2.ts)) as day_diff
                from follow_events fe1
                         join follow_events fe2 on fe1.dst_user_id = fe2.src_user_id
                    and fe1.src_user_id = fe2.dst_user_id and fe1.follow_type_id = fe2.follow_type_id
                where {ts_filter} and abs(timestampdiff(day, fe1.ts, fe2.ts)) < ?
        """
        rows = mysql.query(query_template.format(ts_filter=both_sides_filter),
                           query_params)

        # The join is symmetric, so every pair is returned in both orderings.
        # NOTE(review): whether the set collapses them depends on how
        # MutualFollowEvent defines equality/hash -- confirm.
        return {
            MutualFollowEvent(
                UserEvent(InstaUser(row.user_id_1, row.user_name_1),
                          row.user_1_event_ts, row.follow_type_id),
                UserEvent(InstaUser(row.user_id_2, row.user_name_2),
                          row.user_2_event_ts, row.follow_type_id))
            for row in rows
        }
Пример #3
0
 def get_new_media(self, mysql: MySQLHelper, from_date: Optional[datetime], to_date: Optional[datetime],
                   days_back: Optional[int]):
     """Fetch media rows whose ``taken_at_ts`` matches the requested time filter.

     At least one of ``from_date``, ``to_date`` or ``days_back`` must be given.

     :param mysql: helper used to execute the query
     :param from_date: start of the time window, forwarded to ``build_ts_filter``
     :param to_date: end of the time window, forwarded to ``build_ts_filter``
     :param days_back: forwarded to ``build_ts_filter``
     :return: list of MediaRecord built from the rows, ordered by ``scrape_ts``
              descending and then ``taken_at_ts`` ascending
     """
     # Sanity check only: ``assert`` is skipped when Python runs with -O.
     assert any(bound is not None for bound in (from_date, to_date, days_back))
     where_clause, where_params = self.build_ts_filter(
         from_date, to_date, days_back, ts_col="taken_at_ts")
     sql_template = """
     select *
     from media
     where {ts_filter}
     order by scrape_ts desc, taken_at_ts asc
     """
     rows = mysql.query(sql_template.format(ts_filter=where_clause), where_params)
     return list(map(MediaRecord.from_row, rows))
 def get_users(self,
               group_name: str,
               mysql: MySQLHelper,
               limit: Optional[int] = None) -> List[InstaUser]:
     """Return the users of ``group_name`` whose media objects should be scraped.

     Ordering is whatever ``GET_USERS_QUERY`` defines; the intent is ascending
     last_scrape_ts so users not scraped lately come first (confirm against
     the query, which is defined outside this method).

     :param group_name: bound as the single parameter of ``GET_USERS_QUERY``
     :param mysql: helper used to execute the query
     :param limit: when not None, appended to the query as a ``limit`` clause
     :return: list of InstaUser built from each row's user_id and user_name
     """
     self.logger.debug("Getting users for group %s", group_name)
     # ``limit`` is interpolated into the SQL text rather than bound as a
     # parameter, so callers must pass a trusted integer.
     limit_clause = "" if limit is None else " limit {}".format(limit)
     rows = mysql.query(self.GET_USERS_QUERY + limit_clause, [group_name])
     found_users = [InstaUser(row.user_id, row.user_name) for row in rows]
     self.logger.debug("Done querying users")
     return found_users
Пример #5
0
 def get_current_follows(
         self,
         mysql: MySQLHelper,
         user: str,
         cursor: Optional[Cursor] = None) -> Optional[UserFollows]:
     """Load the stored follow relations whose source user is ``user``.

     :param mysql: helper used to execute the query
     :param user: user name matched against ``src_user_name``
     :param cursor: optional cursor passed through to ``mysql.query``
     :return: None when no rows exist for ``user``; otherwise a UserFollows built
              from the source user, the set of destination users with
              ``dst_follows`` set, and the set of destination users with
              ``src_follows`` set
     """
     query = "select * from {0} where src_user_name = ?".format(
         self.FOLLOWS_TABLE)
     rows = mysql.query(query, [user], cursor)
     if len(rows) == 0:
         return None

     # A destination user can land in both sets when the relation is mutual.
     followers = {
         InstaUser(row.dst_user_id, row.dst_user_name)
         for row in rows if row.dst_follows
     }
     follows = {
         InstaUser(row.dst_user_id, row.dst_user_name)
         for row in rows if row.src_follows
     }

     # Every row shares the same source user, so the first row describes it.
     first_row = rows[0]
     # NOTE(review): src_user_name is passed as both the 2nd and 3rd argument;
     # the 3rd may have been meant to be a different column -- confirm against
     # InstaUser's signature.
     source_user = InstaUser(first_row.src_user_id, first_row.src_user_name,
                             first_row.src_user_name)
     return UserFollows(source_user, followers, follows)