def main(self):
    """Scrape story viewers and persist the result through MySQLHelper.

    Opens the 'mysql-insta-local' connection, runs
    ``StoryViewersScraper.scrape_viewers``, hands the result to
    ``self.save_results`` and commits. The connection is closed on every
    exit path, including when scraping or saving raises.
    """
    mysql = MySQLHelper('mysql-insta-local')
    try:
        scraper = StoryViewersScraper()
        scrape_result = scraper.scrape_viewers()
        self.save_results(scrape_result, mysql)
        mysql.commit()
    finally:
        # Release the connection even if scraping, saving or commit failed;
        # nothing is committed in that case.
        mysql.close()
Exemplo n.º 2
0
    def main(self, user: InstaUser, scraper: Optional[MediaScraper] = None, scrape_likers: bool = False,
             scrape_comments: bool = False, likers_scrape_threshold: Optional[int] = None,
             media_max_likers_amount: Optional[int] = None, max_media_limit: Optional[int] = None):
        """Scrape a user's media and persist the result through MySQLHelper.

        :param user: User whose media is scraped. When ``user.from_full_profile`` is falsy the
                     user is re-scraped by username first.
        :param scraper: Scraper to use; a new MediaScraper is created when omitted.
        :param scrape_likers: Forwarded unchanged to ``scraper.scrape_media``.
        :param scrape_comments: Forwarded unchanged to ``scraper.scrape_media``.
        :param likers_scrape_threshold: Forwarded unchanged to ``scraper.scrape_media``.
        :param media_max_likers_amount: Forwarded unchanged to ``scraper.scrape_media``.
        :param max_media_limit: Forwarded unchanged to ``scraper.scrape_media``.
        :return: None
        """
        scraper = scraper or MediaScraper()

        if not user.from_full_profile:
            user = scraper.scrape_user(user.username)

        # Private profiles that the viewer does not follow are skipped without touching the database.
        if user.is_private and not user.followed_by_viewer:
            self.logger.warning("user is private and not followed by viewer. skipping scraping...")
            return

        mysql = MySQLHelper('mysql-insta-local')
        try:
            scrape_result = scraper.scrape_media(user, scrape_likers, scrape_comments, likers_scrape_threshold,
                                                 media_max_likers_amount, max_media_limit)
            self.save_results(scrape_result, mysql)
            mysql.commit()
        finally:
            # Release the connection even if scraping, saving or commit failed.
            mysql.close()
Exemplo n.º 3
0
    def main():
        """Replay the ``UPDATE follows`` statements found in the user-follows log.

        Reads '/opt/InstaProfiler/logs/user-follows.log', extracts every record
        matched by ``LOG_RECORD_REGEX`` whose sixth captured group contains
        'UPDATE follows', rebuilds the unfollow timestamp, source user and
        destination user list from it, and re-executes an equivalent UPDATE.
        Only batches with fewer than 8 users are replayed; larger ones are
        reported on stdout and skipped.

        The cursor and connection are closed on every exit path.
        """
        with open('/opt/InstaProfiler/logs/user-follows.log') as fp:
            txt = fp.read()

        ts_fields = ('year', 'month', 'day', 'hour', 'minute', 'second', 'frac')
        all_queries = []
        unfollow_users_distinct = set()
        for match in LOG_RECORD_REGEX.findall(txt):
            # presumably group 6 of a log record is the logged message -- confirm against LOG_RECORD_REGEX
            message = match[5]
            if 'UPDATE follows' not in message:
                continue

            unfollow_params = UNFOLLOW_PARAMS_REGEX.search(message)
            # 'frac' is passed as the 7th positional argument of datetime(), i.e. microseconds.
            unfollow_ts = datetime(*[int(unfollow_params.group(field)) for field in ts_fields])
            src_user = unfollow_params.group('src_user')
            unfollow_users = UNFOLLOW_USERS_REGEX.search(message).group('users').split(', ')

            if len(unfollow_users) >= 8:
                print("Too much users", len(unfollow_users))
                continue

            for u in unfollow_users:
                unfollow_users_distinct.add(u.strip("'"))
            # NOTE: the IN (...) list is interpolated verbatim from the log text; only the
            # timestamp and source user go through placeholders.
            query = "UPDATE follows set dst_follows=0, dst_unfollows_latest_timestamp=? where src_user_name=? and dst_user_id in ({users})".format(
                users=','.join(unfollow_users))
            params = (unfollow_ts, src_user)
            print(query)
            all_queries.append((query, params))

        print(','.join(unfollow_users_distinct))

        odbc_helper = MySQLHelper('mysql-insta-local')
        try:
            cursor = odbc_helper.get_cursor()
            try:
                for query, params in all_queries:
                    odbc_helper.execute(query, params, cursor)
                odbc_helper.commit()
            finally:
                cursor.close()
        finally:
            # Release the connection even if one of the replayed statements failed.
            odbc_helper.close()
Exemplo n.º 4
0
    def main(self,
             user: Union[InstaUser, str] = DEFAULT_USER_NAME,
             only_mutual: bool = False,
             scrape_follows: bool = True,
             scrape_followers: bool = True,
             max_follow_amount: Optional[int] = None,
             scraper: Optional[UserFollowsScraper] = None):
        """
        Scrape a user's follows/followers and sync the result into the database.

        :param user: User to parse its follows. A username string is resolved through ``scraper.scrape_user``.
        :param only_mutual: If set to True, will save only people that are both followers and follows.
                            Useful when src has many followers. This will make sure only "relevant" people are saved
        :param scrape_follows: Passed to the scraper to enable scraping of the accounts the user follows.
                               Forced to False when ``user.follows_amount`` exceeds ``max_follow_amount``.
        :param scrape_followers: Passed to the scraper to enable scraping of the user's followers.
                                 Forced to False when ``user.followed_by_amount`` exceeds ``max_follow_amount``.
        :param max_follow_amount: If given, will only scrape follows (follows/followers apart) if amount is
                                  under max_follow_amount
        :param scraper: Scraper to use; a new UserFollowsScraper is created when omitted.
        :raises UserDoesNotExist: If the user resolves to None.
        :return: None
        """
        scraper = scraper or UserFollowsScraper()
        if isinstance(user, str):
            user = scraper.scrape_user(user)

        if user is None:
            raise UserDoesNotExist()
        if scrape_followers and max_follow_amount is not None and user.followed_by_amount > max_follow_amount:
            self.logger.warning(
                "user is followed by too many people (followed by %d, max %d), skipping followers...",
                user.followed_by_amount, max_follow_amount)
            scrape_followers = False

        if scrape_follows and max_follow_amount is not None and user.follows_amount > max_follow_amount:
            self.logger.warning(
                "user follows too many people (follows %d, max %d), skipping follows...",
                user.follows_amount, max_follow_amount)
            scrape_follows = False

        mysql = MySQLHelper('mysql-insta-local')
        try:
            cursor = mysql.get_cursor()
            try:
                if user.is_private and not user.followed_by_viewer:
                    self.logger.warning(
                        "user is private and not followed by viewer. skipping scraping..."
                    )
                    # Nothing was scraped; the user record is still persisted with the current time.
                    scrape_ts = datetime.now()
                else:
                    follow_scrape = scraper.parse_user_follows([user], scrape_follows, scrape_followers)
                    follows, scrape_ts = follow_scrape.follows, follow_scrape.scrape_ts

                    current_follows = self.get_current_follows(mysql, user.username, cursor)
                    analyzed, users = UserFollowsAnalyzer.analyze_follows(follows, only_mutual)

                    # Update unfollowers
                    # NOTE(review): the follower diff below is gated on scrape_follows, while the
                    # original comment talks about followers not being scraped, and the follows
                    # diff further down is not gated at all. If the scraper returns an empty set
                    # for a disabled side, every stored follower/follow would be treated as
                    # unfollowed -- confirm whether the guards should be scrape_followers and
                    # scrape_follows respectively.
                    if scrape_follows:
                        # No followers are scraped so they could all be considered as unfollowed
                        if current_follows is None:
                            dst_has_unfollowed = set()
                        else:
                            dst_has_unfollowed = current_follows.followers.difference(follows[0].followers)
                        if dst_has_unfollowed:
                            self.update_agg_dst_unfollowers(mysql, cursor, dst_has_unfollowed,
                                                            user.username, scrape_ts)

                    if current_follows is None:
                        src_has_unfollowed = set()
                    else:
                        src_has_unfollowed = current_follows.follows.difference(follows[0].follows)
                    if src_has_unfollowed:
                        self.handle_unfollowers(mysql, cursor, src_has_unfollowed, user, scrape_ts)

                    # Insert new records
                    if any(x.follows for x in follows):
                        self.update_agg_followers(mysql, cursor, follows, analyzed,
                                                  users, src_has_unfollowed, scrape_ts)
                        if current_follows is None:
                            new_follows = follows[0].follows
                        else:
                            new_follows = follows[0].follows.difference(current_follows.follows)
                        if new_follows:
                            self.insert_raw_followers(mysql, cursor, new_follows, user, scrape_ts)

                # Update user info
                self.persist_user(mysql, cursor, user, scrape_ts)

                mysql.commit()
            finally:
                cursor.close()
        finally:
            # Release the connection even if scraping or one of the DB helpers failed.
            mysql.close()
        self.logger.info("Done UserFollowsScraper main")