session.set_quota_supervisor(enabled=True,
                                 sleep_after=['server_calls'],
                                 sleepyhead=True,
                                 stochastic_flow=True,
                                 notify_me=True,
                                 peak_server_calls_hourly=150,
                                 peak_server_calls_daily=3000)
    # session.set_quota_supervisor(enabled=True, sleep_after=['server_calls'], sleepyhead=True, stochastic_flow=True, notify_me=True,
    #                       peak_server_calls_daily=152 )

    print('[INFO]: Logging in..')
    try:
        with smart_run(session):
            #get follower, following count

            followers_count, following_count = get_relationship_counts(
                session.browser, targetAccount, session.logger)

            amount = getPercentageCount(int(followers_count), percentage)
            print('[INFO] Going to extract ' + str(amount) +
                  ' followers out of ' + str(followers_count))
            followers_list = session.grab_followers(username=targetAccount,
                                                    amount=amount,
                                                    live_match=False,
                                                    store_locally=True)
            print('[INFO]: Followers grabbed successfully. Saving to file..')
            result = ''
            for el in followers_list:
                result = result + el + '\n'
            with open("./" + str(targetAccount) + "_followers_new2.txt",
                      "w") as text_file:
                text_file.write(result)
Exemplo n.º 2
0
def OnePass(username, password, username1, amountToQuickscan, amountToLookup,
            iCnt, iMax):
    """Scan links tagged for ``username1`` and store statistics for each one.

    Logs in as ``username``, quick-scans up to ``amountToQuickscan`` links
    via ``get_links_for_username``, removes the links returned by
    ``getFreshTaggedLinks`` and processes at most ``amountToLookup`` of the
    remaining ones. For every link accepted by ``check_link2`` the poster's
    relationship counts are fetched and the record is persisted through
    ``storeRecord`` and ``storeTaggedActivitytoDB``.

    The session is always ended before this function returns or raises.

    :param username: account used to log in
    :param password: password of that account
    :param username1: account whose tagged links are scanned
    :param amountToQuickscan: number of links requested from the quick scan
    :param amountToLookup: maximum number of links to look up in detail
    :param iCnt: progress counter, only used in the log message
    :param iMax: progress total, only used in the log message
    :returns: ``False`` when the relationship counts of a poster could not
        be fetched (the scan is aborted), ``True`` otherwise
    """
    timestamp_format = "%Y-%m-%dT%H:%M:%S.%fZ"

    session = InstaPy(username=username, password=password)
    session.login()

    links = []
    try:
        prepare_my_database(session.logger)

        randomize = False
        media = MEDIA_PHOTO
        taggedImages = True

        try:
            # Links to ignore come from the database (per the original
            # note: recently queried ones and ones posted too long ago).
            # dict.fromkeys removes duplicates while keeping order.
            ignore_links = list(dict.fromkeys(getFreshTaggedLinks(username1)))

            fresh_links = get_links_for_username(
                session.browser, session.username, username1,
                amountToQuickscan, session.logger, session.logfolder,
                randomize, media, taggedImages)
            links = list(set(fresh_links) - set(ignore_links))
            session.logger.info(
                "Links quickscanned: {}".format(len(fresh_links)))
            session.logger.info(
                "Links to ignore: {}".format(len(ignore_links)))

            links = links[:amountToLookup]
            session.logger.info("Links to lookup: {}".format(len(links)))

        except NoSuchElementException:
            session.logger.error("Element not found, skipping this username")

        # For each link, open it and grab its statistics, then open the
        # poster's profile and grab theirs. Both steps consume Instagram
        # page pulls, hence the pauses.
        sleep(1.2)

        for i, posted_link in enumerate(links):

            sleep(3.0)
            session.logger.info(
                "Processing link:{}   {}/{}:{}/{}  totalLinks: {}/{}".format(
                    posted_link, iCnt, iMax, username, username1, i + 1,
                    len(links)))

            try:
                try:
                    (inappropriate, posted_by_username,
                     posted_link_likes_count, posted_link_comments_count,
                     posted_link_datetime_str, posted_link_location_name,
                     posted_link_image_text, is_video, reason,
                     scope) = check_link2(
                         session.browser,
                         posted_link,
                         session.dont_like,
                         session.mandatory_words,
                         session.mandatory_language,
                         session.is_mandatory_character,
                         session.mandatory_character,
                         session.check_character_set,
                         session.ignore_if_contains,
                         session.logger,
                     )
                except Exception as err:
                    # A link that cannot be checked is skipped, but
                    # KeyboardInterrupt/SystemExit are no longer swallowed.
                    session.logger.warning(
                        "Could not check link {}: {}".format(
                            posted_link, err))
                    inappropriate = True

                if inappropriate:
                    continue

                sleep(1.5)
                try:
                    (posted_by_followers_count,
                     posted_by_following_count) = get_relationship_counts(
                         session.browser, posted_by_username, session.logger)
                    posted_by_posts_count = None

                except Exception:
                    # If the follower count is unavailable we are probably
                    # out of daily page pulls, so stop the whole scan.
                    session.logger.warning(
                        "Might be out of daily page pulls.  Closing session.")
                    return False

                session.logger.info(
                    "-Posted on: {}  by: {}  posts: {}  followers: {}  following: {}"
                    .format(posted_link_datetime_str, posted_by_username,
                            posted_by_posts_count, posted_by_followers_count,
                            posted_by_following_count))

                # Naive UTC timestamp, comparable with the naive value
                # parsed from the post's timestamp string.
                last_checked_datetime = datetime.now(
                    timezone.utc).replace(tzinfo=None)
                posted_link_datetime = datetime.strptime(
                    posted_link_datetime_str, timestamp_format)

                storeRecord(username1, last_checked_datetime, posted_link,
                            posted_link_datetime, posted_link_location_name,
                            posted_link_likes_count,
                            posted_link_comments_count, posted_by_username,
                            posted_by_posts_count, posted_by_followers_count,
                            posted_by_following_count)

                storeTaggedActivitytoDB(
                    session.logger, username1, last_checked_datetime,
                    posted_link, posted_link_datetime,
                    posted_link_location_name, posted_link_likes_count,
                    posted_link_comments_count, posted_by_username,
                    posted_by_posts_count, posted_by_followers_count,
                    posted_by_following_count)

                sleep(1.0)
            except NoSuchElementException as err:
                session.logger.error("Invalid Page: {}".format(err))
    finally:
        # Single exit point for the session: runs on normal completion, on
        # the early `return False` and when an unexpected error propagates.
        session.end()

    return True
Exemplo n.º 3
0
def nf_validate_user_call(self,
                          username: str,
                          post_link: str = None) -> Tuple[bool, str]:
    """Check whether ``username`` passes the filters configured on ``self``.

    Checks that need no page load run first: own username, the
    ``ignore_users`` list and ``blacklist.csv`` in ``self.logfolder``. If
    none of the profile-based filters is configured the user is accepted
    right away. Otherwise the profile is inspected for, in order:
    relationship counts / potency ratio, number of posts, private account,
    default profile picture, business account rules and bio keywords.

    When ``self.store_in_database`` is set, the counts gathered during the
    profile checks are written to the user's database record before
    returning.

    :param username: account to validate
    :param post_link: post to return to afterwards. When given, the browser
        is first moved from the post to the profile; when omitted, the
        current browser URL is recorded and used as the return target
    :returns: ``(valid, reason)``
    """
    followers_count = None
    following_count = None
    number_of_posts = None
    if username == self.username:
        reason = "---> Username '{}' is yours!\t~skipping user\n".format(
            self.username)
        return False, reason

    if username in self.ignore_users:
        reason = ("---> '{}' is in the `ignore_users` list\t~skipping "
                  "user\n".format(username))
        return False, reason

    blacklist_file = "{}blacklist.csv".format(self.logfolder)
    if os.path.isfile(blacklist_file):
        with open(blacklist_file, "rt") as f:
            reader = csv.reader(f, delimiter=",")
            for row in reader:
                # the username may appear in any column of the row
                for field in row:
                    if field == username:
                        return (
                            False,
                            "---> {} is in blacklist  ~skipping "
                            "user\n".format(username),
                        )

    potency_ratio = self.potency_ratio
    delimit_by_numbers = self.delimit_by_numbers
    max_followers = self.max_followers
    max_following = self.max_following
    min_followers = self.min_followers
    min_following = self.min_following
    min_posts = self.min_posts
    max_posts = self.max_posts
    skip_private = self.skip_private
    skip_private_percentage = self.skip_private_percentage
    skip_no_profile_pic = self.skip_no_profile_pic
    skip_no_profile_pic_percentage = self.skip_no_profile_pic_percentage
    skip_business = self.skip_business
    skip_non_business = self.skip_non_business
    skip_business_percentage = self.skip_business_percentage
    skip_business_categories = self.skip_business_categories
    dont_skip_business_categories = self.dont_skip_business_categories
    skip_bio_keyword = self.skip_bio_keyword

    if not any([
            potency_ratio, delimit_by_numbers, max_followers, max_following,
            min_followers, min_following, min_posts, max_posts, skip_private,
            skip_private_percentage, skip_no_profile_pic,
            skip_no_profile_pic_percentage, skip_business, skip_non_business,
            skip_business_percentage, skip_business_categories,
            skip_bio_keyword
    ]):
        # Nothing to check, skip going to user page and then back for nothing
        return True, "Valid user"

    try:
        if post_link:
            nf_go_from_post_to_profile(self, username)
        else:
            post_link = get_current_url(self.browser)
        self.logger.info("about to start checking user page")
        # Checks the potential of target user by relationship status in order
        # to delimit actions within the desired boundary
        if (potency_ratio
                or delimit_by_numbers and (max_followers or max_following
                                           or min_followers or min_following)):

            relationship_ratio = None
            reverse_relationship = False

            # get followers & following counts
            self.logger.info("About to get relationship counts")
            followers_count, following_count = get_relationship_counts(
                self.browser, username, self.logger)

            # a negative potency ratio means "following / followers"
            if potency_ratio and potency_ratio < 0:
                potency_ratio *= -1
                reverse_relationship = True

            # avoid division by zero; None (unknown) is left untouched
            followers_count = 1 if followers_count == 0 else followers_count
            following_count = 1 if following_count == 0 else following_count

            if followers_count and following_count:
                relationship_ratio = (float(followers_count) /
                                      float(following_count)
                                      if not reverse_relationship else
                                      float(following_count) /
                                      float(followers_count))

            # One placeholder per argument; the username slot used to be a
            # literal, which shifted every logged value by one position.
            self.logger.info(
                "User: '{}'  |> followers: {}  |> following: {}  |> relationship "
                "ratio: {}".format(
                    username,
                    followers_count if followers_count else "unknown",
                    following_count if following_count else "unknown",
                    truncate_float(relationship_ratio, 2)
                    if relationship_ratio else "unknown",
                ))

            if followers_count or following_count:
                if potency_ratio and not delimit_by_numbers:
                    if relationship_ratio and relationship_ratio < potency_ratio:
                        reason = (
                            "'{}' is not a {} with the relationship ratio of {}  "
                            "~skipping user\n".format(
                                username,
                                "potential user" if not reverse_relationship
                                else "massive follower",
                                truncate_float(relationship_ratio, 2),
                            ))
                        return False, reason

                elif delimit_by_numbers:
                    if followers_count:
                        if max_followers:
                            if followers_count > max_followers:
                                reason = (
                                    "User '{}'s followers count exceeds maximum "
                                    "limit  ~skipping user\n".format(username))
                                return False, reason

                        if min_followers:
                            if followers_count < min_followers:
                                reason = (
                                    "User '{}'s followers count is less than "
                                    "minimum limit  ~skipping user\n".format(
                                        username))
                                return False, reason

                    if following_count:
                        if max_following:
                            if following_count > max_following:
                                reason = (
                                    "User '{}'s following count exceeds maximum "
                                    "limit  ~skipping user\n".format(username))
                                return False, reason

                        if min_following:
                            if following_count < min_following:
                                reason = (
                                    "User '{}'s following count is less than "
                                    "minimum limit  ~skipping user\n".format(
                                        username))
                                return False, reason

                    if potency_ratio:
                        if relationship_ratio and relationship_ratio < potency_ratio:
                            reason = (
                                "'{}' is not a {} with the relationship ratio of "
                                "{}  ~skipping user\n".format(
                                    username,
                                    "potential user"
                                    if not reverse_relationship else "massive "
                                    "follower",
                                    truncate_float(relationship_ratio, 2),
                                ))
                            return False, reason

        if min_posts or max_posts:
            # if you are interested in relationship number of posts boundaries
            try:
                number_of_posts = getUserData(
                    "graphql.user.edge_owner_to_timeline_media.count",
                    self.browser)
            except WebDriverException:
                self.logger.error("~cannot get number of posts for username")
                reason = "---> Sorry, couldn't check for number of posts of " "username\n"
                return False, reason

            if max_posts:
                if number_of_posts > max_posts:
                    reason = (
                        "Number of posts ({}) of '{}' exceeds the maximum limit "
                        "given {}\n".format(number_of_posts, username,
                                            max_posts))
                    return False, reason
            if min_posts:
                if number_of_posts < min_posts:
                    reason = (
                        "Number of posts ({}) of '{}' is less than the minimum "
                        "limit given {}\n".format(number_of_posts, username,
                                                  min_posts))
                    return False, reason

        # Skip users

        # skip private
        if skip_private:
            try:
                self.browser.find_element_by_xpath(
                    "//*[contains(text(), 'This Account is Private')]")
                is_private = True
            except NoSuchElementException:
                is_private = False
            # the random draw only happens for private accounts
            if is_private and (random.randint(0, 100) <=
                               skip_private_percentage):
                return False, "{} is private account, by default skip\n".format(
                    username)

        # skip no profile pic
        if skip_no_profile_pic:
            try:
                profile_pic = getUserData("graphql.user.profile_pic_url",
                                          self.browser)
            except WebDriverException:
                self.logger.error("~cannot get the post profile pic url")
                return False, "---> Sorry, couldn't get if user profile pic url\n"
            if (profile_pic in default_profile_pic_instagram
                    or str(profile_pic).find(
                        "11906329_960233084022564_1448528159_a.jpg") > 0) and (
                            random.randint(
                                0, 100) <= skip_no_profile_pic_percentage):
                return False, "{} has default instagram profile picture\n".format(
                    username)

        # skip business
        if skip_business or skip_non_business:
            # if is business account skip under conditions
            try:
                is_business_account = getUserData(
                    "graphql.user.is_business_account", self.browser)
            except WebDriverException:
                self.logger.error(
                    "~cannot get if user has business account active")
                return (
                    False,
                    "---> Sorry, couldn't get if user has business "
                    "account active\n",
                )

            if skip_non_business and not is_business_account:
                return (
                    False,
                    "---> Skipping non business because skip_non_business set to True",
                )

            if is_business_account:
                try:
                    category = getUserData(
                        "graphql.user.business_category_name", self.browser)
                except WebDriverException:
                    self.logger.error("~cannot get category name for user")
                    return False, "---> Sorry, couldn't get category name for " "user\n"

                if not skip_business_categories:
                    # skip if not in dont_include
                    if category not in dont_skip_business_categories:
                        if not dont_skip_business_categories and (
                                random.randint(
                                    0, 100) <= skip_business_percentage):
                            return False, "'{}' has a business account\n".format(
                                username)
                        else:
                            return (
                                False,
                                ("'{}' has a business account in the "
                                 "undesired category of '{}'\n".format(
                                     username, category)),
                            )
                else:
                    if category in skip_business_categories:
                        return (
                            False,
                            ("'{}' has a business account in the "
                             "undesired category of '{}'\n".format(
                                 username, category)),
                        )

        if skip_bio_keyword:
            # if contain stop words then skip
            try:
                profile_bio = getUserData("graphql.user.biography",
                                          self.browser)
            except WebDriverException:
                self.logger.error("~cannot get user bio")
                return False, "---> Sorry, couldn't get get user bio " "account active\n"
            # defensive: treat a missing biography as empty text
            profile_bio_lower = (profile_bio or "").lower()
            for bio_keyword in skip_bio_keyword:
                if bio_keyword.lower() in profile_bio_lower:
                    return (
                        False,
                        "{} has a bio keyword of {}, by default skip\n".format(
                            username, bio_keyword),
                    )

        # if everything is ok
        return True, "Valid user"

    except NoSuchElementException:
        return False, "Unable to locate element"
    finally:
        # Runs for every exit from the profile checks above, including the
        # early `return False, ...` branches.
        if self.store_in_database:
            try:
                user = db_get_or_create_user(self, username)
                self.db.session.add(user)
                user.date_checked = datetime.now()
                if followers_count:
                    user.followers_count = followers_count
                if following_count:
                    user.following_count = following_count
                if number_of_posts:
                    user.posts_count = number_of_posts
            except SQLAlchemyError:
                self.db.session.rollback()
            finally:
                self.db.session.commit()

        if post_link:
            nf_find_and_press_back(self, post_link)
Exemplo n.º 4
0
def gatherAndStoreLinkData(session, username1, posted_link):
    """Given a posted_link, gather its info and update the database.

    The link is checked with ``check_link2``. When it is accepted, the
    poster's relationship counts are fetched and the record is stored via
    ``storeRecord`` and ``storeTaggedActivitytoDB``. When ``check_link2``
    reports the reason ``"Unavailable Page"`` the link is deleted from the
    database instead.

    :param session: logged-in session providing browser, logger and filters
    :param username1: account the link is stored under
    :param posted_link: URL of the post to inspect
    :returns: ``False`` when the poster's relationship counts could not be
        fetched, ``True`` otherwise (including when the link was skipped)
    """
    timestamp_format = "%Y-%m-%dT%H:%M:%S.%fZ"

    try:
        inappropriate = True
        # `reason` is inspected after the check below, so it must exist even
        # when check_link2 raises; previously that path hit an unbound name.
        reason = None
        sleep(1.0)
        try:
            (inappropriate, posted_by_username, posted_link_likes_count,
             posted_link_comments_count, posted_link_datetime_str,
             posted_link_location_name, posted_link_image_text, is_video,
             reason, scope) = check_link2(
                 session.browser,
                 posted_link,
                 session.dont_like,
                 session.mandatory_words,
                 session.mandatory_language,
                 session.is_mandatory_character,
                 session.mandatory_character,
                 session.check_character_set,
                 session.ignore_if_contains,
                 session.logger,
             )
            session.logger.info("-Posted on: {}  by: {} ".format(
                posted_link_datetime_str, posted_by_username))

        except Exception as err:
            # A link that cannot be checked is treated as inappropriate;
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            session.logger.warning(
                "Could not check link {}: {}".format(posted_link, err))
            inappropriate = True

        updateSessionActivityToDB(session)

        if not inappropriate:

            sleep(1.0)
            try:
                (posted_by_followers_count,
                 posted_by_following_count) = get_relationship_counts(
                     session.browser, posted_by_username, session.logger)
                posted_by_posts_count = None

            except Exception:
                session.logger.warning(
                    "Might be out of daily page pulls.  Closing session.")
                return False

            session.logger.info(
                "-Posts: {}  followers: {}  following: {}".format(
                    posted_by_posts_count, posted_by_followers_count,
                    posted_by_following_count))

            # Naive UTC timestamp, comparable with the naive value parsed
            # from the post's timestamp string.
            last_checked_datetime = datetime.now(
                timezone.utc).replace(tzinfo=None)
            posted_link_datetime = datetime.strptime(posted_link_datetime_str,
                                                     timestamp_format)

            storeRecord(username1, last_checked_datetime, posted_link,
                        posted_link_datetime, posted_link_location_name,
                        posted_link_likes_count, posted_link_comments_count,
                        posted_by_username, posted_by_posts_count,
                        posted_by_followers_count, posted_by_following_count)

            storeTaggedActivitytoDB(
                session.logger, username1, last_checked_datetime, posted_link,
                posted_link_datetime, posted_link_location_name,
                posted_link_likes_count, posted_link_comments_count,
                posted_by_username, posted_by_posts_count,
                posted_by_followers_count, posted_by_following_count)

        if reason == "Unavailable Page":
            # delete link from data base
            deleteLinkFromDB(session, username1, posted_link)

        sleep(1.0)

    except NoSuchElementException as err:
        session.logger.error("Invalid Page: {}".format(err))

    return True
Exemplo n.º 5
0
def delta_followers(session, user_name, max_amount, past_followers=None):
    """
    Given an instagram username and an optional list of past_followers,
    retrieves the list of new followers.

    Opens the profile page of ``user_name``, reads its relationship counts,
    opens the followers dialog and collects users through
    ``get_users_through_dialog``.

    :param session: logged-in session providing browser, logger and settings
    :param user_name: account whose followers are collected (surrounding
        whitespace is stripped)
    :param max_amount: amount of followers requested from the dialog
    :param past_followers: followers already known; forwarded to
        ``get_users_through_dialog``. Defaults to an empty list
    :return: the list of users returned by the dialog, or ``[]`` when the
        page is unavailable, the account has no followers, the followers
        link cannot be opened, or a ``TypeError``/``RuntimeWarning`` occurs
        (in which case ``session.aborting`` is set to ``True``)
    """
    if past_followers is None:
        past_followers = []

    session.quotient_breach = False

    try:

        user_name = user_name.strip()

        user_link = "https://www.instagram.com/{}/".format(user_name)
        web_address_navigator(session.browser, user_link)

        if not is_page_available(browser=session.browser,
                                 logger=session.logger):
            return []

        # check how many people are following this user.
        allfollowers, allfollowing = get_relationship_counts(
            browser=session.browser, username=user_name, logger=session.logger)

        # skip early for no followers
        if not allfollowers:
            session.logger.info("'{}' has no followers".format(user_name))
            return []

        elif allfollowers < max_amount:
            # only a warning: collection continues with what is available
            session.logger.warning(
                "'{}' has less followers- {}, than the given amount of {}".
                format(user_name, allfollowers, max_amount))

        # locate element to user's followers
        try:
            followers_link = session.browser.find_elements_by_xpath(
                '//a[@href="/{}/followers/"]'.format(user_name))
            if len(followers_link) > 0:
                click_element(session.browser, followers_link[0])
            else:
                # no followers link on the page is reported as "private"
                session.logger.error("'{} is private'".format(user_name))
                return []
        except NoSuchElementException:
            session.logger.error(
                'Could not find followers\' link for {}'.format(user_name))
            return []

        except Exception as e:
            # Exception rather than BaseException, so KeyboardInterrupt and
            # SystemExit are not swallowed here.
            session.logger.error("`followers_link` error {}".format(str(e)))
            return []

        person_list, _ = get_users_through_dialog(
            browser=session.browser,
            login=session.username,
            user_name=user_name,
            amount=max_amount,
            users_count=allfollowers,
            randomize=False,
            dont_include=[],
            blacklist=session.blacklist,
            follow_times=session.follow_times,
            simulation={
                "enabled": False,
                "percentage": 100
            },
            channel="Follow",
            jumps=session.jumps,
            logger=session.logger,
            logfolder=session.logfolder,
            past_followers=past_followers,
            wait_seconds=10,
        )

    except (TypeError, RuntimeWarning) as err:
        session.logger.error('Sorry, an error occurred: {}'.format(err))
        session.aborting = True
        return []

    return person_list