Exemplo n.º 1
0
def run_selenium(driver: WebDriver, user, passwd, id):
    """Log ``user`` in at the module-level ``address``, select Tokyo, start the shell.

    The flow has three stages:

    1. Submit the user id, then the password, and wait until the browser URL
       contains ``address`` again. Any ``WebDriverException`` raised in this
       stage (including wait timeouts and the explicit ones raised below)
       clears the cookies and restarts the whole function.
    2. If the header's location name is not ``"Tokyo"``, open the region
       selector and choose the ``jp-tok`` entry.
    3. Hand over to ``switch_to_frame_execute``.

    Args:
        driver: Selenium driver used for the whole session.
        user: Login id. The part before ``@`` with ``.`` replaced by ``_`` is
            passed on to ``switch_to_frame_execute``.
        passwd: Password typed into the login form.
        id: Passed through unchanged to ``switch_to_frame_execute``.

    Returns:
        Whatever ``switch_to_frame_execute`` returns.
    """
    # tor_process = setupTor(id)
    # setup_proxy(driver, "127.0.0.1", int(id) * 2 + 9050 + 1)
    driver.get(address)
    print(f"{user}: start get /shell")
    try:
        # Stage 1a: submit the user id, retrying when the click is blocked.
        while True:
            try:
                WebDriverWait(driver, 180).until(
                    EC.presence_of_element_located(
                        (By.CLASS_NAME, "login-form__realm-user-id-row")))
                user_form = driver.find_element_by_class_name(
                    "login-form__realm-user-id-row")
                user_form.find_element_by_id("userid").send_keys(user)
                user_form.find_element_by_tag_name("button").click()
                print(f"{user}: send username")
                try:
                    # An "error-header" element showing up within 30 seconds
                    # means the attempt was rejected; a timeout is the
                    # success path here.
                    WebDriverWait(driver, 30).until(
                        EC.presence_of_element_located(
                            (By.CLASS_NAME, "error-header")))
                    print(
                        f'{user}: err={driver.find_element_by_class_name("error-header").get_attribute("innerHTML")}'
                    )
                    raise WebDriverException(f"{user} tor ip invalidate")
                except TimeoutException:
                    print(f"{user}: pass ip validate,start clear proxy")
                    # tor_process.terminate()
                    # driver.execute_script("window.open('')")
                    # default_handle = driver.current_window_handle
                    # handles = list(driver.window_handles)
                    # handles.remove(default_handle)
                    # driver.switch_to.window(handles[0])
                    # setup_proxy(driver, "", 0)
                    # driver.close()
                    # driver.switch_to.window(default_handle)
                break
            except (ElementClickInterceptedException,
                    ElementNotInteractableException):
                # NOTE(review): check_dialog presumably dismisses whatever is
                # covering the form - confirm against its definition.
                check_dialog(driver, user)
                continue
        # Stage 1b: submit the password, retrying when the click is blocked.
        while True:
            try:
                WebDriverWait(driver, 180).until(
                    EC.presence_of_element_located(
                        (By.CSS_SELECTOR,
                         "[class='login-form__password-row ']")))
                passwd_form = driver.find_element_by_class_name(
                    "login-form__password-row ")
                passwd_input = passwd_form.find_element_by_id("password")
                passwd_input.clear()
                passwd_input.send_keys(passwd)
                print(user + ": send passwd")
                # The second element matching this class list is clicked.
                driver.find_elements_by_css_selector(
                    '[class="login-form__button bx--btn bx--btn--primary"]'
                )[1].click()
                print(user + ": click login")
                break
            except (ElementClickInterceptedException,
                    ElementNotInteractableException) as e:
                print(f'{user}:do login failed,msg= {e.msg}')
                check_dialog(driver, user)
                continue
        # Stage 1c: poll every 2 seconds, for at most 180 seconds, until the
        # browser is back on a URL containing ``address``.
        s_time = time.time()
        while True:
            if address in driver.current_url:
                print(f"{user} enter shell success")
                break
            else:
                c_time = time.time()
                if c_time - s_time > 180:
                    print(
                        f"{user}: enter shell too long,re enter,curentUrl={driver.current_url}"
                    )
                    raise WebDriverException(
                        f"{user}: userid or passwd error,relogin")
                sleep(2)
    except WebDriverException as e:
        print(f"{user}: login failed msg={e.msg}")
        driver.delete_all_cookies()
        # tor_process.terminate()
        # Return the retry's result. Without the ``return`` the outer call
        # would fall through after a successful retry and run the region
        # switch and ``switch_to_frame_execute`` a second time.
        return run_selenium(driver, user, passwd, id)

    # change to tokyo
    while True:
        try:
            WebDriverWait(driver, 120).until(
                EC.presence_of_element_located(
                    (By.CLASS_NAME, "bx--header__global")))
            header = driver.find_element_by_class_name("bx--header__global")
            if header.find_element_by_class_name(
                    "header__location-name").text != "Tokyo":
                print(f"{user}: region is not tokyo,begin to switch")
                header.find_element_by_css_selector(
                    "[class='header__location-change-button bx--btn bx--btn--ghost']"
                ).click()
                selects = safelyFindId(driver, "selectRegion")
                selects.click()
                selects.find_element_by_css_selector(
                    "[value='jp-tok']").click()
                safelyFindCSS(driver,
                              "[class='bx--btn bx--btn--primary']").click()
            break
        except (ElementClickInterceptedException,
                ElementNotInteractableException):
            check_dialog(driver, user)
            continue
        except WebDriverException:
            # Any other driver error (e.g. the 120 second wait timing out):
            # reload the page and try the region check again.
            print(user + ": enter shell too long,refresh page")
            driver.refresh()
    return switch_to_frame_execute(driver,
                                   str(user).split("@")[0].replace('.', "_"),
                                   id)
Exemplo n.º 2
0
class IGAnalytics:
    def __init__(self, username, password, actionstring, inspoaccounts,
                 inspohashtags):
        """Store the account settings and start a mobile-emulated Chrome driver.

        ``inspoaccounts`` and ``inspohashtags`` are converted to ``str`` and
        split on ``'|'`` (at most 30 splits each). The browser is started with
        the "Galaxy S5" mobile emulation profile, resized to 331x731 and its
        cookies are cleared.
        """
        self.username = username
        self.password = password
        self.actionString = str(actionstring)
        self.inspoAccounts = str(inspoaccounts).split('|', 30)
        self.inspoHashtags = str(inspohashtags).split('|', 30)
        self.hashtags = []

        # Browser configuration: emulate a mobile device.
        mobile_options = Options()
        mobile_options.add_experimental_option(
            "mobileEmulation", {"deviceName": "Galaxy S5"})

        self.driver = WebDriver(
            executable_path="venv/Lib/chromedriver_win32/chromedriver.exe",
            chrome_options=mobile_options)
        browser = self.driver
        browser.set_window_size(331, 731)
        browser.delete_all_cookies()

    # region UTILITY METHODS

    def login(self):
        """Open the Instagram login page and submit the stored credentials.

        Short random pauses are inserted between the individual steps. The
        form is submitted by sending ENTER to the password field.
        """
        browser = self.driver

        def fill(xpath, text):
            # Locate one input, empty it and type ``text`` into it.
            field = browser.find_element_by_xpath(xpath)
            field.clear()
            field.send_keys(text)
            return field

        browser.get(
            "https://www.instagram.com/accounts/login/?source=auth_switcher")
        time.sleep(random.randint(1, 2))

        fill("//input[@name='username']", self.username)
        time.sleep(random.randint(1, 3))

        password_box = fill("//input[@name='password']", self.password)
        time.sleep(random.randint(2, 3))

        password_box.send_keys(Keys.ENTER)
        time.sleep(random.randint(1, 2))
    def logout(self):
        self.driver.close()

    '''
    def resolve_actions_to_perform(self):
        if self.actionString[0] == 'y':
            self.debug_method()
    '''

    def ensure_folder_system_exists(self):
        if not os.path.exists(
                os.path.join(
                    os.path.dirname(__file__) + '/Photos/', self.username)):
            os.makedirs(
                os.path.join(
                    os.path.dirname(__file__) + '/Photos/', self.username))
        for inspo in self.inspoAccounts:
            if not os.path.exists(
                    os.path.join(
                        os.path.dirname(__file__) + '/Photos/', self.username,
                        ('@' + inspo))):
                os.makedirs(
                    os.path.join(
                        os.path.dirname(__file__) + '/Photos/', self.username,
                        ('@' + inspo)))
        for inspo in self.inspoHashtags:
            if not os.path.exists(
                    os.path.join(
                        os.path.dirname(__file__) + '/Photos/', self.username,
                        ('#' + inspo))):
                os.makedirs(
                    os.path.join(
                        os.path.dirname(__file__) + '/Photos/', self.username,
                        ('#' + inspo)))

    @staticmethod
    def clean_path(path):
        new_path = (str(path)).replace('\\', '/', 10)
        new_path2 = new_path.replace('/', '\\', 10)
        return new_path2

    @staticmethod
    def debug_method(self):
        # incomplete
        a = 1

    # endregion

    def combomash_method(self):
        """Interactively scrape post links from two sources and compare them.

        Prompts for a first hashtag, for the kind of second source (``H`` for
        a hashtag, ``L`` for a location; either case is accepted) and for the
        number of pictures to take from each source. The post links gathered
        from both sources are handed to ``compare_hrefs``.

        When a hashtag is chosen as the second source, its name is prompted
        for and its tag page is opened. Location support is not implemented:
        with ``L`` no second page is opened and the page that is already
        loaded is scraped again.

        Raises:
            ValueError: If the picture count entered is not an integer.
        """
        first_hashtag = input(
            'What hashtag do you want to use as the first reference: ')
        second_ref = None
        while True:
            what_is_second_ref = input(
                'Do you want to use a location or a second hashtag as your second source '
                '(enter H '
                'for hashtag and L for location): ')
            if what_is_second_ref in ('h', 'H'):
                second_ref = input(
                    'What hashtag do you want to use as the second reference: ')
                break
            elif what_is_second_ref in ('l', 'L'):
                # TODO: location lookup is not implemented yet.
                # holder = input('What location do you want to use as the second reference: ')
                # second_ref = self.resolveLocation(holder)
                break
            else:
                print('Please pic chose one of the two options')
        num_to_act = int(
            input('How many pictures from each source do you want to get: '))

        driver = self.driver
        driver.get("https://www.instagram.com/explore/tags/" + first_hashtag +
                   "/")
        time.sleep(random.randint(3, 5))
        first_hrefs = self._collect_post_hrefs(num_to_act)

        if second_ref is not None:
            driver.get("https://www.instagram.com/explore/tags/" +
                       second_ref + "/")
        # else:
        # driver.get("https://www.instagram.com/explore/locations/" + second_ref + "/")

        time.sleep(random.randint(3, 7))
        second_hrefs = self._collect_post_hrefs(num_to_act)
        self.compare_hrefs(first_hrefs, second_hrefs)

    def _collect_post_hrefs(self, wanted, max_idle_rounds=5):
        """Scroll the loaded page and return up to ``wanted`` unique post links.

        A post link is any anchor ``href`` containing ``'.com/p/'``. Links are
        accumulated across scrolls in the order they are first seen. Scrolling
        stops once ``wanted`` links are known, or after ``max_idle_rounds``
        consecutive rounds that produced no new link (page exhausted or the
        driver keeps failing), so the loop always terminates.
        """
        driver = self.driver
        collected = []
        idle_rounds = 0
        while len(collected) < wanted and idle_rounds < max_idle_rounds:
            known_before = len(collected)
            try:
                driver.execute_script(
                    "window.scrollTo(0, document.body.scrollHeight);")
                time.sleep(random.randint(2, 7))
                for anchor in driver.find_elements_by_tag_name('a'):
                    href = anchor.get_attribute('href')
                    # Anchors without an href yield None; skip them instead
                    # of failing the whole round.
                    if href and '.com/p/' in href and href not in collected:
                        collected.append(href)
            except Exception as exc:
                # Best effort: a failed round (e.g. elements replaced while
                # being read) is reported and counted as an idle round.
                print('Exception thrown when getting hrefs: ' + repr(exc))
            if len(collected) > known_before:
                idle_rounds = 0
            else:
                idle_rounds += 1
        return collected[:wanted]

    def compare_hrefs(self, hrefs1, hrefs2):
        driver = self.driver
        first_user_array = []
        second_user_array = []
        for h1 in hrefs1:
            driver.get(h1)
            time.sleep(random.randint(1, 3))
            username_link = driver.find_element_by_xpath(
                '// *[ @ id = "react-root"] / section '
                '/ main / div / div / article / header '
                '/ div[2] / div[1] / div[1] / h2 / a')
            user1 = username_link.text
            first_user_array.append(user1)
            time.sleep(random.randint(1, 3))
        for h2 in hrefs2:
            driver.get(h2)
            time.sleep(random.randint(1, 3))
            username_link = driver.find_element_by_xpath(
                '// *[ @ id = "react-root"] / section /'
                ' main / div / div / article / header /'
                ' div[2] / div[1] / div[1] / h2 / a')
            user2 = username_link.text
            second_user_array.append(user2)
            time.sleep(random.randint(1, 3))
        '''
        first_user_array = np.array(first_user_array)
        second_user_array = np.array(second_user_array)
        result_user_array = np.intersect1d(first_user_array, second_user_array)

        self.userleadsFile.write('User Leads:\n')
        self.userleadsFile.write('Started At: ' + datetime.datetime.now().strftime("%I%M%p_%d%b%y") + '\n')
        self.userleadsFile.write('Total objects compared: ' + (hrefs1.count() * 2) + '\n\n')

        self.userleadsFile.write('Users in both sources: ' '\n')
        for u in result_user_array:
            self.userleadsFile.write(u + '\n')
        if result_user_array.count() == 0:
            self.userleadsFile('None.\n')
        self.userleadsFile.write('Stopped At: ' + datetime.datetime.now().strftime("%I%M%p_%d%b%y") + '\n\n')
        self.userleadsFile.close()
        '''

    def check_for_following(self):
        """Open this account's followers list and return links found there.

        The profile page of ``self.username`` is opened, the followers link
        is clicked and the page is scrolled until four rounds have been
        scraped successfully (at most twelve attempts in total, so a driver
        that keeps failing cannot hang the method).

        Returns:
            Up to 100 unique ``href`` strings containing ``'.com/'``, in page
            order, taken from the last successful scrape. The list is empty
            if no scrape succeeded.
        """
        driver = self.driver
        time.sleep(5)
        driver.get('https://www.instagram.com/' + self.username + '/')
        followers_string = "//a[@href='/" + self.username + "/followers/']"
        waiter.find_element(driver, followers_string, by=XPATH).click()
        time.sleep(random.randint(2, 4))

        # NOTE(review): user_hrefs accumulates the split URL parts of links
        # that look like profile URLs, but only all_hrefs feeds the return
        # value. Confirm which of the two callers are meant to receive.
        user_hrefs = []
        all_hrefs = []
        successful_rounds = 0
        attempts_left = 12
        while successful_rounds < 4 and attempts_left > 0:
            attempts_left -= 1
            try:
                driver.execute_script(
                    "window.scrollTo(0, document.body.scrollHeight);")
                time.sleep(random.randint(4, 5))
                scraped = []
                for elem in driver.find_elements_by_tag_name('a'):
                    link = elem.get_attribute('href')
                    # Anchors without an href yield None; skip them.
                    if link and '.com/' in link:
                        scraped.append(link)
                # Only replace the previous result once the round is complete.
                all_hrefs = scraped
                # The first 19 links are skipped, as in the original code.
                for ref in all_hrefs[19:]:
                    u = ref.split("/")
                    # A link must be neither the explore page nor our own
                    # profile (the former ``or`` accepted every link).
                    if (len(u) == 5 and u[3] != 'explore'
                            and u[3] != self.username
                            and u not in user_hrefs):
                        user_hrefs.append(u)
            except Exception:
                # Best effort: a failed round is retried while attempts remain.
                # self.errorFile.write('Exception thrown when getting hrefs for people that follow you')
                continue
            successful_rounds += 1

        href = []
        for h in all_hrefs:
            if len(href) >= 100:
                break
            if h not in href:
                href.append(h)
        return href