def scrape_followers(driver, account, members):
    """Yield [username, follower_count, post_count] for up to `members`
    followers of `account`.

    Opens the profile, clicks the follower link, then walks the followers
    modal 12 entries at a time, scrolling the last entry into view to force
    Instagram to load the next batch.
    """
    # Load account page
    driver.get("https://www.instagram.com/{}/".format(account))
    sleep(4)
    driver.find_element_by_partial_link_text("follower").click()
    # Wait for the followers modal to load
    waiter.find_element(driver, "//div[@role='dialog']", by=XPATH)
    # Total follower count, with thousands separators stripped.
    allfoll = int(
        (driver.find_element_by_xpath("//li[2]/a/span").text).replace(",", ""))
    posts = driver.find_element_by_xpath(
        '//*[@id="react-root"]/section/main/div/header/section/ul/li[1]/span/span'
    ).text
    print(allfoll)
    print("Posts ", posts)
    follower_css = "ul div li:nth-child({}) a.notranslate"  # Taking advange of CSS's nth-child functionality
    for group in itertools.count(start=1, step=12):
        for follower_index in range(group, group + 12):
            if follower_index > members:
                # Requested limit reached: end the generator.
                return None
            yield [
                waiter.find_element(driver,
                                    follower_css.format(follower_index)).text,
                allfoll, posts
            ]
        # Scroll the last loaded entry into view so Instagram loads 12 more.
        last_follower = waiter.find_element(driver,
                                            follower_css.format(group + 11))
        driver.execute_script("arguments[0].scrollIntoView();", last_follower)
def get_targets(self):
    """Yield follower usernames for every account listed in alvo.txt.

    alvo.txt holds one target account per line (accounts collected from the
    likers of other institutions).  For each account the followers modal is
    opened and usernames are yielded one by one, scrolling the modal to
    force Instagram to load the next batch of 12.
    """
    # Target accounts: one per line in alvo.txt.
    with open('alvo.txt') as f:
        alvo = [line.rstrip() for line in f]
    print(alvo)
    # Visit each target account in turn.
    for account in alvo:
        self.driver.get("https://www.instagram.com/{}/".format(account))
        time.sleep(random.uniform(5, 7))
        # Followers
        # try:
        #     allfoll = int(self.driver.find_element_by_xpath("//li[2]/a/span").text)
        # except ValueError:
        allfoll = 10
        self.driver.find_element_by_partial_link_text("follower").click()
        time.sleep(random.uniform(1.5, 2))
        trick_css = "ul div li:nth-child({}) a.notranslate"  # Taking advange of CSS's nth-child functionality
        wait = WebDriverWait(self.driver, 20)
        # BUG FIX ("TEM ERRO AQUI"): the original `break` only left the
        # inner loop, so the itertools.count() loop never terminated.  A
        # flag lets us leave the scroll loop and move to the next account.
        done = False
        for group in itertools.count(start=1, step=12):
            for follower_index in range(group, group + 12):
                if follower_index > allfoll:
                    done = True
                    break
                yield waiter.find_element(
                    self.driver, trick_css.format(follower_index)).text
            if done:
                break
            # Scroll the last loaded follower into view so Instagram loads
            # the next batch of 12.
            last_follower = waiter.find_element(
                self.driver, trick_css.format(group + 11))
            self.driver.execute_script("arguments[0].scrollIntoView();",
                                       last_follower)
def __getuserfollowingcript(self, targetaccount):
    """Yield usernames that `targetaccount` is following.

    Opens the profile, clicks the "following" counter (3rd <li> in the
    header), then walks the modal 12 entries at a time, scrolling each one
    into view so Instagram keeps loading more.  On any scraping failure the
    generator ends with StopIteration.value == -1.
    """
    # The link we must go to
    linkprofile = "https://www.instagram.com/" + targetaccount
    self.driver.get(linkprofile)
    time.sleep(round(random.uniform(4, 6), 2))
    self.driver.find_element_by_xpath(
        '//*[@id="react-root"]/section/main/div/header/section/ul/li[3]/a'
    ).click()
    time.sleep(round(random.uniform(4, 6), 2))
    # Wait for the "following" modal dialog to appear.
    waiter.find_element(self.driver, "//div[@role='dialog']", by=XPATH)
    following_css = "ul div li:nth-child({}) a.notranslate"
    for group in itertools.count(start=1, step=12):
        # A sleep is necessary so that instagram doesen't block us, FollowDelay is give to the constructor
        # time.sleep(round(random.uniform(self.FollowerDelay-1, self.FollowerDelay+1), 2))
        for following_index in range(group, group + 12):
            try:
                current_following = waiter.find_element(
                    self.driver, following_css.format(following_index)
                )
                # Scroll each entry into view before reading its text.
                self.driver.execute_script(
                    "arguments[0].scrollIntoView(true);", current_following
                )
                time.sleep(0.1)
                yield waiter.find_element(
                    self.driver, following_css.format(following_index)
                ).text
            except:
                # NOTE(review): bare except silently ends the generator on
                # any error, including the timeout that means "end of list".
                return -1
        # Re-acquire the last entry and scroll it into view so the next
        # batch of 12 gets loaded.
        last_following = waiter.find_element(
            self.driver, following_css.format(following_index)
        )
        self.driver.execute_script(
            "arguments[0].scrollIntoView(true);", last_following
        )
        time.sleep(0.5)
def list_following(self):
    """Yield up to `allfow` usernames from this profile's "following" modal.

    Only the first 20 entries are checked so we can periodically verify
    who follows us back.
    """
    trick_css = "ul div li:nth-child({}) a.notranslate"  # Taking advange of CSS's nth-child functionality
    # allfow = int(self.driver.find_element_by_xpath("//li[3]/a/span").text)
    # Every 20 entries we check who follows us back.
    allfow = 20
    self.driver.find_element_by_partial_link_text("following").click()
    time.sleep(random.uniform(4, 6))
    for group1 in itertools.count(start=1, step=12):
        for following_index in range(group1, group1 + 12):
            if following_index > allfow:
                # BUG FIX: `break` only exited the inner loop, so the
                # itertools.count() loop spun forever.  A plain `return`
                # ends the generator cleanly (PEP 479-safe, unlike the
                # commented-out `raise StopIteration`).
                return
            yield waiter.find_element(
                self.driver, trick_css.format(following_index)).text
        # Scroll the last loaded entry into view to make Instagram load 12 more.
        last_following = waiter.find_element(self.driver,
                                             trick_css.format(group1 + 11))
        self.driver.execute_script("arguments[0].scrollIntoView();",
                                   last_following)
def getWords(driver, list):
    """Scrape each follower's bio and build word-frequency structures.

    Args:
        driver: logged-in selenium webdriver.
        list: iterable of follower usernames (parameter name kept for
              caller compatibility although it shadows the builtin).

    Returns:
        (words, index): global word counts sorted by count descending, and
        a per-account list of {"account", "wordsTotal", "words"} dicts.
    """
    index = []
    # BUG FIX: `words` used to be created lazily via a fragile
    # `'words' not in locals()` check and was undefined (NameError at the
    # final sorted()) when no account yielded any words.  Initialise it.
    words = []
    for follower in list:
        url = 'https://www.instagram.com/' + follower + '/'
        driver.get(url)
        # Wait until we've been redirected away from the login page.
        WebDriverWait(driver, 20).until(
            EC.url_changes('https://www.instagram.com/accounts/login/?next=/'
                           + follower + '/'))
        waiter.find_element(driver, '//*[@class="-vDIg"]', by=XPATH)
        try:
            # Split the bio on non-letters, then drop stopwords/punctuation.
            accountWords = re.split(
                '[^a-zA-Z]',
                driver.find_element_by_xpath('//*[@class="-vDIg"]/span').text)
            accountWords = [
                word.translate(str.maketrans('', '',
                                             string.punctuation)).lower()
                for word in accountWords
                if ((word not in stopwords.words('english')) and word != "")
            ]
        except NoSuchElementException:
            continue
        accountWordsD = {
            "account": follower,
            "wordsTotal": len(accountWords),
            "words": []
        }
        for word in accountWords:
            # Per-account counts.
            newWordAccount = {"word": word, "count": 1}
            temp1 = findWord(accountWordsD, newWordAccount["word"])
            if (temp1 == -1):
                accountWordsD["words"].append(newWordAccount)
            else:
                temp1["count"] += 1
            # Global counts across all accounts.
            newWord = {"word": word, "count": 1}
            temp = next((item for item in words if item["word"] == word), -1)
            if (temp == -1):
                words.append(newWord)
            else:
                temp['count'] += 1
        index.append(accountWordsD)
    words = (sorted(words, key=itemgetter('count'), reverse=True))
    print(words)
    print(index)
    return words, index
def list_followers(driver, account):
    """Yield follower usernames of `account`, 12 modal entries at a time."""
    # profile
    driver.get("https://www.instagram.com/{0}/".format(account))
    time.sleep(random.uniform(1.5, 2))
    # Followers
    allfoll = int(driver.find_element_by_xpath("//li[2]/a/span").text)
    driver.find_element_by_partial_link_text("follower").click()
    time.sleep(random.uniform(1.5, 2))
    trick_css = "ul div li:nth-child({}) a.notranslate"  # Taking advange of CSS's nth-child functionality
    wait = WebDriverWait(driver, 20)
    for group in itertools.count(start=1, step=12):
        for follower_index in range(group, group + 12):
            if follower_index > allfoll:
                # BUG FIX (PEP 479): raising StopIteration inside a
                # generator becomes RuntimeError on Python 3.7+; a plain
                # `return` is the correct way to end the generator.
                return
            yield waiter.find_element(driver,
                                      trick_css.format(follower_index)).text
        # https://stackoverflow.com/questions/37233803/how-to-web-scrape-followers-from-instagram-web-browser
        # Instagram loads followers 12 at a time.  Re-acquire the last
        # follower element (it may have gone stale) and scroll it into
        # view, forcing Instagram to load another 12.
        last_follower = waiter.find_element(driver,
                                            trick_css.format(group + 11))
        driver.execute_script("arguments[0].scrollIntoView();", last_follower)
def insta_scrape(driver, account):
    """Yield follower names for every profile in `account` (an iterable).

    Also accumulates '/name/' style paths per profile and rewrites
    href_list.csv after each follower.  The scroll loop has no stop
    condition: it runs until a lookup fails, which the bare except treats
    as "done".
    """
    print('Started Scraping URLs')
    for profile in account:
        href_list = []
        driver.get("https://www.instagram.com/{0}/".format(profile))
        sleep(10)
        driver.find_element_by_partial_link_text("follower").click()
        # Wait for the followers modal dialog to load.
        waiter.find_element(driver, "//div[@role='dialog']", by=XPATH)
        try:
            # nth-child trick: followers are successive <li> entries.
            follower_css = "ul div li:nth-child({}) a.notranslate"
            for group in itertools.count(start=1, step=12):
                for follower_index in range(group, group + 12):
                    get_href = waiter.find_element(
                        driver, follower_css.format(follower_index)).text
                    name = get_href.replace('_', ' ')
                    href_list.append('/' + name + '/')
                    data = {'Links': href_list}
                    df = pd.DataFrame(data)
                    # NOTE(review): the CSV is rewritten for every single
                    # follower — presumably so progress survives a crash.
                    df.to_csv('href_list.csv', index=False)
                    yield name
                # Scroll the last loaded follower into view to force the
                # next batch of 12 to load.
                last_follower = waiter.find_element(
                    driver, follower_css.format(group + 11))
                driver.execute_script("arguments[0].scrollIntoView();",
                                      last_follower)
        except:
            # Any failure (timeout / missing element) is taken to mean the
            # end of the follower list was reached.
            print('Scraping Done')
def getFollowing(account, driver):
    """Return a list of follower usernames for `account`.

    NOTE(review): despite the name, this opens the /followers/ page while
    reading the count from //li[3]/a/span (normally the *following*
    counter) — confirm which list is intended.  Also mutates the
    module-level `data` dict (sets data["url"]).
    """
    data["url"] = 'https://www.instagram.com/' + account + '/followers/'
    driver.get(data["url"])
    driver.maximize_window()
    # Wait until we are redirected away from the login page.
    WebDriverWait(driver, 20).until(
        EC.url_changes('https://www.instagram.com/accounts/login/?next=/' +
                       account + '/followers/'))
    # click on following
    followers_btn = driver.find_elements_by_class_name('g47SY')
    followers_btn[2].click()
    waiter.find_element(driver, "//div[@role='dialog']", by=XPATH)
    # find the following window
    dialog = driver.find_element_by_xpath('/html/body/div[3]/div/div/div[2]')
    # find number of following
    follamount = driver.find_element_by_xpath("//li[3]/a/span").text
    follamount = follamount.replace(',', "")
    # Counts such as "12k" get scaled to an absolute number.
    if 'k' in follamount:
        follamount = follamount[:-1]
        follamount = int(float(follamount)) * 1000
    else:
        follamount = int(float(follamount))
    elem = driver.find_element_by_xpath(
        '//*[@class="FPmhX notranslate _0imsa "]')
    time.sleep(2)
    # Jiggle the list (page up/down) so Instagram starts lazy-loading.
    for i in range(0, 6):
        elem.send_keys(Keys.PAGE_UP)
        elem.send_keys(Keys.PAGE_UP)
        elem.send_keys(Keys.PAGE_DOWN)
    # scroll down the page
    num = driver.find_elements_by_xpath(
        '//*[@class="FPmhX notranslate _0imsa "]').__len__()
    # Keep scrolling the dialog until every follower row is rendered.
    # NOTE(review): if the row count can never reach `follamount` (e.g.
    # because of the 'k' rounding above) this loop never exits.
    while (int(follamount) != num):
        driver.execute_script(
            "arguments[0].scrollTop = arguments[0].scrollHeight", dialog)
        time.sleep(random.randint(500, 1000) / 1000)
        num = driver.find_elements_by_xpath(
            '//*[@class="FPmhX notranslate _0imsa "]').__len__()
        a = os.system('CLS')  # clear the console (Windows-only command)
        print("Extracting friends {} of {} ({}%)".format(
            num, follamount, round((num / follamount) * 100, 2)))
    # Parse the fully-loaded page with BeautifulSoup and pull the names.
    followers = BeautifulSoup(driver.page_source, features="lxml").find_all(
        "a", {'class': 'FPmhX notranslate _0imsa '})
    followers_arr = []
    for follower in followers:
        followers_arr.append(follower.get_text())
    return followers_arr
def login(driver, username, password):
    """Sign in to Instagram and block until the home feed has loaded."""
    driver.get("https://www.instagram.com/accounts/login/")
    # Fill both credential fields.
    for field, value in (("username", username), ("password", password)):
        waiter.find_write(driver,
                          "//div/input[@name='{}']".format(field),
                          value,
                          by=XPATH)
    # Submit the form.
    submit_xpath = "//div/button[@type='submit']"
    waiter.find_element(driver, submit_xpath, by=XPATH).click()
    # Wait for the dashboard element, confirming the login worked.
    waiter.find_element(driver, "//a/span[@aria-label='Find People']",
                        by=XPATH)
def getFollowers(driver):
    """Scroll through the followers modal until it stops growing, then
    return the parsed follower list (via parseHTML()).
    """
    driver.get("https://www.instagram.com/ali.alaoui12/")
    numFollowers = int(
        waiter.find_element(
            driver, "//li[2]/a/span",
            by=XPATH).text)  # li[2] refers to second li in structure
    print("You have " + str(numFollowers) + " followers")
    waiter.find_element(driver, "//a[@href='/ali.alaoui12/followers/']",
                        by=XPATH).click()
    # Makes sure followers have loaded: select the ul in the dialog div.
    dialog = waiter.find_element(driver, "//div[@role='dialog']/div[2]/ul",
                                 by=XPATH)
    dialog.click()
    units = waiter.find_elements(driver,
                                 "//div[@role='dialog']/div[2]/ul/div/li",
                                 by=XPATH)
    numUnits = len(units)
    print("There are " + str(numUnits) + " followers per scroll.")
    scrollFinished = False
    sameCounter = 0  # makes sure we should truly stop scrolling, once this reaches 5 we stop
    # BUG FIX: oldLastFollower was referenced before assignment on the
    # first pass through the loop (NameError); initialise it to a value no
    # real index can match.
    oldLastFollower = -1
    while (scrollFinished is False and (numFollowers > 12)
           ):  # no need to scroll if numfollowers is 12 or less
        followers = waiter.find_elements(
            driver, "//div[@role='dialog']/div[2]/ul/div/li", by=XPATH)
        lastFollower = len(followers) - 1
        dialog.click()  # ensures dialog is focused
        try:
            driver.execute_script("arguments[0].scrollIntoView();",
                                  followers[lastFollower])
        except StaleElementReferenceException:
            # in case an element reloads, we're "taking a breather"
            time.sleep(3.5)
            continue
        if (lastFollower > 12 and lastFollower == oldLastFollower
            ):  # only stop when it has loaded all followers
            sameCounter = sameCounter + 1
            if (sameCounter == 5):
                scrollFinished = True
        # to avoid any bugs
        oldLastFollower = lastFollower
        time.sleep(.5)
    units = waiter.find_elements(driver,
                                 "//div[@role='dialog']/div[2]/ul/div/li",
                                 by=XPATH)
    numUnits = len(units)
    print("To confirm, you have " + str(numUnits) + " followers.")
    followers = parseHTML()
    print(len(followers))
    return followers
def getFollowg(driver, instaUsr):
    """Yield usernames from `instaUsr`'s "following" modal.

    NOTE(review): the count() loop has no termination condition — the
    generator runs until a lookup raises or the caller stops consuming.
    """
    findField = driver.find_element_by_xpath(
        f"//a[@href='/{instaUsr}/following/']")
    findField.click()
    print('> Catching usernames in "Following" list ...')
    sleep(2.5)
    # nth-child trick over the modal's <li> entries.
    getFollowingCSS = 'ul div li:nth-child({}) a.notranslate'
    for listedUsers in count(start=1, step=12):
        for usrIndex in range(listedUsers, listedUsers + 12):
            yield find_element(driver, getFollowingCSS.format(usrIndex)).text
        # Scroll the last listed user into view so the next 12 load.
        lastUserListed = find_element(driver, getFollowingCSS.format(usrIndex))
        driver.execute_script("arguments[0].scrollIntoView()", lastUserListed)
def login(driver):
    """Log in to Instagram with hard-coded credentials, then wait for the
    dashboard to load.

    Fill in `username`/`password` below before use.
    """
    # BUG FIX: `username = #insert username here` left the assignment
    # without a right-hand side — a SyntaxError.  Placeholders restored.
    username = ""  # insert username here
    password = ""  # insert password here
    # Load page
    driver.get("https://www.instagram.com/accounts/login/")
    # Login
    waiter.find_write(driver, "//input[@name='username']", username, by=XPATH)
    waiter.find_write(driver, "//input[@name='password']", password, by=XPATH)
    waiter.find_element(driver, "//div/button[@type='submit']",
                        by=XPATH).click()
    # Wait for the user dashboard page to load
    waiter.find_element(driver, "//a/span[@aria-label='Find People']",
                        by=XPATH)
def getFollowing(driver):
    """Scroll through the "following" modal until it stops growing, then
    return the parsed list (via parseHTML()).
    """
    driver.get("https://www.instagram.com/ali.alaoui12/")
    numFollowing = int(
        waiter.find_element(driver, "//li[3]/a/span", by=XPATH).text)
    print("You are following " + str(numFollowing) + " people")
    waiter.find_element(driver, "//a[@href='/ali.alaoui12/following/']",
                        by=XPATH).click()
    dialog = waiter.find_element(driver, "//div[@role='dialog']/div[2]/ul",
                                 by=XPATH)
    dialog.click()
    units = waiter.find_elements(driver,
                                 "//div[@role='dialog']/div[2]/ul/div/li",
                                 by=XPATH)
    numUnits = len(units)
    print("There are " + str(numUnits) + " followers per scroll.")
    scrollFinished = False
    sameCounter = 0
    # BUG FIX: oldLastFollowing was referenced before assignment on the
    # first pass through the loop (NameError); initialise it to a value no
    # real index can match.
    oldLastFollowing = -1
    while (scrollFinished is False and (numFollowing > 12)
           ):  # no need to scroll if numfollowers is 12 or less
        following = waiter.find_elements(
            driver, "//div[@role='dialog']/div[2]/ul/div/li", by=XPATH)
        lastFollowing = len(following) - 1
        dialog.click()
        try:
            driver.execute_script("arguments[0].scrollIntoView();",
                                  following[lastFollowing])
        except StaleElementReferenceException:
            # in case an element reloads, we're "taking a breather"
            time.sleep(3.5)
            continue
        if (lastFollowing > 12 and lastFollowing == oldLastFollowing):
            sameCounter = sameCounter + 1
            if (sameCounter == 5):
                scrollFinished = True
        # to avoid any bugs
        oldLastFollowing = lastFollowing
        time.sleep(.5)
    following = parseHTML()
    print(len(following))
    return following
def login(driver, user, pwd):
    """Authenticate on Instagram; blocks until the homepage has loaded."""
    driver.get("https://www.instagram.com/accounts/login/")
    # Type the credentials into their input fields.
    user_xpath = "//div/input[@name='username']"
    pwd_xpath = "//div/input[@name='password']"
    waiter.find_write(driver, user_xpath, user, by=XPATH)
    waiter.find_write(driver, pwd_xpath, pwd, by=XPATH)
    # Submit the login form.
    waiter.find_element(driver, "//div/button[@type='submit']",
                        by=XPATH).click()
    # Don't return until the homepage element is present.
    waiter.find_element(driver, "//a/span[@aria-label='Find People']",
                        by=XPATH)
def scrape(driver, account, type, max):
    '''
    Scrapes an instagram page follower or following modal.

    Parameters:
        driver: web driver
        account: instagram account of interest
        type: modal type (i.e. "follower" or "following")
        max: max count of followers/followed to scrape

    Returns:
        generator of users (either followers or followed)
    '''
    # load account page
    driver.get("https://www.instagram.com/{0}/".format(account))
    # grab modal
    driver.find_element_by_partial_link_text(type).click()
    followers_modal = WebDriverWait(driver, 2).until(
        EC.element_to_be_clickable((By.XPATH, "//div[@class='isgrP']")))
    # scroll through list
    follower_index = 1
    follower_css = "ul div li:nth-child({}) a.notranslate"  # Taking advange of CSS's nth-child functionality
    # BUG FIX: the loop bound was the undefined name `num_followers`
    # (NameError at runtime); the documented `max` parameter is the
    # intended limit.
    while follower_index < max:
        driver.execute_script(
            'arguments[0].scrollTop = arguments[0].scrollTop + arguments[0].offsetHeight;',
            followers_modal)
        yield waiter.find_element(driver,
                                  follower_css.format(follower_index)).text
        follower_index += 1
def login(driver):
    """Log in to danharoo.com and wait for the members page to render.

    Credentials are left blank; fill them in before use.
    """
    user_id = ""
    user_pw = ""
    driver.get("http://danharoo.com/member/login.html")
    # Enter the member id and password.
    waiter.find_write(driver, "//*[@id='member_id']", user_id, by=XPATH)
    waiter.find_write(driver, "//*[@id='member_passwd']", user_pw, by=XPATH)
    # Press the login button.
    driver.find_element_by_xpath(
        "/html/body/div[4]/div/div/form/div/div/fieldset/a").click()
    # Wait for a landing-page element so we know the login completed.
    waiter.find_element(
        driver,
        "//*[@id='contents_main']/div[1]/div[1]/ul/li[11]/a/img",
        by=XPATH)
def scrape(url: str, league: str, year: int, n_teams=20) -> None:
    """Scrape match and lineup URLs for a league season from Transfermarkt.

    Match URLs and lineup URLs are cached in
    '{league}_{year}_urls.pickle' / '{league}_{year}_lineup_urls.pickle'
    so the slow collection steps run only once.  Prints the home and away
    lineups for the first two matches.
    """
    # get ChromeDriver
    driver = webdriver.Chrome('E:/USDE/scrapers/chromedriver.exe')
    # this snippet of code gets all the match urls, it's pretty slow so I
    # don't want to do it everytime
    if os.path.exists(f'{league}_{year}_urls.pickle'):
        with open(f'{league}_{year}_urls.pickle', 'rb') as f:
            list_url = pickle.load(f)
    else:
        print(f'Non esiste {league}_{year}_urls.pickle')
        driver.get(url)
        driver.implicitly_wait(3)
        list_url = []
        for i in tqdm(range(2, (n_teams - 1) * 2 + 2)):
            table = driver.find_element_by_xpath(
                f'//*[@id="main"]/div[10]/div[{i}]/div/table')
            for row in table.find_elements_by_css_selector('tr'):
                if row.get_attribute('class') != 'bg_blau_20':
                    for td in row.find_elements_by_css_selector('td'):
                        if td.get_attribute('class') == 'zentriert hauptlink':
                            list_url.append(
                                td.find_elements_by_css_selector('a')
                                [0].get_attribute('href'))
        with open(f'{league}_{year}_urls.pickle', 'wb') as f:
            pickle.dump(list_url, f)
    print(f'Total matches: {len(list_url)}')
    if os.path.exists(f'{league}_{year}_lineup_urls.pickle'):
        with open(f'{league}_{year}_lineup_urls.pickle', 'rb') as f:
            lineup_url = pickle.load(f)
    else:
        lineup_url = []
        for u in tqdm(list_url):
            time.sleep(2)
            driver.get(u)
            lineup_url.append(
                waiter.find_element(driver, '//*[@id="line-ups"]/a',
                                    by=XPATH).get_attribute('href'))
        with open(f'{league}_{year}_lineup_urls.pickle', 'wb') as f:
            pickle.dump(lineup_url, f)
    print(f'Total matches lineups: {len(lineup_url)}')
    for lu in lineup_url[:2]:
        driver.get(lu)
        print('Home Lineup:')
        for i in range(1, 12):
            a = driver.find_element_by_xpath(
                f'//*[@id="main"]/div[12]/div[1]/div/div[2]/table/tbody/tr[{i}]/td[2]/table/tbody/tr[1]/td[2]/a'
            )
            print(
                f"{a.text} {a.get_attribute('id')} {a.get_attribute('href')}")
        print('Away Lineup:')
        for i in range(1, 12):
            a = driver.find_element_by_xpath(
                f'//*[@id="main"]/div[12]/div[2]/div/div[2]/table/tbody/tr[{i}]/td[2]/table/tbody/tr[1]/td[2]/a'
            )
            # BUG FIX: this f-string was split mid-literal in the source
            # (unterminated string, SyntaxError); rejoined to match the
            # home-lineup print above.
            print(
                f"{a.text} {a.get_attribute('id')} {a.get_attribute('href')}")
def check_for_following(self):
    """Collect profile hrefs from this account's followers modal.

    Scrolls the page up to 4 times, gathers every anchor whose href
    contains '.com/', filters out /explore/ and our own profile, and
    returns up to 100 unique hrefs from the final scrape.
    """
    driver = self.driver
    time.sleep(5)
    driver.get('https://www.instagram.com/' + self.username + '/')
    followers_string = "//a[@href='/" + self.username + "/followers/']"
    waiter.find_element(driver, followers_string, by=XPATH).click()
    time.sleep(random.randint(2, 4))
    user_hrefs = []
    all_hrefs = ""
    # hrefs_scraped = 0
    counter = 0
    while counter < 4:
        # BUG FIX: the increment used to sit after the try/except, so the
        # `continue` in the except skipped it and a persistent error made
        # this loop spin forever.  Counting attempts up front bounds it.
        counter += 1
        try:
            driver.execute_script(
                "window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(random.randint(4, 5))
            all_hrefs = driver.find_elements_by_tag_name('a')
            all_hrefs = [
                elem.get_attribute('href') for elem in all_hrefs
                if '.com/' in elem.get_attribute('href')
            ]
            all_user_hrefs = []
            for ref in all_hrefs[19:]:
                u = list(ref.split("/"))
                if len(u) == 5:
                    # BUG FIX: the original `or` made this condition always
                    # true (x != a or x != b cannot both be false); `and`
                    # is the intended "not explore and not ourselves".
                    if (u[3] != 'explore') and (u[3] != self.username):
                        all_user_hrefs.append(u)
            [
                user_hrefs.append(href) for href in all_user_hrefs
                if href not in user_hrefs
            ]
            # hrefs_scraped = len(user_hrefs)
        except Exception:
            # self.errorFile.write('Exception thrown when getting hrefs for people that follow you')
            continue
    # Keep at most the first 100 unique hrefs from the last scrape.
    href = []
    counter = 0
    for h in all_hrefs:
        if counter < 100:
            if h not in href:
                href.append(h)
                counter += 1
        else:
            break
    return href
def log_in_username(self):
    """
    Returns the current text in the username field

    Returns:
        str
    """
    field = waiter.find_element(self.driver, 'username', by=NAME)
    return field.get_attribute('value')
def log_in_password(self):
    """
    Returns the current text in the password field

    Returns:
        str
    """
    field = waiter.find_element(self.driver, 'password', by=NAME)
    return field.get_attribute('value')
def log_in_link_click(self, wait_until_displayed=True):
    """
    Click the `log in` link on the log in page

    Args:
        wait_until_displayed (bool): Block until all the log in fields are displayed

    Returns:
        None

    Raises:
        TimeoutException: Raised if the link isn't found, and if the fields aren't displayed
    """
    link = waiter.find_element(self.driver, LOG_IN_LINK_XPATH, XPATH)
    link.click()
    if not wait_until_displayed:
        return
    # Block until the log-in fields report themselves as displayed.
    Wait(self, 30).until(lambda obj: obj.log_in_fields_displayed)
def Login(self):
    """Log in to Instagram, reusing saved cookies when possible.

    Returns:
        1  -- this object already logged in,
        0  -- fresh login succeeded (cookies re-saved to self.cookiepath),
        -1 -- already logged in from a previous session's cookies, login
              failed, or any exception occurred.
    """
    # Check that if we already logged in there is no need to do it again
    if self.loggedin is True:
        print("Already Logged in")
        return 1
    try:
        # Open driver and load cookies
        self.driver.get("https://www.instagram.com/accounts/login")
        cookies = pickle.load(open(self.cookiepath, "rb"))
        for cookie in cookies:
            # Selenium rejects cookies carrying an "expiry" field here.
            if "expiry" in cookie:
                del cookie["expiry"]
            self.driver.add_cookie(cookie)
        time.sleep(round(random.uniform(5, 7), 2))
        # Check if we are not already logged in on last session
        cururl = self.driver.current_url
        if cururl == "https://www.instagram.com/":
            print("Already Logged In no need To Login")
            return -1
        # Everything is fine we are logged in Let's fill the form
        waiter.find_write(self.driver, "//input", self.username, by=XPATH)
        time.sleep(round(random.uniform(2, 3), 2))
        waiter.find_write(self.driver, "//div[3]/div/label/input",
                          self.password, by=XPATH)
        time.sleep(round(random.uniform(2, 3), 2))
        waiter.find_element(self.driver, "//button/div", by=XPATH).click()
        time.sleep(round(random.uniform(4, 6), 2))
        # Check if login was successful
        cururl = self.driver.current_url
        if cururl == "https://www.instagram.com/":
            print("Login success")
            # Persist the fresh session cookies for next time.
            pickle.dump(self.driver.get_cookies(),
                        open(self.cookiepath, "wb"))
            self.loggedin = True
            return 0
        # We didn't get to the main page so there was a problem
        else:
            print("Error: Login Was not successful")
            return -1
    except Exception as err:
        print("Error: in Login")
        traceback.print_exc()
        print(str(err))
        return -1
def scrape_followers(driver, account):
    """Yield follower usernames of `account` until the full count is reached."""
    # Load account page
    driver.get("https://www.instagram.com/{0}/".format(account))
    sleep(2)
    driver.find_element_by_partial_link_text("follower").click()
    # Wait for the followers modal to load.
    waiter.find_element(driver, "//div[@role='dialog']", by=XPATH)
    allfoll = int(driver.find_element_by_xpath("//li[2]/a/span").text)
    follower_css = "ul div li:nth-child({}) a.notranslate"
    for group in itertools.count(start=1, step=12):
        for follower_index in range(group, group + 12):
            if follower_index > allfoll:
                # BUG FIX (PEP 479): `raise StopIteration` inside a
                # generator becomes RuntimeError on Python 3.7+; `return`
                # is the correct way to end the generator.
                return
            yield waiter.find_element(
                driver, follower_css.format(follower_index)).text
        # Scroll the last loaded entry into view so the next 12 load.
        last_follower = waiter.find_element(driver,
                                            follower_css.format(group + 11))
        driver.execute_script("arguments[0].scrollIntoView();", last_follower)
def scrape_following(self, username):
    """Yield usernames that `username` is following, 12 at a time.

    The count() loop has no stop condition; iteration ends when a lookup
    raises, which the broad except turns into a printed message.
    """
    browser = self.browser
    try:
        browser.get("https://www.instagram.com/" + username + "/")
        browser.find_element_by_partial_link_text('following').click()
        # Wait for the "following" modal dialog to appear.
        waiter.find_element(browser, "//div[@role='dialog']", by=XPATH)
        following_css = "ul div li:nth-child({}) a.notranslate"
        following_index = 0
        for group in itertools.count(start=1, step=12):
            for following_index in range(group, group + 12):
                yield waiter.find_element(
                    browser, following_css.format(following_index)).text
            # Scroll the last loaded entry into view so the next 12 load.
            last_following = waiter.find_element(
                browser, following_css.format(following_index))
            browser.execute_script("arguments[0].scrollIntoView();",
                                   last_following)
    except Exception as e:
        # NOTE(review): swallows all errors — including the timeout that
        # normally means "end of list" — and just reports them.
        print("Error Code: ", e)
def scrape_followers(driver, account):
    """Generator yielding follower usernames of `account` one by one."""
    # Load account page
    driver.get("https://www.instagram.com/{0}/".format(account))
    # Click the 'Follower(s)' link
    # driver.find_element_by_partial_link_text("follower").click()
    xpathMod = "//a[@href='/" + account + "/followers/']"
    waiter.find_element(driver, xpathMod, by=XPATH).click()
    # Wait for the followers modal to load
    waiter.find_element(driver, "//div[@role='dialog']", by=XPATH)
    # At this point a Followers modal pops open. If you immediately scroll to the bottom,
    # you hit a stopping point and a "See All Suggestions" link. If you fiddle with the
    # model by scrolling up and down, you can force it to load additional followers for
    # that person.
    # Now the modal will begin loading followers every time you scroll to the bottom.
    # Keep scrolling in a loop until you've hit the desired number of followers.
    # In this instance, I'm using a generator to return followers one-by-one
    follower_css = "ul div li:nth-child({}) a.notranslate"  # Taking advange of CSS's nth-child functionality
    for group in itertools.count(start=1, step=12):
        for follower_index in range(group, group + 12):
            yield waiter.find_element(
                driver, follower_css.format(follower_index)).text
        # Instagram loads followers 12 at a time. Find the last follower element
        # and scroll it into view, forcing instagram to load another 12
        # Even though we just found this elem in the previous for loop, there can
        # potentially be large amount of time between that call and this one,
        # and the element might have gone stale. Lets just re-acquire it to avoid
        # that
        last_follower = waiter.find_element(driver,
                                            follower_css.format(follower_index))
        driver.execute_script("arguments[0].scrollIntoView();", last_follower)
def __getuserusedhashtag(self, targetaccount):
    """Return the list of distinct hashtags used in `targetaccount`'s posts.

    Collects post URLs (retrying the collection 5 times and keeping the
    largest batch), then visits each post and pulls '#'-words from the
    caption.  Returns -1 on an unrecoverable error.
    """
    try:
        hashtags = []
        LikedPostsLinks = commenters_util.get_photo_urls_from_profile(
            self.driver,
            targetaccount,
            50,
            False,
        )
        # Retry URL collection; keep whichever attempt found the most.
        for i in range(0, 5):
            LikedPostsLinks2 = commenters_util.get_photo_urls_from_profile(
                self.driver,
                targetaccount,
                50,
                False,
            )
            Linksgot = len(LikedPostsLinks2)
            if Linksgot > len(LikedPostsLinks):
                LikedPostsLinks = LikedPostsLinks2
        for x in LikedPostsLinks:
            # Up to 2 attempts per post.
            for i in range(0, 2):
                try:
                    time.sleep(2)
                    self.driver.get(x)
                    time.sleep(3)
                    # Caption element (absolute XPath into the post page).
                    ss = waiter.find_element(
                        self.driver,
                        "/html/body/div[1]/section/main/div/div/article/div[2]/div[1]/ul/div/li/div/div/div[2]/span",
                        by=XPATH,
                    )
                    ss = self.driver.find_element_by_xpath(
                        "/html/body/div[1]/section/main/div/div/article/div[2]/div[1]/ul/div/li/div/div/div[2]/span"
                    )
                    posttext = ss.text
                    pp = posttext.split()
                    for word in pp:
                        # Keep only plain, new hashtags (single leading '#').
                        if (
                            word[0] == "#"
                            and (word not in hashtags)
                            and ("#" not in word[1:])
                        ):
                            hashtags.append(word)
                    break
                except:
                    # Lookup failed — retry the same post once more.
                    print("Error Try Again")
        return hashtags
    except:
        print("some error in hashtag")
        return -1
def login(driver, username, password):
    '''
    Logs into instagram.
    '''
    # load page
    driver.get("https://www.instagram.com/accounts/login/")
    # login: write both credentials, then submit
    credentials = (
        ("//div/label/input[@name='username']", username),
        ("//div/label/input[@name='password']", password),
    )
    for xpath, text in credentials:
        waiter.find_write(driver, xpath, text, by=XPATH)
    waiter.find_element(driver, "//div/button[@type='submit']",
                        by=XPATH).click()
    # wait for the page to load. increase from 5 if internet is slow
    time.sleep(5)
    print("login complete.\n")
def __getFollowx(self, onPerform):
    """Yield usernames from this account's followers or following list.

    Args:
        onPerform: 'followers' or 'following' -- which list to open.

    Side effect: stores the list's total count in self.totalUserFollowx.
    """
    self.browser.get(f'https://www.instagram.com/{self.__username}/')
    if onPerform == 'following':
        Util.clearScreen()
        print(f'> Watching "{self.__username}" profile.\n')
    # Anchor of the followers/following counter in the profile header.
    ctxSelector = self.browser.find_element_by_xpath(
        f'//a[@href=\'/{self.__username}/{onPerform}/\']')
    self.totalUserFollowx = int(
        ctxSelector.find_element_by_tag_name('span').text)
    ctxSelector.click()
    print(f'> Collecting users in "{onPerform.capitalize()}" list ...')
    usrXMLPath = 'ul div li:nth-child({}) a.notranslate'
    # Progress bar over batches of 12 entries.
    listIterator = tqdm(range(1, self.totalUserFollowx, 12),
                        ncols=65,
                        leave=False)
    for currUsrs in listIterator:
        for usrIndex in range(currUsrs, currUsrs + 12):
            yield find_element(self.browser, usrXMLPath.format(usrIndex)).text
        # Scroll the last listed user into view so the next 12 load.
        lastUsrListed = find_element(self.browser,
                                     usrXMLPath.format(usrIndex))
        self.browser.execute_script("arguments[0].scrollIntoView()",
                                    lastUsrListed)
def gethashtagAccountquality(self):
    """Fill in `numpost` for one random unscored hashtag in HashtagAccount.

    Picks a hashtag whose numpost is NULL, scrapes its post count from the
    Instagram tag page, and writes it back.  Each DB call is retried up to
    5 times, re-initialising the connection between attempts.

    Returns:
        "Success" on update, 2 when no unscored hashtag remains,
        -1 on error.
    """
    try:
        Failed = True
        count = 0
        # Retry the SELECT up to 5 times (connection may have dropped).
        for i in range(0, 5):
            try:
                Sqlquery = "SELECT Hashtag FROM HashtagAccount WHERE numpost IS NULL order by Rand() Limit 1"
                self.mycursor.execute(Sqlquery)
                for db in self.mycursor:
                    count += 1
                if count == 0:
                    return 2
                Failed = False
                break
            except:
                time.sleep(1)
                self.__init_db()
                time.sleep(1)
        if Failed == False:
            hashtag = db[0]
            # Tag pages live under /explore/tags/<name-without-#>.
            url = "https://www.instagram.com/explore/tags/" + hashtag[1:]
            self.driver.get(url)
            time.sleep(5)
            ss = waiter.find_element(
                self.driver,
                "/html/body/div[1]/section/main/header/div[2]/div[1]/div[2]/span/span",
                by=XPATH,
            )
            num = ss.text
            num = num.replace(",", "", 20)  # strip thousands separators
            num = int(num)
            # Retry the UPDATE up to 5 times as well.
            for i in range(0, 5):
                try:
                    Sqldata = [num, hashtag]
                    Sqlquery = (
                        "Update HashtagAccount SET numpost = %s WHERE Hashtag = %s"
                    )
                    self.mycursor.execute(Sqlquery, Sqldata)
                    self.mydb.commit()
                    return "Success"
                except:
                    time.sleep(1)
                    self.__init_db()
                    time.sleep(1)
        else:
            print("Error in Hashtag Scraping")
            return -1
    except:
        print("Error in Hashtag Scraping")
        return -1
def follow_users_from_user(self, user):
    """Open `user`'s followers modal, scroll it, and follow up to 5 people.

    The follow button is located by its (fragile) generated CSS classes;
    failures are silently skipped.
    """
    driver = self.driver
    driver.get("https://www.instagram.com/" + user + "/?hl=pt-br")
    time.sleep(5)
    waiter.find_element(driver, "//a[@href='/" + user + "/followers/']",
                        by=XPATH).click()
    time.sleep(5)
    # follow = WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'a.BY3EC > button')))
    # follow.click()
    # segui = driver.find_elements_by_class_name('BY3EC')
    # if segui:
    #     segui[0].click()
    for y in range(12):
        # find all li elements in list
        fBody = driver.find_element_by_xpath("//div[@class='isgrP']")
        scroll = 0
        # NOTE(review): original comment said "scroll 5 times" but the
        # loop condition only scrolls twice per pass.
        while scroll < 2:
            driver.execute_script(
                'arguments[0].scrollTop = arguments[0].scrollTop + arguments[0].offsetHeight;',
                fBody)
            time.sleep(1)
            scroll += 1
        fList = driver.find_elements_by_xpath("//div[@class='isgrP']//li")
        print("Dando scroll em {} usuários.".format(len(fList)))
    print("Fim do scroll.")
    # Try to follow up to 5 users; any failure just moves on.
    for x in range(5):
        try:
            follow_button = driver.find_element_by_css_selector(
                'button.sqdOP.L3NKy.y3zKF')
            follow_button.click()
            print("[ Usuário seguido! ]")
            time.sleep(2)
        except:
            continue