Exemplo n.º 1
0
from src.utils import Utils

# Fetch the "scraped" flag stored under this key and echo the raw value.
# NOTE(review): presumably a one-argument Utils.cache call is a read - confirm.
cache = Utils.cache('VisitLancashire_scraped')
print(cache)
# The string '1' is the value this script treats as "flag is set".
print('Cache exists' if cache == '1' else 'Cache not exists')
Exemplo n.º 2
0
    def run(self):
        """Scrape the followers of each configured account, then follow them.

        For every name in ``self.template['screen_names']`` the followers page
        is scraped into the ``followers`` table unless the ``<name>_scraped``
        cache flag equals ``'1'``. Afterwards up to ``self.template['amount']``
        rows of that account with ``followed = 0`` are visited and followed.
        The total number of processed users is printed at the end.
        """
        followedCount = 0
        for accountName in self.template['screen_names']:
            #Only scrape when the followers have not been cached yet
            if Utils.cache(accountName + '_scraped') != '1':
                self._scrapeFollowers(accountName)
                # NOTE(review): 43200 looks like a TTL in seconds (12h) -
                # confirm against Utils.cache
                Utils.cache(accountName + '_scraped', 1, 43200)

            followedCount += self._followUsers(accountName)

        #Final output
        print("Followed " + str(followedCount) + " people")

    def _scrapeFollowers(self, accountName):
        """Insert every follower row shown on accountName's followers page."""
        #Absolute path to the container that holds the follower rows
        listXpath = "/html/body/div[1]/div[1]/div[1]/div[2]/main/div[1]/div[1]/div[1]/div[1]/div[1]/div[2]/section/div[1]/div[1]"

        self.browser.get('https://twitter.com/' + accountName + '/followers')

        #Grab all followers
        while True:
            #Wait for page load and get links
            WebDriverWait(self.browser, 5).until(
                EC.presence_of_element_located((By.XPATH, listXpath)))
            # find_elements(By...) replaces the find_elements_by_* helpers,
            # which newer Selenium releases no longer provide
            followerLinks = self.browser.find_elements(By.XPATH,
                                                       listXpath + '/div')

            for followerLink in followerLinks:
                try:
                    userLink = followerLink.find_element(
                        By.CSS_SELECTOR, 'a').get_attribute('href')
                    rowText = followerLink.text
                    #Store the follower; rows already present are ignored
                    DB.execute(
                        '''INSERT OR IGNORE INTO 
                        followers(user_name, user_link, followed, followed_at, parent_account) VALUES(?, ?, ?, ?, ?);''',
                        (
                            userLink.replace('https://twitter.com/', ''),
                            userLink,
                            'Following' in rowText or 'Pending' in rowText,
                            '',
                            accountName,
                        ))
                except Exception:
                    # Best effort: some rows load without data. Catching
                    # Exception (not a bare except) keeps Ctrl+C working.
                    print('Ignoring...')

            #Handle infinite scroll (https://dev.to/mr_h/python-selenium-infinite-scrolling-3o12)
            last_height = self.browser.execute_script(
                "return document.body.scrollHeight")  # Get scroll height
            self.browser.execute_script(
                "window.scrollTo(0, document.body.scrollHeight);"
            )  #Scroll to bottom
            time.sleep(5)  # Wait to load page

            # The page stopped growing, so there is nothing more to load
            new_height = self.browser.execute_script(
                "return document.body.scrollHeight")
            if new_height == last_height:
                break

    def _followUsers(self, accountName):
        """Follow stored, not-yet-followed followers of accountName.

        Returns the number of users processed (each one is marked as followed
        in the DB whether or not a "Follow" button was found).
        """
        processed = 0

        #Get updated list from db
        followers = DB.selectAll(
            "SELECT * FROM followers WHERE parent_account = ? and followed = 0 limit ?;",
            (accountName, self.template['amount']))

        #Loop through followers and follow
        for follower in followers:

            #Load user page
            print("Following " + follower['user_name'])
            self.browser.get(follower['user_link'])
            WebDriverWait(self.browser, 10).until(
                EC.element_to_be_clickable(
                    (By.XPATH,
                     '//span[text()="@' + follower['user_name'] + '"]')))

            #Follow user
            try:
                followBtn = self.browser.find_element(
                    By.XPATH, '//span[text()="Follow"]')
                followBtn.click()
            except Exception:
                print('User already followed')

            #Update user as followed
            DB.execute(
                '''UPDATE followers SET 
                followed = ?,
                followed_at = ? 
                WHERE id = ?;''', (1, str(datetime.now()), follower['id']))

            processed += 1

            print("Taking a short rest...")
            # randrange needs integer bounds (a float raises on current
            # Python versions) and a non-empty range
            sleepDelay = self.template['sleep_delay']
            shortest = int(sleepDelay * 0.6)
            longest = max(int(sleepDelay), shortest + 1)
            time.sleep(randrange(shortest, longest))

        return processed