Exemplo n.º 1
0
class Share():
    """Scrape the repost entries of a share page and pass them to storage.

    The constructor logs in through ``Launcher`` and keeps whatever
    ``login()`` returns as ``self.driver``; ``get_share`` drives that object
    with Selenium-style ``find_element(s)_by_xpath`` calls.
    """

    def __init__(self, username, password):
        """Log in and fetch the list of share-page URLs.

        Args:
            username: Forwarded to ``Launcher``.
            password: Forwarded to ``Launcher``.
        """
        self.launcher = Launcher(username, password)
        self.driver = self.launcher.login()
        self.es = Es_fb()
        # One dict per scraped repost; never reset, so repeated get_share()
        # calls keep appending to the same list.
        self.list = []
        self.share_list = self.launcher.get_share_list()

    def get_share(self):
        """Scrape every repost on the first URL of ``self.share_list``.

        Only ``self.share_list[0]`` is visited.  When the list is empty
        nothing is loaded and the (unchanged) accumulator is returned instead
        of raising ``IndexError``.

        Returns:
            ``self.list`` with one dict appended per repost.  Keys:
            ``nick_name``, ``uid`` (list of digit strings found after
            ``id=`` in the author's ``data-hovercard`` attribute; empty when
            the attribute is absent), ``photo_url``, ``text`` (the string
            ``'None'`` when no paragraph is found) and ``timestamp`` (int).

        Raises:
            Whatever the driver raises when the author link, picture or
            timestamp element of a repost cannot be located.
        """
        if not self.share_list:
            return self.list

        # Compiled once instead of once per repost; raw string so that "\d"
        # is a regex class rather than an invalid string escape.
        author_id_re = re.compile(r'id=(\d+)')
        author_link_xpath = (
            './div/div[2]/div[1]/div[2]/div[1]/div/div/div[2]/div/div/div[2]'
            '/h5/span/span/span/a')

        self.driver.get(self.share_list[0])
        for group in self.driver.find_elements_by_xpath(
                '//div[@id="repost_view_permalink"]/div/div[1]/div'):
            for post in group.find_elements_by_xpath('./div'):
                # The same anchor carries both the display name and the
                # hovercard URL, so look it up only once.
                author_link = post.find_element_by_xpath(author_link_xpath)
                author_name = author_link.text
                # get_attribute() yields None for a missing attribute, which
                # findall() cannot search; fall back to an empty string.
                author_id = author_id_re.findall(
                    author_link.get_attribute('data-hovercard') or '')
                pic_url = post.find_element_by_xpath(
                    './div/div[2]/div/div[2]/div/div/a/div/img'
                ).get_attribute('src')
                try:
                    content = post.find_element_by_xpath(
                        './div/div[2]/div/div[2]/div[2]//p').text
                except Exception:
                    # Best effort: a repost without a text paragraph is
                    # stored with the literal placeholder 'None'.
                    content = 'None'
                timestamp = int(
                    post.find_element_by_xpath(
                        './div/div[2]/div/div[2]/div/div/div/div[2]/div/div/div[2]/div/span[3]/span/a/abbr'
                    ).get_attribute('data-utime'))
                self.list.append({
                    'nick_name': author_name,
                    'uid': author_id,
                    'photo_url': pic_url,
                    'text': content,
                    'timestamp': timestamp
                })
        return self.list

    def save(self, indexName, typeName, list):
        """Forward ``list`` to ``self.es.executeES`` under the given names.

        Args:
            indexName: First argument handed to ``executeES``.
            typeName: Second argument handed to ``executeES``.
            list: Items to store (the name shadows the builtin but is part
                of the existing signature).
        """
        self.es.executeES(indexName, typeName, list)
Exemplo n.º 2
0
class Share():
    """Scrape repost entries from every share page and pass them to storage.

    The constructor logs in through ``Launcher`` and keeps whatever
    ``login()`` returns as ``self.driver``; ``get_share`` drives that object
    with Selenium-style ``find_element(s)_by_xpath`` calls.
    """

    # Seconds to pause after requesting each page before reading it.
    # Override on the class or on an instance to tune the delay.
    page_load_wait = 1

    def __init__(self, username, password):
        """Log in, fetch the share-page URLs and stamp the run time.

        Args:
            username: Forwarded to ``Launcher``.
            password: Forwarded to ``Launcher``.
        """
        self.launcher = Launcher(username, password)
        self.driver = self.launcher.login()
        self.es = Es_fb()
        # One dict per scraped repost; never reset between calls.
        self.list = []
        self.share_list = self.launcher.get_share_list()
        # Written into every item as 'update_time'.
        self.update_time = int(time.time())

    def _text_or_none(self, node, xpath):
        """Return the text of the element at ``xpath``, or ``'None'``."""
        try:
            return node.find_element_by_xpath(xpath).text
        except Exception:
            return 'None'

    def _attr_or_none(self, node, xpath, attr):
        """Return attribute ``attr`` of the element at ``xpath``, or ``'None'``."""
        try:
            return node.find_element_by_xpath(xpath).get_attribute(attr)
        except Exception:
            return 'None'

    def _digits_or_none(self, node, xpath, attr, pattern):
        """Join every ``pattern`` capture found in an attribute, or ``'None'``.

        A located attribute without any match yields ``''``; a failed lookup
        (or a missing attribute) yields ``'None'``.
        """
        try:
            return ''.join(pattern.findall(
                node.find_element_by_xpath(xpath).get_attribute(attr)))
        except Exception:
            return 'None'

    def _utime_or_none(self, node, xpaths):
        """Return ``data-utime`` as int from the first usable xpath, or ``'None'``."""
        for xpath in xpaths:
            try:
                return int(
                    node.find_element_by_xpath(xpath).get_attribute('data-utime'))
            except Exception:
                continue
        return 'None'

    def get_share(self):
        """Visit every URL in ``self.share_list`` and scrape its reposts.

        Each field is scraped on a best-effort basis: a field that cannot be
        read is stored as the literal string ``'None'`` rather than aborting
        the run.  The driver is closed when the method finishes, including
        when it finishes by raising.

        Returns:
            ``self.list`` with one dict appended per repost.  Keys: ``uid``,
            ``photo_url``, ``nick_name``, ``mid``, ``timestamp``, ``text``,
            ``update_time``, ``root_text`` and ``root_mid``.
        """
        # Compiled once, as raw strings so "\d" is not an invalid escape.
        author_id_re = re.compile(r'id=(\d+)')
        mid_re = re.compile(r'/(\d+)')
        root_mid_re = re.compile(r'story_fbid=(\d+)')

        author_link = './div[2]/div[1]/div[2]/div[1]/div/div/div[2]/div/div/div[2]/h5/span/span/span/a'
        # Prefix shared by the timestamp, post-link and root-link lookups.
        header = './div[2]/div/div[2]/div/div/div/div[2]/div/div/div[2]'

        try:
            for url in self.share_list:
                self.driver.get(url)
                time.sleep(self.page_load_wait)
                # Dismiss the notification popup, if one is shown, to reach
                # the page itself.
                try:
                    self.driver.find_element_by_xpath(
                        '//div[@class="_n8 _3qx uiLayer _3qw"]').click()
                except Exception:
                    # No popup (or it is not clickable): nothing to dismiss.
                    pass

                for group in self.driver.find_elements_by_xpath(
                        '//div[@role="feed"]/div'):
                    for post in group.find_elements_by_xpath('./div'):
                        author_name = self._text_or_none(post, author_link)
                        author_id = self._digits_or_none(
                            post, author_link, 'data-hovercard', author_id_re)
                        pic_url = self._attr_or_none(
                            post, './div[2]/div/div[2]/div/div/a/div/img', 'src')
                        content = self._text_or_none(
                            post, './div[2]/div/div[2]/div[2]')
                        # The <abbr> sits under span[3] or, failing that,
                        # span[2].
                        timestamp = self._utime_or_none(post, (
                            header + '/div/span[3]/span/a/abbr',
                            header + '/div/span[2]/span/a/abbr',
                        ))
                        mid = self._digits_or_none(
                            post, header + '/div/span[3]/span/a', 'href', mid_re)
                        root_mid = self._digits_or_none(
                            post, header + '/h5/span/span/a', 'href', root_mid_re)
                        self.list.append({
                            'uid': author_id,
                            'photo_url': pic_url,
                            'nick_name': author_name,
                            'mid': mid,
                            'timestamp': timestamp,
                            'text': content,
                            'update_time': self.update_time,
                            # NOTE(review): root_text duplicates the repost's
                            # own text; confirm this is intended.
                            'root_text': content,
                            'root_mid': root_mid,
                        })
        finally:
            self.driver.close()
        return self.list

    def save(self, indexName, typeName, list):
        """Forward ``list`` to ``self.es.executeES`` under the given names.

        Args:
            indexName: First argument handed to ``executeES``.
            typeName: Second argument handed to ``executeES``.
            list: Items to store (the name shadows the builtin but is part
                of the existing signature).
        """
        self.es.executeES(indexName, typeName, list)
Exemplo n.º 3
0
                author_id = re.findall(
                    re.compile('id=(\d+)'),
                    each.find_element_by_xpath(
                        './div/div[2]/div[1]/div[2]/div[1]/div/div/div[2]/div/div/div[2]/h5/span/span/span/a'
                    ).get_attribute('data-hovercard'))
                pic_url = each.find_element_by_xpath(
                    './div/div[2]/div/div[2]/div/div/a/div/img').get_attribute(
                        'src')
                try:
                    content = each.find_element_by_xpath(
                        './div/div[2]/div/div[2]/div[2]//p').text
                except Exception as e:
                    content = 'None'
                time = each.find_element_by_xpath(
                    './div/div[2]/div/div[2]/div/div/div/div[2]/div/div/div[2]/div/span[3]/span/a/abbr'
                ).get_attribute('data-utime')
                root_url = 'https://www.facebook.com/' + each.find_element_by_xpath(
                    './div/div[2]/div/div[2]/div/div/div/div[2]/div/div/div[2]/h5/span/span/a'
                ).get_attribute('href')

    def save(self, indexName, typeName, item):
        """Hand ``item`` to the module-level ``es`` client.

        Args:
            indexName: First argument passed to ``es.executeES``.
            typeName: Second argument passed to ``es.executeES``.
            item: Third argument passed to ``es.executeES``.
        """
        # ``es`` is resolved as a global at call time, not via ``self``.
        execute = es.executeES
        execute(indexName, typeName, item)


if __name__ == '__main__':
    import os

    # NOTE(review): account credentials are hard-coded in source.  They can
    # now be overridden with the FB_USERNAME / FB_PASSWORD environment
    # variables; the literals remain only as a backward-compatible fallback
    # and should be removed (and the password rotated) once callers set them.
    fb = Launcher(
        os.environ.get('FB_USERNAME', '18538728360'),
        os.environ.get('FB_PASSWORD', 'zyxing,0513'))
    # Kept as a module-level name: the ``save`` method above reads ``es``
    # as a global.
    es = es_twitter()
    share_list = fb.get_share_list()
    share = Share()
    share.get_share()
Exemplo n.º 4
0
class Share():
    """Scrape one repost record per share URL and pass them to storage.

    ``Launcher.get_share_list()`` supplies both the URLs to visit and the
    driver object used to visit them; ``get_share`` drives that object with
    Selenium-style calls.
    """

    # Seconds to wait after requesting each page before reading it.
    # Override on the class or on an instance to tune the delay.
    page_load_wait = 120

    def __init__(self, username, password):
        """Create the launcher, fetch URLs and driver, and stamp the run time.

        Args:
            username: Forwarded to ``Launcher``.
            password: Forwarded to ``Launcher``.
        """
        self.launcher = Launcher(username, password)
        self.es = Es_fb()
        # One dict per visited URL; never reset between calls.
        self.list = []
        self.share_list, self.driver = self.launcher.get_share_list()
        # Written into every item as 'update_time'.
        self.update_time = int(time.time())

    def _text_at(self, xpath, compact=False):
        """Return the text of the page element at ``xpath``, or ``''``.

        Args:
            xpath: Absolute XPath evaluated against ``self.driver``.
            compact: When true, strip every space, newline and tab from the
                text.
        """
        try:
            text = self.driver.find_element_by_xpath(xpath).text
            if compact:
                text = text.replace(' ', '').replace('\n', '').replace('\t', '')
            return text
        except Exception:
            # Best effort: a missing element is recorded as an empty field.
            return ''

    def _first_group(self, pattern, text):
        """Return group 1 of the first ``pattern`` match in ``text``, or ``''``."""
        match = pattern.search(text or '')
        return match.group(1) if match else ''

    def get_share(self):
        """Visit every URL in ``self.share_list`` and record one item each.

        Ids are parsed out of the URL and the page source; names and texts
        are read from the rendered page.  Any field that cannot be read is
        stored as ``''``.  A screenshot is written to ``get_share000.png``
        for every page (each one overwrites the last), and several fields
        are echoed to stdout.  The driver is quit when the method finishes,
        including when it finishes by raising.

        Returns:
            ``self.list`` with one dict appended per URL.  Keys: ``uid``,
            ``nick_name``, ``mid``, ``timestamp``, ``text``,
            ``update_time``, ``root_text`` and ``root_mid``.
        """
        # Compiled once, as raw strings so "\d" is not an invalid escape.
        author_id_re = re.compile(r'id%3D(\d+)&')
        publish_time_re = re.compile(r'"publish_time":(\d+),')
        mid_re = re.compile(r'fbid%3D(\d+)%')
        root_mid_re = re.compile(r'"original_content_id":"(\d+)"')

        try:
            for url in self.share_list:
                self.driver.get(url)
                time.sleep(self.page_load_wait)

                # Dismiss the notification popup, if one is shown, to reach
                # the page itself.
                try:
                    self.driver.find_element_by_xpath(
                        '//div[@class="_n8 _3qx uiLayer _3qw"]').click()
                except Exception:
                    # No popup (or it is not clickable): nothing to dismiss.
                    pass

                page = self.driver.page_source
                self.driver.save_screenshot('get_share000.png')

                # print(x) with a single argument prints the same on
                # Python 2 and 3.
                author_name = self._text_at(
                    '//table[@role="presentation"]/tbody/tr/td[2]/div/h3/strong/a')
                print(author_name)

                author_id = self._first_group(author_id_re, url)
                print(author_id)

                content = self._text_at(
                    '/html/body/div/div/div[2]/div/div[1]/div[1]/div/div[1]/div[2]')

                # Whitespace is removed first so the pattern matches however
                # the embedded data happens to be spaced.
                compact_page = (page or '').replace(' ', '').replace(
                    '\n', '').replace('\t', '')
                publish_time = self._first_group(publish_time_re, compact_page)
                timestamp = int(publish_time) if publish_time else ''
                print(timestamp)

                mid = self._first_group(mid_re, url)
                print(mid)

                root_mid = self._first_group(root_mid_re, page)
                print(root_mid)

                root_text = self._text_at(
                    '/html/body/div/div/div[2]/div/div[1]/div[1]/div/div[1]/div[3]/div[2]/div/div/div[2]',
                    compact=True)
                print(root_text)

                self.list.append({
                    'uid': author_id,
                    'nick_name': author_name,
                    'mid': mid,
                    'timestamp': timestamp,
                    'text': content,
                    'update_time': self.update_time,
                    'root_text': root_text,
                    'root_mid': root_mid,
                })
        finally:
            # Always release the browser, even if a page load raised.
            self.driver.quit()
        return self.list

    def save(self, indexName, typeName, list):
        """Forward ``list`` to ``self.es.executeES`` under the given names.

        Args:
            indexName: First argument handed to ``executeES``.
            typeName: Second argument handed to ``executeES``.
            list: Items to store (the name shadows the builtin but is part
                of the existing signature).
        """
        self.es.executeES(indexName, typeName, list)