Exemplo n.º 1
0
class Like():
    """Collect the accounts that liked each post URL supplied by ``Launcher``.

    ``__init__`` sets ``launcher``, ``driver`` (the value returned by
    ``launcher.login()``), ``like_list`` (URLs to visit), ``es`` (an
    ``Es_fb`` instance used by ``save``) and ``list`` (the result rows).
    """

    def __init__(self, username, password):
        """Create the launcher, obtain its driver and fetch the URLs to visit.

        Args:
            username: Passed to ``Launcher`` unchanged.
            password: Passed to ``Launcher`` unchanged.
        """
        self.launcher = Launcher(username, password)
        self.driver = self.launcher.login()
        self.like_list = self.launcher.get_like_list()
        self.es = Es_fb()
        # Result rows; get_like() appends to this and never clears it.
        self.list = []

    def get_like(self):
        """Visit every URL in ``self.like_list`` and record one row per liker.

        For each URL the page's ``root_name``, id, content and ``data-utime``
        timestamp are read, then the page behind the ``a._2x4v`` link is
        opened and one dict per ``li._5i_q`` entry is appended to
        ``self.list``.

        Returns:
            list: ``self.list``. Rows accumulate across calls because the
            list is never cleared.

        Raises:
            Exception: Whatever the driver raises when a lookup that has no
                default fails (second-choice name/id/timestamp selectors,
                the like link, or a liker's name, id or avatar).
        """
        # Compiled once instead of once per lookup; raw string avoids the
        # invalid "\d" escape of a plain literal.
        id_pattern = re.compile(r'id=(\d+)')

        def hovercard_id(element):
            # Concatenate every "id=<digits>" match in data-hovercard.
            return ''.join(
                id_pattern.findall(element.get_attribute('data-hovercard')))

        for url in self.like_list:
            self.driver.get(url)

            # Each field is tried with a first selector and then a second
            # one; a failure of the second selector propagates to the caller.
            try:
                root_name = self.driver.find_element_by_xpath(
                    '//span[@class="fwb"]').text
            except Exception:
                root_name = self.driver.find_element_by_xpath(
                    '//span[@class="fwb fcg"]').text

            try:
                root_id = hovercard_id(
                    self.driver.find_element_by_xpath(
                        '//span[@class="fwb"]/a'))
            except Exception:
                root_id = hovercard_id(
                    self.driver.find_element_by_xpath(
                        '//span[@class="fwb fcg"]/a'))

            try:
                root_content = self.driver.find_element_by_xpath(
                    '//div[@class="_5pbx userContent _22jv _3576"]/p').text
            except Exception:
                # Missing content is stored as the string 'None'.
                root_content = 'None'

            try:
                timestamp = int(
                    self.driver.find_element_by_xpath(
                        '//abbr[@class="_5ptz"]').get_attribute('data-utime'))
            except Exception:
                timestamp = int(
                    self.driver.find_element_by_xpath(
                        '//abbr[@class="_5ptz timestamp livetimestamp"]'
                    ).get_attribute('data-utime'))

            # Follow the link to the page that lists the likers.
            self.driver.get(
                self.driver.find_element_by_xpath(
                    '//a[@class="_2x4v"]').get_attribute('href'))
            time.sleep(10)

            for each in self.driver.find_elements_by_xpath(
                    '//li[@class="_5i_q"]'):
                # One lookup serves both the display name and the id.
                author_link = each.find_element_by_xpath(
                    './div/div/div/div[1]/div[2]/div/a')
                author_name = author_link.text
                author_id = hovercard_id(author_link)
                pic_url = each.find_element_by_xpath(
                    './div/a/div/img').get_attribute('src')
                try:
                    relationship = each.find_element_by_xpath(
                        './div/div/div/div[2]/div[2]/span/div/a/span[2]/span'
                    ).text
                except Exception:
                    relationship = "None"

                self.list.append({
                    'nick_name': author_name,
                    'uid': author_id,
                    'photo_url': pic_url,
                    'facebook_type': relationship,
                    'root_name': root_name,
                    'id': root_id,
                    'root_content': root_content,
                    'timestamp': timestamp
                })
        return self.list

    def save(self, indexName, typeName, list):
        """Forward the arguments unchanged to ``self.es.executeES``.

        Args:
            indexName: First argument for ``executeES``.
            typeName: Second argument for ``executeES``.
            list: Third argument for ``executeES``. The name shadows the
                builtin but is kept so keyword callers continue to work.
        """
        self.es.executeES(indexName, typeName, list)
Exemplo n.º 2
0
            for each in driver.find_elements_by_xpath('//li[@class="_5i_q"]'):
                author_name = each.find_element_by_xpath(
                    './div/div/div/div[1]/div[2]/div/a').text
                print(author_name)
                author_id = ''.join(
                    re.findall(
                        re.compile('id=(\d+)'),
                        each.find_element_by_xpath(
                            './div/div/div/div[1]/div[2]/div/a').get_attribute(
                                'data-hovercard')))
                print(author_id)
                pic_url = each.find_element_by_xpath(
                    './div/a/div/img').get_attribute('src')
                print(pic_url)
                relationship = each.find_element_by_xpath(
                    './div/div/div/div[2]/div[2]/span/div/a/span[2]/span').text
                print(relationship)
            print('-----')
            time.sleep(10)

    def save(self, indexName, typeName, item):
        """Pass the arguments unchanged to ``es.executeES``.

        ``es`` is not an attribute of ``self``; it is resolved as a global at
        call time (the ``__main__`` block below assigns it).

        Args:
            indexName: Forwarded as the first argument.
            typeName: Forwarded as the second argument.
            item: Forwarded as the third argument.
        """
        es.executeES(indexName, typeName, item)


# Script entry point: builds the launcher and the module-level ``es`` object,
# fetches the like URLs, then runs the scraper.
if __name__ == '__main__':
    # NOTE(review): credentials are hard-coded in source; consider loading
    # them from the environment or an untracked config file.
    fb = Launcher('18538728360', 'zyxing,0513')
    # Module-level name; save() above calls es.executeES(...) on it.
    es = es_twitter()
    # NOTE(review): like_list is not passed to Like(); presumably get_like()
    # reads it as a global -- confirm against the full class definition.
    like_list = fb.get_like_list()
    # NOTE(review): Like() is called without arguments; confirm that the
    # class's __init__ (not visible here) takes none.
    like = Like()
    like.get_like()
Exemplo n.º 3
0
class Like():
    """Collect the accounts that liked each post URL supplied by ``Launcher``.

    ``__init__`` sets ``launcher``, ``like_urls_list`` and ``driver`` (both
    returned by ``launcher.get_like_list()``), ``es`` (an ``Es_fb`` instance
    used by ``save``), ``like_list`` (the result rows) and ``update_time``
    (the Unix time at construction).
    """

    def __init__(self, username, password):
        """Create the launcher and fetch the URLs and driver from it.

        Args:
            username: Passed to ``Launcher`` unchanged.
            password: Passed to ``Launcher`` unchanged.
        """
        self.launcher = Launcher(username, password)
        self.like_urls_list, self.driver = self.launcher.get_like_list()
        self.es = Es_fb()
        self.like_list = []
        # Stamped once so every row of this run carries the same value.
        self.update_time = int(time.time())

    def date2timestamp(self, date):
        """Convert a Chinese date label into a Unix timestamp.

        Handled forms, in order:

        * a label containing ``分钟`` (minutes): now minus N * 60 seconds;
        * a label containing ``小时`` (hours): now minus N * 3600 seconds;
        * ``[YYYY年]M月D日``: local midnight of that day, using the current
          year when ``年`` is absent.

        An ``上午``/``下午`` marker and everything after it is discarded, so
        absolute dates have day precision only.

        Args:
            date: The label text.

        Returns:
            int: Seconds since the epoch.

        Raises:
            AttributeError: A minutes/hours label contains no digits.
            ValueError: The remaining text does not parse as ``%Y-%m-%d``.
        """
        date = date.replace(u'月', '-').replace(u'日', '').replace(' ', '')
        # Keep only the part before an AM/PM marker.
        for marker in (u'上午', u'下午'):
            if marker in date:
                date = date.split(marker)[0]

        if u'分钟' in date:
            minutes = int(re.search(r'(\d+)', date).group(1))
            return int(time.time()) - minutes * 60
        if u'小时' in date:
            hours = int(re.search(r'(\d+)', date).group(1))
            return int(time.time()) - hours * 60 * 60

        if u'年' in date:
            date = date.replace(u'年', '-')
        else:
            # No year in the label: prefix the current local year.
            date = time.strftime('%Y', time.localtime()) + '-' + date
        return int(time.mktime(time.strptime(date, '%Y-%m-%d')))

    def get_like(self):
        """Visit every URL in ``self.like_urls_list`` and record its likers.

        For each URL the post text, timestamp and ``fbid`` are read (each
        with a default when the lookup fails), the like-list page is opened,
        and one dict per list entry is appended to ``self.like_list``. The
        text, timestamp, fbid and liker name are also printed as they are
        read.

        The driver is quit when the method finishes, including when an
        exception escapes, so the instance cannot be reused afterwards.

        Returns:
            list: ``self.like_list``.

        Raises:
            Exception: Whatever the driver raises when the page cannot be
                loaded or the link to the like list is missing.
        """
        # Raw strings avoid the invalid "\d" escape; compiled once per call.
        mid_pattern = re.compile(r'fbid%3D(\d+)%')
        id_pattern = re.compile(r'id=(\d+)')

        try:
            for url in self.like_urls_list:
                self.driver.get(url)
                time.sleep(1)

                try:
                    root_text = self.driver.find_element_by_xpath(
                        '//div[@id="m_story_permalink_view"]/div[1]/div/div[1]/div[2]'
                    ).text
                except Exception:
                    root_text = 'None'
                print(root_text)

                try:
                    timestamp = self.date2timestamp(
                        self.driver.find_element_by_xpath(
                            '//div[@id="m_story_permalink_view"]/div[1]/div/div[2]/div[1]'
                        ).text)
                except Exception:
                    # Unparseable or missing date labels are stored as 0.
                    timestamp = 0
                print(timestamp)

                try:
                    root_mid = mid_pattern.search(url).group(1)
                except Exception:
                    root_mid = 0
                print(root_mid)

                # Go to the like-list page.
                self.driver.get(
                    self.driver.find_element_by_xpath(
                        '//div[@id="m_story_permalink_view"]/div[2]/div/div[3]/a'
                    ).get_attribute('href'))
                time.sleep(5)

                for each in self.driver.find_elements_by_xpath(
                        '//div[@id="root"]/table/tbody/tr/td/div/ul/li'):
                    # One lookup serves both the name and the id; when it
                    # fails, both fields fall back to their defaults below.
                    try:
                        author_link = each.find_element_by_xpath(
                            './table/tbody/tr/td/table/tbody/tr/td[3]/div/h3[1]/a'
                        )
                    except Exception:
                        author_link = None

                    try:
                        author_name = author_link.text
                    except Exception:
                        author_name = 'None'
                    print(author_name)

                    try:
                        author_id = ''.join(
                            id_pattern.findall(
                                author_link.get_attribute('href')))
                    except Exception:
                        author_id = 0

                    try:
                        pic_url = each.find_element_by_xpath(
                            './table/tbody/tr/td/table/tbody/tr/td[1]/img'
                        ).get_attribute('src')
                    except Exception:
                        pic_url = 'None'

                    self.like_list.append({
                        'uid': author_id,
                        'photo_url': pic_url,
                        'nick_name': author_name,
                        'timestamp': timestamp,
                        'root_text': root_text,
                        'update_time': self.update_time,
                        'root_mid': root_mid
                    })
        finally:
            # Release the browser even when a page fails part-way through.
            self.driver.quit()
        return self.like_list

    def save(self, indexName, typeName, list):
        """Forward the arguments unchanged to ``self.es.executeES``.

        Args:
            indexName: First argument for ``executeES``.
            typeName: Second argument for ``executeES``.
            list: Third argument for ``executeES``. The name shadows the
                builtin but is kept so keyword callers continue to work.
        """
        self.es.executeES(indexName, typeName, list)
Exemplo n.º 4
0
class Like():
    """Collect the accounts that liked each post URL supplied by ``Launcher``.

    ``__init__`` sets ``launcher``, ``driver`` (the value returned by
    ``launcher.login()``), ``like_list`` (URLs to visit), ``es`` (an
    ``Es_fb`` instance used by ``save``), ``list`` (the result rows) and
    ``update_time`` (the Unix time at construction).
    """

    def __init__(self, username, password):
        """Create the launcher, obtain its driver and fetch the URLs to visit.

        Args:
            username: Passed to ``Launcher`` unchanged.
            password: Passed to ``Launcher`` unchanged.
        """
        self.launcher = Launcher(username, password)
        self.driver = self.launcher.login()
        self.like_list = self.launcher.get_like_list()
        self.es = Es_fb()
        # Result rows; get_like() appends to this and never clears it.
        self.list = []
        # Stamped once so every row of this run carries the same value.
        self.update_time = int(time.time())

    def _dismiss_popup(self):
        """Click the notification overlay if present; do nothing otherwise."""
        try:
            self.driver.find_element_by_xpath(
                '//div[@class="_n8 _3qx uiLayer _3qw"]').click()
        except Exception:
            # The overlay is optional, so a failed lookup or click is ignored.
            pass

    def get_like(self):
        """Visit every URL in ``self.like_list`` and record one row per liker.

        For each URL the post text, ``data-utime`` timestamp and numeric id
        are read (each with a default when the lookup fails), the page
        behind the ``a._2x4v`` link is opened, and one dict per ``li._5i_q``
        entry is appended to ``self.list``.

        ``self.driver.close()`` is always called before returning, including
        when an exception escapes.

        Returns:
            list: ``self.list``.

        Raises:
            Exception: Whatever the driver raises when a page cannot be
                loaded or the ``a._2x4v`` link is missing.
        """
        # Raw strings avoid the invalid "\d" escape; compiled once per call.
        mid_pattern = re.compile(r'/(\d+)')
        id_pattern = re.compile(r'id=(\d+)')

        try:
            for url in self.like_list:
                self.driver.get(url)
                time.sleep(1)
                # Dismiss the notification popup to reach the page.
                self._dismiss_popup()

                try:
                    text = self.driver.find_element_by_xpath(
                        '//div[@class="_5pbx userContent _22jv _3576"]').text
                except Exception:
                    text = 'None'

                # First selector, then the second one, then 0.
                try:
                    try:
                        timestamp = int(
                            self.driver.find_element_by_xpath(
                                '//abbr[@class="_5ptz"]'
                            ).get_attribute('data-utime'))
                    except Exception:
                        timestamp = int(
                            self.driver.find_element_by_xpath(
                                '//abbr[@class="_5ptz timestamp livetimestamp"]'
                            ).get_attribute('data-utime'))
                except Exception:
                    timestamp = 0

                try:
                    # Concatenates every "/<digits>" run found in the href.
                    mid = ''.join(
                        mid_pattern.findall(
                            self.driver.find_element_by_xpath(
                                '//a[@class="_5pcq"]').get_attribute('href')))
                except Exception:
                    mid = 0

                # Go to the like-list page.
                self.driver.get(
                    self.driver.find_element_by_xpath(
                        '//a[@class="_2x4v"]').get_attribute('href'))
                time.sleep(5)
                # Dismiss the notification popup to reach the page.
                self._dismiss_popup()

                for each in self.driver.find_elements_by_xpath(
                        '//li[@class="_5i_q"]'):
                    # One lookup serves both the name and the id; when it
                    # fails, both fields fall back to 'None' below.
                    try:
                        author_link = each.find_element_by_xpath(
                            './div/div/div/div[1]/div[2]/div/a')
                    except Exception:
                        author_link = None

                    try:
                        author_name = author_link.text
                    except Exception:
                        author_name = 'None'

                    try:
                        author_id = ''.join(
                            id_pattern.findall(
                                author_link.get_attribute('data-hovercard')))
                    except Exception:
                        author_id = 'None'

                    try:
                        pic_url = each.find_element_by_xpath(
                            './div/a/div/img').get_attribute('src')
                    except Exception:
                        pic_url = 'None'

                    # 'text' and 'root_text' deliberately carry the same value.
                    self.list.append({
                        'uid': author_id,
                        'photo_url': pic_url,
                        'nick_name': author_name,
                        'timestamp': timestamp,
                        'text': text,
                        'update_time': self.update_time,
                        'root_text': text,
                        'root_mid': mid
                    })
        finally:
            # NOTE(review): close() is kept from the original; if the whole
            # browser session should end here, quit() may be intended --
            # confirm against how Launcher manages the driver.
            self.driver.close()
        return self.list

    def save(self, indexName, typeName, list):
        """Forward the arguments unchanged to ``self.es.executeES``.

        Args:
            indexName: First argument for ``executeES``.
            typeName: Second argument for ``executeES``.
            list: Third argument for ``executeES``. The name shadows the
                builtin but is kept so keyword callers continue to work.
        """
        self.es.executeES(indexName, typeName, list)