def extract_user_data(self, response, profile_data):
    """Yield follow-up requests for a user's timeline and photo albums.

    Args:
        response: The response being processed; its ``meta`` is forwarded
            unchanged to every request yielded here.
        profile_data: Mapping read with the keys ``'timeline_link'`` and
            ``'photos_link'``. A falsy value means that section is skipped.

    Yields:
        Request: One request handled by ``wall.extract_posts_data`` when a
        timeline link is present, then one handled by
        ``albums.extract_albums_data`` when a photos link is present.
    """
    # Friends-page crawl is currently disabled:
    # yield Request(f'https://m.facebook.com/{userID}/friends',
    #               callback=self.parse_friends_page)

    # NOTE(review): the two base URLs differ in host and trailing slash;
    # presumably this matches the shape of each link -- confirm.
    timeline = profile_data['timeline_link']
    if timeline:
        yield Request(
            'https://mbasic.facebook.com/' + timeline,
            callback=wall.extract_posts_data,
            meta=response.meta,
        )

    photos = profile_data['photos_link']
    if photos:
        yield Request(
            'https://m.facebook.com' + photos,
            callback=albums.extract_albums_data,
            meta=response.meta,
        )
def __init__(self):
    """Set up the initial state and the first request to issue.

    After the parent initialiser runs, ``fb_dtsg`` starts out as ``None``,
    ``start_user`` holds the configured starting user, and ``entrypoint`` is
    a request for the mobile home page whose response goes to
    ``self.try_login`` (failures go to ``self.error``).
    """
    super().__init__()
    self.start_user = "******"
    self.fb_dtsg = None
    self.entrypoint = Request(
        "https://m.facebook.com",
        callback=self.try_login,
        errback=self.error,
    )
def extract_posts_data(response):
    """Yield a request per post found on a wall page, then the next page.

    Each call decrements the module-level ``limit`` counter by one. The
    "See more stories" link is only followed while ``limit`` is still
    positive after the post requests have been yielded.

    Args:
        response: Page response; ``response.body.xpath`` is queried and
            ``response.meta`` is forwarded to every yielded request.

    Yields:
        Request: One per matched post link (handled by
        ``extract_post_data``), followed by at most one request for the
        next page (handled by this function again).
    """
    global limit
    limit -= 1

    more_stories = response.body.xpath("//a[span[text()='See more stories']]")
    post_links = response.body.xpath(
        "//div/div/div/span[starts-with(@id, 'like')]"
        "/a[not(text()='React') and not(text()='Like')]"
    )

    # A falsy result is treated as "no posts" and is never iterated.
    for link in post_links or ():
        yield Request(
            "https://mbasic.facebook.com/" + link.attrib['href'],
            callback=extract_post_data,
            meta=response.meta,
        )

    if more_stories and limit > 0:
        yield Request(
            "https://mbasic.facebook.com/" + more_stories[0].attrib['href'],
            callback=extract_posts_data,
            meta=response.meta,
        )
def extract_post_data(response):
    """Yield requests for the likes of a post and of each of its comments.

    Args:
        response: Post page response; ``response.body.xpath`` is queried and
            ``response.meta`` is merged into the meta of each yielded request.

    Yields:
        Request: One for the first matched post-likes link (meta gets
        ``likes_from='wall.post'``), then -- only when both comment authors
        and comment-like links were matched -- one per comment-like link
        (meta gets ``likes_from='wall.post.comment'``). All are handled by
        ``extract_likes``. Keys already present in ``response.meta`` take
        precedence over the ``likes_from`` default.
    """
    base_url = "https://mbasic.facebook.com/"

    post_likes = response.body.xpath('//div[@id="root"]/div/div/div/div/a')
    authors = response.body.xpath('//div/div/div/h3/a')
    comment_like_links = response.body.xpath(
        "//div/span/span/a[not(text()='React') and not(text()='Like')]")

    if post_likes:
        yield Request(
            base_url + post_likes[0].attrib['href'],
            callback=extract_likes,
            meta=dict({'likes_from': 'wall.post'}, **response.meta),
        )

    if not (authors and comment_like_links):
        return

    for _author in authors:
        # TODO: store result
        # print("AUTHOR ->", _author.text, _author.attrib['href'])
        pass

    for link in comment_like_links:
        yield Request(
            base_url + link.attrib['href'],
            callback=extract_likes,
            meta=dict({'likes_from': 'wall.post.comment'}, **response.meta),
        )
def extract(res):
    """Walk a paginated likes listing, following the next-page link.

    Args:
        res: Listing page response; ``res.body.xpath`` is queried and
            ``res.meta`` is forwarded to the follow-up request.

    Yields:
        Request: At most one, for the first matched next-page link, handled
        by this function again.
    """
    pager_links = res.body.xpath("//li/table/tbody/tr/td/div/a")
    likers = res.body.xpath("//td/div/h3/a")

    # A falsy result is treated as "no likes" and is never iterated.
    for _liker in likers or ():
        # print("\t->", _liker.text, _liker.attrib['href'])
        # TODO: store result
        pass

    if not pager_links:
        return

    yield Request(
        "https://mbasic.facebook.com/" + pager_links[0].attrib['href'],
        callback=extract,
        meta=res.meta,
    )
def is_logged_in(response):
    """Start crawling if a session exists, otherwise submit the login form.

    The ``fb_dtsg`` input value found on the page is stored on
    ``self.fb_dtsg``. A non-empty match is taken to mean the session is
    already authenticated.

    Args:
        response: Page response; ``response.body.xpath`` is queried for the
            ``fb_dtsg`` input value.

    Yields:
        Request: Either every request produced by
        ``self.parse_user_page(self.start_user)``, or a single POST to the
        login endpoint handled by ``self.login`` (errors go to
        ``self.error``).
    """
    # NOTE(review): ``self`` is not a parameter of this function; presumably
    # it is captured from an enclosing scope -- confirm against the caller.
    token = response.body.xpath('//input[@name="fb_dtsg"]/@value')
    self.fb_dtsg = token

    if token:
        logger.info(
            "Already logged in to facebook thanks to the preloaded cookies")
        yield from self.parse_user_page(self.start_user)
        return

    logger.info("Need to loggin to facebook")
    credentials = {
        'email': config.FACEBOOK_CREDENTIALS[0],
        'pass': config.FACEBOOK_CREDENTIALS[1],
    }
    yield Request(
        'https://m.facebook.com/login.php',
        method='POST',
        body=credentials,
        allow_redirects=False,
        callback=self.login,
        errback=self.error,
    )
def get(self):
    """Pop the next serialized request from the queue and rebuild it.

    Calls ``self.redis.lpop(self.key)`` and passes the popped value to
    ``Request.from_json``.

    Returns:
        The object produced by ``Request.from_json`` for the popped value.

    Raises:
        Empty: If ``lpop`` returned a falsy value (nothing to pop).
    """
    payload = self.redis.lpop(self.key)
    if not payload:
        raise Empty("Queue empty")
    return Request.from_json(payload)