示例#1
0
文件: facebook.py 项目: jurelou/SNA
    def extract_user_data(self, response, profile_data):
        """Yield follow-up requests for a profile's timeline and photo albums.

        ``profile_data`` must contain the keys ``'timeline_link'`` and
        ``'photos_link'``. A request is emitted only for a link whose value
        is truthy, and ``response.meta`` is forwarded unchanged to each
        emitted request.
        """
        # NOTE: crawling of the profile's friends page is not enabled here.
        timeline_link = profile_data['timeline_link']
        if timeline_link:
            timeline_url = 'https://mbasic.facebook.com/' + timeline_link
            yield Request(timeline_url,
                          callback=wall.extract_posts_data,
                          meta=response.meta)

        # Looked up only after the timeline request has been handed out, so
        # a missing key surfaces at the same point as before.
        photos_link = profile_data['photos_link']
        if photos_link:
            photos_url = 'https://m.facebook.com' + photos_link
            yield Request(photos_url,
                          callback=albums.extract_albums_data,
                          meta=response.meta)
示例#2
0
文件: facebook.py 项目: jurelou/SNA
 def __init__(self):
     """Initialise the crawler state and build its first request.

     ``fb_dtsg`` starts out as ``None``. ``entrypoint`` is a request for
     the mobile Facebook home page that is handled by ``try_login`` on
     success and by ``error`` on failure.
     """
     super().__init__()
     self.fb_dtsg = None
     handlers = {"callback": self.try_login, "errback": self.error}
     self.entrypoint = Request("https://m.facebook.com", **handlers)
     self.start_user = "******"
示例#3
0
def extract_posts_data(response):
    """Yield one request per wall post, then one for the next page of stories.

    Every call decrements the module-level ``limit`` counter. The
    "See more stories" link is followed only while that counter is still
    positive at the moment the post requests have all been yielded.
    ``response.meta`` is forwarded unchanged to every request.
    """
    global limit
    limit -= 1

    page = response.body
    more_links = page.xpath("//a[span[text()='See more stories']]")
    post_links = page.xpath(
        "//div/div/div/span[starts-with(@id, 'like')]/a[not(text()='React') and not(text()='Like')]"
    )

    # ``or ()`` keeps a falsy result (e.g. an empty list) from being iterated.
    for link in post_links or ():
        post_url = "https://mbasic.facebook.com/" + link.attrib['href']
        yield Request(post_url,
                      callback=extract_post_data,
                      meta=response.meta)

    if not more_links or limit <= 0:
        return
    next_url = "https://mbasic.facebook.com/" + more_links[0].attrib['href']
    yield Request(next_url,
                  callback=extract_posts_data,
                  meta=response.meta)
示例#4
0
def extract_post_data(response):
    """Yield requests for the like lists of a post and of its comments.

    The first link matching the post-level likes XPath produces one
    request tagged ``likes_from='wall.post'``. When both comment authors
    and comment like links are found, every comment like link produces a
    request tagged ``likes_from='wall.post.comment'``. The tag is merged
    with ``response.meta``; keys already present in ``response.meta`` take
    precedence over the tag.
    """
    page = response.body
    post_like_links = page.xpath('//div[@id="root"]/div/div/div/div/a')
    author_links = page.xpath('//div/div/div/h3/a')
    comment_like_links = page.xpath(
        "//div/span/span/a[not(text()='React') and not(text()='Like')]")

    def likes_request(link, origin):
        # Build the request for one like list, labelled with where it came from.
        return Request("https://mbasic.facebook.com/" + link.attrib['href'],
                       callback=extract_likes,
                       meta=dict({'likes_from': origin}, **response.meta))

    if post_like_links:
        yield likes_request(post_like_links[0], 'wall.post')

    if not (author_links and comment_like_links):
        return

    for _author in author_links:
        # TODO: store result (_author.text, _author.attrib['href'])
        pass

    for link in comment_like_links:
        yield likes_request(link, 'wall.post.comment')
示例#5
0
 def extract(res):
     """Walk one page of a like list and yield a request for the next page.

     The links matched by the likes XPath are iterated but not stored yet.
     When a pagination link is found, the first one is followed with this
     same function as callback and ``res.meta`` forwarded unchanged.
     """
     page = res.body
     pager_links = page.xpath("//li/table/tbody/tr/td/div/a")
     liker_links = page.xpath("//td/div/h3/a")

     for _liker in liker_links or ():
         # TODO: store result (_liker.text, _liker.attrib['href'])
         pass

     if not pager_links:
         return
     next_url = "https://mbasic.facebook.com/" + pager_links[0].attrib['href']
     yield Request(next_url, callback=extract, meta=res.meta)
示例#6
0
文件: facebook.py 项目: jurelou/SNA
 def is_logged_in(response):
     """Continue crawling if a session exists, otherwise submit the login form.

     The ``fb_dtsg`` input values found on the page are stored on
     ``self.fb_dtsg``. If any were found, the requests produced by
     ``self.parse_user_page(self.start_user)`` are yielded; otherwise a
     POST to the login page carrying the configured credentials is
     yielded, handled by ``self.login`` / ``self.error``.

     NOTE(review): ``self`` is not a parameter of this function; it is
     presumably captured from an enclosing method's scope -- confirm.
     """
     tokens = response.body.xpath('//input[@name="fb_dtsg"]/@value')
     self.fb_dtsg = tokens

     if tokens:
         logger.info(
             "Already logged in to facebook thanks to the preloaded cookies"
         )
         yield from self.parse_user_page(self.start_user)
         return

     logger.info("Need to loggin to facebook")
     credentials = {
         'email': config.FACEBOOK_CREDENTIALS[0],
         'pass': config.FACEBOOK_CREDENTIALS[1],
     }
     yield Request('https://m.facebook.com/login.php',
                   method='POST',
                   body=credentials,
                   allow_redirects=False,
                   callback=self.login,
                   errback=self.error)
示例#7
0
 def get(self):
     """Pop the next serialized request from the Redis list and decode it.

     Returns:
         The object built by ``Request.from_json`` from the popped value.

     Raises:
         Empty: if ``lpop`` returned a falsy value (nothing to pop).
     """
     raw = self.redis.lpop(self.key)
     if not raw:
         raise Empty("Queue empty")
     return Request.from_json(raw)