Example #1
File: scrape.py Project: alpaca/api
def get_about(self, username):
    """Scrape a user's 'about' info, create or update the FacebookUser row, and record a Transaction."""

    # facebook_scraper = pickle.load(open( "facebook_scraper.pickle", "rb" ))
    try:
        result = facebook_scraper.get_about(username)
        user = FacebookUser.query.filter_by(username=username).first()

        if not user:
            user = FacebookUser()
            convert_result(user, result)
            user.created_at = datetime.now()
            db.session.add(user)
            transact_type = 'create'
        else:
            convert_result(user, result)
            transact_type = 'update'

        user.updated_at = datetime.now()
    except Exception as e:
        transaction = Transaction(
                timestamp = datetime.utcnow(),
                transact_type = 'error',
                func = 'get_about(%s)' % username,
                ref = "%s: %s" % (
                    str(e.errno) if hasattr(e, 'errno') else 0,
                    e.strerror if hasattr(e, 'strerror') else e
                )
            )
        if 'result' in locals():
            transaction.data = str(result)
            transaction.ref = "%s.%s" % (FacebookUser.__tablename__, str(result.uid))

        db.session.add(transaction)
        db.session.commit()
        return


    ## Scrape Transaction
    
    transaction = Transaction(
        timestamp = datetime.utcnow(),
        transact_type = transact_type,
        ref = "%s.%s" % (FacebookUser.__tablename__, str(result.uid)),
        func = 'get_about(%s)' % username,
        data = str(result)
    )

    db.session.add(transaction)
    db.session.commit()

    return result
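Note: this example and the two below build and commit the same error Transaction inline in their except blocks. A minimal sketch of that block pulled into a helper, reusing the Transaction model and db session shown above (the name log_error_transaction is hypothetical, not part of the project):

def log_error_transaction(func_label, exc, result=None):
    # Mirrors the error-Transaction pattern used in the tasks above.
    transaction = Transaction(
        timestamp=datetime.utcnow(),
        transact_type='error',
        func=func_label,
        ref="%s: %s" % (
            str(exc.errno) if hasattr(exc, 'errno') else 0,
            exc.strerror if hasattr(exc, 'strerror') else exc
        )
    )
    if result is not None:
        # Attach the offending scrape result when one was produced before the failure.
        transaction.data = str(result)
    db.session.add(transaction)
    db.session.commit()

With such a helper, each except block would reduce to a single call, e.g. log_error_transaction('get_about(%s)' % username, e, result if 'result' in locals() else None).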
Example #2
File: scrape.py Project: alpaca/api
def get_fans(self, pagename):
    """Scrape the fans of a Facebook page, upserting a FacebookUser for each fan and linking it to the page."""

    max_retries = 3
    for retries in xrange(max_retries + 1):
        try:
            page = FacebookPage.query.filter_by(username=pagename).first()
            break
        except (DatabaseError, OperationalError):
            if retries + 1 > max_retries:
                raise

    if not page: raise Exception("add the FacebookPage to the database before scraping its fans")

    results = []
    try:
        for result in facebook_scraper.get_fans(pagename):
            try:

                user = FacebookUser.query.filter_by(username=result.username).first()

                if not user:
                    user = FacebookUser()
                    convert_result(user, result)
                    user.created_at = datetime.now()
                    db.session.add(user)
                    transact_type = 'create'
                else:
                    convert_result(user, result)
                    transact_type = 'update'

                # logger.debug()
                # self.update_state(state='PROGRESS', meta={'transact_type': transact_type, 'current_result': result.username, 'current_total': len(results)})                

            except Exception as e:
                transaction = Transaction(
                        timestamp = datetime.utcnow(),
                        transact_type = 'error',
                        func = 'get_fans(%s)' % pagename,
                        ref = "%s: %s" % (
                            str(e.errno) if hasattr(e, 'errno') else 0, 
                            e.strerror if hasattr(e, 'strerror') else e
                        )
                    )
                logger.debug(transaction.__dict__)
                if 'result' in locals():
                    transaction.data = str(result)

                # logger.debug()
                # self.update_state(state='PROGRESS', meta={'transact_type': transact_type, 'current_result': result.username, 'current_total': len(results)})

                db.session.add(transaction)
                db.session.commit()
                continue

            user.updated_at = datetime.now()
            user.pages.append(page)

            ## Scrape Transaction
            
            transaction = Transaction(
                timestamp = datetime.utcnow(),
                transact_type = transact_type,
                ref = "%s.%s" % (FacebookUser.__tablename__, str(result.uid)),
                func = 'get_fans(%s)' % pagename,
                data = str(result)
            )

            db.session.add(transaction)
            db.session.commit()

            results.append(result)
            logger.info( "%s - %i - %s" % (pagename, len(results), result.username))

    except (ScrapingError, ValueError) as e:
        logger.info(e)
        raise

    return results
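Note: get_about and get_fans repeat the same get-or-create step for FacebookUser. A minimal sketch of that step as a standalone helper, using the same convert_result and db session as the code above (the name upsert_facebook_user is hypothetical):

def upsert_facebook_user(result):
    # Look the user up by username; create it on first sight, otherwise refresh its fields.
    user = FacebookUser.query.filter_by(username=result.username).first()
    if not user:
        user = FacebookUser()
        convert_result(user, result)
        user.created_at = datetime.now()
        db.session.add(user)
        transact_type = 'create'
    else:
        convert_result(user, result)
        transact_type = 'update'
    user.updated_at = datetime.now()
    return user, transact_type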
Example #3
File: scrape.py Project: alpaca/api
def get_likes(self, username):
    """Scrape the pages a user likes, upserting a FacebookPage for each and linking it to the user."""
    
    # TODO: move this to FacebookUser model
    #
    # from sqlalchemy.exc import DatabaseError, OperationalError
    #
    # retries method on DatabaseError or OperationalError
    # def retry(fun):
    #     @wraps(fun)
    #     def _inner(*args, **kwargs):
    #         max_retries = kwargs.pop('max_retries', 3)
    #         for retries in xrange(max_retries + 1):
    #             try:
    #                 return fun(*args, **kwargs)
    #             except (DatabaseError, OperationalError):
    #                 if retries + 1 > max_retries:
    #                     raise
    #     return _inner
    #
    # @retry
    # def get(username_or_uid):
    #   if type(username_or_uid) == int:
    #       return FacebookUser.query.get(username_or_uid)
    #   elif type(username_or_uid) == str:
    #       return FacebookUser.query.filter_by(username=username_or_uid).first()
    #   else:
    #       raise

    max_retries = 3
    for retries in xrange(max_retries + 1):
        try:
            user = FacebookUser.query.filter_by(username=username).first()
            break
        except (DatabaseError, OperationalError):
            if retries + 1 > max_retries:
                raise

    if not user: raise Exception("scrape the user's about information before scraping likes")

    user.scrape_status = 1
    results = []

    try:
        for result in facebook_scraper.get_likes(username):
            try:
                page = FacebookPage.query.filter_by(username=result.username).first()

                if not page:
                    page = FacebookPage()
                    convert_result(page, result)
                    page.created_at = datetime.now()
                    db.session.add(page)
                    transact_type = 'create'
                else:
                    convert_result(page, result)
                    transact_type = 'update'

                # logger.debug()
                # self.update_state(state='PROGRESS', meta={'transact_type': transact_type, 'current_result': result.username, 'current_total': len(results)})

            except Exception as e:
                transaction = Transaction(
                        timestamp = datetime.utcnow(),
                        transact_type = 'error',
                        func = 'get_likes(%s)' % username,
                        ref = "%s: %s" % (
                            str(e.errno) if hasattr(e, 'errno') else 0, 
                            e.strerror if hasattr(e, 'strerror') else e
                        )
                    )
                if 'result' in locals():
                    transaction.data = str(result)

                # logger.debug()
                # self.update_state(state='PROGRESS', meta={'transact_type': transact_type, 'current_result': result.username, 'current_total': len(results)})

                db.session.add(transaction)
                db.session.commit()
                continue

            page.updated_at = datetime.now()
            page.users.append(user)

            ## Scrape Transaction
            
            transaction = Transaction(
                timestamp = datetime.utcnow(),
                transact_type = transact_type,
                ref = "%s.%s" % (FacebookPage.__tablename__, str(result.page_id)),
                func = 'get_likes(%s)' % username,
                data = str(result)
            )

            db.session.add(transaction)
            db.session.commit()

            results.append(result)
            logger.info( "%s - %i - %s" % (username, len(results), result.username))
    except ScrapingError as e:
        logger.info(e)
        user.scrape_status = 0

    if user.scrape_status == 1:
        user.scrape_status = 2
    db.session.commit()

    return results
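Note: the TODO comment at the top of get_likes sketches a retry decorator meant to replace the inline retry loops in get_fans and get_likes. A runnable version of that sketch for this Python 2 codebase (xrange, basestring), with the lookup bug in the commented draft fixed; it is a proposal from the TODO, not the project's current implementation:

from functools import wraps
from sqlalchemy.exc import DatabaseError, OperationalError

def retry(fun):
    # Retry the wrapped call on transient database errors; re-raise once max_retries is exceeded.
    @wraps(fun)
    def _inner(*args, **kwargs):
        max_retries = kwargs.pop('max_retries', 3)
        for retries in xrange(max_retries + 1):
            try:
                return fun(*args, **kwargs)
            except (DatabaseError, OperationalError):
                if retries + 1 > max_retries:
                    raise
    return _inner

@retry
def get(username_or_uid):
    # Fetch a FacebookUser by primary-key uid or by username, as the TODO suggests.
    if isinstance(username_or_uid, int):
        return FacebookUser.query.get(username_or_uid)
    elif isinstance(username_or_uid, basestring):
        return FacebookUser.query.filter_by(username=username_or_uid).first()
    else:
        raise TypeError("expected an int uid or a username string")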