示例#1
0
def next_product_url():
    """Return the customer-reviews URL of the next product that should be crawled.

    Entries are popped from the head of the ``KEY_PRODUCTS`` list one at a
    time. Each entry is expected to be a JSON object carrying an ``'asin'``
    key. The loop only ends by returning a URL:

    * empty queue -> log, sleep 5 seconds, poll again;
    * ``product_is_timeout`` is falsy -> log and move on (the popped entry
      is not put back);
    * any exception while handling an entry -> log with traceback and move
      on to the next entry.

    Before returning, a task record is written via ``set_product_task`` and
    previously stored reviews are dropped via ``del_product_reviews``.
    """
    cli = get_cli()

    while True:
        raw_entry = cli.lpop(KEY_PRODUCTS)
        if raw_entry is None:
            logger.info('no product in queue ...')
            time.sleep(5)
            continue

        try:
            asin = json.loads(raw_entry)['asin']

            if product_is_timeout(asin):
                set_product_task(asin)
                del_product_reviews(asin)

                logger.info('next product : ' + asin)
                return 'http://www.amazon.com/ss/customer-reviews/' + asin

            # Not due yet (presumably crawled within CRAWL_PRODUCT_TIMEOUT
            # days -- confirm against product_is_timeout); skip this entry.
            prefix = 'product : ' + asin
            suffix = ' is not timeout, {day} days'.format(
                day=CRAWL_PRODUCT_TIMEOUT)
            logger.info(prefix + suffix)

        except Exception as e:
            # Best-effort consumer: a bad entry must not stop the polling loop.
            logger.exception(repr(e))
示例#2
0
def next_product_url():
    """Poll the product queue and return the reviews URL for the next due product.

    Pops entries from the ``KEY_PRODUCTS`` list; when the list is empty the
    function logs and sleeps 5 seconds before polling again. Every popped
    entry is JSON-decoded and its ``'asin'`` value is used as the product id.

    Products for which ``product_is_timeout`` returns a falsy value are
    logged and skipped. For the first product that passes the check, a task
    record is stored (``set_product_task``), its stored reviews are removed
    (``del_product_reviews``) and the Amazon customer-reviews URL is returned.

    Exceptions raised while processing an entry are logged with a traceback
    and the loop carries on with the next entry, so this function never
    returns ``None``.
    """
    cli = get_cli()

    while True:
        payload = cli.lpop(KEY_PRODUCTS)

        if payload is None:
            logger.info('no product in queue ...')
            time.sleep(5)
            continue

        try:
            asin = json.loads(payload)['asin']
            is_due = product_is_timeout(asin)

            if not is_due:
                head = 'product : ' + asin
                tail = ' is not timeout, {day} days'.format(
                    day=CRAWL_PRODUCT_TIMEOUT)
                logger.info(head + tail)
                continue

            set_product_task(asin)
            del_product_reviews(asin)

            logger.info('next product : ' + asin)
            url = 'http://www.amazon.com/ss/customer-reviews/' + asin
            return url

        except Exception as e:
            # Keep consuming the queue even if one entry is malformed.
            logger.exception(repr(e))
示例#3
0
def set_product_task(prdid):
    """Store a task record for ``prdid``.

    The record is a JSON object with the product id (``'prdid'``) and the
    current ``time.time()`` timestamp (``'ctime'``). It is written with the
    client's ``set`` under the key built from ``KEY_PRODUCT_TASK``,
    overwriting whatever was stored there before.
    """
    created_at = time.time()
    record = {'prdid': prdid, 'ctime': created_at}
    key = KEY_PRODUCT_TASK.format(p=prdid)

    get_cli().set(key, json.dumps(record))
示例#4
0
def get_product_task(prdid):
    """Load the stored task record for ``prdid``.

    Returns the JSON-decoded value found under the ``KEY_PRODUCT_TASK`` key
    for this product. Returns ``None`` when nothing is stored, and also when
    the stored value cannot be decoded (the failure is logged at info level
    rather than raised).
    """
    raw = get_cli().get(KEY_PRODUCT_TASK.format(p=prdid))
    if raw is None:
        return None

    try:
        task = json.loads(raw)
    except Exception as e:
        message = 'get task: {p} {d} {e}'.format(
            p=prdid, d=repr(raw), e=repr(e))
        logger.info(message)
        return None
    else:
        return task
示例#5
0
def get_product_task(prdid):
    """Fetch and decode the task record saved for ``prdid``.

    Looks up the key derived from ``KEY_PRODUCT_TASK``. A missing key yields
    ``None``. A value that fails JSON decoding is reported through
    ``logger.info`` (product id, raw value and error) and also yields
    ``None``; no exception escapes from the decoding step.
    """
    cli = get_cli()
    stored = cli.get(KEY_PRODUCT_TASK.format(p=prdid))

    if stored is not None:
        try:
            return json.loads(stored)
        except Exception as e:
            details = dict(p=prdid, d=repr(stored), e=repr(e))
            logger.info('get task: {p} {d} {e}'.format(**details))

    return None
示例#6
0
    def process_item_amazon_review(self, item, spider):
        """Append a scraped review item to the stored review list of its product.

        The item is converted to a plain ``dict``, JSON-encoded and pushed to
        the tail of the list stored under the ``KEY_REVIEW`` key for the
        item's ``'prdid'``. Items without a truthy ``'prdid'`` are ignored.

        ``spider`` is accepted but not used here. Always returns ``None``.
        """
        cli = get_cli()

        prdid = item.get('prdid')
        if prdid:
            key = KEY_REVIEW.format(p=prdid)
            record = dict(item)

            cli.rpush(key, json.dumps(record))
            logger.debug(repr(record))
示例#7
0
def del_product_reviews(prdid):
    """Remove the stored review list for ``prdid``.

    The key is built from ``KEY_REVIEW`` and deleted unconditionally.
    Deleting a key that does not exist is harmless, so callers may invoke
    this for products that have no stored reviews.

    NOTE(review): assumes ``get_cli()`` returns a Redis-style client whose
    ``delete`` ignores missing keys -- confirm against ``get_cli``.
    """
    # A separate exists() check costs an extra round trip and leaves a
    # check-then-act window; DEL already ignores keys that do not exist.
    get_cli().delete(KEY_REVIEW.format(p=prdid))
示例#8
0
def set_product_task(prdid):
    """Write (or overwrite) the task record for ``prdid``.

    The stored value is the JSON encoding of ``{'prdid': ..., 'ctime': ...}``
    where ``ctime`` is the ``time.time()`` value taken when this function is
    called. The key comes from ``KEY_PRODUCT_TASK`` formatted with the
    product id.
    """
    record = dict(prdid=prdid, ctime=time.time())
    record_key = KEY_PRODUCT_TASK.format(p=prdid)

    client = get_cli()
    encoded = json.dumps(record)
    client.set(record_key, encoded)
示例#9
0
def del_product_reviews(prdid):
    """Delete any reviews stored for ``prdid``.

    Issues a single unconditional delete on the ``KEY_REVIEW`` key for the
    product. Nothing happens if the key is absent.

    NOTE(review): assumes ``get_cli()`` returns a Redis-style client whose
    ``delete`` is a no-op for missing keys -- confirm against ``get_cli``.
    """
    review_key = KEY_REVIEW.format(p=prdid)

    # No exists() pre-check: it doubles the round trips and the key could
    # change between the check and the delete; DEL tolerates missing keys.
    get_cli().delete(review_key)