Exemplo n.º 1
0
    for product in products:
        node_id = product.get('node_id')
        asin = product.get('asin')

        # print('node_id: ' + node_id + ' / asin : ' + asin)
        call_item_lookup_api(node_id, asin)


def crawl_amazon_bests():
    """Consume the Amazon best-seller ASIN queue until an error occurs.

    Repeatedly blocks on ``rconn.blpop`` for ``REDIS_AMZ_BEST_ASIN_QUEUE``
    and forwards every non-``None`` payload to ``get_products``. Any
    exception raised inside the loop is logged with ``log.error`` and ends
    the loop; it is not re-raised.
    """
    try:
        while True:
            # The blpop result is unpacked as a (key, value) pair; only the
            # value is used.
            _queue, payload = rconn.blpop([REDIS_AMZ_BEST_ASIN_QUEUE])
            if payload is None:
                continue
            get_products(payload)
    except Exception as err:
        log.error(str(err))


def start(rconn):
    """Entry point of the crawler: run ``crawl_amazon_bests``.

    ``rconn`` is accepted but not used in this body; ``crawl_amazon_bests``
    takes no arguments and refers to a non-local ``rconn`` name itself.
    """
    crawl_amazon_bests()


if __name__ == '__main__':
    # Script entry point: run the crawler and log (without re-raising)
    # any exception that escapes it.
    try:
        start(rconn)
    except Exception as err:
        log.error(str(err))
Exemplo n.º 2
0
  path = storage.upload_file_to_bucket(AWS_OBJ_IMAGE_BUCKET, file, key, is_public=is_public)
  obj['image_url'] = path
  log.debug('save_to_storage done')

def start(rconn):
  """Run the product-classification dispatch loop; does not return normally.

  Stores the result of ``get_latest_crawl_version()`` in the module-level
  ``version_id``, starts ``Timer(HEALTH_CHECK_TIME, check_health, ())``,
  then blocks on ``REDIS_PRODUCT_CLASSIFY_QUEUE`` forever, handing every
  non-``None`` payload to ``analyze_product``.

  Args:
    rconn: Connection object exposing ``blpop`` (presumably a Redis
      client -- confirm against the caller).
  """
  # Both globals are declared once up front rather than inside the loop.
  global version_id, heart_bit
  version_id = get_latest_crawl_version()

  log.info('Start dispatch_job')

  Timer(HEALTH_CHECK_TIME, check_health, ()).start()
  while True:
    # Only the payload of the (key, value) pair is needed.
    _, value = rconn.blpop([REDIS_PRODUCT_CLASSIFY_QUEUE])
    if value is not None:
      analyze_product(value)
    # Set after every dequeue; presumably read by check_health -- confirm.
    heart_bit = True
    # NOTE: a per-process cap (calling delete_pod() once more than
    # MAX_PROCESS_NUM items were handled) was sketched here and is disabled.

if __name__ == '__main__':
  # Script entry point: if anything escapes the dispatch loop, log it and
  # call delete_pod().
  try:
    log.info('Start bl-object-classifier:3')
    start(rconn)
  except Exception as err:
    log.error('main; ' + str(err))
    delete_pod()
Exemplo n.º 3
0
            "Exception when calling update_product_by_hostcode_and_productno: %s\n"
            % e)
        # delete_pod()


def keep_the_job():
    """Push ``HOST_CODE`` onto ``REDIS_HOST_CRAWL_QUEUE`` and log it.

    Uses the module-level ``rconn`` connection. The log line concatenates
    ``HOST_CODE`` directly, so it must be a ``str`` for the call to succeed.
    """
    rconn.lpush(REDIS_HOST_CRAWL_QUEUE, HOST_CODE)
    log.info('keep_the_job:' + HOST_CODE)


def notify_to_classify(host_code):
    """Log the call, then push ``host_code`` onto ``REDIS_HOST_CLASSIFY_QUEUE``.

    Args:
        host_code: Value pushed onto the queue via the module-level ``rconn``.
    """
    log.info('notify_to_classify')
    rconn.lpush(REDIS_HOST_CLASSIFY_QUEUE, host_code)


if __name__ == '__main__':
    # Script entry point for the crawler.
    log.info('Start bl-crawler:1')

    try:
        save_status_on_crawl_job(HOST_CODE, STATUS_DOING)
        # Host group 'HG8000' is routed to crawl_amazon; every other group
        # goes through crawl.
        run_crawl = crawl_amazon if HOST_GROUP == 'HG8000' else crawl
        run_crawl(HOST_CODE, HOST_GROUP)
    except Exception as err:
        # Log the failure, print the traceback, then call delete_pod().
        log.error('global exception')
        log.error(err)
        log.error(str(err))
        traceback.print_exc()
        delete_pod()