Пример #1
0
def xp1024_search_job():
    """Enqueue xp1024 forum listing pages and drain the crawler queues.

    Pushes listing pages 1-9 of three forum sections onto the crawl queue,
    then processes first the list-crawler queue and then the info-crawler
    queue on a 2-thread pool. Blocks until all queued work finishes.
    """
    print(time.strftime("%d %H:%M:%S", time.localtime(time.time())), "xp1024_search_job 开始----")
    # (forum id, publication category) pairs; the same page range applies
    # to every section, so one table-driven loop replaces three copies.
    sections = (
        ("5", "亚洲无码"),
        ("22", "日本骑兵"),
        ("7", "歐美新片"),
    )
    for fid, pub_type in sections:
        for page in range(1, 10):
            http = xp1024Http.xp1024_list_crawer()
            http.url = xp_base_url + "/pw/thread.php?fid=" + fid + "&page=" + str(page)
            http.pub_type = pub_type
            BaseHttpGet.pushHttpGet(http)

    # Drain both crawler queues with 2 worker threads; the range() bound is
    # a safety cap so the dispatch loops cannot spin forever.
    tpool = MyThreadPool.MyThreadPool(2)
    for _ in range(10000):
        if BaseHttpGet.getHttpGetPoolCount(xp1024Http.xp1024_list_crawer.__name__) == 0:
            break
        tpool.callInThread(do_http, xp1024Http.xp1024_list_crawer.__name__)

    for _ in range(10000):
        if BaseHttpGet.getHttpGetPoolCount(xp1024Http.xp1024_info_crawer.__name__) == 0:
            break
        tpool.callInThread(do_http, xp1024Http.xp1024_info_crawer.__name__)
    tpool.wait()
Пример #2
0
def update_prod_item_job():
    """Drain the TBProdItemCrawer queue, then refill it if drained early.

    Dispatches queued product-item crawls on a 10-thread pool until shortly
    before this job's next scheduled run. If the dispatch loop completes
    early, enqueues up to 5000 products that still lack a shopid.
    """
    print(time.strftime("%d %H:%M:%S", time.localtime(time.time())),
          "do_update_shop_create_time 开始----")
    job = sched.get_job(job_id="update_prod_item_job")
    # Next scheduled run as a sortable YYYYMMDDHHMMSS integer
    # (renamed from `next`, which shadowed the builtin).
    next_run = int(job.next_run_time.strftime('%Y%m%d%H%M%S'))
    clas_name = tbHttp.TBProdItemCrawer.__name__
    # 10 worker threads process queued crawls.
    tpool = MyThreadPool.MyThreadPool(10)
    for i in range(2000):
        now = int(datetime.datetime.now().strftime('%Y%m%d%H%M%S'))
        # Stop ~7 seconds before the next scheduled run to avoid overlap.
        if next_run - now < 7:
            print(
                time.strftime("%d %H:%M:%S", time.localtime(time.time())), i,
                "update_shop_create_time_job 结束--------------------------------------------------------"
            )
            return
        tpool.callInThread(do_http, clas_name)
    # Dispatch loop finished early: refill the queue with products that
    # still lack a shopid (renamed from `list`, which shadowed the builtin).
    prods = models.TTbShopProd.objects.filter(shopid=None)[0:5000]
    for p in prods:
        http = tbHttp.TBProdItemCrawer()
        http.product_id = p.product_id
        http.uid = p.uid
        BaseHttpGet.pushHttpGet(http)
    print(time.strftime("%d %H:%M:%S", time.localtime(time.time())),
          "do_update_shop_create_time 提前结束----")
Пример #3
0
def prod_search_job():
    """Drain the TBProdSearchCrawer queue, then seed new searches if drained early.

    Dispatches queued product searches on a 40-thread pool until shortly
    before this job's next scheduled run. If the dispatch loop completes
    early, pushes a fresh batch of random product-name queries, skipping
    keywords already present in the pool.
    """
    print(time.strftime("%d %H:%M:%S", time.localtime(time.time())),
          "prod_search_job 开始----")
    job = sched.get_job(job_id="prod_search_job")
    # Next scheduled run as a sortable YYYYMMDDHHMMSS integer
    # (renamed from `next`, which shadowed the builtin).
    next_run = int(job.next_run_time.strftime('%Y%m%d%H%M%S'))
    clas_name = tbHttp.TBProdSearchCrawer.__name__
    # 40 worker threads process queued searches.
    tpool = MyThreadPool.MyThreadPool(40)
    for i in range(10000):
        now = int(datetime.datetime.now().strftime('%Y%m%d%H%M%S'))
        # Stop ~3 seconds before the next scheduled run to avoid overlap.
        if next_run - now < 3:
            print(
                time.strftime("%d %H:%M:%S", time.localtime(time.time())), i,
                "prod_search_job 结束--------------------------------------------------------"
            )
            return
        tpool.callInThread(do_http, clas_name)
    # Dispatch loop finished early: enqueue a new batch of search queries.
    qlist = tbDao.random_prod_name()
    city = chinaCity.getFristCity()
    for q in qlist:
        if tbpool.ProdQuerykeyExist(q):
            # Keyword already queued or processed — skip it.
            continue
        prod = tbHttp.TBProdSearchCrawer()
        prod.pageno = 1
        prod.q = q
        prod.city = city
        BaseHttpGet.pushHttpGet(prod)
    print(time.strftime("%d %H:%M:%S", time.localtime(time.time())),
          "prod_search_job 提前结束----")
Пример #4
0
def update_shop_create_time_job():
    """Refill the TBShopCreateTimeCrawer queue if empty, then drain it.

    When the queue is empty, enqueues up to 5000 shops that still lack a
    creation time. Then dispatches queued crawls on a 5-thread pool until
    shortly before this job's next scheduled run.
    """
    print(time.strftime("%d %H:%M:%S", time.localtime(time.time())),
          "do_update_shop_create_time 开始----")
    job = sched.get_job(job_id="update_shop_create_time_job")
    # Next scheduled run as a sortable YYYYMMDDHHMMSS integer
    # (renamed from `next`, which shadowed the builtin).
    next_run = int(job.next_run_time.strftime('%Y%m%d%H%M%S'))
    clas_name = tbHttp.TBShopCreateTimeCrawer.__name__
    count = BaseHttpGet.getHttpGetPoolCount(clas_name)

    # Queue empty: enqueue a batch of shops missing their creation time
    # (renamed from `list`, which shadowed the builtin).
    if count == 0:
        shops = models.TTbShop.objects.filter(shop_createtime=None)[0:5000]
        for shop in shops:
            http = tbHttp.TBShopCreateTimeCrawer()
            http.shopid = shop.shopid
            http.isProxy = True  # route these requests through a proxy
            BaseHttpGet.pushHttpGet(http)
    # 5 worker threads process the queue.
    tpool = MyThreadPool.MyThreadPool(5)
    for i in range(10000):
        now = int(datetime.datetime.now().strftime('%Y%m%d%H%M%S'))
        # Stop ~10 seconds before the next scheduled run to avoid overlap.
        if next_run - now < 10:
            print(
                time.strftime("%d %H:%M:%S", time.localtime(time.time())), i,
                "update_shop_create_time_job 结束--------------------------------------------------------"
            )
            return
        tpool.callInThread(do_http, clas_name)
    print(time.strftime("%d %H:%M:%S", time.localtime(time.time())),
          "do_update_shop_create_time 提前结束----")