async def run_forever():
    '''
    Run one refresh pass over the tiantian-tejia goods rows stored in the database.

    Steps, as performed by this coroutine:
      1. Build a logger whose file name contains the current date.
      2. Load every row via ``select_taobao_tiantian_tejia_all_goods_id()``.
         A ``TypeError`` while materialising the result is logged and the
         pass is skipped.
      3. For each row (``item[0]`` is used as the goods_id, ``item[1]`` as the
         is_delete flag, ``item[2]`` as the tejia end time):
           * ``item[1] == 1``      -> only log, nothing is deleted or updated;
           * end time in the past  -> hand over to
             ``update_expired_goods_to_normal_goods``;
           * otherwise             -> re-crawl the goods and update its row.
      4. Pause before returning: 5.5 hours when ``get_shanghai_time().hour``
         is 0, otherwise 5 seconds.

    The final pause uses ``await asyncio.sleep`` so the event loop is not
    blocked while waiting.

    :return: True, always.
    '''
    # The logger is built inside the function rather than as a module-level
    # global: a global would keep writing to the same file forever, whereas a
    # fresh logger per call yields one log file per day.
    my_lg = set_logger(log_file_name=MY_SPIDER_LOGS_PATH + '/淘宝/天天特价/' +
                       str(get_shanghai_time())[0:10] + '.txt',
                       console_log_level=INFO,
                       file_log_level=ERROR)

    tmp_sql_server = SqlServerMyPageInfoSaveItemPipeline()
    try:
        result = list(
            tmp_sql_server.select_taobao_tiantian_tejia_all_goods_id())
    except TypeError:
        # list() of a non-iterable result lands here; the log message
        # attributes it to a failed database connection.
        my_lg.error('TypeError错误, 导致原因: 数据库连接失败...(可能维护中)')
        result = None

    if result is not None:
        my_lg.info('------>>> 下面是数据库返回的所有符合条件的goods_id <<<------')
        my_lg.info(str(result))
        my_lg.info('--------------------------------------------------------')

        my_lg.info('即将开始实时更新数据, 请耐心等待...'.center(100, '#'))
        index = 1
        for item in result:
            # Whenever the running index is a multiple of 50, replace the
            # pipeline object with a freshly constructed one.
            if index % 50 == 0:
                my_lg.info('正在重置,并与数据库建立新连接中...')
                tmp_sql_server = SqlServerMyPageInfoSaveItemPipeline()
                my_lg.info('与数据库的新连接成功建立...')

            if tmp_sql_server.is_connect_success:
                tejia_end_time = item[2]

                if item[1] == 1:
                    # Goods already flagged is_delete=1: log it and move on,
                    # no real delete is performed.
                    my_lg.info(
                        '&&&&&& 该商品({0})原先状态为is_delete=1, 不进行实际删除操作! 索引为({1})'.
                        format(item[0], str(index)))
                    index += 1

                elif tejia_end_time < datetime.datetime.now():
                    # Expired goods are not deleted; they are downgraded to
                    # regular promotion goods. The helper returns the next
                    # index value.
                    index = await update_expired_goods_to_normal_goods(
                        goods_id=item[0],
                        index=index,
                        tmp_sql_server=tmp_sql_server,
                        logger=my_lg)

                else:
                    # Still-active tiantian-tejia goods: refresh the goods
                    # info only. Per the original author's notes, listed goods
                    # are not taken down early by the platform, so the
                    # special-price time window is deliberately not refreshed.
                    my_lg.info(
                        '------>>>| 正在更新的goods_id为(%s) | --------->>>@ 索引值为(%s)'
                        % (item[0], str(index)))
                    taobao = TaoBaoLoginAndParse(logger=my_lg)
                    taobao.get_goods_data(item[0])
                    goods_data = taobao.deal_with_data(goods_id=item[0])
                    if goods_data != {}:
                        goods_data['goods_id'] = item[0]
                        await taobao.update_taobao_tiantiantejia_table(
                            data=goods_data, pipeline=tmp_sql_server)
                    else:
                        # Nothing parsed for this goods: back off for 4 s.
                        await asyncio.sleep(4)
                    await asyncio.sleep(TAOBAO_REAL_TIMES_SLEEP_TIME)
                    index += 1
                    gc.collect()

            else:
                # Note: index is not advanced on this path.
                my_lg.error('数据库连接失败,数据库可能关闭或者维护中')
            gc.collect()
        my_lg.info('全部数据更新完毕'.center(100, '#'))

    # No updating right after midnight: during hour 0 pause for 5.5 hours,
    # otherwise only a short 5 s pause. Awaited so the event loop stays free.
    if get_shanghai_time().hour == 0:
        await asyncio.sleep(60 * 60 * 5.5)
    else:
        await asyncio.sleep(5)
    gc.collect()

    return True
示例#2
0
    async def deal_with_all_goods_id(self):
        '''
        Crawl the goods of every category and store them in the database.

        The category data comes from ``self.get_all_goods_list()``; the goods
        already stored come from
        ``select_taobao_tiantian_tejia_all_goods_id()``. For each crawled
        goods:
          * already stored and its stored end time is in the past -> the old
            row is deleted and the goods is inserted again (a regular goods
            that became a tiantian-tejia goods again);
          * already stored otherwise -> skipped;
          * not stored yet -> inserted.

        Database connection failures are logged, not raised.

        :return: True, always.
        '''
        sort_data = await self.get_all_goods_list()
        my_pipeline = SqlServerMyPageInfoSaveItemPipeline()
        index = 1

        if not my_pipeline.is_connect_success:
            self.my_lg.error('数据库连接失败!')
            gc.collect()
            return True

        self.my_lg.info('正在获取天天特价db原有goods_id, 请耐心等待...')
        try:
            db_ = list(my_pipeline.select_taobao_tiantian_tejia_all_goods_id())
        except TypeError:
            # The select handed back something that cannot be iterated
            # (e.g. None); treat it like a failed connection instead of
            # letting the TypeError escape.
            self.my_lg.error('数据库连接失败!')
            gc.collect()
            return True

        # goods_id -> stored end time (row[0] -> row[2]). Built once so each
        # crawled item is a single dict lookup. setdefault keeps the FIRST row
        # seen for a goods_id.
        db_end_time_by_id = {}
        for row in db_:
            db_end_time_by_id.setdefault(row[0], row[2])
        self.my_lg.info('获取完毕!!!')

        for item in sort_data:
            tejia_goods_list = await self.get_tiantiantejia_goods_list(
                data=item.get('data', []))
            self.my_lg.info(tejia_goods_list)

            for tmp_item in tejia_goods_list:
                goods_id = tmp_item.get('goods_id', '')

                if goods_id in db_end_time_by_id:
                    # The goods_id is already present in the database.
                    tmp_end_time = db_end_time_by_id[goods_id]
                    # A missing end time (None or '') cannot be compared with
                    # a datetime, so such rows are handled as "already stored".
                    has_end_time = (tmp_end_time is not None
                                    and tmp_end_time != '')

                    if has_end_time and tmp_end_time < datetime.datetime.now():
                        # A regular goods turned into a tiantian-tejia goods
                        # again: delete the stale row first, then re-insert.
                        self.my_lg.info('##### 该商品由常规商品又转换为天天特价商品! #####')
                        _ = await my_pipeline.delete_taobao_tiantiantejia_expired_goods_id(
                            goods_id=goods_id,
                            logger=self.my_lg)
                        if _ is False:
                            # Delete failed: do not insert.
                            continue

                        index = await self.insert_into_table(
                            tmp_item=tmp_item,
                            category=item['category'],
                            current_page=item['current_page'],
                            my_pipeline=my_pipeline,
                            index=index,
                        )
                        await asyncio.sleep(TAOBAO_REAL_TIMES_SLEEP_TIME)

                    else:
                        self.my_lg.info('该goods_id已经存在于数据库中, 此处跳过')

                else:
                    # Reconnect whenever index is a multiple of 50, to avoid a
                    # single long-lived connection becoming unresponsive.
                    if index % 50 == 0:
                        self.my_lg.info('正在重置,并与数据库建立新连接中...')
                        my_pipeline = SqlServerMyPageInfoSaveItemPipeline()
                        self.my_lg.info('与数据库的新连接成功建立...')

                    if my_pipeline.is_connect_success:
                        index = await self.insert_into_table(
                            tmp_item=tmp_item,
                            category=item['category'],
                            current_page=item['current_page'],
                            my_pipeline=my_pipeline,
                            index=index,
                        )
                        await asyncio.sleep(TAOBAO_REAL_TIMES_SLEEP_TIME)

                    else:
                        self.my_lg.error('数据库连接失败!')

        gc.collect()

        return True