def run_forever(self):
    '''
    Run one refresh pass over the flash-sale goods stored in the database,
    then sleep before returning.

    Original author's note: the idea is to refresh only the goods that go on
    sale within about the next 2 hours of the current day; later sessions
    (still listed at the original price) are not refreshed for now. The
    actual window is whatever ``self.is_recent_time`` decides.

    Each database row is used as ``(goods_id, json_text)`` where ``json_text``
    is a JSON object holding ``miaosha_end_time`` in ``%Y-%m-%d %H:%M:%S``
    form. Per row, based on ``self.is_recent_time(end_timestamp)``:

    * ``0``  -> the row is deleted as expired;
    * ``2``  -> the row is left untouched;
    * other -> the goods is refreshed from the live flash-sale listing, or
      deleted when it no longer appears in that listing.

    NOTE(review): despite the name this method performs a single pass;
    presumably the caller invokes it in a loop -- confirm against the caller.

    :return: None
    '''
    tmp_sql_server = SqlServerMyPageInfoSaveItemPipeline()
    try:
        result = list(tmp_sql_server.select_pinduoduo_xianshimiaosha_all_goods_id())
    except TypeError:
        # list(None) raises TypeError when the query returned nothing usable.
        print('TypeError错误, 原因数据库连接失败...(可能维护中)')
        result = None

    if result is not None:
        print('------>>> 下面是数据库返回的所有符合条件的goods_id <<<------')
        print(result)
        print('--------------------------------------------------------')

        print('即将开始实时更新数据, 请耐心等待...'.center(100, '#'))
        index = 1
        # A single parser instance is reused for every goods in this pass.
        pinduoduo_miaosha = PinduoduoParse()

        all_miaosha_goods_list = self.get_all_miaosha_goods_list()

        # Every goods_id currently in the live listing; a set keeps the
        # per-row membership test cheap.
        listed_goods_ids = {entry.get('goods_id') for entry in all_miaosha_goods_list}

        for item in result:
            goods_id = item[0]

            # Read the end time of this goods' flash sale and turn it into an
            # integer epoch (time.mktime interprets the value as local time).
            try:
                raw_end_time = json.loads(item[1]).get('miaosha_end_time')
                miaosha_end_time = int(
                    time.mktime(time.strptime(raw_end_time, '%Y-%m-%d %H:%M:%S')))
            except (TypeError, ValueError, AttributeError):
                # One malformed row must not abort the whole pass.
                print('goods_id为(%s)的miaosha_end_time解析失败, 此处跳过' % goods_id)
                continue

            if index % 50 == 0:
                # Reconnect every 50 processed rows so one long-lived
                # connection does not go unresponsive.
                print('正在重置,并与数据库建立新连接中...')
                tmp_sql_server = SqlServerMyPageInfoSaveItemPipeline()
                print('与数据库的新连接成功建立...')

            if not tmp_sql_server.is_connect_success:
                print('数据库连接失败,数据库可能关闭或者维护中')
                continue

            # Evaluate the time window once so both branch tests see the same answer.
            recent_state = self.is_recent_time(miaosha_end_time)

            if recent_state == 0:
                tmp_sql_server.delete_pinduoduo_expired_goods_id(goods_id=goods_id)
                print('过期的goods_id为(%s)' % goods_id,
                      ', 限时秒杀结束时间为(%s), 删除成功!' % raw_end_time)

            elif recent_state == 2:
                # Deliberately not a break: the rows returned by the database
                # are not guaranteed to be ordered.
                pass

            else:
                # Any other value (the original comment says 1) means the
                # goods is inside the window to be refreshed.
                print('------>>>| 正在更新的goods_id为(%s) | --------->>>@ 索引值为(%d)' % (goods_id, index))

                if goods_id not in listed_goods_ids:
                    # The goods is no longer in the live listing: it has been
                    # withdrawn from the flash sale.
                    tmp_sql_server.delete_pinduoduo_expired_goods_id(goods_id=goods_id)
                    print('该商品[goods_id为(%s)]已被下架限时秒杀活动,此处将其删除' % goods_id)

                else:
                    for listing in all_miaosha_goods_list:
                        if listing.get('goods_id', '') != goods_id:
                            continue

                        pinduoduo_miaosha.get_goods_data(goods_id=goods_id)
                        goods_data = pinduoduo_miaosha.deal_with_data()

                        if goods_data != {}:
                            # Non-empty detail data: merge in the listing
                            # fields and write the row back.
                            stock_info = listing.get('stock_info')
                            activity_stock = stock_info.get('activity_stock')

                            goods_data['stock_info'] = stock_info
                            goods_data['goods_id'] = listing.get('goods_id')
                            if activity_stock > 0:
                                # Prices are only overwritten while activity stock remains.
                                goods_data['price'] = listing.get('price')  # special price before the flash sale
                                goods_data['taobao_price'] = listing.get('taobao_price')  # flash-sale price
                            goods_data['sub_title'] = listing.get('sub_title', '')
                            goods_data['miaosha_time'] = listing.get('miaosha_time')
                            goods_data['miaosha_begin_time'], goods_data['miaosha_end_time'] = \
                                self.get_miaosha_begin_time_and_miaosha_end_time(
                                    miaosha_time=listing.get('miaosha_time'))

                            if activity_stock <= 1:
                                # Live flash-sale stock of 1 or less is flagged as sold out.
                                print('该秒杀商品已售罄...')
                                goods_data['is_delete'] = 1

                            pinduoduo_miaosha.to_update_pinduoduo_xianshimiaosha_table(
                                data=goods_data, pipeline=tmp_sql_server)

                        # Throttle after every detail fetch, empty or not.
                        sleep(PINDUODUO_SLEEP_TIME)

            index += 1
            gc.collect()

        print('全部数据更新完毕'.center(100, '#'))

    if get_shanghai_time().hour == 0:
        # No updates after midnight (Shanghai time): pause for 5.5 hours.
        sleep(60 * 60 * 5.5)
    else:
        sleep(3)
    gc.collect()
def get_spike_hour_goods_info(self):
    '''
    Fetch the current flash-sale listing and insert every goods that is not
    yet stored in the database.

    The listing comes from ``self.get_all_miaosha_goods_list()``. For each
    entry whose ``goods_id`` is usable and not already in the database, the
    goods detail is fetched through ``PinduoduoParse``, merged with the
    listing fields (stock, prices, sub title, flash-sale time) and inserted.
    Entries whose detail data comes back empty are skipped and left for a
    later pass.

    :return: None
    '''
    all_miaosha_goods_list = self.get_all_miaosha_goods_list()
    try:
        self.driver.quit()
    except Exception:
        # Best-effort shutdown of the driver; a failure here must not stop
        # the crawl (KeyboardInterrupt/SystemExit still propagate).
        pass
    gc.collect()

    pinduoduo = PinduoduoParse()
    my_pipeline = SqlServerMyPageInfoSaveItemPipeline()

    if my_pipeline.is_connect_success:
        # Query the stored goods_ids once; None means nothing came back.
        stored_rows = my_pipeline.select_pinduoduo_xianshimiaosha_all_goods_id()
        if stored_rows is None:
            db_goods_ids = set()
        else:
            db_goods_ids = {row[0] for row in stored_rows}

        for item in all_miaosha_goods_list:
            # Original author's note: a crawl at 8:30 for the next day may
            # hit a still-loading page and yield empty values.
            goods_id = item.get('goods_id')

            if not goods_id or goods_id == 'None':
                # Skip the literal 'None' id as before, and also a missing or
                # empty id, which would otherwise crash when building the URL.
                print('该goods_id为"None", 此处跳过')
                continue

            if goods_id in db_goods_ids:
                print('该goods_id已经存在于数据库中, 此处跳过')
                continue

            tmp_url = 'http://mobile.yangkeduo.com/goods.html?goods_id=' + goods_id
            pinduoduo.get_goods_data(goods_id=goods_id)
            goods_data = pinduoduo.deal_with_data()

            if goods_data == {}:
                # Empty detail data: skip now, retry on the next pass.
                print('得到的goods_data为空值,此处先跳过,下次遍历再进行处理')
            else:
                # Merge the listing fields into the detail data and insert.
                stock_info = item.get('stock_info')

                goods_data['stock_info'] = stock_info
                goods_data['goods_id'] = goods_id
                goods_data['spider_url'] = tmp_url
                goods_data['username'] = '******'
                goods_data['price'] = item.get('price')  # special price before the flash sale
                goods_data['taobao_price'] = item.get('taobao_price')  # flash-sale price
                goods_data['sub_title'] = item.get('sub_title', '')
                goods_data['miaosha_time'] = item.get('miaosha_time')

                if stock_info.get('activity_stock') <= 2:
                    # Live flash-sale stock of 2 or less is flagged as sold out.
                    print('该秒杀商品已售罄...')
                    goods_data['is_delete'] = 1

                pinduoduo.insert_into_pinduoduo_xianshimiaosha_table(
                    data=goods_data, pipeline=my_pipeline)

            # Throttle after every detail fetch, empty or not.
            sleep(.9)

        sleep(5)

    # Drop the parser reference before the final collection.
    del pinduoduo
    gc.collect()