async def _get_new_ali_obj(self, index) -> None:
    """Replace ``self.zhe_800`` with a fresh ``Zhe800Parse`` on every 10th index.

    Nothing happens unless ``index`` is a multiple of 10. On those calls the
    current parser attribute (if any) is dropped, a garbage collection is
    forced, and a new parser is stored on ``self.zhe_800``.

    :param index: running counter supplied by the caller.
    :return: None
    """
    if index % 10 != 0:
        return

    try:
        del self.zhe_800
    except AttributeError:
        # No parser stored yet (or it was already removed); the original
        # retried the same ``del`` under a bare ``except``, which could
        # never succeed and also hid unrelated errors.
        pass
    collect()
    self.zhe_800 = Zhe800Parse()
async def _one_update(self, **kwargs) -> bool:
    """Refresh one flash-sale goods row that is still listed in its session.

    Keyword Args:
        miaosha_goods_list: iterable of dict-like goods entries; only the
            first entry whose ``zid`` equals ``goods_id`` is processed.
        goods_id: id of the goods to refresh.

    Returns:
        ``False`` when no entry matches or the parsed page data is empty;
        otherwise the value returned by the shelves handler (activity stock
        not above zero) or by the table-update call.
    """
    goods_id = kwargs.get('goods_id')
    session_goods = kwargs.get('miaosha_goods_list')
    parser = Zhe800Parse()
    outcome = False

    # Only the first matching entry is ever handled.
    target = next(
        (entry for entry in session_goods if entry.get('zid', '') == goods_id),
        None,
    )

    if target is not None:
        parser.get_goods_data(goods_id=goods_id)
        goods_data = parser.deal_with_data()

        if goods_data != {}:
            # Page data is available: merge in the session-level fields.
            stock_info = target.get('stock_info')
            goods_data['stock_info'] = stock_info
            goods_data['goods_id'] = str(target.get('zid'))

            if stock_info.get('activity_stock') > 0:
                goods_data['price'] = target.get('price')
                goods_data['taobao_price'] = target.get('taobao_price')
                goods_data['sub_title'] = target.get('sub_title')

                miaosha_time = target.get('miaosha_time')
                goods_data['miaosha_time'] = miaosha_time
                begin_time, end_time = get_miaosha_begin_time_and_miaosha_end_time(
                    miaosha_time=miaosha_time)
                goods_data['miaosha_begin_time'] = begin_time
                goods_data['miaosha_end_time'] = end_time

                if goods_data.get('is_delete', 0) == 1:
                    # Sold out: logged, but the row is still updated below.
                    self.lg.info('该商品[{0}]已售罄...'.format(goods_id))

                outcome = parser.to_update_zhe_800_xianshimiaosha_table(
                    data=goods_data,
                    pipeline=self.sql_cli)
            else:
                # No activity stock left: hand over to the shelves handler
                # instead of updating the flash-sale table.
                self.lg.info('该商品参与活动的对应库存为0')
                outcome = _handle_goods_shelves_in_auto_goods_table(
                    goods_id=goods_id,
                    logger=self.lg,
                    update_sql_str=z8_update_str_6,
                    sql_cli=self.sql_cli,
                )

    collect()

    return outcome
async def _update_db(self):
    """Run the update loop forever: load stored rows, refresh them in batches, sleep.

    Each round:
      1. installs a new logger on ``self.lg``;
      2. loads the stored rows via ``self._get_db_old_data()``;
      3. if rows came back, walks them in slices of ``self.concurrency``,
         creating one ``_update_one_goods_info`` task per row on
         ``self.loop`` and awaiting each slice before the next;
      4. sleeps 5.5 hours when the Shanghai hour is 0, otherwise 10 seconds;
      5. drops ``self.zhe_800`` and forces a garbage collection.

    The coroutine never returns.
    """
    while True:
        self.lg = await self._get_new_logger(logger_name=get_uuid1())
        result = await self._get_db_old_data()

        if result is not None:
            self.goods_index = 1
            tasks_params_list = TasksParamsListObj(
                tasks_params_list=result,
                step=self.concurrency)
            self.zhe_800 = Zhe800Parse()
            index = 1

            while True:
                try:
                    # The slicer signals exhaustion with AssertionError.
                    slice_params_list = tasks_params_list.__next__()
                except AssertionError:
                    break

                tasks = []
                for item in slice_params_list:
                    db_goods_info_obj = Z8DbGoodsInfoObj(item=item, logger=self.lg)
                    self.lg.info('创建 task goods_id: {}'.format(
                        db_goods_info_obj.goods_id))
                    tasks.append(self.loop.create_task(
                        self._update_one_goods_info(
                            db_goods_info_obj=db_goods_info_obj,
                            index=index)))
                    index += 1

                await _get_async_task_result(tasks=tasks, logger=self.lg)
                # ``tasks`` is always bound here, so no guard is needed.
                del tasks

            self.lg.info('全部数据更新完毕'.center(100, '#'))

        if get_shanghai_time().hour == 0:
            # No updates after midnight: pause for 5.5 hours.
            await async_sleep(60 * 60 * 5.5)
        else:
            await async_sleep(10)

        try:
            del self.zhe_800
        except AttributeError:
            # No parser was created this round (no rows were loaded).
            pass
        collect()
def _update_old_goods_info(self, tmp_sql_server, result):
    """Refresh, or delete, every stored flash-sale row in ``result``.

    For each row (``row[0]`` goods id, ``row[1]`` JSON text holding
    ``miaosha_begin_time``, ``row[2]`` session id):

    * ``self.is_recent_time(...) == 0`` -> the row is deleted as expired;
    * ``== 2`` -> the row is left untouched;
    * anything else -> the row is refreshed from its live session.

    The database connection is rebuilt on every 50th row. Unlike the
    previous implementation, a failed session fetch no longer stalls the row
    counter or skips the per-row garbage collection.

    :param tmp_sql_server: database pipeline used for deletes and updates.
    :param result: iterable of stored rows.
    :return: None
    """
    for index, item in enumerate(result, start=1):
        goods_id = item[0]
        begin_time_text = json.loads(item[1]).get('miaosha_begin_time')
        # Whole-second epoch; truncating the float replaces the old
        # str(...)[0:10] slicing, which only worked for 10-digit epochs.
        miaosha_begin_time = int(time.mktime(
            time.strptime(begin_time_text, '%Y-%m-%d %H:%M:%S')))

        # A fresh parser per row, so nothing large outlives the iteration.
        zhe_800_miaosha = Zhe800Parse()
        if index % 50 == 0:
            # Reconnect every 50 rows to avoid a stalled long-lived connection.
            print('正在重置,并与数据库建立新连接中...')
            tmp_sql_server = SqlServerMyPageInfoSaveItemPipeline()
            print('与数据库的新连接成功建立...')

        if not tmp_sql_server.is_connect_success:
            print('数据库连接失败,数据库可能关闭或者维护中')
        else:
            recent_flag = self.is_recent_time(miaosha_begin_time)
            if recent_flag == 0:
                tmp_sql_server._delete_table(
                    sql_str=self.delete_sql_str,
                    params=(goods_id))
                print('过期的goods_id为(%s)' % goods_id,
                      ', 限时秒杀开始时间为(%s), 删除成功!' % begin_time_text)
            elif recent_flag != 2:
                # Rows are not ordered by time, so a flag of 2 is skipped
                # rather than ending the loop.
                print('------>>>| 正在更新的goods_id为(%s) | --------->>>@ 索引值为(%d)'
                      % (goods_id, index))
                self._refresh_goods_in_update_window(
                    tmp_sql_server, item, zhe_800_miaosha)

        gc.collect()

    print('全部数据更新完毕'.center(100, '#'))
    gc.collect()

    return


def _refresh_goods_in_update_window(self, tmp_sql_server, item, zhe_800_miaosha):
    """Refresh one stored row from its live session, or delete it if delisted."""
    goods_id = item[0]
    try:
        session_data = self.zhe_800_spike._get_one_session_id_data(
            base_session_id=str(item[2]))
    except Exception as e:
        # Best effort: report and move on to the next row.
        print(e)
        return

    blocks = session_data.get('data', {}).get('blocks', [])
    if blocks == []:
        print('该session_id不存在,此处跳过')
        return

    deals = [block.get('deal', {}) for block in blocks]
    miaosha_goods_list = self.get_miaoshao_goods_info_list(data=deals)

    if goods_id not in [goods.get('zid') for goods in miaosha_goods_list]:
        # The goods was pulled from the session: drop the stored row.
        print('该商品已被下架限时秒杀活动,此处将其删除')
        tmp_sql_server._delete_table(
            sql_str=self.delete_sql_str,
            params=(goods_id))
        print('下架的goods_id为(%s)' % goods_id, ', 删除成功!')
        return

    for goods in miaosha_goods_list:
        if goods.get('zid', '') != goods_id:
            continue

        zhe_800_miaosha.get_goods_data(goods_id=goods_id)
        goods_data = zhe_800_miaosha.deal_with_data()
        if goods_data == {}:
            continue

        stock_info = goods.get('stock_info')
        goods_data['stock_info'] = stock_info
        goods_data['goods_id'] = str(goods.get('zid'))
        if stock_info.get('activity_stock') > 0:
            # Prices are only overwritten while activity stock remains.
            goods_data['price'] = goods.get('price')
            goods_data['taobao_price'] = goods.get('taobao_price')
        goods_data['sub_title'] = goods.get('sub_title')
        goods_data['miaosha_time'] = goods.get('miaosha_time')
        goods_data['miaosha_begin_time'], goods_data['miaosha_end_time'] = \
            get_miaosha_begin_time_and_miaosha_end_time(
                miaosha_time=goods.get('miaosha_time'))

        zhe_800_miaosha.to_update_zhe_800_xianshimiaosha_table(
            data=goods_data,
            pipeline=tmp_sql_server)
def get_spike_hour_goods_info(self):
    """Crawl every candidate flash-sale session and store goods not yet in the DB.

    Session ids from ``BASE_SESSION_ID`` up to (not including)
    ``MAX_SESSION_ID`` are visited in steps of 2. A session is skipped when
    it has no blocks, when no begin timestamp can be read from it, or when
    ``self.is_recent_time`` rejects that timestamp. For the remaining
    sessions each goods whose ``zid`` is not already stored is parsed and
    inserted.

    :return: None
    """
    # NOTE(review): assumes both session-id constants are ints — confirm.
    for base_session_id in range(BASE_SESSION_ID, MAX_SESSION_ID, 2):
        print('待抓取的session_id为: ', base_session_id)
        data = self._get_one_session_id_data(base_session_id=base_session_id)
        sleep(.3)

        blocks = data.get('data', {}).get('blocks', [])
        if blocks == []:
            # The session id does not exist.
            continue

        try:
            begin_time_text = str(
                blocks[0].get('deal', {}).get('begin_time', ''))[:10]
            if begin_time_text == '':
                if blocks[0].get('showcase', {}) == {}:
                    raise Exception
                # Future session: the first block is a showcase, so the
                # begin time is read from the second block instead.
                print('*** 未来时间 ***')
                begin_time_text = str(
                    blocks[1].get('deal', {}).get('begin_time', ''))[:10]
            begin_times_timestamp = int(begin_time_text)
        except Exception as e:
            print('遇到严重错误: ', e)
            continue

        print('秒杀时间为: ', timestamp_to_regulartime(begin_times_timestamp))
        if not self.is_recent_time(timestamp=begin_times_timestamp):
            continue

        try:
            deals = [block.get('deal', {}) for block in blocks]
        except Exception as e:
            print('遇到严重错误: ', e)
            continue

        # ``blocks[0]`` was read above, so ``deals`` cannot be empty here.
        miaosha_goods_list = self.get_miaoshao_goods_info_list(data=deals)

        zhe_800 = Zhe800Parse()
        my_pipeline = SqlServerMyPageInfoSaveItemPipeline()
        if my_pipeline.is_connect_success:
            sql_str = 'select goods_id, miaosha_time, session_id from dbo.zhe_800_xianshimiaosha where site_id=14'
            # A set gives constant-time membership checks per goods.
            known_goods_ids = {
                row[0] for row in my_pipeline._select_table(sql_str=sql_str)
            }
            for item in miaosha_goods_list:
                if item.get('zid', '') in known_goods_ids:
                    print('该goods_id已经存在于数据库中, 此处跳过')
                    continue

                tmp_url = 'https://shop.zhe800.com/products/' + str(item.get('zid', ''))
                goods_id = zhe_800.get_goods_id_from_url(tmp_url)
                zhe_800.get_goods_data(goods_id=goods_id)
                goods_data = zhe_800.deal_with_data()
                if goods_data == {}:
                    # Nothing could be parsed for this goods.
                    continue

                goods_data['stock_info'] = item.get('stock_info')
                goods_data['goods_id'] = str(item.get('zid'))
                goods_data['spider_url'] = tmp_url
                goods_data['username'] = '******'
                goods_data['price'] = item.get('price')
                goods_data['taobao_price'] = item.get('taobao_price')
                goods_data['sub_title'] = item.get('sub_title')
                goods_data['miaosha_time'] = item.get('miaosha_time')
                goods_data['miaosha_begin_time'], goods_data['miaosha_end_time'] = \
                    get_miaosha_begin_time_and_miaosha_end_time(
                        miaosha_time=item.get('miaosha_time'))
                goods_data['session_id'] = str(base_session_id)

                zhe_800.insert_into_zhe_800_xianshimiaosha_table(
                    data=goods_data,
                    pipeline=my_pipeline)
                sleep(ZHE_800_SPIKE_SLEEP_TIME)   # throttle the crawl

        # Release the parser before moving on to the next session.
        del zhe_800
        gc.collect()
def get_spike_hour_goods_info(self):
    """Crawl every candidate flash-sale session and store goods not yet in the DB.

    Session ids from ``BASE_SESSION_ID`` up to (not including)
    ``MAX_SESSION_ID`` are visited in steps of 2. For each id the API page
    is fetched with ``self.get_url_body`` and the JSON inside its ``<pre>``
    element is parsed. Sessions with ``status == 0``, an unreadable payload,
    a begin time rejected by ``self.is_recent_time``, or an empty ``jsons``
    list are skipped. A malformed payload now skips that session instead of
    aborting the whole crawl.

    :return: None
    """
    pre_payload = re.compile(r'<pre.*?>(.*)</pre>')   # compiled once, reused per session

    # NOTE(review): assumes both session-id constants are ints — confirm.
    for base_session_id in range(BASE_SESSION_ID, MAX_SESSION_ID, 2):
        print('待抓取的session_id为: ', base_session_id)
        tmp_url = 'https://zapi.zhe800.com/zhe800_n_api/xsq/get?sessionId={0}&page=1&per_page=1000'.format(
            str(base_session_id),
        )
        body = self.get_url_body(url=tmp_url)
        payloads = pre_payload.findall(body)
        if payloads == []:
            print('获取到的data为空!')
            continue

        try:
            data = json.loads(payloads[0])
        except ValueError as e:
            print('遇到严重错误: ', e)
            continue

        if data.get('status') == 0:
            # The session id does not exist.
            print('该session_id不存在,此处跳过')
            continue

        try:
            begin_times = data.get('begin_times')[0]
            print('秒杀时间为: ', begin_times)
            # e.g. "2017-09-28 10:00:00" -> whole-second epoch timestamp.
            begin_times_timestamp = int(time.mktime(
                time.strptime(begin_times, '%Y-%m-%d %H:%M:%S')))
        except (TypeError, IndexError, ValueError) as e:
            print('遇到严重错误: ', e)
            continue

        if not self.is_recent_time(timestamp=begin_times_timestamp):
            continue

        deals = data.get('jsons', [])
        if deals == []:
            print('该sessionid没有相关key为jsons的数据')
            continue

        miaosha_goods_list = self.get_miaoshao_goods_info_list(data=deals)

        zhe_800 = Zhe800Parse()
        my_pipeline = SqlServerMyPageInfoSaveItemPipeline()
        if my_pipeline.is_connect_success:
            # A set gives constant-time membership checks per goods.
            known_goods_ids = {
                row[0]
                for row in my_pipeline.select_zhe_800_xianshimiaosha_all_goods_id()
            }
            for item in miaosha_goods_list:
                if item.get('zid', '') in known_goods_ids:
                    print('该goods_id已经存在于数据库中, 此处跳过')
                    continue

                tmp_url = 'https://shop.zhe800.com/products/' + str(item.get('zid', ''))
                goods_id = zhe_800.get_goods_id_from_url(tmp_url)
                zhe_800.get_goods_data(goods_id=goods_id)
                goods_data = zhe_800.deal_with_data()
                if goods_data == {}:
                    # Nothing could be parsed for this goods.
                    continue

                goods_data['stock_info'] = item.get('stock_info')
                goods_data['goods_id'] = str(item.get('zid'))
                goods_data['spider_url'] = tmp_url
                goods_data['username'] = '******'
                goods_data['price'] = item.get('price')
                goods_data['taobao_price'] = item.get('taobao_price')
                goods_data['sub_title'] = item.get('sub_title')
                goods_data['miaosha_time'] = item.get('miaosha_time')
                goods_data['session_id'] = str(base_session_id)

                zhe_800.insert_into_zhe_800_xianshimiaosha_table(
                    data=goods_data,
                    pipeline=my_pipeline)
                sleep(ZHE_800_SPIKE_SLEEP_TIME)   # throttle the crawl

        # Release the parser before moving on to the next session.
        del zhe_800
        gc.collect()
def get_spike_hour_goods_info(self):
    """Crawl every candidate flash-sale session and store goods not yet in the DB.

    Session ids from ``BASE_SESSION_ID`` up to (not including)
    ``MAX_SESSION_ID`` are visited in steps of 2. A session is skipped when
    it has no blocks, when ``self._get_begin_times_timestamp`` raises, or
    when ``self.is_recent_time`` rejects the timestamp. Goods whose ``zid``
    is not already known are parsed and inserted; ids of successful inserts
    are remembered so they are not inserted twice within the same session.

    :return: None
    """
    # NOTE(review): assumes both session-id constants are ints — confirm.
    for base_session_id in range(BASE_SESSION_ID, MAX_SESSION_ID, 2):
        print('待抓取的session_id为: ', base_session_id)
        data = self._get_one_session_id_data(base_session_id=base_session_id)
        sleep(.5)

        blocks = data.get('data', {}).get('blocks', [])
        if blocks == []:
            # The session id does not exist.
            continue

        try:
            begin_times_timestamp = self._get_begin_times_timestamp(data)
        except Exception as e:
            print('遇到严重错误: ', e)
            continue

        print('秒杀时间为: ', timestamp_to_regulartime(begin_times_timestamp))
        if not self.is_recent_time(timestamp=begin_times_timestamp):
            # The session's begin time is outside the accepted window.
            continue

        try:
            deals = [block.get('deal', {}) for block in blocks]
        except Exception as e:
            print('遇到严重错误: ', e)
            continue

        if deals == []:
            print('该sessionid没有相关key为jsons的数据')
            continue

        miaosha_goods_list = self.get_miaoshao_goods_info_list(data=deals)

        zhe_800 = Zhe800Parse()
        my_pipeline = SqlServerMyPageInfoSaveItemPipeline()
        if my_pipeline.is_connect_success:
            # A set gives constant-time membership checks per goods.
            known_goods_ids = set(self._get_db_goods_id_list(my_pipeline))
            for item in miaosha_goods_list:
                if item.get('zid', '') in known_goods_ids:
                    print('该goods_id已经存在于数据库中, 此处跳过')
                    continue

                tmp_url = 'https://shop.zhe800.com/products/' + str(item.get('zid', ''))
                goods_id = zhe_800.get_goods_id_from_url(tmp_url)
                zhe_800.get_goods_data(goods_id=goods_id)
                goods_data = zhe_800.deal_with_data()
                if goods_data == {}:
                    # Nothing could be parsed for this goods.
                    continue

                goods_data['stock_info'] = item.get('stock_info')
                goods_data['goods_id'] = str(item.get('zid'))
                goods_data['spider_url'] = tmp_url
                goods_data['username'] = '******'
                goods_data['price'] = item.get('price')
                goods_data['taobao_price'] = item.get('taobao_price')
                goods_data['sub_title'] = item.get('sub_title')
                goods_data['miaosha_time'] = item.get('miaosha_time')
                goods_data['miaosha_begin_time'], goods_data['miaosha_end_time'] = \
                    get_miaosha_begin_time_and_miaosha_end_time(
                        miaosha_time=item.get('miaosha_time'))
                goods_data['session_id'] = str(base_session_id)

                res = zhe_800.insert_into_zhe_800_xianshimiaosha_table(
                    data=goods_data,
                    pipeline=my_pipeline)
                if res:
                    # Remember the new id so it is not inserted again.
                    known_goods_ids.add(goods_id)

                sleep(ZHE_800_SPIKE_SLEEP_TIME)   # throttle the crawl

            sleep(4)

        # Release the parser before moving on to the next session.
        del zhe_800
        gc.collect()
def run_forever():
    """Endlessly re-crawl every stored goods row and write the result back.

    Each round loads the rows with ``select_zhe_800_all_goods_id`` (a
    ``TypeError`` is treated as a failed connection and the round is
    skipped), re-parses every goods, maintains its shelf/down timestamps and
    saves it. The connection is rebuilt every 50 rows. The loop then sleeps
    5.5 hours when the Shanghai hour is 0, otherwise 5 seconds.

    Row layout read here: ``row[0]`` goods id, ``row[1]`` stored
    ``is_delete``, ``row[2]`` stored shelf/down JSON text (may be ``None``).

    Shelf/down timestamps: ``is_delete == 0`` is the on-shelf state. When
    the state changed, or nothing is stored yet, the timestamp matching the
    *new* state is set; otherwise the stored value is kept. The previous
    code stamped ``down_time`` on a 1 -> 0 transition (a relisting) and
    ``shelf_time`` on the others, which contradicted its own unchanged-state
    branch.
    """
    while True:
        tmp_sql_server = SqlServerMyPageInfoSaveItemPipeline()
        try:
            result = list(tmp_sql_server.select_zhe_800_all_goods_id())
        except TypeError:
            print('TypeError错误, 原因数据库连接失败...(可能维护中)')
            result = None

        if result is not None:
            print('------>>> 下面是数据库返回的所有符合条件的goods_id <<<------')
            print(result)
            print('--------------------------------------------------------')

            print('即将开始实时更新数据, 请耐心等待...'.center(100, '#'))
            for index, item in enumerate(result, start=1):
                # A fresh parser per row, so nothing large outlives the iteration.
                zhe_800 = Zhe800Parse()
                if index % 50 == 0:
                    # Reconnect every 50 rows to avoid a stalled connection.
                    print('正在重置,并与数据库建立新连接中...')
                    tmp_sql_server = SqlServerMyPageInfoSaveItemPipeline()
                    print('与数据库的新连接成功建立...')

                if tmp_sql_server.is_connect_success:
                    print('------>>>| 正在更新的goods_id为(%s) | --------->>>@ 索引值为(%d)'
                          % (item[0], index))
                    zhe_800.get_goods_data(goods_id=item[0])
                    data = zhe_800.deal_with_data()
                    if data != {}:
                        data['goods_id'] = item[0]

                        # Work out the shelf / down timestamps to store.
                        my_shelf_and_down_time = {
                            'shelf_time': '',
                            'down_time': '',
                        }
                        state_changed = data['is_delete'] != item[1]
                        if (state_changed
                                or item[2] is None
                                or item[2] == '{"shelf_time": "", "down_time": ""}'
                                or len(item[2]) == 35):   # 35 == length of the blank record
                            if data['is_delete'] == 0:
                                # Now on the shelf.
                                my_shelf_and_down_time['shelf_time'] = str(get_shanghai_time())
                            else:
                                # Now taken down.
                                my_shelf_and_down_time['down_time'] = str(get_shanghai_time())
                        else:
                            # Nothing changed: keep the stored record.
                            my_shelf_and_down_time = json.loads(item[2])
                        data['my_shelf_and_down_time'] = my_shelf_and_down_time

                        zhe_800.to_right_and_update_data(data, pipeline=tmp_sql_server)
                else:
                    print('数据库连接失败,数据库可能关闭或者维护中')

                gc.collect()

            print('全部数据更新完毕'.center(100, '#'))

        if get_shanghai_time().hour == 0:
            # No updates after midnight: pause for 5.5 hours.
            sleep(60 * 60 * 5.5)
        else:
            sleep(5)
        gc.collect()
def run_forever(self):
    """Run one refresh pass over all stored flash-sale rows, then sleep.

    Per the original note, the intent is to refresh only goods whose sale
    is current or about to start; prices of sessions further in the future
    are left alone for now.

    For each row (``row[0]`` goods id, ``row[1]`` JSON text holding
    ``miaosha_begin_time``, ``row[2]`` session id):

    * ``self.is_recent_time(...) == 0`` -> the row is deleted as expired;
    * ``== 2`` -> the row is left untouched;
    * anything else -> the row is refreshed from its live session.

    The connection is rebuilt every 50 rows. After the pass the method
    sleeps 5.5 hours when the Shanghai hour is 0, otherwise 5 seconds.

    :return: None
    """
    tmp_sql_server = SqlServerMyPageInfoSaveItemPipeline()
    try:
        result = list(tmp_sql_server.select_zhe_800_xianshimiaosha_all_goods_id())
    except TypeError:
        print('TypeError错误, 原因数据库连接失败...(可能维护中)')
        result = None

    if result is not None:
        print('------>>> 下面是数据库返回的所有符合条件的goods_id <<<------')
        print(result)
        print('--------------------------------------------------------')

        print('即将开始实时更新数据, 请耐心等待...'.center(100, '#'))
        for index, item in enumerate(result, start=1):
            goods_id = item[0]
            begin_time_text = json.loads(item[1]).get('miaosha_begin_time')
            # Whole-second epoch; truncating the float replaces the old
            # str(...)[0:10] slicing, which only worked for 10-digit epochs.
            miaosha_begin_time = int(time.mktime(
                time.strptime(begin_time_text, '%Y-%m-%d %H:%M:%S')))

            # A fresh parser per row, so nothing large outlives the iteration.
            zhe_800_miaosha = Zhe800Parse()
            if index % 50 == 0:
                # Reconnect every 50 rows to avoid a stalled connection.
                print('正在重置,并与数据库建立新连接中...')
                tmp_sql_server = SqlServerMyPageInfoSaveItemPipeline()
                print('与数据库的新连接成功建立...')

            if not tmp_sql_server.is_connect_success:
                print('数据库连接失败,数据库可能关闭或者维护中')
            else:
                recent_flag = self.is_recent_time(miaosha_begin_time)
                if recent_flag == 0:
                    tmp_sql_server.delete_zhe_800_expired_goods_id(goods_id=goods_id)
                    print('过期的goods_id为(%s)' % goods_id,
                          ', 限时秒杀开始时间为(%s), 删除成功!' % begin_time_text)
                elif recent_flag != 2:
                    # Rows are not ordered by time, so a flag of 2 is
                    # skipped rather than ending the loop.
                    print('------>>>| 正在更新的goods_id为(%s) | --------->>>@ 索引值为(%d)'
                          % (goods_id, index))
                    self._refresh_goods_via_phantomjs(
                        tmp_sql_server, item, zhe_800_miaosha)

            gc.collect()

        print('全部数据更新完毕'.center(100, '#'))

    if get_shanghai_time().hour == 0:
        # No updates after midnight: pause for 5.5 hours.
        sleep(60 * 60 * 5.5)
    else:
        sleep(5)
    gc.collect()


def _refresh_goods_via_phantomjs(self, tmp_sql_server, item, zhe_800_miaosha):
    """Refresh one stored row from its live session page, or delete it if delisted."""
    goods_id = item[0]
    tmp_url = 'https://zapi.zhe800.com/zhe800_n_api/xsq/m/session_deals?session_id={0}&page=1&per_page=1000'.format(
        str(item[2]))
    body = self.my_phantomjs.use_phantomjs_to_get_url_body(url=tmp_url)
    payloads = re.compile(r'<pre.*?>(.*)</pre>').findall(body)
    if payloads == []:
        print('获取到的data为空!')
        return

    try:
        session_data = json.loads(payloads[0])
    except ValueError:
        # An unparsable payload is treated like an empty one.
        print('获取到的data为空!')
        return

    blocks = session_data.get('data', {}).get('blocks', [])
    if blocks == []:
        print('该session_id不存在,此处跳过')
        return

    deals = [block.get('deal', {}) for block in blocks]
    miaosha_goods_list = self.get_miaoshao_goods_info_list(data=deals)

    if goods_id not in [goods.get('zid') for goods in miaosha_goods_list]:
        # The goods was pulled from the session: drop the stored row.
        print('该商品已被下架限时秒杀活动,此处将其删除')
        tmp_sql_server.delete_zhe_800_expired_goods_id(goods_id=goods_id)
        print('下架的goods_id为(%s)' % goods_id, ', 删除成功!')
        return

    for goods in miaosha_goods_list:
        if goods.get('zid', '') != goods_id:
            continue

        zhe_800_miaosha.get_goods_data(goods_id=goods_id)
        goods_data = zhe_800_miaosha.deal_with_data()
        if goods_data == {}:
            continue

        stock_info = goods.get('stock_info')
        goods_data['stock_info'] = stock_info
        goods_data['goods_id'] = str(goods.get('zid'))
        if stock_info.get('activity_stock') > 0:
            # Prices are only overwritten while activity stock remains.
            goods_data['price'] = goods.get('price')
            goods_data['taobao_price'] = goods.get('taobao_price')
        goods_data['sub_title'] = goods.get('sub_title')
        goods_data['miaosha_time'] = goods.get('miaosha_time')
        goods_data['miaosha_begin_time'], goods_data['miaosha_end_time'] = \
            self.get_miaosha_begin_time_and_miaosha_end_time(
                miaosha_time=goods.get('miaosha_time'))

        zhe_800_miaosha.to_update_zhe_800_xianshimiaosha_table(
            data=goods_data,
            pipeline=tmp_sql_server)
def run_forever():
    """Endlessly re-crawl every stored goods row and write the result back.

    Each round selects the rows, re-parses every goods, recomputes its
    shelf/delete times and price-change info through the shared helpers and
    saves it. A ``TypeError`` from the select is treated as a failed
    connection and the round is skipped. The connection is rebuilt every 50
    rows. The loop then sleeps 5.5 hours when the Shanghai hour is 0,
    otherwise 5 seconds.

    Row layout read here: ``row[0]`` goods id, ``row[1]`` is_delete,
    ``row[2]`` price, ``row[3]`` taobao price, ``row[4]`` shelf time,
    ``row[5]`` delete time.
    """
    while True:
        tmp_sql_server = SqlServerMyPageInfoSaveItemPipeline()
        sql_str = ''' select GoodsID, IsDelete, Price, TaoBaoPrice, shelf_time, delete_time from dbo.GoodsInfoAutoGet where SiteID=11 and MainGoodsID is not null'''

        try:
            rows = list(tmp_sql_server._select_table(sql_str=sql_str))
        except TypeError:
            print('TypeError错误, 原因数据库连接失败...(可能维护中)')
            rows = None

        if rows is not None:
            print('------>>> 下面是数据库返回的所有符合条件的goods_id <<<------')
            print(rows)
            print('--------------------------------------------------------')

            print('即将开始实时更新数据, 请耐心等待...'.center(100, '#'))
            for position, row in enumerate(rows, start=1):
                # A fresh parser per row, so nothing large outlives the iteration.
                parser = Zhe800Parse()
                if position % 50 == 0:
                    # Reconnect every 50 rows to avoid a stalled connection.
                    print('正在重置,并与数据库建立新连接中...')
                    tmp_sql_server = SqlServerMyPageInfoSaveItemPipeline()
                    print('与数据库的新连接成功建立...')

                if not tmp_sql_server.is_connect_success:
                    print('数据库连接失败,数据库可能关闭或者维护中')
                else:
                    print('------>>>| 正在更新的goods_id为(%s) | --------->>>@ 索引值为(%d)' % (row[0], position))
                    parser.get_goods_data(goods_id=row[0])
                    data = parser.deal_with_data()
                    if data == {}:
                        # Nothing parsed: back off briefly.
                        sleep(2)
                    else:
                        data['goods_id'] = row[0]

                        shelf_time, delete_time = get_shelf_time_and_delete_time(
                            tmp_data=data,
                            is_delete=row[1],
                            shelf_time=row[4],
                            delete_time=row[5])
                        data['shelf_time'] = shelf_time
                        data['delete_time'] = delete_time

                        is_price_change, price_change_info = _get_price_change_info(
                            old_price=row[2],
                            old_taobao_price=row[3],
                            new_price=data['price'],
                            new_taobao_price=data['taobao_price']
                        )
                        data['_is_price_change'] = is_price_change
                        data['_price_change_info'] = price_change_info

                        parser.to_right_and_update_data(data, pipeline=tmp_sql_server)

                gc.collect()
                sleep(1.5)

            print('全部数据更新完毕'.center(100, '#'))

        # No updates after midnight: pause 5.5 hours, otherwise 5 seconds.
        pause = 60*60*5.5 if get_shanghai_time().hour == 0 else 5
        sleep(pause)
        gc.collect()
def run_forever():
    """Endlessly re-crawl every stored goods row and write the result back.

    Each round selects the rows with ``z8_select_str_3``, re-parses every
    goods, recomputes its shelf/delete times, price-change info and SKU
    change record through the shared helpers and saves it. A ``TypeError``
    from the select is treated as a failed connection and the round is
    skipped. The connection is rebuilt every 50 rows. The loop then sleeps
    5.5 hours when the Shanghai hour is 0, otherwise 5 seconds.

    Row columns read here: ``row[0]`` goods id, ``row[1]`` is_delete,
    ``row[2]`` price, ``row[3]`` taobao price, ``row[4]`` shelf time,
    ``row[5]`` delete time, ``row[6]`` stored SKU info, ``row[7]`` stored
    is_price_change flag (may be ``None``).
    """
    while True:
        tmp_sql_server = SqlServerMyPageInfoSaveItemPipeline()
        try:
            rows = list(tmp_sql_server._select_table(sql_str=z8_select_str_3))
        except TypeError:
            print('TypeError错误, 原因数据库连接失败...(可能维护中)')
            rows = None

        if rows is not None:
            print('------>>> 下面是数据库返回的所有符合条件的goods_id <<<------')
            print(rows)
            print('--------------------------------------------------------')

            print('即将开始实时更新数据, 请耐心等待...'.center(100, '#'))
            for position, row in enumerate(rows, start=1):
                # A fresh parser per row, so nothing large outlives the iteration.
                parser = Zhe800Parse()
                if position % 50 == 0:
                    # Reconnect every 50 rows to avoid a stalled connection.
                    print('正在重置,并与数据库建立新连接中...')
                    tmp_sql_server = SqlServerMyPageInfoSaveItemPipeline()
                    print('与数据库的新连接成功建立...')

                if not tmp_sql_server.is_connect_success:
                    print('数据库连接失败,数据库可能关闭或者维护中')
                else:
                    print('------>>>| 正在更新的goods_id为(%s) | --------->>>@ 索引值为(%d)'
                          % (row[0], position))
                    parser.get_goods_data(goods_id=row[0])
                    data = parser.deal_with_data()
                    if data == {}:
                        # Nothing parsed: back off briefly.
                        sleep(2)
                    else:
                        data['goods_id'] = row[0]

                        shelf_time, delete_time = get_shelf_time_and_delete_time(
                            tmp_data=data,
                            is_delete=row[1],
                            shelf_time=row[4],
                            delete_time=row[5])
                        data['shelf_time'] = shelf_time
                        data['delete_time'] = delete_time

                        is_price_change, price_change_info = _get_price_change_info(
                            old_price=row[2],
                            old_taobao_price=row[3],
                            new_price=data['price'],
                            new_taobao_price=data['taobao_price'])
                        data['_is_price_change'] = is_price_change
                        data['_price_change_info'] = price_change_info

                        try:
                            old_sku_info = format_price_info_list(
                                price_info_list=json_2_dict(row[6]),
                                site_id=11)
                        except AttributeError:
                            # The stored value was already formatted.
                            old_sku_info = row[6]

                        new_sku_info = format_price_info_list(
                            data['price_info_list'],
                            site_id=11)
                        stored_flag = row[7] if row[7] is not None else 0
                        # NOTE(review): this overwrites the '_is_price_change'
                        # computed just above and passes the stored flag, not
                        # the fresh one — confirm that is intended.
                        data['_is_price_change'], data['sku_info_trans_time'] = \
                            get_sku_info_trans_record(
                                old_sku_info=old_sku_info,
                                new_sku_info=new_sku_info,
                                is_price_change=stored_flag)

                        parser.to_right_and_update_data(data, pipeline=tmp_sql_server)

                gc.collect()
                sleep(1.5)

            print('全部数据更新完毕'.center(100, '#'))

        # No updates after midnight: pause 5.5 hours, otherwise 5 seconds.
        pause = 60 * 60 * 5.5 if get_shanghai_time().hour == 0 else 5
        sleep(pause)
        gc.collect()