def _get_is_delete(self, price_info_list, data, other): ''' 获取is_delete :param price_info_list: :param data: :return: ''' is_delete = 0 all_rest_number = 0 if price_info_list != []: for item in price_info_list: all_rest_number += item.get('rest_number', 0) if all_rest_number == 0: is_delete = 1 # 当官方下架时间< 当前时间戳 则商品已下架 is_delete = 1 if data['sell_time'] != {}: end_time = datetime_to_timestamp( string_to_datetime( data.get('sell_time', {}).get('end_time', ''))) if end_time < datetime_to_timestamp(get_shanghai_time()): self.lg.info('该商品已经过期下架...! 进行逻辑删除 is_delete=1') is_delete = 1 # print(is_delete) if not other.get('sku_info', {}).get('goodsStoreStatus', True): is_delete = 1 return is_delete
def _get_is_delete(self, price_info_list, data, other): is_delete = 0 all_rest_number = 0 if price_info_list != []: for item in price_info_list: all_rest_number += item.get('rest_number', 0) if all_rest_number == 0: is_delete = 1 else: is_delete = 1 # 当官方下架时间< 当前时间戳 则商品已下架 is_delete = 1 if data['sell_time'] != {}: end_time = datetime_to_timestamp( string_to_datetime( data.get('sell_time', {}).get('end_time', ''))) if end_time < datetime_to_timestamp(get_shanghai_time()): self.my_lg.info('该商品已经过期下架...! 进行逻辑删除 is_delete=1') is_delete = 1 # print(is_delete) if other.get('soldOut'): # True or False is_delete = 1 return is_delete
def judge_create_time_is_old(now_time, create_time):
    '''
    Compare a goods' creation time against an 8-hour window.

    NOTE: despite the function name, the result is True when the goods was
    created LESS than 8 hours before now_time.

    :param now_time: datetime
    :param create_time: datetime
    :return: bool, True when fewer than 28800 seconds separate the two times
    '''
    eight_hours = 28800
    elapsed = int(
        datetime_to_timestamp(now_time) - datetime_to_timestamp(create_time))

    return elapsed < eight_hours
def turn_one_time() -> dict:
    '''
    Send one POST to the riyiwk lottery "start" endpoint.

    :return: the response body decoded by json_2_dict
    '''
    url = 'https://m.riyiwk.com/lottery/start.html'
    headers = {
        'Host': 'm.riyiwk.com',
        'accept': 'application/json, text/javascript, */*; q=0.01',
        'origin': 'https://m.riyiwk.com',
        'referer': 'https://m.riyiwk.com/lottery.html?check_login=1',
        'accept-language': 'zh-cn',
        'x-requested-with': 'XMLHttpRequest',
        # NOTE(review): the user id and token are hard-coded inside the UA string.
        'user-agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.34 (KHTML, like Gecko) Mobile/15A5341f/RIYIWK 2.6.0/USER_ID 203793/TOKEN 3a3988e07be98db064a70fc635c0b590',
    }
    # Only the "lpvt" cookie is refreshed per call; the other two are fixed values.
    current_stamp = str(datetime_to_timestamp(get_shanghai_time()))
    cookies = {
        'Hm_lpvt_fa0ddec29ac177a2d127cebe209832e3': current_stamp,
        'Hm_lvt_fa0ddec29ac177a2d127cebe209832e3': '1537161510,1537228200,1537353114,1537411854',
        'wk_': '9umq63s8g6leobk2p285frmp583nhm9t',
    }

    body = Requests.get_url_body(
        method='post',
        use_proxy=False,
        url=url,
        headers=headers,
        cookies=cookies)

    return json_2_dict(body)
def get_ordered_dict(data):
    """
    Return the entries of ``data`` as an OrderedDict sorted chronologically by key.

    Every key is parsed with ``date_parse`` and ranked by the timestamp that
    ``datetime_to_timestamp`` derives from it; keys that map to the same
    timestamp keep their relative order from ``data``.

    :param data: dict whose keys are date strings that ``date_parse`` accepts
    :return: OrderedDict holding the same key/value pairs, earliest date first
    """
    # Rank on the numeric timestamp itself. Sorting on dict key views does not
    # work: "<" between key views is a subset test, not an ordering, so the
    # input order would come back unchanged.
    ordered_days = sorted(
        data,
        key=lambda day: datetime_to_timestamp(date_parse(day)))

    return OrderedDict((day, data[day]) for day in ordered_days)
def _get_is_delete(self, data, price_info_list): is_delete = 0 all_rest_number = 0 for item in price_info_list: all_rest_number += item.get('rest_number', 0) if all_rest_number == 0: is_delete = 1 # 当官方下架时间< int(time.time()) 则商品已下架 is_delete = 1 if int(data.get('sell_time', {}).get('end_time', '')) < int(datetime_to_timestamp(get_shanghai_time())): print('该商品已经过期下架...! 进行逻辑删除 is_delete=1') is_delete = 1 return is_delete
def _get_origin_comment_list(self, **kwargs) -> list:
    '''
    Page through the 1688 offerRemark endpoint and gather the raw comment entries.

    :param kwargs: reads 'csrf', 'goods_id' and 'cookies' (each defaults to '')
    :return: list made by concatenating data['model'] of every requested page
    '''
    csrf = kwargs.get('csrf', '')
    goods_id = kwargs.get('goods_id', '')
    cookies = kwargs.get('cookies', '')

    url = 'https://m.1688.com/page/offerRemark.htm'
    headers = {
        'cookie': cookies,
        'accept-encoding': 'gzip, deflate, br',
        'accept-language': 'zh-CN,zh;q=0.9',
        'user-agent': get_random_pc_ua(),
        'accept': 'application/json, text/javascript, */*; q=0.01',
        'referer': 'https://m.1688.com/page/offerRemark.htm?offerId={}'.format(goods_id),
        'authority': 'm.1688.com',
        'x-requested-with': 'XMLHttpRequest',
    }

    collected = []
    # range() excludes its upper bound: pages 1 .. self.max_page - 1 are fetched.
    for page_no in range(1, self.max_page):
        navigate_options = dumps({
            'data': {
                'bizType': 'trade',
                'itemId': int(goods_id),
                'offerId': str(goods_id),
                'page': page_no,
                'pageSize': 5,
                'starLevel': 7,
            },
        })
        # Current timestamp followed by three random digits, rebuilt per request.
        cache_buster = str(datetime_to_timestamp(get_shanghai_time())) \
            + str(get_random_int_number(start_num=100, end_num=999))
        params = (
            ('_csrf', csrf),
            ('__wing_navigate_type', 'view'),
            ('__wing_navigate_url', 'detail:modules/offerRemarkList/view'),
            ('__wing_navigate_options', navigate_options),
            ('_', cache_buster),
        )

        body = Requests.get_url_body(
            url=url,
            headers=headers,
            params=params,
            ip_pool_type=self.ip_pool_type)
        page_items = json_2_dict(body, encoding='ascii').get('data', {}).get('model', [])
        pprint(page_items)
        collected += page_items

        # Short pause between consecutive page requests.
        sleep(.25)

    return collected
def _check_req_timestamp(self, req_timestamp): """ 校验时间戳 @pram req_timestamp str, int: 请求参数中的时间戳(10位) """ if len(str(req_timestamp)) == 10: req_timestamp = int(req_timestamp) self.now_timestamp = datetime_to_timestamp(get_shanghai_time()) if self.now_timestamp - req_timestamp == 28805: # 单独处理相差8 hour的请求(加拿大服务器问题) req_timestamp += 28805 if req_timestamp <= self.now_timestamp and req_timestamp + self._timestamp_expiration >= self.now_timestamp: return True return False
async def is_recent_time(self, timestamp):
    '''
    Classify a timestamp against a 72-hour look-back window.

    The window is 72 hours (not 48) so the backend has time to sync the
    off-shelf state.

    :param timestamp: timestamp, convertible with int()
    :return: 0 when the timestamp lies more than 259200 seconds (72 hours)
        before the current Shanghai time (expired, price restored);
        1 otherwise (to be updated). The value 2 is never returned here.
    '''
    window_seconds = 259200
    target = int(timestamp)
    # Current timestamp (Shanghai time).
    now = datetime_to_timestamp(get_shanghai_time())
    expired = (target - now) < -window_seconds

    return 0 if expired else 1
def _get_params(self, goods_id):
    '''
    Build the query params used to fetch sku_info.

    Region params (provinceCode / cityCode / districtCode) are currently not sent.

    :param goods_id: goods id, stringified into 'goodsId'
    :return: tuple of (name, value) pairs: 't' and 'goodsId'
    '''
    # 't' is the current timestamp followed by three random digits.
    stamp = str(datetime_to_timestamp(get_shanghai_time()))
    suffix = str(get_random_int_number(start_num=100, end_num=999))

    return (
        ('t', stamp + suffix),
        ('goodsId', str(goods_id)),
    )
def is_recent_time(self, timestamp) -> int:
    '''
    Classify an end timestamp relative to the current Shanghai time.

    :param timestamp: timestamp, convertible with int()
    :return: 1 when the timestamp is still in the future (to be updated);
        0 when it passed more than 86400 seconds (24 hours) ago (expired,
        price restored);
        2 when it passed within the last 24 hours (expired but still waiting,
        so no delete-related action is taken yet)
    '''
    diff = int(timestamp) - int(datetime_to_timestamp(get_shanghai_time()))

    if diff > 0:
        return 1
    # The 24-hour grace period lets the backend sync the off-shelf state.
    if diff < -86400:
        return 0
    return 2
def _get_sell_time(self, data): ''' 得到上下架时间 :param data: :return: ''' try: left_time = data.get('gradientPrice', {}).get('leftTime', 0) except AttributeError: # gradientPrice的值可能为'' return {} if left_time == 0: return {} now_time_timestamp = datetime_to_timestamp(get_shanghai_time()) sell_time = { 'begin_time': timestamp_to_regulartime(now_time_timestamp), 'end_time': timestamp_to_regulartime(now_time_timestamp + left_time), } return sell_time
def _get_one_page_articles(self, page_num) -> list:
    '''
    Fetch one page of articles from the 36kr "mainsite" search-column API.

    :param page_num: page number, sent as the 'page' param (20 items per page)
    :return: the 'data' -> 'items' list of the response, with each item's
        'user_info' replaced by its json_2_dict-decoded form; [] when the
        list is empty
    '''
    url = 'https://36kr.com/api/search-column/mainsite'
    headers = {
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'User-Agent': get_random_pc_ua(),
        'Accept': '*/*',
        'Referer': 'https://36kr.com/',
        'Connection': 'keep-alive',
    }
    # Current timestamp followed by three random digits.
    cache_buster = str(datetime_to_timestamp(get_shanghai_time())) \
        + str(get_random_int_number(100, 999))
    params = (
        ('per_page', '20'),
        ('page', str(page_num)),
        ('_', cache_buster),
    )

    body = Requests.get_url_body(
        url=url,
        headers=headers,
        params=params,
        cookies=None)
    articles = json_2_dict(body).get('data', {}).get('items', [])
    if articles == []:
        return []

    # 'user_info' is run through json_2_dict in place
    # (presumably it arrives as a JSON string; confirm against the API).
    for article in articles:
        article.update({'user_info': json_2_dict(article.get('user_info', ''))})

    return articles
def _set_params(self, goods_id):
    '''
    Build the query params for the taobao mtop.taobao.detail.getdetail (v6.0) API.

    Example of a correct final url, for reference:
    https://acs.m.taobao.com/h5/mtop.taobao.detail.getdetail/6.0/?appKey=12574478&t=1508886442888&api=mtop.taobao.detail.getdetail&v=6.0&ttid=2016%40taobao_h5_2.0.0&isSec=0&ecode=0&AntiFlood=true&AntiCreep=true&H5Request=true&type=jsonp&dataType=jsonp&callback=mtopjsonp1&data=%7B%22exParams%22%3A%22%7B%5C%22id%5C%22%3A%5C%22546756179626%5C%22%7D%22%2C%22itemNumId%22%3A%22546756179626%22%7D

    :param goods_id: goods id, used for both 'exParams.id' and 'itemNumId'
    :return: tuple of (name, value) pairs
    '''
    # Every nested dict level has to be JSON-encoded on its own, so the inner
    # dict is dumped first and embedded as a string in the outer one.
    ex_params = json.dumps({'id': goods_id})
    payload = json.dumps({
        'exParams': ex_params,
        'itemNumId': goods_id,
    })
    # Current timestamp followed by three random digits; no 'sign' param is sent.
    stamp = str(datetime_to_timestamp(get_shanghai_time())) + str(randint(100, 999))

    return (
        ('jsv', '2.4.8'),
        ('appKey', '12574478'),
        ('t', stamp),
        ('api', 'mtop.taobao.detail.getdetail'),
        ('v', '6.0'),
        ('ttid', '2016@taobao_h5_2.0.0'),
        ('isSec', '0'),
        ('ecode', '0'),
        ('AntiFlood', 'true'),
        ('AntiCreep', 'true'),
        ('H5Request', 'true'),
        ('type', 'jsonp'),
        ('dataType', 'jsonp'),
        ('callback', 'mtopjsonp1'),
        ('data', payload),
    )
def _set_params(self, goods_id):
    '''
    Build the query params for the taobao mtop.taobao.detail.getdetail (v6.0)
    API, using the 2017@taobao_h5_6.6.0 ttid.

    :param goods_id: goods id, sent as 'itemNumId' inside the JSON 'data' param
    :return: tuple of (name, value) pairs
    '''
    # Current timestamp followed by three random digits; no 'sign' param is sent.
    stamp = str(datetime_to_timestamp(get_shanghai_time())) + str(randint(100, 999))
    payload = json.dumps({'itemNumId': goods_id})

    return (
        ('jsv', '2.4.8'),
        ('appKey', '12574478'),
        ('t', stamp),
        ('api', 'mtop.taobao.detail.getdetail'),
        ('v', '6.0'),
        ('dataType', 'jsonp'),
        ('ttid', '2017@taobao_h5_6.6.0'),
        ('AntiCreep', 'true'),
        ('type', 'jsonp'),
        ('callback', 'mtopjsonp3'),
        ('data', payload),
    )
@connect : [email protected] ''' from requests import session from requests_toolbelt import MultipartEncoder from fzutils.spider.fz_requests import Requests from fzutils.time_utils import ( get_shanghai_time, datetime_to_timestamp, ) from fzutils.common_utils import get_random_int_number cookies = { 'yd_cookie': '2369844f-fc3f-42742d88d5deabc0ec65d866d61526e32347', } _t = str(datetime_to_timestamp(get_shanghai_time())) + str( get_random_int_number(100, 999)) data = MultipartEncoder( fields={ 'PageIndex': '1', 'PageSize': '20', # 'TimesTamp': '1547813627151', 'TimesTamp': _t, 'UserId': '259146', 'sign': '42531e765ce3055f25f369db3505db8f' }) headers = { 'Host': 'api.yiuxiu.com', 'accept': 'application/json', # 'content-type': 'multipart/form-data; boundary=Boundary+C98168C62FD125E1', 'content-type': data.content_type,
def run_forever(self):
    '''
    Run one real-time update pass over the goods rows returned by
    self._get_db_old_data(), then sleep before returning.

    For every row the pintuan end time decides the action (see
    self.is_recent_time): 0 -> take the goods off the shelf, 2 -> skip,
    otherwise -> re-fetch the goods and update its table row.

    :return: None
    '''
    result = self._get_db_old_data()
    if result is None:
        # Fetching the rows failed: wait a little and give up on this pass.
        sleep_time = 20
        print('获取db数据失败, 休眠{}s ...'.format(sleep_time))
        sleep(sleep_time)
        return None

    index = 1
    for item in result:  # real-time update, one row at a time
        # Row layout as used below: item[0] = goods_id,
        # item[1] = JSON with 'begin_time' / 'end_time', item[2] = pid.
        goods_id = item[0]
        pid = item[2]
        # end_time looks like: 2020-04-12 00:00:00
        pintuan_end_time = json_2_dict(item[1]).get('end_time')
        pintuan_end_time = datetime_to_timestamp(
            string_to_datetime(pintuan_end_time))
        # print(pintuan_end_time)

        data = {}
        self.sql_cli = _block_get_new_db_conn(db_obj=self.sql_cli,
                                              index=index,
                                              remainder=50)
        if self.sql_cli.is_connect_success:
            is_recent_time = self.is_recent_time(pintuan_end_time)
            if is_recent_time == 0:
                # Price already restored: take the goods off the shelf.
                _handle_goods_shelves_in_auto_goods_table(
                    goods_id=goods_id,
                    update_sql_str=mia_update_str_7,
                    sql_cli=self.sql_cli)
                print('该goods拼团开始时间为({})'.format(
                    json.loads(item[1]).get('begin_time')))
                sleep(.4)

            elif is_recent_time == 2:
                # Expired but still in the waiting period: no delete-related
                # action yet (it is handled once the 24 hours have passed).
                pass

            else:
                # 1 was returned: the goods is inside the to-be-updated window.
                print(
                    '------>>>| 正在更新的goods_id为({}) | --------->>>@ 索引值为({})'
                    .format(goods_id, index))
                data['goods_id'] = goods_id
                try:
                    data_list = get_mia_pintuan_one_page_api_goods_info(
                        page_num=pid)
                except ResponseBodyIsNullStrException:
                    # Empty response body: skip this row.
                    index += 1
                    sleep(.4)
                    continue

                # TODO: an earlier version took the goods off the shelf when
                # data_list was empty. That wrongly delisted goods still on
                # sale, so no such check is made and the goods is always updated.
                pintuan_goods_all_goods_id = [
                    item_1.get('goods_id', '') for item_1 in data_list
                ]
                # print(pintuan_goods_all_goods_id)
                # The bare string below is the original author's note: mia
                # pintuan goods that look delisted internally are not removed;
                # every unexpired goods is updated, because updating by pid
                # often wrongly deleted goods that were still in pintuan.
                '''
                蜜芽拼团不对内部下架的进行操作,一律都更新未过期商品
                (根据pid来进行更新多次研究发现出现商品还在拼团,误删的情况很普遍)
                '''
                mia_pt = MiaPintuanParse(is_real_times_update_call=True)
                if goods_id not in pintuan_goods_all_goods_id:
                    # Not in the page listing (delisted internally): update anyway.
                    try:
                        goods_data = self._get_mia_pt_one_goods_info(
                            mia_pt_obj=mia_pt,
                            goods_id=goods_id,
                        )
                    except AssertionError:
                        # The returned data was empty: skip this row.
                        index += 1
                        continue

                    # pprint(goods_data)
                    mia_pt.update_mia_pintuan_table(data=goods_data,
                                                    pipeline=self.sql_cli)
                    sleep(MIA_SPIKE_SLEEP_TIME)  # slow down

                else:
                    # Still listed: update using the sub_title from the listing.
                    for item_2 in data_list:
                        if item_2.get('goods_id', '') == goods_id:
                            sub_title = item_2.get('sub_title', '')
                            try:
                                goods_data = self._get_mia_pt_one_goods_info(
                                    mia_pt_obj=mia_pt,
                                    goods_id=goods_id,
                                    sub_title=sub_title,
                                )
                            except AssertionError:
                                # The returned data was empty: try the next listing entry.
                                continue

                            # pprint(goods_data)
                            mia_pt.update_mia_pintuan_table(
                                data=goods_data, pipeline=self.sql_cli)
                            sleep(MIA_SPIKE_SLEEP_TIME)  # slow down
                        else:
                            pass

                # Drop the parser object; any failure of the del is ignored.
                try:
                    del mia_pt
                except:
                    pass

        else:
            # The database connection is not usable.
            print('数据库连接失败,数据库可能关闭或者维护中')
            pass

        index += 1
        collect()

    print('全部数据更新完毕'.center(100, '#'))  # sleep(60*60)
    if get_shanghai_time().hour == 0:
        # No updates after midnight: sleep 5.5 hours.
        sleep(60 * 60 * 5.5)
    else:
        sleep(10 * 60)
    collect()
sys.path.append('..') import hashlib import time import requests from fzutils.time_utils import ( get_shanghai_time, datetime_to_timestamp, timestamp_to_regulartime, ) md5 = lambda pwd: hashlib.md5(pwd).hexdigest() # 国际化 get_current_timestamp = lambda: datetime_to_timestamp(get_shanghai_time()) class RequestClient(object): """ 接口签名客户端示例 """ def __init__(self): self._version = "v1" self._access_key_id = "yiuxiu" self._access_key_secret = "yiuxiu6688" def _sign(self, parameters): """ 签名 @param parameters dict: uri请求参数(包含除signature外的公共参数) """ if "sign" in parameters: parameters.pop("sign")
def run_forever():
    # Endless real-time update loop: each round opens a DB pipeline, runs
    # jp_delete_str_1, selects the rows to refresh with jp_select_str_2,
    # processes every row, then sleeps before the next round.
    while True:
        # ---- real-time data update ----
        sql_cli = SqlServerMyPageInfoSaveItemPipeline()
        try:
            sql_cli._delete_table(sql_str=jp_delete_str_1)
            result = list(sql_cli._select_table(sql_str=jp_select_str_2))
        except TypeError:
            # list(...) failed: treated as a failed DB connection
            # (possibly under maintenance).
            print('TypeError错误, 原因数据库连接失败...(可能维护中)')
            result = None

        if result is None:
            pass
        else:
            _block_print_db_old_data(result=result)
            index = 1
            # To limit memory use, the parser object is created here and is
            # deleted and recreated periodically inside the loop.
            juanpi_pintuan = JuanPiParse()
            for item in result:  # real-time update, one row at a time
                # Row layout as used below: item[0] = goods_id,
                # item[1] = JSON list whose first element has 'end_time',
                # item[2] compared with 1 (presumably is_delete; confirm).
                goods_id = item[0]
                if index % 6 == 0:
                    # Every 6th index: drop the parser and build a fresh one.
                    try:
                        del juanpi_pintuan
                    except:
                        pass
                    gc.collect()
                    juanpi_pintuan = JuanPiParse()

                sql_cli = _block_get_new_db_conn(db_obj=sql_cli,
                                                 index=index,
                                                 remainder=50)
                if sql_cli.is_connect_success:
                    try:
                        pintuan_end_time = json.loads(
                            item[1])[0].get('end_time')
                    except IndexError:
                        # Empty schedule list: take the goods off the shelf.
                        # NOTE(review): this continue skips the index += 1 below.
                        print('获取pintuan_end_time时索引异常!出错goods_id:{0}'.format(
                            goods_id))
                        _handle_goods_shelves_in_auto_goods_table(
                            goods_id=goods_id,
                            sql_cli=sql_cli,
                            update_sql_str=jp_update_str_7,
                        )
                        continue

                    # Parse 'YYYY-mm-dd HH:MM:SS' into a 10-digit timestamp.
                    # NOTE(review): time.mktime uses the machine's local
                    # timezone, while the comparison below uses
                    # get_shanghai_time(); confirm the host runs in that zone.
                    pintuan_end_time = int(
                        str(
                            time.mktime(
                                time.strptime(pintuan_end_time,
                                              '%Y-%m-%d %H:%M:%S')))[0:10])
                    # print(pintuan_end_time)

                    if item[2] == 1 or pintuan_end_time < int(
                            datetime_to_timestamp(get_shanghai_time())):
                        # Expired or flagged: take the goods off the shelf.
                        _handle_goods_shelves_in_auto_goods_table(
                            goods_id=goods_id,
                            sql_cli=sql_cli,
                            update_sql_str=jp_update_str_7,
                        )
                        print('该goods_id[{0}]已过期或者售完,逻辑删除成功!'.format(goods_id))

                    else:
                        print(
                            '------>>>| 正在更新的goods_id为(%s) | --------->>>@ 索引值为(%d)'
                            % (goods_id, index))
                        juanpi_pintuan.get_goods_data(goods_id=goods_id)
                        data = juanpi_pintuan.deal_with_data()
                        if data == {}:
                            # Nothing parsed: skip this row.
                            # NOTE(review): this continue also skips index += 1.
                            continue

                        data['goods_id'] = goods_id
                        juanpi_pintuan.to_right_and_update_pintuan_data(
                            data=data, pipeline=sql_cli)

                else:
                    # The database connection is not usable.
                    print('数据库连接失败,数据库可能关闭或者维护中')
                    pass

                index += 1
                gc.collect()
                sleep(1.2)

            print('全部数据更新完毕'.center(100, '#'))  # sleep(60*60)

        if get_shanghai_time().hour == 0:
            # No updates after midnight: sleep 5.5 hours.
            sleep(60 * 60 * 5.5)
        else:
            sleep(5 * 60)
        gc.collect()
def run_forever():
    # Endless real-time update loop: each round opens a DB pipeline, runs
    # jp_delete_str_1, selects the rows to refresh with jp_select_str_2,
    # processes every row, then sleeps before the next round.
    while True:
        # ---- real-time data update ----
        tmp_sql_server = SqlServerMyPageInfoSaveItemPipeline()
        try:
            tmp_sql_server._delete_table(sql_str=jp_delete_str_1)
            result = list(
                tmp_sql_server._select_table(sql_str=jp_select_str_2))
        except TypeError:
            # list(...) failed: treated as a failed DB connection
            # (possibly under maintenance).
            print('TypeError错误, 原因数据库连接失败...(可能维护中)')
            result = None

        if result is None:
            pass
        else:
            # Dump every selected row before starting the update.
            print('------>>> 下面是数据库返回的所有符合条件的goods_id <<<------')
            print(result)
            print('--------------------------------------------------------')
            print('即将开始实时更新数据, 请耐心等待...'.center(100, '#'))
            index = 1
            # To limit memory use, the parser object is created here and is
            # deleted and recreated periodically inside the loop.
            juanpi_pintuan = JuanPiParse()
            for item in result:  # real-time update, one row at a time
                # Row layout as used below: item[0] = goods_id,
                # item[1] = JSON list whose first element has 'end_time',
                # item[2] compared with 1 (presumably is_delete; confirm).
                if index % 6 == 0:
                    # Every 6th index: drop the parser and build a fresh one.
                    try:
                        del juanpi_pintuan
                    except:
                        pass
                    gc.collect()
                    juanpi_pintuan = JuanPiParse()

                if index % 50 == 0:
                    # Reconnect every 50th index so one long-lived connection
                    # does not stop responding.
                    print('正在重置,并与数据库建立新连接中...')
                    tmp_sql_server = SqlServerMyPageInfoSaveItemPipeline()
                    print('与数据库的新连接成功建立...')

                if tmp_sql_server.is_connect_success:
                    try:
                        pintuan_end_time = json.loads(
                            item[1])[0].get('end_time')
                    except IndexError:
                        # Empty schedule list: the row is updated with
                        # jp_update_str_5 (marked is_delete=1 per the message).
                        # NOTE(review): this continue skips the index += 1 below.
                        print('获取pintuan_end_time时索引异常!出错goods_id:{0}'.format(
                            item[0]))
                        print('此处将其标记为is_delete=1')
                        tmp_sql_server._update_table(sql_str=jp_update_str_5,
                                                     params=(item[0], ))
                        continue

                    # Parse 'YYYY-mm-dd HH:MM:SS' into a 10-digit timestamp.
                    # NOTE(review): time.mktime uses the machine's local
                    # timezone, while the comparison below uses
                    # get_shanghai_time(); confirm the host runs in that zone.
                    pintuan_end_time = int(
                        str(
                            time.mktime(
                                time.strptime(pintuan_end_time,
                                              '%Y-%m-%d %H:%M:%S')))[0:10])
                    # print(pintuan_end_time)

                    if item[2] == 1 or pintuan_end_time < int(
                            datetime_to_timestamp(get_shanghai_time())):
                        # Expired or flagged: the row is deleted via jp_delete_str_2.
                        tmp_sql_server._delete_table(sql_str=jp_delete_str_2,
                                                     params=(item[0], ))
                        print('该goods_id[{0}]已过期或者售完,删除成功!'.format(item[0]))

                    else:
                        print(
                            '------>>>| 正在更新的goods_id为(%s) | --------->>>@ 索引值为(%d)'
                            % (item[0], index))
                        juanpi_pintuan.get_goods_data(goods_id=item[0])
                        data = juanpi_pintuan.deal_with_data()
                        if data != {}:
                            data['goods_id'] = item[0]
                            juanpi_pintuan.to_right_and_update_pintuan_data(
                                data=data, pipeline=tmp_sql_server)
                        else:
                            # The parsed data was empty: nothing to update.
                            pass

                else:
                    # The database connection is not usable.
                    print('数据库连接失败,数据库可能关闭或者维护中')
                    pass

                index += 1
                gc.collect()
                sleep(1.2)

            print('全部数据更新完毕'.center(100, '#'))  # sleep(60*60)

        if get_shanghai_time().hour == 0:
            # No updates after midnight: sleep 5.5 hours.
            sleep(60 * 60 * 5.5)
        else:
            sleep(5)
        gc.collect()