def start_requests(self):
    """Poll pubUtil for 'BE' tasks forever and yield one GET search per task.

    Yields:
        scrapy.Request for each valid (date, dep, arr) task.
    """
    while True:
        result = pubUtil.getUrl('BE', 10)
        if not result:
            logging.info('no task! sleep 10s...')
            time.sleep(10)
            continue
        for data in result:
            # Format the fetched task data into (date, departure, arrival).
            (dt, dep, to) = pubUtil.analysisData(data)
            if pubUtil.dateIsInvalid(dt):
                continue
            temp = {
                'depart': dep,
                'arr': to,
                'departing': dt,
                'returning': '',
                'promo-code': '',
                'adults': 1,
                'teens': 0,
                'children': 0,
                'infants': 0
            }
            # Python 3 moved urlencode to urllib.parse; Python 2 keeps it on
            # urllib. Catch only AttributeError — the old bare `except:`
            # also swallowed genuine encoding errors.
            try:
                params = urllib.parse.urlencode(temp)
            except AttributeError:
                params = urllib.urlencode(temp)
            url = '%s%s/%s?%s' % (self.start_urls, dep, to, params)
            # Bind url as a default argument: a plain closure late-binds and
            # would report the *last* generated url for every failed request.
            yield scrapy.Request(
                url,
                callback=self.parse,
                dont_filter=True,
                errback=lambda x, url=url: self.download_errback(x, url))
def start_requests(self):
    """Poll 'aq' tasks (or a local task iterator) and POST availability searches.

    The request body is a pre-captured, URL-encoded Amadeus/EVA Air
    FlexPricerAvailability form; only route and date fields are substituted.
    """
    permins = 0
    logging.info(
        pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                          self.version))
    result_iter = None
    while True:
        # With a `local` attribute set, tasks come from a local iterator
        # instead of the remote task service.
        if hasattr(self, 'local'):
            if not result_iter:
                result_iter = pubUtil.get_task(self.name, days=1)
            result = next(result_iter)
        else:
            result = pubUtil.getUrl('aq', 1)
        if not result:
            logging.info('get task error')
            time.sleep(20)
            continue
        for data in result:
            # logging.info("###input data: " + data)
            (dt, dep, to) = pubUtil.analysisData(data)
            # dt,dep,to='2019-03-28','PVG','TPE'
            # ua = UserAgent()
            # self.headers['user-agent'] = ua.random
            # Opaque captured form body; date is sent as YYYYMMDD0000.
            post_data = 'B_LOCATION_1=' + dep + '&E_LOCATION_1=' + to + '&B_DATE_1=' + dt.replace('-', '') + '0000&B_ANY_TIME_1=True&EMBEDDED_TRANSACTION=FlexPricerAvailability&ARRANGE_BY=D&DISPLAY_TYPE=2&PRICING_TYPE=O&SO_SITE_MATRIX_CALENDAR=FALSE&SO_SITE_RUI_CAL_AVAI_NO_RECO=TRUE&SO_SITE_RUI_FP_AVAI_PRESEL=FALSE&COMMERCIAL_FARE_FAMILY_1=NEWECOOW&COMMERCIAL_FARE_FAMILY_2=NEWDELOW&COMMERCIAL_FARE_FAMILY_3=NEWBIZOW&SO_SITE_RUI_AX_CAL_ENABLED=TRUE&SO_SITE_CAL_CHANGE_WEEK=TRUE&SO_SITE_RUI_HIDE_MDF_SRC=FALSE&EXTERNAL_ID%236=OW&TRAVELLER_TYPE_1=ADT&TRIP_TYPE=O&TRIP_FLOW=YES&SO_SITE_EXPORT_CONFIRM=TRUE&SO_SITE_EXPORT_CONF_URL=https%3A%2F%2Fbooking.evaair.com%2Fexporttripplan%2Fwebservice.aspx&SO_SITE_THREEDS_USE=N&SO_SITE_BILLING_NOT_REQUIRED=Y&SO_SITE_BILL_ADD_OPTIONS=BILL_ADD_HIDDEN&SO_SITE_PREBOOK_CANCELLATION=TRUE&SO_GL=%3C%3Fxml+version%3D%221.0%22+encoding%3D%22iso-8859-1%22%3F%3E%0D%0A%3CSO_GL%3E%0D%0A%3CGLOBAL_LIST+mode%3D%22partial%22%3E%0D%0A%3CNAME%3ESL_AIR_MOP%3C%2FNAME%3E%0D%0A%3CLIST_ELEMENT%3E%0D%0A%3CCODE%3ECC%3C%2FCODE%3E%0D%0A%3CLIST_VALUE%3ECredit+Card%3C%2FLIST_VALUE%3E%0D%0A%3CLIST_VALUE%3EY%3C%2FLIST_VALUE%3E%0D%0A%3CLIST_VALUE%3ECC%3C%2FLIST_VALUE%3E%0D%0A%3CLIST_VALUE%3ECryptic%3C%2FLIST_VALUE%3E%0D%0A%3CLIST_VALUE%3ECC%25T%25I%2F%25E%2F%25C%25F%2FN%25A%3C%2FLIST_VALUE%3E%0D%0A%3CLIST_VALUE%3EN%3C%2FLIST_VALUE%3E%0D%0A%3CLIST_VALUE%2F%3E%0D%0A%3CLIST_VALUE%3EN%3C%2FLIST_VALUE%3E%0D%0A%3CLIST_VALUE%3ECC%3C%2FLIST_VALUE%3E%0D%0A%3CLIST_VALUE%3EN%3C%2FLIST_VALUE%3E%0D%0A%3CLIST_VALUE%3EN%3C%2FLIST_VALUE%3E%0D%0A%3CLIST_VALUE%3EN%3C%2FLIST_VALUE%3E%0D%0A%3CLIST_VALUE%3EN%3C%2FLIST_VALUE%3E%0D%0A%3CLIST_VALUE%3EN%3C%2FLIST_VALUE%3E%0D%0A%3C%2FLIST_ELEMENT%3E%0D%0A%3C%2FGLOBAL_LIST%3E%0D%0A%3C%2FSO_GL%3E&SO_SITE_FD_DISPLAY_MODE=1&SO_SITE_CURRENCY_FORMAT_JAVA=0&SO_SITE_ENABLE_SRV_POLICY=BAG%2CCOA&SO_SITE_ALLOW_SPEC_REQ_SERV=FALSE&SO_SITE_SD_TRUE_OP_CARRIER=TRUE&SO_SITE_BARCODE_ENABLE=TRUE&SO_SITE_ALLOW_CS_CODE_SHARE=FALSE&SO_SITE_USE_PAYMENT_ACTION=TRUE&EXTERNAL_ID=AIBS&EXTERNAL_ID%232=&EXTERNAL_ID%233=&EXTERNAL_ID%234=NEWECOOW&EXTERNAL_ID%235=&EXTERNAL_ID%2314=N&EXTERNAL_ID%2312=&EXTERNAL_ID%2313=zh_CN&EXTERNAL_ID%2399=C5WBKT102%23%23flyeva&DIRECT_LOGIN=NO&SO_SITE_RUI_MULTIDEV_ENABLED=TRUE&SO_SITE_RUI_TABLET_PG_LIST=ALL&SO_SITE_RUI_MOBILE_PG_LIST=ALL&SO_SITE_RUI_DISP_FF_TABLE=TRUE&SO_SITE_RUI_UPSLL_T_MDL=TRUE&SO_SITE_RUI_UPSLL_T_MDL_ATC=TRUE&SO_SITE_RUI_DPICKER_NATIVE=TABLET%2CMOBILE&MC_FORCE_DEVICE_TYPE=MOBILE&SO_SITE_RUI_MOBILE_FLOW=ALL&SO_SITE_RUI_TABLET_FLOW=ALL&SO_SITE_RUI_COLLAPSE_BOUND_T=TWO_STEPS&SO_SITE_RUI_UPSLL_HIDE_BTNS=FALSE&SO_SITE_OFFICE_ID=SHABR08AA&LANGUAGE=CN&SITE=CAWXCNEW'
        url_data = {
            "ENCT": "1",
            "ENC": "990572D723A7BC83F77B4C6C03C696340674137066140FF11D721B8765E55FF8DC0562E080CE4BD1CD01272028CBBA89",
            # Pass the current query time.
            "ENC_TIME": time.strftime("%Y%m%d%H%M%S", time.localtime())
        }
        # Invalidation marker consumed downstream via request meta.
        invalid = {
            'date': dt.replace('-', ''),
            'depAirport': dep,
            'arrAirport': to,
            'mins': self.custom_settings.get('INVALID_TIME')
        }
        # NOTE: Python 2-only spelling; Python 3 would need urllib.parse.urlencode.
        url_data = urllib.urlencode(url_data)
        self.url = self.start_urls[0] + '?' + url_data
        # print '# url: ', url
        # print '# url_data: ', url_data
        # ip = '127.0.0.1:8888'
        # ip = '127.0.0.1:1080'
        yield scrapy.Request(
            self.url,
            headers=self.headers,
            body=post_data,
            callback=self.parse,
            dont_filter=True,
            # meta={'invalid': invalid, 'proxy': ip},
            meta={'invalid': invalid},
            method='POST',
            errback=self.errback)
def start_requests(self):
    """Poll 'je' tasks forever and yield one JSON POST availability search each."""
    permins = 0
    logging.info(
        pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                          self.version))
    while True:
        result = pubUtil.getUrl('je', 1)
        if not result:
            logging.info('get task error')
            time.sleep(10)
            continue
        for data in result:
            # logging.info("###input data: " + data)
            (dt, dep, to) = pubUtil.analysisData(data)
            # Parameter dict for the target endpoint.
            post_data = {
                "AgencyCode": "",
                "AirportFrom": dep,
                "AirportTo": to,
                "BoardDate": dt,
                "CarPackage": 'false',
                "ReturnDate": "",
                "SearchType": "Normal",
                "AvailType": "",
                "IsReturnFlight": 'false',
                "IsBusiness": 'false',
                "Adults": self.ADT,
                "Children": "0",
                "Infants": "0",
                "FareDesignator": "",
                "EdgarsClubCard": "",
                "VoyagerState": '0',
                "HaveErrors": 'false',
                "IsChangeBooking": 'false',
                "MomentumClientNumber": "",
                "OutSegmentKeyFromRedirect": "",
                "InSegmentKeyFromRedirect": "",
                "isMobile": 'false',
                "CriteriaSearchType": "Day"
            }
            # Invalidation marker consumed downstream via request meta.
            invalid = {
                'date': dt.replace('-', ''),
                'depAirport': dep,
                'arrAirport': to,
                'mins': self.custom_settings.get('INVALID_TIME')
            }
            PayLoad = json.dumps(post_data)
            yield scrapy.Request(self.start_urls[0],
                                 body=PayLoad,
                                 callback=self.parse,
                                 dont_filter=True,
                                 meta={'invalid': invalid},
                                 method='POST',
                                 errback=self.errback)
def start_requests(self):
    """Poll 'FZ' tasks in batches of 5 and yield one JSON POST search per task."""
    permins = 0
    print(pubUtil.heartbeat(self.host_name, self.name, self.num, permins, self.version))
    while True:
        result = pubUtil.getUrl('FZ', 5)
        if not result:
            logging.info('get task error')
            time.sleep(10)
            continue
        for data in result:
            (dt, dep, to) = pubUtil.analysisData(data)
            # for data in self.get_task():
            #     dep, to, dt = data
            # dt,dep,to= '2018-09-13','DXB','BEY'
            # Target site expects MM/DD/YYYY dates.
            dt_change = datetime.strptime(dt, '%Y-%m-%d').strftime('%m/%d/%Y')
            print(dep, to, dt)
            seat = self.custom_settings.get('SEAT')
            payload = {
                "journeyType": "ow",
                "isOriginMetro": False,
                "isDestMetro": False,
                "variant": "0",
                "searchCriteria": [{
                    "origin": dep,
                    "dest": to,
                    "originDesc": "",
                    "destDesc": "",
                    "isOriginMetro": False,
                    "isDestMetro": False,
                    "direction": "outBound",
                    "date": "%s 12:00 AM" % dt_change
                }],
                "paxInfo": {
                    "adultCount": seat,
                    "infantCount": 0,
                    "childCount": 0
                }
            }
            # Invalidation marker consumed downstream via request meta.
            invalid = {
                'date': dt.replace('-', ''),
                'depAirport': dep,
                'arrAirport': to,
                'mins': self.custom_settings.get('INVALID_TIME')
            }
            body = json.dumps(payload)
            meta_data = dict(
                invalid=invalid,
                payload=body,
                aaa=(dep, to, dt)
            )
            yield scrapy.Request(self.start_urls[0],
                                 callback=self.parse,
                                 method='POST',
                                 headers=self.custom_settings.get('HEADERS'),
                                 meta={'meta_data': meta_data},
                                 body=body,
                                 errback=self.errback
                                 )
def start_requests(self):
    """Expand each ROUTE:DATE:COUNT task into COUNT consecutive-day searches."""
    permins = 0
    print(
        pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                          self.version))
    result_iter = None
    while True:
        # With a `local` attribute set, tasks come from a local iterator.
        if hasattr(self, 'local'):
            if not result_iter:
                result_iter = pubUtil.get_task(self.name, days=30)
            result = next(result_iter)
        else:
            result = pubUtil.getUrl(self.name)
        if not result:
            time.sleep(6)
            continue
        for data in result:
            # Task format like [u'TLN-CFE:20181110:1'] -> route:date:day-count.
            task_data_list = data.split(':')
            count = int(task_data_list[2])
            (dt, dep, arr) = pubUtil.analysisData(task_data_list[0] + ':' +
                                                  task_data_list[1])
            _date = datetime.strptime(dt, '%Y-%m-%d')
            for i in range(count):
                date = _date + timedelta(days=i)
                date = date.strftime('%Y%m%d0000')
                # Map airport codes to city codes where a mapping exists.
                dep = self.port_city.get(dep, dep)
                arr = self.port_city.get(arr, arr)
                # logging.info('# input data: ' + dep + '' + arr + '' + date)
                # Invalidation marker; `date` is already '%Y%m%d0000' here,
                # so the replace('-', '') is a no-op kept for symmetry with
                # the sibling spiders.
                invalid = {
                    'date': date.replace('-', ''),
                    'depAirport': dep,
                    'arrAirport': arr,
                    'mins': self.custom_settings.get('INVALID_TIME')
                }
                # NOTE: Python 2-only spelling (urllib.parse.urlencode on Py3).
                post_data = urllib.urlencode(
                    ly_post_data.second_post_data(dep, arr, date, self.ADT))
                yield scrapy.Request(self.start_urls[0],
                                     body=post_data,
                                     callback=self.parse,
                                     dont_filter=True,
                                     meta={'invalid': invalid},
                                     errback=self.errback,
                                     method='POST')
def start_requests(self):
    """Poll 'TR' tasks and POST form-encoded searches (Python 2 source)."""
    permins = 0
    print(pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                            self.version))
    result_iter = None
    result = None
    while True:
        if hasattr(self, 'local'):
            if not result_iter or not result:
                # Local task source not implemented yet.
                result_iter = self.get_task()
            result = next(result_iter)
        else:
            result = pubUtil.getUrl('TR', 5)
        if not result:
            logging.info('get task error')
            time.sleep(10)
            continue
        for data in result:
            (dt, dep, to) = pubUtil.analysisData(data)
            # dt, dep, to = '2019-01-12', 'SIN', 'TAO'
            print dt, dep, to
            seat = self.custom_settings.get('SEAT')
            querystring = {
                'adt': seat,
                'arcity': to,
                'chd': '0',
                'dpcity': dep,
                'dpdate': dt,
                'inft': '0',
                'promo': '',
                'type': '1'
            }
            # Build the form body by hand (trailing '&' is tolerated by the
            # server). NOTE: this rebinding shadows the loop variable `data`.
            data = ''
            for key in querystring:
                data = data + key + '=' + str(querystring.get(key)) + '&'
            # Invalidation marker consumed downstream via request meta.
            invalid = {
                'date': dt.replace('-', ''),
                'depAirport': dep,
                'arrAirport': to,
                'mins': self.custom_settings.get('INVALID_TIME')
            }
            meta_data = dict(
                invalid=invalid,
                data=data,
            )
            yield scrapy.Request(self.start_urls[0],
                                 callback=self.parse,
                                 headers=self.custom_settings.get('HEADERS'),
                                 method='POST',
                                 meta={'meta_data': meta_data},
                                 body=data,
                                 errback=self.errback)
def start_requests(self):
    """Poll 'TT' tasks and POST JSON searches with a rotating User-Agent."""
    permins = 0
    print(
        pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                          self.version))
    result_iter = None
    result = None
    while True:
        if hasattr(self, 'local'):
            if not result_iter or not result:
                result_iter = self.get_task()
            result = next(result_iter)
        else:
            result = pubUtil.getUrl('TT', 5)
        if not result:
            logging.info('get task error')
            time.sleep(10)
            continue
        for data in result:
            (dt, dep, to) = pubUtil.analysisData(data)
            # dt, dep, to = '2019-01-25', 'SYD', 'PER'
            seat = self.custom_settings.get('SEAT')
            payload = {
                'currencyCode': 'AUD',
                'departureDate': dt,
                'destination': to,
                'numAdults': seat,
                'numChildren': 0,
                'numInfants': 0,
                'origin': dep,
                'promoCode': ''
            }
            # Invalidation marker consumed downstream via request meta.
            invalid = {
                'date': dt.replace('-', ''),
                'depAirport': dep,
                'arrAirport': to,
                'mins': self.custom_settings.get('INVALID_TIME')
            }
            meta_data = dict(
                invalid=invalid,
                payload=payload,
            )
            headers = self.custom_settings.get('HEADERS')
            # headers['User-Agent'] = random.choice(self.ua_data)[0]
            # Fresh UA per request; mutates the shared HEADERS dict in place.
            headers['User-Agent'] = self.ua_construction()
            yield scrapy.Request(self.start_urls[0],
                                 callback=self.parse,
                                 headers=headers,
                                 method='POST',
                                 meta={'meta_data': meta_data},
                                 body=json.dumps(payload),
                                 errback=self.errback)
def start_requests(self):
    """Poll 'by' tasks and yield GET searches built from `second_url`."""
    permins = 0
    logging.info(
        pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                          self.version))
    while True:
        result = pubUtil.getUrl('by', 1)
        if not result:
            logging.info('get task error')
            time.sleep(10)
            continue
        for data in result:
            logging.info("###input data: " + data)
            (dt, dep, to) = pubUtil.analysisData(data)
            # NOTE(review): per-task state stored on self; overwritten each
            # iteration, so concurrent use of these attributes elsewhere
            # would race — verify no other method reads them asynchronously.
            self.dep = dep
            self.arr = to
            self.date = dt
            second_data = {
                'flyingFrom[]': self.dep,
                'flyingTo[]': self.arr,
                'depDate': self.date,
                'returnDate': '',
                'adults': self.ADT,
                'children': '0',
                'infants': '0',
                'infantAge': '',
                'isOneWay': 'true',
                'childAge': '',
                'searchType': 'selected',
                'tabId': dep,
                'cycleDates': dt,
                'duration': '0'
            }
            # NOTE: Python 2-only spelling (urllib.parse.urlencode on Py3).
            second_url = '%s%s' % (self.second_url[0],
                                   urllib.urlencode(second_data))
            # Invalidation marker consumed downstream via request meta.
            invalid = {
                'date': self.date.replace('-', ''),
                'depAirport': self.dep,
                'arrAirport': self.arr,
                'mins': self.custom_settings.get('INVALID_TIME')
            }
            yield scrapy.Request(second_url,
                                 callback=self.parse,
                                 dont_filter=True,
                                 meta={'invalid': invalid},
                                 errback=self.errback)
def start_requests(self):
    """Poll tasks (remote or local iterator) and POST JSON one-way searches."""
    permins = 0
    logging.info(
        pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                          self.version))
    result_iter = None
    while True:
        if hasattr(self, 'local'):
            if not result_iter:
                result_iter = pubUtil.get_task(self.name, days=30)
            result = next(result_iter)
        else:
            result = pubUtil.getUrl(self.name, 1)
        if not result:
            logging.info('get task error')
            time.sleep(20)
            continue
        for data in result:
            (dt, dep, to) = pubUtil.analysisData(data)
            # dep, to, dt = 'FUK', 'YNT', '2019-03-27'
            post_data = {
                "tripType": "OW",
                "orgCode": dep,
                "dstCode": to,
                "takeoffdate1": dt,
            }
            # Random User-Agent per request (fake_useragent); mutates the
            # shared headers dict in place.
            ua = UserAgent()
            self.headers['User-Agent'] = ua.random
            # post_data = urllib.urlencode(post_data)
            # logging.info("###input data: " + dep + to + dt)
            # Invalidation marker consumed downstream via request meta.
            invalid = {
                'date': dt.replace('-', ''),
                'depAirport': dep,
                'arrAirport': to,
                'mins': self.custom_settings.get('INVALID_TIME')
            }
            yield scrapy.Request(
                self.start_urls[0],
                headers=self.headers,
                body=json.dumps(post_data),
                # body=post_data,
                callback=self.parse,
                dont_filter=True,
                # meta={'invalid': invalid, 'proxy': 'http://127.0.0.1:8888'},
                meta={'invalid': invalid},
                method='POST',
                errback=self.errback)
def start_requests(self):
    """Poll tasks and bootstrap each search by first fetching a session."""
    permins = 0
    logging.info(
        pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                          self.version))
    result_iter = None
    while True:
        if hasattr(self, 'local'):
            if not result_iter:
                result_iter = pubUtil.get_task(self.name, step=7)
            result = next(result_iter)
        else:
            result = pubUtil.getUrl(self.name, 1)
        if not result:
            logging.info('get task error')
            time.sleep(10)
            continue
        # Loop over tasks; currently one per batch by default.
        for data in result:
            # Task format like BVE-LYS-201812030000-15 (route, date, count).
            count = int(data.split(':')[-1])
            # NOTE(review): data[:-2] strips the trailing ':N' count — this
            # assumes a single-digit count; a two-digit count would leave a
            # dangling ':' for analysisData. Confirm against task producer.
            (date, dep, arr) = pubUtil.analysisData(data[:-2])
            date = date.replace('-', '') + '0000'
            # logging.info('# input data: ' + dep + '-' + arr + '-' + date + '-' + str(count))
            task_data = {
                'dep': dep,
                'arr': arr,
                'date': date,
                'count': count
            }
            # NOTE: Python 2-only spelling (urllib.parse.urlencode on Py3).
            post_data = urllib.urlencode(
                a5_post_data.first_post_data(dep, arr, date, self.ADT))
            # Fetch a session first; get_session issues the real search.
            yield scrapy.Request(
                self.get_session_url[0],
                body=post_data,
                callback=self.get_session,
                dont_filter=True,
                meta={
                    'post_data': post_data,
                    'task_data': task_data
                },
                method='POST',
                errback=self.errback,
            )
def start_requests(self):
    """Poll tasks, rotate the session token per batch, and POST searches."""
    permins = 0
    logging.info(
        pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                          self.version))
    result_iter = None
    while True:
        if hasattr(self, 'local'):
            if not result_iter:
                result_iter = pubUtil.get_task(self.name, days=30)
            result = next(result_iter)
        else:
            result = pubUtil.getUrl(self.name, 1)
        if not result:
            logging.info('get task error')
            time.sleep(20)
            continue
        # Rotate the shared session token once per task batch.
        self.session_data['tck'] = random.choice(self.id_pool)
        for data in result:
            # logging.info("###input data: " + data)
            (dt, dep, to) = pubUtil.analysisData(data)
            # dt,dep,to='2019-02-28','CAN','RGN'
            post_data = {
                'traveldate': dt,
                'ori': dep,
                'currency': 'CNY',
                'dest': to
            }
            # Invalidation marker consumed downstream via request meta.
            invalid = {
                'date': dt.replace('-', ''),
                'depAirport': dep,
                'arrAirport': to,
                'mins': self.custom_settings.get('INVALID_TIME')
            }
            # NOTE: Python 2-only spelling (urllib.parse.urlencode on Py3).
            post_data = urllib.urlencode(post_data)
            yield scrapy.Request(self.start_urls[0],
                                 headers=self.headers,
                                 body=post_data,
                                 callback=self.parse,
                                 dont_filter=True,
                                 meta={'invalid': invalid},
                                 method='POST',
                                 errback=self.errback)
def start_requests(self):
    """Poll 'KC' tasks and POST availability searches via a session endpoint.

    Fix: the old code overwrote `dt` with the site's dotted 'dd.mm.yyyy'
    format before building the invalidation marker, so invalid['date']
    carried dots instead of the YYYYMMDD used by every sibling spider.
    The site-format date is now kept in a separate variable.
    """
    permins = 0
    print(
        pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                          self.version))
    while True:
        result = pubUtil.getUrl('KC', 5)
        if not result:
            logging.info('get task error')
            time.sleep(10)
            continue
        for data in result:
            (dt, dep, arr) = pubUtil.analysisData(data)
            # Target site expects dd.mm.yyyy; keep the ISO date in `dt`.
            dt_site = re.sub(r'(\d{4})-(\d{2})-(\d{2})', r'\3.\2.\1', dt)
            # print(dt_site)
            # dt = time.strftime('%d.%m.%Y',dt)
            print(dep, arr, dt_site)
            payload = {
                'captchaResponse': '',
                'pReturnDateStr': '',
                'pFlightDateStr': dt_site,
                'pRequest': {
                    'TwoWayRoute': 'false',
                    'DateAreFlexible': 'true',
                    'Origin': dep,
                    'Destination': arr,
                    'Bookingclass': 'ECO',
                    'Adult': '3',
                    'Child': '0',
                    'Infant': '0',
                    'Resident': 'false'
                },
            }
            # Invalidation marker, now in YYYYMMDD like the other spiders.
            invalid = {
                'date': dt.replace('-', ''),
                'depAirport': dep,
                'arrAirport': arr,
                'mins': self.custom_settings.get('INVALID_TIME')
            }
            meta_data = dict(invalid=invalid, payload=payload)
            yield scrapy.Request(
                self.custom_settings.get('sessionID_url'),
                callback=self.data_requests,
                method='POST',
                headers=self.custom_settings.get('start_headers'),
                meta={'meta_data': meta_data},
                body=json.dumps(payload),
                errback=self.errback)
def start_requests(self):
    """Poll 'LA' tasks and yield GET searches with a hand-built query string."""
    permins = 0
    print(
        pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                          self.version))
    while True:
        result = pubUtil.getUrl('LA', 5)
        if not result:
            logging.info('get task error')
            time.sleep(10)
            continue
        for data in result:
            (dt, dep, to) = pubUtil.analysisData(data)
            # for i in range(1):
            # for data in self.get_task():
            #     dep, to, dt = data
            # dt,dep,to= '2018-10-16','CWB','SAO'
            # print(dep, to, dt)
            currency = "BR"
            seat = self.custom_settings.get('SEAT')
            querystring = {
                "country": currency,
                "origin": dep,
                "destination": to,
                "departure": dt,
                "adult": seat,
            }
            # Build the URL by hand (trailing '&' is tolerated by the server).
            url = self.start_urls[0] + '?'
            for key in querystring:
                url = url + key + '=' + str(querystring.get(key)) + '&'
            # Invalidation marker consumed downstream via request meta.
            invalid = {
                'date': dt.replace('-', ''),
                'depAirport': dep,
                'arrAirport': to,
                'mins': self.custom_settings.get('INVALID_TIME')
            }
            meta_data = dict(invalid=invalid,
                             params=querystring,
                             aaa=(dep, to, dt),
                             flight_time=dt)
            yield scrapy.Request(
                url,
                callback=self.parse,
                method='GET',
                headers=self.custom_settings.get('HEADERS'),
                meta={'meta_data': meta_data},
                errback=self.errback)
def start_requests(self): permins = 0 # print(pubUtil.heartbeat(self.host_name, self.name, self.num, permins, self.version)) while True: # CRL-OTP:20181203:30 # result = pubUtil.getUrl(self.name, 1) result = ["CRL-OTP:20181220:30"] if not result: logging.info('get task error') time.sleep(10) continue for data in result: (dt, dep, to) = pubUtil.analysisData(data) # 把获取到的data格式化 # (dt, dep, to, days) = ('20181026', 'LTN', 'IAS', 30) dt_datetime = datetime.strptime(dt, '%Y-%m-%d') # end_date = dt_datetime + timedelta(days=int(days)) dt = dt_datetime.strftime('%Y-%m-%d') # dep = 'AES' # to = 'GDN' # dt = '2018-12-20' logging.info('# input data: ' + dep + '-' + to + '-' + dt) data_post = dict( DepartureDate=dt, DepartureStation=dep, ArrivalStation=to, ) # 设置无效 invalid = { 'date': dt.replace('-', ''), 'depAirport': dep, 'arrAirport': to, 'mins': self.custom_settings.get('INVALID_TIME') } self.custom_settings['POST_DATA']['flightList'] = [data_post] pay_load = json.dumps(self.custom_settings.get('POST_DATA')) yield scrapy.Request(method='POST', url=self.start_urls[0], body=pay_load, dont_filter=True, callback=self.parse, meta={'invalid': invalid, 'pay_load': pay_load}, errback=self.errback, )
def start_requests(self):
    """Poll 'BE' tasks (or a local task iterator) and yield GET searches.

    Fix: the Py2/Py3 urlencode fallback used a bare `except:`, which also
    swallowed genuine encoding errors; it now catches only AttributeError.
    """
    permins = 0
    print(
        pubUtil.heartbeat(self.host_name, self.carrier, self.num, permins,
                          self.version))
    result_iter, result = None, None
    while True:
        if hasattr(self, 'local'):
            if not result_iter or not result:
                result_iter = self.get_task()
            result = next(result_iter)
        else:
            result = pubUtil.getUrl('BE', 10)
        if not result:
            time.sleep(60)
            continue
        for data in result:
            # Format the fetched task data into (date, departure, arrival).
            (dt, dep, to) = pubUtil.analysisData(data)
            # dt, dep, to = '2018-11-01', 'EXT', 'JER'
            if pubUtil.dateIsInvalid(dt):
                continue
            temp = {
                'depart': dep,
                'arr': to,
                'departing': dt,
                'returning': '',
                'promo-code': '',
                'adults': 3,
                'teens': 0,
                'children': 0,
                'infants': 0
            }
            # Python 3 moved urlencode to urllib.parse; Python 2 keeps it
            # on urllib itself.
            try:
                params = urllib.parse.urlencode(temp)
            except AttributeError:
                params = urllib.urlencode(temp)
            url = '%s%s/%s?%s' % (self.start_urls, dep, to, params)
            yield scrapy.Request(url,
                                 callback=self.parse,
                                 dont_filter=True,
                                 errback=self.err_back)
def start_requests(self):
    """Poll 'TW' tasks and yield GET searches through a transit callback."""
    permins = 0
    print(pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                            self.version))
    result_iter = None
    while True:
        if hasattr(self, 'local'):
            if not result_iter:
                result_iter = pubUtil.get_task(self.name, days=30)
            result = next(result_iter)
        else:
            result = pubUtil.getUrl('TW', 5)
        if not result:
            logging.info('get task error')
            time.sleep(10)
            continue
        today = datetime.now().strftime('%Y%m%d')
        for data in result:
            # Format the fetched task data.
            (dt, dep, to) = pubUtil.analysisData(data)
            # dt, dep, to = '20180722', 'ICN', 'KIX'  # sample for testing
            # NOTE: Python 2-only spelling (urllib.parse.urlencode on Py3).
            # The trailing '_' is a millisecond cache-buster.
            params = urllib.urlencode(dict(
                origin=dep,
                destination=to,
                onwardDateStr=dt.replace('-', ''),
                # pointOfPurchase='KR',
                paxTypeCountStr='3,0,0',
                today=today,
                travelType='OW',
                searchType='byDate',
                # domesticYn='Y',
                bundleAmountOW=0,
                bundleAmountRT=0,
                routeCls='AS',
                _=int(time.time() * 1000)
            ))
            total_url = self.start_urls[0] + params
            yield scrapy.Request(url=total_url,
                                 callback=self.transit,
                                 meta={'params': params, 'flag': 1},
                                 dont_filter=True)
def start_requests(self):
    """Poll 'PC' tasks and POST JSON searches (Python 2 source)."""
    permins = 0
    print(
        pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                          self.version))
    result_iter = None
    result = None
    while True:
        if hasattr(self, 'local'):
            if not result_iter or not result:
                result_iter = self.get_task()
            result = next(result_iter)
        else:
            result = pubUtil.getUrl('PC', 5)
        if not result:
            logging.info('get task error')
            time.sleep(10)
            continue
        for data in result:
            print data
            (dt, dep, to) = pubUtil.analysisData(data)
            # dt,dep,to = '2018-08-30','TZX','SAW'
            seat = self.custom_settings.get('SEAT')
            payload = {
                "flightSearchList": [{
                    "arrivalPort": to,
                    "departurePort": dep,
                    "departureDate": dt
                }],
                "adultCount": seat,
                "childCount": 0,
                "infantCount": 0,
                "soldierCount": 0,
                "currency": "TL",
                "operationCode": "TK",
                "ffRedemption": False,
                "openFlightSearch": False,
                "personnelFlightSearch": False,
                "dateOption": 1
            }
            # Invalidation marker consumed downstream via request meta.
            invalid = {
                'date': dt.replace('-', ''),
                'depAirport': dep,
                'arrAirport': to,
                'mins': self.custom_settings.get('INVALID_TIME')
            }
            yield scrapy.Request(
                self.start_urls[0],
                callback=self.parse,
                method='POST',
                headers=self.custom_settings.get('HEADERS'),
                meta={'invalid': invalid},
                body=json.dumps(payload),
                errback=self.errback)
def start_requests(self):
    """Poll '4O' tasks and yield GET searches with a rotating User-Agent."""
    permins = 0
    print(
        pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                          self.version))
    result_iter = None
    result = None
    while True:
        if hasattr(self, 'local'):
            if not result_iter or not result:
                result_iter = self.get_task()
            result = next(result_iter)
            print result[0]
        else:
            result = pubUtil.getUrl('4O', 5)
        if not result:
            logging.info('get task error')
            time.sleep(10)
            continue
        for data in result:
            (dt, dep, to) = pubUtil.analysisData(data)
            # for i in range(1):
            # for data in self.get_task():
            #     dep, to, dt = data
            # dt,dep,to= '2018-11-21','MEX','MTY'
            # print(dep, to, dt)
            seat = self.custom_settings.get('SEAT')
            querystring = {
                'ArrivalStation': to,
                'CurrencyCode': 'USD',
                'DepartureDate': dt,
                'DepartureStation': dep,
                'PaxResidentCountry': 'US',
                'ReturnDate': '',
                'RoleCode': 'WWWA',
                # 'Signature': '7g45Wa4GSDU%3D%7CFGktbwO8EpThKsp1cB6fKOAqUvn3guMXZi8UTGptOepMgEP4vfdsuDVx9CUEK6PPNKDct2Otx5ujxMUtBdbGAKypdlrDs58IJ5egu0MpkyBUcJAzA3CC5OLpbNF%2B2XmVvSljYUJspk0%3D',
                # 'Signature': 'D0QxiSJmAVA%3D%7Cu8aeufqyaeaQ8rZDzn%2FfgXgUix%2Fw6vE1NV1boWcJkMxA%2FST6xYdjipCYmvebA4zP%2BVfgbgxJPChcxCy2jn8ur4hAPBprYZ%2F7PBBZr3%2FaXo6aZ018F3GSPm3jNzQzeC3UBXtJs77215s%3D'
                'TotalPaxAdt': seat,
                'TotalPaxChd': '0',
                'TotalPaxInf': '0',
                'TotalPaxSrc': '0',
                'Version': '7'
            }
            # Build the URL by hand (trailing '&' is tolerated by the server).
            url = self.start_urls[0] + '?'
            for key in querystring:
                url = url + key + '=' + str(querystring.get(key)) + '&'
            headers = self.custom_settings.get('HEADERS')
            # headers['User-Agent'] = random.choice(self.ua_data)[0]
            # Fresh UA per request; mutates the shared HEADERS dict in place.
            headers['User-Agent'] = self.ua_construction()
            # print 'request UA:', headers['User-Agent']
            # Invalidation marker consumed downstream via request meta.
            invalid = {
                'date': dt.replace('-', ''),
                'depAirport': dep,
                'arrAirport': to,
                'mins': self.custom_settings.get('INVALID_TIME'),
            }
            meta_data = dict(
                invalid=invalid,
                params=querystring,
                aaa=(dep, to, dt),
                flight_time=dt,
                url=url,
            )
            yield scrapy.Request(url,
                                 callback=self.parse,
                                 method='GET',
                                 headers=headers,
                                 meta={'meta_data': meta_data},
                                 errback=self.errback)
def start_requests(self):
    """Poll 'G9' tasks and POST JSON mobile-API searches."""
    permins = 0
    print(pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                            self.version))
    while True:
        result = pubUtil.getUrl('G9', 5)
        if not result:
            logging.info('get task error')
            time.sleep(10)
            continue
        for data in result:
            (dt, dep, to) = pubUtil.analysisData(data)
            # for i in range(1):
            # for data in self.get_task():
            #     dep, to, dt = data
            # dt,dep,to = '2018-10-16','SHJ','DMM'
            seat = self.custom_settings.get('SEAT')
            # Mimics the Android app's search request body.
            payload = {
                "dataModel": {
                    "app": {
                        "apiKey": "api_key",
                        "appVersion": "4.0.3",
                        "language": "en",
                        "os": "android"
                    },
                    "isReturn": False,
                    "journeyInfo": [{
                        "departureDateTime": "%sT00:00:00" % dt,
                        "departureVariance": 0,
                        "destination": to,
                        "destinationCity": False,
                        "origin": dep,
                        "originCity": False
                    }],
                    "preferences": {
                        "cabinClass": "Y",
                        "currency": "USD",
                        "logicalCabinClass": "Y",
                        "promotion": {
                            "code": "",
                            "type": "PROMO_CODE"
                        }
                    },
                    "travellerQuantity": {
                        "adultCount": seat,
                        "childCount": "0",
                        "infantCount": "0"
                    }
                }
            }
            body = json.dumps(payload)
            # Invalidation marker consumed downstream via request meta.
            invalid = {
                'date': dt.replace('-', ''),
                'depAirport': dep,
                'arrAirport': to,
                'mins': self.custom_settings.get('INVALID_TIME')
            }
            meta_data = dict(
                invalid=invalid,
                payload=body,
                aaa=(dep, to, dt),
                flight_time=dt
            )
            yield scrapy.Request(self.start_urls[0],
                                 callback=self.parse,
                                 method='POST',
                                 headers=self.custom_settings.get('HEADERS'),
                                 meta={'meta_data': meta_data},
                                 body=body,
                                 errback=self.errback
                                 )
def start_requests(self):
    """Expand each ROUTE:DATE:COUNT task into COUNT consecutive-day searches.

    Fix: the old loop re-assigned `_date = _date + timedelta(days=i)`,
    accumulating the offset each iteration and producing day offsets
    0, 1, 3, 6, ... instead of 0, 1, 2, 3, ... The base date now stays
    fixed (matching the sibling spider that expands tasks the same way).
    """
    permins = 0
    logging.info(
        pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                          self.version))
    result_iter = None
    while True:
        if hasattr(self, 'local'):
            if not result_iter:
                result_iter = pubUtil.get_task(self.name, days=30)
            result = next(result_iter)
        else:
            result = pubUtil.getUrl(self.name, 1)
        if not result:
            logging.info('get task error')
            time.sleep(5)
            continue
        for data in result:
            # logging.info("## input data: " + data)
            # Task format like [u'TLN-CFE:20181110:1'] -> route:date:count.
            count = int(data.split(':')[-1])
            # NOTE(review): data[:-2] assumes a single-digit count suffix.
            (date, dep, arr) = pubUtil.analysisData(data[:-2])
            _date = datetime.strptime(date, '%Y-%m-%d')
            for i in range(count):
                day = _date + timedelta(days=i)
                date = day.strftime('%Y%m%d')
                # dep = 'KIX'
                # arr = 'ICN'
                # logging.info('# input data: ' + dep + '-' + arr + '-' + date)
                city_code = self.city_dict.get(dep)
                if city_code is None:
                    # NOTE(review): only logs — the request is still built
                    # with city_code=None; confirm ze_post_data tolerates it.
                    logging.info('# not found city: ' + dep)
                body = json.dumps(
                    ze_post_data.get_data(dep, arr, date, self.ADT,
                                          city_code))
                # Invalidation marker consumed downstream via request meta.
                invalid = {
                    'date': date.replace('-', ''),
                    'depAirport': dep,
                    'arrAirport': arr,
                    'mins': self.custom_settings.get('INVALID_TIME')
                }
                task_data = {
                    'dep': dep,
                    'arr': arr,
                    'date': date,
                    'city_code': city_code,
                    'body': body
                }
                yield scrapy.Request(self.start_urls[0],
                                     headers=self.headers,
                                     body=body,
                                     callback=self.parse,
                                     dont_filter=True,
                                     meta={
                                         'invalid': invalid,
                                         'task_data': task_data
                                     },
                                     errback=self.errback,
                                     method='POST')
def start_requests(self):
    """Poll 'XQ' tasks and POST form-encoded one-way searches."""
    permins = 0
    print(pubUtil.heartbeat(self.host_name, self.name, self.num, permins, self.version))
    while True:
        result = pubUtil.getUrl('XQ', 5)
        if not result:
            logging.info('get task error')
            time.sleep(10)
            continue
        for data in result:
            (dt, dep, to) = pubUtil.analysisData(data)
            # for data in self.get_task():
            #     dep, to, dt = data
            # dt,dep,to= '2018-09-18','AYT','DUS'
            # Target site expects dd-Mon-YYYY (locale-dependent %b).
            dt_change = datetime.strptime(dt, '%Y-%m-%d').strftime('%d-%b-%Y')
            print(dep, to, dt, dt_change)
            seat = self.custom_settings.get('SEAT')
            payload = {
                'origin': dep,
                'adults': seat,
                'fareRT': '',
                'flightNumberRT': '',
                'wvm': 'WVMD',
                'ibeScreenId': 'IBE000',
                'bookingSource': '',
                'fareOW': '',
                'children': '0',
                'cabinClass': 'ECONOMY',
                '_eventId': 'showWtLblResult',
                'travelDate': dt_change,
                'destination': to,
                'fareLevel': '',
                'deviceType': '',
                'tripType': 'OW',
                'channel': 'DEBD',
                'pointOfPurchase': 'OTHERS',
                'flightNumberOW': '',
                'access_token': '',
                'promoCode': '',
                'ccType': '',
                'skyscanner_redirectid': '',
                'mode': 'searchResultInter',
                'infants': '0',
                'flexTrvlDates': ''}
            # Invalidation marker consumed downstream via request meta.
            invalid = {
                'date': dt.replace('-', ''),
                'depAirport': dep,
                'arrAirport': to,
                'mins': self.custom_settings.get('INVALID_TIME')
            }
            # Build the form body by hand (trailing '&' is tolerated).
            # NOTE(review): values are concatenated unescaped — assumes
            # seat and dates never contain reserved URL characters.
            body = ''
            for key in payload:
                body = body + key + '=' + payload.get(key) + '&'
            meta_data = dict(
                invalid=invalid,
                payload=body,
                aaa=(dep, to, dt),
                flight_time=dt
            )
            yield scrapy.Request(self.start_urls[0],
                                 callback=self.parse,
                                 method='POST',
                                 headers=self.custom_settings.get('HEADERS'),
                                 meta={'meta_data': meta_data},
                                 body=body,
                                 errback=self.errback
                                 )
def start_requests(self):
    """Poll 'SL' tasks and POST availability searches.

    Fix: the errback lambda closed over the loop variable `data_post`;
    closures late-bind, so every failed request reported the *last* task's
    payload. Binding it as a default argument freezes the per-request value.
    """
    permins = 0
    print(
        pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                          self.version))
    while True:
        result = pubUtil.getUrl('SL', 1)
        if not result:
            logging.info('get task error')
            time.sleep(10)
            continue
        for data in result:
            # Format the fetched task data.
            (dt, dep, to) = pubUtil.analysisData(data)
            dt_com = data.split(':')[1]
            self.task.append({
                'date': dt_com,
                'depAirport': dep,
                'arrAirport': to,
                'mins': settings.INVALID_TIME
            })
            dep = str(dep)
            to = str(to)
            if pubUtil.dateIsInvalid(dt):
                logging.info('date is invalid ,next~')
                continue
            # Epoch seconds shifted +8h (UTC+8) for the /Date(ms)/ format.
            dt_stamp = time.mktime(time.strptime(dt, '%Y-%m-%d')) + 8 * 60 * 60
            data_post = {
                "sd": {
                    "Adults": self.seats,
                    "AirlineCode": "",
                    "ArrivalCity": to,
                    "ArrivalCityName": None,
                    "BookingClass": None,
                    "CabinClass": 0,
                    "ChildAge": [],
                    "Children": 0,
                    "CustomerId": 0,
                    "CustomerType": 0,
                    "CustomerUserId": 230,
                    "DepartureCity": dep,
                    "DepartureCityName": None,
                    "DepartureDate": "/Date(%s)/" % int(dt_stamp * 1000),
                    "DepartureDateGap": 0,
                    "DirectFlightsOnly": False,
                    "Infants": 0,
                    "IsPackageUpsell": False,
                    "JourneyType": 1,
                    "PreferredCurrency": "THB",
                    "ReturnDate": "/Date(-2208988800000)/",
                    "ReturnDateGap": 0,
                    "SearchOption": 1
                },
                "fsc": "0"
            }
            yield scrapy.Request(
                method='POST',
                url=self.start_urls,
                body=json.dumps(data_post),
                callback=self.parse,
                meta=data_post,
                dont_filter=True,
                errback=lambda x, data_post=data_post: self.download_errback(
                    x, data_post))
def start_requests(self):
    """Poll 'B6' tasks and yield GET searches (Python 2 source)."""
    permins = 0
    print(pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                            self.version))
    result_iter = None
    result = None
    while True:
        if hasattr(self, 'local'):
            if not result_iter or not result:
                result_iter = self.get_task()
            result = next(result_iter)
            print result[0]
        else:
            result = pubUtil.getUrl('B6', 5)
        if not result:
            logging.info('get task error')
            time.sleep(10)
            continue
        for data in result:
            (dt, dep, to) = pubUtil.analysisData(data)
            # for i in range(1):
            # for data in self.get_task():
            #     dep, to, dt = data
            # dt,dep,to= '2018-11-21','RDU','JFK'
            # dt,dep,to= '2018-11-23','BOS','JAX'
            # print(dep, to, dt)
            seat = self.custom_settings.get('SEAT')
            querystring = {
                'departureAirportCode': dep,
                'env': 'prod',
                'jbBookerCurrency-flights': 'usd',
                'journeySpan': 'OW',
                'numAdults': seat,
                'numChildren': '0',
                'numInfants': '0',
                'returnAirportCode': to,
                # Site expects DD-MM-YYYY.
                'startDate': re.sub(r'(\d{4})-(\d{2})-(\d{2})', r'\3-\2-\1', dt),
                'submitted-form': 'bkSearch',
                'un_jtt_application_platform': 'android',
                'version': 'ANDROID-v4.6.4'
            }
            # Build the URL by hand (trailing '&' is tolerated by the server).
            url = self.start_urls[0] + '?'
            for key in querystring:
                url = url + key + '=' + str(querystring.get(key)) + '&'
            # Invalidation marker consumed downstream via request meta.
            invalid = {
                'date': dt.replace('-', ''),
                'depAirport': dep,
                'arrAirport': to,
                'mins': self.custom_settings.get('INVALID_TIME'),
            }
            meta_data = dict(
                invalid=invalid,
                params=querystring,
                aaa=(dep, to, dt),
                flight_time=dt,
                url=url,
            )
            yield scrapy.Request(url,
                                 callback=self.parse,
                                 method='GET',
                                 headers=self.custom_settings.get('HEADERS'),
                                 meta={'meta_data': meta_data},
                                 errback=self.errback
                                 )
def start_requests(self):
    """Poll '9C' tasks and POST form-encoded searches."""
    permins = 0
    print(
        pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                          self.version))
    result_iter = None
    result = None
    while True:
        if hasattr(self, 'local'):
            if not result_iter or not result:
                result_iter = self.get_task()
            result = next(result_iter)
        else:
            result = pubUtil.getUrl('9C', 5)
        if not result:
            logging.info('get task error')
            time.sleep(10)
            continue
        for data in result:
            (dt, dep, to) = pubUtil.analysisData(data)
            # for i in range(10):
            # dt, dep, to = '2019-02-25', 'SJW', 'TPE'
            # print(dt, dep, to)
            seat = self.custom_settings.get('SEAT')
            payload = {
                'Arrival': to,
                'IsIJFlight': 'false',
                'CabinActId': 'null',
                'SeatsNum': seat,
                'Currency': '0',
                'IsLittleGroupFlight': 'false',
                'ReturnDate': 'null',
                'Departure': dep,
                'IsUM': 'false',
                'IsBg': 'false',
                'IsJC': 'false',
                'Active9s': '',
                'IsShowTaxprice': 'false',
                'DepartureDate': dt,
                'isdisplayold': 'false',
                'ActId': '0',
                'IfRet': 'false',
                'IsEmployee': 'false',
                'SType': '0'
            }
            # Build the form body by hand (trailing '&' is tolerated).
            form = ''
            for b in payload:
                form = form + b + '=' + str(payload.get(b)) + '&'
            # Invalidation marker consumed downstream via request meta.
            invalid = {
                'date': dt.replace('-', ''),
                'depAirport': dep,
                'arrAirport': to,
                'mins': self.custom_settings.get('INVALID_TIME')
            }
            meta_data = dict(
                invalid=invalid,
                form=form,
            )
            yield scrapy.Request(
                self.start_urls[0],
                callback=self.parse,
                headers=self.custom_settings.get('HEADERS'),
                method='POST',
                meta={'meta_data': meta_data},
                body=form,
                errback=self.errback)
def start_requests(self):
    """Pull AD one-way search tasks and yield the ASP.NET search POST for each.

    The payload mirrors the site's ControlGroupSearch form fields.
    """
    permins = 0
    print(pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                            self.version))
    while True:
        result = pubUtil.getUrl('AD', 5)
        if not result:
            logging.info('get task error')
            time.sleep(10)
            continue
        for data in result:
            # Task string -> (date 'YYYY-MM-DD', departure IATA, arrival IATA).
            (dt, dep, to) = pubUtil.analysisData(data)
            print(dep, to, dt)
            seat = self.custom_settings.get('SEAT')
            payload = {
                'ControlGroupSearch$SearchMainSearchView$TextBoxMarketOrigin1': dep,
                'ControlGroupSearch$SearchMainSearchView$TextBoxMarketDestination1': to,
                # Month dropdown wants 'YYYY-MM', day dropdown wants 'DD'.
                'ControlGroupSearch$SearchMainSearchView$DropDownListMarketMonth1':
                    re.sub(r'(\d{4})-(\d{2})-(\d{2})', r'\1-\2', dt),
                'ControlGroupSearch$SearchMainSearchView$DropDownListMarketDay1':
                    re.sub(r'(\d{4})-(\d{2})-(\d{2})', r'\3', dt),
                # NOTE(fix): `seat` was read from settings but a literal '5'
                # was sent; use the configured seat count like the sibling
                # spiders do.
                'ControlGroupSearch$SearchMainSearchView$DropDownListPassengerType_ADT': seat,
                'ControlGroupSearch$SearchMainSearchView$TextBoxPromoCode': 'CALLCENT',
                'culture': 'en-US',
                'ControlGroupSearch$SearchMainSearchView$DropDownListPassengerType_CHD': '0',
                'ControlGroupSearch$SearchMainSearchView$CheckBoxUseMacDestination1': '',
                'ControlGroupSearch$SearchMainSearchView$DropDownListPassengerType_INFANT': '0',
                'ControlGroupSearch$SearchMainSearchView$CheckBoxUseMacOrigin1': '',
                'ControlGroupSearch$SearchMainSearchView$RadioButtonMarketStructure': 'OneWay',
                'ControlGroupSearch$SearchMainSearchView$DropDownListFareTypes': 'R',
                '__EVENTTARGET': 'ControlGroupSearch$LinkButtonSubmit',
            }
            # Invalidation record for this (date, dep, arr) combination.
            invalid = {
                'date': dt.replace('-', ''),
                'depAirport': dep,
                'arrAirport': to,
                'mins': self.custom_settings.get('INVALID_TIME')
            }
            body = ''
            for key in payload:
                body = body + key + '=' + str(payload.get(key)) + '&'
            meta_data = dict(
                invalid=invalid,
                payload=body,
                aaa=(dep, to, dt),
                flight_time=dt
            )
            yield scrapy.Request(self.start_urls[0],
                                 callback=self.parse,
                                 method='POST',
                                 headers=self.custom_settings.get('HEADERS'),
                                 meta={'meta_data': meta_data},
                                 body=body,
                                 errback=self.errback)
def start_requests(self):
    """Expand multi-day tasks and yield one JSON search POST per day.

    Task format: ``'TLN-CFE:20181110:<count>'`` — crawl <count> consecutive
    days starting at the given date.
    """
    permins = 0
    logging.info(
        pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                          self.version))
    result_iter = None
    while True:
        if hasattr(self, 'local'):
            # Local-debug mode: lazily create the task generator once, then
            # keep drawing batches from it.
            if not result_iter:
                result_iter = pubUtil.get_task(self.name, days=30)
            result = next(result_iter)
        else:
            result = pubUtil.getUrl(self.name, 1)
        if not result:
            logging.info('get task error')
            time.sleep(5)
            continue
        for data in result:
            count = int(data.split(':')[-1])
            # NOTE(fix): `data[:-2]` only stripped a single-digit count
            # (':1'); rsplit drops the count field regardless of its width,
            # so counts >= 10 no longer leave a dangling digit.
            (date, dep, arr) = pubUtil.analysisData(data.rsplit(':', 1)[0])
            _date = datetime.strptime(date, '%Y-%m-%d')
            for i in range(count):
                temp_date = _date + timedelta(days=i)
                date = temp_date.strftime('%Y-%m-%d')
                post_data = {
                    "originIata": dep,
                    "destinationIata": arr,
                    # API expects an ISO timestamp with a +08:00 offset.
                    "departureDate": date + "T00:00:00+08:00",
                    "passengerComposition": {
                        "adult": self.ADT,
                        "children": 0,
                        "infant": 0
                    }
                }
                body = json.dumps(post_data)
                # Invalidation record for this (date, dep, arr) combination.
                invalid = {
                    'date': date[:10].replace('-', ''),
                    'depAirport': dep,
                    'arrAirport': arr,
                    'mins': self.custom_settings.get('INVALID_TIME')
                }
                task_data = {
                    'dep': dep,
                    'arr': arr,
                    'date': date,
                    'body': body
                }
                yield scrapy.Request(url=self.start_urls[0],
                                     body=body,
                                     callback=self.parse,
                                     dont_filter=True,
                                     meta={
                                         'invalid': invalid,
                                         'task_data': task_data
                                     },
                                     errback=self.errback,
                                     method='POST')
def start_requests(self):
    """Expand multi-day tasks and yield proxied, session-backed search POSTs.

    When ``self.proxy_flag`` is set, keeps trying proxies until one can fetch
    a fresh session cookie from ``self.get_session_url``.
    """
    permins = 0
    logging.info(pubUtil.heartbeat(self.host_name, self.name, self.num,
                                   permins, self.version))
    result_iter = None
    while True:
        if hasattr(self, 'local'):
            # Local-debug mode: lazily create the task generator once.
            if not result_iter:
                result_iter = pubUtil.get_task(self.name, days=10)
            result = next(result_iter)
        else:
            result = pubUtil.getUrl(self.name, 1)
        if not result:
            logging.info('get task error')
            time.sleep(20)
            continue
        for data in result:
            # Task format: 'TLN-CFE:20181110:<count>' -> crawl <count> days.
            count = int(data.split(':')[-1])
            (date, dep, arr) = pubUtil.analysisData(data[:-2])
            _date = datetime.strptime(date, '%Y-%m-%d')
            for i in range(count):
                temp_date = _date + timedelta(days=i)
                date = temp_date.strftime('%m/%d/%Y')
                invalid_date = temp_date.strftime('%Y%m%d')
                if self.proxy_flag:
                    # Rotate proxies until one successfully fetches a session.
                    while True:
                        self.proxy = pubUtil.get_proxy(self.name)
                        if self.proxy is None:
                            logging.info('# no get proxy, continue')
                            continue
                        logging.info('# get a new ip: ' + self.proxy)
                        ip_proxies = {"https": "https://" + self.proxy}
                        try:
                            response = requests.get(self.get_session_url,
                                                    proxies=ip_proxies,
                                                    timeout=15)
                            # Flatten the cookie jar into a 'k=v; k=v' header
                            # string via a json round-trip.
                            self.cookies_str = json.dumps(
                                requests.utils.dict_from_cookiejar(
                                    response.cookies))[1:-1].replace(
                                        '\"', '').replace(
                                            ':', '=').replace(
                                                ' ', '').replace(',', '; ')
                        except Exception as e:
                            logging.info(e)
                            self.proxy_flag = True
                            logging.info('# get session error')
                            continue
                        self.proxy_flag = False
                        break
                headers = {
                    'Content-Type': 'application/x-www-form-urlencoded',
                    'Cookie': self.cookies_str
                }
                post_data = {
                    'from': dep,
                    'to': arr,
                    'departDate': date,
                    'departDateDisplay': date,
                    'ADT': self.ADT
                }
                post_data.update(self.custom_settings.get('POST_DATA'))
                # NOTE(fix): `urllib.urlencode` is Python 2 only; fall back
                # the same way other spiders in this file already do.
                try:
                    post_data = urllib.parse.urlencode(post_data)
                except AttributeError:
                    post_data = urllib.urlencode(post_data)
                # Invalidation record for this (date, dep, arr) combination.
                invalid = {
                    'date': invalid_date,
                    'depAirport': dep,
                    'arrAirport': arr,
                    'mins': self.custom_settings.get('INVALID_TIME')
                }
                yield scrapy.Request(url=self.start_urls[0],
                                     body=post_data,
                                     headers=headers,
                                     callback=self.parse,
                                     dont_filter=True,
                                     meta={'invalid': invalid,
                                           'proxy': self.proxy},
                                     errback=self.errback,
                                     method='POST')
def start_requests(self):
    """Poll MM tasks during the bookable window and POST one search per task."""
    permins = 0
    print(
        pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                          self.version))
    while True:
        # Outside the issue window there is nothing to crawl; back off.
        if not pubUtil.get_mm_adult():
            print('Non-issue time,sleep:60s')
            time.sleep(60)
            continue
        result = pubUtil.getUrl('MM', 5)
        if not result:
            logging.info('get task error')
            time.sleep(10)
            continue
        for data in result:
            # Task string -> (date 'YYYY-MM-DD', departure IATA, arrival IATA).
            (dt, dep, arr) = pubUtil.analysisData(data)
            from_data = {
                'F_departure_cd': dep,  # departure airport
                'F_destination_cd': arr,  # arrival airport
                # Year-month as YYYYMM and day as D/DD, both numeric.
                'F_go_ym': int(re.sub(r'(\d{4})-(\d{2})-(\d{2})', r'\1\2', dt)),
                'F_go_d': int(re.sub(r'(\d{4})-(\d{2})-(\d{2})', r'\3', dt)),
                'F_trip_type': 1,
                'F_adult_count': 5,  # seats queried per request
                'F_child_count': 0,
                'F_infant_count': 0,
                'F_p_token': '',
                'PCMD': 'searchflightresult',
                'BPCMD': 'searchflight',
                'Campaign': '',
                'next': 'next',
            }
            # Serialize as key=value pairs; the trailing '&' matches what the
            # endpoint has always been sent.
            form = ''.join(
                '{}={}&'.format(field, from_data[field]) for field in from_data)
            # Invalidation record for this (date, dep, arr) combination.
            invalid = {
                'date': dt.replace('-', ''),
                'depAirport': dep,
                'arrAirport': arr,
                'mins': self.custom_settings.get('INVALID_TIME'),
            }
            meta_data = dict(invalid=invalid,
                             maxSeats=from_data.get('F_adult_count'),
                             year=dt[:4],
                             form=form)
            yield scrapy.Request(
                self.start_urls[0],
                method='POST',
                headers=self.custom_settings.get('headers'),
                body=form,
                callback=self.parse,
                meta={'meta_data': meta_data},
                errback=self.errback)