Exemplo n.º 1
0
    def parse(self, response):
        """Parse a low-fare search response and yield one item per direct flight.

        Flow, in order:

        * Body is not JSON, or carries a top-level ``message``/``statusCode``:
          the original POST is re-issued unchanged.
        * ``errors`` is present: an invalid/unauthorised station pair ends the
          callback; any other error re-issues the request.
        * No ``currencyCode``: the leading days without fares are queued on
          ``self.task`` as invalid dates and a new request is sent for the
          remaining window (nothing is sent when no days remain).
        * Otherwise every single-leg fare becomes a ``WowSpiderItem``.

        :param response: Scrapy response whose ``meta['meta_data']`` holds the
            ``payload``, ``begin_dt``, ``add_day``, ``duration`` and
            ``invalid`` entries of the originating request.
        :returns: generator of ``scrapy.Request`` and ``WowSpiderItem``.
        """
        self.isOK = True
        meta = response.meta.get('meta_data')

        def retry_request():
            # Re-issue the POST that produced this response, unchanged.
            return scrapy.Request(self.start_urls[0],
                                  method='POST',
                                  headers=self.custom_settings.get('HEADERS'),
                                  body=meta.get('payload'),
                                  callback=self.parse,
                                  meta={'meta_data': meta},
                                  errback=self.errback)

        try:
            data = json.loads(response.body)
        except (ValueError, TypeError):
            # Body could not be decoded as JSON.
            self.isOK = False
            yield retry_request()
            return

        errors = data.get('errors')
        if errors:
            self.isOK = False
            invalid_route_errors = (
                'nsk-server:InvalidStationCode',
                'nsk-server:AuthorizationStationCategoryNotAllowed',
            )
            # Membership test: the former ``== a or b`` was always true, which
            # made the retry below unreachable.
            if errors[0].get('message') in invalid_route_errors:
                print('Invalid airfield pair')
                return
            print('get data error')
            yield retry_request()
            return

        if data.get('message') or data.get('statusCode'):
            print(data.get('message'))
            self.isOK = False
            yield retry_request()
            return

        fare_data = data.get('data')
        currency = fare_data.get('currencyCode')
        if not currency:
            # Count the leading days of the window that have no fares at all.
            num_day = 0
            for market in fare_data.get('lowFareDateMarkets'):
                if market.get('lowFares'):
                    break
                num_day += 1
            begin_dt = meta.get('begin_dt')
            add_day = meta.get('add_day') + num_day
            duration = meta.get('duration') - num_day
            invalid = meta.get('invalid')
            first_day = datetime.strptime(invalid.get('date'), '%Y%m%d')
            for offset in range(num_day):
                day = first_day + timedelta(days=offset)
                # Queue a copy per day; appending the same mutated dict made
                # every queued entry end up with the same date.
                self.task.append(dict(invalid, date=day.strftime('%Y%m%d')))
            if duration <= 0:
                print('No flight on current date')
                return
            begin_dt, end_dt = pubUtil.time_add_5j(begin_dt, add_day, duration)
            payload = json.loads(meta.get('payload'))
            payload['Criteria'][0]['BeginDate'] = "%sT00:00:00" % begin_dt
            payload['Criteria'][0]['EndDate'] = "%sT00:00:00" % end_dt
            payload = json.dumps(payload)
            meta_data = dict(
                invalid=dict(invalid, date=begin_dt.replace('-', '')),
                payload=payload,
                begin_dt=begin_dt,
                # NOTE(review): the un-shifted add_day is carried forward, as
                # in the original code - confirm this is intended.
                add_day=meta.get('add_day'),
                duration=duration,
            )
            print('No flight today,to requests new time:%s,%s' % (begin_dt, end_dt))
            yield scrapy.Request(self.start_urls[0],
                                 method='POST',
                                 headers=self.custom_settings.get('HEADERS'),
                                 body=payload,
                                 callback=self.parse,
                                 meta={'meta_data': meta_data},
                                 errback=self.errback)
            return

        admin_fees = self.custom_settings.get('ADMIN_FEES')
        # One market per day of the requested window.
        for market in fare_data.get('lowFareDateMarkets'):
            # Fares offered on that day.
            for low_fare in market.get('lowFares'):
                # More than one leg means a connecting itinerary: skip it.
                legs = low_fare.get('legs')
                if len(legs) > 1:
                    continue

                leg = legs[0]
                carrier = leg.get('carrierCode')
                flightNumber = '%s%s' % (carrier, leg.get('flightNumber'))
                deptime = time.strptime(leg.get('departureTime'), '%Y-%m-%dT%H:%M:%S')
                depTime = time.mktime(deptime)
                arrtime = time.strptime(leg.get('arrivalTime'), '%Y-%m-%dT%H:%M:%S')
                arrTime = time.mktime(arrtime)
                depAirport = leg.get('origin')
                arrAirport = leg.get('destination')
                aircraftType = leg.get('equipmentType')

                admin_tax = admin_fees.get('%s%s' % (depAirport, arrAirport))
                # Check for a missing entry first; the former order called
                # .get() on None before testing it.
                if not admin_tax or admin_tax.get('currency') != currency:
                    invalid = meta.get('invalid')
                    self.task.append(
                        dict(invalid, date=time.strftime('%Y%m%d', deptime)))
                    continue

                fares = low_fare.get('passengers').get('ADT')
                netFare = fares.get('fareAmount')
                adultTax = fares.get('taxesAndFeesAmount') + float(admin_tax.get('tax'))
                adultPrice = netFare + adultTax
                maxSeats = low_fare.get('availableCount')
                cabin = low_fare.get('bookingClasses')[0]
                isChange = 1
                # Built but not emitted: the item's 'segments' field below is
                # the literal '[]'.
                segments = dict(
                    flightNumber=flightNumber,
                    aircraftType=aircraftType,
                    number=1,
                    departureTime=time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(depTime)),
                    destinationTime=time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(arrTime)),
                    airline=carrier,
                    dep=depAirport,
                    dest=arrAirport,
                    seats=maxSeats,
                    duration=dataUtil.gen_duration(depTime, arrTime),
                    depTerminal=''
                )
                getTime = time.time()

                item = WowSpiderItem()
                item['flightNumber'] = flightNumber
                item['depTime'] = depTime
                item['arrTime'] = arrTime
                item['fromCity'] = self.portCitys.get(depAirport, depAirport)
                item['toCity'] = self.portCitys.get(arrAirport, arrAirport)
                item['depAirport'] = depAirport
                item['arrAirport'] = arrAirport
                item['currency'] = currency
                item['adultPrice'] = adultPrice
                item['adultTax'] = adultTax
                item['netFare'] = netFare
                item['maxSeats'] = maxSeats
                item['cabin'] = cabin
                item['carrier'] = carrier
                item['isChange'] = isChange
                item['segments'] = '[]'
                item['getTime'] = getTime
                yield item
Exemplo n.º 2
0
    def parse(self, response):
        """Parse an availability response and yield one item per direct flight.

        ``AirAvailabilityData`` is normally a list of day entries. When it is
        a dict instead, only the entry under key ``'1'`` is used. Day entries
        that are missing, have no flight, or have more than one flight
        segment are skipped.

        :param response: Scrapy response whose text is the JSON payload.
        :returns: generator of ``WowSpiderItem``.
        """
        if not response.text:
            print("not flight")
            return
        data_dict = json.loads(response.text)
        days_data = data_dict.get('AirAvailabilityData')
        # Special-case result: a dict keyed by index rather than a list.
        if isinstance(days_data, dict):
            days_data = [days_data.get('1')]
        if not days_data:
            # Key absent or empty: nothing to iterate over.
            return

        for day_data in days_data:
            if not day_data:
                continue
            flight = day_data.get('flight')
            if not flight:
                continue
            # More than one segment means a connecting itinerary.
            if len(flight) > 1:
                print("is change")
                continue

            flight_segment = flight[0]
            carrier = flight_segment.get('Carrier')
            flightNumber = carrier + flight_segment.get('Flight')
            deptime_tuple = time.strptime(flight_segment.get('Depart'), '%Y-%m-%dT%H:%M:00')
            depTime = time.mktime(deptime_tuple)
            arrtime_tuple = time.strptime(flight_segment.get('Arrivee'), '%Y-%m-%dT%H:%M:00')
            arrTime = time.mktime(arrtime_tuple)
            depAirport = flight_segment.get('From')
            arrAirport = flight_segment.get('To')
            maxSeats = int(flight_segment.get('Stock'))

            # 'prix' is divided by the configured SEAT count and reduced by 76.
            # NOTE(review): 76 is presumably the EASY-over-LIGHT surcharge,
            # since it is added back for the EASY price below - confirm.
            adultPrice = day_data.get('prix') / self.custom_settings.get("SEAT") - 76
            currency = day_data.get('deviseGet')

            adultTax = 0
            netFare = adultPrice - adultTax
            cabin = 'X'
            isChange = 1
            getTime = time.time()

            # Bundle prices: EASY and FLEX are derived from the base price and
            # stay at 0 when the base price is 0.
            if adultPrice != 0:
                easy_price = adultPrice + 76
                flex_price = adultPrice + 76 + 87
            else:
                easy_price = flex_price = 0
            segments = [
                [easy_price, maxSeats],
                [flex_price, maxSeats],
            ]

            item = WowSpiderItem()
            item['flightNumber'] = flightNumber
            item['depTime'] = depTime
            item['arrTime'] = arrTime
            item['fromCity'] = self.portCitys.get(depAirport, depAirport)
            item['toCity'] = self.portCitys.get(arrAirport, arrAirport)
            item['depAirport'] = depAirport
            item['arrAirport'] = arrAirport
            item['currency'] = currency
            item['adultPrice'] = adultPrice
            item['adultTax'] = adultTax
            item['netFare'] = netFare
            item['maxSeats'] = maxSeats
            item['cabin'] = cabin
            item['carrier'] = carrier
            item['isChange'] = isChange
            item['segments'] = json.dumps(segments)
            item['getTime'] = getTime
            yield item
Exemplo n.º 3
0
    def parse(self, response):
        """Parse a flight-search JSON response and yield fare items.

        A body that is not valid JSON causes the original POST to be
        re-issued. Only the first entry of the top-level ``flights`` list is
        used. Flights are skipped when their first fare's ``sellKey`` has two
        or more ``^``-separated parts, when ``stops`` is positive, or when
        they carry no fares.

        An item is yielded for every fare whose ``total`` does not exceed the
        lowest total seen so far on that flight, so one flight can produce
        more than one item.
        NOTE(review): confirm whether a single cheapest-fare item was intended.

        :param response: Scrapy response; ``meta['meta_data']['payload']`` is
            re-sent as the JSON body on retry.
        :returns: generator of ``scrapy.Request`` and ``WowSpiderItem``.
        """
        self.isOK = True
        try:
            json_dict = json.loads(response.body)
        except (ValueError, TypeError):
            # Non-JSON body (e.g. a 503 page): send the request again.
            meta_data = response.meta.get('meta_data')
            yield scrapy.Request(self.start_urls[0],
                                 callback=self.parse,
                                 method='POST',
                                 meta={'meta_data': meta_data},
                                 body=json.dumps(meta_data.get('payload')),
                                 errback=self.errback)
            return
        try:
            flight_data = json_dict.get('flights')[0]
        except (AttributeError, TypeError, IndexError, KeyError):
            print('no airport %s' % (response.meta.get('invalid'),))
            return
        arrAirport = flight_data.get('destination')
        depAirport = flight_data.get('origin')
        currency = flight_data.get('currencyCode')
        maxSeats = self.custom_settings.get('SEAT')

        for flight in flight_data.get('flights'):
            fares = flight.get('fares')
            if not fares:
                # Nothing to price; indexing fares[0] would raise.
                continue
            # Connection check: a sellKey with several '^'-separated parts
            # marks a connecting itinerary.
            sell_key = fares[0].get('sellKey')
            if sell_key and len(sell_key.split('^')) >= 2:
                continue
            if (flight.get('stops') or 0) > 0:
                continue
            carrier = flight.get('carrierCode')
            flightNumber = '%s%s' % (carrier, flight.get('flightNumber'))
            deptime = time.strptime(flight.get('std'), '%Y-%m-%d %H:%M:%S')
            depTime = time.mktime(deptime)
            arrtime = time.strptime(flight.get('sta'), '%Y-%m-%d %H:%M:%S')
            arrTime = time.mktime(arrtime)
            isChange = 1

            # Bundle price table: only the 'Express' fare is reported. It does
            # not depend on the fare being emitted, so compute it once.
            express_price = 0
            for candidate in fares:
                if candidate.get('name') == 'Express':
                    express_price = candidate.get('total')
            segments = [[express_price, maxSeats]]

            # float('inf') replaces the Python-2-only sys.maxint sentinel.
            adultPrice = float('inf')
            for fare in fares:
                price = fare.get('total')
                if price > adultPrice:
                    continue
                adultPrice = price
                adultTax = fare.get('tax')
                netFare = fare.get('base')
                cabin = fare.get('fareClass')
                info = {'farekey': fare.get('sellKey')}
                getTime = time.time()

                item = WowSpiderItem()
                item['flightNumber'] = flightNumber
                item['depTime'] = depTime
                item['arrTime'] = arrTime
                item['fromCity'] = self.portCitys.get(depAirport, depAirport)
                item['toCity'] = self.portCitys.get(arrAirport, arrAirport)
                item['depAirport'] = depAirport
                item['arrAirport'] = arrAirport
                item['currency'] = currency
                item['adultPrice'] = adultPrice
                item['adultTax'] = adultTax
                item['netFare'] = netFare
                item['maxSeats'] = maxSeats
                item['cabin'] = cabin
                item['carrier'] = carrier
                item['isChange'] = isChange
                item['segments'] = json.dumps(segments)
                item['getTime'] = getTime
                item['info'] = json.dumps(info)
                yield item
Exemplo n.º 4
0
    def parse(self, response):
        """Parse an HTML flight-list page and yield one item per direct flight.

        * Page title ``Internal Server Error``: the original POST is re-issued.
        * No ``flight-item`` rows, or a row without a detail link: the
          request's ``invalid`` marker is appended to ``self.task`` and
          parsing stops.
        * A carrier code containing a comma marks a connection and is skipped.
        * Rows without a readable price produce an item with zero price and
          zero seats.

        :param response: Scrapy HTML response; ``meta['meta_data']`` supplies
            ``payload``, ``invalid`` and ``flight_time`` (the date part, in
            ``%Y-%m-%d`` form, of the times shown on the page).
        :returns: generator of ``scrapy.Request`` and ``WowSpiderItem``.
        """
        self.isOK = True
        meta = response.meta.get('meta_data')

        def attr(node, name):
            # First value of the given attribute on the node.
            return node.xpath('./@' + name).extract()[0]

        # A page without a <title> used to raise IndexError here.
        titles = response.xpath('//title/text()').extract()
        error = titles[0] if titles else ''
        if error == 'Internal Server Error':
            self.isOK = False
            print(error)
            yield scrapy.Request(self.start_urls[0],
                                 method='POST',
                                 headers=self.custom_settings.get('HEADERS'),
                                 body=meta.get('payload'),
                                 callback=self.parse,
                                 meta={'meta_data': meta},
                                 errback=self.errback)
            return
        flights = response.xpath('//div[@id="tbl-depart-flights"]/div[@class="flight-item"]')
        # No flights that day: record the date as invalid.
        if len(flights) == 0:
            self.task.append(meta.get('invalid'))
            print('no flight')
            return

        for flight in flights:
            flight_data = flight.xpath('.//*[@class="detail"]/a')
            # A row without a detail link is treated as "no flight that day".
            if not flight_data:
                self.task.append(meta.get('invalid'))
                print('No flight to day')
                return
            flight_dict = flight_data[0]
            carrier = attr(flight_dict, 'carriercode')
            if len(carrier.split(',')) > 1:
                print('is change')
                continue
            flightNumber = carrier + str(attr(flight_dict, 'flightnumber'))

            dt = meta.get('flight_time')
            dep_dt_str = attr(flight_dict, 'departuretime')
            dep_tupletime = time.strptime(dt + 'T' + dep_dt_str, '%Y-%m-%dT%H:%M')
            depTime = time.mktime(dep_tupletime)
            # Arrival hour earlier than departure hour: arrival is next day.
            arr_dt_str = attr(flight_dict, 'arrivaltime')
            if int(dep_dt_str.split(':')[0]) > int(arr_dt_str.split(':')[0]):
                arr_dt = pubUtil.time_add_num(dt, 1) + 'T' + arr_dt_str
            else:
                arr_dt = dt + 'T' + arr_dt_str
            arr_tupletime = time.strptime(arr_dt, '%Y-%m-%dT%H:%M')
            arrTime = time.mktime(arr_tupletime)
            depAirport = attr(flight_dict, 'departure')
            arrAirport = attr(flight_dict, 'arrival')
            aircraftType = attr(flight_dict, 'equipmenttype')

            duration = dataUtil.time_standard(attr(flight_dict, 'traveltime'))

            price = flight.xpath('.//*[@class="flight-price"]')
            if not price:
                print('no seat')
                adultPrice_str = ''
            else:
                # NOTE(review): the second price element is used; a row with
                # exactly one would raise IndexError - confirm page layout.
                adultPrice_str = price[1].xpath('./span[@class="fare-price"]/text()').extract()

            # No price available: emit a zero-price, zero-seat item.
            if not adultPrice_str:
                adultPrice = 0
                currency = 'RPG'
                netFare = 0
                adultTax = 0
                maxSeats = 0
                print('no seat')
            else:
                currency_unit = price.xpath('normalize-space(./span[@class="currency"]/text())').extract()[0]
                if currency_unit == '$':
                    adultPrice = float(adultPrice_str[0])
                else:
                    # '.' is stripped and ',' becomes the decimal point.
                    adultPrice = float(adultPrice_str[0].replace('.', '').replace(',', '.'))

                adultTax = 0
                netFare = adultPrice - adultTax
                currency = self.custom_settings.get('CURRENCY_CACHE').get(currency_unit, currency_unit)
                # No seat count found on the page so far; use the requested
                # seat count instead.
                maxSeats = self.custom_settings.get('SEAT')

            isChange = 1
            cabin = 'X'

            # Built but not emitted: the item's 'segments' field below is the
            # literal '[]'.
            segments = dict(
                flightNumber=flightNumber,
                aircraftType=aircraftType,
                number=1,
                departureTime=time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(depTime)),
                destinationTime=time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(arrTime)),
                airline=carrier,
                dep=depAirport,
                dest=arrAirport,
                seats=maxSeats,
                duration=duration,
                depTerminal=''
            )
            getTime = time.time()

            item = WowSpiderItem()
            item['flightNumber'] = flightNumber
            item['depTime'] = depTime
            item['arrTime'] = arrTime
            item['fromCity'] = self.portCitys.get(depAirport, depAirport)
            item['toCity'] = self.portCitys.get(arrAirport, arrAirport)
            item['depAirport'] = depAirport
            item['arrAirport'] = arrAirport
            item['currency'] = currency
            item['adultPrice'] = adultPrice
            item['adultTax'] = adultTax
            item['netFare'] = netFare
            item['maxSeats'] = maxSeats
            item['cabin'] = cabin
            item['carrier'] = carrier
            item['isChange'] = isChange
            item['segments'] = '[]'
            item['getTime'] = getTime
            yield item
Exemplo n.º 5
0
    def parse(self, response):
        """Extract flights from ``console.log([...]);`` calls and yield items.

        The HTTP result object is read from ``response.request.meta['response']``
        (the original comment says the middleware stores it there). If its
        text cannot be searched, the current cookie is reported as expired, a
        random one from ``COOKIE_LIST`` is installed in the shared headers,
        and the request is re-sent.

        Each ``console.log`` array holding exactly one flight object becomes a
        ``WowSpiderItem``; arrays with several objects (connections) are
        skipped. Price, tax, cabin and seat count come from the ``E1`` fare;
        ``E2`` and ``E3`` are reported in ``segments``.

        :param response: Scrapy response; ``meta['meta_data']['data']`` is the
            POST body used for the retry.
        :returns: generator of ``scrapy.Request`` and ``WowSpiderItem``.
        """
        res = response.request.meta.get('response')
        try:
            flight_list = re.compile(r"console.log\(\[(.*?)\]\);").findall(res.text)
        except Exception:
            try:
                # The format string previously had no %s placeholder, so this
                # call always raised and the alert was never sent.
                log_mail.log_mail('TRcookie:%s失效' % response.request.headers.get('Cookie'))
            except Exception:
                # Best-effort notification: a mail failure must not block the
                # retry below.
                pass
            # Swap the cookie globally (the headers dict is shared).
            cookie = random.choice(self.custom_settings.get('COOKIE_LIST'))
            self.custom_settings.get('HEADERS')['Cookie'] = 'kuhang_=%s;' % cookie
            print('之前cookie失效,使用cookie:%s进行访问' % cookie)
            meta_data = response.meta.get('meta_data')
            yield scrapy.Request(self.start_urls[0],
                                 callback=self.parse,
                                 headers=self.custom_settings.get('HEADERS'),
                                 method='POST',
                                 meta={'meta_data': meta_data},
                                 body=meta_data.get('data'),
                                 errback=self.errback)
            return

        if len(flight_list) == 0:
            # No flights that day.
            return

        for raw_flight in flight_list:
            flight = json.loads('[' + raw_flight + ']')

            # Several objects in one array means a connecting itinerary.
            if len(flight) > 1:
                continue
            flight = flight[0]
            arrAirport = flight.get('ArrivalStation')
            depAirport = flight.get('DepartureStation')
            carrier = flight.get('CarrierCode')
            flightNumber = '%s%s' % (carrier, flight.get('FlightNumber').replace(' ', ''))
            deptime = time.strptime(flight.get('STD'), '%Y-%m-%dT%H:%M:%S')
            depTime = time.mktime(deptime)
            arrtime = time.strptime(flight.get('STA'), '%Y-%m-%dT%H:%M:%S')
            arrTime = time.mktime(arrtime)

            isChange = 1
            # Several fare bundles exist; start from an all-zero price table.
            fares = flight.get('Fares')
            price_dict = {
                'E1': [0, 0],
                'E2': [0, 0],
                'E3': [0, 0],
                'J': [0, 0],
            }
            adultPrice, adultTax, netFare, cabin, maxSeats, currency = 0, 0, 0, 'X', 0, None
            for key in fares:
                # Sum up net fare and taxes for this bundle.
                adult_Tax, net_Fare = 0, 0
                fare = fares.get(key)
                prices = fare.get('PaxFare').get('ADT').get('BookingServiceCharge')
                for price in prices:
                    if price.get('ChargeType') == 'FarePrice':
                        net_Fare = price.get('Amount')
                    else:
                        adult_Tax = adult_Tax + price.get('Amount')
                    # The first currency seen is kept; a charge in another
                    # currency stops the scan of this bundle's charges.
                    if currency:
                        if currency != price.get('CurrencyCode'):
                            break
                    else:
                        currency = price.get('CurrencyCode')
                try:
                    adult_Price = net_Fare + adult_Tax
                except TypeError:
                    print('%s %s %s' % (net_Fare, adult_Tax, json.dumps(prices)))
                    traceback.print_exc()
                    print('%s %s %s' % ('6' * 66, json.dumps(fares), '6' * 66))
                    return
                cabin_ = fare.get('FareBasisCode')[0]
                fare_seats = fare.get('AvailableCount')
                price_dict[key] = [adult_Price, fare_seats]
                if key == 'E1':
                    # Seats are taken from E1 together with its price; before,
                    # they came from whichever bundle was iterated last.
                    adultPrice, adultTax, netFare, cabin = adult_Price, adult_Tax, net_Fare, cabin_
                    maxSeats = fare_seats

            segments = [
                list(price_dict.get('E2')),
                list(price_dict.get('E3')),
            ]
            getTime = time.time()

            item = WowSpiderItem()
            item['flightNumber'] = flightNumber
            item['depTime'] = depTime
            item['arrTime'] = arrTime
            item['fromCity'] = self.portCitys.get(depAirport, depAirport)
            item['toCity'] = self.portCitys.get(arrAirport, arrAirport)
            item['depAirport'] = depAirport
            item['arrAirport'] = arrAirport
            item['currency'] = currency
            item['adultPrice'] = adultPrice
            item['adultTax'] = adultTax
            item['netFare'] = netFare
            item['maxSeats'] = maxSeats
            item['cabin'] = cabin
            item['carrier'] = carrier
            item['isChange'] = isChange
            item['segments'] = json.dumps(segments)
            item['getTime'] = getTime
            yield item
Exemplo n.º 6
0
    def parse(self, response):
        """Parse the page's embedded ``config`` JSON and yield flight items.

        The JSON object is cut out of the page with a regular expression and
        queried with ``jsonpath``. Without an ``Availability`` node the
        request's ``invalid`` marker is queued on ``self.task``. Flights with
        more than one segment are skipped. For each remaining flight the
        cheapest recommendation that references it supplies price, tax,
        seats and cabin.

        :param response: Scrapy response containing the page text.
        :returns: generator of ``WowSpiderItem``.
        """
        self.isOK = True
        parrten = re.compile(r'config\s:\s([\s\S]*), pageEngine : pageEngine')
        data = parrten.findall(response.text)
        if not len(data):
            print('data not')
            print('response:' + response.text)
            return
        data = data[0].strip('\n ')
        dict_data = json.loads(data)
        Availability = jsonpath(dict_data, '$..Availability')
        if not Availability:
            self.task.append(response.meta.get('invalid'))
            return

        # List of proposed flights.
        flight_list = jsonpath(Availability[0], '$..proposedFlightsGroup')[0]
        for flight_data in flight_list:
            # Connection check (the original comment marks it as provisional).
            segment_count = len(flight_data.get('segments'))
            if segment_count > 1:
                continue
            # Two-letter carrier code and flight number.
            carrier = jsonpath(flight_data, '$..airline')[0].get('code')
            flightNumber = carrier + jsonpath(flight_data, '$..flightNumber')[0]
            # Departure and arrival times.
            deptime = time.strptime(
                jsonpath(flight_data, '$..beginDate')[0],
                '%b %d, %Y %I:%M:%S %p')
            depTime = time.mktime(deptime)
            arrtime = time.strptime(
                jsonpath(flight_data, '$..endDate')[0],
                '%b %d, %Y %I:%M:%S %p')
            arrTime = time.mktime(arrtime)
            # City and airport codes at both ends.
            begin_location = jsonpath(flight_data, '$..beginLocation')[0]
            end_location = jsonpath(flight_data, '$..endLocation')[0]
            fromCity = begin_location.get('cityCode')
            toCity = end_location.get('cityCode')
            depAirport = begin_location.get('locationCode')
            arrAirport = end_location.get('locationCode')
            # Currency.
            currency = jsonpath(Availability, '$..currencyBean')[0].get('code')

            # float('inf') replaces the Python-2-only sys.maxint sentinel.
            final_price, adultPrice, adultTax, netFare = float('inf'), 0, 0, 0
            maxSeats, cabin = 0, ''
            # Keep the cheapest recommendation that references this flight.
            flight_id = flight_data.get('proposedBoundId')
            for recommendation in jsonpath(Availability,
                                           '$..recommendationList')[0]:
                price = jsonpath(recommendation, '$..boundAmount')[0]
                price_current = price.get('totalAmount')
                if price_current >= final_price:
                    continue
                for flightGroup in jsonpath(recommendation,
                                            '$..flightGroupList')[0]:
                    if flightGroup.get('flightId') == flight_id:
                        # Amounts are divided by 3.
                        # NOTE(review): presumably the search is made for
                        # three passengers - confirm.
                        adultPrice = price_current / 3
                        final_price = price_current
                        adultTax = price.get('tax') / 3
                        netFare = price.get('amountWithoutTaxAndFee') / 3
                        maxSeats = flightGroup.get('numberOfSeatsLeft')
                        cabin = flightGroup.get('rbd')

            isChange = segment_count
            flightTime = jsonpath(flight_data, '$..flightTime')[0]
            # Format as HH:MM. The former code printed total minutes and the
            # leftover-millisecond remainder.
            # Assumes flightTime is in milliseconds - TODO confirm.
            total_minutes = int(flightTime // 60000)
            # Segment details.
            segments = dict(
                flightNumber=flightNumber,
                aircraftType=jsonpath(flight_data,
                                      '$..equipment')[0].get('code'),
                number=1,
                departureTime=time.strftime('%Y-%m-%d %H:%M:%S', deptime),
                destinationTime=time.strftime('%Y-%m-%d %H:%M:%S', arrtime),
                airline=carrier,
                dep=depAirport,
                dest=arrAirport,
                seats=maxSeats,
                duration='%02d:%02d' % (total_minutes // 60,
                                        total_minutes % 60),
                depTerminal='')
            getTime = time.time()

            item = WowSpiderItem()
            item['flightNumber'] = flightNumber
            item['depTime'] = depTime
            item['arrTime'] = arrTime
            item['fromCity'] = self.portCitys.get(depAirport, fromCity)
            item['toCity'] = self.portCitys.get(arrAirport, toCity)
            item['depAirport'] = depAirport
            item['arrAirport'] = arrAirport
            item['currency'] = currency
            item['adultPrice'] = adultPrice
            item['adultTax'] = adultTax
            item['netFare'] = netFare
            item['maxSeats'] = maxSeats
            item['cabin'] = cabin
            item['carrier'] = carrier
            item['isChange'] = isChange
            item['segments'] = segments
            item['getTime'] = getTime

            yield item
Exemplo n.º 7
0
    def parse(self, response):
        """Parse a route-search JSON response and yield one item per direct flight.

        A body that is not valid JSON is printed and dropped (the retry that
        once lived here was commented out). Each entry of ``Route`` is a list
        of segments; only single-segment routes are emitted. The first cabin
        whose ``Remain`` is not -1 supplies cabin, price and seats; if every
        cabin reports -1, the values of the last cabin examined are used.

        :param response: Scrapy response with the JSON payload.
        :returns: generator of ``WowSpiderItem``.
        """
        self.isOK = True
        try:
            json_dict = json.loads(response.body)
        except (ValueError, TypeError):
            print(response.text)
            return
        flights = json_dict.get('Route')
        if not flights:
            return

        for flight in flights:
            # Anything other than exactly one segment is skipped: several
            # segments mean a connection, none leaves nothing to read.
            if len(flight) != 1:
                continue
            flight_data = flight[0]
            depAirport = flight_data.get('DepartureAirportCode')
            arrAirport = flight_data.get('ArrivalAirportCode')
            # Currency is not available in the payload; CNY for now.
            currency = 'CNY'
            flightNumber = flight_data.get('No')
            carrier = flightNumber[:2]
            deptime = time.strptime(flight_data.get('DepartureTime'),
                                    '%Y-%m-%d %H:%M:%S')
            depTime = time.mktime(deptime)
            arrtime = time.strptime(flight_data.get('ArrivalTime'),
                                    '%Y-%m-%d %H:%M:%S')
            arrTime = time.mktime(arrtime)
            isChange = 1
            getTime = time.time()
            cabin, adultTax, adultPrice, maxSeats, netFare = 'X', 0, 0, 0, 0
            for flight_price in flight_data.get('AircraftCabins') or []:
                cabin_infos = flight_price.get('AircraftCabinInfos')
                if not cabin_infos:
                    # No price record for this cabin; indexing would raise.
                    continue
                price_and_seat = cabin_infos[0]
                cabin = flight_price.get('CabinLevel')
                adultTax = (price_and_seat.get('AirportConstructionFees')
                            + price_and_seat.get('FuelSurcharge')
                            + price_and_seat.get('OtherFees'))
                netFare = price_and_seat.get('Price')
                adultPrice = adultTax + netFare
                maxSeats = price_and_seat.get('Remain')
                # Stop at the first cabin whose Remain is not -1.
                if maxSeats != -1:
                    break

            item = WowSpiderItem()
            item['flightNumber'] = flightNumber
            item['depTime'] = depTime
            item['arrTime'] = arrTime
            item['fromCity'] = self.portCitys.get(depAirport, depAirport)
            item['toCity'] = self.portCitys.get(arrAirport, arrAirport)
            item['depAirport'] = depAirport
            item['arrAirport'] = arrAirport
            item['currency'] = currency
            item['adultPrice'] = adultPrice
            item['adultTax'] = adultTax
            item['netFare'] = netFare
            item['maxSeats'] = maxSeats
            item['cabin'] = cabin
            item['carrier'] = carrier
            item['isChange'] = isChange
            item['segments'] = json.dumps([])
            item['getTime'] = getTime
            item['info'] = json.dumps([])
            yield item
Exemplo n.º 8
0
    def parse(self, response):
        """Parse the HTML fare table and yield one item per priced flight.

        Schedule cells are the ``td`` elements whose class contains
        ``fl_date`` and fare cells those whose class contains
        ``outward-total-fare-td``; the two lists are matched by position.

        When the page has no schedule cells the task's ``invalid`` marker is
        appended to ``self.task``; if the page heading reads
        ``'Are you human?'`` the original POST is re-issued as well and
        ``self.isCOOKIE`` is cleared.  When a schedule cell has no matching
        fare cell the page is treated as corrupt: ``self.isOK`` is cleared
        and parsing stops.

        :param response: HTML response; ``response.meta['meta_data']`` is
            read for ``year``, ``maxSeats``, ``form`` and ``invalid``.
        :return: generator of ``scrapy.Request`` (retry) or ``WowSpiderItem``.
        """
        self.isCOOKIE = True
        self.isOK = True
        # Split the page into per-flight schedule cells and fare cells.
        time_numbers = response.xpath('//td[contains(@class,"fl_date")]')
        prices = response.xpath(
            '//td[contains(@class,"outward-total-fare-td")]')

        if not time_numbers:
            try:
                page = response.xpath('//h2/text()')[0].extract()
                if page == 'Are you human?':
                    self.isCOOKIE = False
                    yield scrapy.Request(
                        self.start_urls[0],
                        method='POST',
                        headers=self.custom_settings.get('headers'),
                        body=response.meta.get('meta_data').get('form'),
                        callback=self.parse,
                        meta={'meta_data': response.meta.get('meta_data')},
                        errback=self.errback)
                self.task.append(response.meta.get('meta_data').get('invalid'))
            except Exception:
                # 'Exception' rather than a bare except so that closing the
                # generator at the yield above is not swallowed.
                self.log("no data", 10)
                self.task.append(response.meta.get('meta_data').get('invalid'))
            return

        def amount(text):
            # Take everything from the first digit on and drop the
            # thousands separators before converting.
            return int(re.search(r"\d.*", text).group().replace(',', ''))

        # 'year' is the calendar year of the flights; the page only shows
        # month/day.
        year = response.meta.get('meta_data').get('year')

        for i, schedule_cell in enumerate(time_numbers):
            # Schedule cell spans: departure time, arrival time, flight number.
            time_number = schedule_cell.xpath('./span/text()').extract()
            depTime = time.mktime(
                time.strptime(year + time_number[0], '%Y%m/%d\xa0%H:%M'))
            arrTime = time.mktime(
                time.strptime(year + time_number[1], '%Y%m/%d\xa0%H:%M'))
            flightNumber = time_number[2]
            carrier = re.search(r'\D{2}', flightNumber).group()

            # The page shows totals for all passengers; dividing by maxSeats
            # gives the per-person amounts.
            maxSeats = response.meta.get('meta_data').get('maxSeats')
            try:
                fare_cell = prices[i]
            except IndexError:
                # Schedule and fare cells are out of step, so the page data
                # cannot be trusted (the original note blames a bad IP).
                # Stop here instead of pricing this row with another row's
                # fare.
                self.log('Dangerous error data....', 40)
                self.isOK = False
                return
            price = fare_cell.xpath(
                './div[@id="outward_hp_%d_total_fare"]//span/text()'
                % (i + 1)).extract()
            # A total of '0' means this fare family is not offered; fall
            # back to the next family in turn.
            for family in ('hpp', 'prime'):
                if price[3] != '0':
                    break
                price = fare_cell.xpath(
                    '//div[@id="outward_%s_%d_total_fare"]//span/text()'
                    % (family, i + 1)).extract()
            if price[3] == '0':
                self.task.append(response.meta.get('meta_data').get('invalid'))
                continue

            # An empty currency marker is treated as a bogus page.
            if not price[2]:
                return

            # NOTE(review): on Python 2 these divisions are integer
            # divisions when maxSeats is an int.
            netFare = amount(price[0]) / maxSeats
            adultTax = amount(price[1]) / maxSeats
            per_seat_total = amount(price[3]) / maxSeats

            # Rows whose list cell ends with the 'promo' class are
            # discounted: the displayed total is divided by 0.7 and the
            # cabin is flagged 'S'.
            promo = response.xpath(
                '//td[@id="outward_hp_%d_list"]/@class'
                % (i + 1)).extract()[0].split(' ')
            if promo[-1] == 'promo':
                adultPrice = per_seat_total / 0.7
                cabin = 'S'
            else:
                adultPrice = per_seat_total
                cabin = 'X'

            currency = self.custom_settings.get('CURRENCY_CACHE').get(price[2])
            invalid = response.meta.get('meta_data').get('invalid')
            depAirport = invalid.get('depAirport')
            arrAirport = invalid.get('arrAirport')
            isChange = 1
            getTime = time.time()

            item = WowSpiderItem()
            item['flightNumber'] = flightNumber
            item['depTime'] = depTime
            item['arrTime'] = arrTime
            item['fromCity'] = self.portCitys.get(depAirport, depAirport)
            item['toCity'] = self.portCitys.get(arrAirport, arrAirport)
            item['depAirport'] = depAirport
            item['arrAirport'] = arrAirport
            item['currency'] = currency
            item['adultPrice'] = adultPrice
            item['adultTax'] = adultTax
            item['netFare'] = netFare
            item['maxSeats'] = maxSeats
            item['cabin'] = cabin
            item['carrier'] = carrier
            item['isChange'] = isChange
            item['segments'] = '[]'
            item['getTime'] = getTime
            yield item
Exemplo n.º 9
0
    def parse(self, response):
        """Yield one ``WowSpiderItem`` per single-segment flight on the page.

        Each child ``div`` of the ``results-list`` container is one flight.
        Rows with more than one ``fare-amenities`` block are skipped.  A row
        whose flight-number text contains no digits marks the task invalid
        (appended to ``self.task``) and is skipped as well.  The two bundle
        prices found on the row are serialised into ``segments`` as
        ``[[price, seats], [price, seats]]``.

        :param response: HTML response; ``response.meta['meta_data']`` is
            read for ``flight_time`` and ``invalid``.
        :return: generator of ``WowSpiderItem`` objects.
        """
        self.isOK = True
        digits = re.compile(r"\d+")
        clock_format = '%Y-%m-%dT%I:%M %p'
        bundle_query = ('normalize-space(.//div[@class="%s"]'
                        '//div[@style="font-size: 18px; margin-top: 3px;"]'
                        '/text())')

        # Walk every flight row of the result list.
        for row in response.xpath("//div[@class='results-list']/div[@*]"):
            amenities = row.xpath('.//div[@class="fare-amenities"]')
            # Several amenity blocks mean a connecting itinerary.
            if len(amenities) > 1:
                continue
            # The page does not show the airline, so a default is used.
            carrier = "B6"
            number_text = str(amenities.xpath(
                'normalize-space(./ul/li[1]//text()[3])').extract()[0])
            # Keep only the digits, dropping the "**" decoration.
            number_match = digits.search(number_text)
            if not number_match:
                self.task.append(response.meta.get('meta_data').get('invalid'))
                print("no flight")
                continue
            flightNumber = carrier + number_match.group()

            summary = row.xpath('./div[1]/ul/li')
            dep_fields = summary[0].xpath('./span/text()').extract()
            depAirport = dep_fields[1]
            flight_date = response.meta.get('meta_data').get('flight_time')
            depTime = time.mktime(time.strptime(
                flight_date + 'T' + dep_fields[0], clock_format))

            arr_fields = summary[2].xpath('./span/text()').extract()
            arrAirport = arr_fields[1]
            arr_clock = arr_fields[0]
            if arr_clock[-2:] == "+1":
                # A "+1" suffix marks an arrival on the following day.
                arr_stamp = (pubUtil.time_add_num(flight_date, 1) + 'T' +
                             arr_clock[:-2])
            else:
                arr_stamp = flight_date + 'T' + arr_clock
            arrTime = time.mktime(time.strptime(arr_stamp, clock_format))

            price = summary[4].xpath('./span[2]/text()').extract()
            if price:
                # First character is the currency symbol, the rest the amount.
                currency_for = self.custom_settings.get('CURRENCY_CACHE').get
                symbol = price[0][0]
                currency = currency_for(symbol, symbol)
                adultPrice = float(price[0][1:])
                maxSeats = self.custom_settings.get('SEAT')
            else:
                currency, adultPrice, maxSeats = "RPG", 0, 0
            adultTax = 0
            netFare = adultPrice - adultTax
            getTime = time.time()

            # Bundle prices ("Blue Plus" then "Blue Flex"); 0 when the cell
            # holds no number.
            segments = []
            for css_class in ('fare-row CN ribbon', 'fare-row BN '):
                bundle_text = row.xpath(
                    bundle_query % css_class).extract()[0][1:]
                if bundle_text.replace('.', '').isnumeric():
                    bundle_price = float(bundle_text)
                else:
                    bundle_price = 0
                segments.append([bundle_price, maxSeats])

            item = WowSpiderItem()
            fields = (
                ('flightNumber', flightNumber),
                ('depTime', depTime),
                ('arrTime', arrTime),
                ('fromCity', self.portCitys.get(depAirport, depAirport)),
                ('toCity', self.portCitys.get(arrAirport, arrAirport)),
                ('depAirport', depAirport),
                ('arrAirport', arrAirport),
                ('currency', currency),
                ('adultPrice', adultPrice),
                ('adultTax', adultTax),
                ('netFare', netFare),
                ('maxSeats', maxSeats),
                ('cabin', 'X'),
                ('carrier', carrier),
                ('isChange', 1),
                ('segments', json.dumps(segments)),
                ('getTime', getTime),
            )
            for key, value in fields:
                item[key] = value
            yield item
Exemplo n.º 10
0
    def parse(self, response):
        """Parse a JSON availability response into cheapest-fare items.

        Flights are read from ``segments[0].flights``.  Flights that list any
        stops or more than one leg are skipped.  For each remaining flight
        the fare type with the lowest ``adultFarePerPax`` is chosen (the
        first one wins on ties); fare types whose price cannot be read are
        ignored.  A day without flights, or a flight without any usable fare,
        appends the task's ``invalid`` marker to ``self.task``.

        :param response: response whose ``body`` carries the JSON payload;
            ``response.meta['meta_data']['invalid']`` is read on failure.
        :return: generator of ``WowSpiderItem`` objects.
        """
        try:
            data = json.loads(response.body)
            flights = data.get('segments')[0].get('flights')
        except (ValueError, AttributeError, TypeError, IndexError, KeyError):
            # The body is not JSON or lacks the segments/flights layout.
            print('6' * 66)
            return
        # No flights on this day.
        if not flights:
            print('No flight to day')
            self.task.append(response.meta.get('meta_data').get('invalid'))
            return

        def money(node, path):
            # Amounts arrive as strings with thousands separators.
            return float(jsonpath(node, path)[0].replace(',', ''))

        time_format = '%Y-%m-%dT%H:%M:%S'
        for flight in flights:
            # Skip connecting itineraries first.
            stops = flight.get('stops') or []
            legs = flight.get('legs') or []
            if stops or len(legs) > 1:
                print('is change:%s' % len(stops))
                continue
            if not legs:
                # Nothing to describe without a leg.
                continue
            leg = legs[0]
            carrier = leg.get('operatingCarrier')
            flightNumber = '%s%s' % (carrier, leg.get('marketingFlightNum'))
            depTime = time.mktime(
                time.strptime(leg.get('departureDate'), time_format))
            arrTime = time.mktime(
                time.strptime(leg.get('arrivalDate'), time_format))
            depAirport = leg.get('origin')
            arrAirport = leg.get('destination')

            # Pick the cheapest fare type.  float('inf') is the portable
            # "no price yet" sentinel.
            fareType = None
            adultPrice = float('inf')
            for candidate in flight.get('fareTypes') or []:
                try:
                    candidate_price = money(candidate, '$..adultFarePerPax')
                except (TypeError, AttributeError, ValueError, IndexError,
                        KeyError):
                    # No readable adult fare on this fare type.
                    continue
                if candidate_price < adultPrice:
                    adultPrice = candidate_price
                    fareType = candidate
            if not fareType:
                self.task.append(response.meta.get('meta_data').get('invalid'))
                continue

            currency = jsonpath(fareType, '$..currencyCode')[0]
            adultTax = money(fareType, '$..taxPerPax')
            netFare = money(fareType, '$..baseAdultFarePerPax')
            cabin = jsonpath(fareType, '$..fareClass')[0]
            # No seat count has been found in the payload so far, so the
            # configured SEAT value is reported.
            maxSeats = self.custom_settings.get('SEAT')
            isChange = 1
            getTime = time.time()

            item = WowSpiderItem()
            item['flightNumber'] = flightNumber
            item['depTime'] = depTime
            item['arrTime'] = arrTime
            item['fromCity'] = self.portCitys.get(depAirport, depAirport)
            item['toCity'] = self.portCitys.get(arrAirport, arrAirport)
            item['depAirport'] = depAirport
            item['arrAirport'] = arrAirport
            item['currency'] = currency
            item['adultPrice'] = adultPrice
            item['adultTax'] = adultTax
            item['netFare'] = netFare
            item['maxSeats'] = maxSeats
            item['cabin'] = cabin
            item['carrier'] = carrier
            item['isChange'] = isChange
            item['segments'] = '[]'
            item['getTime'] = getTime
            yield item
Exemplo n.º 11
0
    def parse(self, response):
        """Parse an HTML outbound-flight list and yield one item per flight.

        Flight blocks are the elements whose ``data-validation-prefix`` is
        ``"Please choose your outbound flight."``.  Blocks containing a
        ``layover`` element are skipped.  When the price cell cannot be
        parsed the flight is still emitted with zero prices, zero seats and
        the placeholder currency ``'A'``.  ``maxSeats`` is always an ``int``:
        the number shown in the ``seats-left`` element, ``9`` when no number
        is shown, ``0`` when there is no price.

        :param response: HTML response;
            ``response.meta['meta_data']['flight_time']`` supplies the date.
        :return: generator of ``WowSpiderItem`` objects.
        """
        flights = response.xpath(
            '//*[@data-validation-prefix="Please choose your outbound flight."]')
        if not flights:
            print('No flight to day')
            return

        clock_format = '%Y-%m-%dT%H:%M'
        for flight in flights:
            # A layover element marks a connecting itinerary.
            if flight.xpath('.//*[@class="layover"]').extract():
                print('is change')
                continue

            flight_number = flight.xpath(
                './/*[@class="flight-information float-left"]/text()'
            ).extract()[1]
            # The text is whitespace-separated tokens; joined they form the
            # flight number and the first token is used as the carrier.
            flight_numberlist = re.findall(r'\S+', flight_number)
            flightNumber = ''.join(flight_numberlist)
            carrier = flight_numberlist[0]

            # Each cell reads as time first, airport last.
            dep_time_airport = flight.xpath(
                'normalize-space(.//*[@class="departure-time float-left"])'
            ).extract()[0].split(' ')
            dt = response.meta.get('meta_data').get('flight_time')
            depTime = time.mktime(
                time.strptime(dt + 'T' + dep_time_airport[0], clock_format))
            depAirport = dep_time_airport[-1]

            arr_time_airport = flight.xpath(
                'normalize-space(.//*[@class="arrival-time float-left"])'
            ).extract()[0].split(' ')
            # A non-empty time-offset element means a next-day arrival.
            if flight.xpath(
                    'normalize-space(.//*[@class="time-offset"])').extract()[0]:
                arr_date = pubUtil.time_add_num(dt, 1)
            else:
                arr_date = dt
            arrTime = time.mktime(
                time.strptime(arr_date + 'T' + arr_time_airport[0],
                              clock_format))
            arrAirport = arr_time_airport[-1]

            try:
                fare_currency = flight.xpath(
                    'normalize-space(.//*[@class="book-inner"]/text())'
                ).extract()[0].split(' ')
                # Amounts use '.' for thousands and ',' for decimals.
                adultPrice = float(
                    fare_currency[0].replace('.', '').replace(',', '.'))
                adultTax = 0
                netFare = adultPrice - adultTax
                currency = fare_currency[-1]
            except (IndexError, ValueError):
                # No parsable price (the original note says the flight is
                # sold out): emit placeholder values.
                print('flight invalid')
                adultPrice = 0
                netFare = 0
                adultTax = 0
                currency = 'A'

            seat = flight.xpath(
                'normalize-space(.//*[@class="seats-left"])').extract()[-1]
            seat_num = re.search(r'\d+', seat)
            if adultPrice == 0:
                maxSeats = 0
            elif seat_num:
                # Convert so maxSeats has the same type on every path.
                maxSeats = int(seat_num.group())
            else:
                # No number shown is taken to mean plenty of seats.
                maxSeats = 9

            isChange = 1
            cabin = 'X'
            getTime = time.time()

            item = WowSpiderItem()
            item['flightNumber'] = flightNumber
            item['depTime'] = depTime
            item['arrTime'] = arrTime
            item['fromCity'] = self.portCitys.get(depAirport, depAirport)
            item['toCity'] = self.portCitys.get(arrAirport, arrAirport)
            item['depAirport'] = depAirport
            item['arrAirport'] = arrAirport
            item['currency'] = currency
            item['adultPrice'] = adultPrice
            item['adultTax'] = adultTax
            item['netFare'] = netFare
            item['maxSeats'] = maxSeats
            item['cabin'] = cabin
            item['carrier'] = carrier
            item['isChange'] = isChange
            item['segments'] = '[]'
            item['getTime'] = getTime
            yield item
Exemplo n.º 12
0
    def parse(self, response):
        """Parse a JSON fare response and yield one item per direct flight.

        The payload's ``data`` object supplies ``currency`` and ``flights``.
        An empty flight entry appends ``response.meta['invalid']`` to
        ``self.task``; flights with more than one segment are skipped.  Fare
        data is taken from the first fare of the first cabin.

        :param response: response whose ``text`` carries the JSON payload.
        :return: generator of ``WowSpiderItem`` objects.
        """
        try:
            data = json.loads(response.text).get('data')
            currency = data.get('currency')
        except (ValueError, AttributeError):
            # Not JSON, or no 'data' object: dump the body for diagnosis.
            print(response.text)
            traceback.print_exc()
            return

        # 'or []' turns a payload without flights into "nothing to yield"
        # instead of a TypeError.
        for flight in data.get('flights') or []:
            # An empty entry means no flight that day: mark the task invalid.
            # NOTE(review): 'invalid' is read from response.meta directly,
            # not from a nested 'meta_data' dict - confirm against the
            # request builder.
            if not flight:
                invalid = response.meta.get('invalid')
                self.task.append(invalid)
                continue
            # Skip connecting itineraries.
            flight_segments = flight.get('segments')
            if len(flight_segments) > 1:
                continue
            flight_segment = flight_segments[0]
            carrier = flight_segment.get('airline').get('code')
            flightNumber = flight_segment.get('flightCode')
            departure = flight_segment.get('departure')
            arrival = flight_segment.get('arrival')
            # The last six characters are cut off before parsing
            # (presumably a UTC offset such as '+08:00' - confirm).
            depTime = time.mktime(time.strptime(
                departure.get('dateTime')[:-6], '%Y-%m-%dT%H:%M'))
            arrTime = time.mktime(time.strptime(
                arrival.get('dateTime')[:-6], '%Y-%m-%dT%H:%M'))
            depAirport = departure.get('airportCode')
            arrAirport = arrival.get('airportCode')

            # TODO: pick the lowest fare from the list; for now the first
            # fare of the first cabin is used.
            fares = flight.get('cabins')[0].get('fares')
            cabin = fares[0].get('code')
            fare = fares[0].get('price').get('adult')
            adultPrice = fare.get('total')
            adultTax = fare.get('taxAndFees')
            netFare = fare.get('amountWithoutTax')

            isChange = 1
            getTime = time.time()
            maxSeats = self.custom_settings.get('SEAT')

            item = WowSpiderItem()
            item['flightNumber'] = flightNumber
            item['depTime'] = depTime
            item['arrTime'] = arrTime
            item['fromCity'] = self.portCitys.get(depAirport, depAirport)
            item['toCity'] = self.portCitys.get(arrAirport, arrAirport)
            item['depAirport'] = depAirport
            item['arrAirport'] = arrAirport
            item['currency'] = currency
            item['adultPrice'] = adultPrice
            item['adultTax'] = adultTax
            item['netFare'] = netFare
            item['maxSeats'] = maxSeats
            item['cabin'] = cabin
            item['carrier'] = carrier
            item['isChange'] = isChange
            item['segments'] = '123:123:1223'
            item['getTime'] = getTime
            yield item
Exemplo n.º 13
0
    def parse(self, response):
        """Parse a JSON schedule response and yield one item per direct journey.

        Response codes are handled first:

        * ``"-1"`` / ``"100"`` with the message
          ``"Session Token authentication failure."`` clears
          ``self.isToken`` and re-issues the GET; any other message is
          reported as an invalid station and the response is dropped.
        * ``"12"`` clears ``self.isOK`` and re-issues the GET.

        Without schedules the task's ``invalid`` marker is appended to
        ``self.task``.  Journeys with more than one segment are skipped;
        flight details are decoded from ``JourneySellKey``.

        :param response: response whose ``text`` carries the JSON payload;
            ``response.meta['meta_data']`` is read for ``url`` and
            ``invalid``.
        :return: generator of ``scrapy.Request`` (retry) or ``WowSpiderItem``.
        """
        def retry_request():
            # Re-issue the original GET with the same task metadata.
            meta_data = response.meta.get('meta_data')
            return scrapy.Request(
                meta_data.get('url'),
                callback=self.parse,
                method='GET',
                headers=self.custom_settings.get('HEADERS'),
                meta={'meta_data': meta_data},
                errback=self.errback)

        self.isOK = True
        data = json.loads(response.text)
        self.isToken = True
        response_code = data.get('ResponseCode')
        if response_code in ("-1", "100"):
            # Anything but a token failure is treated as an invalid airport.
            if data.get('Message') != "Session Token authentication failure.":
                print("invalid station:%s" % data.get('Message'))
                return
            self.isToken = False
            yield retry_request()
            return
        # Code "12" is a no-data error: flag the proxy as bad and retry.
        if response_code == "12":
            self.isOK = False
            print("proxy invalid")
            yield retry_request()
            return

        schedules = data.get('SchedulesIj')
        # No flights for this request.
        if not schedules:
            print("not flight today")
            invalid = response.meta.get("meta_data").get("invalid")
            self.task.append(invalid)
            return

        for journey in schedules[0].get('JourneysIj') or []:
            # Skip connecting journeys.
            flight_segments = journey.get('SegmentsIj')
            if len(flight_segments) > 1:
                continue
            # JourneySellKey is split on '~' runs; the pieces are read as
            # carrier, number, origin, departure time, destination, arrival
            # time.
            sell_data = re.split(r'~[~|\s]*', journey.get('JourneySellKey'))
            carrier = sell_data[0]
            flightNumber = carrier + sell_data[1]
            depAirport = sell_data[2]
            arrAirport = sell_data[4]
            depTime = time.mktime(
                time.strptime(sell_data[3], '%m/%d/%Y %H:%M'))
            arrTime = time.mktime(
                time.strptime(sell_data[5], '%m/%d/%Y %H:%M'))

            fares = flight_segments[0].get('FaresIj')
            adultPrice, adultTax, maxSeats, currency, cabin = 0, 0, 0, "", 'X'
            # Only fill in fare data when the segment lists any fares.
            if fares:
                adultPrice = fares[0].get('TotalFare')
                # An empty/False result means no CurrencyCode was found;
                # keep the empty default in that case.
                currency_codes = jsonpath(fares[0], '$..CurrencyCode')
                if currency_codes:
                    currency = currency_codes[0]
                maxSeats = self.custom_settings.get('SEAT')
                cabin = fares[0].get('ClassOfService')
            netFare = adultPrice - adultTax
            isChange = 1
            getTime = time.time()

            item = WowSpiderItem()
            item['flightNumber'] = flightNumber
            item['depTime'] = depTime
            item['arrTime'] = arrTime
            item['fromCity'] = self.portCitys.get(depAirport, depAirport)
            item['toCity'] = self.portCitys.get(arrAirport, arrAirport)
            item['depAirport'] = depAirport
            item['arrAirport'] = arrAirport
            item['currency'] = currency
            item['adultPrice'] = adultPrice
            item['adultTax'] = adultTax
            item['netFare'] = netFare
            item['maxSeats'] = maxSeats
            item['cabin'] = cabin
            item['carrier'] = carrier
            item['isChange'] = isChange
            item['segments'] = '[]'
            item['getTime'] = getTime
            yield item
Exemplo n.º 14
0
    def parse(self, response):
        # print(response.text)
        # print('1' * 66)
        self.isOK = True
        if response.text == 'Service Unavailable, Rate limit reached, No Direct Access.':
            self.isOK = False
            # print(response.text)
            # print(response.status)
            # print(response.request.cookies)
            yield scrapy.Request(self.start_urls[0],
                                 method='POST',
                                 headers=self.custom_settings.get('HEADERS'),
                                 body=response.meta.get('meta_data').get('body'),
                                 callback=self.parse,
                                 meta={'meta_data': response.meta.get('meta_data')},
                                 errback=self.errback
                                 )
            return
        try:
            json_dict = json.loads(response.body)
        except:
            print(response.text)
            print(response.status)
            traceback.print_exc()
        if json_dict.get('message').get('code') == 400:
            self.isOK = False
            print(response.text)
            print(response.status)
            # print(response.request.cookies)
            # print(response.meta.get('meta_data').get('aaa'))
            yield scrapy.Request(self.start_urls[0],
                                 method='POST',
                                 headers=self.custom_settings.get('HEADERS'),
                                 body=response.meta.get('meta_data').get('body'),
                                 callback=self.parse,
                                 meta={'meta_data': response.meta.get('meta_data')},
                                 errback=self.errback
                                 )
            return
        if json_dict.get('message').get('code') == 500:
            # print(response.text)
            # print(response.meta.get('meta_data').get('aaa'))
            return
        # try:
        availableOptions = json_dict.get('data').get('originDestinationResponse')[0].get('availableOptions')
        # except:
        #     traceback.print_exc()
        #     print(response.text)
        #     print(response.status_code)
        currency = json_dict.get('data').get('currency')
        print('6'*66)
        # print(response.request.cookies)
        # 这个请求会显示七天的航班,在这个列表里
        for flight in availableOptions:
            # 判断是否是中转
            flight_segments = flight.get('segments')
            if len(flight_segments) > 1:
                print('is change')
                continue
            flight_segment = flight_segments[0]
            carrier = flight_segment.get('carrierCode')
            flightNumber = flight_segment.get('filghtDesignator')
            deptime_tuple = time.strptime(flight_segment.get('departureDateTime').get('local'), '%Y-%m-%dT%H:%M:%S')
            depTime = time.mktime(deptime_tuple)
            arrtime_tuple = time.strptime(flight_segment.get('arrivalDateTime').get('local'), '%Y-%m-%dT%H:%M:%S')
            arrTime = time.mktime(arrtime_tuple)
            depAirport = flight.get('originAirportCode')
            arrAirport = flight.get('destinationAirportCode')
            availableFare = flight.get('availableFareClasses')
            if not availableFare:
                maxSeats = 0
                adultPrice = 0
            else:
                maxSeats = availableFare[0].get('availableSeats')
                if maxSeats== -1:
                    maxSeats = 9
                adultPrice = availableFare[0].get('price') * 1.03
            adultTax=0
            netFare = adultPrice - adultTax

            isChange = 1
            cabin = 'X'
            getTime = time.time()

            item = WowSpiderItem()
            item['flightNumber'] = flightNumber
            item['depTime'] = depTime
            item['arrTime'] = arrTime
            item['fromCity'] = self.portCitys.get(depAirport, depAirport)
            item['toCity'] = self.portCitys.get(arrAirport, arrAirport)
            item['depAirport'] = depAirport
            item['arrAirport'] = arrAirport
            item['currency'] = currency
            item['adultPrice'] = adultPrice
            item['adultTax'] = adultTax
            item['netFare'] = netFare
            item['maxSeats'] = maxSeats
            item['cabin'] = cabin
            item['carrier'] = carrier
            item['isChange'] = isChange
            item['segments'] = '[]'
            item['getTime'] = getTime
            yield item
Exemplo n.º 15
0
    def parse(self, response):
        """Parse a flight-selection HTML page and yield one item per direct flight.

        Sets ``self.isOK`` to ``True`` and then walks every
        ``<tr id="market1">`` row of the page. Rows that describe a
        connecting flight are skipped; every other row becomes a
        ``WowSpiderItem``.

        Args:
            response: the page response; only its ``xpath`` method is used.

        Yields:
            WowSpiderItem: one item per direct flight. ``depTime`` and
            ``arrTime`` are ``time.mktime`` timestamps, ``adultTax`` is always
            0 (so ``netFare`` equals ``adultPrice``), ``maxSeats`` is the
            configured ``SEAT`` value, and ``info`` is the value of the first
            ``<input>`` found in the row.
        """
        self.isOK = True
        notice = response.xpath('//*[@id="selectMainBody"]/h2')
        if notice:
            # The page carries a heading in the result area instead of
            # flights; print it and stop.
            print(notice.extract()[0])
            return

        # Compiled once: the leading non-digit run of a flight number is used
        # as the carrier code.
        carrier_pattern = re.compile(r'\D+')

        for flight in response.xpath('//tr[@id="market1"]'):
            # A row without this attribute means there is no flight that day.
            if not flight.xpath('./@data-ismacjourney'):
                print('No flight to day')
                return
            # Connecting-flight check: more than two journey blocks in the
            # row. The query must be relative ('.//'); an absolute '//' on a
            # row selector searches the whole document, which would count the
            # blocks of every row and skip all flights on a multi-row page.
            journey_blocks = flight.xpath(
                './/td[@class="direction JourneyInfo"]/div').extract()
            if len(journey_blocks) > 2:
                continue

            flightNumber = flight.xpath(
                './/div[@class="code"]/text()').extract()[0]
            carrier = carrier_pattern.search(flightNumber).group()

            dep_dt_str = flight.xpath('./@data-departuretime').extract()[0]
            depTime = time.mktime(
                time.strptime(dep_dt_str, '%Y-%m-%dT%H:%M:00'))
            # The arrival attribute carries its own date, so an arrival on the
            # following day needs no special handling.
            arr_dt_str = flight.xpath('./@data-arrivaltime').extract()[0]
            arrTime = time.mktime(
                time.strptime(arr_dt_str, '%Y-%m-%dT%H:%M:00'))

            depAirport = flight.xpath('./@data-departure-code').extract()[0]
            arrAirport = flight.xpath('./@data-arrival-code').extract()[0]
            isChange = 1

            # The price text is "<amount>  <currency unit>", split on a
            # double space; the amount uses ',' as the decimal separator.
            price = flight.xpath('.//span[@style="font-size: 12"]/text()'
                                 ).extract()[0].split('  ')
            info = flight.xpath('.//input/@value').extract()[0]
            adultPrice = float(price[0].replace(' ', '').replace(',', '.'))
            adultTax = 0
            netFare = adultPrice - adultTax
            currency_unit = price[1]
            # Look the unit up in CURRENCY_CACHE, falling back to the raw
            # text when it is not listed there.
            currency = self.custom_settings.get('CURRENCY_CACHE').get(
                currency_unit, currency_unit)
            # No seat count has been found on the page so far, so the
            # requested seat count is reported instead.
            maxSeats = self.custom_settings.get('SEAT')

            cabin = 'X'
            getTime = time.time()

            item = WowSpiderItem()
            item['flightNumber'] = flightNumber
            item['depTime'] = depTime
            item['arrTime'] = arrTime
            item['fromCity'] = self.portCitys.get(depAirport, depAirport)
            item['toCity'] = self.portCitys.get(arrAirport, arrAirport)
            item['depAirport'] = depAirport
            item['arrAirport'] = arrAirport
            item['currency'] = currency
            item['adultPrice'] = adultPrice
            item['adultTax'] = adultTax
            item['netFare'] = netFare
            item['maxSeats'] = maxSeats
            item['cabin'] = cabin
            item['carrier'] = carrier
            item['isChange'] = isChange
            item['segments'] = '[]'
            item['getTime'] = getTime
            item['info'] = info
            yield item
Exemplo n.º 16
0
    def parse(self, response):
        """Parse a booking page with an embedded JSON config and yield direct flights.

        Flow:

        1. If the page has a ``<title>``, it is not treated as a result page.
           The titles ``'Distil Validate'`` and ``'Distil Captcha'`` clear
           ``self.isJS`` (the captcha one also clears ``self.isOK``). The
           first ``src="..."`` value found in the page is stored under
           ``custom_settings['JS_RANDOM_POSTFIX']`` and the original POST is
           re-issued.
        2. Otherwise the JSON between ``config : `` and ``, pageEngine :`` is
           decoded and the first ``Availability`` node is located with
           ``jsonpath``. When there is none, the request's ``invalid`` entry
           is appended to ``self.task``.
        3. For every proposed flight with a single segment, the cheapest
           matching recommendation supplies price, tax, seats and cabin.

        Args:
            response: the page response; ``xpath``, ``text`` and
                ``meta['meta_data']`` are used.

        Yields:
            scrapy.Request: a retry of the original request (step 1).
            WowSpiderItem: one item per direct flight (step 3).
        """
        self.isOK = True
        self.isJS = True

        title = response.xpath('//title/text()')
        if title:
            title_text = title[0].extract()
            if title_text == 'Distil Validate':
                self.isJS = False
                print(title_text)
            if title_text == 'Distil Captcha':
                self.isOK = False
                self.isJS = False
                print(title_text)
            # Remember the script URL found on the page, when there is one;
            # a page without it no longer aborts the retry.
            script = re.compile('src="(.*?)" .*?><').search(response.text)
            if script is not None:
                self.custom_settings['JS_RANDOM_POSTFIX'] = script.group(1)
            yield scrapy.Request(
                self.start_urls[0],
                method='POST',
                headers=self.custom_settings.get('HEADERS'),
                body=response.meta.get('meta_data').get('payload'),
                callback=self.parse,
                meta={'meta_data': response.meta.get('meta_data')},
                errback=self.errback)
            return

        config = re.compile('config : (.*), pageEngine :',
                            re.S).search(response.text)
        if config is None:
            # No embedded config: dump the page for diagnosis and stop.
            # Falling through would fail on an unbound variable.
            print(response.text)
            return
        data = json.loads(config.group(1))

        availability = jsonpath(data, '$..Availability')
        if not availability:
            print('No flight to day')
            self.task.append(response.meta.get('meta_data').get('invalid'))
            return
        availability = availability[0]

        currency = availability.get('currencyBean').get('code')
        isChange = 1
        recommendations = availability.get('recommendationList')
        proposed_flights = availability.get('proposedBounds')[0].get(
            'proposedFlightsGroup')

        for proposedFlight in proposed_flights:
            segments = proposedFlight.get('segments')
            # Skip connecting flights (more than one segment).
            if len(segments) > 1:
                print('is change')
                continue
            segment = segments[0]
            carrier = segment.get('airline').get('code')
            flightNumber = carrier + str(segment.get('flightNumber'))
            depTime = time.mktime(time.strptime(
                segment.get('beginDate'), '%b %d, %Y %I:%M:00 %p'))
            arrTime = time.mktime(time.strptime(
                segment.get('endDate'), '%b %d, %Y %I:%M:00 %p'))
            depAirport = segment.get('beginLocation').get('locationCode')
            arrAirport = segment.get('endLocation').get('locationCode')

            # This id ties the flight to its entries in the recommendations.
            flight_id = proposedFlight.get('proposedBoundId')

            # No direct lookup is known, so scan every recommendation and
            # keep the cheapest one that references this flight. All five
            # fields are taken from that same recommendation so that they
            # stay consistent with each other.
            best_price = float('inf')
            maxSeats, netFare, adultTax, adultPrice, cabin = 0, 0, 0, 0, 'x'
            for recommendation in recommendations:
                bound = recommendation.get('bounds')[0]
                for flight_group in bound.get('flightGroupList'):
                    if flight_group.get('flightId') != flight_id:
                        continue
                    amount = bound.get('boundAmount')
                    total = float(amount.get('totalAmount'))
                    if total >= best_price:
                        continue
                    best_price = total
                    adultPrice = total
                    netFare = float(amount.get('amountWithoutTax'))
                    adultTax = float(amount.get('tax'))
                    maxSeats = flight_group.get('numberOfSeatsLeft')
                    cabin = flight_group.get('rbd')

            getTime = time.time()

            item = WowSpiderItem()
            item['flightNumber'] = flightNumber
            item['depTime'] = depTime
            item['arrTime'] = arrTime
            item['fromCity'] = self.portCitys.get(depAirport, depAirport)
            item['toCity'] = self.portCitys.get(arrAirport, arrAirport)
            item['depAirport'] = depAirport
            item['arrAirport'] = arrAirport
            item['currency'] = currency
            item['adultPrice'] = adultPrice
            item['adultTax'] = adultTax
            item['netFare'] = netFare
            item['maxSeats'] = maxSeats
            item['cabin'] = cabin
            item['carrier'] = carrier
            item['isChange'] = isChange
            item['segments'] = '[]'
            item['getTime'] = getTime
            yield item
            print(item)
Exemplo n.º 17
0
    def parse(self, response):
        """Parse a JSON availability response and yield one item per direct flight.

        The response body is decoded as JSON and the ``dailyFlightList`` of
        the first ``departureRouteList`` entry is processed day by day:

        * A day without flights produces no item. Instead, a copy of
          ``response.meta['invalid']`` with its ``date`` set to that day
          (dashes removed) is appended to ``self.task``.
        * Connecting flights (``connectedFlight`` truthy) are skipped.
        * A flight without a ``fare`` is reported with zero seats and price.

        Args:
            response: the API response; ``body`` and ``meta`` are used.

        Yields:
            WowSpiderItem: one item per direct flight. ``segments`` is a JSON
            list of ``[price, seats]`` pairs for the ``ECO``, ``ADVANTAGE``
            and ``EXTRA`` bundles, in that order.

        Raises:
            ValueError: if the body is not valid JSON.
            TypeError: if a day has no flights and ``response.meta`` carries
                no ``invalid`` entry.
        """
        self.isOK = True
        json_dict = json.loads(response.body)
        try:
            daily_flight_list = json_dict.get('departureRouteList')[0].get(
                'dailyFlightList')
        except (AttributeError, IndexError, KeyError, TypeError):
            # The route list is missing, empty or not shaped as expected.
            return

        # Per the original author's note, one response lists several
        # consecutive days (three) of flights.
        for daily_flight in daily_flight_list or []:
            flightList = daily_flight.get('flightList')
            if not flightList:
                # Copy the template: mutating the shared meta dict would make
                # every queued entry alias one object with the last date.
                invalid = dict(response.meta.get('invalid'))
                invalid['date'] = daily_flight.get('date').replace('-', '')
                self.task.append(invalid)
                continue

            for flight in flightList:
                # Skip connecting flights.
                if flight.get('connectedFlight'):
                    continue

                fare_seat = flight.get('fare')
                if not fare_seat:
                    # No fare on offer: report zero seats and zero price.
                    maxSeats = 0
                    adultPrice = 0
                    currency = 'TRY'
                    cabin = 'X'
                    net_fare = 0
                else:
                    # A remaining-seat label has so far only been seen when
                    # few seats are left; without it, assume 9.
                    seat_label = fare_seat.get('remainingSeatLabel')
                    if seat_label:
                        maxSeats = int(seat_label.get('values')[0])
                    else:
                        maxSeats = 9
                    shown_fare = fare_seat.get('shownFare')
                    adultPrice = shown_fare.get('amount')
                    currency_symbol = shown_fare.get('currency')
                    # Look the symbol up in CURRENCY_CACHE, falling back to
                    # the raw symbol when it is missing or mapped to a falsy
                    # value.
                    currency = self.custom_settings.get('CURRENCY_CACHE').get(
                        currency_symbol) or currency_symbol
                    cabin = fare_seat.get('reservationClass')
                    net_fare = fare_seat.get('totalFareDetailList')[0].get(
                        'subDetailList')[0].get('amount').get('amount')

                # The detailed fare is divided by the requested seat count to
                # obtain a per-seat net fare.
                netFare = net_fare / self.custom_settings.get('SEAT')
                adultTax = adultPrice - netFare
                isChange = 1
                carrier = flight.get('airline')
                flightNumber = '%s%s' % (carrier, flight.get('flightNo'))
                depTime = time.mktime(time.strptime(
                    flight.get('departureDateTime'), '%Y-%m-%dT%H:%M:%S'))
                arrTime = time.mktime(time.strptime(
                    flight.get('arrivalDateTime'), '%Y-%m-%dT%H:%M:%S'))
                depAirport = flight.get('departureLocation').get('portCode')
                arrAirport = flight.get('arrivalLocation').get('portCode')

                # Bundle (package) prices; bundles that are not offered stay
                # at 0.
                price_dict = {
                    'ECO': 0,
                    'ADVANTAGE': 0,
                    'EXTRA': 0,
                    'SUPER_ECO': 0
                }
                if adultPrice != 0:
                    for bundle in fare_seat.get('bundleList') or []:
                        price_dict[bundle.get('bundleType')] = bundle.get(
                            'shownFare').get('amount')

                segments = [[price_dict.get('ECO'), maxSeats],
                            [price_dict.get('ADVANTAGE'), maxSeats],
                            [price_dict.get('EXTRA'), maxSeats]]
                getTime = time.time()

                item = WowSpiderItem()
                item['flightNumber'] = flightNumber
                item['depTime'] = depTime
                item['arrTime'] = arrTime
                item['fromCity'] = self.portCitys.get(depAirport, depAirport)
                item['toCity'] = self.portCitys.get(arrAirport, arrAirport)
                item['depAirport'] = depAirport
                item['arrAirport'] = arrAirport
                item['currency'] = currency
                item['adultPrice'] = adultPrice
                item['adultTax'] = adultTax
                item['netFare'] = netFare
                item['maxSeats'] = maxSeats
                item['cabin'] = cabin
                item['carrier'] = carrier
                item['isChange'] = isChange
                item['segments'] = json.dumps(segments)
                item['getTime'] = getTime
                yield item