示例#1
0
def ParsePage(content,flight_no,dept_time):
    """Find the adult fare of one specific flight in a JSON search response.

    Args:
        content: JSON text of the response. Anything empty or not longer
            than 100 characters is treated as "no data".
        flight_no: flight number to look for, with segments joined by '_'
            (the '-' separators of the response's 'FlightNos' are converted
            to '_' before comparing).
        dept_time: local departure time as 'YYYY-MM-DDTHH:MM:SS'.

    Returns:
        The 'TCPrice_Audlt' value of the first fare of the first entry whose
        flight number and departure time both match, or -1 when nothing
        matches.

    Raises:
        ValueError: if ``content`` is long enough but is not valid JSON.
    """
    if not content or len(content) <= 100:
        return -1

    content_json = json.loads(content)
    if 'OriginDestinationOption' not in content_json:
        return -1

    for each_flight_json in content_json['OriginDestinationOption']:
        try:
            candidate_no = each_flight_json['FlightNos'].replace('-', '_')

            # The slice drops a 6-character prefix and 2-character suffix
            # (presumably a "/Date(<epoch ms>)/" wrapper -- TODO confirm);
            # cutting the last three digits then turns milliseconds into
            # the seconds that time.localtime() expects.
            dept_stamp = each_flight_json['FlightSegment'][0]['DepartureDate'][6:-2]
            candidate_dept = time.strftime(
                '%Y-%m-%dT%H:%M:%S',
                time.localtime(float(str(dept_stamp)[:-3])))

            price = each_flight_json['FareInfo'][0]['TCPrice_Audlt']
        except Exception:
            # Best effort: a malformed entry must not abort the search.
            # Unlike a bare ``except:`` this lets KeyboardInterrupt and
            # SystemExit propagate.
            continue

        if candidate_no == flight_no and candidate_dept == dept_time:
            return price

    return -1
示例#2
0
def ParseInterPage(page):
    '''Convert a GBK-encoded JSON flight listing into a list of ticket tuples.

    The page is decoded as GBK (undecodable bytes are ignored) and parsed
    with ``jsonlib.read``. Every entry of ``FlightList`` yields one tuple:
    (flight_no, plane_no, airline, dept_id, dest_id, dept_day, dept_time,
    dest_time, dur, price, tax, surcharge, currency, seat_type, source,
    return_rule, stop).

    Flight numbers, airline names and craft types of the individual
    segments are joined with '_'. An entry whose ``FlightDetail`` is empty
    raises IndexError.
    '''
    listing = jsonlib.read(page.decode("GBK", "ignore"))
    stamp_format = '%Y-%m-%d %H:%M:%S'
    allinfo = []

    for node in listing["FlightList"]:
        # isoformat() gives 'YYYY-MM-DDTHH:MM:SS' for a datetime without
        # microseconds, i.e. str(datetime) with the space replaced by 'T'.
        departs_at = datetime.datetime.strptime(node["DepartTime"],
                                                stamp_format).isoformat()
        arrives_at = datetime.datetime.strptime(node["ArrivalTime"],
                                                stamp_format).isoformat()

        # Flight information
        flight = Flight()
        numbers = []
        carriers = []
        crafts = []
        dept_id_list = []
        for segment in node["FlightDetail"]:
            numbers.append(segment["FlightNo"])
            carriers.append(segment["AirlineName"])
            crafts.append(segment["CraftType"])
            dept_id_list.append(segment["DPort"])
            # Overwritten on every pass: the last segment's arrival port wins.
            flight.dest_id = segment["APort"]

        flight.stop = len(dept_id_list)
        flight.dept_id = dept_id_list[0]
        flight.flight_no = '_'.join(numbers)
        flight.airline = '_'.join(carriers)
        flight.plane_no = '_'.join(crafts)

        flight.dept_time = departs_at
        flight.dest_time = arrives_at
        flight.dept_day = departs_at.split('T')[0]

        flight.price = int(node["Price"])
        flight.surcharge = int(
            GetPriceByClass(node["OilFeeImage"], TaxPriceClasses))
        flight.tax = int(GetPriceByClass(node["TaxImage"], TaxPriceClasses))

        # Flight duration in seconds (FlightTime is presumably minutes).
        flight.dur = int(node["FlightTime"]) * 60
        flight.currency = "CNY"
        flight.source = "ctrip::ctrip"
        flight.seat_type = node["ClassName"]

        ticket = (
            flight.flight_no, flight.plane_no, flight.airline,
            flight.dept_id, flight.dest_id, flight.dept_day,
            flight.dept_time, flight.dest_time, flight.dur,
            flight.price, flight.tax, flight.surcharge,
            flight.currency, flight.seat_type, flight.source,
            flight.return_rule, flight.stop,
        )
        allinfo.append(ticket)

    return allinfo
示例#3
0
    def DictTuple(self, Dict_list):
        """Turn a list of ticket dicts into a list of 23-item ticket tuples.

        Each dict's values are copied onto one shared ``Flight`` object and
        then read back in the fixed tuple order. ``currency`` and ``source``
        are never assigned here, so they carry whatever ``Flight()`` starts
        with. A dict that lacks one of the expected keys raises KeyError.
        """
        # (Flight attribute, key of the input dict), in assignment order.
        # Only 'stop_time' is stored under a differently spelled key.
        assignments = (
            ('plane_type', 'plane_type'),
            ('flight_no', 'flight_no'),
            ('flight_corp', 'flight_corp'),
            ('dept_day', 'dept_day'),
            ('stop_time', 'stoptime'),
            ('dept_time', 'dept_time'),
            ('dest_time', 'dest_time'),
            ('stop_id', 'stop_id'),
            ('dept_id', 'dept_id'),
            ('dest_id', 'dest_id'),
            ('dur', 'dur'),
            ('rest', 'rest'),
            ('stop', 'stop'),
            ('return_rule', 'return_rule'),
            ('seat_type', 'seat_type'),
            ('real_class', 'real_class'),
            ('surcharge', 'surcharge'),
            ('promotion', 'promotion'),
            ('package', 'package'),
            ('daydiff', 'daydiff'),
            ('price', 'price'),
            ('tax', 'tax'),
        )
        # Attribute order of the emitted tuple ('package' is not included).
        layout = (
            'flight_no', 'plane_type', 'flight_corp', 'dept_id', 'dest_id',
            'dept_day', 'dept_time', 'dest_time', 'dur', 'rest', 'price',
            'tax', 'surcharge', 'promotion', 'currency', 'seat_type',
            'real_class', 'stop_id', 'stop_time', 'daydiff', 'source',
            'return_rule', 'stop',
        )

        tickets = []
        flight = Flight()
        for every in Dict_list:
            for attribute, key in assignments:
                setattr(flight, attribute, every[key])
            tickets.append(tuple(getattr(flight, name) for name in layout))

        return tickets
示例#4
0
def parseFlightAndTicket(content_temp, time_zone_A, time_zone_B):
    """Parse an easyjet JSON response into one flight and one ticket record.

    Args:
        content_temp: response text; a JSON object whose 'Html' entry holds
            the markup that the module-level patterns are applied to.
        time_zone_A: passed to ``durCal`` together with the departure time.
        time_zone_B: passed to ``durCal`` together with the arrival time.

    Returns:
        A dict ``{'ticket': [...], 'flight': {...}}``. On success 'flight'
        maps ``<flight_no>_<dept_id>_<dest_id>`` to an 8-item tuple and
        'ticket' holds a single 17-item tuple. If anything fails while
        parsing, the error is printed and both containers are empty.
    """
    # Python 2 byte-string handling: the pound sign is replaced by its
    # currency code before the JSON is parsed.
    content = content_temp.encode('utf-8')
    content = content.replace('£', 'GBP')
    result = {'ticket': [], 'flight': {}}

    flight = Flight()
    eachflight = EachFlight()

    try:
        content_json = json.loads(content)
        flight_content = content_json['Html']
        flight_content = flight_content.replace('\n', '')

        flight.flight_no = 'EZY' + flight_no_pat.findall(flight_content)[0]
        flight.airline = 'easyjet'
        flight.dept_id = dept_id_pat.findall(flight_content)[0]
        flight.dest_id = dest_id_pat.findall(flight_content)[0]
        flight.dept_time = dept_time_pat.findall(flight_content)[0].replace(
            ' ', 'T') + ':00'
        flight.dest_time = dest_time_pat.findall(flight_content)[0].replace(
            ' ', 'T') + ':00'
        flight.price = price_pat.findall(flight_content)[0]
        flight.seat_type = '经济舱'
        flight.source = 'easyjet::easyjet'
        flight.currency = currency_pat.findall(flight_content)[0]
        flight.stop = 0
        flight.dept_day = flight.dept_time.split('T')[0]
        flight.dur = durCal(flight.dept_time, flight.dest_time, time_zone_A,
                            time_zone_B)

        eachflight.flight_key = flight.flight_no + '_' + flight.dept_id + '_' + flight.dest_id
        eachflight.flight_no = flight.flight_no
        eachflight.airline = 'easyjet'
        eachflight.dept_id = flight.dept_id
        eachflight.dest_id = flight.dest_id
        eachflight.dept_time = flight.dept_time
        eachflight.dest_time = flight.dest_time
        eachflight.dur = flight.dur

        flight_record = (
            eachflight.flight_no, eachflight.airline, eachflight.plane_no,
            eachflight.dept_id, eachflight.dest_id, eachflight.dept_time,
            eachflight.dest_time, eachflight.dur)

        ticket_record = (
            flight.flight_no, flight.plane_no, flight.airline,
            flight.dept_id, flight.dest_id, flight.dept_day,
            flight.dept_time, flight.dest_time, flight.dur, flight.price,
            flight.tax, flight.surcharge, flight.currency, flight.seat_type,
            flight.source, flight.return_rule, flight.stop)

        # Publish only once both records exist, so a late failure cannot
        # leave a flight entry without its ticket.
        result['flight'] = {eachflight.flight_key: flight_record}
        result['ticket'] = [ticket_record]
    except Exception as e:
        # Best effort: report the problem and fall through to return the
        # (empty) result.
        print(str(e))

    # Returned on the success path as well; previously the only ``return``
    # was inside the ``except`` body, so a successful parse returned None.
    return result
示例#5
0
def parseFlightAndTicket(content_temp, time_zone_A, time_zone_B):
    """Extract the flight and ticket tuples from an easyjet JSON response.

    Args:
        content_temp: response text; a JSON object whose 'Html' entry is
            scanned with the module-level regular expressions.
        time_zone_A: forwarded to ``durCal`` with the departure time.
        time_zone_B: forwarded to ``durCal`` with the arrival time.

    Returns:
        ``{'ticket': tickets, 'flight': flights}`` where ``flights`` maps
        ``<flight_no>_<dept_id>_<dest_id>`` to an 8-item tuple and
        ``tickets`` is a one-element list holding a 17-item tuple. When
        parsing fails the exception text is printed and both are empty.
    """
    # Python 2 byte-string handling: swap the pound sign for its currency
    # code before handing the text to the JSON parser.
    content = content_temp.encode('utf-8')
    content = content.replace('£', 'GBP')
    flights = {}
    tickets = []
    result = {'ticket':tickets, 'flight':flights}

    flight = Flight()
    eachflight = EachFlight()

    try:
        content_json = json.loads(content)
        flight_content = content_json['Html'].replace('\n','')

        flight.flight_no = 'EZY' + flight_no_pat.findall(flight_content)[0]
        flight.airline = 'easyjet'
        flight.dept_id = dept_id_pat.findall(flight_content)[0]
        flight.dest_id = dest_id_pat.findall(flight_content)[0]
        flight.dept_time = dept_time_pat.findall(flight_content)[0].replace(' ','T') + ':00'
        flight.dest_time = dest_time_pat.findall(flight_content)[0].replace(' ','T') + ':00'
        flight.price = price_pat.findall(flight_content)[0]
        flight.seat_type = '经济舱'
        flight.source = 'easyjet::easyjet'
        flight.currency = currency_pat.findall(flight_content)[0]
        flight.stop = 0
        flight.dept_day = flight.dept_time.split('T')[0]
        flight.dur = durCal(flight.dept_time, flight.dest_time, time_zone_A, time_zone_B)

        eachflight.flight_key = flight.flight_no + '_' + flight.dept_id + '_' + flight.dest_id
        eachflight.flight_no = flight.flight_no
        eachflight.airline = 'easyjet'
        eachflight.dept_id = flight.dept_id
        eachflight.dest_id = flight.dest_id
        eachflight.dept_time = flight.dept_time
        eachflight.dest_time = flight.dest_time
        eachflight.dur = flight.dur

        flight_entry = (eachflight.flight_no, eachflight.airline, eachflight.plane_no,
                eachflight.dept_id, eachflight.dest_id, eachflight.dept_time, eachflight.dest_time,
                eachflight.dur)

        ticket_entry = (flight.flight_no, flight.plane_no, flight.airline, flight.dept_id, flight.dest_id,
                flight.dept_day, flight.dept_time, flight.dest_time, flight.dur, flight.price,
                flight.tax, flight.surcharge, flight.currency, flight.seat_type,
                flight.source, flight.return_rule, flight.stop)

        # Both tuples were built without error; only now expose them, so the
        # result never contains a flight without its ticket.
        flights[eachflight.flight_key] = flight_entry
        tickets.append(ticket_entry)
    except Exception as e:
        # Best effort: print the reason and return the empty result below.
        print(str(e))

    # The only ``return`` used to live inside the ``except`` body, which made
    # every successful parse return None.
    return result
示例#6
0
def ParseInterPage(page):
    '''Build ticket tuples from a GBK-encoded JSON flight listing.

    ``page`` is decoded as GBK (bytes that cannot be decoded are ignored)
    and read with ``jsonlib.read``. One 17-item tuple is produced per entry
    of ``FlightList``: (flight_no, plane_no, airline, dept_id, dest_id,
    dept_day, dept_time, dest_time, dur, price, tax, surcharge, currency,
    seat_type, source, return_rule, stop).

    Values that exist per segment (flight number, airline, craft type) are
    joined with '_'. An entry without any ``FlightDetail`` item raises
    IndexError.
    '''
    data = jsonlib.read(page.decode("GBK", "ignore"))
    input_format = '%Y-%m-%d %H:%M:%S'

    def to_iso(text):
        # Same text as str(datetime).replace(' ', 'T') because the parsed
        # value never carries microseconds.
        return datetime.datetime.strptime(text, input_format).isoformat()

    allinfo = []
    for node in data["FlightList"]:
        dept_time = to_iso(node["DepartTime"])
        dest_time = to_iso(node["ArrivalTime"])

        # Flight information
        flight = Flight()
        flight_numbers, airline_names, craft_types, dept_id_list = [], [], [], []
        for leg in node["FlightDetail"]:
            flight_numbers.append(leg["FlightNo"])
            airline_names.append(leg["AirlineName"])
            craft_types.append(leg["CraftType"])
            dept_id_list.append(leg["DPort"])
            # Reassigned for every leg, so the last leg's arrival port stays.
            flight.dest_id = leg["APort"]

        flight.stop = len(dept_id_list)
        flight.dept_id = dept_id_list[0]
        flight.flight_no = '_'.join(flight_numbers)
        flight.airline = '_'.join(airline_names)
        flight.plane_no = '_'.join(craft_types)

        flight.dept_time = dept_time
        flight.dest_time = dest_time
        flight.dept_day = dept_time.split('T')[0]

        flight.price = int(node["Price"])
        flight.surcharge = int(GetPriceByClass(node["OilFeeImage"], TaxPriceClasses))
        flight.tax = int(GetPriceByClass(node["TaxImage"], TaxPriceClasses))

        # Flight duration in seconds (FlightTime is presumably minutes).
        flight.dur = int(node["FlightTime"]) * 60
        flight.currency = "CNY"
        flight.source = "ctrip::ctrip"
        flight.seat_type = node["ClassName"]

        allinfo.append((
            flight.flight_no, flight.plane_no, flight.airline,
            flight.dept_id, flight.dest_id, flight.dept_day,
            flight.dept_time, flight.dest_time, flight.dur,
            flight.price, flight.tax, flight.surcharge,
            flight.currency, flight.seat_type, flight.source,
            flight.return_rule, flight.stop,
        ))

    return allinfo
示例#7
0
def ParsePage(tree, params):
    """Collect one ticket tuple per price row of every flight in the page.

    Args:
        tree: parsed document exposing ``xpath``; each
            ``//div[@class='search_box']`` element is handled as one flight.
        params: not used by this function; kept so existing callers keep
            working.

    Returns:
        A list of 17-item tuples (flight_no, plane_no, airline, dept_id,
        dest_id, dept_day, dept_time, dest_time, dur, price, tax, surcharge,
        currency, seat_type, source, return_rule, stop). Flights without
        price rows contribute nothing.
    """
    allinfo = []
    time_format = '%Y-%m-%d %H:%M:%S'

    for node in tree.xpath("//div[@class='search_box']"):
        # Flight information
        flight = Flight()
        flight.flight_no = GetFlightNo(node.get("id"))
        # "data" is '|'-separated: departure time, arrival time,
        # departure id, destination id.
        strs = node.get("data").split("|")
        flight.dept_id = strs[2]
        flight.dest_id = strs[3]
        flight.airline = GetTextByXpath(
            node, "table[1]/tr/td[1]/div[1]/span/text()")
        flight.plane_no = GetAlphanumeric(
            GetAllText(node.xpath("table[1]/tr/td[1]/div[2]/span")[0]))

        airport_tax, fuel_surcharge = GetTax(
            GetTextByXpath(node, "table[1]/tr/td[5]/div[1]/text()"))

        priceNodes = node.xpath("table[@class='search_table']/tr")
        if not priceNodes:
            continue

        # The times belong to the flight, not to a price row, so they are
        # parsed once instead of once per row.
        flight.dept_time = str(
            datetime.datetime.strptime(strs[0], time_format)).replace(' ', 'T')
        flight.dest_time = str(
            datetime.datetime.strptime(strs[1], time_format)).replace(' ', 'T')
        # dept_time is a string here, so the date part is split off; calling
        # .strftime() on it raised AttributeError for every price row.
        flight.dept_day = flight.dept_time.split('T')[0]

        for priceNode in priceNodes:
            # Ticket information
            flight.price = int(GetTextByXpath(priceNode, "td[7]/span/text()"))
            flight.tax = int(airport_tax)
            flight.surcharge = int(fuel_surcharge)
            flight.currency = "CNY"
            flight.source = "ctrip::ctrip"
            flight.seat_type = GetAllText(priceNode.xpath("td[2]")[0])

            allinfo.append((flight.flight_no,flight.plane_no,flight.airline,flight.dept_id,flight.dest_id,\
                        flight.dept_day,flight.dept_time,flight.dest_time,flight.dur,flight.price,flight.tax,\
                        flight.surcharge,flight.currency,flight.seat_type,flight.source,flight.return_rule,flight.stop))

    return allinfo
示例#8
0
def ValidatePage(content,dept_year, flight_no, orig_dept_time):
    """Look up the price of one specific flight in a scraped result page.

    Args:
        content: page text that the module-level patterns are applied to.
        dept_year: year as a string; the page only supplies month and day.
        flight_no: flight number to look for, segments joined by '_'.
        orig_dept_time: departure time as 'YYYY-MM-DDTHH:MM:SS'.

    Returns:
        The integer price of the first candidate whose flight number and
        departure time both match, or -1 when none matches. Candidates that
        cannot be parsed are skipped.
    """
    result = -1
    time_format = '%Y-%m-%dT%H:%M:%S'

    for each_flight_text in each_flight_content_pat.findall(content):
        flight = Flight()
        try:
            t_price = all_price_pat.findall(each_flight_text)[0]
            each_flight_text_temp = each_flight_content_temp_pat.findall(each_flight_text)[0]
            each_part_flight = each_part_flight_pat.findall(each_flight_text_temp)
            if not each_part_flight:
                continue

            # First segment gives the origin, last segment the destination;
            # [1:-1] drops one wrapping character on each side of the match.
            flight.dept_id = airport_pat.findall(each_part_flight[0])[0][1:-1]
            flight.dest_id = airport_pat.findall(each_part_flight[-1])[-1][1:-1]

            dept_time_temp = dept_time_temp_pat.findall(each_part_flight[0])[0]
            dest_time_temp = dest_time_temp_pat.findall(each_part_flight[-1])[-1]
            flight.dept_day = dept_year + '-' + dept_time_temp[0].strip() + '-' + dept_time_temp[1].strip()

            flight.dept_time = flight.dept_day + 'T' + dept_time_temp[2].strip() + ':00'
            # NOTE(review): the arrival reuses the departure's first field
            # (presumably the month) -- confirm this is right for flights
            # that land in the following month.
            flight.dest_time = dept_year + '-' + dept_time_temp[0].strip() + '-' + \
                    dest_time_temp[0].strip() + 'T' + dest_time_temp[1].strip()[-5:] + ':00'

            # Parsed only as validation: a malformed timestamp raises here
            # and the candidate is skipped.
            datetime.datetime.strptime(flight.dept_time, time_format)
            datetime.datetime.strptime(flight.dest_time, time_format)

            flight.price = price_pat.findall(each_flight_text)[0]
            if len(flight.price) > 1:
                flight.price = int(flight.price[0])
            else:
                flight.price = int(t_price)

            flight.flight_no = '_'.join(
                flight_no_pat.findall(part)[0][:8].replace(' ', '')
                for part in each_part_flight)

            if flight.flight_no == flight_no and flight.dept_time == orig_dept_time:
                result = flight.price
                break
        except Exception:
            # Best effort: one unparsable candidate must not stop the search.
            continue

    # The visible original computed ``result`` but never returned it.
    return result
示例#9
0
def ValidatePage(content, flight_no, dept_time):
    """Return the adult fare of one specific flight from a JSON response.

    Args:
        content: JSON text of the response. Empty input, or input of 100
            characters or fewer, is treated as "no data".
        flight_no: wanted flight number with '_' between segments (the
            response's 'FlightNos' uses '-', which is converted first).
        dept_time: wanted local departure time, 'YYYY-MM-DDTHH:MM:SS'.

    Returns:
        'TCPrice_Audlt' of the first fare of the first matching entry of
        'OriginDestinationOption', or -1 if no entry matches.

    Raises:
        ValueError: if ``content`` is long enough but is not valid JSON.
    """
    not_found = -1

    if not content or len(content) <= 100:
        return not_found

    content_json = json.loads(content)
    if 'OriginDestinationOption' not in content_json:
        return not_found

    for each_flight_json in content_json['OriginDestinationOption']:
        try:
            found_no = each_flight_json['FlightNos'].replace('-', '_')

            # [6:-2] removes a 6-character prefix and 2-character suffix
            # (presumably "/Date(" and ")/" -- TODO confirm); dropping the
            # last three digits converts milliseconds to seconds.
            raw_departure = each_flight_json['FlightSegment'][0][
                'DepartureDate'][6:-2]
            departure_seconds = float(str(raw_departure)[:-3])
            found_dept_time = time.strftime(
                '%Y-%m-%dT%H:%M:%S', time.localtime(departure_seconds))

            fare = each_flight_json['FareInfo'][0]['TCPrice_Audlt']
        except Exception:
            # Best effort: ignore an entry that cannot be parsed. Catching
            # Exception rather than everything keeps Ctrl-C and SystemExit
            # working.
            continue

        if found_no == flight_no and found_dept_time == dept_time:
            return fare

    return not_found
示例#10
0
def ParsePage(tree):
    """Collect one ticket tuple per price row of every flight in the page.

    Args:
        tree: parsed document exposing ``xpath``; each
            ``//div[@class='search_box']`` element is handled as one flight.

    Returns:
        A list of 17-item tuples (flight_no, plane_no, airline, dept_id,
        dest_id, dept_day, dept_time, dest_time, dur, price, tax, surcharge,
        currency, seat_type, source, return_rule, stop). Flights without
        price rows contribute nothing.
    """
    allinfo = []
    time_format = '%Y-%m-%d %H:%M:%S'

    for node in tree.xpath("//div[@class='search_box']"):
        # Flight information
        flight = Flight()
        flight.flight_no = GetFlightNo(node.get("id"))
        # "data" is '|'-separated: departure time, arrival time,
        # departure id, destination id.
        strs = node.get("data").split("|")
        flight.dept_id = strs[2]
        flight.dest_id = strs[3]
        flight.airline = GetTextByXpath(node, "table[1]/tr/td[1]/div[1]/span/text()")
        flight.plane_no = GetAlphanumeric(GetAllText(node.xpath("table[1]/tr/td[1]/div[2]/span")[0]))

        airport_tax, fuel_surcharge = GetTax(GetTextByXpath(node, "table[1]/tr/td[5]/div[1]/text()"))

        priceNodes = node.xpath("table[@class='search_table']/tr")
        if not priceNodes:
            continue

        # Identical for every price row of this flight, so parse them once.
        flight.dept_time = str(datetime.datetime.strptime(strs[0], time_format)).replace(' ', 'T')
        flight.dest_time = str(datetime.datetime.strptime(strs[1], time_format)).replace(' ', 'T')
        # dept_time is a string, so take its date part by splitting; the
        # former .strftime() call on it raised AttributeError on every row.
        flight.dept_day = flight.dept_time.split('T')[0]

        for priceNode in priceNodes:
            # Ticket information
            flight.price = int(GetTextByXpath(priceNode, "td[7]/span/text()"))
            flight.tax = int(airport_tax)
            flight.surcharge = int(fuel_surcharge)
            flight.currency = "CNY"
            flight.source = "ctrip::ctrip"
            flight.seat_type = GetAllText(priceNode.xpath("td[2]")[0])

            allinfo.append((flight.flight_no,flight.plane_no,flight.airline,flight.dept_id,flight.dest_id,\
                        flight.dept_day,flight.dept_time,flight.dest_time,flight.dur,flight.price,flight.tax,\
                        flight.surcharge,flight.currency,flight.seat_type,flight.source,flight.return_rule,flight.stop))

    return allinfo
示例#11
0
                flight = Flight()
                flight.dept_day = one_day_flights[0]
                strs = one_day_flight[1].split("~")

                if len(strs) != 9:
                    continue
                flight.flight_no = strs[0].strip() + strs[1].strip()
                flight.dept_id = strs[4].strip()
                flight.dest_id = strs[6].strip()
                flight.airline = "ryanair"
                flight.source = "ryanair::ryanair"
                
                dept_time = datetime.datetime.strptime(strs[5], '%m/%d/%Y %H:%M')
                dest_time = datetime.datetime.strptime(strs[7], '%m/%d/%Y %H:%M')
                flight.dept_time = str(dept_time).replace(' ','T')
                flight.dest_time = str(dest_time).replace(' ','T')
                flight.stop = 0

                days = (dest_time - dept_time).days
                dur = (dest_time.hour - dept_time.hour) * 3600 + (dest_time.minute - dept_time.minute) * 60 + days * 86400

                flight.dur = dur
                flight.price = int(GetPrice(one_day_flight[4]))
                flight.currency = currency

                allinfo.append((flight.flight_no, flight.plane_no, flight.airline, flight.dept_id, \
                        flight.dest_id, flight.dept_day, flight.dept_time, flight.dest_time, \
                        flight.dur, flight.price, flight.tax, flight.surcharge, flight.currency, \
                        flight.seat_type, flight.source, flight.return_rule, flight.stop))

    return allinfo
示例#12
0
                strs = one_day_flight[1].split("~")

                if len(strs) != 9:
                    continue
                flight.flight_no = strs[0].strip() + strs[1].strip()
                flight.dept_id = strs[4].strip()
                flight.dest_id = strs[6].strip()
                flight.airline = "ryanair"
                flight.source = "ryanair::ryanair"

                dept_time = datetime.datetime.strptime(strs[5],
                                                       '%m/%d/%Y %H:%M')
                dest_time = datetime.datetime.strptime(strs[7],
                                                       '%m/%d/%Y %H:%M')
                flight.dept_time = str(dept_time).replace(' ', 'T')
                flight.dest_time = str(dest_time).replace(' ', 'T')
                flight.stop = 0

                days = (dest_time - dept_time).days
                dur = (dest_time.hour - dept_time.hour) * 3600 + (
                    dest_time.minute - dept_time.minute) * 60 + days * 86400

                flight.dur = dur
                flight.price = int(GetPrice(one_day_flight[4]))
                flight.currency = currency

                allinfo.append((flight.flight_no, flight.plane_no, flight.airline, flight.dept_id, \
                        flight.dest_id, flight.dept_day, flight.dept_time, flight.dest_time, \
                        flight.dur, flight.price, flight.tax, flight.surcharge, flight.currency, \
                        flight.seat_type, flight.source, flight.return_rule, flight.stop))
示例#13
0
        for flightNode in node["FlightDetail"]:
            flight.flight_no = flight.flight_no + flightNode["FlightNo"] + '_'
            flight.airline = flight.airline + flightNode["AirlineName"] + '_'
            flight.plane_no = flight.plane_no + flightNode["CraftType"] + '_'
            dept_id_list.append(flightNode["DPort"])
            flight.dest_id = flightNode["APort"] 

        #flight.stop = len(dept_id_list)
        flight.dept_id = dept_id_list[0]
        flight.flight_no = flight.flight_no[:-1]
        #flight.airline = flight.airline[:-1]
        #flight.plane_no = flight.plane_no[:-1]

        flight.dept_time = dept_time
        flight.dest_time = dest_time
        flight.dept_day = flight.dept_time.split('T')[0]
        
        flight.price = int(node["Price"])
        #flight.surcharge = int(GetPriceByClass(node["OilFeeImage"], TaxPriceClasses))
        #flight.tax = int((GetPriceByClass(node["TaxImage"], TaxPriceClasses)))

        #flight.dur = int(node["FlightTime"]) * 60 #飞行时长,s
        #flight.currency = "CNY"
        #flight.source = "ctrip::ctrip"
        #flight.seat_type = node["ClassName"]

        if flight.flight_no == params[0] and flight.dept_time == params[1]:
            return flight.price

    return -1
示例#14
0
def _fly_time_to_seconds(fly_time):
    """Convert a duration text such as u'2小时30分' or u'2时30分' to seconds.

    The hour count is the text before the hour marker (u'小时', else u'时'),
    the minute count is the text between that marker and u'分'. Either part
    may be missing. Works on characters, so it does not depend on how many
    bytes a marker occupies in UTF-8.
    """
    hours_text, marker, remainder = fly_time.partition(u'小时')
    if not marker:
        hours_text, marker, remainder = fly_time.partition(u'时')
    if not marker:
        # No hour marker at all: the whole text can only describe minutes.
        hours_text, remainder = u'', fly_time
    minutes_text = remainder.partition(u'分')[0].strip()
    hours_text = hours_text.strip()

    seconds = 0
    if hours_text:
        seconds += int(hours_text) * 3600
    if minutes_text:
        seconds += int(minutes_text) * 60
    return seconds


def ParsePage(content):
    """Parse a tongcheng JSON search response into ticket tuples.

    Args:
        content: JSON text of the response. Empty input, or input of 100
            characters or fewer, is treated as "no data".

    Returns:
        A list with one 17-item tuple per entry of 'OriginDestinationOption'
        that could be parsed: (flight_no, plane_no, airline, dept_id,
        dest_id, dept_day, dept_time, dest_time, dur, price, tax, surcharge,
        currency, seat_type, source, return_rule, stop). Entries that fail
        to parse are logged and skipped. The list is empty when there is no
        data or the response lacks 'OriginDestinationOption'.

    Raises:
        ValueError: if ``content`` is long enough but is not valid JSON.
    """
    flights = []

    if not content or len(content) <= 100:
        return flights

    content_json = json.loads(content)
    if 'OriginDestinationOption' not in content_json:
        logger.error('tongchengFlight: Crawl this page failed!')
        return flights

    for each_flight_json in content_json['OriginDestinationOption']:
        try:
            flight = Flight()

            segments = each_flight_json['FlightSegment']
            flight_nums = len(segments)

            flight.flight_no = each_flight_json['FlightNos'].replace('-','_')
            # 'AirPorts' starts with the origin code and ends with the
            # destination code (three characters each).
            flight.dept_id = each_flight_json['AirPorts'][:3]
            flight.dest_id = each_flight_json['AirPorts'][-3:]

            # [6:-2] removes a 6-character prefix and 2-character suffix
            # around the millisecond timestamp (presumably "/Date(" and
            # ")/" -- TODO confirm).
            dept_time_tamp = segments[0]['DepartureDate'][6:-2]
            dest_time_tamp = segments[-1]['ArrivalDate'][6:-2]

            # Total duration = time in the air on every segment ...
            flight.dur = sum(
                _fly_time_to_seconds(segment['FlyTime'])
                for segment in segments)
            # ... plus the waiting time between consecutive segments.
            for previous, following in zip(segments, segments[1:]):
                departs_ms = int(following['DepartureDate'][6:-2])
                arrived_ms = int(previous['ArrivalDate'][6:-2])
                # Floor division keeps the duration an integer on every
                # Python version.
                flight.dur += (departs_ms - arrived_ms) // 1000

            # Dropping the last three digits converts ms to seconds.
            flight.dept_time = time.strftime('%Y-%m-%d %H:%M:%S', \
                    time.localtime(float(str(dept_time_tamp)[:-3]))).replace(' ','T')
            flight.dest_time = time.strftime('%Y-%m-%d %H:%M:%S', \
                    time.localtime(float(str(dest_time_tamp)[:-3]))).replace(' ','T')
            flight.dept_day = flight.dept_time.split('T')[0]
            flight.source = 'tongcheng::tongcheng'
            flight.stop = int(flight_nums) - 1
            flight.currency = 'CNY'
            flight.price = each_flight_json['FareInfo'][0]['TCPrice_Audlt']
            flight.tax = each_flight_json['FareInfo'][0]['TaxPrice_Audlt']

            flight.plane_no = '_'.join(
                segment['Equipment'] for segment in segments)
            flight.airline = '_'.join(
                segment['AirCompanyName'] for segment in segments)
            flight.seat_type = '经济舱'

            flight_tuple = (flight.flight_no, flight.plane_no, flight.airline, \
                    flight.dept_id, flight.dest_id, flight.dept_day, flight.dept_time, \
                    flight.dest_time, flight.dur, flight.price, flight.tax, \
                    flight.surcharge, flight.currency, flight.seat_type, \
                    flight.source, flight.return_rule, flight.stop)
            flights.append(flight_tuple)
        except Exception as e:
            # Best effort: log and skip an entry that cannot be parsed.
            logger.info('tongchengFlight: Parse this flight failed!' + str(e))
            continue

    # Returned on every path; previously only the "key missing" branch
    # returned the list and all other paths fell through to None.
    return flights
示例#15
0
def vuelingvalidate(content, flight_no, req_dept_time):
    """Look up the price of one specific flight on a Vueling result page.

    The page is scanned with the module-level patterns ``flight_no_pat``,
    ``flight_time_pat``, ``dept_time_pat`` and ``price_pat``.  Prices are
    listed three per flight (one per seat type), so price ``i`` belongs to
    flight ``i // 3``.

    Args:
        content: Raw page text.
        flight_no: Flight number to match.  An entry containing '~^' yields
            two numbers joined by '_'.
        req_dept_time: Departure time to match; compared verbatim against
            the string captured by ``dept_time_pat``.

    Returns:
        The first matching price as a string (',' replaced by '.'), or -1
        when the page lists no flights or nothing matches.
    """
    # Previously the "no flights" branch returned an undefined name and the
    # "no match" path fell through to None; both now report -1.
    result = -1

    flight_num_list = []
    for flight_num_info in flight_no_pat.findall(content):
        first_pos = flight_num_info.find('|')
        second_pos = flight_num_info.rfind('~^')
        flight_num = flight_num_info[first_pos + 1:first_pos + 8].replace('~', '')
        if second_pos > 0:
            # A second number is present: join both with '_'.
            flight_num += '_' + \
                flight_num_info[second_pos + 2:second_pos + 9].replace('~', '')
        flight_num_list.append(flight_num)

    if not flight_num_list:
        return result

    dept_time_list = [dept_time_pat.findall(time_temp)[0]
                      for time_temp in flight_time_pat.findall(content)]

    price_list = []
    for price_temp in price_pat.findall(content):
        price_start = price_temp.rfind('>') + 1
        price_list.append(price_temp[price_start:-3].replace(',', '.'))

    fares_per_flight = 3
    # Only flights that have both a number and a departure time can match.
    flight_count = min(len(flight_num_list), len(dept_time_list))

    for i, price in enumerate(price_list):
        j = i // fares_per_flight
        if j >= flight_count:
            # More prices than parsed flights: nothing left to compare.
            break
        if flight_num_list[j] == flight_no and dept_time_list[j] == req_dept_time:
            return price

    return result
示例#16
0
def elong_page_parser(htmlcontent):
    '''
    Parse an eLong search-result page into ticket and segment records.

    The flight JSON is taken from the first ``flightsPattern`` match and its
    'FlightLegList' entry is walked leg by leg.

    Returns a ``(tickets, flights)`` pair:
      * tickets -- list of 17-field tuples, one per itinerary (flight_no,
        plane_no, airline, dept_id, dest_id, dept_day, dept_time, dest_time,
        dur, price, tax, surcharge, currency, seat_type, source,
        return_rule, stop); multi-segment values are joined with '_'.
      * flights -- dict keyed by '<flight_no>_<dept_id>_<dest_id>' holding
        one 8-field tuple per individual segment.

    An error page yields two empty containers; any parsing failure is
    logged and also yields ``([], {})``.
    '''

    tickets = []
    flights = {}

    if '您访问的页面不存在或暂时无法访问' in htmlcontent:
        return tickets, flights

    try:
        flights_json = flightsPattern.findall(htmlcontent)[0]
        allflights = json.loads(flights_json)['FlightLegList']

        for flightInfo in allflights:
            segs = flightInfo['segs']
            flight = Flight()

            flight.currency = 'CNY'
            flight.seat_type = '经济舱'
            flight.stop = len(segs) - 1
            flight.price = int(flightInfo['cabs'][0]['oprice'])
            flight.tax = int(flightInfo['tax'])
            flight.source = 'elong::elong'
            flight.dur = 0

            flight_nos = []
            plane_nos = []
            airlines = []
            dept_times = []
            dest_times = []

            for singleflightInfo in segs:
                eachFlight = EachFlight()
                eachFlight.flight_no = singleflightInfo['fltno']
                eachFlight.plane_no = singleflightInfo['plane']
                eachFlight.airline = singleflightInfo['corpn']
                eachFlight.dept_id = singleflightInfo['dport']
                eachFlight.dest_id = singleflightInfo['aport']
                # time_shifter converts to the form 2014-07-11T12:06:00
                eachFlight.dept_time = time_shifter(singleflightInfo['dtime'])
                eachFlight.dest_time = time_shifter(singleflightInfo['atime'])
                # 'ftime' is multiplied by 60 to get the segment duration
                eachFlight.dur = int(singleflightInfo['ftime']) * 60

                eachFlight.flight_key = eachFlight.flight_no + '_' + \
                    eachFlight.dept_id + '_' + eachFlight.dest_id

                flights[eachFlight.flight_key] = (
                    eachFlight.flight_no, eachFlight.airline,
                    eachFlight.plane_no, eachFlight.dept_id,
                    eachFlight.dest_id, eachFlight.dept_time,
                    eachFlight.dest_time, eachFlight.dur)

                flight_nos.append(eachFlight.flight_no)
                plane_nos.append(eachFlight.plane_no)
                airlines.append(eachFlight.airline)
                dept_times.append(eachFlight.dept_time)
                dest_times.append(eachFlight.dest_time)

                flight.dur += eachFlight.dur

            # Add the waiting time between each arrival and the next departure.
            for i in range(len(segs) - 1):
                flight.dur += cal_wait_time(dest_times[i], dept_times[i + 1])

            flight.flight_no = '_'.join(flight_nos)
            flight.plane_no = '_'.join(plane_nos)
            flight.airline = '_'.join(airlines)

            # Indexing an empty segment list raises IndexError, which the
            # handler below turns into the ([], {}) failure result.
            flight.dept_id = segs[0]['dport']
            flight.dest_id = segs[-1]['aport']
            flight.dept_time = dept_times[0]
            flight.dest_time = dest_times[-1]
            flight.dept_day = flight.dept_time.split('T')[0]

            flight_tuple = (flight.flight_no, flight.plane_no, flight.airline,
                            flight.dept_id, flight.dest_id, flight.dept_day,
                            flight.dept_time, flight.dest_time, flight.dur,
                            flight.price, flight.tax, flight.surcharge,
                            flight.currency, flight.seat_type, flight.source,
                            flight.return_rule, flight.stop)

            tickets.append(flight_tuple)

    except Exception as e:
        logger.info(str(e))
        return [], {}

    # The success path used to fall off the end and return None.
    return tickets, flights
示例#17
0
def parsePage(content,dept_year):
    """Parse a feifan result page into a list of flight tuples.

    Args:
        content: Raw page text, split into per-flight chunks with
            ``each_flight_content_pat``.
        dept_year: Year string prepended to the month/day captured from the
            page to build full dates.

    Returns:
        A list of 17-field tuples (flight_no, plane_no, airline, dept_id,
        dest_id, dept_day, dept_time, dest_time, dur, price, tax, surcharge,
        currency, seat_type, source, return_rule, stop).  Chunks that fail
        to parse are skipped silently; an unmatched page gives ``[]``.
    """
    flights = []
    time_format = '%Y-%m-%dT%H:%M:%S'

    for each_flight_text in each_flight_content_pat.findall(content):
        flight = Flight()
        try:
            t_price = all_price_pat.findall(each_flight_text)[0]
            each_flight_text_temp = each_flight_content_temp_pat.findall(each_flight_text)[0]
            each_part_flight = each_part_flight_pat.findall(each_flight_text_temp)
            if not each_part_flight:
                continue

            # NOTE(review): a one-second pause per flight inside a pure
            # parser; kept as-is -- confirm whether it is still wanted.
            time.sleep(1)

            first_part = each_part_flight[0]
            last_part = each_part_flight[-1]
            flight.dept_id = airport_pat.findall(first_part)[0][1:-1]
            flight.dest_id = airport_pat.findall(last_part)[-1][1:-1]

            dept_time_temp = dept_time_temp_pat.findall(first_part)[0]
            dest_time_temp = dest_time_temp_pat.findall(last_part)[-1]
            dept_month = dept_time_temp[0].strip()
            flight.dept_day = dept_year + '-' + dept_month + '-' + \
                    dept_time_temp[1].strip()
            flight.dept_time = flight.dept_day + 'T' + dept_time_temp[2].strip() + ':00'
            # NOTE(review): the arrival date reuses the departure year and
            # month; verify behaviour for flights crossing a month boundary.
            flight.dest_time = dept_year + '-' + dept_month + '-' + \
                    dest_time_temp[0].strip() + 'T' + dest_time_temp[1].strip()[-5:] + ':00'

            dept_time = int(time.mktime(
                datetime.datetime.strptime(flight.dept_time, time_format).timetuple()))
            dest_time = int(time.mktime(
                datetime.datetime.strptime(flight.dest_time, time_format).timetuple()))
            # NOTE(review): fixed 3600 s correction, presumably a time-zone
            # offset between the two airports -- confirm.
            flight.dur = dest_time - dept_time + 3600

            flight.stop = len(each_part_flight) - 1

            price_match = price_pat.findall(each_flight_text)[0]
            if len(price_match) > 1:
                flight.price = int(price_match[0])
            else:
                flight.price = int(t_price)

            try:
                flight.tax = int(t_price) - flight.price
            except (ValueError, TypeError):
                flight.tax = -1.0
                logger.info('feifanFlight: Can not parse tax info!')

            # One entry per segment, joined with '_'.
            flight.flight_no = '_'.join(
                flight_no_pat.findall(part)[0][:8].replace(' ','')
                for part in each_part_flight)
            flight.plane_no = '_'.join(
                plane_no_pat.findall(part)[0].replace(' ','')
                for part in each_part_flight)
            flight.airline = '_'.join(
                airline_pat.findall(part)[0].replace(' ','')
                for part in each_part_flight)

            flight.return_rule = return_rule_pat.findall(each_flight_text)[0].replace('<p>','').replace('\n','') \
                    .replace('。','').replace('</p>','。').strip().replace(' ','')
            flight.currency = 'CNY'
            flight.source = 'feifan::feifan'
            flight.seat_type = '经济舱'

            flight_tuple = (flight.flight_no, flight.plane_no, flight.airline, flight.dept_id, flight.dest_id, \
                    flight.dept_day, flight.dept_time, flight.dest_time, flight.dur, flight.price, \
                    flight.tax, flight.surcharge, flight.currency, flight.seat_type, flight.source, \
                    flight.return_rule, flight.stop)

            flights.append(flight_tuple)
        except Exception:
            # Best effort: a chunk that does not match the expected layout
            # is dropped and parsing continues with the next one.
            continue

    # The collected tuples were previously never returned.
    return flights
示例#18
0
                                3:time_str_temp.find('分')].strip()
                        if h_nums_str != '':
                            flight.dur += int(h_nums_str) * 3600
                        if m_nums_str != '':
                            flight.dur += int(m_nums_str) * 60

                    for i in range(1, flight_nums):
                        dept_time_temp = each_flight_json['FlightSegment'][i][
                            'DepartureDate'][6:-2]
                        dest_time_temp = each_flight_json['FlightSegment'][
                            i - 1]['ArrivalDate'][6:-2]
                        flight.dur += (int(dept_time_temp) -
                                       int(dest_time_temp)) / 1000
                flight.dept_time = time.strftime('%Y-%m-%d %H:%M:%S', \
                        time.localtime(float(str(dept_time_tamp)[:-3]))).replace(' ','T')
                flight.dest_time = time.strftime('%Y-%m-%d %H:%M:%S', \
                        time.localtime(float(str(dest_time_tamp)[:-3]))).replace(' ','T')
                flight.dept_day = flight.dept_time.split('T')[0]
                flight.source = 'tongcheng::tongcheng'
                flight.stop = int(flight_nums) - 1
                flight.currency = 'CNY'
                flight.price = each_flight_json['FareInfo'][0]['TCPrice_Audlt']
                flight.tax = each_flight_json['FareInfo'][0]['TaxPrice_Audlt']

                airline_temp = ''
                plane_no_temp = ''

                for i in range(flight_nums):
                    plane_no_temp = plane_no_temp + \
                            each_flight_json['FlightSegment'][i]['Equipment'] + '_'

                    airline_temp = airline_temp + \
示例#19
0
def directFlight_parser(flightstring, date, airports_dict):
    """Build the flight tuple for one direct-flight row of an eLong page.

    Args:
        flightstring: HTML of a single result row containing the
            ``cols01`` .. ``cols06`` table cells.
        date: Departure date string; characters 0-3, 5-6 and 8+ are read as
            year, month and day (e.g. '2014-07-11').
        airports_dict: Mapping from airport name as shown on the page to the
            id to store; names missing from it are stored unchanged.

    Returns:
        A 17-field tuple (flight_no, plane_no, airline, dept_id, dest_id,
        dept_day, dept_time, dest_time, dur, price, tax, surcharge,
        currency, seat_type, source, return_rule, stop).

    Raises:
        IndexError: if any expected cell or field is missing from the row.
        ValueError: if the price, tax or date parts are not numeric.
    """
    flight = Flight()

    def first(pattern, text):
        # First match of pattern in text, with '.' also matching newlines.
        return re.findall(pattern, text, re.S)[0]

    # For a direct flight each column lookup yields a single-element list.
    cols01 = first(r'<td class="cols01">(.*?)</td>', flightstring)
    cols02 = first(r'<td class="cols02">(.*?)</td>', flightstring)
    cols03 = first(r'<td class="cols03">(.*?)</td>', flightstring)
    cols04 = first(r'<td class="cols04">(.*?)</td>', flightstring)
    # cols05/cols06 are not read further; the lookups are kept so a row
    # missing either cell still raises IndexError as before.
    first(r'<td class="cols05">(.*?)</td>', flightstring)
    first(r'<td class="cols06">(.*?)</td>', flightstring)

    aircorp = first(r'</span>(.*?)<br />', cols01).strip()
    flight_no = first(r'<br />(.*?)&nbsp', cols01).strip()
    plane_type = first(r'method="PlaneType" >(.*?)</a>', cols01).strip()

    dept_airport = first(r'</span>(.*?)<br />', cols02).strip()
    dept_time = first(r'<span class=" t14 bold black">(.*?)</span>', cols02).strip()
    arr_time_airport = first(r'<br />(.*?)$', cols02).strip()

    arr_parts = arr_time_airport.split(' ')
    arr_time = arr_parts[0].strip()
    arr_airport = arr_parts[-1].strip()
    days = 0
    if arr_time_airport.find('+1天') != -1:
        # Next-day arrival: drop the parenthesised suffix from the time.
        days = 1
        arr_time = arr_time.split('(')[0].strip()

    during_time = first(r'(.*?)<br />', cols03).strip()
    during = timeshifter([dept_time, arr_time, during_time])

    dept_date = datetime.datetime(int(date[0:4]), int(date[5:7]), int(date[8:]))
    dest_date = dept_date + datetime.timedelta(days)
    dept_daytime = date + 'T' + dept_time + ':00'
    dest_daytime = str(dest_date).split(' ')[0] + 'T' + arr_time + ':00'

    price = first(r'</span>(.*?)</span>', cols04).strip()
    tax = first(r'参考税 &yen;(.*?)<div class', cols04).strip()

    flight.flight_no = flight_no
    flight.plane_no = plane_type
    flight.airline = aircorp
    # Fall back to the raw airport name when no id is known for it.
    flight.dept_id = airports_dict.get(dept_airport, dept_airport)
    flight.dest_id = airports_dict.get(arr_airport, arr_airport)
    flight.dept_day = date
    flight.dept_time = dept_daytime
    flight.dest_time = dest_daytime
    flight.dur = during
    flight.price = float(price)
    flight.tax = float(tax)
    flight.surcharge = -1.0
    flight.currency = 'CNY'
    flight.seat_type = '经济舱'
    flight.source = 'elong::elong'
    flight.return_rule = 'NULL'
    flight.stop = 0

    flight_tuple = (flight.flight_no, flight.plane_no, flight.airline,
                    flight.dept_id, flight.dest_id, flight.dept_day,
                    flight.dept_time, flight.dest_time, flight.dur,
                    flight.price, flight.tax, flight.surcharge,
                    flight.currency, flight.seat_type, flight.source,
                    flight.return_rule, flight.stop)

    return flight_tuple
示例#20
0
def elong_page_parser(htmlcontent):
    '''
    Parse an eLong search-result page into ticket and segment records.

    The flight JSON is taken from the first ``flightsPattern`` match and its
    'FlightLegList' entry is walked leg by leg.

    Returns a ``(tickets, flights)`` pair:
      * tickets -- list of 17-field tuples, one per itinerary (flight_no,
        plane_no, airline, dept_id, dest_id, dept_day, dept_time, dest_time,
        dur, price, tax, surcharge, currency, seat_type, source,
        return_rule, stop); multi-segment values are joined with '_'.
      * flights -- dict keyed by '<flight_no>_<dept_id>_<dest_id>' holding
        one 8-field tuple per individual segment.

    An error page yields two empty containers; any parsing failure is
    logged and also yields ``([], {})``.
    '''

    tickets = []
    flights = {}

    if '您访问的页面不存在或暂时无法访问' in htmlcontent:
        return tickets, flights

    try:
        flights_json = flightsPattern.findall(htmlcontent)[0]
        allflights = json.loads(flights_json)['FlightLegList']

        for flightInfo in allflights:
            segs = flightInfo['segs']
            flight = Flight()

            flight.currency = 'CNY'
            flight.seat_type = '经济舱'
            flight.stop = len(segs) - 1
            flight.price = int(flightInfo['cabs'][0]['oprice'])
            flight.tax = int(flightInfo['tax'])
            flight.source = 'elong::elong'
            flight.dur = 0

            flight_nos = []
            plane_nos = []
            airlines = []
            dept_times = []
            dest_times = []

            for singleflightInfo in segs:
                eachFlight = EachFlight()
                eachFlight.flight_no = singleflightInfo['fltno']
                eachFlight.plane_no = singleflightInfo['plane']
                eachFlight.airline = singleflightInfo['corpn']
                eachFlight.dept_id = singleflightInfo['dport']
                eachFlight.dest_id = singleflightInfo['aport']
                # time_shifter converts to the form 2014-07-11T12:06:00
                eachFlight.dept_time = time_shifter(singleflightInfo['dtime'])
                eachFlight.dest_time = time_shifter(singleflightInfo['atime'])
                # 'ftime' is multiplied by 60 to get the segment duration
                eachFlight.dur = int(singleflightInfo['ftime']) * 60

                eachFlight.flight_key = eachFlight.flight_no + '_' + \
                    eachFlight.dept_id + '_' + eachFlight.dest_id

                flights[eachFlight.flight_key] = (
                    eachFlight.flight_no, eachFlight.airline,
                    eachFlight.plane_no, eachFlight.dept_id,
                    eachFlight.dest_id, eachFlight.dept_time,
                    eachFlight.dest_time, eachFlight.dur)

                flight_nos.append(eachFlight.flight_no)
                plane_nos.append(eachFlight.plane_no)
                airlines.append(eachFlight.airline)
                dept_times.append(eachFlight.dept_time)
                dest_times.append(eachFlight.dest_time)

                flight.dur += eachFlight.dur

            # Add the waiting time between each arrival and the next departure.
            for i in range(len(segs) - 1):
                flight.dur += cal_wait_time(dest_times[i], dept_times[i + 1])

            flight.flight_no = '_'.join(flight_nos)
            flight.plane_no = '_'.join(plane_nos)
            flight.airline = '_'.join(airlines)

            # Indexing an empty segment list raises IndexError, which the
            # handler below turns into the ([], {}) failure result.
            flight.dept_id = segs[0]['dport']
            flight.dest_id = segs[-1]['aport']
            flight.dept_time = dept_times[0]
            flight.dest_time = dest_times[-1]
            flight.dept_day = flight.dept_time.split('T')[0]

            flight_tuple = (flight.flight_no, flight.plane_no, flight.airline,
                            flight.dept_id, flight.dest_id, flight.dept_day,
                            flight.dept_time, flight.dest_time, flight.dur,
                            flight.price, flight.tax, flight.surcharge,
                            flight.currency, flight.seat_type, flight.source,
                            flight.return_rule, flight.stop)

            tickets.append(flight_tuple)

    except Exception as e:
        logger.info(str(e))
        return [], {}

    # The success path used to fall off the end and return None.
    return tickets, flights
示例#21
0
        flight.plane_no = ''

        flight_dur = 0
        
        #direct
        if flight.stop == 0:
            for single_flight in segments[0]['flights']:
                flight.flight_no = single_flight['flightNumber']
                try:
                    flight.airline = Airline[single_flight['airCo']]
                except:
                    flight.airline = single_flight['airCo']

                flight.plane_no = single_flight['equipType']
                flight.dept_time = CalDateTime(single_flight['fromDate'], single_flight['fromTime'])
                flight.dest_time = CalDateTime(single_flight['toDate'], single_flight['toTime'])
                flight.dur = CalDur(single_flight['duration'])

                flight_tuple = (flight.flight_no,flight.plane_no,flight.airline,flight.dept_id,flight.dest_id,flight.dept_day,\
                        flight.dept_time,flight.dest_time,flight.dur,flight.price,flight.tax,flight.surcharge,\
                        flight.currency,flight.seat_type,flight.source,flight.return_rule,flight.stop)

                tickets.append(flight_tuple)

        elif flight.stop == 1:
            for i in range(0, len(segments[0]['flights'])):
                for j in range(0, len(segments[1]['flights'])):

                    single_flight1 = segments[0]['flights'][i]
                    single_flight2 = segments[1]['flights'][j]
示例#22
0
def parsePage(content, dept_year):
    """Parse a feifan result page into a list of flight tuples.

    Args:
        content: Raw page text, split into per-flight chunks with
            ``each_flight_content_pat``.
        dept_year: Year string prepended to the month/day captured from the
            page to build full dates.

    Returns:
        A list of 17-field tuples (flight_no, plane_no, airline, dept_id,
        dest_id, dept_day, dept_time, dest_time, dur, price, tax, surcharge,
        currency, seat_type, source, return_rule, stop).  Chunks that fail
        to parse are skipped silently; an unmatched page gives ``[]``.
    """
    flights = []
    time_format = '%Y-%m-%dT%H:%M:%S'

    for each_flight_text in each_flight_content_pat.findall(content):
        flight = Flight()
        try:
            t_price = all_price_pat.findall(each_flight_text)[0]
            each_flight_text_temp = each_flight_content_temp_pat.findall(
                each_flight_text)[0]
            each_part_flight = each_part_flight_pat.findall(
                each_flight_text_temp)
            if not each_part_flight:
                continue

            # NOTE(review): a one-second pause per flight inside a pure
            # parser; kept as-is -- confirm whether it is still wanted.
            time.sleep(1)

            first_part = each_part_flight[0]
            last_part = each_part_flight[-1]
            flight.dept_id = airport_pat.findall(first_part)[0][1:-1]
            flight.dest_id = airport_pat.findall(last_part)[-1][1:-1]

            dept_time_temp = dept_time_temp_pat.findall(first_part)[0]
            dest_time_temp = dest_time_temp_pat.findall(last_part)[-1]
            dept_month = dept_time_temp[0].strip()
            flight.dept_day = dept_year + '-' + dept_month + '-' + \
                    dept_time_temp[1].strip()
            flight.dept_time = flight.dept_day + 'T' + \
                    dept_time_temp[2].strip() + ':00'
            # NOTE(review): the arrival date reuses the departure year and
            # month; verify behaviour for flights crossing a month boundary.
            flight.dest_time = dept_year + '-' + dept_month + '-' + \
                    dest_time_temp[0].strip() + 'T' + dest_time_temp[1].strip()[-5:] + ':00'

            dept_time = int(time.mktime(
                datetime.datetime.strptime(flight.dept_time, time_format).timetuple()))
            dest_time = int(time.mktime(
                datetime.datetime.strptime(flight.dest_time, time_format).timetuple()))
            # NOTE(review): fixed 3600 s correction, presumably a time-zone
            # offset between the two airports -- confirm.
            flight.dur = dest_time - dept_time + 3600

            flight.stop = len(each_part_flight) - 1

            price_match = price_pat.findall(each_flight_text)[0]
            if len(price_match) > 1:
                flight.price = int(price_match[0])
            else:
                flight.price = int(t_price)

            try:
                flight.tax = int(t_price) - flight.price
            except (ValueError, TypeError):
                flight.tax = -1.0
                logger.info('feifanFlight: Can not parse tax info!')

            # One entry per segment, joined with '_'.
            flight.flight_no = '_'.join(
                flight_no_pat.findall(part)[0][:8].replace(' ', '')
                for part in each_part_flight)
            flight.plane_no = '_'.join(
                plane_no_pat.findall(part)[0].replace(' ', '')
                for part in each_part_flight)
            flight.airline = '_'.join(
                airline_pat.findall(part)[0].replace(' ', '')
                for part in each_part_flight)

            flight.return_rule = return_rule_pat.findall(each_flight_text)[0].replace('<p>','').replace('\n','') \
                    .replace('。','').replace('</p>','。').strip().replace(' ','')
            flight.currency = 'CNY'
            flight.source = 'feifan::feifan'
            flight.seat_type = '经济舱'

            flight_tuple = (flight.flight_no, flight.plane_no, flight.airline, flight.dept_id, flight.dest_id, \
                    flight.dept_day, flight.dept_time, flight.dest_time, flight.dur, flight.price, \
                    flight.tax, flight.surcharge, flight.currency, flight.seat_type, flight.source, \
                    flight.return_rule, flight.stop)

            flights.append(flight_tuple)
        except Exception:
            # Best effort: a chunk that does not match the expected layout
            # is dropped and parsing continues with the next one.
            continue

    # The collected tuples were previously never returned.
    return flights
示例#23
0
def parse_page(content, price_dict):
    """Parse a jijitong JSON response into ticket and segment records.

    Args:
        content: JSON text of the response.
        price_dict: Mapping from the itinerary key (the 'Key' field, used as
            flight number) to its price.

    Returns:
        A dict ``{'ticket': [...], 'flight': {...}}``.  'ticket' holds one
        17-field tuple per itinerary; 'flight' maps
        '<flight_no>_<dept_id>_<dest_id>' to an 8-field tuple per segment.
        Both are empty when the text is not valid JSON, when 'Status' is
        not 'SUCCESS', or when there is no 'datalist'.  Itineraries that
        fail to parse are logged and skipped.
    """
    flights = {}
    tickets = []
    result = {'ticket':tickets, 'flight':flights}

    try:
        json_temp = json.loads(content)
    except (ValueError, TypeError):
        return result

    if not isinstance(json_temp, dict) or json_temp.get('Status') != 'SUCCESS':
        return result

    for each_flight_json in json_temp.get('datalist') or []:
        flight = Flight()
        try:
            flight.flight_no = each_flight_json['Key']
            flight.stop = int(each_flight_json['OW'])
            flight.price = price_dict[flight.flight_no]
            flight.tax = each_flight_json['AIP'][0]['TX']

            odo = each_flight_json['ODO']
            flight.dept_id = odo[0]['OL']
            flight.dest_id = odo[-1]['DL']
            flight.dept_time = odo[0]['DD'] + ':00'
            flight.dest_time = odo[-1]['AD'] + ':00'
            flight.currency = 'CNY'

            flight.source = 'jijitong::jijitong'
            flight.seat_type = '经济舱'
            flight.dept_day = flight.dept_time.split('T')[0]

            # The key holds one '_'-separated number per segment.
            flight_num = len(flight.flight_no.split('_'))

            # Total duration = time of every segment ('ET' times 60) plus
            # the gap between each arrival and the following departure.
            flight.dur = sum(int(leg['ET']) * 60 for leg in odo[:flight_num])
            for x in range(1, flight_num):
                flight.dur += durCal(odo[x - 1]['AD'], odo[x]['DD'])

            plane_no = ''
            airline = ''
            for each_json_temp in odo:

                plane_no = plane_no + each_json_temp['EQ'] + '_'
                airline = airline + each_json_temp['COA'] + '_'

                try:
                    eachflight = EachFlight()
                    eachflight.flight_no = each_json_temp['MA']
                    eachflight.dept_id = each_json_temp['OL']
                    eachflight.dest_id = each_json_temp['DL']
                    eachflight.airline = each_json_temp['COA']
                    eachflight.plane_no = each_json_temp['EQ']
                    eachflight.dept_time = each_json_temp['DD'] + ':00'
                    eachflight.dest_time = each_json_temp['AD'] + ':00'
                    eachflight.dur = int(each_json_temp['ET']) * 60

                    eachflight.flight_key = eachflight.flight_no + '_' + \
                        eachflight.dept_id + '_' + eachflight.dest_id

                    flights[eachflight.flight_key] = (
                        eachflight.flight_no, eachflight.airline,
                        eachflight.plane_no, eachflight.dept_id,
                        eachflight.dest_id, eachflight.dept_time,
                        eachflight.dest_time, eachflight.dur)
                except Exception as e:
                    # A bad segment record only loses its own entry in
                    # 'flight'; the itinerary is still emitted.
                    logger.error('Can not parse segment info!' + str(e))
                    continue

            flight.plane_no = plane_no[:-1]
            flight.airline = airline[:-1]
            flight_tuple = (flight.flight_no, flight.plane_no, flight.airline, flight.dept_id, \
                    flight.dest_id, flight.dept_day, flight.dept_time, flight.dest_time, \
                    flight.dur, flight.price, flight.tax, flight.surcharge, flight.currency, \
                    flight.seat_type, flight.source, flight.return_rule, flight.stop)

            tickets.append(flight_tuple)
        except Exception as e:
            logger.error('Can not parse flight info!' + str(e))
            continue

    # Every path now hands back the result dict; previously only the
    # invalid-JSON path did.
    return result
示例#24
0
            
            flight.dur = int(flight_info_list[0][5]) * 60
            flight.dept_id = flight_info_list[0][1]
            flight.dest_id = flight_info_list[0][3]
            dept_day_temp = flight_info_list[0][4]
            flight.dept_day = day_calculator(dept_day_temp)

            each_flight_list = flight_info_list[0][7]

            dept_time_mins = int(each_flight_list[0][5])

            flight.dept_time = time_calculator(flight.dept_day, dept_time_mins)
            dest_time_day = each_flight_list[-1][6]
            dest_time_mins = each_flight_list[-1][7]
            dest_time_day = day_calculator(dest_time_day)
            flight.dest_time = time_calculator(dest_time_day, dest_time_mins)

            flight_no = ''
            airline = ''
            plane_no = ''

            for each_flight_content in each_flight_list:

                flight_no += each_flight_content[2] + each_flight_content[10] + '_'
                airline_temp = ''
                plane_no += each_flight_content[12] + '_'
                try:
                    airline_temp = airline_dict[each_flight_content[2]]
                except:
                    logger.info('[AIRLINECODE]' + each_flight_content[2])
示例#25
0
def parser(content, year=2014):
    """Parse a wego search-result page into one flight tuple per ticket offer.

    The page is cut into itinerary sections with the module-level
    ``section_pat``. Every offer found inside a section (``tickets_info_pat``)
    yields one tuple, so an itinerary sold by several sites produces several
    tuples that differ only in price and source.

    Args:
        content: Raw page text.
        year: Year prepended to the departure/arrival stamps before they are
            parsed (the stamps are parsed as day, month and 12-hour time
            only). Defaults to 2014, the value that used to be hard-coded.

    Returns:
        A list of 17-item tuples ``(flight_no, plane_no, airline, dept_id,
        dest_id, dept_day, dept_time, dest_time, dur, price, tax, surcharge,
        currency, seat_type, source, return_rule, stop)``. Times are
        ``YYYY-MM-DDTHH:MM:SS`` strings, ``dur`` is in seconds (-1 when the
        section shows no duration) and ``price`` is the scraped text with the
        thousands separators removed (it is not converted to a number).

    Raises:
        IndexError: a section lacks a field that is read positionally.
        ValueError: a time stamp does not match ``%Y %d %b %I:%M %p`` or a
            duration component is not an integer.
    """

    def to_iso(raw_stamp):
        # The first four characters are skipped and commas dropped before
        # parsing (presumably a weekday prefix - TODO confirm against a page).
        stamp = '%s %s' % (year, raw_stamp[4:].replace(',', ''))
        return datetime.strptime(stamp, '%Y %d %b %I:%M %p').isoformat()

    def first_int(pattern, text):
        # A missing component (e.g. no "d" part) counts as zero.
        found = pattern.findall(text)
        return int(found[0]) if found else 0

    flights = []

    for section in section_pat.findall(content):
        # Legs are ':'-separated and each leg keeps only the text before its
        # first '-'. Joining with '_' treats one-leg and multi-leg itineraries
        # alike; the previous code sliced off the first character of a
        # single-leg flight number.
        legs = flight_no_pat.findall(section)[0].split(':')
        flight_no = '_'.join(leg.split('-', 1)[0] for leg in legs)

        airline = airline_name_pat.findall(section)[0]
        dept_id = departure_code_pat.findall(section)[0]
        # The last arrival code/time in the section belongs to the final leg.
        dest_id = arrival_code_pat.findall(section)[-1]
        dept_time = to_iso(departure_time_pat.findall(section)[0])
        dest_time = to_iso(arrival_time_pat.findall(section)[-1])

        # When several duration strings match, the last one wins (unchanged
        # from the original behaviour). -1 marks "no duration found".
        duration = -1
        for span in flight_duration_pat.findall(section):
            duration = (first_int(day_pat, span) * 86400 +
                        first_int(hour_pat, span) * 3600 +
                        first_int(min_pat, span) * 60)

        # The stop count is derived from the combined flight number.
        stops = len(stops_pat.findall(flight_no))

        for offer in tickets_info_pat.findall(section):
            raw_price = tickets_price_pat.findall(offer)[0]
            price = raw_price[raw_price.find('>') + 1:].replace(',', '')

            # The seller name is the last path component of the matched URL,
            # without its extension, cut at the first remaining dot.
            site_url = tickets_web_pat.findall(offer)[0]
            site = site_url[site_url.rfind('/') + 1:site_url.rfind('.')]
            site = site.replace('-', '_')
            dot = site.find('.')
            if dot > 0:
                site = site[:dot]

            flight = Flight()
            flight.flight_no = flight_no
            flight.plane_no = 'NULL'
            flight.airline = airline
            flight.dept_id = dept_id
            flight.dest_id = dest_id
            flight.dept_time = dept_time
            flight.dest_time = dest_time
            flight.dept_day = dept_time.split('T')[0]
            flight.dur = duration
            flight.price = price
            flight.tax = -1.0
            flight.surcharge = -1.0
            flight.currency = "CNY"
            flight.seat_type = '经济舱'
            flight.source = 'wego::' + site
            flight.return_rule = 'NULL'
            flight.stop = stops

            flights.append((
                flight.flight_no, flight.plane_no, flight.airline,
                flight.dept_id, flight.dest_id, flight.dept_day,
                flight.dept_time, flight.dest_time, flight.dur, flight.price,
                flight.tax, flight.surcharge, flight.currency,
                flight.seat_type, flight.source, flight.return_rule,
                flight.stop,
            ))

    return flights
示例#26
0
        flight_dur = 0

        #direct
        if flight.stop == 0:
            for single_flight in segments[0]['flights']:
                flight.flight_no = single_flight['flightNumber']
                try:
                    flight.airline = Airline[single_flight['airCo']]
                except:
                    flight.airline = single_flight['airCo']

                flight.plane_no = single_flight['equipType']
                flight.dept_time = CalDateTime(single_flight['fromDate'],
                                               single_flight['fromTime'])
                flight.dest_time = CalDateTime(single_flight['toDate'],
                                               single_flight['toTime'])
                flight.dur = CalDur(single_flight['duration'])

                flight_tuple = (flight.flight_no,flight.plane_no,flight.airline,flight.dept_id,flight.dest_id,flight.dept_day,\
                        flight.dept_time,flight.dest_time,flight.dur,flight.price,flight.tax,flight.surcharge,\
                        flight.currency,flight.seat_type,flight.source,flight.return_rule,flight.stop)

                tickets.append(flight_tuple)

        elif flight.stop == 1:
            for i in range(0, len(segments[0]['flights'])):
                for j in range(0, len(segments[1]['flights'])):

                    single_flight1 = segments[0]['flights'][i]
                    single_flight2 = segments[1]['flights'][j]
示例#27
0
def transferFlight_parser(flightstring, date, airports_dict):
    """Parse the HTML of one elong transfer itinerary into a ``Flight``.

    The fragment is expected to hold one ``cols01`` cell (airline, flight
    number, plane type) and one ``cols02`` cell (departure/arrival times and
    airports) per leg, plus a ``cols04`` cell with the price and tax.

    Args:
        flightstring: HTML fragment describing the itinerary.
        date: Departure date as ``YYYY-MM-DD`` (sliced positionally).
        airports_dict: Maps the airport text scraped from the page to an
            airport id; text missing from it is stored unchanged.

    Returns:
        A populated ``Flight`` for itineraries with one or two transfers,
        otherwise an empty list (direct flights, more than two transfers, or
        no leg cells at all). ``dur`` is always 0; duration is not computed.

    Raises:
        IndexError: an expected piece of markup is missing.
        ValueError: the date, price or tax is not numeric.
    """

    def cells(css_class):
        return re.findall(r'<td class="%s">(.*?)</td>' % css_class,
                          flightstring, re.S)

    # Transfer itineraries repeat cols01/cols02 per leg; cols04 occurs once.
    cols01 = cells('cols01')
    stop = len(cols01) - 1
    # Only one or two transfers are supported; reject everything else before
    # doing any further parsing.
    if stop not in (1, 2):
        return []
    cols02 = cells('cols02')
    cols04 = cells('cols04')[0]

    aircorps = []
    flight_nos = []
    plane_types = []
    airports = []
    dept_times = []
    arr_times = []
    days = 0  # day offset between departure date and final arrival date

    for i, leg_cell in enumerate(cols01):
        route_cell = cols02[i]

        aircorps.append(
            re.findall(r'</span>(.*?)<br />', leg_cell, re.S)[0].strip())
        flight_nos.append(
            re.findall(r'<br />(.*?)&nbsp', leg_cell, re.S)[0].strip())
        plane_types.append(
            re.findall(r'method="PlaneType" >(.*?)</a>', leg_cell,
                       re.S)[0].strip())

        dept_airport = re.findall(r'</span>(.*?)<br />', route_cell,
                                  re.S)[0].strip()
        # A "+N days" marker in the departure text shifts the arrival date.
        if '+2天' in dept_airport:
            days += 2
        elif '+1天' in dept_airport:
            days += 1

        arr_time_airport = re.findall(r'<br />(.*?)$', route_cell,
                                      re.S)[0].strip()
        dept_time = re.findall(r'<span class=" t14 bold black">(.*?)</span>',
                               route_cell, re.S)[0].strip()

        pieces = arr_time_airport.split(' ')
        arr_time = pieces[0].strip()
        arr_airport = pieces[-1].strip()
        if '+1天' in arr_time_airport:
            # Drop the parenthesised "+1 day" suffix glued to the time.
            arr_time = arr_time.split('(')[0].strip()
            # An overnight arrival only moves the final date on the last leg.
            if i == stop:
                days += 1

        airports.append(dept_airport)
        airports.append(arr_airport)
        dept_times.append(dept_time)
        arr_times.append(arr_time)

    dept_date = datetime.datetime(int(date[0:4]), int(date[5:7]),
                                  int(date[8:]))
    dest_date = dept_date + datetime.timedelta(days)
    dept_daytime = date + 'T' + dept_times[0] + ':00'
    dest_daytime = dest_date.date().isoformat() + 'T' + arr_times[-1] + ':00'

    price = re.findall(r'</span>(.*?)</span>', cols04, re.S)[0].strip()
    tax = re.findall(r'参考税 &yen;(.*?)<div class', cols04, re.S)[0].strip()

    flight = Flight()
    flight.stop = stop
    flight.flight_no = '_'.join(flight_nos)
    # Previously the third plane type was taken from flight_nos by mistake.
    flight.plane_no = '_'.join(plane_types)
    # Could also be collapsed into a generic "multiple airlines" label.
    flight.airline = '_'.join(aircorps)
    flight.dept_id = airports_dict.get(airports[0], airports[0])
    flight.dest_id = airports_dict.get(airports[-1], airports[-1])
    flight.dept_day = date
    flight.dept_time = dept_daytime
    flight.dest_time = dest_daytime
    flight.dur = 0
    flight.price = int(price)
    flight.tax = int(tax)
    flight.surcharge = -1.0
    flight.currency = 'CNY'
    flight.seat_type = '经济舱'
    flight.source = 'elong::elong'
    flight.return_rule = 'NULL'

    return flight
示例#28
0
def ceair_page_parser(content):
    """Turn a ceair search response into ticket tuples and per-leg records.

    Everything before the first ``{`` in ``content`` is discarded and the
    rest is decoded as JSON. A non-empty ``resultMsg`` means there is nothing
    to parse and two empty containers are returned.

    Returns:
        ``(tickets, flights)`` where ``tickets`` is a list of 17-item tuples
        ``(flight_no, plane_no, airline, dept_id, dest_id, dept_day,
        dept_time, dest_time, dur, price, tax, surcharge, currency,
        seat_type, source, return_rule, stop)`` - one per priced cabin of
        each routing - and ``flights`` maps ``flightNo_dept_dest`` to an
        8-item tuple describing a single leg. ``tax``, ``surcharge`` and
        ``return_rule`` are never assigned here, so they carry whatever
        ``Flight()`` initialises them to.
    """
    tickets = []
    flights = {}

    payload = json.loads(content[content.find('{'):])
    if payload['resultMsg'] != '':
        return tickets, flights

    currency = payload['currency']

    for routing in payload['tripItemList'][0]['airRoutingList']:
        legs = routing['flightList']

        flight = Flight()
        flight.source = 'ceair::ceair'
        flight.stop = len(legs) - 1
        flight.currency = currency

        # Itinerary endpoints come from the first and the last leg.
        first_leg = legs[0]
        last_leg = legs[-1]
        flight.dept_id = first_leg['deptCd']
        flight.dest_id = last_leg['arrCd']
        flight.dept_time = standard_timeformatter(first_leg['deptTime'])
        flight.dest_time = standard_timeformatter(last_leg['arrTime'])
        flight.dept_day = first_leg['deptTime'].split(' ')[0]

        leg_numbers = []
        leg_planes = []
        leg_airlines = []
        air_seconds = []
        layover_seconds = []

        for leg in legs:
            segment = EachFlight()
            segment.flight_no = leg['flightNo']
            segment.airline = '东方航空'
            segment.plane_no = leg['acfamily']
            segment.dept_id = leg['deptCd']
            segment.dest_id = leg['arrCd']
            segment.dept_time = standard_timeformatter(leg['deptTime'])
            segment.dest_time = standard_timeformatter(leg['arrTime'])
            segment.dur = hm_to_sec(leg['duration'])
            segment.flight_key = segment.flight_no + '_' + segment.dept_id + '_' + segment.dest_id

            # A later leg with the same key overwrites the earlier record.
            flights[segment.flight_key] = (
                segment.flight_no,
                segment.airline,
                segment.plane_no,
                segment.dept_id,
                segment.dest_id,
                segment.dept_time,
                segment.dest_time,
                segment.dur,
            )

            leg_numbers.append(segment.flight_no)
            leg_planes.append(segment.plane_no)
            leg_airlines.append(segment.airline)
            air_seconds.append(segment.dur)
            layover_seconds.append(hm_to_sec(leg['stayTime']))

        flight.flight_no = '_'.join(leg_numbers)
        flight.plane_no = '_'.join(leg_planes)
        flight.airline = '_'.join(leg_airlines)
        # Total duration is air time of every leg plus every stay time.
        flight.dur = sum(air_seconds + layover_seconds)

        # One ticket per cabin that actually carries a price.
        for cabin_key, cabin_name in (('economy', '经济舱'),
                                      ('business', '商务舱')):
            if routing['priceDisp'][cabin_key] == '':
                continue
            flight.seat_type = cabin_name
            flight.price = int(routing['priceDisp'][cabin_key])
            tickets.append((
                flight.flight_no, flight.plane_no, flight.airline,
                flight.dept_id, flight.dest_id, flight.dept_day,
                flight.dept_time, flight.dest_time, flight.dur, flight.price,
                flight.tax, flight.surcharge, flight.currency,
                flight.seat_type, flight.source, flight.return_rule,
                flight.stop,
            ))

    return tickets, flights
示例#29
0
def transferFlight_parser(flightstring, date, airports_dict):
    """Parse the HTML of one elong transfer itinerary into a flight tuple.

    The fragment is expected to hold one ``cols01`` cell (airline, flight
    number, plane type), one ``cols02`` cell (departure/arrival times and
    airports) and one ``cols03`` cell (leg duration) per leg, plus a
    ``cols04`` cell with the price and tax.

    Args:
        flightstring: HTML fragment describing the itinerary.
        date: Departure date as ``YYYY-MM-DD`` (sliced positionally).
        airports_dict: Maps the airport text scraped from the page to an
            airport id; text missing from it is stored unchanged.

    Returns:
        For itineraries with one or two transfers, the 17-item tuple
        ``(flight_no, plane_no, airline, dept_id, dest_id, dept_day,
        dept_time, dest_time, dur, price, tax, surcharge, currency,
        seat_type, source, return_rule, stop)``; otherwise an empty list
        (direct flights, more than two transfers, or no leg cells at all).
        ``dur`` is whatever ``timeshifter`` returns for the collected
        ``[dept_time, arr_time, duration, ...]`` list.

    Raises:
        IndexError: an expected piece of markup is missing.
        ValueError: the date, price or tax is not numeric.
    """

    def cells(css_class):
        return re.findall(r'<td class="%s">(.*?)</td>' % css_class,
                          flightstring, re.S)

    # Transfer itineraries repeat cols01-03 per leg; cols04 occurs once.
    cols01 = cells('cols01')
    stop = len(cols01) - 1
    # Only one or two transfers are supported; reject everything else before
    # doing any further parsing.
    if stop not in (1, 2):
        return []
    cols02 = cells('cols02')
    cols03 = cells('cols03')
    cols04 = cells('cols04')[0]

    aircorps = []
    flight_nos = []
    plane_types = []
    airports = []
    timeinfo = []  # dept_time, arr_time, duration text - repeated per leg
    days = 0  # day offset between departure date and final arrival date

    for i, leg_cell in enumerate(cols01):
        route_cell = cols02[i]

        aircorps.append(
            re.findall(r'</span>(.*?)<br />', leg_cell, re.S)[0].strip())
        flight_nos.append(
            re.findall(r'<br />(.*?)&nbsp', leg_cell, re.S)[0].strip())
        plane_types.append(
            re.findall(r'method="PlaneType" >(.*?)</a>', leg_cell,
                       re.S)[0].strip())

        dept_airport = re.findall(r'</span>(.*?)<br />', route_cell,
                                  re.S)[0].strip()
        # A "+N days" marker in the departure text shifts the arrival date.
        if '+2天' in dept_airport:
            days += 2
        elif '+1天' in dept_airport:
            days += 1

        arr_time_airport = re.findall(r'<br />(.*?)$', route_cell,
                                      re.S)[0].strip()
        dept_time = re.findall(r'<span class=" t14 bold black">(.*?)</span>',
                               route_cell, re.S)[0].strip()

        pieces = arr_time_airport.split(' ')
        arr_time = pieces[0].strip()
        arr_airport = pieces[-1].strip()
        if '+1天' in arr_time_airport:
            # Drop the parenthesised "+1 day" suffix glued to the time.
            arr_time = arr_time.split('(')[0].strip()
            # An overnight arrival only moves the final date on the last leg.
            if i == stop:
                days += 1

        during_time = re.findall(r'(.*?)<br />', cols03[i], re.S)[0].strip()

        airports.append(dept_airport)
        airports.append(arr_airport)
        timeinfo.append(dept_time)
        timeinfo.append(arr_time)
        timeinfo.append(during_time)

    during = timeshifter(timeinfo)
    dept_date = datetime.datetime(int(date[0:4]), int(date[5:7]),
                                  int(date[8:]))
    dest_date = dept_date + datetime.timedelta(days)
    dept_daytime = date + 'T' + timeinfo[0] + ':00'
    # timeinfo[-2] is the arrival time of the last leg.
    dest_daytime = dest_date.date().isoformat() + 'T' + timeinfo[-2] + ':00'

    price = re.findall(r'</span>(.*?)</span>', cols04, re.S)[0].strip()
    tax = re.findall(r'参考税 &yen;(.*?)<div class', cols04, re.S)[0].strip()

    flight = Flight()
    flight.stop = stop
    flight.flight_no = '_'.join(flight_nos)
    # Previously the third plane type was taken from flight_nos by mistake.
    flight.plane_no = '_'.join(plane_types)
    # Could also be collapsed into a generic "multiple airlines" label.
    flight.airline = '_'.join(aircorps)
    flight.dept_id = airports_dict.get(airports[0], airports[0])
    flight.dest_id = airports_dict.get(airports[-1], airports[-1])
    flight.dept_day = date
    flight.dept_time = dept_daytime
    flight.dest_time = dest_daytime
    flight.dur = during
    flight.price = int(price)
    flight.tax = int(tax)
    flight.surcharge = -1.0
    flight.currency = 'CNY'
    flight.seat_type = '经济舱'
    flight.source = 'elong::elong'
    flight.return_rule = 'NULL'

    flight_tuple = (
        flight.flight_no, flight.plane_no, flight.airline, flight.dept_id,
        flight.dest_id, flight.dept_day, flight.dept_time, flight.dest_time,
        flight.dur, flight.price, flight.tax, flight.surcharge,
        flight.currency, flight.seat_type, flight.source, flight.return_rule,
        flight.stop,
    )

    return flight_tuple
示例#30
0
        for flight_info in flight_infos:
            flight = Flight()
            flight_aircorp = ''
            flight_plane = ''
            flight_no = ''

            flight.price = int(float(flight_info['adultSalesPrice']) + 1)#解析出数据是小数,取int加1
            flight.tax = int(float(flight_info['adultTax']) + 1)
            flight.dur = int(flight_info['tripTime']) * 60

            segments = flight_info['trips'][0]['segments']
            
            flight.dept_id = segments[0]['departureAirportCode']
            flight.dest_id = segments[-1]['arrivalAirportCode']
            flight.dept_time = timeshifter(segments[0]['departureTime'])
            flight.dest_time = timeshifter(segments[-1]['arrivalTime'])
            flight.dept_day = flight.dept_time.split('T')[0]

            flight.currency = currency
            flight.seat_type = '经济舱'
            flight.stop = len(segments) - 1

            flight.source = source

            for segment in segments:
                flight_aircorp += segment['airlineName'] + '_'
                flight_plane += segment['aircraftCode'].split(' ')[-1] + '_' #Airbus A330 -> A330
                flight_no += segment['airlineCode'] + segment['flightNumber'] + '_' #拼接航空公司代码和航班代码

                singleflight = EachFlight()
                singleflight.flight_no = segment['airlineCode'] + segment['flightNumber']
示例#31
0
                logger.error('airfranceFlight :: price_value class not found!')
                result['error'] = PARSE_ERROR
                return result

            price = float(price)

            flight = Flight()
            flight.tax = 0
            flight.flight_no = flight_no
            flight.plane_type = plane_type
            flight.flight_corp = flight_corp
            flight.dept_id = dept_id
            flight.dest_id = dest_id
            flight.dept_day = dept_day
            flight.dept_time = dept_time
            flight.dest_time = dest_time
            flight.dur = dur
            flight.price = price
            flight.currency = currency
            flight.seat_type = seat_type
            flight.real_class = real_class
            flight.stop_id = stop_id
            flight.stop_time = stop_time
            flight.daydiff = daydiff
            flight.source = source
            flight.stop = stop
            flight_tuple = (flight.flight_no,flight.plane_type,flight.flight_corp,flight.dept_id,flight.dest_id,flight.dept_day,\
                    flight.dept_time,flight.dest_time,flight.dur,flight.rest,flight.price,flight.tax,flight.surcharge,\
                    flight.promotion,flight.currency,flight.seat_type,flight.real_class,flight.package,flight.stop_id,flight.stop_time,\
                    flight.daydiff,flight.source,flight.return_rule,flight.change_rule,flight.stop,flight.share_flight,flight.stopby,\
                    flight.baggage,flight.transit_visa,flight.reimbursement,flight.flight_meals,flight.ticket_type,flight.others_info)
示例#32
0
def parser(content, year=2014):
    """Parse a wego search-result page into one flight tuple per ticket offer.

    The page is cut into itinerary sections with the module-level
    ``section_pat``. Every offer found inside a section (``tickets_info_pat``)
    yields one tuple, so an itinerary sold by several sites produces several
    tuples that differ only in price and source.

    Args:
        content: Raw page text.
        year: Year prepended to the departure/arrival stamps before they are
            parsed (the stamps are parsed as day, month and 12-hour time
            only). Defaults to 2014, the value that used to be hard-coded.

    Returns:
        A list of 17-item tuples ``(flight_no, plane_no, airline, dept_id,
        dest_id, dept_day, dept_time, dest_time, dur, price, tax, surcharge,
        currency, seat_type, source, return_rule, stop)``. Times are
        ``YYYY-MM-DDTHH:MM:SS`` strings, ``dur`` is in seconds (-1 when the
        section shows no duration) and ``price`` is the scraped text with the
        thousands separators removed (it is not converted to a number).

    Raises:
        IndexError: a section lacks a field that is read positionally.
        ValueError: a time stamp does not match ``%Y %d %b %I:%M %p`` or a
            duration component is not an integer.
    """

    def to_iso(raw_stamp):
        # The first four characters are skipped and commas dropped before
        # parsing (presumably a weekday prefix - TODO confirm against a page).
        stamp = '%s %s' % (year, raw_stamp[4:].replace(',', ''))
        return datetime.strptime(stamp, '%Y %d %b %I:%M %p').isoformat()

    def first_int(pattern, text):
        # A missing component (e.g. no "d" part) counts as zero.
        found = pattern.findall(text)
        return int(found[0]) if found else 0

    flights = []

    for section in section_pat.findall(content):
        # Legs are ':'-separated and each leg keeps only the text before its
        # first '-'. Joining with '_' treats one-leg and multi-leg itineraries
        # alike; the previous code sliced off the first character of a
        # single-leg flight number.
        legs = flight_no_pat.findall(section)[0].split(':')
        flight_no = '_'.join(leg.split('-', 1)[0] for leg in legs)

        airline = airline_name_pat.findall(section)[0]
        dept_id = departure_code_pat.findall(section)[0]
        # The last arrival code/time in the section belongs to the final leg.
        dest_id = arrival_code_pat.findall(section)[-1]
        dept_time = to_iso(departure_time_pat.findall(section)[0])
        dest_time = to_iso(arrival_time_pat.findall(section)[-1])

        # When several duration strings match, the last one wins (unchanged
        # from the original behaviour). -1 marks "no duration found".
        duration = -1
        for span in flight_duration_pat.findall(section):
            duration = (first_int(day_pat, span) * 86400 +
                        first_int(hour_pat, span) * 3600 +
                        first_int(min_pat, span) * 60)

        # The stop count is derived from the combined flight number.
        stops = len(stops_pat.findall(flight_no))

        for offer in tickets_info_pat.findall(section):
            raw_price = tickets_price_pat.findall(offer)[0]
            price = raw_price[raw_price.find('>') + 1:].replace(',', '')

            # The seller name is the last path component of the matched URL,
            # without its extension, cut at the first remaining dot.
            site_url = tickets_web_pat.findall(offer)[0]
            site = site_url[site_url.rfind('/') + 1:site_url.rfind('.')]
            site = site.replace('-', '_')
            dot = site.find('.')
            if dot > 0:
                site = site[:dot]

            flight = Flight()
            flight.flight_no = flight_no
            flight.plane_no = 'NULL'
            flight.airline = airline
            flight.dept_id = dept_id
            flight.dest_id = dest_id
            flight.dept_time = dept_time
            flight.dest_time = dest_time
            flight.dept_day = dept_time.split('T')[0]
            flight.dur = duration
            flight.price = price
            flight.tax = -1.0
            flight.surcharge = -1.0
            flight.currency = "CNY"
            flight.seat_type = '经济舱'
            flight.source = 'wego::' + site
            flight.return_rule = 'NULL'
            flight.stop = stops

            flights.append((
                flight.flight_no, flight.plane_no, flight.airline,
                flight.dept_id, flight.dest_id, flight.dept_day,
                flight.dept_time, flight.dest_time, flight.dur, flight.price,
                flight.tax, flight.surcharge, flight.currency,
                flight.seat_type, flight.source, flight.return_rule,
                flight.stop,
            ))

    return flights
示例#33
0
def vuelingparser(content,flight_no,req_dept_time):
    """Look up the price of one Vueling flight in a result page.

    The page is scanned with the module-level patterns. Only the flight
    number, the departure time and the price are extracted, because nothing
    else influences the returned value.

    Args:
        content: raw page text.
        flight_no: flight number to look for; a connection is written as the
            two segment numbers joined by '_'.
        req_dept_time: departure time to look for, in the same textual form
            that dept_time_pat extracts from the page.

    Returns:
        The price string of the first matching fare (',' replaced by '.'),
        or -1 when the page lists no flights or no fare matches.
    """
    flight_num_info_temp = flight_no_pat.findall(content)
    if not flight_num_info_temp:
        return -1

    #get flight num
    flight_num_list = []
    for flight_num_info in flight_num_info_temp:
        first = flight_num_info.find('|')
        second = flight_num_info.rfind('~^')

        # Up to seven characters after '|' hold the number, padded with '~'.
        flight_num = flight_num_info[first+1:first+8].replace('~','')
        if second > 0:
            # A second number follows the last '~^': connecting flight.
            flight_num += '_' + flight_num_info[second+2:second+9].replace('~','')
        flight_num_list.append(flight_num)

    #get departure time of every flight
    dept_time_list = [dept_time_pat.findall(time_temp)[0]
                      for time_temp in flight_time_pat.findall(content)]

    #get each kind flight price
    price_list = []
    for price_temp in price_pat.findall(content):
        # Keep the text after the last '>' and drop its final three
        # characters (presumably a currency suffix - confirm against the page).
        start = price_temp.rfind('>') + 1
        price_list.append(price_temp[start:-3].replace(',','.'))

    # Prices come in groups of three per flight (economy, super economy,
    # business), so price i belongs to flight i // 3.  Stop as soon as the
    # prices outnumber the parsed flights instead of raising IndexError.
    flight_count = min(len(flight_num_list), len(dept_time_list))
    for i, price in enumerate(price_list):
        j = i // 3
        if j >= flight_count:
            break
        if flight_num_list[j] == flight_no and dept_time_list[j] == req_dept_time:
            return price

    # No fare matched: report it the same way as an empty page.
    return -1
示例#34
0
def ValidatePage(content, dept_year, flight_no, orig_dept_time):
    """Look up the current price of one flight in a search-result page.

    Every flight block found in ``content`` is reduced to a flight number
    (segment numbers joined by '_') and a departure timestamp. The first
    block that matches ``flight_no`` and ``orig_dept_time`` supplies the
    price. Blocks that cannot be parsed are skipped.

    Args:
        content: page text searched with each_flight_content_pat.
        dept_year: year string prefixed to the month/day scraped from the
            page.
        flight_no: flight number to match.
        orig_dept_time: departure time to match, formatted as
            '<year>-<month>-<day>T<HH:MM>:00'.

    Returns:
        The int price of the matching flight, or -1 when nothing matches.
    """
    result = -1

    for each_flight_text in each_flight_content_pat.findall(content):
        flight = Flight()
        try:
            t_price = all_price_pat.findall(each_flight_text)[0]
            each_flight_text_temp = each_flight_content_temp_pat.findall(
                each_flight_text)[0]
            each_part_flight = each_part_flight_pat.findall(
                each_flight_text_temp)
            if not each_part_flight:
                continue

            # Departure airport comes from the first segment, arrival airport
            # from the last; [1:-1] trims one wrapping character on each side.
            flight.dept_id = airport_pat.findall(
                each_part_flight[0])[0][1:-1]
            flight.dest_id = airport_pat.findall(
                each_part_flight[-1])[-1][1:-1]

            # presumably (month, day, time) groups - confirm against
            # dept_time_temp_pat / dest_time_temp_pat.
            dept_time_temp = dept_time_temp_pat.findall(
                each_part_flight[0])[0]
            dest_time_temp = dest_time_temp_pat.findall(
                each_part_flight[-1])[-1]
            flight.dept_day = dept_year + '-' + dept_time_temp[
                0].strip() + '-' + dept_time_temp[1].strip()

            flight.dept_time = flight.dept_day + 'T' + dept_time_temp[
                2].strip() + ':00'
            # NOTE(review): the arrival date reuses the departure month, so
            # an arrival in the following month would be mis-dated - confirm.
            flight.dest_time = dept_year + '-' +  dept_time_temp[0].strip() + '-' + \
                    dest_time_temp[0].strip() + 'T' + dest_time_temp[1].strip()[-5:] + ':00'

            # Parsing both timestamps rejects malformed dates: the ValueError
            # is caught below and the flight is skipped.
            datetime.datetime.strptime(flight.dept_time, '%Y-%m-%dT%H:%M:%S')
            datetime.datetime.strptime(flight.dest_time, '%Y-%m-%dT%H:%M:%S')

            # NOTE(review): a match with more than one element is indexed,
            # otherwise the total price is used - presumably price_pat has
            # several capture groups; confirm.
            flight.price = price_pat.findall(each_flight_text)[0]
            if len(flight.price) > 1:
                flight.price = int(flight.price[0])
            else:
                flight.price = int(t_price)

            flight.flight_no = '_'.join(
                flight_no_pat.findall(each_flight_text_t)[0][:8].replace(' ', '')
                for each_flight_text_t in each_part_flight)

            if flight.flight_no == flight_no and flight.dept_time == orig_dept_time:
                result = flight.price
                break
        except Exception:
            # Best effort: one unparsable flight must not abort the lookup.
            continue

    return result
示例#35
0
def directFlight_parser(flightstring,date,airports_dict):
    """Parse the HTML of one direct-flight result row into a Flight.

    Args:
        flightstring: HTML fragment holding the row's
            <td class="cols01"> .. <td class="cols06"> cells.
        date: departure date; sliced as [0:4], [5:7] and [8:] for year,
            month and day (i.e. 'YYYY-MM-DD').
        airports_dict: mapping from the airport name shown on the page to
            the id stored on the Flight; a name missing from the mapping is
            stored unchanged.

    Returns:
        A Flight with number, plane, airline, airports, times, price and tax
        filled in. dur is always 0 because the duration conversion is
        disabled.

    Raises:
        IndexError: an expected cell or field is missing from the fragment.
        ValueError: the date, price or tax is not numeric.
    """
    flight = Flight()

    # A direct flight yields a one-element list per cell.  All six cells must
    # be present, although only the first four are read below.
    cells = [re.findall(r'<td class="cols0%d">(.*?)</td>' % n, flightstring, re.S)[0]
             for n in range(1, 7)]
    cols01, cols02, cols03, cols04 = cells[:4]

    aircorp = re.findall(r'</span>(.*?)<br />', cols01, re.S)[0].strip()
    flight_no = re.findall(r'<br />(.*?)&nbsp', cols01, re.S)[0].strip()
    plane_type = re.findall(r'method="PlaneType" >(.*?)</a>', cols01, re.S)[0].strip()

    dept_airport = re.findall(r'</span>(.*?)<br />', cols02, re.S)[0].strip()
    dept_time = re.findall(r'<span class=" t14 bold black">(.*?)</span>', cols02, re.S)[0].strip()
    arr_time_airport = re.findall(r'<br />(.*?)$', cols02, re.S)[0].strip()

    # First token is the arrival time, last token the arrival airport.
    arr_tokens = arr_time_airport.split(' ')
    arr_time = arr_tokens[0].strip()
    arr_airport = arr_tokens[-1].strip()
    days = 0
    if '+1天' in arr_time_airport:
        # Next-day arrival: drop the '(...)' marker glued to the time.
        days = 1
        arr_time = arr_time.split('(')[0].strip()

    # The duration text is located but not converted: timeshifter() is
    # disabled, so dur is reported as 0.
    during_time = re.findall(r'(.*?)<br />', cols03, re.S)[0].strip()
    during = 0

    # int() replaces string.atoi(), which no longer exists in Python 3.
    dept_date = datetime.datetime(int(date[0:4]), int(date[5:7]), int(date[8:]))
    dest_date = dept_date + datetime.timedelta(days)
    dept_daytime = date + 'T' + dept_time + ':00'
    dest_daytime = str(dest_date).split(' ')[0] + 'T' + arr_time + ':00'

    price = re.findall(r'</span>(.*?)</span>', cols04, re.S)[0].strip()
    tax = re.findall(r'参考税 &yen;(.*?)<div class', cols04, re.S)[0].strip()

    flight.flight_no = flight_no
    flight.plane_no = plane_type
    flight.airline = aircorp
    # dict.get() replaces has_key(), which no longer exists in Python 3.
    flight.dept_id = airports_dict.get(dept_airport, dept_airport)
    flight.dest_id = airports_dict.get(arr_airport, arr_airport)
    flight.dept_day = date
    flight.dept_time = dept_daytime
    flight.dest_time = dest_daytime
    flight.dur = during
    flight.price = float(price)
    flight.tax = float(tax)
    flight.surcharge = -1.0
    flight.currency = 'CNY'
    flight.seat_type = '经济舱'
    flight.source = 'elong::elong'
    flight.return_rule = 'NULL'
    flight.stop = 0

    return flight
示例#36
0
def parsePage(content,dept_year, flight_no, orig_dept_time):
    """Look up the price of one flight in a feifan search-result page.

    Each flight block in ``content`` is parsed into a Flight. The first one
    whose flight number (segment numbers joined by '_') and departure time
    equal ``flight_no`` and ``orig_dept_time`` supplies the price. Blocks
    that fail to parse are skipped.

    Args:
        content: page text searched with each_flight_content_pat.
        dept_year: year string prefixed to the month/day scraped from the
            page.
        flight_no: flight number to match.
        orig_dept_time: departure time to match, formatted as
            '<year>-<month>-<day>T<HH:MM>:00'.

    Returns:
        The int price of the matching flight, or -1 when nothing matches.
    """
    result = -1
    time_format = '%Y-%m-%dT%H:%M:%S'

    for each_flight_text in each_flight_content_pat.findall(content):
        flight = Flight()
        try:
            t_price = all_price_pat.findall(each_flight_text)[0]
            each_flight_text_temp = each_flight_content_temp_pat.findall(each_flight_text)[0]
            each_part_flight = each_part_flight_pat.findall(each_flight_text_temp)
            if not each_part_flight:
                continue

            # Departure airport comes from the first segment, arrival airport
            # from the last; [1:-1] trims one wrapping character on each side.
            flight.dept_id = airport_pat.findall(each_part_flight[0])[0][1:-1]
            flight.dest_id = airport_pat.findall(each_part_flight[-1])[-1][1:-1]

            # presumably (month, day, time) groups - confirm against
            # dept_time_temp_pat / dest_time_temp_pat.
            dept_time_temp = dept_time_temp_pat.findall(each_part_flight[0])[0]
            dest_time_temp = dest_time_temp_pat.findall(each_part_flight[-1])[-1]
            flight.dept_day = dept_year + '-' + dept_time_temp[0].strip() + '-' + \
                    dept_time_temp[1].strip()
            flight.dept_time = flight.dept_day + 'T' + dept_time_temp[2].strip() + ':00'
            # NOTE(review): the arrival date reuses the departure month, so
            # an arrival in the following month would be mis-dated - confirm.
            flight.dest_time = dept_year + '-' +  dept_time_temp[0].strip() + '-' + \
                    dest_time_temp[0].strip() + 'T' + dest_time_temp[1].strip()[-5:] + ':00'

            dept_time = int(time.mktime(
                datetime.datetime.strptime(flight.dept_time, time_format).timetuple()))
            dest_time = int(time.mktime(
                datetime.datetime.strptime(flight.dest_time, time_format).timetuple()))
            # NOTE(review): one hour is added to the elapsed time, presumably
            # a time-zone correction - confirm.
            flight.dur = dest_time - dept_time + 3600

            flight.stop = len(each_part_flight) - 1

            # NOTE(review): a match with more than one element is indexed,
            # otherwise the total price is used - presumably price_pat has
            # several capture groups; confirm.
            flight.price = price_pat.findall(each_flight_text)[0]
            if len(flight.price) > 1:
                flight.price = int(flight.price[0])
            else:
                flight.price = int(t_price)

            try:
                flight.tax = int(t_price) - flight.price
            except (TypeError, ValueError):
                # The total price is not numeric: tax is unknown.
                flight.tax = -1.0
                logger.info('Can not parse tax info!')

            # One entry per segment, joined by '_'.
            flight.flight_no = '_'.join(
                flight_no_pat.findall(part)[0][:8].replace(' ','')
                for part in each_part_flight)
            flight.plane_no = '_'.join(
                plane_no_pat.findall(part)[0].replace(' ','')
                for part in each_part_flight)
            flight.airline = '_'.join(
                airline_pat.findall(part)[0].replace(' ','')
                for part in each_part_flight)

            flight.return_rule = return_rule_pat.findall(each_flight_text)[0].replace('<p>','').replace('\n','') \
                    .replace('。','').replace('</p>','。').strip().replace(' ','')
            flight.currency = 'CNY'
            flight.source = 'feifan::feifan'
            flight.seat_type = '经济舱'

            if flight.flight_no == flight_no and flight.dept_time == orig_dept_time:
                result = flight.price
                break
        except Exception:
            # Best effort: one unparsable flight must not abort the lookup.
            continue

    return result
示例#37
0
def vuelingparser(content, flight_no, req_dept_time):
    """Return the price of the requested Vueling flight found in a page.

    Only flight numbers, departure times and prices are extracted from
    ``content`` (via the module-level patterns); no other field affects the
    returned value.

    Args:
        content: raw page text.
        flight_no: flight number to look for; a connection is written as the
            two segment numbers joined by '_'.
        req_dept_time: departure time to look for, in the same textual form
            that dept_time_pat extracts from the page.

    Returns:
        The price string of the first matching fare (',' replaced by '.'),
        or -1 when the page lists no flights or no fare matches.
    """
    raw_numbers = flight_no_pat.findall(content)
    if not raw_numbers:
        return -1

    # Flight numbers: up to seven characters after '|', padded with '~'.
    # When '~^' occurs, a second number follows it (connecting flight).
    numbers = []
    for raw in raw_numbers:
        bar = raw.find('|')
        link = raw.rfind('~^')
        number = raw[bar + 1:bar + 8].replace('~', '')
        if link > 0:
            number = number + '_' + raw[link + 2:link + 9].replace('~', '')
        numbers.append(number)

    # Departure time of every flight block, in page order.
    departures = []
    for block in flight_time_pat.findall(content):
        departures.append(dept_time_pat.findall(block)[0])

    # Price text follows the last '>'; its final three characters are
    # dropped (presumably a currency suffix - confirm against the page).
    prices = []
    for raw_price in price_pat.findall(content):
        cut = raw_price.rfind('>') + 1
        prices.append(raw_price[cut:-3].replace(',', '.'))

    # zip() pairs numbers with departures and stops at the shorter list, so
    # a length mismatch cannot raise IndexError.
    flights = list(zip(numbers, departures))

    # Prices come in groups of three per flight (economy, super economy,
    # business): price number `index` belongs to flight `index // 3`.
    for index, price in enumerate(prices):
        slot = index // 3
        if slot >= len(flights):
            break
        number, departure = flights[slot]
        if number == flight_no and departure == req_dept_time:
            return price

    # No fare matched: report it the same way as an empty page.
    return -1
示例#38
0
def ceair_page_parser(content):
    """Turn a ceair fare response into ticket tuples and segment records.

    Args:
        content: response text; everything from the first '{' onwards is
            decoded as JSON.

    Returns:
        (tickets, flights). ``tickets`` is a list of 17-field tuples, one per
        routing and priced cabin class (economy and/or business).
        ``flights`` maps '<flight_no>_<dept_id>_<dest_id>' to an 8-field
        tuple describing that segment. Both are empty when the response
        carries a non-empty 'resultMsg'.
    """
    tickets = []
    flights = {}

    payload = json.loads(content[content.find('{'):])

    # A non-empty message means the query produced no usable result.
    if payload['resultMsg'] != '':
        return tickets, flights

    currency = payload['currency']

    for routing in payload['tripItemList'][0]['airRoutingList']:
        segments = routing['flightList']

        flight = Flight()
        flight.source = 'ceair::ceair'
        flight.stop = len(segments) - 1
        flight.currency = currency

        # Whole-trip endpoints: first segment departs, last segment arrives.
        flight.dept_id = segments[0]['deptCd']
        flight.dest_id = segments[-1]['arrCd']
        flight.dept_time = standard_timeformatter(segments[0]['deptTime'])
        flight.dest_time = standard_timeformatter(segments[-1]['arrTime'])
        flight.dept_day = segments[0]['deptTime'].split(' ')[0]

        seg_numbers, seg_planes, seg_airlines = [], [], []
        air_seconds, wait_seconds = [], []

        for seg in segments:
            leg = EachFlight()
            leg.flight_no = seg['flightNo']
            leg.airline = '东方航空'
            leg.plane_no = seg['acfamily']
            leg.dept_id = seg['deptCd']
            leg.dest_id = seg['arrCd']
            leg.dept_time = standard_timeformatter(seg['deptTime'])
            leg.dest_time = standard_timeformatter(seg['arrTime'])
            leg.dur = hm_to_sec(seg['duration'])
            leg.flight_key = '_'.join((leg.flight_no, leg.dept_id, leg.dest_id))

            flights[leg.flight_key] = (
                leg.flight_no, leg.airline, leg.plane_no, leg.dept_id,
                leg.dest_id, leg.dept_time, leg.dest_time, leg.dur)

            seg_numbers.append(leg.flight_no)
            seg_planes.append(leg.plane_no)
            seg_airlines.append(leg.airline)
            air_seconds.append(leg.dur)
            wait_seconds.append(hm_to_sec(seg['stayTime']))

        # Per-segment values are joined with '_' on the routing record.
        flight.flight_no = '_'.join(seg_numbers)
        flight.plane_no = '_'.join(seg_planes)
        flight.airline = '_'.join(seg_airlines)

        # Total duration = time in the air plus every stay time.
        flight.dur = sum(air_seconds + wait_seconds)

        # One ticket per cabin class that has a displayed price.
        for fare_key, seat_name in (('economy', '经济舱'), ('business', '商务舱')):
            if routing['priceDisp'][fare_key] == '':
                continue
            flight.seat_type = seat_name
            flight.price = int(routing['priceDisp'][fare_key])
            tickets.append((
                flight.flight_no, flight.plane_no, flight.airline,
                flight.dept_id, flight.dest_id, flight.dept_day,
                flight.dept_time, flight.dest_time, flight.dur, flight.price,
                flight.tax, flight.surcharge, flight.currency,
                flight.seat_type, flight.source, flight.return_rule,
                flight.stop))

    return tickets, flights