# JavaScript injected into the results page.  It wraps XMLHttpRequest so that
# the first successful JSON response containing "Trips" is copied into a
# <div id="interceptedResponse">, then re-triggers the page's flight search.
_UNITED_XHR_INTERCEPTOR_JS = """
(function(XHR) {
    "use strict";
    var count = 0;
    var open = XHR.prototype.open;
    var send = XHR.prototype.send;
    XHR.prototype.open = function(method, url, async, user, pass) {
        this._url = url;
        open.call(this, method, url, async, user, pass);
    };
    XHR.prototype.send = function(data) {
        var self = this;
        var oldOnReadyStateChange;
        var url = this._url;
        function onReadyStateChange() {
            if(self.readyState == 4) {
                var json_response = JSON.parse(self.responseText);
                if(json_response.hasOwnProperty("status") && json_response["status"] == "success" && json_response.hasOwnProperty("data")) {
                    var tripdata = json_response["data"]
                    if(tripdata["Trips"]) {
                        var element = document.createElement('div');
                        element.id = "interceptedResponse";
                        element.appendChild(document.createTextNode(""));
                        document.body.appendChild(element);
                        element.appendChild(document.createTextNode(self.responseText));
                        count = count+1;
                    }
                }
            }
            if(oldOnReadyStateChange) {
                oldOnReadyStateChange();
            }
        }
        /* Set xhr.noIntercept to true to disable the interceptor for a particular call */
        if(!this.noIntercept) {
            if(this.addEventListener) {
                this.addEventListener("readystatechange", onReadyStateChange, false);
            } else {
                oldOnReadyStateChange = this.onreadystatechange;
                this.onreadystatechange = onReadyStateChange;
            }
        }
        send.call(this, data);
    }
})(XMLHttpRequest);
UA.Booking.FlightSearch.init();
"""

_UNITED_FLEX_INSERT_SQL = (
    "INSERT INTO pexproject_flexibledatesearch "
    "(scrapertime,searchkey,source,destination,journey,flexdate,"
    "economyflex,businessflex,datasource) "
    "VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s);"
)

_UNITED_FLIGHT_INSERT_SQL = (
    "INSERT INTO pexproject_flightdata "
    "(flighno,searchkeyid,scrapetime,stoppage,stoppage_station,origin,"
    "destination,departure,arival,duration,maincabin,maintax,firstclass,"
    "firsttax,business,businesstax,cabintype1,cabintype2,cabintype3,"
    "datasource,departdetails,arivedetails,planedetails,operatedby,"
    "economy_code,business_code,first_code,eco_fare_code,"
    "business_fare_code,first_fare_code) "
    "VALUES(" + ",".join(["%s"] * 30) + ")"
)

_UNITED_FLAG_INSERT_SQL = (
    "INSERT INTO pexproject_flightdata "
    "(flighno,searchkeyid,scrapetime,stoppage,stoppage_station,origin,"
    "destination,duration,maincabin,maintax,firstclass,firsttax,business,"
    "businesstax,cabintype1,cabintype2,cabintype3,datasource,departdetails,"
    "arivedetails,planedetails,operatedby,economy_code,business_code,"
    "first_code) "
    "VALUES (" + ",".join(["%s"] * 25) + ");"
)

# Rows are written to the database in batches of this size.
_UNITED_BATCH_SIZE = 50


def _united_insert_flag_row(cursor, db, searchkey, stime):
    """Insert the all-"flag" sentinel row for this search and commit."""
    cursor.execute(
        _UNITED_FLAG_INSERT_SQL,
        ("flag", str(searchkey), stime, "flag", "test", "flag", "flag",
         "flag", "0", "0", "0", "0", "0", "0", "flag", "flag", "flag",
         "united", "flag", "flag", "flag", "flag", "flag", "flag", "flag"))
    db.commit()


def _united_reformat_datetime(value):
    """Convert a 'MM/DD/YYYY HH:MM' string to 'YYYY/MM/DD HH:MM'."""
    parsed = datetime.datetime.strptime(value, '%m/%d/%Y %H:%M')
    return parsed.strftime('%Y/%m/%d %H:%M')


def _united_airport_label(description):
    """Return the airport detail for a description, or the description itself."""
    return (customfunction.get_airport_detail(get_airport_code(description))
            or description)


def _united_minutes_label(minutes):
    """Format a number of minutes as '<hours>h <minutes>m'."""
    return str(minutes // 60) + "h " + str(minutes % 60) + "m"


def _united_normalize_equipment(description, match_prefix):
    """Normalise an aircraft description.

    'Canadair Regional Jet' is rewritten to 'Bombardier CRJ'.  The codes
    CS1/32B/359 are expanded through ``customfunction.AIRCRAFTS``; with
    ``match_prefix`` the code is matched against the first three characters
    (the remainder is kept), otherwise against the whole description.
    """
    if description.startswith('Canadair Regional Jet'):
        return description.replace('Canadair Regional Jet', 'Bombardier CRJ')
    code = description[:3] if match_prefix else description
    if code in ('CS1', '32B', '359'):
        return customfunction.AIRCRAFTS[code] + description[len(code):]
    return description


def _united_flex_rows(calendar_months, stime, searchkey, origin, destination,
                      travel_date):
    """Build the flexible-date rows from the response's Calendar/Months list."""
    rows = []
    for month in calendar_months:
        for week in month['Weeks']:
            # Each week's "Days" list is read exactly once.
            for day in week["Days"]:
                month_no = day["Month"]
                if month_no > 0:
                    cabin_option = day["ProductClass"]
                    has_one = 'cabin-option-one' in cabin_option
                    has_two = 'cabin-option-two' in cabin_option
                    ecosaver, busssaver = '', ''
                    if cabin_option == '':
                        # Standard Award may be available: no saver flag.
                        pass
                    elif has_one and not has_two:
                        ecosaver = "saver"       # economy only
                    elif has_two and not has_one:
                        busssaver = "saver"      # premium cabin only
                    else:
                        ecosaver = "saver"       # economy & premium cabin
                        busssaver = "saver"
                    fulldate = (str(day["Year"]) + "/" + str(month_no) + "/"
                                + str(day["DateValue"]))
                    flexdate = datetime.datetime.strptime(
                        fulldate, '%Y/%m/%d').strftime('%Y-%m-%d')
                    rows.append((str(stime), str(searchkey), origin,
                                 destination, str(travel_date), str(flexdate),
                                 ecosaver, busssaver, "united"))
    return rows


def _united_segment_details(segments, source, lastdestination,
                            destination_datetime, last_destination_datetime,
                            first_leg_duration, last_leg_duration):
    """Collect depart/arrive/plane/operator strings for one flight's segments.

    Returns four lists: departure details, arrival details, plane details
    and operating carriers.
    """
    departs, arrivals, planes, operators = [], [], [], []

    def arrival_detail(dest_code, dest_description):
        # The flight-level timestamps are never overwritten, so they can be
        # parsed again for every segment that needs them.
        if dest_code.strip() == lastdestination.strip():
            when = _united_reformat_datetime(last_destination_datetime)
        else:
            when = _united_reformat_datetime(destination_datetime)
        return when + " | at " + _united_airport_label(dest_description)

    for segment in segments:
        flight_number = segment["FlightNumber"]
        carrier_code = segment["OperatingCarrierCode"]
        origin_description = segment["OriginDescription"]
        flight_date = datetime.datetime.strptime(
            segment["FlightDate"], '%m/%d/%Y %H:%M').strftime('%Y/%m/%d %H:%M')
        departs.append(flight_date + " | from "
                       + _united_airport_label(origin_description))

        # A missing (None) and an empty stop list are treated the same way.
        stops = segment["Stops"] or []
        if stops:
            for stop in stops:
                stop_origin = stop["OriginDescription"]
                stop_date = datetime.datetime.strptime(
                    stop["FlightDate"],
                    '%m/%d/%Y %H:%M').strftime('%Y/%m/%d %H:%M')
                departs.append(stop_date + " | from "
                               + _united_airport_label(stop_origin))
                arrivals.append(arrival_detail(
                    stop["Destination"], stop["DestinationDescription"]))

                stop_operator = stop["OperatingCarrierDescription"]
                if stop_operator is None:
                    stop_operator = segment["MarketingCarrierDescription"]
                if stop_operator is not None:
                    operators.append(stop_operator)

                stop_detail = (stop["OperatingCarrierCode"] + " "
                               + stop["FlightNumber"])
                equipment = _united_normalize_equipment(
                    stop["EquipmentDescription"], True)
                planes.append(stop_detail + " | " + equipment)
        else:
            arrivals.append(arrival_detail(
                segment["Destination"], segment["DestinationDescription"]))

            operated_by = segment["OperatingCarrierDescription"]
            if operated_by is None:
                operated_by = segment["MarketingCarrierDescription"]
            if operated_by is not None:
                operators.append(operated_by)

            equipment = _united_normalize_equipment(
                segment["EquipmentDescription"], False)
            # The segment leaving the flight's origin shows the first leg's
            # duration; any other segment shows the connection's duration.
            if source.strip() == segment["Origin"].strip():
                leg_duration = first_leg_duration
            else:
                leg_duration = last_leg_duration
            planes.append(carrier_code + " " + flight_number + " | "
                          + equipment + " (" + leg_duration + ")")
    return departs, arrivals, planes, operators


def _united_fares(products, connection):
    """Pick miles, tax and booking codes per cabin.

    Returns three dicts keyed by 'Economy'/'Business'/'First' (miles, tax)
    plus two dicts of lists: booking codes with the product description
    appended, and bare booking codes.
    """
    cabins = ('Economy', 'Business', 'First')
    miles = dict((cabin, 0) for cabin in cabins)
    taxes = dict((cabin, 0) for cabin in cabins)
    labelled_codes = dict((cabin, []) for cabin in cabins)
    bare_codes = dict((cabin, []) for cabin in cabins)

    for product in products:
        style = product["DataSourceLabelStyle"]
        tax_and_fees = product["TaxAndFees"]
        tax = tax_and_fees["Amount"] if tax_and_fees else 0
        prices = product["Prices"]
        amount = prices[0]["Amount"] if prices else 0
        code = product["BookingCode"]
        label = code
        if product["ProductTypeDescription"]:
            label = code + " " + product["ProductTypeDescription"]

        # Economy is taken even without a price; the premium cabins only
        # when the product actually carries a miles amount.
        if 'Economy' in style and miles['Economy'] == 0:
            cabin = 'Economy'
        elif 'Business' in style and miles['Business'] == 0 and amount:
            cabin = 'Business'
        elif 'First' in style and miles['First'] == 0 and amount:
            cabin = 'First'
        else:
            continue
        miles[cabin] = amount
        taxes[cabin] = tax
        labelled_codes[cabin].append(label)
        if code:
            bare_codes[cabin].append(code)

    if connection:
        for product in connection[0]["Products"]:
            style = product["DataSourceLabelStyle"]
            code = product["BookingCode"]
            label = code
            if product["ProductTypeDescription"]:
                label = code + " " + product["ProductTypeDescription"]
            if not style:
                continue
            for cabin in cabins:
                if cabin in style:
                    labelled_codes[cabin].append(label)
                    if code:
                        bare_codes[cabin].append(code)
                    break
    return miles, taxes, labelled_codes, bare_codes


def _united_flight_row(flight, searchkey, stime):
    """Turn one entry of Trips[0]["Flights"] into a pexproject_flightdata row."""
    source = flight["Origin"]
    depart_time = datetime.datetime.strptime(
        flight["DepartTimeFormat"], '%I:%M %p').strftime('%H:%M')
    lastdestination = flight["LastDestination"]['Code']
    # Dots are dropped from the arrival time before it is parsed with %p.
    last_dest_time = flight["LastDestination"]['TimeFormatted'].replace('.', '')
    arrive_time = datetime.datetime.strptime(
        last_dest_time, '%I:%M %p').strftime('%H:%M')

    stops = flight["StopsandConnections"]
    if stops == 0:
        stoppage = "NONSTOP"
    elif stops == 1:
        stoppage = "1 STOP"
    else:
        stoppage = str(stops) + " STOPS"

    flight_no = ("Flight " + flight["OperatingCarrier"] + " "
                 + str(flight["FlightNumber"]))
    first_leg_duration = _united_minutes_label(int(flight["TravelMinutes"]))
    total_time = _united_minutes_label(int(flight["TravelMinutesTotal"]))

    connection = flight["Connections"]
    last_leg_duration = ''
    if connection:
        last_leg_duration = _united_minutes_label(
            connection[0]["TravelMinutes"])

    departs, arrivals, planes, operators = _united_segment_details(
        json.loads(flight["FlightSegmentJson"]), source, lastdestination,
        flight["DestinationDateTime"], flight["LastDestinationDateTime"],
        first_leg_duration, last_leg_duration)
    miles, taxes, labelled_codes, bare_codes = _united_fares(
        flight["Products"], connection)

    return (
        flight_no, str(searchkey), stime, stoppage, "test", source,
        lastdestination, depart_time, arrive_time, total_time,
        str(miles['Economy']), str(taxes['Economy']),
        str(miles['Business']), str(taxes['Business']),
        str(miles['First']), str(taxes['First']),
        "Economy", "Business", "First", "united",
        '@'.join(departs), '@'.join(arrivals), '@'.join(planes),
        '@'.join(operators),
        '@'.join(labelled_codes['Economy']),
        '@'.join(labelled_codes['Business']),
        '@'.join(labelled_codes['First']),
        ','.join(bare_codes['Economy']),
        ','.join(bare_codes['Business']),
        ','.join(bare_codes['First']),
    )


def united(origin, destination, searchdate, searchkey):
    """Scrape united.com award results for one route and date.

    The results page is opened in Chrome inside a virtual display, an XHR
    interceptor is injected to capture the JSON search response, and the
    response is converted into rows.  Unless ``DEV_LOCAL`` is set, the
    flexible-date calendar goes to ``pexproject_flexibledatesearch``, the
    flights go to ``pexproject_flightdata`` in batches, and a final row whose
    text columns are "flag" is inserted for the search key.

    Args:
        origin: origin code placed in the ``f`` query parameter.
        destination: destination code placed in the ``t`` query parameter.
        searchdate: travel date as 'MM/DD/YYYY'.
        searchkey: identifier stored with every inserted row.

    Returns:
        ``searchkey``, unchanged.

    Raises:
        ValueError: if ``searchdate`` is not 'MM/DD/YYYY'.  Errors raised
        while converting individual flights also propagate; the browser and
        the display are released first.
    """
    dt = datetime.datetime.strptime(searchdate, '%m/%d/%Y')
    sys.stdout = codecs.getwriter('utf-8')(sys.stdout)
    date = dt.strftime('%Y-%m-%d')
    stime = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    db = cursor = None
    if not DEV_LOCAL:
        db = customfunction.dbconnection()
        cursor = db.cursor()

    url = ("https://www.united.com/ual/en/us/flight-search/book-a-flight/results/awd?f="
           + origin + "&t=" + destination + "&d=" + date
           + "&tt=1&at=1&sc=7&px=1&taxng=1&idx=1")
    display = Display(visible=0, size=(800, 600))
    display.start()
    chromedriver = "/usr/bin/chromedriver"
    os.environ["webdriver.chrome.driver"] = chromedriver

    driver = None
    try:
        driver = webdriver.Chrome(chromedriver)
        try:
            driver.get(url)
            time.sleep(2)
            driver.execute_script(_UNITED_XHR_INTERCEPTOR_JS)
            WebDriverWait(driver, 20).until(
                EC.presence_of_element_located((By.ID, "interceptedResponse")))
            soup = BeautifulSoup(driver.page_source, "xml")
            maindata = soup.findAll("div", {"id": "interceptedResponse"})
            jsonOb = json.loads(maindata[0].text)

            # Flexible-date calendar: optional, failures here are ignored.
            try:
                flex_value = _united_flex_rows(
                    jsonOb["data"]["Calendar"]["Months"], stime, searchkey,
                    origin, destination, dt.strftime('%Y-%m-%d'))
                if not DEV_LOCAL:
                    cursor.executemany(_UNITED_FLEX_INSERT_SQL, flex_value)
                    db.commit()
            except Exception:
                pass

            flightDetails = jsonOb["data"]["Trips"][0]["Flights"]
        except Exception:
            print("No data Found")
            if not DEV_LOCAL:
                _united_insert_flag_row(cursor, db, searchkey, stime)
            return searchkey

        values_string = []
        last_index = len(flightDetails) - 1
        for i, flight in enumerate(flightDetails):
            values_string.append(_united_flight_row(flight, searchkey, stime))
            if len(values_string) >= _UNITED_BATCH_SIZE or i == last_index:
                if not DEV_LOCAL:
                    cursor.executemany(_UNITED_FLIGHT_INSERT_SQL, values_string)
                    db.commit()
                values_string = []

        if not DEV_LOCAL:
            _united_insert_flag_row(cursor, db, searchkey, stime)
        return searchkey
    finally:
        # Always release the virtual display and the browser, including when
        # a flight row could not be built.
        display.stop()
        if driver is not None:
            driver.quit()
def airchina(ocity_code, dcity_code, searchdate, searchkey):
    """Scrape the Air China award list for one route and log the rows found.

    The search page is opened with PhantomJS; for every row of the
    ``idTbl_OutboundAirList`` table the flight's detail page is fetched and
    a 30-field tuple is built.  The collected tuples are written to the log
    (a file when ``DEBUG`` is set, otherwise stdout).  Any error during
    scraping is written to the log instead of being raised.

    Args:
        ocity_code: origin value substituted into ``SEARCH_URL``.
        dcity_code: destination value substituted into ``SEARCH_URL``.
        searchdate: date value substituted into ``SEARCH_URL`` and
            ``DETAIL_URL``.
        searchkey: identifier stored in every tuple.

    Returns:
        None.
    """
    driver = webdriver.PhantomJS(service_args=[
        '--ignore-ssl-errors=true',
        '--ssl-protocol=any',
        '--load-images=false'
    ], service_log_path='/tmp/ghostdriver.log')
    log_file = None
    owns_log_file = False
    try:
        driver.set_window_size(1120, 1080)
        url = SEARCH_URL % (ocity_code, dcity_code, searchdate)
        sys.stdout = codecs.getwriter('utf-8')(sys.stdout)

        log_path = 'airchina_log' if DEV_LOCAL else '/home/upwork/airchina_log'
        if DEBUG:
            log_file = open(log_path, 'a')
            owns_log_file = True
        else:
            log_file = sys.stdout
        log_file.write('\n\n' + '=' * 70 + '\n\n')
        log_file.write(url + '\n\n')

        # NOTE(review): the connection is opened but not used anywhere in
        # this function - confirm whether the insert step is missing.
        db = customfunction.dbconnection() if not DEV_LOCAL else None

        flightinfo = []
        stime = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

        try:
            driver.get(SET_LANG_URL)
            driver.get(url)
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.ID, "idTbl_OutboundAirList")))
            cookies = {
                'JSESSIONID': get_cookie(driver, 'JSESSIONID', '/AMRWeb'),
                'BIGipServerAMR_web': get_cookie(driver, 'BIGipServerAMR_web', '/'),
            }

            soup = BeautifulSoup(driver.page_source, "lxml")
            maindata = soup.select('#idTbl_OutboundAirList tbody tr')

            for index, flight in enumerate(maindata):
                tds = flight.select('td')
                flightno = tds[0].string
                departure = tds[1].strong.string
                arrival = tds[2].strong.string
                origin_dest = tds[3].string.split('-')
                origin = origin_dest[0]
                destination = origin_dest[1]

                driver.get(DETAIL_URL % (flightno, searchdate, origin + destination))
                detail_soup = BeautifulSoup(driver.page_source, "lxml")
                tables = detail_soup.select('table')

                summary_rows = tables[0].select('tr')
                operatedby = get_clean_string(
                    summary_rows[0].select('td')[1].div.string)
                duration = get_clean_string(
                    summary_rows[1].select('td')[1].div.string)

                detail_rows = tables[1].select('tr')
                cells = detail_rows[0].select('td')
                # First cell is the departure, second the arrival; both use
                # the "<time> | <from/at> <airport>" layout.
                endpoints = []
                for cell, joiner in ((cells[0], ' | from '), (cells[1], ' | at ')):
                    spans = cell.div.select('span')
                    airport = get_neat_string(spans[0].string)
                    when = get_neat_string(spans[1].text)
                    when = datetime.datetime.strptime(
                        when, ' %a, %d %b %Y %H:%M ').strftime('%Y/%m/%d %H:%M')
                    airport_ = customfunction.get_airport_detail(
                        get_airport_code(airport)) or airport
                    endpoints.append(when + joiner + airport_)
                departinfo, arrivalinfo = endpoints

                aircraft = get_clean_string(detail_rows[1].td.div.string)
                try:
                    aircraft = customfunction.AIRCRAFTS[aircraft]
                except KeyError:
                    # Unknown aircraft code: keep the scraped text rather
                    # than aborting the whole scrape.
                    pass
                planeinfo = '%s | %s (%s)' % (flightno, aircraft, duration)

                # A cabin is bookable when its cell holds a <font> element;
                # tax is only looked up for bookable cabins.
                firstmile = tds[5].font
                firstmile = firstmile.label.string if firstmile else 0
                firsttax = get_tax(log_file, cookies, index, 'O') if firstmile else 0
                businessmile = tds[6].font
                businessmile = businessmile.label.string if businessmile else 0
                businesstax = get_tax(log_file, cookies, index, 'I') if businessmile else 0
                economymile = tds[7].font
                economymile = economymile.label.string if economymile else 0
                economytax = get_tax(log_file, cookies, index, 'X') if economymile else 0

                stoppage = "NONSTOP"
                flightinfo.append(
                    ('Flight ' + flightno, str(searchkey), stime, stoppage,
                     "test", origin, destination, departure + ':00',
                     arrival + ':00', duration, economymile, economytax,
                     businessmile, businesstax, firstmile, firsttax,
                     "Economy", "Business", "First", "airchina", departinfo,
                     arrivalinfo, planeinfo, operatedby, 'X Economy',
                     'I Business', 'O First', 'X', 'I', 'O'))

            log_file.write(str(flightinfo) + '\n')
        except Exception as e:
            log_file.write('Error/No data Message: ' + str(e) + '\n')
    finally:
        # Shut down the PhantomJS process and close the log file we opened.
        driver.quit()
        if owns_log_file:
            log_file.close()
def get_flight_info(driver, maindata, flightdate, searchkey, stime):
    """Build one 30-field record per row of the Air China flight list.

    For each row the flight's detail page (``DETAIL_URL``) is loaded through
    ``driver`` and parsed for operator, duration, departure/arrival details
    and aircraft.  Cabin availability is reported as 1 when the matching
    table cell contains an <input> element and 0 otherwise; taxes are 0.

    Args:
        driver: browser driver used to load each detail page.
        maindata: iterable of parsed table rows of the flight list.
        flightdate: date value substituted into ``DETAIL_URL``.
        searchkey: identifier stored in every record.
        stime: scrape timestamp stored in every record.

    Returns:
        A list of lists, one per flight row.
    """
    flightinfo = []
    for flight in maindata:
        tds = flight.select('td')
        flightno = tds[0].string
        departure = tds[1].strong.string
        arrival = tds[2].strong.string
        origin_dest = tds[3].string.split('-')
        origin = origin_dest[0]
        destination = origin_dest[1]

        driver.get(DETAIL_URL % (flightno, flightdate, origin + destination))
        detail_soup = BeautifulSoup(driver.page_source, "lxml")
        tables = detail_soup.select('table')

        summary_rows = tables[0].select('tr')
        operatedby = get_clean_string(summary_rows[0].select('td')[1].div.string)
        duration = get_clean_string(summary_rows[1].select('td')[1].div.string)

        detail_rows = tables[1].select('tr')
        cells = detail_rows[0].select('td')
        # First cell is the departure, second the arrival; both use the
        # "<time> | <from/at> <airport>" layout.
        endpoints = []
        for cell, joiner in ((cells[0], ' | from '), (cells[1], ' | at ')):
            spans = cell.div.select('span')
            airport = get_neat_string(spans[0].string)
            when = get_neat_string(spans[1].text)
            when = datetime.datetime.strptime(
                when, ' %a, %d %b %Y %H:%M ').strftime('%Y/%m/%d %H:%M')
            airport_ = customfunction.get_airport_detail(
                get_airport_code(airport)) or airport
            endpoints.append(when + joiner + airport_)
        departinfo, arrivalinfo = endpoints

        aircraft = get_clean_string(detail_rows[1].td.div.string)
        try:
            aircraft = customfunction.AIRCRAFTS[aircraft]
        except KeyError:
            # Unknown aircraft code: keep the scraped text instead of failing.
            pass
        planeinfo = '%s | %s (%s)' % (flightno, aircraft, duration)

        firstmile = 1 if tds[5].input else 0
        businessmile = 1 if tds[6].input else 0
        economymile = 1 if tds[7].input else 0
        firsttax = businesstax = economytax = 0

        stoppage = "NONSTOP"
        flightinfo.append([
            'Flight ' + flightno, searchkey, stime, stoppage, "test", origin,
            destination, departure + ':00', arrival + ':00', duration,
            economymile, economytax, businessmile, businesstax, firstmile,
            firsttax, "Economy", "Business", "First", "airchina", departinfo,
            arrivalinfo, planeinfo, operatedby, 'X Economy', 'I Business',
            'O First', 'X', 'I', 'O'
        ])
    return flightinfo
def virginAmerica(from_airport,to_airport,searchdate,searchid, passenger=1):
    """Query Virgin America's points search API and store the flights found.

    A one-way POINTS search is posted to the booking API.  Every returned
    flight becomes one row holding the cheapest fare per cabin and the
    '@'-joined depart/arrive/plane details; rows are inserted into
    ``pexproject_flightdata`` in batches of 50 unless ``DEV_LOCAL`` is set.
    Errors are reported with "Something wrong" and not raised.  Whatever
    happens, a final row whose text columns are "flag" is inserted for
    ``searchid`` (again unless ``DEV_LOCAL`` is set).

    Args:
        from_airport: origin airport code (stripped and upper-cased).
        to_airport: destination airport code (stripped and upper-cased).
        searchdate: travel date as 'MM/DD/YYYY'.
        searchid: identifier stored with every inserted row.
        passenger: number of adults sent to the API.

    Returns:
        None.

    Raises:
        ValueError: if ``searchdate`` is not 'MM/DD/YYYY'.
    """
    insert_sql = (
        "INSERT INTO pexproject_flightdata "
        "(flighno,searchkeyid,scrapetime,stoppage,stoppage_station,origin,"
        "destination,departure,arival,duration,maincabin,maintax,firstclass,"
        "firsttax,business,businesstax,cabintype1,cabintype2,cabintype3,"
        "datasource,departdetails,arivedetails,planedetails,operatedby,"
        "economy_code,business_code,first_code,eco_fare_code,"
        "business_fare_code,first_fare_code) "
        "VALUES (" + ",".join(["%s"] * 30) + ");"
    )
    flag_sql = (
        "INSERT INTO pexproject_flightdata "
        "(flighno,searchkeyid,scrapetime,stoppage,stoppage_station,origin,"
        "destination,duration,maincabin,maintax,firstclass,firsttax,business,"
        "businesstax,cabintype1,cabintype2,cabintype3,datasource,"
        "departdetails,arivedetails,planedetails,operatedby,economy_code,"
        "business_code,first_code) "
        "VALUES (" + ",".join(["%s"] * 25) + ");"
    )

    db = cursor = None
    if not DEV_LOCAL:
        db = customfunction.dbconnection()
        cursor = db.cursor()

    from_airport = from_airport.strip().upper()
    to_airport = to_airport.strip().upper()
    dt = datetime.datetime.strptime(searchdate, '%m/%d/%Y')
    searchdate = dt.strftime('%Y-%m-%d')
    stime = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    url = 'https://www.virginamerica.com/api/v2/booking/search'
    header = {"Content-Type": "application/json"}
    body = {
        "oneWay": {
            "origin": from_airport,
            "dest": to_airport,
            "departureDate": searchdate,
            "numOfAdults": passenger,
            "bookingType": "POINTS"
        }
    }
    cabin_titles = {'economy': 'Economy', 'business': 'Business', 'first': 'First'}

    try:
        res = requests.post(url=url, headers=header, data=json.dumps(body))
        res_json = res.json()
        flightList = res_json["response"]["departingFlightsInfo"]["flightList"]
        value_string = []

        for flights in flightList.values():
            for flightInfo in flights:
                flightType = flightInfo['flightType']

                # ---- cheapest fare per cabin -------------------------------
                miles = {'economy': 0, 'business': 0, 'first': 0}
                tax = {'economy': 0, 'business': 0, 'first': 0}
                fare_code = {'economy': '', 'business': '', 'first': ''}
                fare_class = {'economy': '', 'business': '', 'first': ''}
                for fare_key, fare in flightInfo['fareList'].items():
                    if 'fareBasisCode' not in fare:
                        continue
                    fare_tax = float(fare['pointsFare']['totalTax'])
                    fare_miles = int(fare['pointsFare']['totalPoints'])
                    codes = [item['classOfService']
                             for item in fare['classOfServiceList']]
                    # Decide the cabin first: 'MC' is a substring of 'MCS',
                    # so an MCS fare must never fall through to economy.
                    if 'MCS' in fare_key:
                        cabin = 'business'
                    elif 'MC' in fare_key:
                        cabin = 'economy'
                    elif 'FIRST' in fare_key:
                        cabin = 'first'
                    else:
                        continue
                    if miles[cabin] == 0 or miles[cabin] > fare_miles:
                        suffix = ' ' + cabin_titles[cabin]
                        miles[cabin] = fare_miles
                        tax[cabin] = fare_tax
                        fare_code[cabin] = ','.join(codes)
                        fare_class[cabin] = (suffix + '@').join(codes) + suffix

                # ---- segments ----------------------------------------------
                # A non-stop flight carries a single 'flightSegment'; other
                # flights carry a 'flightList' of legs with one segment each.
                if 'NON_STOP' in flightType:
                    segments = [flightInfo['flightSegment']]
                else:
                    segments = [leg['flightSegment']
                                for leg in flightInfo['flightList']]

                source = ''
                dest = ''
                departureTime = ''
                arivalTime = ''
                flightNo = ''
                departArray = []
                ariveArray = []
                flightArray = []
                trip_minutes = 0
                previous_arrival = None
                last_index = len(segments) - 1

                for k, segment in enumerate(segments):
                    departure = segment["departure"]
                    dep_parts = segment["departureDateTime"].split("T")
                    dep_at = datetime.datetime.strptime(
                        dep_parts[1].split("-")[0], '%H:%M:%S').replace(second=0)
                    dep_hhmm = dep_at.strftime('%H:%M')
                    airport_ = customfunction.get_airport_detail(departure) or departure
                    departArray.append(
                        dep_parts[0] + " " + dep_hhmm + " | from " + airport_)

                    arrive_at = segment["arrival"]
                    arr_parts = segment["arrivalDateTime"].split("T")
                    arr_at = datetime.datetime.strptime(
                        arr_parts[1].split("-")[0], '%H:%M:%S').replace(second=0)
                    arr_hhmm = arr_at.strftime('%H:%M')
                    airport_ = customfunction.get_airport_detail(arrive_at) or arrive_at
                    ariveArray.append(
                        str(arr_parts[0]) + " " + str(arr_hhmm) + " | at " + airport_)

                    layover = 0
                    if previous_arrival is not None:
                        layover = (dep_at - previous_arrival).total_seconds() / 60
                        if layover < 0:
                            # Only clock times are compared, so a connection
                            # leaving after midnight wraps to the next day.
                            layover += 24 * 60
                    previous_arrival = arr_at

                    flightNum = segment["flightNum"]
                    elapsed = int(segment["elapsedTime"])
                    air_time = str(elapsed // 60) + "h " + str(elapsed % 60) + "m"
                    flightArray.append(
                        "VX " + str(flightNum) + " | Airbus "
                        + segment["aircraftType"] + " (" + air_time + ")")
                    trip_minutes = trip_minutes + layover + elapsed

                    if k == 0:
                        source = departure
                        flightNo = "VX " + str(flightNum)
                        departureTime = dep_hhmm
                    if k == last_index:
                        dest = arrive_at
                        arivalTime = arr_hhmm

                duration = (str(int(trip_minutes) // 60) + "h "
                            + str(int(trip_minutes) % 60) + "m")

                stop = len(departArray) - 1
                if stop == 0:
                    stoppage = "NONSTOP"
                elif stop == 1:
                    stoppage = "1 STOP"
                else:
                    stoppage = str(stop) + " STOPS"

                operatortext = ''
                value_string.append((
                    str(flightNo), str(searchid), stime, stoppage, "test",
                    source, dest, departureTime, arivalTime, duration,
                    str(miles['economy']), str(tax['economy']),
                    str(miles['business']), str(tax['business']),
                    str(miles['first']), str(tax['first']),
                    "Economy", "Business", "First", "Virgin America",
                    '@'.join(departArray), '@'.join(ariveArray),
                    '@'.join(flightArray), operatortext,
                    fare_class['economy'], fare_class['business'],
                    fare_class['first'], fare_code['economy'],
                    fare_code['business'], fare_code['first']))

                if len(value_string) == 50:
                    # cursor/db only exist outside DEV_LOCAL mode.
                    if not DEV_LOCAL:
                        cursor.executemany(insert_sql, value_string)
                        db.commit()
                    print(value_string)
                    value_string = []

        if len(value_string) > 0:
            if not DEV_LOCAL:
                cursor.executemany(insert_sql, value_string)
                db.commit()
            print("%d row inserted" % len(value_string))
    except Exception:
        print("Something wrong")
    finally:
        if not DEV_LOCAL:
            cursor.execute(
                flag_sql,
                ("flag", str(searchid), stime, "flag", "test", "flag", "flag",
                 "flag", "0", "0", "0", "0", "0", "0", "flag", "flag", "flag",
                 "Virgin America", "flag", "flag", "flag", "flag", "flag",
                 "flag", "flag"))
            db.commit()
def aeroflot(ocity_code, dcity_code, searchdate, searchkey):
    """Scrape Aeroflot award fares for one route and date and log the rows.

    Opens the search page built from the module-level ``URL`` template in
    PhantomJS, posts one tax request per bookable fare found in the
    ``#dtcontainer-both`` results table, then reads the itineraries from the
    ``templateData`` JSON embedded in the page. Each itinerary yields one
    30-field row when it has an ``AE`` or ``AB`` fare and a further row when
    it has an ``AC`` fare. The collected rows are written to the log.

    Any error raised while parsing is written to the log instead of being
    propagated. The browser (and the log file, when one was opened) is
    always released before returning.

    NOTE(review): a DB connection is opened when ``DEV_LOCAL`` is false, but
    nothing in this function writes the rows to it -- confirm whether an
    insert is missing.

    Args:
        ocity_code: Origin code substituted into ``URL``.
        dcity_code: Destination code substituted into ``URL``.
        searchdate: Travel date substituted into ``URL`` as given.
        searchkey: Search identifier stored as the second field of each row.
    """
    driver = webdriver.PhantomJS(service_args=[
        '--ignore-ssl-errors=true', '--ssl-protocol=any', '--load-images=false'
    ], service_log_path='/tmp/ghostdriver.log')
    log_file = None
    try:
        driver.set_window_size(1120, 1080)
        url = URL % (ocity_code, dcity_code, searchdate)
        driver.get(url)
        # The last four characters of the landing URL are sent back as the
        # 'execution' field of every tax request.
        DATA['execution'] = driver.current_url[-4:]
        sys.stdout = codecs.getwriter('utf-8')(sys.stdout)
        log_path = 'aeroflot_log' if DEV_LOCAL else '/home/upwork/aeroflot_log'
        log_file = open(log_path, 'a') if DEBUG else sys.stdout
        log_file.write('\n\n' + '=' * 70 + '\n\n')
        log_file.write(url + '\n\n')
        # NOTE(review): opened but never used below -- see docstring.
        db = customfunction.dbconnection() if not DEV_LOCAL else None
        stime = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        flightinfo = []
        try:
            html_page = driver.page_source
            soup = BeautifulSoup(html_page, "lxml")
            radio_id = soup.find('input', id=re.compile('^flight_both_'))['id']
            radio = driver.find_element_by_id(radio_id)
            time.sleep(1)
            radio.click()
            cookies = {
                'JSESSIONID': get_cookie(driver, 'JSESSIONID', '/SSW2010'),
                'WLPCOOKIE': get_cookie(driver, 'WLPCOOKIE', '/SSW2010'),
            }

            maindata = soup.select("#dtcontainer-both tbody tr")
            operatorArray = []
            taxArray = []
            for flight in maindata:
                ths = flight.select('th')
                operatorArray.append(ths[2].select('.carrier-name')[0].string)
                taxes = {"maintax": 0, "businesstax": 0, "firsttax": 0}
                for flight_type_code, tax_field_name in TAXES.items():
                    flight_id = find_flight_id(flight, flight_type_code)
                    if not flight_id:
                        continue
                    DATA['contextObject'] = [CONTEXTOBJECT % flight_id]
                    resp = requests.post(
                        FLIGHT_URL,
                        cookies=cookies,
                        data=DATA,
                    )
                    resp_html = resp.json()['content'][0]['partials'][
                        'initialized']
                    resp_html = HTMLParser.HTMLParser().unescape(resp_html)
                    taxsoup = BeautifulSoup(resp_html, 'lxml')
                    amounts = taxsoup.find(
                        'div', class_='total-top').find_all(
                            'span', class_='prices-amount')
                    # The second span is the one read, so at least two
                    # spans are required; otherwise fall back to zero.
                    tax_amount = amounts[1].text if len(amounts) > 1 else 0
                    if 'main' in tax_field_name:
                        taxes["maintax"] = float(tax_amount)
                    elif 'business' in tax_field_name:
                        taxes["businesstax"] = float(tax_amount)
                    elif 'first' in tax_field_name:
                        taxes["firsttax"] = float(tax_amount)
                taxArray.append(taxes)

            json_text = re.search(
                r'^\s*var templateData = \s*({.*?})\s*;\s*$',
                html_page,
                flags=re.DOTALL | re.MULTILINE).group(1)
            jsonData = json.loads(json_text)
            tempdata = jsonData["rootElement"]["children"][1]["children"][0][
                "children"][5]["model"]["allItineraryParts"]

            count = 0
            for k, itinerary in enumerate(tempdata):
                segments = itinerary["segments"]
                rowRecord = itinerary["itineraryPartData"]
                origin = rowRecord["departureCode"]
                dest = rowRecord["arrivalCode"]
                deptDateTime = rowRecord["departureDate"].split(" ")
                originDetails = []
                destDetails = []
                flightsDetails = []
                operatorCarrier = []
                for segment in segments:
                    segOrigin = segment["departureCode"]
                    airport_ = customfunction.get_airport_detail(
                        segOrigin) or segOrigin
                    # [:-3] drops the last three characters of the
                    # timestamp string before it is displayed.
                    originDetails.append(
                        segment["departureDate"][:-3] + " | from " + airport_)
                    segDest = segment["arrivalCode"]
                    airport_ = customfunction.get_airport_detail(
                        segDest) or segDest
                    destDetails.append(
                        segment["arrivalDate"][:-3] + " | at " + airport_)
                    # presumably the results table has one row per segment,
                    # so the carriers are consumed in order -- verify.
                    if len(operatorArray) > count:
                        operatorCarrier.append(operatorArray[count])
                    count = count + 1

                departtime = datetime.datetime.strptime(
                    deptDateTime[1], '%H:%M:%S').strftime('%H:%M')
                arrivalDateTime = rowRecord["arrivalDate"].split(" ")
                arive = datetime.datetime.strptime(
                    arrivalDateTime[1], '%H:%M:%S').strftime('%H:%M')

                # Durations are divided by 60000 to obtain minutes; floor
                # division keeps whole numbers on every Python version.
                total_minutes = int(rowRecord["totalTripDuration"]) // 60000
                tripDuration = (str(total_minutes // 60) + "h " +
                                str(total_minutes % 60) + "m")

                departureCodes = rowRecord["departureCodes"]
                flightDurations = rowRecord["flightDurations"]
                flightNumber = rowRecord["flightNumber"]
                airlineCodes = rowRecord["airlineCodes"]
                aircraftType = rowRecord["aircraftType"]
                fltno = ''
                for f in range(len(flightNumber)):
                    flightNo = airlineCodes[f] + " " + str(flightNumber[f])
                    if f == 0:
                        fltno = flightNo
                    leg_minutes = int(flightDurations[f]) // 60000
                    fltTimeFormat = (str(leg_minutes // 60) + "h " +
                                     str(leg_minutes % 60) + "m")
                    flightsDetails.append(
                        flightNo + " | " +
                        customfunction.AIRCRAFTS[aircraftType[f]] +
                        " (" + fltTimeFormat + ")")

                originDetailString = '@'.join(originDetails)
                arivedetailtext = '@'.join(destDetails)
                planedetailtext = '@'.join(flightsDetails)
                operatortext = ''
                if len(operatorCarrier) > 0:
                    operatortext = '@'.join(operatorCarrier)

                noOfStop = len(departureCodes) - 1
                if noOfStop == 0:
                    stoppage = "NONSTOP"
                elif noOfStop == 1:
                    stoppage = "1 STOP"
                else:
                    stoppage = str(noOfStop) + " STOPS"

                allPrices = itinerary["basketsRef"]
                economy = 0
                ecotax = 0
                business = 0
                businesstax = 0
                first = 0
                firsttax = 0
                for key in allPrices:
                    farePrices = allPrices[key]["prices"]["moneyElements"]
                    miles = farePrices[0]["moneyTO"]["amount"]
                    if 'AE' in key:
                        economy = miles
                        ecotax = taxArray[k]['maintax']
                    elif 'AB' in key:
                        business = miles
                        businesstax = taxArray[k]['businesstax']
                    elif 'AC' in key:
                        first = miles
                        firsttax = taxArray[k]['firsttax']

                if economy or business:
                    flightinfo.append(
                        ('Flight ' + str(fltno), searchkey, stime, stoppage,
                         "test", origin, dest, departtime, arive,
                         tripDuration, str(economy), str(ecotax),
                         str(business), str(businesstax), str(0), str(0),
                         "Economy", "Business", "First", "aeroflot",
                         originDetailString, arivedetailtext,
                         planedetailtext, operatortext, 'X Economy',
                         'O Business', 'First', 'X', 'O', 'I'))
                if first:
                    # The 'AC' fare is reported in the main-cabin fields of
                    # a separate row labelled 'F Comfort'.
                    flightinfo.append(
                        ('Flight ' + str(fltno), searchkey, stime, stoppage,
                         "test", origin, dest, departtime, arive,
                         tripDuration, str(first), str(firsttax), str(0),
                         str(0), str(0), str(0), "Economy", "Business",
                         "First", "aeroflot", originDetailString,
                         arivedetailtext, planedetailtext, operatortext,
                         'F Comfort', 'O Business', 'First', 'F', 'O', 'I'))
            log_file.write(str(flightinfo))
        except Exception as e:
            log_file.write('Error Message: ' + str(e) + '\n')
            log_file.write('Error or No data!\n\n')
    finally:
        # Release the log file (only when it is a real file, not stdout)
        # and always shut the headless browser down.
        if log_file is not None and log_file is not sys.stdout:
            log_file.close()
        driver.quit()
def alaska(ocity_code, dcity_code, searchdate, searchkey):
    """Scrape Alaska Airlines award results for one route and date.

    Fills in the one-way award search form on alaskaair.com with PhantomJS,
    waits for the results page and parses every ``listitem`` entry of
    ``MatrixTable0`` into a 30-field row collected in a local list.

    If the search form cannot be submitted, a "flag" row is stored (unless
    ``DEV_LOCAL`` is set), the browser is closed and ``searchkey`` is
    returned. Errors while parsing the results page are reported on stdout
    and swallowed. The browser is closed on every path.

    NOTE(review): the parsed rows are only printed when ``DEV_LOCAL`` is
    set; nothing here inserts them into the database -- confirm whether an
    insert is missing.

    Args:
        ocity_code: Origin typed into the ``fromCity1`` field.
        dcity_code: Destination typed into the ``toCity1`` field.
        searchdate: Date typed into ``departureDate1``; its last two
            characters are also used as the two-digit year when parsing
            segment times.
        searchkey: Search identifier stored in each row.

    Returns:
        ``searchkey`` when the form could not be submitted, otherwise None.
    """
    driver = webdriver.PhantomJS(service_args=[
        '--ignore-ssl-errors=true', '--ssl-protocol=any', '--load-images=false'
    ], service_log_path='/tmp/ghostdriver.log')
    driver.set_window_size(1120, 1080)
    url = "https://www.alaskaair.com"

    def storeFlag(searchkey, stime):
        # Stores the all-"flag" placeholder row for this search. The cursor
        # is created here because this function has no other cursor in scope.
        cursor = db.cursor()
        cursor.execute(
            "INSERT INTO pexproject_flightdata (flighno,searchkeyid,scrapetime,stoppage,stoppage_station,origin,destination,duration,maincabin,maintax,firstclass,firsttax,business,businesstax,cabintype1,cabintype2,cabintype3,datasource,departdetails,arivedetails,planedetails,operatedby,economy_code,business_code,first_code) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s);",
            ("flag", str(searchkey), stime, "flag", "test", "flag", "flag",
             "flag", "0", "0", "0", "0", "0", "0", "flag", "flag", "flag",
             "alaska", "flag", "flag", "flag", "flag", "flag", "flag", "flag"))
        db.commit()

    sys.stdout = codecs.getwriter('utf-8')(sys.stdout)
    db = customfunction.dbconnection() if not DEV_LOCAL else None
    flightinfo = []
    stime = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    try:
        driver.get(url)
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, "oneWay")))
        oneway = driver.find_element_by_id('oneWay')
        oneway.click()
        if oneway.get_attribute('value') == 'false':
            driver.execute_script("arguments[0].click();", oneway)
        # Looked up only so that a missing award checkbox fails early.
        driver.find_element_by_id("awardReservation")
        driver.execute_script(
            "document.getElementById('awardReservation').setAttribute('checked', 'checked')"
        )
        origin = driver.find_element_by_id("fromCity1")
        origin.clear()
        origin.send_keys(ocity_code.strip())
        destination = driver.find_element_by_id("toCity1")
        destination.clear()
        destination.send_keys(dcity_code.strip())
        flight_date = driver.find_element_by_id("departureDate1")
        flight_date.clear()
        flight_date.send_keys(str(searchdate))
        # Re-check the one-way toggle after the fields have been filled.
        if oneway.get_attribute('value') == 'false':
            driver.execute_script("arguments[0].click();", oneway)
        driver.find_element_by_id("findFlights").send_keys(Keys.ENTER)
    except Exception:
        print("before data page")
        try:
            if not DEV_LOCAL:
                storeFlag(searchkey, stime)
        finally:
            driver.quit()
        return searchkey

    try:
        driver.save_screenshot('/root/out_enter.png')
        WebDriverWait(driver, 25).until(
            EC.presence_of_element_located((By.ID, "ContinueButton")))
        html_page = driver.page_source
        if DEV_LOCAL:
            # The dump file is still created/truncated as before, but the
            # handle is closed again because nothing is written to it.
            log_file = open('/root/1.html', 'w')
            log_file.close()
        soup = BeautifulSoup(html_page, "lxml")
        flights = soup.find(id='MatrixTable0').find_all(role='listitem')
        for flight in flights:
            tds = flight.select('td.FlightCell')
            maindata = tds[0].select('.OptionDetails')[0]
            flightno, departure, arrival = [], [], []
            planeinfo, operatedby = [], []
            departure_t, arrival_t = [], []
            for sc_ in maindata.select('.SegmentContainer'):
                detail_line = sc_.select('.DetailsLine')
                flightno_ = get_clean_string(
                    detail_line[0].select('.DetailsFlightNumber')[0].text)
                # Reset per segment so an unmatched carrier code neither
                # raises NameError nor reuses the previous segment's value.
                operatedby_ = ''
                for s_t_k, s_t_v in partners.items():
                    if s_t_k in flightno_:
                        flightno_ = flightno_.replace(s_t_k, s_t_v)
                        operatedby_ = 'Operated By ' + s_t_k
                        break

                origin_ = get_clean_string(
                    detail_line[0].select('.DetailsStation')[0].text)
                # The page text carries no year, so the two-digit year is
                # taken from the end of searchdate.
                departure_ = get_clean_string(detail_line[0].select(
                    '.DetailsTime span')[-1].text) + ' ' + searchdate[-2:]
                departure_ = datetime.datetime.strptime(
                    departure_, '%I:%M%p, %a, %b %d %y')
                departure_t_ = departure_.strftime('%H:%M:%S')
                departure_ = departure_.strftime('%Y/%m/%d %H:%M')

                destination_ = get_clean_string(
                    detail_line[-1].select('.DetailsStation')[0].text)
                arrival_ = get_clean_string(detail_line[-1].select(
                    '.DetailsTime span')[-1].text) + ' ' + searchdate[-2:]
                arrival_ = datetime.datetime.strptime(
                    arrival_, '%I:%M%p, %a, %b %d %y')
                arrival_t_ = arrival_.strftime('%H:%M:%S')
                arrival_ = arrival_.strftime('%Y/%m/%d %H:%M')

                DetailsSmall = sc_.select('.DetailsSmall')
                planeinfo_ = get_clean_string(
                    DetailsSmall[0].select('ul li')[0].text).replace(
                        'Aircraft: ', '')
                duration_ = get_clean_string(
                    DetailsSmall[1].select('ul li')[0].text).replace(
                        'Duration: ', '').replace('hours', 'h ').replace(
                            'minutes', 'm')

                flightno.append(flightno_)
                if not DEV_LOCAL:
                    origin_ = customfunction.get_airport_detail(
                        customfunction.get_airport_code(origin_)) or origin_
                departure.append(departure_ + ' | from ' + origin_)
                departure_t.append(departure_t_)
                if not DEV_LOCAL:
                    destination_ = customfunction.get_airport_detail(
                        customfunction.get_airport_code(
                            destination_)) or destination_
                arrival.append(arrival_ + ' | at ' + destination_)
                arrival_t.append(arrival_t_)
                if not DEV_LOCAL:
                    planeinfo_ = customfunction.AIRCRAFTS.get(
                        planeinfo_, planeinfo_)
                planeinfo.append("{} | {} ({})".format(
                    flightno_, planeinfo_, duration_))
                operatedby.append(operatedby_)

            stop = int(flight['stops'])
            if stop == 0:
                stoppage = "NONSTOP"
            elif stop == 1:
                stoppage = "1 STOP"
            else:
                stoppage = str(stop) + " STOPS"

            total_duration = get_clean_string(
                maindata.select('.smallText.rightaligned.DetailsSmall')
                [0].text).replace('Total duration: ', '').replace(
                    'hours', 'h').replace('minutes', 'm')
            (main_mile, main_tax, business_mile, business_tax, first_mile,
             first_tax) = get_miles_taxes(flight.select('td.matrix-cell'))

            flightinfo.append(
                (flightno[0], str(searchkey), stime, stoppage, "test",
                 flight['orig'], flight['dest'], departure_t[0],
                 arrival_t[-1], total_duration, main_mile, main_tax,
                 business_mile, business_tax, first_mile, first_tax,
                 "Economy", "Business", "First", "alaska",
                 '@'.join(departure), '@'.join(arrival),
                 '@'.join(planeinfo), '@'.join(operatedby),
                 '', '', '', '', '', ''))
            if DEV_LOCAL:
                # Prints the row tuple that was just collected.
                print(flightinfo[-1])
    except Exception:
        print('Something is wrong')
    finally:
        # The results path never closed the headless browser before.
        driver.quit()
def parse_flight(flight, ibe_conversation, driver, log_file, db, searchkey,
                 searchdate):
    """Build the "s7" rows for one result node and insert them when a DB is given.

    Reads the summary times, airports and duration from ``flight``, obtains
    miles and tax for the four tariffs (economy/business, optimum/priority)
    through ``get_miles_tax``, and walks every ``extended_info`` node to
    collect per-segment departure, arrival, aircraft and carrier text. One
    row is produced for the optimum tariffs and one for the priority
    tariffs, each only when at least one of its two mile values is not
    ``'0'``.

    Args:
        flight: Parsed node for one search result (queried with ``find_all``).
        ibe_conversation: Passed through to ``get_miles_tax``.
        driver: Passed through to ``get_miles_tax``.
        log_file: Not used by this function.
        db: Database connection; when falsy nothing is stored.
        searchkey: Search identifier stored in each row.
        searchdate: Travel date as ``YYYY-MM-DD``.
    """
    stime = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    search_day = datetime.datetime.strptime(searchdate, '%Y-%m-%d')

    def summary_text(tag, qa_name):
        # Text of the first node carrying the given data-qa marker.
        return flight.find_all(tag, {"data-qa": qa_name})[0].text

    def miles_and_tax(qa_name, qa_name_active):
        # A tariff cell is published under either its plain or its
        # "_active" marker; take whichever is present.
        nodes = flight.find_all("div", {"data-qa": qa_name})
        if not nodes:
            nodes = flight.find_all("div", {"data-qa": qa_name_active})
        return get_miles_tax(nodes[0], ibe_conversation, driver)

    departure = summary_text("time", "timeDepartureTotal_flightItemShort")
    arrival = summary_text("time", "timeArrivedTotal_flightItemShort")
    origin = summary_text("span", "airportDepartureTotal_flightItemShort")
    destination = summary_text("span", "airportArrivedTotal_flightItemShort")
    duration = flight.find_all("div", {"class": "duration"})[0].text

    eo_mile, eo_tax = miles_and_tax("tariff_economy optimum",
                                    "tariff_economy optimum_active")
    ep_mile, ep_tax = miles_and_tax("tariff_economy priority",
                                    "tariff_economy priority_active")
    bo_mile, bo_tax = miles_and_tax("tariff_business optimum",
                                    "tariff_business optimum_active")
    bp_mile, bp_tax = miles_and_tax("tariff_business priority",
                                    "tariff_business priority_active")

    departs, arrives, flights = [], [], []
    flightnos, operatedby = [], []

    # The stop count is the index of the last segment (-1 when there is none).
    stops = -1
    for stops, extend_info in enumerate(
            flight.find_all("div", {"data-qa": "extended_info"})):
        leg = extend_info.find_all("div", {"class": "info-flight"})[0]
        leg_departure = leg.find_all(
            "time", {"data-qa": "timeDeparture_flightItem"})[0].text
        depart_day = search_day.strftime('%Y/%m/%d')
        leg_arrival = leg.find_all(
            "time", {"data-qa": "timeArrived_flightItem"})[0].text

        arrival_box = extend_info.find_all(
            "div", {"class": "arrival-time"})[0]
        if arrival_box.find_all("span", {"data-qa": "nextDayArrived"}):
            arrive_on = search_day + timedelta(days=1)
        else:
            arrive_on = search_day
        arrive_day = arrive_on.strftime('%Y/%m/%d')

        leg_number = leg.find_all(
            "span", {"data-qa": "number_flightItem"})[0].text
        items = leg.find_all("li")
        carrier_text = items[2].text
        plane_text = items[3].text
        leg_duration = items[4].text.strip()
        origin_code = leg.find_all(
            "span", {"data-qa": "airportDeparture_flightItem"})[0].text
        dest_code = leg.find_all(
            "span", {"data-qa": "airportArrived_flightItem"})[0].text

        # Collapse every run of whitespace to a single space.
        carrier_text = re.sub(r"\s+", " ", carrier_text, flags=re.UNICODE)
        plane_text = re.sub(r"\s+", " ", plane_text, flags=re.UNICODE)

        origin_airport = (customfunction.get_airport_detail(origin_code)
                          or origin_code)
        depart_line = '{} {} | from {}'.format(depart_day, leg_departure,
                                               origin_airport)
        dest_airport = (customfunction.get_airport_detail(dest_code)
                        or dest_code)
        arrive_line = '{} {} | at {}'.format(arrive_day, leg_arrival,
                                             dest_airport)

        # Apply the replacements listed in `specialties` to the aircraft text.
        for old, new in specialties.items():
            plane_text = plane_text.replace(old, new)

        flightnos.append(leg_number)
        departs.append(depart_line)
        arrives.append(arrive_line)
        flights.append('{} | {} ({})'.format(leg_number, plane_text,
                                             leg_duration))
        operatedby.append(carrier_text)

    stoppage = {0: "NONSTOP", 1: "1 STOP"}.get(stops, str(stops) + " STOPS")

    departdetailsText = '@'.join(departs)
    arivedetailsText = '@'.join(arrives)
    planedetails = '@'.join(flights)
    operatedbytext = '@'.join(operatedby)

    def build_row(eco_mile, eco_tax, bus_mile, bus_tax, eco_label, bus_label):
        # Everything except the miles, taxes and first two class labels is
        # shared by the optimum and priority rows.
        return ('Flight ' + flightnos[0], str(searchkey), stime, stoppage,
                "test", origin, destination, departure + ':00',
                arrival + ':00', duration, eco_mile, eco_tax, bus_mile,
                bus_tax, '0', '0', "Economy", "Business", "First", "s7",
                departdetailsText, arivedetailsText, planedetails,
                operatedbytext, eco_label, bus_label,
                'Business Priority@Business Priority@Business Priority',
                'X', 'I', 'O')

    flightinfo = []
    if eo_mile != '0' or bo_mile != '0':
        flightinfo.append(build_row(
            eo_mile, eo_tax, bo_mile, bo_tax,
            'Economy Optimum@Economy Optimum@Economy Optimum',
            'Business Optimum@Business Optimum@Business Optimum'))
    if ep_mile != '0' or bp_mile != '0':
        flightinfo.append(build_row(
            ep_mile, ep_tax, bp_mile, bp_tax,
            'Economy Priority@Economy Priority@Economy Priority',
            'Business Priority@Business Priority@Business Priority'))

    if db:
        cursor = db.cursor()
        cursor.executemany(
            "INSERT INTO pexproject_flightdata (flighno,searchkeyid,scrapetime,stoppage,stoppage_station,origin,destination,departure,arival,duration,maincabin,maintax,firstclass,firsttax,business,businesstax,cabintype1,cabintype2,cabintype3,datasource,departdetails,arivedetails,planedetails,operatedby,economy_code,business_code,first_code,eco_fare_code,business_fare_code,first_fare_code) VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s);",
            flightinfo)
        db.commit()
def virginAustralia(from_airport, to_airport, searchdate, searchid, cabinName,
                    isflag):
    """Scrape Virgin Australia points fares for one route, date and cabin.

    Opens the booking URL in Chrome (inside a virtual display), submits the
    search form, and reads the itineraries from the ``templateData`` JSON
    embedded in the results page. One row is built per fare basket of each
    itinerary and inserted in batches of 50 (or printed when ``DEV_LOCAL``
    is set).

    Every exit path stores the "flag" row (when ``isflag`` is true and
    ``DEV_LOCAL`` is false) and shuts down the browser and display. Errors
    raised while building or inserting rows still propagate to the caller,
    but only after that cleanup has run.

    Args:
        from_airport: Origin code placed in the booking URL.
        to_airport: Destination code placed in the booking URL.
        searchdate: Travel date as ``MM/DD/YYYY``.
        searchid: Search identifier stored in each row.
        cabinName: ``"maincabin"`` selects economy ("E"); any other value
            selects business ("B").
        isflag: When true, the "flag" row is stored on exit.

    Returns:
        None when the form could not be submitted, the input was rejected
        or the site reported no flights; ``searchid`` otherwise.
    """
    if not DEV_LOCAL:
        db = customfunction.dbconnection()
        cursor = db.cursor()
    stime = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    dt = datetime.datetime.strptime(searchdate, '%m/%d/%Y')
    dateday = dt.strftime('%d')
    datemonth = dt.strftime('%Y%m')
    cabinType = "E" if cabinName == "maincabin" else "B"
    url = "http://www.virginaustralia.com/au/en/bookings/flights/make-a-booking/?bookingType=flights&passthru=0&trip_type=0&origin=" + from_airport + "&destination=" + to_airport + "&travel_class=" + cabinType + "&adults=1&children=0&infants=0&date_flexible=0&use_points=1&showPromoCode=1&date_start_day=" + str(
        dateday) + "&date_start_month=" + str(datemonth)
    insert_sql = "INSERT INTO pexproject_flightdata (flighno,searchkeyid,scrapetime,stoppage,stoppage_station,origin,destination,departure,arival,duration,maincabin,maintax,firstclass,firsttax,business,businesstax,cabintype1,cabintype2,cabintype3,datasource,departdetails,arivedetails,planedetails,operatedby,economy_code,business_code,first_code,eco_fare_code,business_fare_code) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s);"

    display = Display(visible=0, size=(800, 600))
    display.start()
    driver = webdriver.Chrome()

    def storeFlag(searchid, stime, isflag):
        # Stores the "flag" row when requested, then always releases the
        # browser and the virtual display, even if the insert fails.
        try:
            if isflag and not DEV_LOCAL:
                cursor.execute(
                    "INSERT INTO pexproject_flightdata (flighno,searchkeyid,scrapetime,stoppage,stoppage_station,origin,destination,duration,maincabin,maintax,firstclass,firsttax,business,businesstax,cabintype1,cabintype2,cabintype3,datasource,departdetails,arivedetails,planedetails,operatedby,economy_code,business_code,first_code) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s);",
                    ("flag", str(searchid), stime, "flag", "test", "flag",
                     "flag", "flag", "0", "0", "0", "0", "0", "0", "flag",
                     "flag", "flag", "Virgin Australia", "flag", "flag",
                     "flag", "flag", "flag", "flag", "flag"))
                db.commit()
        finally:
            display.stop()
            driver.quit()

    def flush(rows):
        # Inserts one batch of rows, or prints it in local development.
        if not DEV_LOCAL:
            cursor.executemany(insert_sql, rows)
            db.commit()
        else:
            print(rows)

    try:
        driver.get(url)
        submitbtn = WebDriverWait(
            driver, 5).until(lambda driver: driver.find_element_by_xpath(
                "//*[contains(text(), 'Find Flights')]"))
        driver.execute_script("arguments[0].click();", submitbtn)
    except Exception:
        storeFlag(searchid, stime, isflag)
        return

    # A "page-dialog" element appearing means the input was rejected.
    try:
        WebDriverWait(driver, 2).until(
            EC.presence_of_element_located((By.ID, "page-dialog")))
    except Exception:
        print("form submitted")
    else:
        storeFlag(searchid, stime, isflag)
        return

    # "flightAdvisoryMessages" elements mean there is no flight data.
    try:
        WebDriverWait(
            driver, 5).until(lambda driver: driver.find_elements_by_class_name(
                "flightAdvisoryMessages"))
    except Exception:
        print("data found")
    else:
        storeFlag(searchid, stime, isflag)
        return

    try:
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, "dtcontainer-0")))
        html_page = driver.page_source
        soup = BeautifulSoup(html_page, "xml")
        templatedata = soup.find('script',
                                 text=re.compile('var templateData = '))
        json_text = re.search(r'^\s*var templateData = \s*({.*?})\s*;\s*$',
                              templatedata.string,
                              flags=re.DOTALL | re.MULTILINE).group(1)
        jsonData = json.loads(json_text)
        tempdata = jsonData["rootElement"]["children"][1]["children"][0][
            "children"][7]["model"]
    except Exception:
        storeFlag(searchid, stime, isflag)
        return searchid

    try:
        # The old pattern started with an invalid "\o" escape; "\b" anchors
        # the class name at a word boundary on both sides.
        operatordiv = soup.findAll(
            True, {'class': re.compile(r'\boperating-carrier-wrapper\b')})
        operatorArray = []
        for div in operatordiv:
            optrtext = ''
            optr = div.find(
                "span",
                {"class": ["carrier-name", "operator-meta-data-internal"]})
            if optr:
                optrtext = optr.text
            # startswith() is safe on an empty string, unlike optrtext[0].
            if optrtext.startswith('/'):
                optrtext = optrtext.replace('/', '')
            operatorArray.append(optrtext)

        maindata = tempdata['dayOffers'][0]["itineraryOffers"]
        value_string = []
        operatorcounter = 0
        for offer in maindata:
            segments = offer["segments"]
            rowRecord = offer["itineraryPartData"]
            origin = rowRecord["departureCode"]
            dest = rowRecord["arrivalCode"]
            deptDateTime = rowRecord["departureDate"].split(" ")

            originDetails = []
            destDetails = []
            flightsDetails = []
            operatingCarrier = []
            fareClassCode = []
            bookingCodes = []
            cabinLabel = " Economy" if cabinType == 'E' else " Business"
            for segment in segments:
                bookingCode = segment['bookingClass']
                bookingCodes.append(bookingCode)
                fareClassCode.append(bookingCode + cabinLabel)

                segOrigin = segment["departureCode"]
                if DEV_LOCAL:
                    airport_ = segOrigin
                else:
                    airport_ = customfunction.get_airport_detail(
                        segOrigin) or segOrigin
                # [:-3] drops the last three characters of the timestamp
                # string before it is displayed.
                originDetails.append(
                    segment["departureDate"][:-3] + " | from " + airport_)

                segDest = segment["arrivalCode"]
                if DEV_LOCAL:
                    airport_ = segDest
                else:
                    airport_ = customfunction.get_airport_detail(
                        segDest) or segDest
                destDetails.append(
                    segment["arrivalDate"][:-3] + " | at " + airport_)

                # Strictly greater: an index equal to the length is already
                # out of range.
                if len(operatorArray) > operatorcounter:
                    operatingCarrier.append(operatorArray[operatorcounter])
                operatorcounter = operatorcounter + 1

            # Only the fare-code field of the searched cabin is filled.
            fare_code = ','.join(bookingCodes)
            eco_fare_code = fare_code if cabinType == 'E' else ''
            bus_fare_code = fare_code if cabinType != 'E' else ''

            departtime = datetime.datetime.strptime(
                deptDateTime[1], '%H:%M:%S').strftime('%H:%M')
            arrivalDateTime = rowRecord["arrivalDate"].split(" ")
            arive = datetime.datetime.strptime(
                arrivalDateTime[1], '%H:%M:%S').strftime('%H:%M')

            # Durations are divided by 60000 to obtain minutes; floor
            # division keeps whole numbers on every Python version.
            total_minutes = int(rowRecord["totalTripDuration"]) // 60000
            tripDuration = (str(total_minutes // 60) + "h " +
                            str(total_minutes % 60) + "m")

            departureCodes = rowRecord["departureCodes"]
            flightDurations = rowRecord["flightDurations"]
            flightNumber = rowRecord["flightNumber"]
            airlineCodes = rowRecord["airlineCodes"]
            aircraftType = rowRecord["aircraftType"]
            fltno = ''
            for f in range(len(flightNumber)):
                flightNo = airlineCodes[f] + " " + str(flightNumber[f])
                if f == 0:
                    fltno = flightNo
                leg_minutes = int(flightDurations[f]) // 60000
                fltTimeFormat = (str(leg_minutes // 60) + "h " +
                                 str(leg_minutes % 60) + "m")
                if DEV_LOCAL:
                    aircraft = aircraftType[f]
                else:
                    aircraft = customfunction.AIRCRAFTS[aircraftType[f]]
                flightsDetails.append(flightNo + " | " + aircraft + " (" +
                                      fltTimeFormat + ")")

            originDetailString = '@'.join(originDetails)
            arivedetailtext = '@'.join(destDetails)
            planedetailtext = '@'.join(flightsDetails)

            ecoFareCode = ''
            busFareCode = ''
            firstFareCode = ''
            if len(fareClassCode) > 0:
                if cabinType == 'E':
                    ecoFareCode = '@'.join(fareClassCode)
                else:
                    busFareCode = '@'.join(fareClassCode)

            noOfStop = len(departureCodes) - 1
            if noOfStop == 0:
                stoppage = "NONSTOP"
            elif noOfStop == 1:
                stoppage = "1 STOP"
            else:
                stoppage = str(noOfStop) + " STOPS"

            # Reset per itinerary so a missing carrier list neither raises
            # NameError nor reuses the previous itinerary's text.
            operatortext = ''
            if len(operatingCarrier) > 0:
                operatortext = '@'.join(operatingCarrier)

            allPrices = offer["basketsRef"]
            for key in allPrices:
                farePrices = allPrices[key]["prices"]["priceAlternatives"]
                economy = 0
                ecotax = 0
                business = 0
                businesstax = 0
                first = 0
                firsttax = 0
                for alternative in farePrices:
                    saverPrice = alternative["pricesPerCurrency"]
                    taxes = 0
                    miles = 0
                    if "AUD" in saverPrice:
                        taxes = saverPrice["AUD"]["amount"]
                        miles = saverPrice["FFCURRENCY"]["amount"]
                    if cabinType == 'E':
                        economy = miles
                        ecotax = taxes
                    else:
                        business = miles
                        businesstax = taxes
                value_string.append(
                    (str(fltno), str(searchid), stime, stoppage, "test",
                     origin, dest, departtime, arive, tripDuration,
                     str(economy), str(ecotax), str(business),
                     str(businesstax), str(first), str(firsttax), "Economy",
                     "Business", "First", "Virgin Australia",
                     originDetailString, arivedetailtext, planedetailtext,
                     operatortext, ecoFareCode, busFareCode, firstFareCode,
                     eco_fare_code, bus_fare_code))
                if len(value_string) >= 50:
                    flush(value_string)
                    value_string = []

        if len(value_string) > 0:
            flush(value_string)
    finally:
        # Runs on success and on failure; an exception from the block above
        # still propagates afterwards.
        storeFlag(searchid, stime, isflag)
    return searchid
def delta(orgn, dest, searchdate, searchkey):
    """Scrape one-way Delta award fares and store them in pexproject_flightdata.

    The function drives a PhantomJS session against delta.com: it fills the
    search form, re-issues the site's own DWR search call from injected
    JavaScript so the raw result JSON lands in ``localStorage``, converts each
    itinerary into one (or, for "Delta Comfort+" result pages, two) database
    rows and finally writes a "flag" row for ``searchkey``.

    Args:
        orgn: Origin text typed into the ``originCity`` field (stripped).
        dest: Destination text typed into the ``destinationCity`` field.
        searchdate: Value written verbatim (via ``str``) into the
            ``departureDate`` field.
        searchkey: Search identifier stored in the ``searchkeyid`` column.

    Returns:
        ``searchkey``, on success as well as on every handled failure path.

    Raises:
        Exception: Errors raised while converting itineraries or writing rows
            are not handled here; the browser is shut down before they
            propagate.

    When ``DEV_LOCAL`` is truthy no database is touched and rows are printed
    instead.
    """
    if not DEV_LOCAL:
        db = customfunction.dbconnection()
        cursor = db.cursor()
        db.set_character_set('utf8')
    url = "http://www.delta.com/"
    stime = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    sys.stdout = codecs.getwriter('utf-8')(sys.stdout)
    driver = webdriver.PhantomJS(
        service_args=[
            '--ignore-ssl-errors=true', '--ssl-protocol=any',
            '--load-images=false'
        ],
        service_log_path='/tmp/ghostdriver.log')
    driver.set_window_size(1120, 1080)

    flag_columns = (
        "flighno,searchkeyid,scrapetime,stoppage,stoppage_station,origin,"
        "destination,duration,maincabin,maintax,firstclass,firsttax,business,"
        "businesstax,cabintype1,cabintype2,cabintype3,datasource,departdetails,"
        "arivedetails,planedetails,operatedby,economy_code,business_code,"
        "first_code")
    row_columns = (
        "flighno,searchkeyid,scrapetime,stoppage,stoppage_station,origin,"
        "destination,departure,arival,duration,maincabin,maintax,firstclass,"
        "firsttax,business,businesstax,cabintype1,cabintype2,cabintype3,"
        "datasource,departdetails,arivedetails,planedetails,operatedby,"
        "economy_code,business_code,first_code,eco_fare_code,"
        "business_fare_code,first_fare_code")

    def build_insert(columns):
        # One "%s" placeholder per listed column.
        placeholders = ",".join(["%s"] * len(columns.split(",")))
        return ("INSERT INTO pexproject_flightdata (" + columns +
                ") VALUES (" + placeholders + ");")

    flag_sql = build_insert(flag_columns)
    insert_sql = build_insert(row_columns)
    flag_row = ("flag", str(searchkey), stime, "flag", "test", "flag", "flag",
                "flag", "0", "0", "0", "0", "0", "0", "flag", "flag", "flag",
                "delta", "flag", "flag", "flag", "flag", "flag", "flag",
                "flag")

    def insert_flag_row():
        # Writes the "flag" row for this search (skipped in DEV_LOCAL mode).
        if not DEV_LOCAL:
            cursor.execute(flag_sql, flag_row)
            db.commit()

    def store_flag():
        # Failure/finish path: write the flag row, then close the browser.
        insert_flag_row()
        driver.quit()

    def flush(rows):
        # Persist a batch of fare rows, or print it in DEV_LOCAL mode.
        if not DEV_LOCAL:
            cursor.executemany(insert_sql, rows)
            db.commit()
        else:
            print(rows)

    def leg_timestamp(date_text, time_text):
        # "Mon Jan 02 2017" + "9:05AM" -> "2017/01/02 09:05"
        parsed = datetime.datetime.strptime(date_text + " " + time_text,
                                            '%a %b %d %Y %I:%M%p')
        return parsed.strftime('%Y/%m/%d %H:%M')

    def airport_label(code):
        # Airport lookup is only attempted when a database is available.
        if DEV_LOCAL:
            return code
        return customfunction.get_airport_detail(code) or code

    def to_usd(currency, amount):
        # Returns the raw response body of the conversion service.
        response = urllib.urlopen(
            "https://www.exchangerate-api.com/%s/%s/%f?k=e002a7b64cabe2535b57f764"
            % (currency, "USD", float(amount)))
        try:
            return response.read()
        finally:
            response.close()

    # ---------------------------------------------------------------- form
    # Values are handed to the page as script arguments instead of being
    # concatenated into the script text, so quotes in the input cannot break
    # (or inject into) the JavaScript.
    set_value_js = ("document.getElementById(arguments[0])"
                    ".setAttribute('value', arguments[1])")
    try:
        driver.get(url)
        time.sleep(1)
        attempts = 0
        page_status = ''
        while attempts < 15 and page_status != 'complete':
            time.sleep(1)
            print("flg %s" % attempts)
            page_status = driver.execute_script('return document.readyState;')
            print("pageStatus %s" % page_status)
            attempts += 1
        WebDriverWait(driver, 6).until(
            EC.presence_of_element_located((By.ID, "oneWayBtn")))
        oneway = driver.find_element_by_id('oneWayBtn')
        driver.execute_script("arguments[0].click();", oneway)
        driver.execute_script(set_value_js, 'originCity', orgn.strip())
        driver.execute_script(set_value_js, 'destinationCity', dest.strip())
        # Looked up only so that a missing date field aborts the search here.
        driver.find_element_by_id("departureDate")
        driver.execute_script(set_value_js, 'departureDate', str(searchdate))
        driver.find_element_by_id("milesBtn").click()
        driver.find_element_by_id("findFlightsSubmit").send_keys(Keys.ENTER)
    except Exception:
        print("before data page")
        store_flag()
        return searchkey

    time.sleep(1)
    # If the page already shows an element with id "submitAdvanced" the
    # search is treated as having produced no data.
    try:
        WebDriverWait(driver, 5).until(
            EC.presence_of_element_located((By.ID, "submitAdvanced")))
    except Exception:
        print("Data found")
    else:
        print("no data")
        store_flag()
        return searchkey

    # ------------------------------------------------------- result capture
    # Both scripts re-run the site's search call; the callback stores the raw
    # result plus the cabin header HTML in localStorage under 'deltaData' and
    # appends a <div id="submitAdvanced"> as a "done" marker, then throws so
    # the remainder of the site's original callback never runs.
    multi_page_js = """
        DWRHandler.currentPage = -1;
        var _shopInputDo=shopInputDo;
        shoppingUtil.scrollWindow("top");
        FilterFunctions.showFilterMsg();
        FlightUtil.emptyResults();
        ResummarizeFlightResultsDWR.pageResults(DWRHandler.currentPage, _shopInputDo.currentSessionCheckSum, delta.airShopping.cacheKey, {
            async: true,
            callback: function(searchResults) {
                if (searchResults != null) {
                    var jsonData = {};
                    jsonData['jsonobj'] = JSON.stringify(searchResults);
                    var cabininfo = document.getElementsByClassName('tblHeadUp')[0].innerHTML;
                    jsonData['cabinTypes'] = cabininfo;
                    localStorage.setItem('deltaData', JSON.stringify(jsonData));
                    var element = document.createElement('div');
                    element.id = "submitAdvanced";
                    element.appendChild(document.createTextNode("text"));
                    document.body.appendChild(element);
                    throw new Error("Results found");
                    if (searchResults.errorFwdURL == null) {
                        jsonResultPopulation(searchResults);
                        paginationPopulation(searchResults);
                        if (shoppingUtil.isIE8()) {
                            if (DWRHandler.currentPage == -1) {
                                ieTimeout = setTimeout("RenderTemplate.renderResult();FilterFunctions.hideFilterMsg();RenderTemplate.adjustHeight();", 200);
                            } else {
                                ieTimeout = setTimeout("RenderTemplate.renderResult();FilterFunctions.hideFilterMsg();RenderTemplate.adjustHeight();", 100);
                            }
                        } else {
                            RenderTemplate.renderResult();
                            FilterFunctions.hideFilterMsg();
                            RenderTemplate.adjustHeight();
                        }
                        if (DWRHandler.currentPage == -1) {
                            $("#showAll").hide();
                            $("#showAll-footer").hide();
                        }
                        contienuedOnload(false);
                        if (searchResults.debugInfo != null && ((typeof(printRequestResponse) !== "undefined") && printRequestResponse == true)) {
                            $("#requestXml").text(searchResults.debugInfo.itaRequest);
                            $("#responceXml").text(searchResults.debugInfo.itaResponse);
                            $("#reqRes").show();
                        }
                    } else {
                        window.location.replace(searchResults.errorFwdURL);
                    }
                } else {
                    FilterFunctions.errorHandling();
                }
                $(".tableHeaderHolderFareBottom.return2Top").show();
            },
            exceptionHandler: FilterFunctions.errorHandling
        });
        """
    single_page_js = """
        var sortBy = "deltaScheduleAward" ;
        SearchFlightResultsDWR.searchResults(currentSessionCheckSum, sortBy[0], delta.airShopping.numberOfColumnsToRequest, delta.airShopping.cacheKey, {
            async: true,
            timeout: 65000,
            callback: function(searchResults) {
                var jsonData = {};
                jsonData['jsonobj'] = JSON.stringify(searchResults);
                var cabininfo = document.getElementsByClassName('tblHeadUp')[0].innerHTML;
                jsonData['cabinTypes'] = cabininfo;
                localStorage.setItem('deltaData', JSON.stringify(jsonData));
                var element = document.createElement('div');
                element.id = "submitAdvanced";
                element.appendChild(document.createTextNode("text"));
                document.body.appendChild(element);
                throw new Error("Results found");
                if (searchResults.errorFwdURL == null || searchResults.errorFwdURL == "") {
                    flightResultsObj.isDOMReady(searchResults, action, false);
                    FilterFunctions.hideFilterMsg();
                } else {
                    flightResultsObj.isDOMReady(searchResults, false, true);
                }
                if (!action) {
                    Wait.hide();
                    $(".tableHeaderHolderFareBottom").show();
                    $("#nextGenAirShopping .tableHeaderHolder").show();
                }
            },
            errorHandler: function(msg, exc) {
                shoppingUtil.errorHandler(msg, exc);
            },
            exceptionHandler: function(msg, exc) {
                (action) ? FilterFunctions.hideFilterMsg(): "";
                shoppingUtil.exceptionHandler(msg, exc);
            }
        });
        """
    try:
        WebDriverWait(driver, 15).until(
            EC.presence_of_element_located((By.ID, "showAll-footer")))
        print("More than one page")
        driver.execute_script(multi_page_js)
    except Exception:
        # No "show all" footer (or the paged call failed): single-page call.
        print("single page")
        try:
            driver.execute_script(single_page_js)
        except Exception:
            store_flag()
            return searchkey

    try:
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, "submitAdvanced")))
        result = driver.execute_script(
            """ return localStorage.getItem('deltaData'); """)
        deltaObj = json.loads(result)
        # Debug dump of the captured payload into the working directory.
        with open('1.json', 'w') as a_file:
            a_file.write(json.dumps(deltaObj, indent=4))
        searchResult = json.loads(deltaObj['jsonobj'])
        cabinhead = "<tr>" + deltaObj['cabinTypes'] + "</tr>"
        soup = BeautifulSoup(cabinhead, "xml")
        # Cabin column headers; their text decides how fare columns are
        # classified below.
        pricecol = soup.findAll("a", {"class": "tblHeadBigtext lnkCabinName"})
        if len(pricecol) < 1:
            pricecol = soup.findAll("label", {"class": "tblHeadBigtext"})
        flightData = searchResult["itineraries"]
    except Exception:
        store_flag()
        return searchkey

    # ----------------------------------------------------- rows per itinerary
    values_string = []
    try:
        for itinerary in flightData:
            totalFareDetails = itinerary['totalFare']
            departDetail = []
            ariveDetail = []
            flightDetail = []
            operatorDetail = []
            SourceCOde = ''
            tripDuration = ''
            destinationCode = ''
            arivalTime = ''
            departTime = ''
            flightNo = ''
            for trip_slice in itinerary['slices']:
                tripDuration = trip_slice['duration']
                SourceCOde = trip_slice['sliceOrigin']['airportCode']
                destinationCode = trip_slice['sliceDestination']['airportCode']
                arivalTime = trip_slice['sliceArrivalTime']
                departTime = trip_slice['sliceDepartureTime']
                first_leg = True
                for flight in trip_slice['flights']:
                    for legdetail in flight['legs']:
                        if not legdetail:
                            continue
                        fromAirport = ''
                        destAirport = ''
                        if 'origin' in legdetail:
                            fromAirport = legdetail['origin']['airportCode']
                        if 'destination' in legdetail:
                            destAirport = legdetail['destination'][
                                'airportCode']
                        duration = legdetail['duration']
                        departDetail.append(
                            leg_timestamp(legdetail['schedDepartureDate'],
                                          legdetail['schedDepartureTime']) +
                            " | from " + airport_label(fromAirport))
                        ariveDetail.append(
                            leg_timestamp(legdetail['schedArrivalDate'],
                                          legdetail['schedArrivalTime']) +
                            " | at " + airport_label(destAirport))
                        aircraft = legdetail['aircraft']['shortName']
                        airlineCode = legdetail['marketAirline']['airline'][
                            'airlineCode']
                        flightNumber = legdetail['flightNumber']
                        if first_leg:
                            # The row's flight number is that of the first
                            # non-empty leg of the slice.
                            flightNo = airlineCode + " " + str(flightNumber)
                            first_leg = False
                        # Spell out abbreviated aircraft names.
                        if aircraft[:3] == 'MD-':
                            aircraft = 'McDonnell Douglas MD ' + aircraft[3:]
                        elif aircraft[:3] == 'CRJ':
                            aircraft = 'Bombardier ' + aircraft
                        flightDetail.append(airlineCode + " " +
                                            str(flightNumber) + " | " +
                                            aircraft + " (" + duration + ")")
                        operatorDetail.append(legdetail['operatingAirline'][
                            'airline']['airlineName'])

            cabintype1 = ''
            cabintype2 = ''
            cabintype3 = ''
            ecofare = 0
            echoTax = 0
            bussfare = 0
            busstax = 0
            firstFare = 0
            firsttax = 0
            ecofareClass = ''
            bussFareClass = ''
            firstFareClass = ''
            eco_fare_code = ''
            bus_fare_code = ''
            first_fare_code = ''
            for j, fare in enumerate(totalFareDetails):
                cabintype = ''
                miles = 0
                taxes = 0
                fareCode = []
                if fare['cabinName'] is not None:
                    for info in fare['miscFlightInfos']:
                        fareCode.append(info['displayBookingCode'])
                    miles = fare['totalAwardMiles'].replace(',', '')
                    taxInt = fare['totalPriceLeft'].replace(',', '')
                    taxFloat = fare['totalPriceRight']
                    if taxFloat == '' or taxFloat is None:
                        taxFloat = 0
                    tax = float(taxInt) + float(taxFloat)
                    currencyCode = fare['currencyCode']
                    if currencyCode and currencyCode != 'USD':
                        taxes = to_usd(currencyCode, tax)
                    else:
                        taxes = tax
                # NOTE(review): this classification is applied to every fare
                # column, including ones without a cabinName (miles/taxes stay
                # 0 for those). The nesting was reconstructed from a
                # whitespace-collapsed source - confirm against the original.
                if len(pricecol) > 1:
                    if j == 0:
                        cabintype = "Economy"
                    if j == 1 and 'First' not in pricecol[1].text:
                        cabintype = "Business"
                    if (j == 2 and len(pricecol) > 2 and
                            'First' not in pricecol[2].text):
                        cabintype = "Business"
                elif len(pricecol) == 1:
                    if 'Main Cabin' in pricecol[0].text:
                        cabintype = "Economy"
                    elif 'First' not in pricecol[0].text:
                        cabintype = 'Business'
                if 'Economy' in cabintype:
                    ecofare = miles
                    echoTax = taxes
                    cabintype1 = "Economy"
                    if fareCode:
                        eco_fare_code = ','.join(fareCode)
                        ecofareClass = (' Economy@'.join(fareCode) +
                                        ' Economy')
                elif 'Business' in cabintype:
                    cabintype2 = "Business"
                    bussfare = miles
                    busstax = taxes
                    if fareCode:
                        bus_fare_code = ','.join(fareCode)
                        bussFareClass = (' Business@'.join(fareCode) +
                                         ' Business')
                else:
                    cabintype3 = "First"
                    firstFare = miles
                    firsttax = taxes
                    if fareCode:
                        first_fare_code = ','.join(fareCode)
                        firstFareClass = ' First@'.join(fareCode) + ' First'

            stop = len(departDetail) - 1
            if stop == 0:
                stoppage = "NONSTOP"
            elif stop == 1:
                stoppage = "1 STOP"
            else:
                stoppage = str(stop) + " STOPS"
            arivalTime = datetime.datetime.strptime(
                arivalTime, '%I:%M%p').strftime('%H:%M')
            departTime = datetime.datetime.strptime(
                departTime, '%I:%M%p').strftime('%H:%M')

            head = (flightNo, str(searchkey), stime, stoppage, "test",
                    SourceCOde, destinationCode, departTime, arivalTime,
                    tripDuration)
            tail = (cabintype1, cabintype2, cabintype3, "delta",
                    '@'.join(departDetail), '@'.join(ariveDetail),
                    '@'.join(flightDetail), '@'.join(operatorDetail))
            # NOTE(review): the six price values are written in the order
            # economy, business, first while the column list reads maincabin,
            # firstclass, business. Kept unchanged - confirm that readers of
            # the table expect this.
            if len(pricecol) > 1 and 'Delta Comfort+' in pricecol[1].text:
                # Second column is "Delta Comfort+": its fares are stored as a
                # separate row in the economy slots.
                bussFareClass = bussFareClass.replace('Business', 'Economy')
                values_string.append(
                    head + (str(ecofare), str(echoTax), '0', '0',
                            str(firstFare), str(firsttax)) + tail +
                    (ecofareClass, '', firstFareClass, eco_fare_code, '',
                     first_fare_code))
                values_string.append(
                    head + (str(bussfare), str(busstax), '0', '0', '0', '0') +
                    tail + (bussFareClass, '', '', bus_fare_code, '', ''))
            else:
                values_string.append(
                    head + (str(ecofare), str(echoTax), str(bussfare),
                            str(busstax), str(firstFare), str(firsttax)) +
                    tail + (ecofareClass, bussFareClass, firstFareClass,
                            eco_fare_code, bus_fare_code, first_fare_code))
            if len(values_string) > 50:
                flush(values_string)
                values_string = []
        if values_string:
            flush(values_string)
        insert_flag_row()
    finally:
        # Always shut the browser down, also when a row could not be built.
        driver.quit()
    return searchkey
def etihad(source, destcode, searchdate, searchkey, scabin):
    """Scrape one-way Etihad redemption fares into pexproject_flightdata.

    A PhantomJS session fills the redemption booking form, waits for the
    result container and reads the ``templateData`` JSON embedded in the
    page. Every itinerary part becomes one row per price alternative, and a
    "flag" row for ``searchkey`` is written at the end.

    Args:
        source: Origin text typed into the origin field (via ``str``).
        destcode: Destination text typed into the destination field.
        searchdate: Departure date as ``MM/DD/YYYY``; submitted to the site
            as ``DD/MM/YYYY``.
        searchkey: Search identifier stored in the ``searchkeyid`` column.
        scabin: ``'maincabin'`` selects radio ``Radio1``, ``'firstclass'``
            selects ``Radio2``, anything else ``Radio3``.

    Returns:
        ``searchkey`` on every path, including handled failures.

    Raises:
        ValueError: If ``searchdate`` does not match ``%m/%d/%Y``.
        Exception: Errors while parsing the result page or writing rows are
            not handled here; the browser is shut down before they propagate.
    """
    date = datetime.datetime.strptime(searchdate,
                                      '%m/%d/%Y').strftime('%d/%m/%Y')
    db = customfunction.dbconnection()
    cursor = db.cursor()
    stime = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    if scabin == 'maincabin':
        search_cabin = "Radio1"
    elif scabin == 'firstclass':
        search_cabin = "Radio2"
    else:
        search_cabin = "Radio3"
    url = "http://www.etihad.com/en-us/plan-and-book/book-redemption-flights/"
    driver = webdriver.PhantomJS(
        service_args=[
            '--ignore-ssl-errors=true', '--ssl-protocol=any',
            '--load-images=false'
        ],
        service_log_path='/tmp/ghostdriver.log')
    driver.set_window_size(1120, 1080)

    flag_columns = (
        "flighno,searchkeyid,scrapetime,stoppage,stoppage_station,origin,"
        "destination,duration,maincabin,maintax,firstclass,firsttax,business,"
        "businesstax,cabintype1,cabintype2,cabintype3,datasource,departdetails,"
        "arivedetails,planedetails,operatedby,economy_code,business_code,"
        "first_code")
    row_columns = (
        "flighno,searchkeyid,scrapetime,stoppage,stoppage_station,origin,"
        "destination,departure,arival,duration,maincabin,maintax,firstclass,"
        "firsttax,business,businesstax,cabintype1,cabintype2,cabintype3,"
        "datasource,departdetails,arivedetails,planedetails,operatedby,"
        "economy_code,business_code,first_code,eco_fare_code,"
        "business_fare_code,first_fare_code")

    def build_insert(columns):
        # One "%s" placeholder per listed column.
        placeholders = ",".join(["%s"] * len(columns.split(",")))
        return ("INSERT INTO pexproject_flightdata (" + columns +
                ") VALUES (" + placeholders + ");")

    flag_sql = build_insert(flag_columns)
    insert_sql = build_insert(row_columns)
    flag_row = ("flag", str(searchkey), stime, "flag", "test", "flag", "flag",
                "flag", "0", "0", "0", "0", "0", "0", "flag", "flag", "flag",
                "etihad", "flag", "flag", "flag", "flag", "flag", "flag",
                "flag")

    def store_flag():
        # Failure path: write the flag row, then close the browser.
        cursor.execute(flag_sql, flag_row)
        db.commit()
        print("etihad flag inserted")
        driver.quit()

    def settle_field(field, text, typed_length):
        # Reads the field up to twice; whenever it is empty or not longer
        # than the typed text, the text is typed again. Returns the value
        # seen on the last read.
        value = ''
        for _ in range(2):
            value = field.get_attribute("value")
            if value != '' and len(value) > typed_length:
                break
            field.clear()
            field.send_keys(text)
            time.sleep(1)
            field.send_keys(Keys.TAB)
        return value

    def fill_search_form():
        # Returns True once the result container is present, False when the
        # origin or destination field stayed empty. Selenium errors propagate.
        driver.get(url)
        WebDriverWait(driver, 15).until(
            EC.presence_of_element_located(
                (By.ID, "frm_2012158061206151234")))
        driver.execute_script(
            'document.getElementById("frm_2012158061206151234").removeAttribute("readonly")'
        )
        oneway = driver.find_element_by_id("frm_oneWayFlight")
        driver.execute_script("return arguments[0].click();", oneway)
        cabin_radio = driver.find_element_by_id(search_cabin)
        driver.execute_script("return arguments[0].click();", cabin_radio)
        origin_input = driver.find_element_by_id("frm_2012158061206151234")
        driver.execute_script("return arguments[0].click();", origin_input)
        time.sleep(1)
        origin_input.send_keys(str(source))
        time.sleep(1)
        origin_input.send_keys(Keys.TAB)
        if settle_field(origin_input, str(source), len(source)) == '':
            return False

        driver.execute_script(
            'return document.getElementById("frm_20121580612061235").removeAttribute("readonly")'
        )
        dest_input = driver.find_element_by_id("frm_20121580612061235")
        time.sleep(1)
        dest_input.send_keys(destcode)
        time.sleep(1)
        dest_input.send_keys(Keys.TAB)
        if settle_field(dest_input, destcode, len(destcode)) == '':
            return False

        time.sleep(1)
        date_input = driver.find_element_by_id("frm_2012158061206151238")
        date_input.clear()
        date_input.send_keys(date)
        date_input.send_keys(Keys.TAB)
        driver.find_element_by_name("webform").send_keys(Keys.ENTER)
        WebDriverWait(driver, 20).until(
            EC.presence_of_element_located((By.ID, "dtcontainer-both")))
        return True

    def to_usd(currency, amount):
        # Returns the raw response body of the conversion service.
        response = urllib.urlopen(
            "https://www.exchangerate-api.com/%s/%s/%f?k=e002a7b64cabe2535b57f764"
            % (currency, "USD", float(amount)))
        try:
            return response.read()
        finally:
            response.close()

    def hours_minutes(milliseconds):
        # 5400000 -> "1h 30m" (floor division, fractions are dropped).
        hours, minutes = divmod(int(milliseconds) // 60000, 60)
        return str(hours) + "h " + str(minutes) + "m"

    try:
        form_ok = fill_search_form()
    except Exception:
        form_ok = False
    if not form_ok:
        store_flag()
        return searchkey

    try:
        soup = BeautifulSoup(driver.page_source, "lxml")
        # Operating-carrier names in page order; consumed one per segment.
        operatorArray = []
        for header in soup.findAll("th", {"class": "operatingCarrier"}):
            for paragraph in header.findAll("p"):
                operatorArray.append(paragraph.text)
        templatedata = soup.find('script',
                                 text=re.compile('var templateData = '))
        json_text = re.search(r'^\s*var templateData = \s*({.*?})\s*;\s*$',
                              templatedata.string,
                              flags=re.DOTALL | re.MULTILINE).group(1)
        jsonData = json.loads(json_text)
        tempdata = jsonData["rootElement"]["children"][1]["children"][0][
            "children"][4]["model"]["allItineraryParts"]

        value_string = []
        opCounter = 0
        for part in tempdata:
            segments = part["segments"]
            rowRecord = part["itineraryPartData"]
            origin_code = rowRecord["departureCode"]
            dest_code = rowRecord["arrivalCode"]

            originDetails = []
            destDetails = []
            operatorCarrier = []
            bookingFareCode = []
            booking_codes = []
            for segment in segments:
                bookingCode = segment['bookingClass']
                booking_codes.append(bookingCode)
                bookingFareCode.append(bookingCode + " " +
                                       segment['allClassOfService'])
                segOrigin = segment["departureCode"]
                airport_ = (customfunction.get_airport_detail(segOrigin) or
                            segOrigin)
                # [:-3] drops the last three characters of the timestamp.
                originDetails.append(segment["departureDate"][:-3] +
                                     " | from " + airport_)
                segDest = segment["arrivalCode"]
                airport_ = (customfunction.get_airport_detail(segDest) or
                            segDest)
                destDetails.append(segment["arrivalDate"][:-3] + " | at " +
                                   airport_)
                if len(operatorArray) > opCounter:
                    operatorCarrier.append(operatorArray[opCounter])
                opCounter = opCounter + 1
            fare_code = ','.join(booking_codes)

            depttime = rowRecord["departureDate"].split(" ")[1]
            departtime = datetime.datetime.strptime(
                depttime, '%H:%M:%S').strftime('%H:%M')
            arivalTime = rowRecord["arrivalDate"].split(" ")[1]
            arive = datetime.datetime.strptime(
                arivalTime, '%H:%M:%S').strftime('%H:%M')
            tripDuration = hours_minutes(rowRecord["totalTripDuration"])

            departureCodes = rowRecord["departureCodes"]
            flightDurations = rowRecord["flightDurations"]
            flightNumber = rowRecord["flightNumber"]
            airlineCodes = rowRecord["airlineCodes"]
            aircraftType = rowRecord["aircraftType"]
            fltno = ''
            flightsDetails = []
            for f in range(len(flightNumber)):
                flightNo = airlineCodes[f] + " " + str(flightNumber[f])
                if f == 0:
                    fltno = flightNo
                flightsDetails.append(
                    flightNo + " | " +
                    customfunction.AIRCRAFTS[aircraftType[f]] + " (" +
                    hours_minutes(flightDurations[f]) + ")")

            originDetailString = '@'.join(originDetails)
            arivedetailtext = '@'.join(destDetails)
            planedetailtext = '@'.join(flightsDetails)
            operatortext = '@'.join(operatorCarrier)
            bookingFareCodeString = '@'.join(bookingFareCode)

            noOfStop = len(departureCodes) - 1
            if noOfStop == 0:
                stoppage = "NONSTOP"
            elif noOfStop == 1:
                stoppage = "1 STOP"
            else:
                stoppage = str(noOfStop) + " STOPS"

            allPrices = part["basketsRef"]
            economylist = []
            ecotaxlist = []
            businesslist = []
            busstaxlist = []
            firstlist = []
            fisrtaxlist = []
            for key in allPrices:
                classOfService = allPrices[key]["classOfService"]
                for alternative in allPrices[key]["prices"][
                        "priceAlternatives"]:
                    # Reset per alternative so a missing currency entry never
                    # reuses the previous alternative's figures.
                    miles = 0
                    taxes = 0
                    saverPrice = alternative["pricesPerCurrency"]
                    for infokey in saverPrice:
                        if infokey == "FFCURRENCY":
                            miles = saverPrice[infokey]["amount"]
                        else:
                            taxes = to_usd(infokey,
                                           saverPrice[infokey]["amount"])
                    if "ECONOMY" in classOfService:
                        economylist.append(miles)
                        ecotaxlist.append(taxes)
                    elif "BUSINESS" in classOfService:
                        businesslist.append(miles)
                        busstaxlist.append(taxes)
                    elif "FIRST" in classOfService:
                        firstlist.append(miles)
                        fisrtaxlist.append(taxes)

            # One row per index; cabins with fewer alternatives contribute 0.
            priceLength = max(len(economylist), len(businesslist),
                              len(firstlist))
            for c in range(priceLength):
                economy = 0
                ecotax = 0
                business = 0
                businesstax = 0
                first = 0
                firsttax = 0
                if c < len(economylist):
                    economy = economylist[c]
                    ecotax = ecotaxlist[c]
                if c < len(businesslist):
                    business = businesslist[c]
                    businesstax = busstaxlist[c]
                if c < len(firstlist):
                    first = firstlist[c]
                    firsttax = fisrtaxlist[c]
                # NOTE(review): price values are written in the order economy,
                # business, first while the column list reads maincabin,
                # firstclass, business. Kept unchanged - confirm that readers
                # of the table expect this.
                value_string.append(
                    (str(fltno), str(searchkey), stime, stoppage, "test",
                     origin_code, dest_code, departtime, arive, tripDuration,
                     str(economy), str(ecotax), str(business),
                     str(businesstax), str(first), str(firsttax), "Economy",
                     "Business", "First", "etihad", originDetailString,
                     arivedetailtext, planedetailtext, operatortext,
                     bookingFareCodeString, bookingFareCodeString,
                     bookingFareCodeString, fare_code, fare_code, fare_code))
                if len(value_string) > 50:
                    cursor.executemany(insert_sql, value_string)
                    db.commit()
                    value_string = []
        if value_string:
            cursor.executemany(insert_sql, value_string)
            db.commit()
        cursor.execute(flag_sql, flag_row)
        db.commit()
    finally:
        # Always shut the browser down, also when parsing or the DB failed.
        driver.quit()
    return searchkey
def _virgin_first_cell(row, class_names):
    """Return the first <td> of `row` matching one of `class_names`, else None."""
    for class_name in class_names:
        cell = row.find("td", {"class": class_name})
        if cell:
            return cell
    return None


def _virgin_to_usd(currency, amount):
    """Ask exchangerate-api to convert `amount` of `currency` to USD.

    Returns the raw response body, exactly as the service sent it.
    """
    response = urllib.urlopen(
        "https://www.exchangerate-api.com/%s/%s/%f?k=e002a7b64cabe2535b57f764"
        % (currency, "USD", float(amount)))
    try:
        return response.read()
    finally:
        # The original never closed the response; release the socket promptly.
        response.close()


def _virgin_cell_prices(cell):
    """Extract (miles, tax) from one fare cell; (0, 0) when nothing is found.

    The first number in the price text is the miles amount, the second one is
    the tax.  A tax quoted in a currency other than USD is converted through
    `_virgin_to_usd`; a USD tax is returned as scraped.
    """
    if cell is None:
        return 0, 0
    price_tag = cell.find("span", {"class": "price"})
    if price_tag is None:
        return 0, 0
    price_text = price_tag.text
    # '.' deliberately matches any separator so both "1,234" and "12.34" hit.
    amounts = re.findall(r"\d+.\d+", price_text)
    miles = 0
    tax = 0
    if amounts:
        miles = amounts[0].replace(',', '')
    if len(amounts) > 1:
        if "USD" not in price_text:
            words = re.findall(r"[a-zA-Z]+", price_text)
            # The second alphabetic word of the price text is used as the
            # currency code (same index the original code relied on).
            tax = _virgin_to_usd(words[1], amounts[1].replace(',', ''))
        else:
            tax = amounts[1]
    return miles, tax


def _virgin_station_code(text, keyword):
    """Return the text inside the first '(...)' of `text`, or ''.

    `keyword` ('from' / 'at') must be present in `text`; every occurrence of
    it is removed before the parenthesised part is looked up.
    """
    if keyword not in text:
        return ''
    cleaned = text.replace(keyword, '').strip()
    if '(' not in cleaned:
        return ''
    return cleaned.split('(')[1].replace(')', '')


def _virgin_stop_label(stops_text):
    """Map the text of the "stops" item to the stored stoppage label."""
    for digit, label in (('0', "NONSTOP"), ('1', "1 STOP"),
                         ('2', "2 STOPS"), ('3', "3 STOPS")):
        if digit in stops_text:
            return label
    return ''


def _virgin_flatten_place(text):
    """Join the first two lines of a multi-line place name with one space."""
    if '\n' in text:
        lines = text.split("\n")
        return lines[0].strip() + " " + lines[1].strip()
    return text


def _virgin_split_route(route_text):
    """Split "<origin> to <destination>" into (origin, destination).

    Splits on the standalone word "to" only, so names that merely contain the
    letters "to" (e.g. "Toronto", "Boston") are no longer cut in half.
    Returns ('', '') when no standalone "to" is present.
    """
    parts = re.split(r'\bto\b', route_text, maxsplit=1)
    if len(parts) != 2:
        return '', ''
    return _virgin_flatten_place(parts[0]), _virgin_flatten_place(parts[1])


def _virgin_segment_stamp(date_text, time_text):
    """Reformat e.g. "Monday 01 January 2018" + "10:30" as "2018/01/01 10:30"."""
    stamp = datetime.datetime.strptime(date_text + " " + time_text,
                                       '%A %d %B %Y %H:%M')
    return stamp.strftime('%Y/%m/%d %H:%M')


def _virgin_store_rows(rows):
    """Insert `rows` into pexproject_flightdata, or print them when DEV_LOCAL.

    Uses the module-level `cursor` / `db` exactly like the original inline
    code did.  Does nothing for an empty list.
    """
    if not rows:
        return
    if DEV_LOCAL:
        print(rows)
        return
    cursor.executemany(
        "INSERT INTO pexproject_flightdata (flighno,searchkeyid,scrapetime,stoppage,stoppage_station,origin,destination,departure,arival,duration,maincabin,maintax,firstclass,firsttax,business,businesstax,cabintype1,cabintype2,cabintype3,datasource,departdetails,arivedetails,planedetails,operatedby) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s);",
        rows)
    db.commit()


def virgindata(tbody, keyid):
    """Scrape flight rows out of a results <tbody> and store them.

    Rows with class "directRoute " are used when present, otherwise rows with
    class "indirectRoute ".  For every row the economy / premium-economy /
    upper-class prices, the headline departure and arrival data and the
    per-segment details from the tooltip table are collected and written to
    pexproject_flightdata in batches of 50 (printed instead when DEV_LOCAL is
    set).

    Relies on module-level names `stime`, `cursor`, `db`, `DEV_LOCAL`,
    `customfunction` and `get_airport_code`.

    Args:
        tbody: parsed table body exposing `findAll` (BeautifulSoup-style).
        keyid: search key stored with every row.

    Returns:
        `keyid` when `tbody` cannot be searched at all; otherwise None.
    """
    batch_size = 50
    value_string = []

    # Previously `trbody` stayed unbound when neither class matched, which
    # crashed the loop below; an empty result is now simply "nothing to do".
    trbody = []
    try:
        for route_class in ("directRoute ", "indirectRoute "):
            found = tbody.findAll("tr", {"class": route_class})
            if found:
                trbody = found
                break
    except Exception:
        # Best effort, as before (e.g. tbody is None), but no longer a bare
        # except that would also swallow KeyboardInterrupt / SystemExit.
        return keyid

    for row in trbody:
        lyover = ''
        details = row.find("td", {"class": "flightSearchDetails"})

        # ---- price block -------------------------------------------------
        econo, econotax = _virgin_cell_prices(_virgin_first_cell(row, (
            "cellOption economy hasLowestCostMessage",
            "cellOption economy ")))
        # Premium economy is stored in the "business" slot of the record.
        business, busstax = _virgin_cell_prices(_virgin_first_cell(row, (
            "cellOption premEconomy ",
            "cellOption premEconomy hasLowestCostMessage")))
        # Upper class is stored in the "first" slot of the record.
        first, firsttax = _virgin_cell_prices(_virgin_first_cell(row, (
            "cellOption upperclass last",
            "cellOption upperclass last hasLowestCostMessage")))

        # ---- headline departure / arrival --------------------------------
        sourcestn = ''
        destinationstn = ''
        depttime = ''
        arivaltime = ''
        heading = details.find("ul")

        departinfo = heading.find("li", {"class": "depart"}).findAll("p")
        if departinfo:
            depttime = departinfo[0].text
        if len(departinfo) > 1:
            sourcestn = _virgin_station_code(departinfo[1].text, 'from')

        ariveinfo = heading.find("li", {"class": "arrive"}).findAll("p")
        if ariveinfo:
            arivaltime = ariveinfo[0].text
            if '+' in arivaltime:
                # Drop the "+N" next-day suffix from the displayed time.
                arivaltime = arivaltime.split('+')[0]
        if len(ariveinfo) > 1:
            destinationstn = _virgin_station_code(ariveinfo[1].text, 'at')

        stp = _virgin_stop_label(heading.find("li", {"class": "stops"}).text)
        total_duration = heading.find("li", {"class": "duration"}).text.strip()
        if 'Duration' in total_duration:
            total_duration = total_duration.replace('Duration', '').strip()

        # ---- per-segment details block -----------------------------------
        details_tr = details.find("div", {"class": "tooltip"}).findAll("tr")
        departdlist = []
        arivelist = []
        planelist = []
        operatedby = []
        fl_flightno = ''  # stays '' when the tooltip has no rows

        # Every segment occupies two consecutive rows: route/operator/date,
        # then times/duration/number.  A dangling odd row is skipped instead
        # of raising IndexError.
        for index in range(0, len(details_tr) - 1, 2):
            from_to = details_tr[index].find("td", {"class": "flightDetails"})
            times_row = details_tr[index + 1]

            operatedby.append(from_to.find("span", {"class": "operator"}).text)
            departing_from, ariving_at = _virgin_split_route(
                from_to.find("span", {"class": "flightFromTo"}).text)

            departing_date = from_to.find("span", {"class": "fullDate"}).text
            if 'Departing' in departing_date:
                departing_date = departing_date.replace('Departing', '').strip()

            detaildetptime = times_row.find(
                "td", {"class": "departs"}).find(
                    "span", {"class": "flightDeparts"}).text
            detailarivetime = times_row.find(
                "td", {"class": "arrives"}).find(
                    "span", {"class": "flightArrives"}).text
            fl_duration = times_row.find(
                "td", {"class": "duration"}).find(
                    "span", {"class": "flightDuration"}).text.strip()

            # NOTE(review): the flight number is always read from row 1, i.e.
            # the first segment, even for later segments - kept as in the
            # original; confirm whether details_tr[index + 1] was intended.
            number_tag = details_tr[1].find(
                "td", {"class": "number"}).find(
                    "span", {"class": "flightNumber"})
            # Text following the <br> is the aircraft; the rest is the number.
            planeno = ''.join(number_tag.find('br').next_siblings)
            fl_flightno = number_tag.text.replace(planeno, '')

            airport_ = customfunction.get_airport_detail(
                get_airport_code(departing_from)) or departing_from
            departdlist.append(
                _virgin_segment_stamp(departing_date, detaildetptime)
                + " | from " + airport_)

            # NOTE(review): the arrival stamp reuses the departure date; the
            # "extraDays" markers in the departs/arrives cells are not
            # applied (the original read them but never used the values).
            airport_ = customfunction.get_airport_detail(
                get_airport_code(ariving_at)) or ariving_at
            arivelist.append(
                _virgin_segment_stamp(departing_date, detailarivetime)
                + " | at " + airport_)

            planelist.append(
                fl_flightno + " | " + planeno + " (" + fl_duration + ")")

        departdetails = '@'.join(departdlist)
        arivedetails = '@'.join(arivelist)
        planedetails = ('@'.join(planelist)).strip()
        operatedbytext = '@'.join(operatedby)

        # NOTE(review): the INSERT lists firstclass/firsttax before
        # business/businesstax while the values are passed business-first;
        # order kept unchanged - confirm against the consumers of the table.
        value_string.append(
            (fl_flightno, str(keyid), stime, stp, lyover, sourcestn,
             destinationstn, depttime, arivaltime, total_duration,
             str(econo), str(econotax), str(business), str(busstax),
             str(first), str(firsttax), "Economy", "Business", "First",
             "virgin_atlantic", departdetails, arivedetails, planedetails,
             operatedbytext))

        if len(value_string) >= batch_size:
            # Goes through the same DEV_LOCAL-aware path as the final flush;
            # the original mid-loop insert always hit the database.
            _virgin_store_rows(value_string)
            value_string = []

    _virgin_store_rows(value_string)