def handle_data(self, data):
    """Store a chunk of element text in the field the parser is currently in.

    Which field receives the text is decided by the state flags
    ``self.price``, ``self.date``, ``self.time`` and ``self.direction``;
    they are read here and presumably set by the start-tag handler, which
    is not part of this block.

    Args:
        data: Text content reported for the current element.

    Raises:
        IndexError: If the time text holds fewer than two
            whitespace-separated tokens while ``self.time == 1``.
    """
    if self.price == 2:
        # Keep only the part before the first '.'.  ``partition`` returns the
        # whole string when there is no '.', whereas slicing up to
        # ``find('.')`` (which is -1 on a miss) silently cut the last character.
        self._vals['price'] = strip_non_ascii(data).partition('.')[0]
        self.price = 0
    if self.date == 1:
        # The date text is accumulated chunk by chunk, space separated.
        self.tmp_date += data + " "
    if self.time == 1:
        # First token is stored as departure time, second as arrival time.
        times = strip_non_ascii(data).split()
        self._vals['dep_time'] = times[0]
        self._vals['arr_time'] = times[1]
    if self.direction == 1:
        # 1 when the text starts with 'Tel-Aviv', 2 when it ends with it
        # (the second check wins when both hold).
        if data.startswith('Tel-Aviv'):
            self.directionval = 1
        if data.endswith('Tel-Aviv'):
            self.directionval = 2
def handle_data(self, data):
    """Dispatch element text to the record field selected by the state flags.

    Reads ``self.price``, ``self.date``, ``self.time`` and
    ``self.direction`` (presumably maintained by the tag handlers, which are
    not shown in this block) and updates ``self._vals``, ``self.tmp_date``
    and ``self.directionval`` accordingly.

    Args:
        data: Text content reported for the current element.

    Raises:
        IndexError: If ``self.time == 1`` and the cleaned text has fewer
            than two whitespace-separated tokens.
    """
    if self.price == 2:
        cleaned_price = strip_non_ascii(data)
        # Text before the first '.'; a price without any '.' is kept whole.
        # The earlier ``[0:find('.')]`` slice used -1 as its end index in
        # that case and lost the final character.
        whole_part, _, _ = cleaned_price.partition('.')
        self._vals['price'] = whole_part
        self.price = 0

    if self.date == 1:
        # Date text may be delivered in pieces; join them with spaces.
        self.tmp_date += data + " "

    if self.time == 1:
        # Token 0 -> departure time, token 1 -> arrival time.
        time_tokens = strip_non_ascii(data).split()
        self._vals['dep_time'] = time_tokens[0]
        self._vals['arr_time'] = time_tokens[1]

    if self.direction == 1:
        # 'Tel-Aviv' at the start gives 1, at the end gives 2; the end
        # check runs last and therefore wins if both match.
        if data.startswith('Tel-Aviv'):
            self.directionval = 1
        if data.endswith('Tel-Aviv'):
            self.directionval = 2
def handle_endtag(self, tag):
    """Handle a closing ``td`` or ``tr`` while a result row is being read.

    Nothing happens unless ``self.header == 1``.  A closing ``td`` clears
    the ``direction`` and ``price`` flags.  A closing ``tr`` turns the
    accumulated ``dep_date``, ``arr_date``, ``tmp_price`` and
    ``tmp_direction`` strings into one record, appends it to ``self.data``
    and resets the per-row state.

    Args:
        tag: Name of the element being closed.
    """
    if tag not in ("td", "tr") or self.header != 1:
        return

    if tag == "td":
        self.direction = 0
        self.price = 0
        return

    record = self._vals

    # dep_date / arr_date are split on 'T': the part before it is the date,
    # the part after it the clock time.
    departure = self.dep_date.split('T')
    record['date'] = departure[0]
    date_fields = record['date'].split('-')
    record['year'] = date_fields[0]
    record['month'] = date_fields[1]
    record['day'] = date_fields[2]
    # Only the first two ':'-separated fields of the time are kept.
    record['dep_time'] = ":".join(departure[1].split(":")[0:2])
    record['arr_time'] = ":".join(self.arr_date.split('T')[1].split(":")[0:2])

    # Every '\r\n'-separated segment after the first contributes one fare:
    # its first token, cleaned, minus its last two characters, plus one.
    # The smallest fare is multiplied by the module-level ``eur`` and
    # rounded to the nearest integer.
    fares = []
    for segment in self.tmp_price.split('\r\n')[1:]:
        leading_token = segment.split()[0]
        fares.append(int(strip_non_ascii(leading_token)[0:-2]) + 1)
    record['price'] = int((min(fares) * eur) + 0.5)

    # Direction is taken from the first line of the collected text;
    # "BCN" is checked last and therefore wins if both codes appear.
    first_line = self.tmp_direction.strip().split('\r\n')[0]
    if "TLV" in first_line:
        record['direction'] = 1
    if "BCN" in first_line:
        record['direction'] = 2
    # Earlier heuristic, kept for reference:
    #if "TLV" in [strip_non_ascii(x.replace(' ','')) for x in self.tmp_direction.split('\r\n')[1:5]][0] : self._vals['direction']=1
    #if "TLV" in [strip_non_ascii(x.replace(' ','')) for x in self.tmp_direction.split('\r\n')[1:5]][2] : self._vals['direction']=2

    # Publish the record and start the next row from a clean state.
    self.header = 0
    self.data.append(record)
    self._vals = {}
    self.tmp_price = ""
    self.tmp_direction = ""
    self.direction = 0
    self.price = 0
    self.dep_date = ""
    self.arr_date = ""
def handle_endtag(self, tag):
    """Finish a table cell or a whole result row.

    Both branches apply only while ``self.header == 1``:

    * ``td`` - reset the ``direction`` and ``price`` flags.
    * ``tr`` - build a record from ``dep_date``, ``arr_date``,
      ``tmp_price`` and ``tmp_direction``, append it to ``self.data`` and
      clear all per-row state.

    Args:
        tag: Name of the element being closed.
    """
    if tag == "td":
        if self.header == 1:
            self.direction = 0
            self.price = 0
    elif tag == "tr":
        if self.header == 1:
            row = self._vals

            # Date portion precedes 'T', time portion follows it.
            dep_pieces = self.dep_date.split('T')
            row['date'] = dep_pieces[0]
            ymd = row['date'].split('-')
            row['year'] = ymd[0]
            row['month'] = ymd[1]
            row['day'] = ymd[2]
            # Keep the first two ':'-separated fields of each time.
            row['dep_time'] = ":".join(dep_pieces[1].split(":")[0:2])
            arr_clock = self.arr_date.split('T')[1]
            row['arr_time'] = ":".join(arr_clock.split(":")[0:2])

            # Skip the first '\r\n' segment; for each remaining one take the
            # first token, clean it, drop its last two characters and add 1.
            price_lines = self.tmp_price.split('\r\n')[1:]
            cheapest = min([
                int(strip_non_ascii(line.split()[0])[0:-2]) + 1
                for line in price_lines
            ])
            # Multiply by the module-level ``eur`` and round to nearest int.
            row['price'] = int((cheapest * eur) + 0.5)

            # First line of the direction text decides the direction code;
            # the "BCN" check runs last and overrides "TLV".
            heading = self.tmp_direction.strip().split('\r\n')[0]
            if "TLV" in heading:
                row['direction'] = 1
            if "BCN" in heading:
                row['direction'] = 2
            # Earlier heuristic, kept for reference:
            #if "TLV" in [strip_non_ascii(x.replace(' ','')) for x in self.tmp_direction.split('\r\n')[1:5]][0] : self._vals['direction']=1
            #if "TLV" in [strip_non_ascii(x.replace(' ','')) for x in self.tmp_direction.split('\r\n')[1:5]][2] : self._vals['direction']=2

            # Emit the row and reset everything collected for it.
            self.header = 0
            self.data.append(row)
            self._vals = {}
            self.tmp_price = ""
            self.tmp_direction = ""
            self.direction = 0
            self.price = 0
            self.dep_date = ""
            self.arr_date = ""
def handle_endtag(self, tag):
    """Reset field flags on ``</span>`` and emit a record on ``</li>``.

    A closing ``span`` clears the ``price``, ``date`` and ``time`` flags.
    A closing ``li`` while ``self.day == 1`` converts the collected
    ``tmp_date``, ``tmp_price`` and ``tmp_time`` strings into a record
    (only when a price was collected) and appends it to ``self.data``.

    Args:
        tag: Name of the element being closed.
    """
    if tag == "span":
        self.price = 0
        self.date = 0
        self.time = 0

    if tag != "li" or self.day != 1:
        return

    if self.tmp_price != "":
        record = self._vals

        # tmp_date tokens: weekday, day of month, abbreviated month name.
        date_tokens = self.tmp_date.split()
        record['weekday'] = date_tokens[0]
        record['day'] = date_tokens[1]
        month_name = datetime.datetime.strptime(date_tokens[2], "%b")
        record['month'] = month_name.strftime("%m")

        # 'price' is the cleaned amount times the module-level ``eur``,
        # rounded to the nearest integer; 'priceE' keeps the cleaned text.
        cleaned_price = strip_non_ascii(self.tmp_price)
        record['price'] = int(float(cleaned_price) * float(eur) + 0.5)
        record['priceE'] = cleaned_price
        record['direction'] = self.direction

        # Tokens 1 and 3 of the time text are departure and arrival.
        time_tokens = self.tmp_time.split()
        record['dep_time'] = time_tokens[1]
        record['arr_time'] = time_tokens[3]

        # req_date is '-'-separated with the month in field 1 and the year
        # in field 2.  Shift the year when the record falls on the other
        # side of a December/January boundary from the requested month.
        request_fields = self.req_date.split("-")
        year = int(request_fields[2])
        request_month = int(request_fields[1])
        if request_month == 1 and int(record['month']) == 12:
            year -= 1
        elif request_month == 12 and int(record['month']) == 1:
            year += 1
        record['year'] = str(year)

        self.data.append(record)

    # NOTE(review): the source had lost its indentation; these resets are
    # assumed to run for every qualifying </li>, not only when a price was
    # present - confirm against the original file.
    self._vals = {}
    self.tmp_price = ""
    self.tmp_time = ""
def handle_endtag(self, tag):
    """React to a closing ``h3``, ``h2``, ``span`` or ``label`` element.

    * ``h3`` clears the ``header`` flag, ``h2`` clears the ``direction`` flag.
    * ``span`` - while ``self.date == 1`` - fills the year, weekday, day and
      month fields of the current record from ``tmp_date``/``req_date``.
    * ``label`` appends the current record (if it has any fields) to
      ``self.data`` and resets the per-record state.

    Args:
        tag: Name of the element being closed.
    """
    if tag == "h3":
        self.header = 0
    elif tag == "h2":
        self.direction = 0
    elif tag == "span":
        if self.date == 1:
            # req_date is '-'-separated: field 1 is the month, field 2 the
            # year.  Adjust the year when the collected date lies across a
            # December/January boundary from the requested month.
            request_fields = self.req_date.split("-")
            year = int(request_fields[2])
            request_month = int(request_fields[1])
            if request_month == 1 and "Dec" in self.tmp_date:
                year -= 1
            if request_month == 12 and "Jan" in self.tmp_date:
                year += 1
            self._vals['year'] = str(year)

            # Cleaned date tokens: weekday, day of month, month abbreviation.
            date_tokens = strip_non_ascii(self.tmp_date).split()
            self._vals['weekday'] = date_tokens[0]
            self._vals['day'] = date_tokens[1]
            parsed_month = datetime.datetime.strptime(date_tokens[2], "%b")
            self._vals['month'] = parsed_month.strftime("%m")
    elif tag == "label":
        if self._vals:
            self._vals['direction'] = self.directionval
            self.data.append(self._vals)
        # NOTE(review): indentation was lost in the source; these resets are
        # assumed to run on every </label>, including when the record was
        # empty - confirm against the original file.
        self.deep = 0
        self._vals = {}
        self.tmp_date = ""
def handle_endtag(self, tag):
    """Update parser state when an ``h3``, ``h2``, ``span`` or ``label`` closes.

    ``h3`` and ``h2`` clear the ``header`` and ``direction`` flags.  A
    ``span`` closing while ``self.date == 1`` derives year, weekday, day and
    month for the current record.  A ``label`` stores the record in
    ``self.data`` when it is non-empty and resets the per-record state.

    Args:
        tag: Name of the element being closed.
    """
    if tag == "h3":
        self.header = 0
        return

    if tag == "h2":
        self.direction = 0
        return

    if tag == "span":
        if self.date != 1:
            return
        # Year comes from field 2 of the '-'-separated req_date and is moved
        # by one when the requested month (field 1) and the collected date
        # sit on opposite sides of a December/January boundary.
        req_year = int(self.req_date.split("-")[2])
        req_month = int(self.req_date.split("-")[1])
        if req_month == 1 and "Dec" in self.tmp_date:
            req_year -= 1
        if req_month == 12 and "Jan" in self.tmp_date:
            req_year += 1
        self._vals['year'] = str(req_year)

        # Cleaned date text splits into weekday, day and month abbreviation.
        pieces = strip_non_ascii(self.tmp_date).split()
        self._vals['weekday'] = pieces[0]
        self._vals['day'] = pieces[1]
        self._vals['month'] = datetime.datetime.strptime(
            pieces[2], "%b").strftime("%m")
        return

    if tag != "label":
        return

    if self._vals:
        self._vals['direction'] = self.directionval
        self.data.append(self._vals)
    # NOTE(review): the source had no indentation; these resets are assumed
    # to apply to every </label>, not only to non-empty records - confirm
    # against the original file.
    self.deep = 0
    self._vals = {}
    self.tmp_date = ""
if debug_flag: fd=codecs.open('/tmp/output_pages/'+sys.argv[0]+'_'+DST+'_'+str(Start)+'-'+str(Ret)+'.html', 'w', encoding='utf-8') fd.write(r2.text) fd.close() continue cur_date=Start.strftime("%d-%m-%Y") prP = getFlight(cur_date) prP.feed(r2.text) if debug_flag: print Start.strftime("%d/%m/%Y") print Ret.strftime("%d/%m/%Y") print len(list(find_all(r2.text, "marketColumn"))) for s in find_all(r2.text, '<span class="price">'): print strip_non_ascii(r2.text[s+20:r2.text.find('<', s+20, s+30)]) print len(prP.data) for t in prP.data : print t print r2 print '-------' flightsList.extend(prP.data) Start=Ret print "" #Out=[] #Inc=[] #for i in flightsList: #if i['direction'] == 4 : #Out.append(i) #else: #Inc.append(i) #Out=clean_dup(Out)
'w', encoding='utf-8') fd.write(r2.text) fd.close() continue cur_date = Start.strftime("%d-%m-%Y") prP = getFlight(cur_date) prP.feed(r2.text) if debug_flag: print Start.strftime("%d/%m/%Y") print Ret.strftime("%d/%m/%Y") print len(list(find_all(r2.text, "marketColumn"))) for s in find_all(r2.text, '<span class="price">'): print strip_non_ascii( r2.text[s + 20:r2.text.find('<', s + 20, s + 30)]) print len(prP.data) for t in prP.data: print t print r2 print '-------' flightsList.extend(prP.data) Start = Ret print "" #Out=[] #Inc=[] #for i in flightsList: #if i['direction'] == 4 : #Out.append(i) #else: #Inc.append(i)