# Module-level setup: read the command-line arguments and compute the
# [Start_orig, Stop) date window used by the rest of the script.
debug_flag=False
new_year=0
maxn=31
# Second CLI argument; used below as a multiplier of (maxn - 1)-day steps.
arg_month=sys.argv[2]
Start_orig = datetime.date.today()
#Start_orig = datetime.date(2015,8,1)
# Move the start date forward by arg_month * (maxn - 1) days from today.
Start_orig += datetime.timedelta(days=(int(maxn)-1)*int(arg_month))
# The window ends maxn days after the shifted start date.
Stop = Start_orig + datetime.timedelta(days=maxn)
scrape_time = datetime.datetime.today()
cleandone=1
# First CLI argument. NOTE(review): presumably the destination code -- confirm.
DST = sys.argv[1]
# An optional third CLI argument equal to "debug" switches debug_flag on.
if len(sys.argv) >= 4 :
    if sys.argv[3] == "debug" :
        debug_flag=True
# get_currency is defined elsewhere. NOTE(review): presumably returns a USD
# conversion rate -- confirm.
usd=get_currency("usd")
def get_proxy():
    # Repeatedly calls replace_proxy() and requests test_url through a fresh
    # requests session while `good` is False.
    s=requests.session()
    test_url='http://fly.elal.co.il/plnext/ELALonlinebooking/Override.action'
    # NOTE(review): test2_url is assigned but not referenced in the statements
    # below -- confirm whether it is still needed.
    test2_url='http://booking.elal.co.il/newBooking/urlDirector.do'
    good=False
    while good is False:
        # replace_proxy() is defined elsewhere. NOTE(review): how the returned
        # proxy is applied to session `s` is not visible here -- confirm.
        cur_proxy = replace_proxy()
        print "Need Proxy... {0}".format(cur_proxy)
        try:
            test=s.get(test_url)
        except:
            # Bare except: any failure of the GET falls back to dummy().
            # NOTE(review): dummy() presumably supplies a `.text` attribute,
            # since `test.text` is read right after -- confirm.
            test = dummy()
        #if 'Access Denied' not in test.text and test.status_code == 200 and 'Manual Runner' not in test.text:
        # Check that the response text contains neither 'Access Denied' nor
        # 'Manual Runner' (the status-code check above is commented out).
        if 'Access Denied' not in test.text and 'Manual Runner' not in test.text:
new_year = 0 maxn = 31 arg_month = sys.argv[2] if arg_month > 11: exit Start_orig = datetime.date.today() #Start_orig = datetime.date(2015,8,1) Start_orig += datetime.timedelta(days=(int(maxn) - 1) * int(arg_month)) Stop = Start_orig + datetime.timedelta(days=maxn) scrape_time = datetime.datetime.today() cleandone = 1 DST = sys.argv[1] if len(sys.argv) >= 4: if sys.argv[3] == "debug": debug_flag = True usd = get_currency("usd") Start = Start_orig flightsList = [] n = 0 print DST print str(scrape_time) print str(Start_orig), str(arg_month) while Stop > Start: n += 1 if debug_flag: print "Progress: " + str(n) + "/" + str(maxn) else: sys.stdout.write(" Progress: %d/%d \r" % (n, maxn)) sys.stdout.flush() Ret = Start + datetime.timedelta(days=2)
import requests
import re
from HTMLParser import HTMLParser
import sys
import datetime
from general_scrape import find_all, clean_dup, strip_non_ascii, get_currency

# get_currency comes from general_scrape. NOTE(review): presumably a EUR
# conversion rate -- confirm.
eur = get_currency("eur")


class getFlight(HTMLParser):
    """HTML parser that records which day-slider section was last opened.

    ``direction`` becomes 1 when a ``<div class="OutboundDaySlider">`` start
    tag is seen and 2 for ``<div class="ReturnDaySlider">``.
    """

    def __init__(self, req_date):
        """Initialise parser state and remember *req_date*.

        NOTE(review): *req_date* is only stored here; its expected type and
        format are not visible in this block -- confirm against callers.
        """
        self.req_date = req_date

        # Scratch string buffers.
        self.tmp_date = ""
        self.tmp_price = ""
        self.tmp_data = ""
        self.tmp_time = ""

        # Numeric state, all starting at zero.
        self.day = 0
        self.endday = 0
        self.price = 0
        self.date = 0
        self.time = 0
        self.header = 0
        # 0 until a slider div is seen; then 1 (outbound) or 2 (return).
        self.direction = 0

        # Collected results.
        self.data = []
        self._vals = {}

        # Explicit base-class call: the Python 2 HTMLParser is an old-style
        # class, so super() cannot be used.
        HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        """Update ``direction`` when a day-slider ``<div>`` start tag is seen."""
        if tag == "div":
            for name, value in attrs:
                if name == "class":
                    if value == "OutboundDaySlider":
                        self.direction = 1
                    elif value == "ReturnDaySlider":
                        self.direction = 2