Example #1
def get_pnr(pnr):
    url = 'http://www.indianrail.gov.in/cgi_bin/inet_pnstat_cgi_10521.cgi'
    values = {
        'lccp_pnrno1': pnr,
        'lccp_cap_val': 30000,  # random value
        'lccp_capinp_val': 30000
    }

    header = {
        "Origin": "http://www.indianrail.gov.in",
        "Host": "www.indianrail.gov.in",
        "User-Agent": "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)",
        "Referer": "http://www.indianrail.gov.in/pnr_Enq.html"
    }

    html = fetchpage(url, values, header)
    d = {}
    num = re.findall(r"(?<=_both\">)\*?[0-9 -]+", html)
    strings = re.findall(r"(?<=_both\">)[0-9A-Z ]+[A-Z]+", html)
    strings = [s.strip() for s in strings]
    psgr = re.findall(r"(?<=B>Passenger )[0-9]+", html)
    status = re.findall(r"(?<=B>)(?!Passenger)[0-9A-Za-z/, ]+(?=</B>)", html)
    status = [s.strip() for s in status]
    booking_status = []
    current_status = []
    for i in range(0, len(status), 2):
        booking_status.append(status[i])
        current_status.append(status[i + 1])
    try:
        d['pnr'] = pnr
        d['number'] = num[0][1:]
        d['doj'] = strip_inline_space(num[1])
        d['name'] = strings[0]
        d['from'] = strings[1]
        d['to'] = strings[2]
        d['upto'] = strings[3]
        d['boarding'] = strings[4]
        d['class'] = strings[5]
        d['chart'] = 'N' if strings[6] == 'CHART NOT PREPARED' else 'Y'
        d['total'] = len(psgr)
        d['booking_status'] = booking_status
        d['current_status'] = current_status
        d['error'] = False
    except IndexError as e:
        d['number'] = ''
        d['doj'] = ''
        d['name'] = ''
        d['from'] = ''
        d['to'] = ''
        d['upto'] = ''
        d['boarding'] = ''
        d['class'] = ''
        d['chart'] = ''
        d['pnr'] = ''
        d['total'] = 0
        d['booking_status'] = ''
        d['current_status'] = ''
        d['error'] = True
        return d
    return d
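
A minimal offline sketch of the lookbehind-regex extraction used above, run against a made-up fragment shaped like the PNR-status markup (the HTML snippet below is an assumption, not the real response):

import re

# Hypothetical fragment: value cells carry class="table_border_both",
# per-passenger rows are wrapped in <B> tags
html = ('<td class="table_border_both">*12345 </td>'
        '<td class="table_border_both">NEW DELHI</td>'
        '<B>Passenger 1</B><B> W/L 12 </B><B> RAC 4 </B>')

num = re.findall(r"(?<=_both\">)\*?[0-9 -]+", html)
strings = re.findall(r"(?<=_both\">)[0-9A-Z ]+[A-Z]+", html)
psgr = re.findall(r"(?<=B>Passenger )[0-9]+", html)
status = re.findall(r"(?<=B>)(?!Passenger)[0-9A-Za-z/, ]+(?=</B>)", html)

print(num)      # ['*12345 ']
print(strings)  # ['NEW DELHI']
print(psgr)     # ['1']
print(status)   # [' W/L 12 ', ' RAC 4 ']
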
Example #2
def runningtime(url):
    global pos
    html=fetchpage(url)
    stn=re.findall("(?<=td>)(?!Source|Destination)[A-Za-z() ]+",html)
    times=re.findall("(?<=span=\"2\">)Source|(?<=td>)[0-9]+:[0-9]+ [PAM]+ / Destination|(?<=td>)Source / [0-9]+:[0-9]+ [PAM]+|(?<=td>)[0-9]+:[0-9]+ [PAM]+ / [0-9]+:[0-9]+ [PAM]+|(?<=td>)[0-9]+:[0-9]+ [PAM]+|(?<=td>)Source|(?<=td>)Destination|(?<=span=\"2\">)E.T.A.:[0-9PAM :]+|Waiting for Update",html)
    status=re.findall("(?<=green\">)No Delay|(?<=red\">)[0-9]+ [A-Za-z0-9 ]+|(?<=blue\">)[A-Za-z 0-9.]+",html)
    pos=re.search('(?<=br>Currently)[A-Za-z()0-9 ,<>\"\'=/:.]+(?=</p>)',html)
    if pos!=None:
        pos=remove_tag(pos.group(0))
    lst=[]
    i=0
    for j in range(len(stn)):
        d={}
        d['station']=stn[j]
        d['sch_arrival']=times[i]
        d['sch_departure']=times[i+1]
        try:
            tm=times[i+2]
            t=tm.split('/')
            d['act_arrival']=t[0].strip()
            d['act_departure']=t[1].strip()
        except IndexError:
            # No "arrival / departure" pair: keep whatever single value exists
            d['act_arrival']=times[i+2] if i+2<len(times) else '-'
            d['act_departure']='-'
        lst.append(d)
        i+=3
    return lst
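
The loop above walks the flat times list in strides of three: scheduled arrival, scheduled departure, then an "actual arrival / actual departure" pair that may be incomplete. A minimal sketch of that grouping on made-up values:

# Hypothetical flat list, shaped like what the times regex returns
times = ['Source', '10:05 PM', '10:04 PM / 10:06 PM',
         '11:40 PM', 'Destination', '11:42 PM / Destination']

rows = []
for i in range(0, len(times), 3):
    sch_arr, sch_dep, actual = times[i:i + 3]
    parts = [p.strip() for p in actual.split('/')]
    rows.append({'sch_arrival': sch_arr,
                 'sch_departure': sch_dep,
                 'act_arrival': parts[0],
                 'act_departure': parts[1] if len(parts) > 1 else '-'})

print(rows[0]['act_departure'])  # 10:06 PM
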
Example #3
def extract_class(train):
    url="http://erail.in/"+str(train)+"/route"
    html=fetchpage(url)
    l=[]
    soup=BeautifulSoup(html,"html.parser")
    length=0
    for i in soup.find_all("b"):
        if len(i.attrs)==0:
            if(validate_class(i.text)):
                for j in i.text.split(" "):
                    length+=1
                    l.append(j.strip())
    if length<1: # rough heuristic to detect error
        print('No data: ',train)
        return None
    classcode=['1A','FC','2A','3A','3E','CC','SL','2S']
    d={}
    #d['train-number']=l[3]
    #d['train-name']=l[4]
    d['class-code']=[]
    not_encoutered_day=0
    for i,txt in enumerate(l):
        if txt in classcode:
            d['class-code'].append(txt)
            not_encoutered_day=1
        elif not_encoutered_day==1:
            break
    d['route']=[]    
    return d
Example #4
def runningtime(url):
    global pos
    html = fetchpage(url)
    stn = re.findall("(?<=td>)(?!Source|Destination)[A-Za-z() ]+", html)
    times = re.findall(
        "(?<=span=\"2\">)Source|(?<=td>)[0-9]+:[0-9]+ [PAM]+ / Destination|(?<=td>)Source / [0-9]+:[0-9]+ [PAM]+|(?<=td>)[0-9]+:[0-9]+ [PAM]+ / [0-9]+:[0-9]+ [PAM]+|(?<=td>)[0-9]+:[0-9]+ [PAM]+|(?<=td>)Source|(?<=td>)Destination|(?<=span=\"2\">)E.T.A.:[0-9PAM :]+",
        html)
    status = re.findall(
        "(?<=green\">)No Delay|(?<=red\">)[0-9]+ [A-Za-z0-9 ]+|(?<=blue\">)[A-Za-z 0-9.]+",
        html)
    pos = re.search('(?<=br>Currently)[A-Za-z()0-9 ,<>\"\'=/:.]+(?=</p>)',
                    html)
    if pos != None:
        pos = remove_tag(pos.group(0))
    lst = []
    i = 0
    for j in range(len(stn)):
        d = {}
        d['station'] = stn[j]
        d['sch_arrival'] = times[i]
        d['sch_departure'] = times[i + 1]
        try:
            tm = times[i + 2]
            t = tm.split('/')
            d['act_arrival'] = t[0].strip()
            d['act_departure'] = t[1].strip()
        except IndexError:
            # No "arrival / departure" pair: keep whatever single value exists
            d['act_arrival'] = times[i + 2] if i + 2 < len(times) else '-'
            d['act_departure'] = '-'

        lst.append(d)
        i += 3
    return lst
Example #5
def extract(train):
    url = "http://www.indianrail.gov.in/cgi_bin/inet_trnnum_cgi.cgi"
    ref = "http://www.indianrail.gov.in/inet_trn_num.html"
    html = fetchpage(url, {"lccp_trnname": train}, {"Referer": ref})
    l = []
    soup = BeautifulSoup(html, "html.parser")
    length = 0
    for i in soup.find_all("td"):
        if len(i.attrs) == 0:
            if validate(i.text):
                length += 1
                l.append(i.text.strip())

    if length < 10:  # rough heuristic to detect error
        print("No data: ", train)
        return None
    daycode = ["MON", "TUE", "WED", "THU", "FRI", "SAT", "SUN"]
    d = {}
    d["train-number"] = l[3]
    d["train-name"] = l[4]
    d["day-code"] = []
    not_encoutered_day = 0
    for i, txt in enumerate(l):
        if txt in daycode:
            d["day-code"].append(txt)
            not_encoutered_day = 1
        elif not_encoutered_day == 1:
            break
    d["route"] = []
    # for m in l[i:]:
    #     print(m)
    l.append("END_MARKER")
    l = iter(l[i:])
    nxt = next(l)
    while True:
        t = {}
        t["no"] = nxt
        t["station-code"] = next(l)
        t["station-name"] = next(l)
        t["route-no"] = next(l)
        t["arrival-time"] = next(l)
        t["departure-time"] = next(l)
        nxt = next(l)
        # Many times no halt-time is given, this condition handles that case
        if ":" not in nxt:
            t["halt-time"] = 0
            t["distance"] = nxt
        else:
            t["halt-time"] = nxt
            t["distance"] = next(l)
        t["day"] = next(l)
        d["route"].append(t)
        nxt = next(l)
        # print(t)
        if nxt == "END_MARKER":
            break

    return d
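
The while-loop above consumes the flattened timetable through a sentinel, taking either eight or nine cells per station depending on whether a halt time is present. A minimal sketch of that pattern on made-up cells (station names and values are assumptions):

cells = ['1', 'NDLS', 'NEW DELHI', '1', 'Source', '16:55', '0', '1',
         '2', 'CNB', 'KANPUR CENTRAL', '1', '21:40', '21:45', '00:05', '440', '1',
         'END_MARKER']

it = iter(cells)
route = []
nxt = next(it)
while nxt != 'END_MARKER':
    row = {'no': nxt, 'station-code': next(it), 'station-name': next(it),
           'route-no': next(it), 'arrival-time': next(it), 'departure-time': next(it)}
    nxt = next(it)
    if ':' in nxt:              # a halt time such as '00:05' is present
        row['halt-time'] = nxt
        row['distance'] = next(it)
    else:                       # no halt time; nxt is already the distance
        row['halt-time'] = 0
        row['distance'] = nxt
    row['day'] = next(it)
    route.append(row)
    nxt = next(it)

print(len(route))  # 2
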
Example #6
def between(source,dest,date):
    url='http://www.indianrail.gov.in/cgi_bin/inet_srcdest_cgi_date.cgi'
    date=date.split('-')
    if len(date)==1:
        date.append('')
    cls="ZZ"
    values={"lccp_src_stncode_dis":source,
            "lccp_src_stncode":source,
            "lccp_dstn_stncode_dis":dest,
            "lccp_dstn_stncode":dest,
            "lccp_classopt":cls,
            "lccp_day":date[0],
            "lccp_month":date[1],
            "CurrentMonth":"4",
            "CurrentDate":"19",
            "CurrentYear":"2016"
    }
    header={"Origin":"http://www.indianrail.gov.in",
            "Host":"www.indianrail.gov.in",
            "User-Agent":"Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)",
            "Referer":"http://www.indianrail.gov.in/fare_Enq.html"
            }
    html=fetchpage(url,values,header)
    soup=BeautifulSoup(html,"html.parser")
    trains=[]
    for link in soup.find_all(href="#SAMETRN"):
        trains.append(link.text[1:].strip())

    destinations=[]
    sources=[]
    alter=0
    for tdtag in soup.find_all("td"):
        tagattr=tdtag.attrs.get('title','')
        if 'Station Code' in tagattr or 'temporary' in tagattr:
            t={}
            t['code']=extract_stn_code(tdtag['title'])
            t['name']=sanitize(tdtag.text)
            if alter==0:
                t['name']=db.station_metadata(t['code'])['fullname']
                sources.append(t)
                alter=1
            else:
                destinations.append(t)
                alter=0

    days=re.findall("(?<=B>)Y|(?<=red>)N",html)   
    numbers=[]
    for link in soup.find_all("input"):
        if link.get("onclick",False):
            #print(link['onclick'])
            num=re.findall("(?<=\')[0-9]+(?=[A-Z]+)",link['onclick'])
            if num!=[]:
                numbers.append(num[0])
    times=re.findall("(?<=TD>)[0-9:]+",html)
    return format_result_json(trains,days,numbers,times,sources,destinations)
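
The td scan above alternates between source and destination cells while pulling the station code out of the title attribute. A minimal offline sketch of that idea on a hypothetical fragment; the title text and the inline stand-ins for extract_stn_code() and sanitize() (and the skipped db lookup) are assumptions:

from bs4 import BeautifulSoup

html = ('<td title="Station Code - NDLS">NEW DELHI</td>'
        '<td title="Station Code - BCT">MUMBAI CENTRAL</td>')

soup = BeautifulSoup(html, "html.parser")
sources, destinations = [], []
alter = 0
for td in soup.find_all("td"):
    title = td.attrs.get('title', '')
    if 'Station Code' in title:
        t = {'code': title.split('-')[-1].strip(),  # stand-in for extract_stn_code()
             'name': td.text.strip()}               # stand-in for sanitize()
        if alter == 0:
            sources.append(t)
            alter = 1
        else:
            destinations.append(t)
            alter = 0

print(sources)       # [{'code': 'NDLS', 'name': 'NEW DELHI'}]
print(destinations)  # [{'code': 'BCT', 'name': 'MUMBAI CENTRAL'}]
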
Example #7
def get_train(url):
    html = fetchpage(url)
    progn = re.compile(r"(?<=VALUE=\")[0-9]+", re.IGNORECASE)
    num = re.findall(progn, html)
    progn = re.compile(r"(?<=LEFT\">)[A-Za-z]+[ A-Za-z]+",
                       re.IGNORECASE)  #Extracts train names
    name = re.findall(progn, html)
    j = 0
    for i in range(0, len(num)):
        yield (num[i], name[j].strip())
        j = j + 3  # name[j-2] and name[j-1] hold the train's source and destination stations, which are skipped
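
In the scraped listing each train contributes three consecutive entries to name (its name followed by the source and destination stations), while num holds one entry per train, which is why j advances by 3 per yield. A sketch of the same pairing on made-up lists:

num = ['12951', '12952']
name = ['Mumbai Rajdhani ', 'MUMBAI CENTRAL', 'NEW DELHI',
        'August Kranti Raj ', 'MUMBAI CENTRAL', 'H NIZAMUDDIN']

pairs = [(num[i], name[3 * i].strip()) for i in range(len(num))]
print(pairs)  # [('12951', 'Mumbai Rajdhani'), ('12952', 'August Kranti Raj')]
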
Example #8
def get_pnr(pnr):
    url='http://www.indianrail.gov.in/cgi_bin/inet_pnstat_cgi_10521.cgi'
    values={'lccp_pnrno1':pnr,
            'lccp_cap_val':30000,# random value
            'lccp_capinp_val':30000}

    header={"Origin":"http://www.indianrail.gov.in",
            "Host":"www.indianrail.gov.in",
            "User-Agent":"Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)",
            "Referer":"http://www.indianrail.gov.in/pnr_Enq.html"
            }

    html=fetchpage(url,values,header)
    d={}
    num=re.findall(r"(?<=_both\">)\*?[0-9 -]+",html)
    strings=re.findall(r"(?<=_both\">)[0-9A-Z ]+[A-Z]+",html)
    strings=[s.strip() for s in strings]
    psgr=re.findall(r"(?<=B>Passenger )[0-9]+",html)
    status=re.findall(r"(?<=B>)(?!Passenger)[0-9A-Za-z/, ]+(?=</B>)",html)
    cancelled=0
    if status!=[] and status[-1]=='TRAIN CANCELLED':
        cancelled=1
    status=[s.strip() for s in status]
    booking_status=[]
    current_status=[]
    for i in range(0,len(status),2):
        booking_status.append(status[i])
        if cancelled:
            current_status.append('TRAIN CANCELLED')
            continue
        current_status.append(status[i+1])
    try:
        d['pnr']=pnr
        d['number']=num[0][1:]
        d['doj']=strip_inline_space(num[1])
        d['name']=strings[0]
        d['from']=strings[1]
        d['to']=strings[2]
        d['upto']=strings[3]
        d['boarding']=strings[4]
        d['class']=strings[5]
        d['chart']='N' if strings[6]=='CHART NOT PREPARED' else 'Y'
        d['total']=len(psgr)
        d['booking_status']=booking_status
        d['current_status']=current_status
        d['error']=False
    except IndexError as e:
        d['number']=''
        d['doj']=''
        d['name']=''
        d['from']=''
        d['to']=''
        d['upto']=''
        d['boarding']=''
        d['class']=''
        d['chart']=''
        d['pnr']=''
        d['total']=0
        d['booking_status']=''
        d['current_status']=''
        d['error']=True
        return d
    return d
Example #9
def get_fare(k):
    url = "http://www.indianrail.gov.in/cgi_bin/inet_frenq_cgi.cgi"
    doj = k['doj'].split('-')
    if len(doj) <= 1:
        doj = ['31', '12', '2015']  #Default Date
    k['quota'] = k['quota'].upper()
    values = {
        "lccp_trnno": k['train'],
        "lccp_day": doj[0],
        "lccp_month": doj[1],
        "lccp_srccode": k['source'],
        "lccp_dstncode": k['dest'],
        #"lccp_classopt":k['pref'],
        "lccp_classopt": "ZZ",
        "lccp_age": k['age'],
        "lccp_frclass1": k['quota'],
        "lccp_conc": "ZZZZZZ",
        "lccp_enrtcode": None,
        "lccp_viacode": None,
        "lccp_frclass2": "ZZ",
        "lccp_frclass3": "ZZ",
        "lccp_frclass4": "ZZ",
        "lccp_frclass5": "ZZ",
        "lccp_frclass6": "ZZ",
        "lccp_frclass7": "ZZ",
        "lccp_disp_avl_flg": "1",
        "getIt": "Please Wait...",
    }

    header = {
        "Origin": "http://www.indianrail.gov.in",
        "Host": "www.indianrail.gov.in",
        "User-Agent": "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)",
        "Referer": "http://www.indianrail.gov.in/fare_Enq.html"
    }

    html = fetchpage(url, values, header)
    cls = re.findall("(?<=Class -- )[0-9A-Za-z]+", html)
    fares = re.findall("(?<=both\">)[0-9]+", html)
    f = []
    l = len(cls)
    for i in range(l):
        t = {}
        t['class'] = cls[i]
        t['fare'] = fares[-l + i]
        f.append(t)
    return f
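
The fares pattern above can also match numbers that appear before the fare table, which is presumably why each class is paired with the last len(cls) matches via negative indexing. A sketch on made-up lists:

cls = ['SL', '3A', '2A']
fares = ['12951', '2016', '295', '770', '1090']  # leading matches that are not fares

l = len(cls)
f = [{'class': cls[i], 'fare': fares[-l + i]} for i in range(l)]
print(f)
# [{'class': 'SL', 'fare': '295'}, {'class': '3A', 'fare': '770'},
#  {'class': '2A', 'fare': '1090'}]
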
Example #10
def get_fare(k):
    url="http://www.indianrail.gov.in/cgi_bin/inet_frenq_cgi.cgi"
    doj=k['doj'].split('-')
    if len(doj)<=1:
        doj=['31','12','2015'] #Default Date
    k['quota']=k['quota'].upper()
    values={"lccp_trnno":k['train'],
            "lccp_day":doj[0],
            "lccp_month":doj[1],
            "lccp_srccode":k['source'],
            "lccp_dstncode":k['dest'],
            #"lccp_classopt":k['pref'],
            "lccp_classopt":"ZZ",
            "lccp_age":k['age'],
            "lccp_frclass1":k['quota'],
            "lccp_conc":"ZZZZZZ",
            "lccp_enrtcode":None,
            "lccp_viacode":None,
            "lccp_frclass2":"ZZ",
            "lccp_frclass3":"ZZ",
            "lccp_frclass4":"ZZ",
            "lccp_frclass5":"ZZ",
            "lccp_frclass6":"ZZ",
            "lccp_frclass7":"ZZ",
            "lccp_disp_avl_flg":"1",
            "getIt":"Please Wait...",
    }

    header={"Origin":"http://www.indianrail.gov.in",
            "Host":"www.indianrail.gov.in",
            "User-Agent":"Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)",
            "Referer":"http://www.indianrail.gov.in/fare_Enq.html"
            }
    
    html=fetchpage(url,values,header)
    cls=re.findall("(?<=Class -- )[0-9A-Za-z]+",html)
    fares=re.findall("(?<=both\">)[0-9]+",html)
    f=[]
    l=len(cls)
    for i in range(l):
        t={}
        t['class']=cls[i]
        t['fare']=fares[-l+i]
        f.append(t)
    return f
Example #11
def between(source,dest,date):
    url='http://www.indianrail.gov.in/cgi_bin/inet_srcdest_cgi_date.cgi'
    date=date.split('-')
    if len(date)==1:
        date.append('') # avoid an IndexError below when no month is given
    cls="ZZ"
    values={"lccp_src_stncode_dis":source,
            "lccp_src_stncode":source,
            "lccp_dstn_stncode_dis":dest,
            "lccp_dstn_stncode":dest,
            "lccp_classopt":cls,
            "lccp_day":date[0],
            "lccp_month":date[1],
            "CurrentMonth":"4",
            "CurrentDate":"19",
            "CurrentYear":"2016"
    }
    header={"Origin":"http://www.indianrail.gov.in",
            "Host":"www.indianrail.gov.in",
            "User-Agent":"Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)",
            "Referer":"http://www.indianrail.gov.in/fare_Enq.html"
            }
    html=fetchpage(url,values,header)
    soup=BeautifulSoup(html,"html.parser")
    trains=[]
    for link in soup.find_all(href="#SAMETRN"):
        trains.append(link.text[1:].strip())

    days=re.findall("(?<=B>)Y|(?<=red>)N",html)   
    numbers=[]
    for link in soup.find_all("input"):
        if link.get("onclick",False):
            #print(link['onclick'])
            num=re.findall("(?<=\')[0-9]+(?=[A-Z]+)",link['onclick'])
            if num!=[]:
                numbers.append(num[0])
    times=re.findall("(?<=TD>)[0-9:]+",html)
    return format_result_json(trains,days,numbers,times)
Example #12
def get_seat(train,pref,quota,doj,source,dest):
    url="http://www.indianrail.gov.in/cgi_bin/inet_accavl_cgi.cgi"
    d={}
    d['num']=train
    d['quota']=quota
    d['class']=pref
    d['source']=source
    d['dest']=dest
    doj=doj.split('-')
    if len(doj)!=3:
        return nullify(d)

    values={"lccp_trnno":train,
            "lccp_day":doj[0],
            "lccp_month":doj[1],
            "lccp_srccode":source,
            "lccp_dstncode":dest,
            "lccp_class1":pref,
            "lccp_quota":quota,
            "lccp_classopt":"ZZ",
            "lccp_class2":"ZZ",
            "lccp_class3":"ZZ",
            "lccp_class4":"ZZ",
            "lccp_class5":"ZZ",
            "lccp_class6":"ZZ",
            "lccp_class7":"ZZ",
            }
    header={"Origin":"http://www.indianrail.gov.in",
            "Host":"www.indianrail.gov.in",
            "User-Agent":"Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)",
            "Referer":"http://www.indianrail.gov.in/seat_Avail.html"
            }

    html=fetchpage(url,values,header)
    soup=BeautifulSoup(html,"html.parser")

    seats=[]
    dates=[]
    for i in soup.find_all('td'):
        if i.get('class',[None])[0]=='table_border_both':
            if len(i.attrs.keys())==1:
                txt=i.text
                if ischaralpha(txt[0]):
                    seats.append(txt)
                else:
                    dates.append(strip_inline_space(txt))

    if seats==[]:
        return nullify(d)

    d['seats']=[]
    d['dates']=[]
    d['error']=False
    #Sometimes the page lists seats for two classes and sometimes only for one,
    #so 'step' is the number of classes shown and stepping through 'seats' by it keeps only the queried class
    step=int(len(seats)/len(dates))
    if step==0:
        return nullify(d)

    for i in range(0,len(seats),step):
        d['seats'].append(seats[i])
    for i in dates:
        i=strip_inline_space(i)
        d['dates'].append(i)
    return d
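
As the comment above explains, the availability table may list more than one class, so step = len(seats) // len(dates) is the number of classes per date row and stepping through seats by it keeps only the first class in each group, which the code takes to be the queried one. A sketch on made-up lists:

seats = ['AVAILABLE 12', 'RAC 8', 'AVAILABLE 10', 'RAC 5']  # two classes per date
dates = ['19- 4-2016', '20- 4-2016']

step = len(seats) // len(dates)  # 2 classes shown on this hypothetical page
queried = [seats[i] for i in range(0, len(seats), step)]
print(queried)  # ['AVAILABLE 12', 'AVAILABLE 10']
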
Example #13
def runningtime(number,doj):
    url='http://runningstatus.in/status/{0}-on-{1}'.format(number,doj)
    d={}
    d['train_number']=number
    nullify(d)
    try:
        # doj must be an 8-character YYYYMMDD string; datetime() rejects impossible dates
        if len(doj)!=8:
            raise ValueError('Date not in proper format')
        year=int(doj[0:4])
        month=int(doj[4:6])
        day=int(doj[6:8])
        datetimeob=datetime.datetime(year,month,day)
    except (ValueError, TypeError):
        return format_result_json(nullify(d,'Date not in proper format'))

    weekday=datetimeob.weekday()
    html=fetchpage(url)
    soup=BeautifulSoup(html,"lxml")
    count=0
    for i in soup.find_all("div"):
        if i.attrs.get("class",[None])[0]=="runningstatus-widget-content":
            if count==1:
                dot=i.text.find('.')
                if dot!=-1:
                    end=i.text.find('\n\n\n')
                    #Some pages of this site has no ending tag for <div>. Handles it.
                    if end==-1:
                        d['position']=i.text[dot+1:]
                    else:
                        d['position']=i.text[dot+1:end]
            count+=1
            if "TRAIN IS CANCELLED" in i.text:
                return format_result_json(nullify(d,'Train is cancelled'))
    delay_time_header=0
    for i in soup.find_all("th"):
        if i.text.strip()=="Delay Time":
            delay_time_header=1
    trainmd=db.train_metadata(number)  
    days=['MON','TUE','WED','THU','FRI','SAT','SUN']
    if trainmd['days']!='':
        if days[weekday] not in trainmd['days']:
            return format_result_json(nullify(d,'Train does not run on given date'))

    lst=[]
    prog=re.compile("[A-Za-z0-9 .:/()-]+")
    for i in soup.find_all("td"):
        i=i.text.strip()
        if prog.match(i):
            lst.append(i)
    lst.append('END_MARKER')
    liter=iter(lst)
    nxt=next(liter)
    while True:
        t={}
        if nxt=='END_MARKER':
            break
        t['station']=nxt
        t['platform']=next(liter)
        t['scharr']=next(liter)
        t['schdep']=next(liter)
        t['actarr-actdep']=next(liter)
        t['status']=''
        nxt=next(liter)
        if station_name_format(nxt) or nxt=='END_MARKER':
            d['route'].append(t)
            continue
        if delay_time_header:
            nxt=next(liter)
            d['route'].append(t)
            continue
        t['status']=nxt
        d['route'].append(t)
        nxt=next(liter)
    if d['route']==[]:
        return format_result_json(nullify(d,'Invalid Train Number'))

    return format_result_json(d)    
Example #14
def get_train(url):
    soup = BeautifulSoup(fetchpage(url), "html.parser")
    for i in soup.find_all("input"):
        if i.attrs.get("value", False):
            if i["value"].isnumeric():
                yield i["value"]
Example #15
def get_pnr(pnr):
    url='http://www.indianrail.gov.in/cgi_bin/inet_pnstat_cgi_10521.cgi'
    values={'lccp_pnrno1':pnr,
            'lccp_cap_val':30000,# random value
            'lccp_capinp_val':30000}

    header={"Origin":"http://www.indianrail.gov.in",
            "Host":"www.indianrail.gov.in",
            "User-Agent":"Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)",
            "Referer":"http://www.indianrail.gov.in/pnr_Enq.html"
            }

    html=fetchpage(url,values,header)
    d={}
    nullify(d)
    d['pnr']=pnr
    soup=BeautifulSoup(html,"lxml")
    mapper={0:'number',1:'name',2:'doj',3:'from',4:'to',5:'upto',6:'boarding',7:'class',8:'upgraded_class'}
    count=0
    cancelled=0
    limit=8
    status=[]
        
    for i in soup.find_all("td"):
        if i.attrs.get("class")==["table_border_both"]:
            txt=i.text.strip()
            if i.attrs.get("align")=="middle":
                d['chart']='N' if 'CHART NOT PREPARED' in txt else 'Y'
                continue
            if count>=limit:
                if 'Passenger' not in txt:
                    if status==[] and 'TRAIN CANCELLED' in txt:
                        cancelled=1
                        break
                    status.append(txt)
            else:
                d[mapper[count]]=txt
            count+=1
        elif i.attrs.get("width")=="5%": # <td width="5%">Upgraded class</td>
            limit+=1

    if cancelled or count==0:
        return nullify(d)
    
    if limit==9:
        d['class']=d['upgraded_class'] # Updates current class to the upgraded class
    total=0
    length=len(status)
    coachpos=0
    if length%2==1:
        coachpos=1
    status=iter(status)
    nxt=p_next(status)
    while 1:
        if nxt=='':
            break
        d['booking_status'].append(nxt)
        nxt=p_next(status)
        d['current_status'].append(nxt)
        nxt=p_next(status)
        if coachpos:
            if nxt!='' and (nxt[0]>='0' and nxt[0]<='9'):
                d['coach_position'].append(int(nxt))
                nxt=p_next(status)
            else:
                d['coach_position'].append(0)
        else:
            d['coach_position'].append(0)
        total+=1
    d['total']=total
    d['error']=False
    return d
Example #16
def extract(train):
    url = "http://www.indianrail.gov.in/cgi_bin/inet_trnnum_cgi.cgi"
    ref = "http://www.indianrail.gov.in/inet_trn_num.html"
    html = fetchpage(url, {'lccp_trnname': train}, {'Referer': ref})
    return extract_page(html)
Example #17
import re
from fetchpage import fetchpage

def remove_tag(s):
    IN_TAG=0
    OUT_TAG=1
    state=OUT_TAG
    i=0
    new=''
    while i<len(s):
        c=s[i]
        i+=1
        if state==OUT_TAG:
            if c=='<':
                state=IN_TAG
                continue
        elif state==IN_TAG:
            if c=='>':
                state=OUT_TAG
            continue
        new=new+c
    return new

        
html=fetchpage('http://runningstatus.in/status/12555-today')
m=re.search('(?<=br>Currently)[A-Za-z()0-9 ,<>\"\'=/:.]+(?=</p>)',html)
s=m.group(0)
print(s)
print(remove_tag(s))
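
remove_tag itself needs no network access, so the stripping logic can also be checked offline (the sample string below is made up):

sample = 'at <a href="/station/ndls">NEW DELHI</a> (10:05 PM)'
print(remove_tag(sample))  # at NEW DELHI (10:05 PM)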