def get_pnr(pnr):
    """Fetch and parse PNR status from the Indian Rail enquiry CGI.

    Args:
        pnr: PNR number string to query.

    Returns:
        dict with keys pnr/number/doj/name/from/to/upto/boarding/class/
        chart/total/booking_status/current_status plus an 'error' flag.
        On any parse failure every field is blanked and 'error' is True.
    """
    url = 'http://www.indianrail.gov.in/cgi_bin/inet_pnstat_cgi_10521.cgi'
    values = {
        'lccp_pnrno1': pnr,
        'lccp_cap_val': 30000,  # random value
        'lccp_capinp_val': 30000
    }
    header = {
        "Origin": "http://www.indianrail.gov.in",
        "Host": "www.indianrail.gov.in",
        "User-Agent": "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)",
        "Referer": "http://www.indianrail.gov.in/pnr_Enq.html"
    }
    html = fetchpage(url, values, header)
    d = {}
    num = re.findall(r"(?<=_both\">)\*?[0-9 -]+", html)
    strings = re.findall(r"(?<=_both\">)[0-9A-Z ]+[A-Z]+", html)
    strings = [s.strip() for s in strings]
    psgr = re.findall(r"(?<=B>Passenger )[0-9]+", html)
    status = re.findall(r"(?<=B>)(?!Passenger)[0-9A-Za-z/, ]+(?=</B>)", html)
    status = [s.strip() for s in status]
    booking_status = []
    current_status = []
    try:
        # Statuses come in (booking, current) pairs.  Bug fix: this loop
        # used to run before the try block, so an odd-length status list
        # raised an uncaught IndexError instead of setting error=True.
        for i in range(0, len(status), 2):
            booking_status.append(status[i])
            current_status.append(status[i + 1])
        d['pnr'] = pnr
        d['number'] = num[0][1:]
        d['doj'] = strip_inline_space(num[1])
        d['name'] = strings[0]
        d['from'] = strings[1]
        d['to'] = strings[2]
        d['upto'] = strings[3]
        d['boarding'] = strings[4]
        d['class'] = strings[5]
        d['chart'] = 'N' if strings[6] == 'CHART NOT PREPARED' else 'Y'
        d['total'] = len(psgr)
        d['booking_status'] = booking_status
        d['current_status'] = current_status
        d['error'] = False
    except IndexError:
        # Page did not contain the expected fields; blank everything out.
        for key in ('number', 'doj', 'name', 'from', 'to', 'upto',
                    'boarding', 'class', 'chart', 'pnr',
                    'booking_status', 'current_status'):
            d[key] = ''
        d['total'] = 0
        d['error'] = True
    return d
def runningtime(url):
    """Parse a running-status page into a per-station schedule list.

    Args:
        url: page URL to fetch and scrape.

    Returns:
        list of dicts with 'station', 'sch_arrival', 'sch_departure',
        'act_arrival' and 'act_departure'.

    Side effect: sets the module-level 'pos' to the tag-stripped
    "Currently ..." snippet when the page contains one.
    """
    global pos
    html = fetchpage(url)
    stn = re.findall("(?<=td>)(?!Source|Destination)[A-Za-z() ]+", html)
    # One alternation per cell layout the site emits: source/destination
    # markers, single times, arrival/departure pairs, ETA, placeholders.
    times = re.findall("(?<=span=\"2\">)Source|(?<=td>)[0-9]+:[0-9]+ [PAM]+ / Destination|(?<=td>)Source / [0-9]+:[0-9]+ [PAM]+|(?<=td>)[0-9]+:[0-9]+ [PAM]+ / [0-9]+:[0-9]+ [PAM]+|(?<=td>)[0-9]+:[0-9]+ [PAM]+|(?<=td>)Source|(?<=td>)Destination|(?<=span=\"2\">)E.T.A.:[0-9PAM :]+|Waiting for Update", html)
    # Collected but currently unused by this variant of the parser.
    status = re.findall("(?<=green\">)No Delay|(?<=red\">)[0-9]+ [A-Za-z0-9 ]+|(?<=blue\">)[A-Za-z 0-9.]+", html)
    pos = re.search('(?<=br>Currently)[A-Za-z()0-9 ,<>\"\'=/:.]+(?=</p>)', html)
    if pos is not None:
        pos = remove_tag(pos.group(0))
    lst = []
    i = 0
    for j in range(len(stn)):
        d = {}
        d['station'] = stn[j]
        d['sch_arrival'] = times[i]
        d['sch_departure'] = times[i + 1]
        # Bug fix: if times[i + 2] was missing entirely, 'tm' was unbound
        # inside the old 'except IndexError' handler, so the handler itself
        # crashed with NameError.  Treat a missing cell as '-'.
        tm = times[i + 2] if i + 2 < len(times) else '-'
        parts = tm.split('/')
        if len(parts) > 1:
            d['act_arrival'] = parts[0].strip()
            d['act_departure'] = parts[1].strip()
        else:
            # No '/' separator: the cell holds a single value.
            d['act_arrival'] = tm
            d['act_departure'] = '-'
        lst.append(d)
        i += 3
    return lst
def extract_class(train):
    """Return the coach classes offered by *train*, scraped from erail.in.

    Returns None when no usable data is found; otherwise a dict with
    'class-code' (list of class codes) and an empty 'route' list.
    """
    page = fetchpage("http://erail.in/" + str(train) + "/route")
    soup = BeautifulSoup(page, "html.parser")
    tokens = []
    token_count = 0
    # Attribute-less <b> tags that pass validate_class() hold the data;
    # split each into whitespace-stripped words.
    for tag in soup.find_all("b"):
        if tag.attrs:
            continue
        if validate_class(tag.text):
            for piece in tag.text.split(" "):
                token_count += 1
                tokens.append(piece.strip())
    if token_count < 1:  # rough heuristic to detect error
        print('No data: ', train)
        return None
    classcode = ['1A', 'FC', '2A', '3A', '3E', 'CC', 'SL', '2S']
    result = {'class-code': []}
    seen_code = 0
    # Class codes appear as one contiguous run; stop at the first
    # non-code token after that run begins.
    for token in tokens:
        if token in classcode:
            result['class-code'].append(token)
            seen_code = 1
        elif seen_code == 1:
            break
    result['route'] = []
    return result
def runningtime(url):
    """Parse a running-status page into a per-station schedule list.

    Args:
        url: page URL to fetch and scrape.

    Returns:
        list of dicts with 'station', 'sch_arrival', 'sch_departure',
        'act_arrival' and 'act_departure'.

    Side effect: sets the module-level 'pos' to the tag-stripped
    "Currently ..." snippet when the page contains one.
    """
    global pos
    html = fetchpage(url)
    stn = re.findall("(?<=td>)(?!Source|Destination)[A-Za-z() ]+", html)
    # One alternation per cell layout the site emits: source/destination
    # markers, single times, arrival/departure pairs, ETA.
    times = re.findall(
        "(?<=span=\"2\">)Source|(?<=td>)[0-9]+:[0-9]+ [PAM]+ / Destination|(?<=td>)Source / [0-9]+:[0-9]+ [PAM]+|(?<=td>)[0-9]+:[0-9]+ [PAM]+ / [0-9]+:[0-9]+ [PAM]+|(?<=td>)[0-9]+:[0-9]+ [PAM]+|(?<=td>)Source|(?<=td>)Destination|(?<=span=\"2\">)E.T.A.:[0-9PAM :]+", html)
    # Collected but currently unused by this variant of the parser.
    status = re.findall(
        "(?<=green\">)No Delay|(?<=red\">)[0-9]+ [A-Za-z0-9 ]+|(?<=blue\">)[A-Za-z 0-9.]+", html)
    pos = re.search('(?<=br>Currently)[A-Za-z()0-9 ,<>\"\'=/:.]+(?=</p>)', html)
    if pos is not None:
        pos = remove_tag(pos.group(0))
    lst = []
    i = 0
    for j in range(len(stn)):
        d = {}
        d['station'] = stn[j]
        d['sch_arrival'] = times[i]
        d['sch_departure'] = times[i + 1]
        # Bug fix: if times[i + 2] was missing entirely, 'tm' was unbound
        # inside the old 'except IndexError' handler, so the handler itself
        # crashed with NameError.  Treat a missing cell as '-'.
        tm = times[i + 2] if i + 2 < len(times) else '-'
        parts = tm.split('/')
        if len(parts) > 1:
            d['act_arrival'] = parts[0].strip()
            d['act_departure'] = parts[1].strip()
        else:
            # No '/' separator: the cell holds a single value.
            d['act_arrival'] = tm
            d['act_departure'] = '-'
        lst.append(d)
        i += 3
    return lst
def extract(train):
    """Fetch and parse the full route table for *train* from the Indian
    Rail train-number CGI.

    Returns None when the page yields fewer than 10 usable cells (taken
    as "no data"); otherwise a dict with 'train-number', 'train-name',
    'day-code' (running days) and 'route' (per-station field dicts).
    """
    url = "http://www.indianrail.gov.in/cgi_bin/inet_trnnum_cgi.cgi"
    ref = "http://www.indianrail.gov.in/inet_trn_num.html"
    html = fetchpage(url, {"lccp_trnname": train}, {"Referer": ref})
    l = []
    soup = BeautifulSoup(html)
    length = 0
    # Collect the text of every attribute-less <td> that passes validate();
    # these are the data cells of the route table, in document order.
    for i in soup.find_all("td"):
        if len(i.attrs) == 0:
            if validate(i.text):
                length += 1
                l.append(i.text.strip())
    if length < 10:  # rough heuristic to detect error
        print("No data: ", train)
        return None
    daycode = ["MON", "TUE", "WED", "THU", "FRI", "SAT", "SUN"]
    d = {}
    # Cells 3 and 4 hold the train number and name (page-layout
    # assumption -- confirm against the live page).
    d["train-number"] = l[3]
    d["train-name"] = l[4]
    d["day-code"] = []
    not_encoutered_day = 0
    # Running days appear as one contiguous run of day codes; stop at the
    # first non-day cell after that run.  'i' is left pointing at the
    # first route cell, which the slice below relies on.
    for i, txt in enumerate(l):
        if txt in daycode:
            d["day-code"].append(txt)
            not_encoutered_day = 1
        elif not_encoutered_day == 1:
            break
    d["route"] = []
    # Sentinel lets the pull-parser below detect the end of the cells.
    l.append("END_MARKER")
    l = iter(l[i:])
    nxt = next(l)
    while True:
        t = {}
        t["no"] = nxt
        t["station-code"] = next(l)
        t["station-name"] = next(l)
        t["route-no"] = next(l)
        t["arrival-time"] = next(l)
        t["departure-time"] = next(l)
        nxt = next(l)
        # Many times no halt-time is given, this condition handles that case
        # (a halt-time always contains ':'; anything else is the distance).
        if ":" not in nxt:
            t["halt-time"] = 0
            t["distance"] = nxt
        else:
            t["halt-time"] = nxt
            t["distance"] = next(l)
        t["day"] = next(l)
        d["route"].append(t)
        nxt = next(l)
        if nxt == "END_MARKER":
            break
    return d
def between(source, dest, date):
    """List trains running between *source* and *dest* on *date* ('DD-MM').

    A bare day with no '-' is padded with an empty month so the request
    is still issued.  Returns format_result_json(trains, days, numbers,
    times, sources, destinations).
    """
    url = 'http://www.indianrail.gov.in/cgi_bin/inet_srcdest_cgi_date.cgi'
    date = date.split('-')
    if len(date) == 1:
        date.append('')
    cls = "ZZ"
    values = {
        "lccp_src_stncode_dis": source,
        "lccp_src_stncode": source,
        "lccp_dstn_stncode_dis": dest,
        "lccp_dstn_stncode": dest,
        "lccp_classopt": cls,
        "lccp_day": date[0],
        "lccp_month": date[1],
        "CurrentMonth": "4",
        "CurrentDate": "19",
        "CurrentYear": "2016",
    }
    header = {
        "Origin": "http://www.indianrail.gov.in",
        "Host": "www.indianrail.gov.in",
        "User-Agent": "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)",
        "Referer": "http://www.indianrail.gov.in/fare_Enq.html",
    }
    html = fetchpage(url, values, header)
    soup = BeautifulSoup(html)
    trains = [link.text[1:].strip() for link in soup.find_all(href="#SAMETRN")]
    sources = []
    destinations = []
    # Station cells alternate source, destination, source, destination...
    expecting_source = True
    for cell in soup.find_all("td"):
        title = cell.attrs.get('title', '')
        if 'Station Code' not in title and 'temporary' not in title:
            continue
        station = {}
        station['code'] = extract_stn_code(cell['title'])
        station['name'] = sanitize(cell.text)
        if expecting_source:
            # Source names are replaced with the full name from the local DB.
            station['name'] = db.station_metadata(station['code'])['fullname']
            sources.append(station)
            expecting_source = False
        else:
            destinations.append(station)
            expecting_source = True
    days = re.findall("(?<=B>)Y|(?<=red>)N", html)
    numbers = []
    for field in soup.find_all("input"):
        onclick = field.get("onclick", False)
        if not onclick:
            continue
        found = re.findall("(?<=\')[0-9]+(?=[A-Z]+)", onclick)
        if found != []:
            numbers.append(found[0])
    times = re.findall("(?<=TD>)[0-9:]+", html)
    return format_result_json(trains, days, numbers, times, sources, destinations)
def get_train(url):
    """Yield (number, name) pairs for every train listed on *url*."""
    html = fetchpage(url)
    numbers = re.findall(r"(?<=VALUE=\")[0-9]+", html, re.IGNORECASE)
    # Extracts train names; entries come in groups of three, where the
    # two entries after each name hold the train's start and end station,
    # so only every third entry is an actual train name.
    names = re.findall(r"(?<=LEFT\">)[A-Za-z]+[ A-Za-z]+", html, re.IGNORECASE)
    for idx, number in enumerate(numbers):
        yield (number, names[3 * idx].strip())
def get_pnr(pnr):
    """Fetch and parse PNR status, handling cancelled trains.

    Args:
        pnr: PNR number string to query.

    Returns:
        dict with journey fields, per-passenger 'booking_status' and
        'current_status' lists, 'total' and an 'error' flag.  When the
        train is cancelled every current status is 'TRAIN CANCELLED'.
        On a parse failure every field is blanked and 'error' is True.
    """
    url = 'http://www.indianrail.gov.in/cgi_bin/inet_pnstat_cgi_10521.cgi'
    values = {'lccp_pnrno1': pnr,
              'lccp_cap_val': 30000,  # random value
              'lccp_capinp_val': 30000}
    header = {"Origin": "http://www.indianrail.gov.in",
              "Host": "www.indianrail.gov.in",
              "User-Agent": "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)",
              "Referer": "http://www.indianrail.gov.in/pnr_Enq.html"}
    html = fetchpage(url, values, header)
    d = {}
    num = re.findall(r"(?<=_both\">)\*?[0-9 -]+", html)
    strings = re.findall(r"(?<=_both\">)[0-9A-Z ]+[A-Z]+", html)
    strings = [s.strip() for s in strings]
    psgr = re.findall(r"(?<=B>Passenger )[0-9]+", html)
    status = re.findall(r"(?<=B>)(?!Passenger)[0-9A-Za-z/, ]+(?=</B>)", html)
    cancelled = 0
    if status != [] and status[-1] == 'TRAIN CANCELLED':
        cancelled = 1
    status = [s.strip() for s in status]
    booking_status = []
    current_status = []
    try:
        # Statuses come in (booking, current) pairs.  Bug fix: this loop
        # used to run before the try block, so on a non-cancelled train an
        # odd-length status list raised an uncaught IndexError at
        # status[i + 1] instead of setting error=True.
        for i in range(0, len(status), 2):
            booking_status.append(status[i])
            if cancelled:
                # Cancelled trains carry no per-passenger current status.
                current_status.append('TRAIN CANCELLED')
                continue
            current_status.append(status[i + 1])
        d['pnr'] = pnr
        d['number'] = num[0][1:]
        d['doj'] = strip_inline_space(num[1])
        d['name'] = strings[0]
        d['from'] = strings[1]
        d['to'] = strings[2]
        d['upto'] = strings[3]
        d['boarding'] = strings[4]
        d['class'] = strings[5]
        d['chart'] = 'N' if strings[6] == 'CHART NOT PREPARED' else 'Y'
        d['total'] = len(psgr)
        d['booking_status'] = booking_status
        d['current_status'] = current_status
        d['error'] = False
    except IndexError:
        # Page did not contain the expected fields; blank everything out.
        for key in ('number', 'doj', 'name', 'from', 'to', 'upto',
                    'boarding', 'class', 'chart', 'pnr',
                    'booking_status', 'current_status'):
            d[key] = ''
        d['total'] = 0
        d['error'] = True
    return d
def get_fare(k):
    """Return a list of {'class', 'fare'} dicts for the journey in *k*.

    *k* supplies 'train', 'source', 'dest', 'age', 'quota' and an
    optional 'doj' ('DD-MM-YYYY'); a missing date falls back to a fixed
    default.  Note: mutates k['quota'] to upper case.
    """
    url = "http://www.indianrail.gov.in/cgi_bin/inet_frenq_cgi.cgi"
    doj = k['doj'].split('-')
    if len(doj) <= 1:
        doj = ['31', '12', '2015']  # Default Date
    k['quota'] = k['quota'].upper()
    values = {
        "lccp_trnno": k['train'],
        "lccp_day": doj[0],
        "lccp_month": doj[1],
        "lccp_srccode": k['source'],
        "lccp_dstncode": k['dest'],
        "lccp_classopt": "ZZ",
        "lccp_age": k['age'],
        "lccp_frclass1": k['quota'],
        "lccp_conc": "ZZZZZZ",
        "lccp_enrtcode": None,
        "lccp_viacode": None,
        "lccp_frclass2": "ZZ",
        "lccp_frclass3": "ZZ",
        "lccp_frclass4": "ZZ",
        "lccp_frclass5": "ZZ",
        "lccp_frclass6": "ZZ",
        "lccp_frclass7": "ZZ",
        "lccp_disp_avl_flg": "1",
        "getIt": "Please Wait...",
    }
    header = {
        "Origin": "http://www.indianrail.gov.in",
        "Host": "www.indianrail.gov.in",
        "User-Agent": "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)",
        "Referer": "http://www.indianrail.gov.in/fare_Enq.html",
    }
    page = fetchpage(url, values, header)
    class_names = re.findall("(?<=Class -- )[0-9A-Za-z]+", page)
    fares = re.findall("(?<=both\">)[0-9]+", page)
    n = len(class_names)
    result = []
    # The fare values sit at the tail of the matches, one per class.
    for offset, name in enumerate(class_names):
        result.append({'class': name, 'fare': fares[offset - n]})
    return result
def get_fare(k):
    """Fetch per-class fares for the train/journey described by dict *k*.

    Missing 'doj' falls back to a fixed default date.  Mutates
    k['quota'] to upper case.  Returns a list of {'class', 'fare'}.
    """
    endpoint = "http://www.indianrail.gov.in/cgi_bin/inet_frenq_cgi.cgi"
    date_parts = k['doj'].split('-')
    if len(date_parts) <= 1:
        date_parts = ['31', '12', '2015']  # Default Date
    k['quota'] = k['quota'].upper()
    form = {"lccp_trnno": k['train'], "lccp_day": date_parts[0],
            "lccp_month": date_parts[1], "lccp_srccode": k['source'],
            "lccp_dstncode": k['dest'], "lccp_classopt": "ZZ",
            "lccp_age": k['age'], "lccp_frclass1": k['quota'],
            "lccp_conc": "ZZZZZZ", "lccp_enrtcode": None,
            "lccp_viacode": None, "lccp_frclass2": "ZZ",
            "lccp_frclass3": "ZZ", "lccp_frclass4": "ZZ",
            "lccp_frclass5": "ZZ", "lccp_frclass6": "ZZ",
            "lccp_frclass7": "ZZ", "lccp_disp_avl_flg": "1",
            "getIt": "Please Wait..."}
    headers = {"Origin": "http://www.indianrail.gov.in",
               "Host": "www.indianrail.gov.in",
               "User-Agent": "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)",
               "Referer": "http://www.indianrail.gov.in/fare_Enq.html"}
    body = fetchpage(endpoint, form, headers)
    cls = re.findall("(?<=Class -- )[0-9A-Za-z]+", body)
    fares = re.findall("(?<=both\">)[0-9]+", body)
    total = len(cls)
    # The last 'total' fare matches line up with the class names.
    return [{'class': cls[idx], 'fare': fares[idx - total]}
            for idx in range(total)]
def between(source, dest, date):
    """Query trains running between *source* and *dest* on *date* ('DD-MM').

    Returns format_result_json(trains, days, numbers, times).
    """
    url = 'http://www.indianrail.gov.in/cgi_bin/inet_srcdest_cgi_date.cgi'
    date = date.split('-')
    # Bug fix: a date with no '-' (month omitted) used to raise IndexError
    # at date[1] below; pad with an empty month instead, matching the
    # dated sibling variant of this query.
    if len(date) == 1:
        date.append('')
    cls = "ZZ"
    values = {"lccp_src_stncode_dis": source,
              "lccp_src_stncode": source,
              "lccp_dstn_stncode_dis": dest,
              "lccp_dstn_stncode": dest,
              "lccp_classopt": cls,
              "lccp_day": date[0],
              "lccp_month": date[1],
              "CurrentMonth": "4",
              "CurrentDate": "19",
              "CurrentYear": "2016"}
    header = {"Origin": "http://www.indianrail.gov.in",
              "Host": "www.indianrail.gov.in",
              "User-Agent": "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)",
              "Referer": "http://www.indianrail.gov.in/fare_Enq.html"}
    html = fetchpage(url, values, header)
    soup = BeautifulSoup(html)
    trains = []
    for link in soup.find_all(href="#SAMETRN"):
        trains.append(link.text[1:].strip())
    days = re.findall("(?<=B>)Y|(?<=red>)N", html)
    numbers = []
    # Train numbers are embedded in the inputs' onclick handlers.
    for link in soup.find_all("input"):
        if link.get("onclick", False):
            num = re.findall("(?<=\')[0-9]+(?=[A-Z]+)", link['onclick'])
            if num != []:
                numbers.append(num[0])
    times = re.findall("(?<=TD>)[0-9:]+", html)
    return format_result_json(trains, days, numbers, times)
def get_seat(train, pref, quota, doj, source, dest):
    """Query seat availability for one class/quota on one train.

    Args:
        train: train number.
        pref: class code queried.
        quota: quota code.
        doj: journey date 'DD-MM-YYYY'.
        source, dest: station codes.

    Returns:
        dict with 'seats' and 'dates' lists and error=False, or the
        nullify()d dict when the date is malformed or no data is found.
    """
    url = "http://www.indianrail.gov.in/cgi_bin/inet_accavl_cgi.cgi"
    d = {}
    d['num'] = train
    d['quota'] = quota
    d['class'] = pref
    d['source'] = source
    d['dest'] = dest
    doj = doj.split('-')
    if len(doj) != 3:
        return nullify(d)
    values = {"lccp_trnno": train,
              "lccp_day": doj[0],
              "lccp_month": doj[1],
              "lccp_srccode": source,
              "lccp_dstncode": dest,
              "lccp_class1": pref,
              "lccp_quota": quota,
              "lccp_classopt": "ZZ",
              "lccp_class2": "ZZ",
              "lccp_class3": "ZZ",
              "lccp_class4": "ZZ",
              "lccp_class5": "ZZ",
              "lccp_class6": "ZZ",
              "lccp_class7": "ZZ"}
    header = {"Origin": "http://www.indianrail.gov.in",
              "Host": "www.indianrail.gov.in",
              "User-Agent": "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)",
              "Referer": "http://www.indianrail.gov.in/seat_Avail.html"}
    html = fetchpage(url, values, header)
    soup = BeautifulSoup(html)
    seats = []
    dates = []
    # Data cells carry class 'table_border_both' and no other attribute;
    # an alphabetic first character marks a seat status, otherwise a date.
    for i in soup.find_all('td'):
        if i.get('class', [None])[0] == 'table_border_both':
            if len(i.attrs.keys()) == 1:
                txt = i.text
                if ischaralpha(txt[0]):
                    seats.append(txt)
                else:
                    dates.append(strip_inline_space(txt))
    # Bug fix: an empty 'dates' with non-empty 'seats' used to reach the
    # division below and raise ZeroDivisionError; bail out for either.
    if seats == [] or dates == []:
        return nullify(d)
    d['seats'] = []
    d['dates'] = []
    d['error'] = False
    # Sometimes the page contains seats for two classes and sometimes only
    # for one, so 'step' contains total classes shown in the page; stepping
    # by it picks only the seats for the queried class.
    step = int(len(seats) / len(dates))
    if step == 0:
        return nullify(d)
    for i in range(0, len(seats), step):
        d['seats'].append(seats[i])
    for i in dates:
        i = strip_inline_space(i)
        d['dates'].append(i)
    return d
def runningtime(number, doj):
    """Scrape runningstatus.in for train *number* on date *doj* (YYYYMMDD).

    Returns format_result_json(...) of a dict carrying 'train_number',
    optionally 'position', and 'route' (one dict per station).  Bad
    dates, cancelled trains, non-running days and unknown trains all
    yield a nullify()d result with an explanatory message.
    """
    url = 'http://runningstatus.in/status/{0}-on-{1}'.format(number, doj)
    d = {}
    d['train_number'] = number
    # nullify appears to pre-populate the result fields (including
    # d['route'], appended to below) -- confirm in its definition.
    nullify(d)
    try:
        # Converting time from GMT to IST
        # NOTE(review): bare 'raise' with no active exception raises
        # RuntimeError here, which the bare 'except' turns into the
        # bad-date result.
        if len(doj) != 8: raise
        year = int(doj[0:4])
        month = int(doj[4:6])
        day = int(doj[6:8])
        datetimeob = datetime.datetime(year, month, day)
    except:
        return format_result_json(nullify(d, 'Date not in proper format'))
    weekday = datetimeob.weekday()
    html = fetchpage(url)
    soup = BeautifulSoup(html, "lxml")
    count = 0
    for i in soup.find_all("div"):
        if i.attrs.get("class", [None])[0] == "runningstatus-widget-content":
            # The second matching widget holds the current-position text,
            # starting just after the first '.' in its body.
            if count == 1:
                dot = i.text.find('.')
                if dot != -1:
                    end = i.text.find('\n\n\n')
                    # Some pages of this site has no ending tag for <div>. Handles it.
                    if end == -1:
                        d['position'] = i.text[dot + 1:]
                    else:
                        d['position'] = i.text[dot + 1:end]
            count += 1
            if "TRAIN IS CANCELLED" in i.text:
                return format_result_json(nullify(d, 'Train is cancelled'))
    # Some layouts add a dedicated "Delay Time" column; remember that so
    # the per-row parse below can skip it instead of storing it as status.
    delay_time_header = 0
    for i in soup.find_all("th"):
        if i.text.strip() == "Delay Time":
            delay_time_header = 1
    trainmd = db.train_metadata(number)
    days = ['MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT', 'SUN']
    if trainmd['days'] != '':
        if days[weekday] not in trainmd['days']:
            return format_result_json(nullify(d, 'Train does not run on given date'))
    lst = []
    prog = re.compile("[A-Za-z0-9 .:/()-]+")
    for i in soup.find_all("td"):
        i = i.text.strip()
        if prog.match(i):
            lst.append(i)
    # Sentinel lets the pull-parser detect the end of the cell stream.
    lst.append('END_MARKER')
    liter = iter(lst)
    nxt = next(liter)
    # Each row contributes: station, platform, scheduled arrival,
    # scheduled departure, actual arrival/departure, then an optional
    # status cell (absent when the next cell is already a station name).
    while True:
        t = {}
        if nxt == 'END_MARKER':
            break
        t['station'] = nxt
        t['platform'] = next(liter)
        t['scharr'] = next(liter)
        t['schdep'] = next(liter)
        t['actarr-actdep'] = next(liter)
        t['status'] = ''
        nxt = next(liter)
        if station_name_format(nxt) or nxt == 'END_MARKER':
            d['route'].append(t)
            continue
        if delay_time_header:
            # Dedicated delay column: consume the cell without storing it.
            nxt = next(liter)
            d['route'].append(t)
            continue
        t['status'] = nxt
        d['route'].append(t)
        nxt = next(liter)
    if d['route'] == []:
        return format_result_json(nullify(d, 'Invalid Train Number'))
    return format_result_json(d)
def get_train(url):
    """Yield every numeric <input> 'value' attribute found at *url*."""
    page = BeautifulSoup(fetchpage(url))
    for tag in page.find_all("input"):
        value = tag.attrs.get("value", False)
        if value and value.isnumeric():
            yield value
def get_pnr(pnr):
    """Fetch PNR status and parse it with BeautifulSoup.

    Returns a dict (pre-blanked via nullify) with the journey fields,
    per-passenger 'booking_status' / 'current_status' /
    'coach_position' lists, a passenger 'total' and an 'error' flag.
    Cancelled trains and unparseable pages return the nullify()d dict.
    """
    url = 'http://www.indianrail.gov.in/cgi_bin/inet_pnstat_cgi_10521.cgi'
    values = {'lccp_pnrno1': pnr,
              'lccp_cap_val': 30000,  # random value
              'lccp_capinp_val': 30000}
    header = {"Origin": "http://www.indianrail.gov.in",
              "Host": "www.indianrail.gov.in",
              "User-Agent": "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)",
              "Referer": "http://www.indianrail.gov.in/pnr_Enq.html"}
    html = fetchpage(url, values, header)
    d = {}
    # nullify appears to pre-populate every result field (including the
    # list-valued ones appended to below) -- confirm in its definition.
    nullify(d)
    d['pnr'] = pnr
    soup = BeautifulSoup(html, "lxml")
    # The first 'limit' data cells are fixed journey fields in this order.
    mapper = {0: 'number', 1: 'name', 2: 'doj', 3: 'from', 4: 'to',
              5: 'upto', 6: 'boarding', 7: 'class', 8: 'upgraded_class'}
    count = 0
    cancelled = 0
    limit = 8
    status = []
    for i in soup.find_all("td"):
        if i.attrs.get("class") == ["table_border_both"]:
            txt = i.text.strip()
            if i.attrs.get("align") == "middle":
                # The centred cell carries the charting state.
                d['chart'] = 'N' if 'CHART NOT PREPARED' in txt else 'Y'
                continue
            if count >= limit:
                # Past the fixed fields: per-passenger status cells.
                if 'Passenger' not in txt:
                    if status == [] and 'TRAIN CANCELLED' in txt:
                        cancelled = 1
                        break
                    status.append(txt)
            else:
                d[mapper[count]] = txt
                count += 1
        elif i.attrs.get("width") == "5%":
            # <td width="5%">Upgraded class</td>
            # Presence of this header adds one more fixed field (index 8).
            limit += 1
    if cancelled or count == 0:
        return nullify(d)
    if limit == 9:
        d['class'] = d['upgraded_class']  # Updates current class to the upgraded class
    total = 0
    length = len(status)
    # An odd number of status cells implies a third per-passenger column
    # (coach position) is present -- layout assumption, confirm.
    coachpos = 0
    if length % 2 == 1:
        coachpos = 1
    status = iter(status)
    # p_next presumably returns '' once the iterator is exhausted; the ''
    # checks below rely on that -- confirm in its definition.
    nxt = p_next(status)
    while 1:
        if nxt == '':
            break
        d['booking_status'].append(nxt)
        nxt = p_next(status)
        d['current_status'].append(nxt)
        nxt = p_next(status)
        if coachpos:
            # Numeric cell is a coach position; otherwise default to 0
            # and leave the cell for the next passenger's booking status.
            if nxt != '' and (nxt[0] >= '0' and nxt[0] <= '9'):
                d['coach_position'].append(int(nxt))
                nxt = p_next(status)
            else:
                d['coach_position'].append(0)
        else:
            d['coach_position'].append(0)
        total += 1
    d['total'] = total
    d['error'] = False
    return d
def extract(train):
    """Fetch the train-number enquiry page for *train* and parse it."""
    enquiry_url = "http://www.indianrail.gov.in/cgi_bin/inet_trnnum_cgi.cgi"
    referer = "http://www.indianrail.gov.in/inet_trn_num.html"
    page = fetchpage(enquiry_url,
                     {'lccp_trnname': train},
                     {'Referer': referer})
    return extract_page(page)
import re


def remove_tag(s):
    """Strip HTML tags from *s*, returning only the text between tags.

    A tiny two-state machine: characters inside '<'...'>' are discarded,
    everything else is kept.  Nested '<' inside a tag is ignored, which
    matches the simple markup this scraper handles.

    Args:
        s: string possibly containing HTML tags.

    Returns:
        str with every '<...>' span removed.
    """
    IN_TAG = 0
    OUT_TAG = 1
    state = OUT_TAG
    # Collect kept characters in a list and join once at the end instead
    # of the old quadratic string concatenation.
    kept = []
    for c in s:
        if state == OUT_TAG:
            if c == '<':
                state = IN_TAG
            else:
                kept.append(c)
        else:  # IN_TAG: drop everything until the closing '>'
            if c == '>':
                state = OUT_TAG
    return ''.join(kept)


if __name__ == '__main__':
    # Demo: fetch a live running-status page and print the tag-stripped
    # "Currently ..." snippet.  Guarded (and the demo-only fetchpage
    # import moved here) so importing this module no longer triggers a
    # network request at import time.
    from fetchpage import fetchpage

    html = fetchpage('http://runningstatus.in/status/12555-today')
    m = re.search('(?<=br>Currently)[A-Za-z()0-9 ,<>\"\'=/:.]+(?=</p>)', html)
    s = m.group(0)
    print(s)
    print(remove_tag(s))