def ee_parse_sigpoints2(): sigs = [] parser = lxml.html.HTMLParser() airac_date = get_airac_date() url = "/%s/html/eAIP/EE-ENR-4.4-en-GB.html#ENR-4.4" % (airac_date, ) data, date = fetchdata.getdata(url, country='ee') parser.feed(data) tree = parser.close() for tab in tree.xpath(".//table"): for idx, cand in enumerate(tab.xpath(".//tr")): if len(cand.getchildren()) != 4: continue if idx == 0: continue sig = dict() name, coord, ats, remark = cand.getchildren() nametxt = alltext(name).strip() coordtxt = alltext(coord).strip() if idx == 1: assert nametxt == '1' and coordtxt == '2' continue print "Name:", nametxt print "coord:", coordtxt sig['url'] = url sig['date'] = date sig['name'] = nametxt sig['short'] = nametxt sig['kind'] = 'sig. point' subed = re.sub(ur"[\n\s]+", " ", coordtxt) sig['pos'] = mapper.anyparse(subed) sigs.append(sig) return sigs
def ee_parse_sigpoints2():
    """Parse significant points from the Estonian eAIP ENR 4.4 table.

    Returns a list of dicts with keys 'url', 'date', 'name', 'short',
    'kind' and 'pos' (coordinate string parsed via mapper.anyparse).
    """
    sigs=[]
    parser=lxml.html.HTMLParser()
    airac_date=get_airac_date()
    url="/%s/html/eAIP/EE-ENR-4.4-en-GB.html#ENR-4.4"%(airac_date,)
    data,date=fetchdata.getdata(url,country='ee')
    parser.feed(data)
    tree=parser.close()
    for tab in tree.xpath(".//table"):
        for idx,cand in enumerate(tab.xpath(".//tr")):
            # Only rows with exactly 4 cells are data rows.
            if len(cand.getchildren())!=4: continue
            # Row 0 is the header row.
            if idx==0: continue
            sig=dict()
            name,coord,ats,remark=cand.getchildren()
            nametxt=alltext(name).strip()
            coordtxt=alltext(coord).strip()
            if idx==1:
                # Row 1 is the column-numbering row ("1","2",...).
                assert nametxt=='1' and coordtxt=='2'
                continue
            print "Name:",nametxt
            print"coord:",coordtxt
            sig['url']=url
            sig['date']=date
            sig['name']=nametxt
            sig['short']=nametxt
            sig['kind']='sig. point'
            # Collapse newline/whitespace runs before coordinate parsing.
            subed=re.sub(ur"[\n\s]+"," ",coordtxt)
            sig['pos']=mapper.anyparse(subed)
            sigs.append(sig)
    return sigs
def coordparse(self): val=request.params['val'] try: s=mapper.anyparse(val) c.pos=s c.deg,c.degmin,c.degminsec=mapper.to_all_formats(mapper.from_str(s)) print "Rendering mako coordpres" return render("/coordpres.mako") except Exception: print "returning empty string , coordpres" return ""
def coordparse(self):
    """Parse the 'val' request parameter as a coordinate and render it.

    Renders coordpres.mako with the position in all supported formats,
    or returns "" on any parse/render failure.
    """
    val = request.params['val']
    try:
        s = mapper.anyparse(val)
        c.pos = s
        # Expose the position in every supported format to the template.
        c.deg, c.degmin, c.degminsec = mapper.to_all_formats(
            mapper.from_str(s))
        print "Rendering mako coordpres"
        return render("/coordpres.mako")
    except Exception:
        # Deliberate best-effort: bad input yields an empty response.
        print "returning empty string , coordpres"
        return ""
def ee_parse_airfields2():
    """Scrape Estonian eAIP aerodrome (AD) pages for one AIRAC cycle.

    Reads the AD 0.6 overview page to collect EExx ICAO codes (skipping
    heliports), then for each aerodrome page extracts: name, ARP
    position, any CTR/TIZ/FIZ zone (lateral limits, vertical limits,
    ATS frequencies), chart links and runway thresholds.  Results
    accumulate in the local lists `ads` and `spaces`.

    NOTE(review): no return statement is visible in this portion of the
    function -- confirm how `ads`/`spaces` reach the caller.
    """
    ads = []
    spaces = []
    airac_date = get_airac_date()
    print "airac", airac_date
    overview_url = "/%s/html/eAIP/EE-AD-0.6-en-GB.html" % (airac_date, )
    parser = lxml.html.HTMLParser()
    data, date = fetchdata.getdata(overview_url, country='ee')
    parser.feed(data)
    tree = parser.close()
    icaos = []
    # Pass 1: collect EExx ICAO codes from the overview page headings.
    for cand in tree.xpath(".//h3"):
        txts = alltexts(cand.xpath(".//a"))
        aps = re.findall(r"EE[A-Z]{2}", " ".join(txts))
        if aps:
            icao, = aps  # exactly one code expected per heading
            if alltext(cand).count("HELIPORT"):
                print "Ignore heliport", icao
                continue
            icaos.append(icao)
    # Pass 2: parse each aerodrome's AD 2 page.
    for icao in icaos:
        ad = dict(icao=icao)
        url = "/%s/html/eAIP/EE-AD-2.%s-en-GB.html" % (airac_date, icao)
        data, date = fetchdata.getdata(url, country='ee')
        parser.feed(data)
        tree = parser.close()
        thrs = []  # runway thresholds; fed to rwy_constructor below
        # Aerodrome name from the "<ICAO> - <name>" heading.
        for h3 in tree.xpath(".//h3"):
            txt = alltext(h3)
            print repr(txt)
            ptrn = ur"\s*%s\s+[—-]\s+(.*)" % (unicode(icao.upper()), )
            m = re.match(ptrn, txt, re.UNICODE)
            if m:
                assert not 'name' in ad  # the name must match only once
                ad['name'] = m.groups()[0]
        # ARP coordinates from the "coordinates and site" row.
        for tr in tree.xpath(".//tr"):
            txt = alltext(tr)
            m = re.match(r".*coordinates\s*and\s*site.*(\d{6}N\s*\d{7}E).*",
                         txt)
            #print "Matching,",txt,":",m
            if m:
                crds, = m.groups()
                ad['pos'] = mapper.anyparse(crds)
        # CTR/TIZ/FIZ zone: lateral limits, then vertical limits.
        space = dict()
        for table in tree.xpath(".//table"):
            for tr in table.xpath(".//tr"):
                trtxt = alltext(tr)
                if trtxt.count("Designation and lateral limits"):
                    space = dict()
                    coords = tr.getchildren()[2]
                    lines = alltext(coords).split("\n")
                    if lines[0].strip() == 'NIL':
                        continue
                    # First line: "<zone name> <CTR|TIZ|FIZ>[ <coords...>]"
                    zname, what, spill = re.match(
                        ur"(.*)\s+(CTR|TIZ|FIZ)(.*)", lines[0]).groups()
                    if spill and spill.strip():
                        rest = [spill] + lines[1:]
                    else:
                        rest = lines[1:]
                    what = what.strip()
                    assert ad['name'].upper().strip().count(
                        zname.upper().strip())
                    assert what in ['FIZ', 'TIZ', 'CTR']
                    space['type'] = what
                    space['points'] = mapper.parse_coord_str("\n".join(rest))
                    space['name'] = zname + " " + what
                    space['date'] = date
                    space['url'] = fetchdata.getrawurl(url, 'ee')
                if trtxt.count("Vertical limits"):
                    vlim = alltext(tr.getchildren()[2])
                    if vlim.strip() == 'NIL':
                        continue
                    # e.g. "GND to <upper limit>"
                    space['floor'], space['ceiling'] = vlim.split(" to ")
        #space['freqs']=x
        #hlc=False
        # Chart links: aerodrome / landing / parking / VAC PDFs.
        for h4 in tree.xpath(".//h4"):
            txt = alltext(h4)
            if txt.lower().count("charts"):
                par = h4.getparent()
                for table in par.xpath(".//table"):
                    for idx, tr in enumerate(table.xpath(".//tr")):
                        name, page =\
                            tr.getchildren()
                        nametxt = alltext(name)
                        print "nametxt:", nametxt, "link:"
                        for reg, variant in [
                                (r"Aerodrome.*Chart.*", ""),
                                (r"Landing.*Chart.*", "landing"),
                                (r".*Parking.*Chart.*", "parking"),
                                (r".*Visual.*Approach.*|.*\bVAC\b.*", "vac")
                        ]:
                            if re.match(reg, nametxt):
                                for a in page.xpath(".//a"):
                                    print "linklabel", a.text
                                    print "attrib:", a.attrib
                                    href = a.attrib['href']
                                    print "Bef repl", href
                                    # Rewrite relative PDF links to the
                                    # AIRAC-dated graphics path.
                                    if href.lower().endswith("pdf"):
                                        href = href.replace(
                                            "../../graphics",
                                            "/%s/graphics" % (airac_date, ))
                                        print "href:", href, airac_date
                                        assert href
                                        parse_landing_chart.help_plc(
                                            ad, href, icao, ad['pos'], "ee",
                                            variant=variant)
                                        """arp=ad['pos']
                                        lc=parse_landing_chart.parse_landing_chart(
                                                href,
                                                icao=icao,
                                                arppos=arp,country="ee")
                                        assert lc
                                        if lc:
                                            ad['adcharturl']=lc['url']
                                            ad['adchart']=lc
                                            hlc=True
                                            #chartblobnames.append(lc['blobname'])
                                        """
        #assert hlc
        # Runway thresholds from the "RUNWAY PHYSICAL ..." table.
        for h4 in tree.xpath(".//h4"):
            txt = alltext(h4)
            if txt.count("RUNWAY PHYSICAL"):
                par = h4.getparent()
                for table in par.xpath(".//table"):
                    prevnametxt = ""
                    for idx, tr in enumerate(table.xpath(".//tr")):
                        if idx == 0:
                            fc = alltext(tr.getchildren()[0])
                            print "FC", fc
                            if not fc.count("Designations"):
                                break  #skip table
                        if idx < 2:
                            continue  # header rows
                        if len(tr.getchildren()) == 1:
                            continue  # spanning/separator row
                        print "c:", tr.getchildren(), alltexts(
                            tr.getchildren())
                        desig, trubrg, dims, strength, thrcoord, threlev = \
                            tr.getchildren()
                        rwy = re.match(r"(\d{2}[LRC]?)", alltext(desig))
                        altc = alltext(thrcoord)
                        print "Matching", altc
                        print "rwymatch:", alltext(desig)
                        m = re.match(r"\s*(\d+\.?\d*N)[\s\n]*(\d+\.?\d*E).*",
                                     altc, re.DOTALL | re.MULTILINE)
                        if m:
                            lat, lon = m.groups()
                            print "Got latlon", lat, lon
                            thrs.append(
                                dict(pos=mapper.parse_coords(lat, lon),
                                     thr=rwy.groups()[0]))
        # ATS frequencies for the zone parsed above.
        space['freqs'] = []
        for h4 in tree.xpath(".//h4"):
            txt = alltext(h4)
            if txt.count("ATS COMMUNICATION"):
                par = h4.getparent()
                for table in par.xpath(".//table"):
                    for idx, tr in enumerate(table.xpath(".//tr")):
                        print "cs", repr(tr.getchildren()), alltexts(
                            tr.getchildren())
                        print len(tr.getchildren())
                        if len(tr.getchildren()) != 5:
                            if "".join(alltexts(
                                    tr.getchildren())).count(u"EMERG"):
                                continue  #Sometimes emergency freq is listed, and then it is without callsign
                        service, callsign, frequency, hours, remarks =\
                            tr.getchildren()
                        callsigntxt = alltext(callsign)
                        if idx < 2:
                            # Header rows: sanity-check the column layout.
                            if idx == 0:
                                assert callsigntxt.strip() == "Call sign"
                            if idx == 1:
                                assert callsigntxt.strip() == "2"
                            continue
                        ftext = alltext(frequency)
                        print "matching freq", ftext
                        for freq in re.findall(ur"\b\d{3}\.\d{1,3}", ftext):
                            freqmhz = float(freq)
                            space['freqs'].append(
                                (callsigntxt.strip(), freqmhz))
        # Only keep zones that were fully populated.
        if space and 'points' in space:
            assert 'freqs' in space
            assert 'points' in space
            assert 'floor' in space
            assert 'ceiling' in space
            assert 'type' in space
            spaces.append(space)
        if thrs:
            ad['runways'] = rwy_constructor.get_rwys(thrs)
        aip_text_documents.help_parse_doc(ad, url, icao, "ee",
                                          title="General Information",
                                          category="general")
        ad['date'] = date
        ad['url'] = fetchdata.getrawurl(url, 'ee')
        print "AD:", ad
        assert 'pos' in ad
        assert 'name' in ad
        ads.append(ad)
def ee_parse_airfields2():
    """Scrape Estonian eAIP aerodrome (AD) pages for one AIRAC cycle.

    Collects EExx ICAO codes from the AD 0.6 overview (heliports are
    skipped), then parses each aerodrome's AD 2 page for name, ARP
    position, CTR/TIZ/FIZ zone data (lateral/vertical limits and ATS
    frequencies), chart links and runway thresholds, accumulating into
    the local lists `ads` and `spaces`.

    NOTE(review): no return statement is visible in this portion of the
    function -- confirm how `ads`/`spaces` reach the caller.
    """
    ads=[]
    spaces=[]
    airac_date=get_airac_date()
    print "airac",airac_date
    overview_url="/%s/html/eAIP/EE-AD-0.6-en-GB.html"%(airac_date,)
    parser=lxml.html.HTMLParser()
    data,date=fetchdata.getdata(overview_url,country='ee')
    parser.feed(data)
    tree=parser.close()
    icaos=[]
    # Pass 1: pick EExx ICAO codes out of the overview headings.
    for cand in tree.xpath(".//h3"):
        txts=alltexts(cand.xpath(".//a"))
        aps=re.findall(r"EE[A-Z]{2}"," ".join(txts))
        if aps:
            icao,=aps  # exactly one code expected per heading
            if alltext(cand).count("HELIPORT"):
                print "Ignore heliport",icao
                continue
            icaos.append(icao)
    # Pass 2: parse each aerodrome page.
    for icao in icaos:
        ad=dict(icao=icao)
        url="/%s/html/eAIP/EE-AD-2.%s-en-GB.html"%(airac_date,icao)
        data,date=fetchdata.getdata(url,country='ee')
        parser.feed(data)
        tree=parser.close()
        thrs=[]  # runway thresholds; fed to rwy_constructor below
        # Aerodrome name from the "<ICAO> - <name>" heading.
        for h3 in tree.xpath(".//h3"):
            txt=alltext(h3)
            print repr(txt)
            ptrn=ur"\s*%s\s+[—-]\s+(.*)"%(unicode(icao.upper()),)
            m=re.match(ptrn,txt,re.UNICODE)
            if m:
                assert not 'name' in ad  # the name must match only once
                ad['name']=m.groups()[0]
        # ARP coordinates from the "coordinates and site" row.
        for tr in tree.xpath(".//tr"):
            txt=alltext(tr)
            m=re.match(r".*coordinates\s*and\s*site.*(\d{6}N\s*\d{7}E).*",txt)
            #print "Matching,",txt,":",m
            if m:
                crds,=m.groups()
                ad['pos']=mapper.anyparse(crds)
        # CTR/TIZ/FIZ zone: lateral limits, then vertical limits.
        space=dict()
        for table in tree.xpath(".//table"):
            for tr in table.xpath(".//tr"):
                trtxt=alltext(tr)
                if trtxt.count("Designation and lateral limits"):
                    space=dict()
                    coords=tr.getchildren()[2]
                    lines=alltext(coords).split("\n")
                    if lines[0].strip()=='NIL':
                        continue
                    # First line: "<zone name> <CTR|TIZ|FIZ>[ <coords...>]"
                    zname,what,spill=re.match(ur"(.*)\s+(CTR|TIZ|FIZ)(.*)",lines[0]).groups()
                    if spill and spill.strip():
                        rest=[spill]+lines[1:]
                    else:
                        rest=lines[1:]
                    what=what.strip()
                    assert ad['name'].upper().strip().count(zname.upper().strip())
                    assert what in ['FIZ','TIZ','CTR']
                    space['type']=what
                    space['points']=mapper.parse_coord_str("\n".join(rest))
                    space['name']=zname+" "+what
                    space['date']=date
                    space['url']=fetchdata.getrawurl(url,'ee')
                if trtxt.count("Vertical limits"):
                    vlim=alltext(tr.getchildren()[2])
                    if vlim.strip()=='NIL':
                        continue
                    # e.g. "GND to <upper limit>"
                    space['floor'],space['ceiling']=vlim.split(" to ")
        #space['freqs']=x
        #hlc=False
        # Chart links: aerodrome / landing / parking / VAC PDFs.
        for h4 in tree.xpath(".//h4"):
            txt=alltext(h4)
            if txt.lower().count("charts"):
                par=h4.getparent()
                for table in par.xpath(".//table"):
                    for idx,tr in enumerate(table.xpath(".//tr")):
                        name,page=\
                            tr.getchildren()
                        nametxt=alltext(name)
                        print "nametxt:",nametxt,"link:"
                        for reg,variant in [
                            (r"Aerodrome.*Chart.*","") ,
                            (r"Landing.*Chart.*","landing"),
                            (r".*Parking.*Chart.*","parking"),
                            (r".*Visual.*Approach.*|.*\bVAC\b.*","vac")
                            ]:
                            if re.match(reg,nametxt):
                                for a in page.xpath(".//a"):
                                    print "linklabel",a.text
                                    print "attrib:",a.attrib
                                    href=a.attrib['href']
                                    print "Bef repl",href
                                    # Rewrite relative PDF links to the
                                    # AIRAC-dated graphics path.
                                    if href.lower().endswith("pdf"):
                                        href=href.replace("../../graphics","/%s/graphics"%(airac_date,))
                                        print "href:",href,airac_date
                                        assert href
                                        parse_landing_chart.help_plc(ad,href,
                                            icao,ad['pos'],"ee",variant=variant)
                                        """arp=ad['pos']
                                        lc=parse_landing_chart.parse_landing_chart(
                                                href,
                                                icao=icao,
                                                arppos=arp,country="ee")
                                        assert lc
                                        if lc:
                                            ad['adcharturl']=lc['url']
                                            ad['adchart']=lc
                                            hlc=True
                                            #chartblobnames.append(lc['blobname'])
                                        """
        #assert hlc
        # Runway thresholds from the "RUNWAY PHYSICAL ..." table.
        for h4 in tree.xpath(".//h4"):
            txt=alltext(h4)
            if txt.count("RUNWAY PHYSICAL"):
                par=h4.getparent()
                for table in par.xpath(".//table"):
                    prevnametxt=""
                    for idx,tr in enumerate(table.xpath(".//tr")):
                        if idx==0:
                            fc=alltext(tr.getchildren()[0])
                            print "FC",fc
                            if not fc.count("Designations"):
                                break #skip table
                        if idx<2:continue  # header rows
                        if len(tr.getchildren())==1:continue  # separator row
                        print "c:",tr.getchildren(),alltexts(tr.getchildren())
                        desig,trubrg,dims,strength,thrcoord,threlev=tr.getchildren()
                        rwy=re.match(r"(\d{2}[LRC]?)",alltext(desig))
                        altc=alltext(thrcoord)
                        print "Matching",altc
                        print "rwymatch:",alltext(desig)
                        m=re.match(r"\s*(\d+\.?\d*N)[\s\n]*(\d+\.?\d*E).*",altc,re.DOTALL|re.MULTILINE)
                        if m:
                            lat,lon=m.groups()
                            print "Got latlon",lat,lon
                            thrs.append(dict(pos=mapper.parse_coords(lat,lon),thr=rwy.groups()[0]))
        # ATS frequencies for the zone parsed above.
        space['freqs']=[]
        for h4 in tree.xpath(".//h4"):
            txt=alltext(h4)
            if txt.count("ATS COMMUNICATION"):
                par=h4.getparent()
                for table in par.xpath(".//table"):
                    for idx,tr in enumerate(table.xpath(".//tr")):
                        print "cs",repr(tr.getchildren()),alltexts(tr.getchildren())
                        print len(tr.getchildren())
                        if len(tr.getchildren())!=5:
                            if "".join(alltexts(tr.getchildren())).count(u"EMERG"):
                                continue #Sometimes emergency freq is listed, and then it is without callsign
                        service,callsign,frequency,hours,remarks=\
                            tr.getchildren()
                        callsigntxt=alltext(callsign)
                        if idx<2:
                            # Header rows: sanity-check the column layout.
                            if idx==0:
                                assert callsigntxt.strip()=="Call sign"
                            if idx==1:
                                assert callsigntxt.strip()=="2"
                            continue
                        ftext=alltext(frequency)
                        print "matching freq",ftext
                        for freq in re.findall(ur"\b\d{3}\.\d{1,3}",ftext):
                            freqmhz=float(freq)
                            space['freqs'].append((callsigntxt.strip(),freqmhz))
        # Only keep zones that were fully populated.
        if space and 'points' in space:
            assert 'freqs' in space
            assert 'points' in space
            assert 'floor' in space
            assert 'ceiling' in space
            assert 'type' in space
            spaces.append(space)
        if thrs:
            ad['runways']=rwy_constructor.get_rwys(thrs)
        aip_text_documents.help_parse_doc(ad,url,
            icao,"ee",title="General Information",category="general")
        ad['date']=date
        ad['url']=fetchdata.getrawurl(url,'ee')
        print "AD:",ad
        assert 'pos' in ad
        assert 'name' in ad
        ads.append(ad)