def ee_parse_sigpoints2(): sigs = [] parser = lxml.html.HTMLParser() airac_date = get_airac_date() url = "/%s/html/eAIP/EE-ENR-4.4-en-GB.html#ENR-4.4" % (airac_date, ) data, date = fetchdata.getdata(url, country='ee') parser.feed(data) tree = parser.close() for tab in tree.xpath(".//table"): for idx, cand in enumerate(tab.xpath(".//tr")): if len(cand.getchildren()) != 4: continue if idx == 0: continue sig = dict() name, coord, ats, remark = cand.getchildren() nametxt = alltext(name).strip() coordtxt = alltext(coord).strip() if idx == 1: assert nametxt == '1' and coordtxt == '2' continue print "Name:", nametxt print "coord:", coordtxt sig['url'] = url sig['date'] = date sig['name'] = nametxt sig['short'] = nametxt sig['kind'] = 'sig. point' subed = re.sub(ur"[\n\s]+", " ", coordtxt) sig['pos'] = mapper.anyparse(subed) sigs.append(sig) return sigs
def ee_parse_sigpoints2(): sigs=[] parser=lxml.html.HTMLParser() airac_date=get_airac_date() url="/%s/html/eAIP/EE-ENR-4.4-en-GB.html#ENR-4.4"%(airac_date,) data,date=fetchdata.getdata(url,country='ee') parser.feed(data) tree=parser.close() for tab in tree.xpath(".//table"): for idx,cand in enumerate(tab.xpath(".//tr")): if len(cand.getchildren())!=4: continue if idx==0: continue sig=dict() name,coord,ats,remark=cand.getchildren() nametxt=alltext(name).strip() coordtxt=alltext(coord).strip() if idx==1: assert nametxt=='1' and coordtxt=='2' continue print "Name:",nametxt print"coord:",coordtxt sig['url']=url sig['date']=date sig['name']=nametxt sig['short']=nametxt sig['kind']='sig. point' subed=re.sub(ur"[\n\s]+"," ",coordtxt) sig['pos']=mapper.anyparse(subed) sigs.append(sig) return sigs
def ee_parse_r_and_tsa2():
    """Collect restricted/TSA airspaces from four EE eAIP ENR 5.x pages."""
    cycle = get_airac_date()
    spaces = []
    # Section order matches the original call sequence: 5.2, 5.1, 5.3, 5.5.
    for section in ("5.2", "5.1", "5.3", "5.5"):
        url = "/%s/html/eAIP/EE-ENR-%s-en-GB.html" % (cycle, section)
        spaces.extend(ee_parse_gen_r2(url))
    return spaces
def ee_parse_r_and_tsa2():
    """Gather airspaces parsed from the EE ENR 5.2/5.1/5.3/5.5 pages."""
    effective = get_airac_date()
    # Build all page URLs up front, preserving the original 5.2/5.1/5.3/5.5 order.
    urls = [
        "/%s/html/eAIP/EE-ENR-%s-en-GB.html" % (effective, part)
        for part in ("5.2", "5.1", "5.3", "5.5")
    ]
    out = []
    for u in urls:
        out.extend(ee_parse_gen_r2(u))
    return out
def ee_parse_tma2(): spaces=[] airac_date=get_airac_date() url="/%s/html/eAIP/EE-ENR-2.1-en-GB.html"%(airac_date,) parser=lxml.html.HTMLParser() data,date=fetchdata.getdata(url,country='ee') parser.feed(data) tree=parser.close() icaos=[] def nested(tab): if tab==None: return False if tab.getparent() is None: return False #print dir(tab) if tab.tag=='table': return True return nested(tab.getparent()) for tab in tree.xpath(".//table"): print "table alltext:",alltext(tab) if nested(tab.getparent()): continue firsttr=tab.xpath(".//tr")[0] ntext=alltext(firsttr) print "firsttr",firsttr print "ntext",ntext if re.match(ur".*FIR\s*/\s*CTA.*",ntext): print "Matches Tallin FIR" name='TALLIN FIR' points=mapper.parse_coord_str(firtxt,context='estonia') floor,ceiling="GND","FL195" space={} space['name']=name space['points']=points space['floor']=floor space['ceiling']=ceiling space['freqs']=[] space['icao']='EETT' space['type']='FIR' space['date']=date space['url']=fetchdata.getrawurl(url,'ee') spaces.append(space) continue else: name=ntext.strip() space=dict(name=name) print "Name",name assert space['name'].count("TMA") \ or space['name'].count("FIR") if space['name'].count("FIR"): type='FIR' else: type="TMA" freqs=[] points=None floor=None ceiling=None for cand in tab.xpath(".//tr"): if len(cand.getchildren())!=2: continue nom,what=cand.getchildren() whattxt=alltext(what) nomtxt=alltext(nom) print "nomtxt",nomtxt,"space name",space['name'] if nomtxt.count("Lateral limits"): if space['name'].count("TALLINN TMA 2"): points=mapper.parse_coord_str(""" A circle with radius 20 NM centred on 592448N 0244957E """) else: whattxt=whattxt.replace( "then along the territory dividing line between Estonia and Russia to", "- Along the common Estonian/X state boundary to " ) print "Fixed up",whattxt points=mapper.parse_coord_str(whattxt,context='estonia') if nomtxt.count("Vertical limits"): floor,ceiling=whattxt.split(" to ") if nomtxt.count("Call sign"): callsign=whattxt.split("\n")[0] if 
nomtxt.count("freq"): freqs.extend(re.findall(ur"\d+\.\d+\s*MHz")) assert points and floor and ceiling space['points']=points space['type']=type space['floor']=floor space['ceiling']=ceiling space['freqs']=[] space['type']=type space['date']=date space['url']=fetchdata.getrawurl(url,'ee') for freq in freqs: space['freqs'].append((callsign,freq)) spaces.append(space)
def ee_parse_airfields2():
    # Scrape Estonian aerodrome data from the eAIP AD section: builds one
    # 'ad' dict per ICAO code (name, pos, runways, charts, documents) and one
    # optional 'space' dict (CTR/TIZ/FIZ) per aerodrome page.
    # NOTE(review): 'ads' and 'spaces' are populated but never returned from
    # the visible body -- confirm whether a trailing 'return ads, spaces' was
    # lost, or whether callers read these some other way.
    ads = []
    spaces = []
    airac_date = get_airac_date()
    print "airac", airac_date
    overview_url = "/%s/html/eAIP/EE-AD-0.6-en-GB.html" % (airac_date, )
    parser = lxml.html.HTMLParser()
    data, date = fetchdata.getdata(overview_url, country='ee')
    parser.feed(data)
    tree = parser.close()
    icaos = []
    # Pass 1: collect ICAO codes from the AD 0.6 overview page headings,
    # skipping heliports.
    for cand in tree.xpath(".//h3"):
        txts = alltexts(cand.xpath(".//a"))
        aps = re.findall(r"EE[A-Z]{2}", " ".join(txts))
        if aps:
            icao, = aps  # exactly one code expected per heading
            if alltext(cand).count("HELIPORT"):
                print "Ignore heliport", icao
                continue
            icaos.append(icao)
    # Pass 2: fetch and parse each aerodrome's AD 2.x page.
    for icao in icaos:
        ad = dict(icao=icao)
        url = "/%s/html/eAIP/EE-AD-2.%s-en-GB.html" % (airac_date, icao)
        data, date = fetchdata.getdata(url, country='ee')
        parser.feed(data)
        tree = parser.close()
        thrs = []  # runway-threshold dicts collected for rwy_constructor
        # Aerodrome name: "<ICAO> — <name>" heading (em dash or hyphen).
        for h3 in tree.xpath(".//h3"):
            txt = alltext(h3)
            print repr(txt)
            ptrn = ur"\s*%s\s+[—-]\s+(.*)" % (unicode(icao.upper()), )
            m = re.match(ptrn, txt, re.UNICODE)
            if m:
                assert not 'name' in ad  # exactly one matching heading
                ad['name'] = m.groups()[0]
        # ARP position: "coordinates and site" row with DDMMSSN DDDMMSSE.
        for tr in tree.xpath(".//tr"):
            txt = alltext(tr)
            m = re.match(r".*coordinates\s*and\s*site.*(\d{6}N\s*\d{7}E).*",
                         txt)
            #print "Matching,",txt,":",m
            if m:
                crds, = m.groups()
                ad['pos'] = mapper.anyparse(crds)
        # Associated airspace (CTR/TIZ/FIZ), if any, from the ATS tables.
        space = dict()
        for table in tree.xpath(".//table"):
            for tr in table.xpath(".//tr"):
                trtxt = alltext(tr)
                if trtxt.count("Designation and lateral limits"):
                    space = dict()
                    coords = tr.getchildren()[2]
                    lines = alltext(coords).split("\n")
                    if lines[0].strip() == 'NIL':
                        continue
                    # First line: "<zone name> <CTR|TIZ|FIZ> [spill-over]".
                    zname, what, spill = re.match(
                        ur"(.*)\s+(CTR|TIZ|FIZ)(.*)", lines[0]).groups()
                    if spill and spill.strip():
                        rest = [spill] + lines[1:]
                    else:
                        rest = lines[1:]
                    what = what.strip()
                    # Zone name must be a substring of the aerodrome name.
                    assert ad['name'].upper().strip().count(
                        zname.upper().strip())
                    assert what in ['FIZ', 'TIZ', 'CTR']
                    space['type'] = what
                    space['points'] = mapper.parse_coord_str("\n".join(rest))
                    space['name'] = zname + " " + what
                    space['date'] = date
                    space['url'] = fetchdata.getrawurl(url, 'ee')
                if trtxt.count("Vertical limits"):
                    vlim = alltext(tr.getchildren()[2])
                    if vlim.strip() == 'NIL':
                        continue
                    space['floor'], space['ceiling'] = vlim.split(" to ")
        #space['freqs']=x
        #hlc=False
        # Charts: link each chart PDF (AD/landing/parking/VAC) to the ad.
        for h4 in tree.xpath(".//h4"):
            txt = alltext(h4)
            if txt.lower().count("charts"):
                par = h4.getparent()
                for table in par.xpath(".//table"):
                    for idx, tr in enumerate(table.xpath(".//tr")):
                        name, page = \
                            tr.getchildren()
                        nametxt = alltext(name)
                        print "nametxt:", nametxt, "link:"
                        for reg, variant in [
                                (r"Aerodrome.*Chart.*", ""),
                                (r"Landing.*Chart.*", "landing"),
                                (r".*Parking.*Chart.*", "parking"),
                                (r".*Visual.*Approach.*|.*\bVAC\b.*", "vac")
                        ]:
                            if re.match(reg, nametxt):
                                for a in page.xpath(".//a"):
                                    print "linklabel", a.text
                                    print "attrib:", a.attrib
                                    href = a.attrib['href']
                                    print "Bef repl", href
                                    if href.lower().endswith("pdf"):
                                        # Relative graphics path -> absolute
                                        # path under the current AIRAC cycle.
                                        href = href.replace(
                                            "../../graphics",
                                            "/%s/graphics" % (airac_date, ))
                                        print "href:", href, airac_date
                                        assert href
                                        parse_landing_chart.help_plc(
                                            ad, href, icao, ad['pos'], "ee",
                                            variant=variant)
                                        """arp=ad['pos']
                                        lc=parse_landing_chart.parse_landing_chart(
                                        href,
                                        icao=icao,
                                        arppos=arp,country="ee")
                                        assert lc
                                        if lc:
                                        ad['adcharturl']=lc['url']
                                        ad['adchart']=lc
                                        hlc=True
                                        #chartblobnames.append(lc['blobname'])
                                        """
        #assert hlc
        # Runway thresholds from the "RUNWAY PHYSICAL CHARACTERISTICS" table.
        for h4 in tree.xpath(".//h4"):
            txt = alltext(h4)
            if txt.count("RUNWAY PHYSICAL"):
                par = h4.getparent()
                for table in par.xpath(".//table"):
                    prevnametxt = ""
                    for idx, tr in enumerate(table.xpath(".//tr")):
                        if idx == 0:
                            fc = alltext(tr.getchildren()[0])
                            print "FC", fc
                            if not fc.count("Designations"):
                                break  #skip table
                        if idx < 2:
                            continue  # header rows
                        if len(tr.getchildren()) == 1:
                            continue  # spanning/separator row
                        print "c:", tr.getchildren(), alltexts(
                            tr.getchildren())
                        desig, trubrg, dims, strength, thrcoord, threlev = tr.getchildren(
                        )
                        rwy = re.match(r"(\d{2}[LRC]?)", alltext(desig))
                        altc = alltext(thrcoord)
                        print "Matching", altc
                        print "rwymatch:", alltext(desig)
                        m = re.match(
                            r"\s*(\d+\.?\d*N)[\s\n]*(\d+\.?\d*E).*",
                            altc, re.DOTALL | re.MULTILINE)
                        if m:
                            lat, lon = m.groups()
                            print "Got latlon", lat, lon
                            thrs.append(
                                dict(pos=mapper.parse_coords(lat, lon),
                                     thr=rwy.groups()[0]))
        # ATS frequencies, attached to the airspace (if one was found).
        space['freqs'] = []
        for h4 in tree.xpath(".//h4"):
            txt = alltext(h4)
            if txt.count("ATS COMMUNICATION"):
                par = h4.getparent()
                for table in par.xpath(".//table"):
                    for idx, tr in enumerate(table.xpath(".//tr")):
                        print "cs", repr(tr.getchildren()), alltexts(
                            tr.getchildren())
                        print len(tr.getchildren())
                        if len(tr.getchildren()) != 5:
                            if "".join(alltexts(
                                    tr.getchildren())).count(u"EMERG"):
                                continue  #Sometimes emergency freq is listed, and then it is without callsign
                        service, callsign, frequency, hours, remarks = \
                            tr.getchildren()
                        callsigntxt = alltext(callsign)
                        if idx < 2:
                            # Row 0 is the label header, row 1 the
                            # column-number row.
                            if idx == 0:
                                assert callsigntxt.strip() == "Call sign"
                            if idx == 1:
                                assert callsigntxt.strip() == "2"
                            continue
                        ftext = alltext(frequency)
                        print "matching freq", ftext
                        for freq in re.findall(ur"\b\d{3}\.\d{1,3}", ftext):
                            freqmhz = float(freq)
                            space['freqs'].append(
                                (callsigntxt.strip(), freqmhz))
        # Only keep the airspace if lateral limits were actually parsed.
        if space and 'points' in space:
            assert 'freqs' in space
            assert 'points' in space
            assert 'floor' in space
            assert 'ceiling' in space
            assert 'type' in space
            spaces.append(space)
        if thrs:
            ad['runways'] = rwy_constructor.get_rwys(thrs)
        # Attach the "General Information" text document for this aerodrome.
        aip_text_documents.help_parse_doc(ad, url, icao, "ee",
                                          title="General Information",
                                          category="general")
        ad['date'] = date
        ad['url'] = fetchdata.getrawurl(url, 'ee')
        print "AD:", ad
        assert 'pos' in ad
        assert 'name' in ad
        ads.append(ad)
def ee_parse_airfields2():
    # Duplicate (compressed) copy of the aerodrome scraper: builds one 'ad'
    # dict per ICAO code and one optional CTR/TIZ/FIZ 'space' per page.
    # NOTE(review): 'ads' and 'spaces' are never returned from the visible
    # body -- confirm whether a trailing return was lost during extraction.
    ads = []
    spaces = []
    airac_date = get_airac_date()
    print "airac", airac_date
    overview_url = "/%s/html/eAIP/EE-AD-0.6-en-GB.html" % (airac_date,)
    parser = lxml.html.HTMLParser()
    data, date = fetchdata.getdata(overview_url, country='ee')
    parser.feed(data)
    tree = parser.close()
    icaos = []
    # Collect ICAO codes from the overview page headings; skip heliports.
    for cand in tree.xpath(".//h3"):
        txts = alltexts(cand.xpath(".//a"))
        aps = re.findall(r"EE[A-Z]{2}", " ".join(txts))
        if aps:
            icao, = aps  # exactly one code expected per heading
            if alltext(cand).count("HELIPORT"):
                print "Ignore heliport", icao
                continue
            icaos.append(icao)
    # Parse each aerodrome's AD 2.x page.
    for icao in icaos:
        ad = dict(icao=icao)
        url = "/%s/html/eAIP/EE-AD-2.%s-en-GB.html" % (airac_date, icao)
        data, date = fetchdata.getdata(url, country='ee')
        parser.feed(data)
        tree = parser.close()
        thrs = []  # runway-threshold dicts for rwy_constructor
        # Aerodrome name from the "<ICAO> — <name>" heading.
        for h3 in tree.xpath(".//h3"):
            txt = alltext(h3)
            print repr(txt)
            ptrn = ur"\s*%s\s+[—-]\s+(.*)" % (unicode(icao.upper()),)
            m = re.match(ptrn, txt, re.UNICODE)
            if m:
                assert not 'name' in ad  # only one heading may match
                ad['name'] = m.groups()[0]
        # ARP position from the "coordinates and site" row.
        for tr in tree.xpath(".//tr"):
            txt = alltext(tr)
            m = re.match(r".*coordinates\s*and\s*site.*(\d{6}N\s*\d{7}E).*", txt)
            #print "Matching,",txt,":",m
            if m:
                crds, = m.groups()
                ad['pos'] = mapper.anyparse(crds)
        # Associated CTR/TIZ/FIZ airspace, if published.
        space = dict()
        for table in tree.xpath(".//table"):
            for tr in table.xpath(".//tr"):
                trtxt = alltext(tr)
                if trtxt.count("Designation and lateral limits"):
                    space = dict()
                    coords = tr.getchildren()[2]
                    lines = alltext(coords).split("\n")
                    if lines[0].strip() == 'NIL':
                        continue
                    # First line: "<zone name> <CTR|TIZ|FIZ> [spill-over]".
                    zname, what, spill = re.match(
                        ur"(.*)\s+(CTR|TIZ|FIZ)(.*)", lines[0]).groups()
                    if spill and spill.strip():
                        rest = [spill] + lines[1:]
                    else:
                        rest = lines[1:]
                    what = what.strip()
                    # Zone name must occur inside the aerodrome name.
                    assert ad['name'].upper().strip().count(zname.upper().strip())
                    assert what in ['FIZ', 'TIZ', 'CTR']
                    space['type'] = what
                    space['points'] = mapper.parse_coord_str("\n".join(rest))
                    space['name'] = zname + " " + what
                    space['date'] = date
                    space['url'] = fetchdata.getrawurl(url, 'ee')
                if trtxt.count("Vertical limits"):
                    vlim = alltext(tr.getchildren()[2])
                    if vlim.strip() == 'NIL':
                        continue
                    space['floor'], space['ceiling'] = vlim.split(" to ")
        #space['freqs']=x
        #hlc=False
        # Register chart PDFs (aerodrome/landing/parking/VAC variants).
        for h4 in tree.xpath(".//h4"):
            txt = alltext(h4)
            if txt.lower().count("charts"):
                par = h4.getparent()
                for table in par.xpath(".//table"):
                    for idx, tr in enumerate(table.xpath(".//tr")):
                        name, page = \
                            tr.getchildren()
                        nametxt = alltext(name)
                        print "nametxt:", nametxt, "link:"
                        for reg, variant in [
                                (r"Aerodrome.*Chart.*", ""),
                                (r"Landing.*Chart.*", "landing"),
                                (r".*Parking.*Chart.*", "parking"),
                                (r".*Visual.*Approach.*|.*\bVAC\b.*", "vac")
                        ]:
                            if re.match(reg, nametxt):
                                for a in page.xpath(".//a"):
                                    print "linklabel", a.text
                                    print "attrib:", a.attrib
                                    href = a.attrib['href']
                                    print "Bef repl", href
                                    if href.lower().endswith("pdf"):
                                        # Rebase relative graphics path onto
                                        # the current AIRAC cycle.
                                        href = href.replace(
                                            "../../graphics",
                                            "/%s/graphics" % (airac_date,))
                                        print "href:", href, airac_date
                                        assert href
                                        parse_landing_chart.help_plc(
                                            ad, href,
                                            icao, ad['pos'], "ee",
                                            variant=variant)
                                        """arp=ad['pos']
                                        lc=parse_landing_chart.parse_landing_chart(
                                        href,
                                        icao=icao,
                                        arppos=arp,country="ee")
                                        assert lc
                                        if lc:
                                        ad['adcharturl']=lc['url']
                                        ad['adchart']=lc
                                        hlc=True
                                        #chartblobnames.append(lc['blobname'])
                                        """
        #assert hlc
        # Runway thresholds from "RUNWAY PHYSICAL CHARACTERISTICS".
        for h4 in tree.xpath(".//h4"):
            txt = alltext(h4)
            if txt.count("RUNWAY PHYSICAL"):
                par = h4.getparent()
                for table in par.xpath(".//table"):
                    prevnametxt = ""
                    for idx, tr in enumerate(table.xpath(".//tr")):
                        if idx == 0:
                            fc = alltext(tr.getchildren()[0])
                            print "FC", fc
                            if not fc.count("Designations"):
                                break  #skip table
                        if idx < 2:
                            continue  # header rows
                        if len(tr.getchildren()) == 1:
                            continue  # spanning/separator row
                        print "c:", tr.getchildren(), alltexts(tr.getchildren())
                        desig, trubrg, dims, strength, thrcoord, threlev = tr.getchildren()
                        rwy = re.match(r"(\d{2}[LRC]?)", alltext(desig))
                        altc = alltext(thrcoord)
                        print "Matching", altc
                        print "rwymatch:", alltext(desig)
                        m = re.match(r"\s*(\d+\.?\d*N)[\s\n]*(\d+\.?\d*E).*",
                                     altc, re.DOTALL | re.MULTILINE)
                        if m:
                            lat, lon = m.groups()
                            print "Got latlon", lat, lon
                            thrs.append(dict(pos=mapper.parse_coords(lat, lon),
                                             thr=rwy.groups()[0]))
        # ATS frequencies, attached to the airspace dict.
        space['freqs'] = []
        for h4 in tree.xpath(".//h4"):
            txt = alltext(h4)
            if txt.count("ATS COMMUNICATION"):
                par = h4.getparent()
                for table in par.xpath(".//table"):
                    for idx, tr in \
                            enumerate(table.xpath(".//tr")):
                        print "cs", repr(tr.getchildren()), alltexts(tr.getchildren())
                        print len(tr.getchildren())
                        if len(tr.getchildren()) != 5:
                            if "".join(alltexts(tr.getchildren())).count(u"EMERG"):
                                continue  #Sometimes emergency freq is listed, and then it is without callsign
                        service, callsign, frequency, hours, remarks = \
                            tr.getchildren()
                        callsigntxt = alltext(callsign)
                        if idx < 2:
                            # Row 0 is the label header, row 1 the
                            # column-number row.
                            if idx == 0:
                                assert callsigntxt.strip() == "Call sign"
                            if idx == 1:
                                assert callsigntxt.strip() == "2"
                            continue
                        ftext = alltext(frequency)
                        print "matching freq", ftext
                        for freq in re.findall(ur"\b\d{3}\.\d{1,3}", ftext):
                            freqmhz = float(freq)
                            space['freqs'].append((callsigntxt.strip(), freqmhz))
        # Keep the airspace only if lateral limits were parsed.
        if space and 'points' in space:
            assert 'freqs' in space
            assert 'points' in space
            assert 'floor' in space
            assert 'ceiling' in space
            assert 'type' in space
            spaces.append(space)
        if thrs:
            ad['runways'] = rwy_constructor.get_rwys(thrs)
        # Attach the "General Information" text document for this aerodrome.
        aip_text_documents.help_parse_doc(ad, url,
                                          icao, "ee",
                                          title="General Information",
                                          category="general")
        ad['date'] = date
        ad['url'] = fetchdata.getrawurl(url, 'ee')
        print "AD:", ad
        assert 'pos' in ad
        assert 'name' in ad
        ads.append(ad)
def ee_parse_tma2(): spaces = [] airac_date = get_airac_date() url = "/%s/html/eAIP/EE-ENR-2.1-en-GB.html" % (airac_date, ) parser = lxml.html.HTMLParser() data, date = fetchdata.getdata(url, country='ee') parser.feed(data) tree = parser.close() icaos = [] def nested(tab): if tab == None: return False if tab.getparent() is None: return False #print dir(tab) if tab.tag == 'table': return True return nested(tab.getparent()) for tab in tree.xpath(".//table"): print "table alltext:", alltext(tab) if nested(tab.getparent()): continue firsttr = tab.xpath(".//tr")[0] ntext = alltext(firsttr) print "firsttr", firsttr print "ntext", ntext if re.match(ur".*FIR\s*/\s*CTA.*", ntext): print "Matches Tallin FIR" name = 'TALLIN FIR' points = mapper.parse_coord_str(firtxt, context='estonia') floor, ceiling = "GND", "FL195" space = {} space['name'] = name space['points'] = points space['floor'] = floor space['ceiling'] = ceiling space['freqs'] = [] space['icao'] = 'EETT' space['type'] = 'FIR' space['date'] = date space['url'] = fetchdata.getrawurl(url, 'ee') spaces.append(space) continue else: name = ntext.strip() space = dict(name=name) print "Name", name assert space['name'].count("TMA") \ or space['name'].count("FIR") if space['name'].count("FIR"): type = 'FIR' else: type = "TMA" freqs = [] points = None floor = None ceiling = None for cand in tab.xpath(".//tr"): if len(cand.getchildren()) != 2: continue nom, what = cand.getchildren() whattxt = alltext(what) nomtxt = alltext(nom) print "nomtxt", nomtxt, "space name", space['name'] if nomtxt.count("Lateral limits"): if space['name'].count("TALLINN TMA 2"): points = mapper.parse_coord_str(""" A circle with radius 20 NM centred on 592448N 0244957E """) else: whattxt = whattxt.replace( "then along the territory dividing line between Estonia and Russia to", "- Along the common Estonian/X state boundary to ") print "Fixed up", whattxt points = mapper.parse_coord_str(whattxt, context='estonia') if nomtxt.count("Vertical limits"): 
floor, ceiling = whattxt.split(" to ") if nomtxt.count("Call sign"): callsign = whattxt.split("\n")[0] if nomtxt.count("freq"): freqs.extend(re.findall(ur"\d+\.\d+\s*MHz")) assert points and floor and ceiling space['points'] = points space['type'] = type space['floor'] = floor space['ceiling'] = ceiling space['freqs'] = [] space['type'] = type space['date'] = date space['url'] = fetchdata.getrawurl(url, 'ee') for freq in freqs: space['freqs'].append((callsign, freq)) spaces.append(space)