def ev_parse_obst():
    """Scrape the obstacle table from the Latvian (EV) eAIP, section ENR 5.4.

    Fetches the current-AIRAC HTML page and returns a list of dicts, one per
    obstacle, with keys: name, pos, height, elev, lighting, kind.
    """
    cur_airac = get_cur_airac()
    url = "/eAIPfiles/%s-AIRAC/html/eAIP/EV-ENR-5.4-en-GB.html" % (cur_airac,)
    # url="/EV-ENR-5.4-en-GB.html"
    parser = lxml.html.HTMLParser()
    data, date = fetchdata.getdata(url, country="ev")
    parser.feed(data)
    tree = parser.close()
    res = []
    for table in tree.xpath("//table"):
        for row in table.xpath(".//tr"):
            tds = row.xpath(".//td")
            # Obstacle data rows have exactly five cells; anything else is a
            # header or layout row.
            if len(tds) != 5:
                continue
            # Renamed 'type' -> 'obst_type' to avoid shadowing the builtin.
            name, obst_type, coord, elev, light = [alltext(x) for x in tds]
            # The elevation cell is "<elev>/<height>" -- presumably AMSL
            # elevation / height above ground; TODO confirm against the eAIP.
            elev, height = elev.split("/")
            res.append(
                dict(
                    name=name,
                    pos=mapper.parsecoord(coord),
                    height=mapper.parse_elev(height.strip()),
                    elev=mapper.parse_elev(elev),
                    lighting=light,
                    kind=obst_type,
                ))
    return res
def parse_arc(line):
    """Handle one "DB <coord1>,<coord2>" arc record.

    Appends the arc's start point plus a textual clockwise-arc description
    (centred on vars['center']) to the enclosing scope's coords list.
    """
    keyword, payload = line.split(" ", 1)
    assert keyword == "DB"
    start_raw, end_raw = payload.split(",")
    start = parse_rawcoord(start_raw)
    end = parse_rawcoord(end_raw)
    assert vars['center']
    bearing, radius = mapper.bearing_and_distance(
        mapper.parsecoord(vars['center']), mapper.parsecoord(end))
    arc_text = (
        "clockwise along an arc with radius %s NM centred on %s to the point %s"
        % ((radius), vars['center'], end))
    coords.append(" ".join([start, arc_text]))
def parse_sig_points():
    """Parse Swedish AIP ENR 4.4 (significant points) and ENR 4.1 (nav-aids).

    Returns a list of dicts: {'name','kind','pos'} for significant points,
    plus 'short' and 'freq' for radio navigation aids.
    """
    p = Parser("/AIP/ENR/ENR 4/ES_ENR_4_4_en.pdf")
    points = []
    for pagenr in xrange(p.get_num_pages()):
        page = p.parse_page_to_items(pagenr)
        lines = page.get_lines(page.get_all_items(), order_fudge=20)
        for line in lines:
            cols = line.split()
            if len(cols) > 2:
                # Columns 1-2 should hold a lat/lon pair; keep the row only
                # if it parses as a coordinate.
                coordstr = " ".join(cols[1:3])
                if len(mapper.parsecoords(coordstr)) > 0:
                    points.append(dict(
                        name=cols[0],
                        kind='sig. point',
                        pos=mapper.parsecoord(coordstr)))
    p = Parser("/AIP/ENR/ENR 4/ES_ENR_4_1_en.pdf")
    for pagenr in xrange(p.get_num_pages()):
        page = p.parse_page_to_items(pagenr)
        nameheading, = page.get_by_regex(r".*Name of station.*")
        freqheading, = page.get_by_regex(r".*Frequency.*")
        coordheading, = page.get_by_regex(r".*Coordinates.*")
        # All non-blank items in the station-name column, top to bottom.
        # Items come in (name, kind) pairs.
        items = sorted(list(x for x in page.get_partially_in_rect(
            nameheading.x1, nameheading.y2 + 2, nameheading.x1 + 1, 100)
                            if x.text.strip()),
                       key=lambda x: x.y1)
        idx = 0
        while True:
            # BUGFIX: the bounds check must run BEFORE items[idx] is touched;
            # the old order read items[idx] first and raised IndexError once
            # idx walked past the end (any even-length items list).
            if idx + 1 >= len(items):
                break
            if items[idx].text.strip() == "":
                idx += 1
                continue
            name = items[idx]
            kind = items[idx + 1]
            diffy = kind.y1 - name.y2
            # The kind line must name a nav-aid type and sit directly
            # under the name line.
            assert kind.text.count("VOR") or kind.text.count(
                "DME") or kind.text.count("NDB")
            assert diffy < 0.5
            freqraw = " ".join(
                page.get_lines(
                    page.get_partially_in_rect(freqheading.x1, name.y1 + 0.05,
                                               freqheading.x2,
                                               kind.y2 - 0.05)))
            short, freq = re.match(
                r"\s*([A-Z]{2,3})?\s*(\d+(?:\.?\d+)\s+(?:MHz|kHz))\s*(?:H24)?\s*",
                freqraw).groups()
            posraw = " ".join(
                page.get_lines(
                    page.get_partially_in_rect(coordheading.x1, name.y1 + 0.05,
                                               coordheading.x2,
                                               kind.y2 - 0.05)))
            pos = mapper.parse_coords(*re.match(
                r".*?(\d+\.\d+[NS]).*?(\d+\.\d+[EW]).*", posraw).groups())
            points.append(dict(
                name=short + " " + kind.text.strip() + " " + name.text.strip(),
                short=short,
                kind="nav-aid",
                pos=pos,
                freq=freq))
            idx += 2
    return points
def parse_arc(line):
    """Consume one "DB" arc directive and record its textual expansion.

    The expansion (start point + clockwise-arc sentence around
    vars['center']) is appended to the enclosing scope's coords list.
    """
    tag, body = line.split(" ", 1)
    assert tag == "DB"
    raw_from, raw_to = body.split(",")
    pieces = [parse_rawcoord(raw_from)]
    end = parse_rawcoord(raw_to)
    assert vars['center']
    bearing, radius = mapper.bearing_and_distance(
        mapper.parsecoord(vars['center']),
        mapper.parsecoord(end))
    pieces.append(
        "clockwise along an arc with radius %s NM centred on %s to the point %s"
        % ((radius), vars['center'], end))
    coords.append(" ".join(pieces))
def ev_parse_sigpoints(): out=[] parser=lxml.html.HTMLParser() airac=get_cur_airac() url="/eAIPfiles/%s-AIRAC/html/eAIP/EV-ENR-4.4-en-GB.html"%(airac) data,date=fetchdata.getdata(url,country='ev') parser.feed(data) tree=parser.close() for table in tree.xpath("//table"): #print "Table with %d children"%(len(table.getchildren()),) rows=list(table.xpath(".//tr")) for row in rows: hdr=list(row.xpath(".//th")) if hdr: continue cols=list(row.xpath(".//td")) pos=mapper.parsecoord(alltext(cols[1])) nameraw=alltext(cols[0]) print "raw:",repr(nameraw) name,=re.match(ur"\s*(\w{5})\s*",nameraw).groups() out.append(dict(name=name, kind='sig. point', pos=pos)) for manual in """PARKS:570014N 0241039E:entry/exit point VISTA:565002N 0241034E:entry/exit point ARNIS:565427N 0234611E:entry/exit point KISHI:565609N 0234608E:entry/exit point HOLDING WEST:565530N 0235327E:holding point HOLDING EAST:565351N 0240313E:holding point""".split("\n"): name,poss,kind=manual.split(":") out.append(dict( name=name.strip(), pos=mapper.parsecoord(poss), kind=kind)) return out
def ep_parse_wikipedia_airports(url): parser = lxml.html.HTMLParser() data, date = fetchdata.getdata(url, country="wikipedia") parser.feed(data) tree = parser.close() res = [] for table in tree.xpath("//table"): for nr, row in enumerate(table.xpath(".//tr")): cols = list([alltext(x) for x in row.xpath(".//td")]) print "#", nr, ": ", cols if nr == 0: if len(cols) == 0 or cols[0].strip() != "Airport": break assert cols[3].strip() == "ICAO" assert cols[4].strip() == "Purpose" assert cols[5].strip().count("El") assert cols[9].strip() == "Coordinates" else: purpose = cols[4].strip() if purpose.count("Unused"): continue if purpose.count("Closed"): continue if purpose.count("Liquidated"): continue if purpose == "Military": continue #Just military icao = cols[3].strip() if icao == "": icao = "ZZZZ" name = cols[0].strip() #print "lats:",row.xpath(".//span[@class='latitude']") lat, = alltexts(row.xpath(".//span[@class='latitude']")) lon, = alltexts(row.xpath(".//span[@class='longitude']")) coords = fixup(lat.strip() + " " + lon.strip()) elevft = float(cols[5].strip()) res.append( dict(pos=mapper.parsecoord(coords), name=name, elev=elevft / 0.3048, icao=icao, date=date, url=url)) return res
def parse_airfields(): out = [] for item in csv.reader(open("fplan/extract/denmark.airfields.csv")): print item icao, empty, ICAO, name, d1, d2, pos, elev, owner, phone, d4, d5, webside = item if not pos[-1] in ['E', 'W']: pos = pos + "E" print "ICAO:", icao assert icao.upper() == ICAO name = coding(name) lat, lon = mapper.from_str(mapper.parsecoord(pos)) nasaelev = get_terrain_elev((lat, lon)) if elev == '': elev = nasaelev if nasaelev != 9999: assert abs(float(elev) - nasaelev) < 100 ad = dict(icao=ICAO, name=name, pos=mapper.to_str((lat, lon)), date="2010-01-01T00:00:00Z", elev=int(elev)) out.append(ad) return out
def parse_airfields(): out=[] for item in csv.reader(open("fplan/extract/denmark.airfields.csv")): print item icao,empty,ICAO,name,d1,d2,pos,elev,owner,phone,d4,d5,webside=item if not pos[-1] in ['E','W']: pos=pos+"E" print "ICAO:",icao assert icao.upper()==ICAO name=coding(name) lat,lon=mapper.from_str(mapper.parsecoord(pos)) nasaelev=get_terrain_elev((lat,lon)) if elev=='': elev=nasaelev if nasaelev!=9999: assert abs(float(elev)-nasaelev)<100 ad=dict( icao=ICAO, name=name, pos=mapper.to_str((lat,lon)), date="2010-01-01T00:00:00Z", elev=int(elev)) out.append(ad) return out
def ev_parse_obst():
    """Fetch and parse the obstacle table (ENR 5.4) from the Latvian eAIP.

    Returns a list of obstacle dicts with keys name, pos, height, elev,
    lighting and kind.
    """
    cur_airac = get_cur_airac()
    url = "/eAIPfiles/%s-AIRAC/html/eAIP/EV-ENR-5.4-en-GB.html" % (cur_airac, )
    #url="/EV-ENR-5.4-en-GB.html"
    html_parser = lxml.html.HTMLParser()
    data, date = fetchdata.getdata(url, country="ev")
    html_parser.feed(data)
    tree = html_parser.close()
    got_fir = False  # kept for parity with the original; never read here
    obstacles = []
    for table in tree.xpath("//table"):
        for row in table.xpath(".//tr"):
            cells = row.xpath(".//td")
            # Only five-cell rows carry obstacle data.
            if len(cells) != 5:
                continue
            name, type, coord, elev, light = [alltext(c) for c in cells]
            # Elevation cell is "<elev>/<height>".
            elev, height = elev.split("/")
            obstacles.append(dict(
                name=name,
                pos=mapper.parsecoord(coord),
                height=mapper.parse_elev(height.strip()),
                elev=mapper.parse_elev(elev),
                lighting=light,
                kind=type))
    return obstacles
def ep_parse_airfield(icao):
    # Parse one Polish (EP) AIP AD-2 aerodrome page by ICAO code.
    # NOTE(review): this definition is truncated in the chunk under review --
    # only the page-0 handling (name, ARP position, elevation extraction) is
    # visible; the remainder of the function lies outside this view.
    spaces = []
    pages, date = miner.parse("/aip/openp.php?id=EP_AD_2_%s_en" % (icao, ),
                              maxcacheage=86400 * 7,
                              country='ep',
                              usecache=True)
    print "parsing ", icao, date
    points = None
    ctrname = None
    freqs = []
    for nr, page in enumerate(pages):
        if nr == 0:
            # Generator dropping items whose rendered text height is tiny
            # (layout noise such as rules and page furniture).
            def filter_tiny(its):
                for it in its:
                    print "Filtering:", repr(it)
                    print "size %f of <%s>." % (it.y2 - it.y1, it.text)
                    textsize = it.y2 - it.y1
                    if textsize > 0.4:
                        yield it
            namehdg, = page.get_by_regex(
                ur".*AERODROME\s+LOCATION\s+INDICATOR\s+AND\s+NAME.*",
                re.DOTALL)
            # Everything just below the name heading.
            subs = page.get_partially_in_rect(0, namehdg.y1 + 0.5, 100,
                                              namehdg.y2 + 2.5)
            allsubs = []
            for sub in subs:
                print "Item:", repr(sub)
                print "sub", repr(sub.subs)
                allsubs.extend(sub.subs)
            print "allsubs", allsubs
            lineobjs = list(filter_tiny(allsubs))
            for lineobj in lineobjs:
                line = lineobj.text.strip()
                print "line:", line
                # Skip the bare ICAO code and the English/Polish headings.
                if line == icao:
                    continue
                if re.match(ur".*AERODROME\s*LOCATION\s*INDICATOR.*", line):
                    continue
                if re.match(
                        ur".*WSKAŹNIK\s*LOKALIZACJI\s*LOTNISKA\s*I\s*NAZWA.*",
                        line):
                    continue
                # Expected shape: "<ICAO> - <aerodrome name>".
                m = re.match(ur"%s\s*[-]\s*([\w\s/]+)" % (icao, ), line,
                             re.UNICODE | re.DOTALL)
                name, = m.groups()
                name = name.strip()
                break
            else:
                raise Exception("No name found!")
            print "Name:", name
            site, = page.get_by_regex(
                ur"ARP\s*-\s*WGS-84\s*coordinates\s*and\s*site\s*at\s*AD")
            print "site:", repr(site.text.strip())
            splat = site.text.strip().split("\n")
            print "splat:", splat
            print len(splat)
            # Lines after the heading; the first one matching a
            # D°M'S''N D°M'S''E pattern is taken as the ARP position.
            poss = splat[1:]
            print "rawpos,", poss
            for line in poss:
                m = re.match(
                    ur"(\d+)°(\d+)'(\d+)''(N)\s*(\d+)°(\d+)'(\d+)''(E).*",
                    line)
                if m:
                    pos = mapper.parsecoord("".join(m.groups()))
                    break
            else:
                raise Exception("No pos found")
            # Elevation: take the feet figure from "<n> m (<n> ft)".
            elevi, = page.get_by_regex(
                ur"\s*Elevation/Reference\s*temperature\s*", re.DOTALL)
            elevft, = re.match(ur".*\d+\s+m\s*\((\d+)\s+ft\).*", elevi.text,
                               re.DOTALL).groups()
            elev = float(elevft)
            # NOTE(review): fragment cut at both ends -- this is the tail of
            # ev_parse_airfields() (per the __main__ guard below); ctrname,
            # ctrarea, ceiling, type_, floor, freqs, date, url and ads are
            # bound in the part of the function not visible here.
            spaces.append(dict(
                name=ctrname,
                points=mapper.parse_coord_str(ctrarea),
                ceiling=ceiling,
                type=type_,
                floor=floor,
                freqs=freqs,
                date=date,
                url=url
                ))
    # Spilve (EVRS) is added by hand since it is not in the parsed data.
    spilve=dict(
        icao="EVRS",
        name="Spilve",
        elev=5,
        # Python 2 leading-zero literals are octal: 04 == 4, 05 == 5,
        # i.e. 2011-04-05.
        date=datetime(2011,04,05),
        pos=mapper.parsecoord("565931N 240428E")
        )
    charturl="http://www.swflightplanner.se:8080/spilve.png"
    arp=spilve['pos']
    icao=spilve['icao'].upper()
    parse_landing_chart.help_plc(spilve,charturl,icao,arp,country='raw',variant="landing")
    ads.append(spilve)
    return ads,spaces
if __name__=='__main__':
    ads,spaces=ev_parse_airfields()
    # NOTE(review): loop body truncated in this chunk.
    for ad in ads:
def parse_sig_points():
    """Parse Swedish AIP ENR 4.4 (significant points) and ENR 4.1 (nav-aids).

    Returns a list of dicts: {'name','kind','pos'} for significant points,
    plus 'short' and 'freq' for radio navigation aids.
    """
    p = Parser("/AIP/ENR/ENR 4/ES_ENR_4_4_en.pdf")
    points = []
    for pagenr in xrange(p.get_num_pages()):
        page = p.parse_page_to_items(pagenr)
        lines = page.get_lines(page.get_all_items(), order_fudge=20)
        for line in lines:
            cols = line.split()
            if len(cols) > 2:
                # Columns 1-2 should hold a lat/lon pair; keep the row only
                # if it parses as a coordinate.
                coordstr = " ".join(cols[1:3])
                if len(mapper.parsecoords(coordstr)) > 0:
                    crd = mapper.parsecoord(coordstr)
                    points.append(
                        dict(name=cols[0], kind='sig. point', pos=crd))
    p = Parser("/AIP/ENR/ENR 4/ES_ENR_4_1_en.pdf")
    for pagenr in xrange(p.get_num_pages()):
        page = p.parse_page_to_items(pagenr)
        nameheading, = page.get_by_regex(r".*Name of station.*")
        freqheading, = page.get_by_regex(r".*Frequency.*")
        coordheading, = page.get_by_regex(r".*Coordinates.*")
        # All non-blank items in the station-name column, top to bottom.
        # Items come in (name, kind) pairs.
        items = sorted(list(x for x in page.get_partially_in_rect(
            nameheading.x1, nameheading.y2 + 2, nameheading.x1 + 1, 100)
                            if x.text.strip()),
                       key=lambda x: x.y1)
        idx = 0
        while True:
            # BUGFIX: the bounds check must run BEFORE items[idx] is touched;
            # the old order read items[idx] first and raised IndexError once
            # idx walked past the end (any even-length items list).
            if idx + 1 >= len(items):
                break
            if items[idx].text.strip() == "":
                idx += 1
                continue
            name = items[idx]
            kind = items[idx + 1]
            diffy = kind.y1 - name.y2
            # The kind line must name a nav-aid type and sit directly
            # under the name line.
            assert kind.text.count("VOR") or kind.text.count(
                "DME") or kind.text.count("NDB")
            assert diffy < 0.5
            freqraw = " ".join(
                page.get_lines(
                    page.get_partially_in_rect(freqheading.x1, name.y1 + 0.05,
                                               freqheading.x2,
                                               kind.y2 - 0.05)))
            short, freq = re.match(
                r"\s*([A-Z]{2,3})?\s*(\d+(?:\.?\d+)\s+(?:MHz|kHz))\s*(?:H24)?\s*",
                freqraw).groups()
            posraw = " ".join(
                page.get_lines(
                    page.get_partially_in_rect(coordheading.x1, name.y1 + 0.05,
                                               coordheading.x2,
                                               kind.y2 - 0.05)))
            pos = mapper.parse_coords(*re.match(
                r".*?(\d+\.\d+[NS]).*?(\d+\.\d+[EW]).*", posraw).groups())
            points.append(
                dict(name=short + " " + kind.text.strip() + " " +
                     name.text.strip(),
                     short=short,
                     kind="nav-aid",
                     pos=pos,
                     freq=freq))
            idx += 2
    return points