def fi_parse_sigpoints(): points = [] p = parse.Parser("/ais/eaip/pdf/enr/EF_ENR_4_4_EN.pdf", lambda x: x, country='fi') for pagenr in xrange(p.get_num_pages()): page = p.parse_page_to_items(pagenr) for item in page.get_by_regex(ur"\d{6}N\s*\d{7}E"): lines = page.get_lines( page.get_partially_in_rect(0, item.y1, 100, item.y2)) print "Sigpoint lines:%s" % (repr(lines, )) lines = [ line for line in lines if re.match(ur"[A-Z]{5}.*\d{6}N\s*\d{7}E.*", line) ] assert len(lines) == 1 print "parse:", lines[0] name, lat, lon = re.match( ur"\s*([A-Z]{5})\s*(?:\(\s*FLYOVER\s*\))?\s*X?\s*(\d{6}N)\s*(\d{7}E)\s*.*", lines[0]).groups() points.append( dict(name=name, kind='sig. point', pos=mapper.parse_coords(lat, lon)))
def fi_parse_small_airfields(only=None):
    # Parse the Finnish VFR small-aerodrome booklet (one aerodrome per page)
    # into per-aerodrome dicts keyed by ICAO code.
    # only: optional ICAO code -- when given, the detail sections of all
    # other aerodromes are skipped (their name/icao are still recorded).
    # NOTE(review): this copy ends after the frequency section and has no
    # return statement -- looks like a truncated chunk; confirm against the
    # fuller variant of this function.
    p = parse.Parser("/ais/vfr/pdf/aerodromes.pdf", lambda x: x, country="fi")
    ads = dict()
    for pagenr in xrange(p.get_num_pages()):
        page = p.parse_page_to_items(pagenr)
        if not page: continue
        # The page heading (top 15% of the page) carries "<name>, Finland <ICAO>".
        lines = page.get_lines(page.get_partially_in_rect(0, 0, 100, 15))
        heading = lines[0].strip()
        if pagenr < 4 and not heading[-4:-2] == "EF":
            continue  #this is one of the first pages, with general info, not an airport sheet
        #print heading
        name, icao = re.match(ur"(.*),\s*Finland\s*(EF[A-Z]{2})",
                              heading).groups()
        name = name.strip()
        ad = ads.setdefault(icao, dict())
        ad['name'] = name
        ad['icao'] = icao
        #print "Name: <%s> <%s>"%(icao,name)
        if only != None and only != icao:
            continue
        # Aerodrome reference point: coordinate pair on the first line below
        # the "ARP sijainti/location" label.
        for item in page.get_by_regex(ur"1.*ARP.*sijainti.*location"):
            posline = page.get_lines(
                page.get_partially_in_rect(0, item.y2 + 0.05, 100,
                                           item.y2 + 5))[0]
            print "Posline:", posline
            lat, lon = re.match(ur"(\d{6}N) (\d{7}E).*", posline).groups()
            ad['pos'] = mapper.parse_coords(lat.replace(" ", ""),
                                            lon.replace(" ", ""))
        # Radio frequency: first MHz value below the "FREQ MHZ" heading.
        for item in page.get_by_regex(ur"FREQ MHZ"):
            freqline = page.get_lines(
                page.get_partially_in_rect(item.x1, item.y2 + 0.05,
                                           item.x2 + 20, item.y2 + 5))[0]
            print "Freqline:", freqline
            freq, = re.match(ur"(\d{3}\.\d{3}).*", freqline).groups()
            ad['freq'] = float(freq)
def ee_parse_sigpoints(): points = [] p = parse.Parser("/index.aw?section=9142&action=genpdf&file=9142.pdf", lambda x: x, country='ee') for pagenr in xrange(p.get_num_pages()): page = p.parse_page_to_items(pagenr) for item in page.get_by_regex(ur"\d{6}N\s*\d{7}E"): lines = page.get_lines( page.get_partially_in_rect(0, item.y1 + 0.01, 100, item.y2 - 0.01)) print "Sigpoint lines:%s" % (repr(lines, )) lines = [ line for line in lines if re.match( ur"\s*\w{4,6}.*\d{6}N\s*\d{7}E.*", line, re.UNICODE) ] print lines assert len(lines) == 1 print "parse:", lines[0] name, lat, lon = re.match(ur"\s*(\w{4,6})\s*(\d{6}N)\s*(\d{7}E).*", lines[0], re.UNICODE).groups() points.append( dict(name=name, kind='sig. point', pos=mapper.parse_coords(lat, lon)))
def get_airspaces(routes):
    # Generator: yields one RNAV airspace dict per route, built from the
    # hand-maintained module-level 'predef' table keyed by
    # (routename, signature-of-coords).  The signature guards against
    # silently reusing stale predefined data when the source route changes;
    # an unknown route raises, printing a template to paste into 'predef'.
    for routename,coords,altspec in routes:
        sig=getsig(coords,altspec)
        if (routename,sig) in predef:
            #pobj=None
            #for sroutename,ssig in predef:
            #    if routename==sroutename:
            pobj=(routename,sig)
            width_nm,floor,ceiling,coordstr=predef[pobj]
            rawcoords=re.findall(ur"(\d{6}N)\s*(\d{7}E)",coordstr)
            # Route centerline converted to mercator at zoom level 13.
            coords=[mapper.latlon2merc(mapper.from_str(mapper.parse_coords(lats,lons)),13)
                    for lats,lons in rawcoords]
            # Route width in mercator units; the 1.25 factor presumably adds
            # margin around the nominal width -- TODO confirm.
            width=float(mapper.approx_scale(coords[0],13,1.25*width_nm))
            try:
                outline=get_outline(coords,width)
            except Exception:
                uprint(u"Trouble parsing %s"%(routename,))
                raise
            yield dict(name=routename,
                floor=floor,
                ceiling=ceiling,
                freqs=[],
                type="RNAV",
                points=[mapper.to_str(mapper.merc2latlon(x,13)) for x in outline])
        else:
            uprint("Need to have predefine for route %s, with md5: %s"%(routename,sig))
            uprint("Altitude, and points")
            raise Exception('Missing predef for route. Use: ("%s","%s") : ( 10,"FL 65","FL 95","""\n\n """),'%(routename,sig))
def fi_parse_obstacles(): obsts = [] p = parse.Parser("/ais/eaip/pdf/enr/EF_ENR_5_4_EN.pdf", lambda x: x, country='fi') for pagenr in xrange(p.get_num_pages()): page = p.parse_page_to_items(pagenr) item = min(page.get_by_regex(ur"(?:AIP SUOMI.*)|(?:ENR 5.4.*)"), key=lambda x: x.x1) assert item.x1 < 20 lx = item.x1 for item in page.get_by_regex(ur"\d{6}N\s*\d{7}E"): lines = page.get_lines( page.get_partially_in_rect(lx - 1, item.y1 - 0.25, lx + 5, item.y2 + 0.25)) print "obj", lines assert len(lines) == 1 objid, = re.match("(EFINOB \d+)", lines[0]).groups() lines = page.get_lines( page.get_partially_in_rect(lx + 15, item.y1 - 0.25, lx + 25, item.y2 + 0.25)) obsttypes = [("Savupiippu", 'Chimney'), ('Masto', 'Mast'), ('Rakennus', 'Building'), ('Tuulivoimala', 'Wind tur[\s-]*bine'), ('Torni', 'Tower'), ('Nosturi', 'Crane'), ('Pyloni', 'Pylon')] lines = page.get_partially_in_rect(lx + 15, item.y1 - 0.25, 100, item.y2 + 0.25) lines.sort(key=lambda x: x.x1 + 0.05 * x.y1 ) #sort mostly on x1, and slightly on y1 nameandtype = " ".join(l.text.strip() for l in lines) name, kind = None, None for obst_fi, obst_en in obsttypes: print "Mach:", repr(nameandtype) regex = ur"(.{3,})\s+%s\s*/\s*(%s)\s*(\d{6}N)\s*(\d{7}E)\s+(\d+)\s+(\d+)(.*)" % ( obst_fi, obst_en) print regex m = re.match(regex, nameandtype) print "Matched <%s> against <%s>, result: %s" % ( regex, nameandtype, m) if m: name, kind, lat, lon, height, elev, lighting = m.groups() break assert name and kind #lines=page.get_lines(page.get_partially_in_rect(lx+52,item.y1-0.25,100,item.y2+0.25)) #print lines #assert len(lines)==1 #lat,lon,height,elev,lighting=re.match(ur"",lines[0]).groups() obsts.append( dict(name=name, pos=mapper.parse_coords(lat, lon), height=height, elev=elev, lighting=lighting, kind=kind, objid=objid))
def parse_sig_points():
    # Parse Swedish significant points (ENR 4.4) plus radio nav-aids
    # (ENR 4.1) into a single list of dicts; significant points get
    # kind='sig. point', nav-aids kind='nav-aid' plus short/freq fields.
    p=Parser("/AIP/ENR/ENR 4/ES_ENR_4_4_en.pdf")
    points=[]
    for pagenr in xrange(p.get_num_pages()):
        #print "Processing page %d"%(pagenr,)
        page=p.parse_page_to_items(pagenr)
        lines=page.get_lines(page.get_all_items(),order_fudge=20)
        for line in lines:
            cols=line.split()
            if len(cols)>2:
                # Column 0 is the point name; columns 1-2 the coordinate pair.
                coordstr=" ".join(cols[1:3])
                #print cols
                if len(mapper.parsecoords(coordstr))>0:
                    crd=mapper.parsecoord(coordstr)
                    #print "Found %s: %s"%(cols[0],crd)
                    points.append(dict(
                        name=cols[0],
                        kind='sig. point',
                        pos=crd))
    # Nav-aids (VOR/DME/NDB) from ENR 4.1: locate the table column headings
    # once per page, then read each station's cells relative to them.
    p=Parser("/AIP/ENR/ENR 4/ES_ENR_4_1_en.pdf")
    for pagenr in xrange(p.get_num_pages()):
        page=p.parse_page_to_items(pagenr)
        nameheading,=page.get_by_regex(r".*Name of station.*")
        freqheading,=page.get_by_regex(r".*Frequency.*")
        coordheading,=page.get_by_regex(r".*Coordinates.*")
        items=sorted(list(x for x in page.get_partially_in_rect(nameheading.x1,nameheading.y2+2,nameheading.x1+1,100) if x.text.strip()),key=lambda x:x.y1)
        idx=0
        while True:
            if items[idx].text.strip()=="":
                idx+=1
                continue
            if idx+1>=len(items):
                break
            # Entries come in vertical pairs: station-name item, then the
            # station-kind item (VOR/DME/NDB) immediately below it.
            name=items[idx]
            kind=items[idx+1]
            diffy=kind.y1-name.y2
            #print "Name, kind:",name,kind
            #print name.text,kind.text,diffy
            assert kind.text.count("VOR") or kind.text.count("DME") or kind.text.count("NDB")
            assert diffy<0.5
            #print "Frq cnt: <%s>"%(page.get_partially_in_rect(freqheading.x1,name.y1+0.05,freqheading.x2,kind.y2-0.05),)
            freqraw=" ".join(page.get_lines(page.get_partially_in_rect(freqheading.x1,name.y1+0.05,freqheading.x2,kind.y2-0.05)))
            short,freq=re.match(r"\s*([A-Z]{2,3})?\s*(\d+(?:\.?\d+)\s+(?:MHz|kHz))\s*(?:H24)?\s*",freqraw).groups()
            posraw=" ".join(page.get_lines(page.get_partially_in_rect(coordheading.x1,name.y1+0.05,coordheading.x2,kind.y2-0.05)))
            #print "Rawpos<%s>"%(posraw,)
            pos=mapper.parse_coords(*re.match(r".*?(\d+\.\d+[NS]).*?(\d+\.\d+[EW]).*",posraw).groups())
            #print "Name: %s, Shortname: %s, Freq: %s,pos: %s"%(name.text,short,freq,pos)
            points.append(dict(
                name=short+" "+kind.text.strip()+" "+name.text.strip(),
                short=short,
                kind="nav-aid",
                pos=pos,
                freq=freq))
            idx+=2
    return points
def parse_obstacles():
    # Parse the Swedish obstacle list (AIP ENR 5.4) into dicts with
    # name/pos/height/elev/lighting/kind.  Any line that is neither known
    # boilerplate nor a parsable obstacle row raises, so source-format
    # changes fail loudly instead of silently dropping data.
    p=parse.Parser("/AIP/ENR/ENR 5/ES_ENR_5_4_en.pdf",lambda x: x)
    res=[]
    for pagenr in xrange(0,p.get_num_pages()):
        page=p.parse_page_to_items(pagenr)
        items=page.get_by_regex(r"\bDesignation\b")
        print items
        assert len(items)==1
        ay1=items[0].y1
        ay2=100
        in_rect=page.get_fully_in_rect(0,ay1,100,100)
        lines=page.get_lines(in_rect,order_fudge=30)
        for line in lines:
            line=line.strip()
            # Skip header/footer boilerplate that is not an obstacle row.
            if line=="within radius 300 m.": continue
            if line=="": continue
            if line.startswith("AMDT"): continue
            if line.startswith("AIRAC AMDT"): continue
            if re.match("^Area\s*No\s*Designation.*",line): continue
            if re.match("^ft\s*ft\s*Character.*",line): continue
            if line.strip()=="The LFV Group": continue
            if line.startswith("The"): continue
            if line.startswith("LFV"): continue
            if line.startswith("Group"): continue
            if line.strip()=="": continue
            uprint("Matching line: <%s>"%(line,))
            if line.strip()=="Reverse side intentionally blank": continue
            # Row shape: [area ref] num <name> <lat> <lon> [(*)] <height> <elev> <rest>
            m=re.match(r"\s*(?:\d{2}N \d{2}E)?\s*\d+\s*(.*?)(\d{6}\.?\d*N)\s*(\d{7}\.?\d*E)\s*(?:\(\*\))?\s*(\d+)\s*(\d+)\s*(.*)$", line)
            if m:
                name,lat,lon,height,elev,more=m.groups()
                uprint("Found match: %s"%(m.groups(),))
                # 'obsttypes' is defined elsewhere in this module: the list of
                # known obstacle-kind strings used to split lighting vs type.
                light_and_type=re.match(r"(.*?)\s*("+"|".join(obsttypes)+")",more)
                if not light_and_type:
                    raise Exception(u"Unknown obstacle type:%s"%(more,))
                light,kind=light_and_type.groups()
                res.append(
                    dict(
                        name=name,
                        pos=mapper.parse_coords(lat,lon),
                        height=height,
                        elev=elev,
                        lighting=light,
                        kind=kind
                        ))
            else:
                raise Exception("Unparsed obstacle line: %s"%(line,))
    return res
def fi_parse_obstacles():
    # Parse the Finnish obstacle list (AIP ENR 5.4) into dicts with
    # name/pos/height/elev/lighting/kind/objid.
    obsts=[]
    p=parse.Parser("/ais/eaip/pdf/enr/EF_ENR_5_4_EN.pdf",lambda x: x,country='fi')
    for pagenr in xrange(p.get_num_pages()):
        page=p.parse_page_to_items(pagenr)
        # The page header item fixes the left margin of the obstacle table.
        item=min(page.get_by_regex(ur"(?:AIP SUOMI.*)|(?:ENR 5.4.*)"),key=lambda x:x.x1)
        assert item.x1<20
        lx=item.x1
        for item in page.get_by_regex(ur"\d{6}N\s*\d{7}E"):
            # Obstacle id ("EFINOB nnn") sits in the leftmost column.
            lines=page.get_lines(page.get_partially_in_rect(lx-1,item.y1-0.25,lx+5,item.y2+0.25))
            print "obj",lines
            assert len(lines)==1
            objid,=re.match("(EFINOB \d+)",lines[0]).groups()
            lines=page.get_lines(page.get_partially_in_rect(lx+15,item.y1-0.25,lx+25,item.y2+0.25))
            # (Finnish, English-regex) obstacle-type pairs; the English side
            # may contain regex syntax (hyphenation in "Wind turbine").
            obsttypes=[
                ("Savupiippu",'Chimney'),
                ('Masto','Mast'),
                ('Rakennus','Building'),
                ('Tuulivoimala','Wind tur[\s-]*bine'),
                ('Torni','Tower'),
                ('Nosturi','Crane'),
                ('Pyloni','Pylon')
                ]
            lines=page.get_partially_in_rect(lx+15,item.y1-0.25,100,item.y2+0.25)
            lines.sort(key=lambda x:x.x1+0.05*x.y1) #sort mostly on x1, and slightly on y1
            nameandtype=" ".join(l.text.strip() for l in lines)
            name,kind=None,None
            # Try each known type until one matches the assembled row text.
            for obst_fi,obst_en in obsttypes:
                print "Mach:",repr(nameandtype)
                regex=ur"(.{3,})\s+%s\s*/\s*(%s)\s*(\d{6}N)\s*(\d{7}E)\s+(\d+)\s+(\d+)(.*)"%(obst_fi,obst_en)
                print regex
                m=re.match(regex,nameandtype)
                print "Matched <%s> against <%s>, result: %s"%(regex,nameandtype,m)
                if m:
                    name,kind,lat,lon,height,elev,lighting=m.groups()
                    break
            assert name and kind
            #lines=page.get_lines(page.get_partially_in_rect(lx+52,item.y1-0.25,100,item.y2+0.25))
            #print lines
            #assert len(lines)==1
            #lat,lon,height,elev,lighting=re.match(ur"",lines[0]).groups()
            obsts.append(
                dict(
                    name=name,
                    pos=mapper.parse_coords(lat,lon),
                    height=height,
                    elev=elev,
                    lighting=lighting,
                    kind=kind,
                    objid=objid
                    ))
    return obsts
def fi_parse_airfield(icao=None):
    # Parse the AIP AD 2 sheet for one Finnish airfield.
    # NOTE(review): this copy ends after the elevation section -- looks like
    # a truncated chunk of the fuller variant of this function.
    spaces = []
    ad = dict()
    assert icao != None
    ad['icao'] = icao
    sigpoints = []
    #https://ais.fi/ais/eaip/pdf/aerodromes/EF_AD_2_EFET_EN.pdf
    #https://ais.fi/ais/eaip/aipcharts/efet/EF_AD_2_EFET_VAC.pdf
    #vacp=parse.Parser("/ais/eaip/aipcharts/%s/EF_AD_2_%s_VAC.pdf"%(icao.lower(),icao),lambda x: x,country="fi")
    def remove_italics(x):
        # PDF text items may carry <i>...</i> markup; strip it.
        return x.replace("<i>", "").replace("</i>", "")
    p = parse.Parser("/ais/eaip/pdf/aerodromes/EF_AD_2_%s_EN.pdf" % (icao, ),
                     remove_italics,
                     country="fi")
    #The following doesn't actually work, since finnish VAC are bitmaps!!! :-(
    if 0:
        vacpage = vacp.parse_page_to_items(0)
        repp = vacpage.get_by_regex("\s*REPORTING\s*POINTS\s*")
        assert len(repp) > 0
        for item in repp:
            lines = iter(
                page.get_lines(
                    page.get_partially_in_rect(item.x1, item.y2 + 0.1, 100,
                                               100)))
            for line in lines:
                uprint("Looking for reporting points:%s" % (line, ))
                name, lat, lon = re.match(
                    ur"([A-ZÅÄÖ\s ]{3,})\s*([ \d]+N)\s*([ \d]+E).*", line)
                sigpoints.append(
                    dict(name=icao + " " + name.strip(),
                         kind="reporting",
                         pos=mapper.parse_coords(lat.replace(" ", ""),
                                                 lon.replace(" ", ""))))
    page = p.parse_page_to_items(0)
    # Airfield name from the big "<ICAO> - <NAME>" page heading.
    nameregex = ur"%s\s+-\s+([A-ZÅÄÖ\- ]{3,})" % (icao, )
    for item in page.get_by_regex(nameregex):
        #print "fontsize:",item.fontsize
        assert item.fontsize >= 14
        ad['name'] = re.match(nameregex, item.text).groups()[0].strip()
        break
    # Elevation in feet from the "ELEV / REF" row.
    for item in page.get_by_regex(ur".*ELEV\s*/\s*REF.*"):
        lines = page.get_lines(
            page.get_partially_in_rect(0, item.y1 + 0.1, 100, item.y2 - 0.1))
        for line in lines:
            print "Line:", line
            ft, = re.match(".*ELEV.*([\d\.]+)\s*FT.*", line).groups()
            assert not 'elev' in ad
            ad['elev'] = float(ft)
def ey_parse_sigpoints(): points=[] p=parse.Parser("/EY_ENR_4_4_en_2011-03-10.pdf",lambda x: x,country='ee') for pagenr in xrange(p.get_num_pages()): page=p.parse_page_to_items(pagenr) for item in page.get_by_regex(ur"[\d\s]+N\s*[\d\s]+E"): lines=page.get_lines(page.get_partially_in_rect(0,item.y1+0.01,100,item.y2-0.01)) print "Sigpoint lines:%s"%(repr(lines,)) lines=[line for line in lines if re.match(ur"\s*\w{4,6}.*[\d\s]+N\s*[\d\s]+E.*",line,re.UNICODE)] print lines assert len(lines)==1 print "parse:",lines[0] name,lat,lon=re.match(ur"\s*(\w{4,6})\s*([\d\s]+N)\s*([\d\s]+E).*",lines[0],re.UNICODE).groups() points.append(dict( name=name, kind='sig. point', pos=mapper.parse_coords(lat.replace(" ",""),lon.replace(" ",""))))
def fi_parse_sigpoints(): points=[] p=parse.Parser("/ais/eaip/pdf/enr/EF_ENR_4_4_EN.pdf",lambda x: x,country='fi') for pagenr in xrange(p.get_num_pages()): page=p.parse_page_to_items(pagenr) for item in page.get_by_regex(ur"\d{6}N\s*\d{7}E"): lines=page.get_lines(page.get_partially_in_rect(0,item.y1,100,item.y2)) print "Sigpoint lines:%s"%(repr(lines,)) lines=[line for line in lines if re.match(ur"[A-Z]{5}.*\d{6}N\s*\d{7}E.*",line)] assert len(lines)==1 print "parse:",lines[0] name,lat,lon=re.match(ur"\s*([A-Z]{5})\s*(?:\(\s*FLYOVER\s*\))?\s*X?\s*(\d{6}N)\s*(\d{7}E)\s*.*",lines[0]).groups() points.append(dict( name=name, kind='sig. point', pos=mapper.parse_coords(lat,lon))) return points
def ey_parse_sigpoints(): points=[] p=parse.Parser("/EY_ENR_4_4_en_2011-03-10.pdf",lambda x: x,country='ee') for pagenr in xrange(p.get_num_pages()): page=p.parse_page_to_items(pagenr) for item in page.get_by_regex(ur"[\d\s]+N\s*[\d\s]+E"): lines=page.get_lines(page.get_partially_in_rect(0,item.y1+0.01,100,item.y2-0.01)) print "Sigpoint lines:%s"%(repr(lines,)) lines=[line for line in lines if re.match(ur"\s*\w{4,6}.*[\d\s]+N\s*[\d\s]+E.*",line,re.UNICODE)] print lines assert len(lines)==1 print "parse:",lines[0] name,lat,lon=re.match(ur"\s*(\w{4,6})\s*([\d\s]+N)\s*([\d\s]+E).*",lines[0],re.UNICODE).groups() points.append(dict( name=name, kind='sig. point', pos=mapper.parse_coords(lat.replace(" ",""),lon.replace(" ","")))) return points
def ee_parse_sigpoints(): points=[] p=parse.Parser("/index.aw?section=9142&action=genpdf&file=9142.pdf",lambda x: x,country='ee') for pagenr in xrange(p.get_num_pages()): page=p.parse_page_to_items(pagenr) for item in page.get_by_regex(ur"\d{6}N\s*\d{7}E"): lines=page.get_lines(page.get_partially_in_rect(0,item.y1+0.01,100,item.y2-0.01)) print "Sigpoint lines:%s"%(repr(lines,)) lines=[line for line in lines if re.match(ur"\s*\w{4,6}.*\d{6}N\s*\d{7}E.*",line,re.UNICODE)] print lines assert len(lines)==1 print "parse:",lines[0] name,lat,lon=re.match(ur"\s*(\w{4,6})\s*(\d{6}N)\s*(\d{7}E).*",lines[0],re.UNICODE).groups() points.append(dict( name=name, kind='sig. point', pos=mapper.parse_coords(lat,lon))) return points
def fi_parse_small_airfields(only=None):
    # Parse the Finnish VFR small-aerodrome booklet (one aerodrome per page)
    # into a list of dicts with name/icao/pos/freq/elev.
    # only: optional ICAO code -- when given, detail sections of all other
    # aerodromes are skipped (their name/icao are still recorded).
    p=parse.Parser("/ais/vfr/pdf/aerodromes.pdf",lambda x: x,country="fi")
    ads=dict()
    for pagenr in xrange(p.get_num_pages()):
        page=p.parse_page_to_items(pagenr)
        if not page: continue
        # The page heading (top 15% of page) carries "<name>, Finland <ICAO>".
        lines=page.get_lines(page.get_partially_in_rect(0,0,100,15))
        heading=lines[0].strip()
        if pagenr<4 and not heading[-4:-2]=="EF":
            continue #this is one of the first pages, with general info, not an airport sheet
        #print heading
        name,icao=re.match(ur"(.*),\s*Finland\s*(EF[A-Z]{2})",heading).groups()
        name=name.strip()
        ad=ads.setdefault(icao,dict())
        ad['name']=name
        ad['icao']=icao
        #print "Name: <%s> <%s>"%(icao,name)
        if only!=None and only!=icao:
            continue
        # ARP position: first line below the "ARP sijainti/location" label.
        for item in page.get_by_regex(ur"1.*ARP.*sijainti.*location"):
            posline=page.get_lines(page.get_partially_in_rect(0,item.y2+0.05,100,item.y2+5))[0]
            print "Posline:",posline
            lat,lon=re.match(ur"(\d{6}N) (\d{7}E).*",posline).groups()
            ad['pos']=mapper.parse_coords(lat.replace(" ",""),lon.replace(" ",""))
        # Radio frequency: first MHz value below the "FREQ MHZ" heading.
        for item in page.get_by_regex(ur"FREQ MHZ"):
            freqline=page.get_lines(page.get_partially_in_rect(item.x1,item.y2+0.05,item.x2+20,item.y2+5))[0]
            print "Freqline:",freqline
            freq,=re.match(ur"(\d{3}\.\d{3}).*",freqline).groups()
            ad['freq']=float(freq)
        # Field elevation: "nnn (mmm)" feet (metres) below "ELEV FT (M)".
        for item in page.get_by_regex(ur"ELEV FT \(M\)"):
            elevline=page.get_lines(page.get_partially_in_rect(item.x1,item.y2+0.05,item.x2+20,item.y2+5))[0]
            print "elevline:",elevline
            elev,=re.match(ur"(\d+)\s*\(\d+\)",elevline).groups()
            ad['elev']=int(elev)
    # Sanity: every sheet must have yielded a position.
    for icao,ad in ads.items():
        assert ad['icao']==icao
        assert 'pos' in ad
    return ads.values()
def get_airspaces(routes):
    # Generator: yields one RNAV airspace dict per route, built from the
    # hand-maintained module-level 'predef' table keyed by
    # (routename, signature-of-coords).  The signature guards against
    # silently reusing stale predefined data when the source route changes;
    # an unknown route raises, printing a template to paste into 'predef'.
    for routename, coords, altspec in routes:
        sig = getsig(coords, altspec)
        if (routename, sig) in predef:
            #pobj=None
            #for sroutename,ssig in predef:
            #    if routename==sroutename:
            pobj = (routename, sig)
            width_nm, floor, ceiling, coordstr = predef[pobj]
            rawcoords = re.findall(ur"(\d{6}N)\s*(\d{7}E)", coordstr)
            # Route centerline converted to mercator at zoom level 13.
            coords = [
                mapper.latlon2merc(
                    mapper.from_str(mapper.parse_coords(lats, lons)), 13)
                for lats, lons in rawcoords
            ]
            # Route width in mercator units; the 1.25 factor presumably adds
            # margin around the nominal width -- TODO confirm.
            width = float(
                mapper.approx_scale(coords[0], 13, 1.25 * width_nm))
            try:
                outline = get_outline(coords, width)
            except Exception:
                uprint(u"Trouble parsing %s" % (routename, ))
                raise
            yield dict(name=routename,
                       floor=floor,
                       ceiling=ceiling,
                       freqs=[],
                       type="RNAV",
                       points=[
                           mapper.to_str(mapper.merc2latlon(x, 13))
                           for x in outline
                       ])
        else:
            uprint(
                "Need to have predefine for route %s, with md5: %s" %
                (routename, sig))
            uprint("Altitude, and points")
            raise Exception(
                'Missing predef for route. Use: ("%s","%s") : ( 10,"FL 65","FL 95","""\n\n """),'
                % (routename, sig))
def load_finland(): zipname = getzip() zf = zipfile.ZipFile(zipname) areas = [] points = [] for fname in zf.namelist(): #print "File:",fname txt = zf.open(fname).read() if fname == "WaypointImport.txt": for row in txt.split("\n"): if row.strip() == "" or row.startswith("*"): continue #print repr(row) lat, lon, name = re.match(r"(\d+N) (\d+E)\s*(\w+)", row).groups() points.append( dict(name=unicode(name, 'utf8'), kind="sig. point", pos=mapper.parse_coords(lat, lon))) else: t = "TMA" if fname.count("D_Areas") or fname.count("TRA") or fname.count( "R_Areas"): t = "R" if fname.count("CTR"): t = "CTR" if fname.lower().count('finland_fir'): t = "FIR" areas.extend(list(parse_areas(txt, t))) for area in points: print "Point: %s: %s" % (area['name'], area['pos']) for area in areas: print "Area: %s - %s-%s: %s" % (area['name'], area['floor'], area['ceiling'], area['points']) return areas, points
def ee_parse_airfield(icao=None):
    # Parse the AIP AD 2 sheet for one Estonian airfield: name, elevation,
    # ARP position, runway thresholds and ATS frequencies.
    # NOTE(review): this copy ends after the frequency section with no
    # visible return -- looks like a truncated chunk; confirm the tail
    # against the project's canonical version.
    spaces = []
    ad = dict()
    ad["icao"] = icao
    sigpoints = []
    p = parse.Parser("/ee_%s.pdf" % (icao,), lambda x: x, country="ee")
    page = p.parse_page_to_items(0)
    print icao
    # Airfield name from the "<ICAO> - <NAME>" heading (accepts both ASCII
    # hyphen and U+2212 minus between ICAO and name).
    nameregex = ur".*%s\s*[-−]\s*([A-ZÅÄÖ\- ]{3,})" % (icao,)
    for item in page.get_by_regex(nameregex):
        print "fontsize:", item.fontsize
        assert item.fontsize >= 10
        ad["name"] = re.match(nameregex, item.text).groups()[0].strip()
        break
    else:
        raise Exception("Found no airfield name!")
    # Elevation in feet from the "Kõrgus merepinnast" (height above sea
    # level) row.
    for item in page.get_by_regex(ur".*Kõrgus merepinnast.*"):
        lines = page.get_lines(
            page.get_partially_in_rect(0, item.y1 + 0.1, 100, item.y2 - 0.1))
        for line in lines:
            ft, = re.match(".*?([\d\.]+)\s*FT\.*", line).groups()
            assert not "elev" in ad
            print "parsed ft:", ft
            ad["elev"] = float(ft)
    # Aerodrome reference point coordinates.
    for item in page.get_by_regex(ur"ARP koordinaadid"):
        lines = page.get_lines(
            page.get_partially_in_rect(item.x1, item.y1, 100, item.y2))
        for line in lines:
            print line
            for crd in mapper.parsecoords(line):
                assert not ("pos" in ad)
                ad["pos"] = crd
        break
    else:
        raise Exception("No coords")
    ad["runways"] = []
    thrs = []
    freqs = []
    for pagenr in xrange(p.get_num_pages()):
        page = p.parse_page_to_items(pagenr)
        print "Parsing page", pagenr
        for item in page.get_by_regex("\s*RUNWAY\s*PHYSICAL\s*CHARACTERISTICS\s*"):
            print "Phys char"
            # Locate the relevant table columns by their headings.
            coords, = page.get_by_regex_in_rect("RWY end coordinates", 0,
                                                item.y2, 100, 100)
            design, = page.get_by_regex_in_rect("Designations", 0, item.y2,
                                                100, 100)
            lines = page.get_lines(
                page.get_partially_in_rect(0, design.y2, design.x2, 100))
            print "Design", lines
            # Collect (runway-number, y-position) pairs; a (None, 100)
            # sentinel closes the last runway's vertical span.
            rwys = []
            for line in lines:
                m = re.match("(\d{2})", line)
                if m:
                    print "rwynum", line
                    rwys.append((m.groups()[0], line.y1))
            rwys.append((None, 100))
            for (rwy, y), (nextrwy, nexty) in izip(rwys, rwys[1:]):
                lines = page.get_lines(
                    page.get_partially_in_rect(coords.x1, y, coords.x2,
                                               nexty - 0.5))
                lines = [line for line in lines if line.strip()]
                print "Lines for rwy", lines
                # Expected cell order: THR lat, THR lon, end lat, end lon,
                # geoid undulation ("GUND ...").
                thrlat, thrlon, endlat, endlon, undulation = lines[:5]
                assert undulation.count("GUND")
                thrs.append(dict(pos=mapper.parse_coords(thrlat, thrlon),
                                 thr=rwy))
        print thrs
        # Disabled ATS-airspace parsing (kept for reference; mirrors the
        # Finnish airfield parser).
        if 0:
            for item in page.get_by_regex("ATS AIRSPACE"):
                lines = iter(
                    page.get_lines(
                        page.get_partially_in_rect(0, item.y2 + 0.1, 100,
                                                   100)))
                spaces = []
                while True:
                    line = lines.next()
                    # print "Read line:",line
                    if line.count("Vertical limits"):
                        break
                    m = re.match(
                        ur".*?/\s+Designation and lateral limits\s*(.*\b(?:CTR|FIZ)\b.*?)\s*:?\s*$",
                        line)
                    if not m:
                        m = re.match(ur"\s*(.*\b(?:CTR|FIZ)\b.*?)\s*:", line)
                        # print "Second try:",m
                    spacename, = m.groups()
                    # print "Got spacename:",spacename
                    assert spacename.strip() != ""
                    coords = []
                    while True:
                        line = lines.next()
                        # print "Further:",line
                        if line.count("Vertical limits"):
                            break
                        if not re.search(ur"[\d ]+N\s*[\d ]+E", line) and not re.search(
                                ur"circle|cent[red]{1,5}|pitkin|point", line):
                            break
                        coords.append(line)
                    areaspec = "".join(coords)
                    def fixup(m):
                        lat, lon = m.groups()
                        return lat.replace(" ", "") + " " + lon.replace(" ", "")
                    areaspec = re.sub(ur"([\d ]+N)\s*([\d ]+E)", fixup,
                                      areaspec)
                    # print "Fixed areaspec",areaspec
                    # if icao=="EFKS":
                    #    areaspec=areaspec.replace("6615 28N","661528N")
                    # Error! REstriction areas!
                    spaces.append(dict(name=spacename,
                                       type="CTR",
                                       points=mapper.parse_coord_str(areaspec)))
                    if line.count("Vertical limits"):
                        # print "Breaking"
                        break
                while not line.count("Vertical limits"):
                    line = lines.next()
                # print "Matching veritcal limits--------------------------------"
                # Split combined "A / B" space names into two entries.
                oldspaces = spaces
                spaces = []
                for space in oldspaces:
                    if space["name"].count("/"):
                        a, b = space["name"].split("/")
                        spaces.append(dict(space, name=a.strip()))
                        spaces.append(dict(space, name=b.strip()))
                    else:
                        spaces.append(space)
                missing = set([space["name"] for space in spaces])
                while True:
                    for space in spaces:
                        # print "Matching ",space['name']," to ",line,"missing:",missing
                        # Second iteration retries with the generic CTR/FIZ
                        # label when the full name does not match.
                        for it in xrange(2):
                            cand = space["name"]
                            if it == 1:
                                if cand.count("CTR"):
                                    cand = "CTR"
                                if cand.count("FIZ"):
                                    cand = "FIZ"
                            m = re.match(
                                ur".*%s\s*:([^,:-]*)\s*-\s*([^,:-]*)" %
                                (cand,), line)
                            if m:
                                break
                        if len(spaces) == 1 and not m:
                            m = re.match(
                                ur".*Vertical limits\s*(.*)\s*-\s*(.*)", line)
                        if m:
                            for lim in m.groups():
                                assert lim.count(",") == 0
                            space["floor"], space["ceiling"] = m.groups()
                            missing.remove(space["name"])
                            # print "Missing:"
                            if len(missing) == 0:
                                break
                    if len(missing) == 0:
                        break
                    line = lines.next()
        print "Parse f o n page", pagenr
        # TWR / ATIS frequencies from the communications table.
        for item2 in page.get_by_regex(ur".*ATS\s*COMMUNICATION\s*FACILITIES.*"):
            lines = page.get_lines(
                page.get_partially_in_rect(0, item2.y2 + 0.1, 100, 100))
            for line in lines:
                if line.count("RADIO NAVIGATION AND LANDING AIDS"):
                    break
                print "Comm line:", line
                twr = re.match(ur"TWR.*(\d{3}\.\d{3})\b.*", line)
                if twr:
                    freqs.append(("TWR", float(twr.groups()[0])))
                atis = re.match(ur"ATIS.*(\d{3}\.\d{3})", line)
                if atis:
                    freqs.append(("ATIS", float(atis.groups()[0])))
# NOTE(review): this line is the tail of a function whose definition starts
# above this chunk; it returns the raw blob bytes plus its checksum.
    return open(blobpath).read(),cksum

def get_chart_png(chartname,cksum):
    # Return the raw bytes of the level-2 rendered PNG for a chart.
    # Charts are stored under $SWFP_DATADIR/adcharts/<ICAO-prefix>/.
    icaoprefix=chartname[0:4].upper()
    tmppath=os.path.join(os.getenv("SWFP_DATADIR"),"adcharts",icaoprefix)
    pngpath=os.path.join(tmppath,chartname+"."+cksum+".2.png")
    return open(pngpath).read()

def get_width_height(chartname,cksum):
    # Return (width, height) in pixels of the chart's level-2 PNG.
    icao=chartname[0:4]
    tmppath=os.path.join(os.getenv("SWFP_DATADIR"),"adcharts",icao)
    pngpath=os.path.join(tmppath,chartname+"."+cksum+".2.png")
    im = Image.open(pngpath)
    return im.size

def get_timestamp(blobname,cksum,level):
    # Return the mtime of the stored blob file for the given zoom level.
    icao=blobname[0:4]
    tmppath=os.path.join(os.getenv("SWFP_DATADIR"),"adcharts",icao)
    path=os.path.join(tmppath,"%s.%s-%d.bin"%(blobname,cksum,level))
    return os.path.getmtime(path)

if __name__=='__main__':
    # Manual test entry point: parse one airport's landing chart
    # (ICAO code from argv, default ESMQ/Kalmar).
    if len(sys.argv)>1:
        cur_icao=sys.argv[1]
    else:
        cur_icao='ESMQ'
    arppos=mapper.parse_coords("564108N","0161715E")
    ret=parse_landing_chart("/AIP/AD/AD 2/%s/ES_AD_2_%s_2_1_en.pdf"%(cur_icao,cur_icao),icao=cur_icao,arppos=arppos)
    print "returns",ret
def fi_parse_airfield(icao=None):
    # Parse the AIP AD 2 sheet for one Finnish airfield: name, elevation,
    # ARP position, charts, runway thresholds, CTR/FIZ airspaces and ATS
    # frequencies.
    # NOTE(review): this copy ends after the frequency section with no
    # visible return -- looks like a truncated chunk; confirm the tail
    # against the project's canonical version.
    spaces=[]
    ad=dict()
    assert icao!=None
    ad['icao']=icao
    sigpoints=[]
    #https://ais.fi/ais/eaip/pdf/aerodromes/EF_AD_2_EFET_EN.pdf
    #https://ais.fi/ais/eaip/aipcharts/efet/EF_AD_2_EFET_VAC.pdf
    #vacp=parse.Parser("/ais/eaip/aipcharts/%s/EF_AD_2_%s_VAC.pdf"%(icao.lower(),icao),lambda x: x,country="fi")
    def remove_italics(x):
        # PDF text items may carry <i>...</i> markup; strip it.
        return x.replace("<i>","").replace("</i>","")
    p=parse.Parser("/ais/eaip/pdf/aerodromes/EF_AD_2_%s_EN.pdf"%(icao,),remove_italics,country="fi")
    #The following doesn't actually work, since finnish VAC are bitmaps!!! :-(
    if 0:
        vacpage=vacp.parse_page_to_items(0)
        repp=vacpage.get_by_regex("\s*REPORTING\s*POINTS\s*")
        assert len(repp)>0
        for item in repp:
            lines=iter(page.get_lines(page.get_partially_in_rect(item.x1,item.y2+0.1,100,100)))
            for line in lines:
                uprint("Looking for reporting points:%s"%(line,))
                name,lat,lon=re.match(ur"([A-ZÅÄÖ\s ]{3,})\s*([ \d]+N)\s*([ \d]+E).*",line)
                sigpoints.append(dict(
                    name=icao+" "+name.strip(),
                    kind="reporting",
                    pos=mapper.parse_coords(lat.replace(" ",""),lon.replace(" ",""))))
    page=p.parse_page_to_items(0)
    # Airfield name from the big "<ICAO> - <NAME>" page heading.
    nameregex=ur"%s\s+-\s+([A-ZÅÄÖ\- ]{3,})"%(icao,)
    for item in page.get_by_regex(nameregex):
        #print "fontsize:",item.fontsize
        assert item.fontsize>=14
        ad['name']=re.match(nameregex,item.text).groups()[0].strip()
        break
    # Elevation in feet from the "ELEV / REF" row.
    for item in page.get_by_regex(ur".*ELEV\s*/\s*REF.*"):
        lines=page.get_lines(page.get_partially_in_rect(0,item.y1+0.1,100,item.y2-0.1))
        for line in lines:
            print "Line:",line
            ft,=re.match(".*ELEV.*([\d\.]+)\s*FT.*",line).groups()
            assert not 'elev' in ad
            ad['elev']=float(ft)
    # ARP position from the "Mittapisteen sijainti" (measure-point
    # location) row.
    for item in page.get_by_regex(ur"Mittapisteen.*sijainti"):
        lines=page.get_lines(page.get_partially_in_rect(item.x1,item.y1,100,item.y2))
        for line in lines:
            for crd in mapper.parsecoords(line):
                assert not ('pos' in ad)
                ad['pos']=crd
    # Attach aerodrome / visual / landing / parking charts and the general
    # text document to the airfield record.
    parse_landing_chart.help_plc(ad,
        "/ais/eaip/aipcharts/%s/EF_AD_2_%s_ADC.pdf"%(icao.lower(),icao.upper()),
        icao,ad['pos'],country='fi'
        )
    parse_landing_chart.help_plc(ad,
        "/ais/eaip/aipcharts/%s/EF_AD_2_%s_VAC.pdf"%(icao.lower(),icao.upper()),
        icao,ad['pos'],country='fi',variant='VAC'
        )
    parse_landing_chart.help_plc(ad,
        "/ais/eaip/aipcharts/%s/EF_AD_2_%s_LDG.pdf"%(icao.lower(),icao.upper()),
        icao,ad['pos'],country='fi',variant='landing'
        )
    parse_landing_chart.help_plc(ad,
        "/ais/eaip/aipcharts/%s/EF_AD_2_%s_APDC.pdf"%(icao.lower(),icao.upper()),
        icao,ad['pos'],country='fi',variant='parking'
        )
    aip_text_documents.help_parse_doc(ad,"/ais/eaip/pdf/aerodromes/EF_AD_2_%s_EN.pdf"%(icao.upper(),),
        icao,"fi",title="General Information",category="general")
    ad['runways']=[]
    thrs=[]
    freqs=[]
    for pagenr in xrange(p.get_num_pages()):
        page=p.parse_page_to_items(pagenr)
        if page==None: continue
        # Runway thresholds from the physical-characteristics table.
        for item in page.get_by_regex("\s*RUNWAY\s*PHYSICAL\s*CHARACTERISTICS\s*"):
            lines=page.get_lines(page.get_partially_in_rect(0,item.y2+0.1,100,100))
            for line in lines:
                if re.match(ur"AD\s+2.13",line): break
                # THR or RWY-END coordinate row.
                m=re.match(ur".*?(RWY END)?\s*\*?(\d{6}\.\d+N)\s*(\d{6,7}\.\d+E).*",line)
                if not m:continue
                rwyend,lat,lon=m.groups()
                # Runway designator sits in the leftmost 12 units of the row.
                rwytxts=page.get_lines(page.get_partially_in_rect(0,line.y1,12,line.y2))
                print "Rwytxts:",rwytxts
                rwytxt,=rwytxts
                uprint("rwytext:",rwytxt)
                rwy,=re.match(ur"\s*(\d{2}[LRCM]?)\s*[\d.]*\s*",rwytxt).groups()
                have_thr=False
                for thr in thrs:
                    if thr['thr']==rwy: have_thr=True
                # Only the threshold (not the far end) is recorded per runway.
                if rwyend!=None and have_thr: continue
                thrs.append(dict(pos=mapper.parse_coords(lat,lon),thr=rwy))
        # CTR/FIZ airspaces: names + lateral limits, then vertical limits.
        for item in page.get_by_regex("ATS AIRSPACE"):
            lines=iter(page.get_lines(page.get_partially_in_rect(0,item.y2+0.1,100,100)))
            spaces=[]
            line=lines.next()
            while True:
                while line.strip()=="":
                    line=lines.next()
                print "Read line:",line
                if line.count("Vertical limits"): break
                m=re.match(ur".*?/\s+Designation and lateral limits\s*(.*\b(?:CTR|FIZ)\b.*?)\s*:?\s*$",line)
                if not m:
                    m=re.match(ur"\s*(.*\b(?:CTR|FIZ)\b.*?)\s*:",line)
                    #print "Second try:",m
                spacename,=m.groups()
                #print "Got spacename:",spacename
                assert spacename.strip()!=""
                coords=[]
                while True:
                    line=lines.next()
                    print "Further:",line
                    if line.count("Vertical limits"):
                        print "Breaking"
                        break
                    if not re.search(ur"[\d ]+N\s*[\d ]+E",line) and \
                        not re.search(ur"circle|cent[red]{1,5}|pitkin|point|equal\s*to",line):
                        print "Breaking"
                        break
                    coords.append(line)
                areaspec="".join(coords)
                def fixup(m):
                    # Remove embedded spaces inside each coordinate pair.
                    lat,lon=m.groups()
                    return lat.replace(" ","")+" "+lon.replace(" ","")
                areaspec=re.sub(ur"([\d ]+N)\s*([\d ]+E)",fixup,areaspec)
                areaspec=re.sub(ur"\(.*/\s*equal\s*to\s*Malmi\s*CTR\s*lateral\s*limits\)","",areaspec)
                #print "Fixed areaspec",areaspec
                #if icao=="EFKS":
                #    areaspec=areaspec.replace("6615 28N","661528N")
                #Error! REstriction areas!
                spaces.append(dict(
                    name=spacename,
                    type="CTR",
                    points=mapper.parse_coord_str(areaspec)))
                if line.count("Vertical limits"):
                    #print "Breaking"
                    break
            while not line.count("Vertical limits"):
                line=lines.next()
            #print "Matching veritcal limits--------------------------------"
            # Split combined "A / B" space names into two entries.
            oldspaces=spaces
            spaces=[]
            for space in oldspaces:
                if space['name'].count("/"):
                    a,b=space['name'].split("/")
                    spaces.append(dict(space,name=a.strip()))
                    spaces.append(dict(space,name=b.strip()))
                else:
                    spaces.append(space)
            missing=set([space['name'] for space in spaces])
            while True:
                for space in spaces:
                    # Retry with progressively more generic name candidates.
                    for it in xrange(3):
                        cand=space['name']
                        if it==1:
                            if cand.count("CTR"): cand="CTR"
                            if cand.count("FIZ"): cand="FIZ"
                        if it==2:
                            if cand.count("CTR"): cand=r"CTR\s*/[\sA-Z]+"
                            if cand.count("FIZ UPPER"): cand="FIZ UPPER"
                            if cand.count("FIZ LOWER"): cand="FIZ LOWER"
                        m=re.match(ur".*%s\s*:([^,:-]*)\s*-\s*([^,:-]*)"%(cand,),line)
                        print "Matching ",cand," to ",line,"missing:",missing,m
                        if m: break
                    if len(spaces)==1 and not m:
                        m=re.match(ur".*Vertical limits\s*(.*)\s*-\s*(.*)",line)
                    if m:
                        print "*****MATCH!!:::",m.groups()
                        for lim in m.groups():
                            assert lim.count(",")==0
                        space['floor'],space['ceiling']=m.groups()
                        missing.remove(space['name'])
                        #print "Missing:"
                        if len(missing)==0: break
                if len(missing)==0: break
                #print "Still missing:",missing
                line=lines.next()
        print "Parse f o n page",pagenr
        # TWR / ATIS frequencies from the communications table.
        for item2 in page.get_by_regex(ur".*ATS\s*COMMUNICATION\s*FACILITIES.*"):
            lines=page.get_lines(page.get_partially_in_rect(0,item2.y2+0.1,100,100))
            for line in lines:
                if line.count("RADIO NAVIGATION AND LANDING AIDS"): break
                print "Comm line:",line
                twr=re.match(ur"TWR.*(\d{3}\.\d{3})\b.*",line)
                if twr: freqs.append(('TWR',float(twr.groups()[0])))
                atis=re.match(ur"ATIS.*(\d{3}\.\d{3})",line)
                if atis: freqs.append(('ATIS',float(atis.groups()[0])))
m = re.match("(\d{2})", line) if m: print "rwynum", line rwys.append((m.groups()[0], line.y1)) rwys.append((None, 100)) for (rwy, y), (nextrwy, nexty) in izip(rwys, rwys[1:]): lines = page.get_lines( page.get_partially_in_rect(coords.x1, y, coords.x2, nexty - 0.5)) lines = [line for line in lines if line.strip()] print "Lines for rwy", lines thrlat, thrlon, endlat, endlon, undulation = lines[:5] assert undulation.count("GUND") thrs.append( dict(pos=mapper.parse_coords(thrlat, thrlon), thr=rwy)) print thrs if 0: for item in page.get_by_regex("ATS AIRSPACE"): lines = iter( page.get_lines( page.get_partially_in_rect(0, item.y2 + 0.1, 100, 100))) spaces = [] while True: line = lines.next() #print "Read line:",line if line.count("Vertical limits"): break m = re.match(
def parse_page(parser, pagenr):
    """Parse one page of the Finnish eAIP ENR airspace PDF into sections.

    Builds a sorted list of 'headings' (major TMA/MIL CTA names, minor
    sub-sector names, airway "AWY EF ..." rows and delegation rows), then
    walks every coordinate string on the page top-to-bottom, attributing
    each coordinate block to the heading above it.  Airway blocks are
    emitted into ``atsout`` (outline computed from centreline + width);
    area blocks collect a textual ``areaspec`` plus vertical limits and
    frequencies.

    NOTE(review): this copy of the function is truncated at the end of
    this chunk (the duplicate later in the file continues with
    ``if len(freqs): break`` and the area-emitting code).
    """
    page = parser.parse_page_to_items(pagenr)
    items = page.items
    # Left margin of the page content; used to bound the text column.
    minx = min([item.x1 for item in items])
    headings = []
    majorre = ur"\s*([A-ZÅÄÖ ][A-ZÅÄÖ]{3,})\s+(?:TMA|MIL CTA)\s*(?:-.*)?$"
    minorre = ur"\s*(?:TMA|MIL CTA [SN]?)\s*[A-ZÅÄÖ ]*\s*"
    airwayre = ur"(AWY\s+EF\s+[-A-Z]+)"
    delegre = ur".*(Delegation\s+of\s+responsibility).*"
    # Collect all four kinds of headings with their page items.
    for item in page.get_by_regex(majorre):
        m, = re.match(majorre, item.text).groups()
        assert m != None
        assert m.strip() != ""
        headings.append(('major', item.text.strip(), m, item))
    for item in page.get_by_regex(airwayre):
        m, = re.match(airwayre, item.text).groups()
        assert m != None
        assert m.strip() != ""
        headings.append(('airway', item.text.strip(), m, item))
    for item in page.get_by_regex(minorre):
        m = re.match(minorre, item.text).group()
        assert m != None
        assert m.strip() != ""
        #print "Heading %d: %s"%(item.y1,m)
        headings.append(('minor', item.text.strip(), m, item))
    for item in page.get_by_regex(delegre):
        m, = re.match(delegre, item.text).groups()
        assert m != None
        assert m.strip() != ""
        headings.append(('deleg', item.text.strip(), m, item))
    #print headings
    # Sort headings by vertical position on the page.
    headings.sort(key=lambda x: x[3].y1)

    def findheadingfor(y, meta=None):
        """Return (name, kind) of the nearest heading above page-y ``y``.

        Walks headings bottom-up; airway/deleg headings win outright,
        otherwise the closest minor heading is combined with the closest
        major heading above it.  ``meta`` (if given) receives the y
        coordinates of the matched headings.
        """
        minor = None
        major = None
        #print "HEadings:",headings
        for (kind, full, name, item) in reversed(headings):
            #print "Checking %s,%s (state: minor %s / major %s)"%(kind,item.y1,minor,major)
            if kind == 'airway' and item.y1 < y:
                return name, "airway"
            if kind == 'deleg' and item.y1 < y:
                return name, "deleg"
            if minor == None and kind == "minor" and item.y1 < y:
                minor = name.strip()
                if meta != None:
                    meta['minor_y'] = item.y1
            if major == None and kind == "major" and item.y1 < y:
                major = name.strip()
                fullname = full
                if meta != None:
                    meta['major_y'] = item.y1
                break
        assert major != None and major.strip() != ""
        if minor != None:
            return major + " " + minor, "area"
        return fullname, "area"

    cury = 0
    coordstrs = page.get_by_regex(ur".*\d{6}N \d{7}E.*")
    # Column extents of the "WID NM" and "vertical limits" table headers,
    # located in the top 15% of the page; needed for airway rows.
    airway_width = None
    airway_vlim = None
    for item in page.get_partially_in_rect(0, 0, 100, 15):
        if item.text.upper().count("WID NM"):
            airway_width = (item.x1, item.x2)
        if item.text.lower().count("vertical limits"):
            airway_vlim = (item.x1, item.x2)
    out = []
    atsout = []
    # Main sweep: advance cury past each coordinate block on the page.
    while True:
        found = False
        #print "Looking for coords, y= %d"%(cury,)
        for titem in coordstrs:
            #print "Considering coordstr: ",titem.y1
            if titem.y1 <= cury:
                continue
            if titem.x1 < 40:
                item = titem
                found = True
                break
        if not found:
            break
        cury = item.y1
        headmeta = dict()
        name, hkind = findheadingfor(item.y1, headmeta)
        if hkind == 'airway':
            assert airway_width and airway_vlim
            lines = page.get_lines(page.get_partially_in_rect(
                0, cury, minx + 35, 100), order_fudge=6)
            y1 = cury
            y2 = 100
            # Gather centreline coordinates until the next AWY row.
            coordlines = []
            for idx, line in enumerate(lines):
                if line.count("AWY") and line.count("EF"):
                    y2 = line.y1
                    break
                coordlines.append(line.strip())
            coordstr = " ".join(coordlines)
            inpoints = [
                mapper.parse_coords(lat, lon)
                for lat, lon in re.findall(r"(\d+N) (\d+E)", coordstr)
            ]
            # Width (NM) read from the WID NM column within this row span.
            for wcand in page.get_partially_in_rect(airway_width[0], y1 + 0.05,
                                                    airway_width[1], y2 - 0.05):
                width_nm = float(
                    re.match(r"(\d+\.?\d*)", wcand.text).groups()[0])
            # Vertical limits (two flight levels) from their column.
            elevs = []
            for vcand in page.get_partially_in_rect(airway_vlim[0], y1 + 0.05,
                                                    airway_vlim[1], y2 - 0.05):
                elevs.append(re.match(r"(FL\s*\d+)", vcand.text).groups()[0])
            elevs.sort(key=lambda x: mapper.parse_elev(x))
            floor, ceiling = elevs
            atsout.append(
                dict(floor=floor,
                     ceiling=ceiling,
                     freqs=[],
                     type="RNAV",
                     name=name,
                     points=ats_routes.get_latlon_outline(inpoints, width_nm)))
            cury = y2
            continue
        elif hkind == 'deleg':
            # Delegation rows carry no geometry we use; just skip past.
            y2 = cury + 1
            continue
        else:
            # Area (TMA / MIL CTA) block: collect the lateral-limit text
            # until the vertical-limits line or a blank line.
            areaspec = []
            #print "Rect: ",0,cury,minx+35,100
            y1 = cury
            lines = page.get_lines(page.get_partially_in_rect(
                0, cury, minx + 35, 100), order_fudge=10)
            for idx, line in enumerate(lines):
                if re.search(ur"FL \d+", line) or line.count("FT MSL"):
                    vertidx = idx
                    break
                #print "Line:",line.encode('utf8')
                if line.strip() == "":
                    vertidx = idx
                    break
                cury = max(cury, line.y2 + 0.5)
                # Normalize en-dash so coordinate ranges parse uniformly.
                line = line.replace(u"–", "-")
                if not (line.endswith("-") or line.endswith(" ")):
                    line += " "
                areaspec.append(line)
            # Find the two vertical-limit entries (FL or FT MSL/GND/SFC).
            verts = []
            for idx in xrange(vertidx, len(lines)):
                #print "Looking for alt:",lines[idx],"y2:",lines[idx].y2
                m = re.search(ur"(FL\s+\d+)", lines[idx].strip())
                if m:
                    verts.append((m.groups()[0], lines[idx].y1))
                m = re.search(ur"(\d+ FT (?:MSL|GND|SFC))", lines[idx].strip())
                if m:
                    verts.append((m.groups()[0], lines[idx].y1))
                if len(verts) >= 2:
                    break
            y2 = verts[-1][1]
            # Frequency scan: attempt 0 restricts to this area's own span,
            # attempt 1 widens to the enclosing major heading's span.
            freqs = []
            for attempt in xrange(2):
                for freqcand in page.get_by_regex(ur".*\d{3}\.\d{3}.*"):
                    #print "headmeta:",headmeta
                    #print "attempt:",attempt
                    #print "freqy1:",freqcand.y1
                    if freqcand.x1 < 30:
                        continue
                    if attempt == 0:
                        if freqcand.y1 < y1:
                            continue
                    else:
                        if 'major_y' in headmeta:
                            if freqcand.y1 < headmeta['major_y']:
                                continue
                        else:
                            if freqcand.y1 < y1:
                                continue
                    if freqcand.y1 > y2:
                        continue
                    x, y = freqcand.x1, freqcand.y1
                    # The station name is on the nearest all-caps line
                    # directly above the frequency.
                    lines = page.get_lines(
                        page.get_partially_in_rect(x + 0.1, y - 10, x + 5, y - 0.1))
                    freq, = re.match(ur".*(\d{3}\.\d{3}).*",
                                     freqcand.text).groups()
                    fname = None
                    for line in reversed(lines):
                        if re.match(ur"[A-ZÅÄÖ ]{3,}", line):
                            #print "freqname Matched:",line
                            fname = line.strip()
                            break
                    if not fname:
                        raise Exception(
                            "Found no frequency name for freq: " + freq)
                    freqs.append((fname, float(freq)))
print "FC",fc if not fc.count("Designations"): break #skip table if idx<2:continue if len(tr.getchildren())==1:continue print "c:",tr.getchildren(),alltexts(tr.getchildren()) desig,trubrg,dims,strength,thrcoord,threlev=tr.getchildren() rwy=re.match(r"(\d{2}[LRC]?)",alltext(desig)) altc=alltext(thrcoord) print "Matching",altc print "rwymatch:",alltext(desig) m=re.match(r"\s*(\d+\.?\d*N)[\s\n]*(\d+\.?\d*E).*",altc,re.DOTALL|re.MULTILINE) if m: lat,lon=m.groups() print "Got latlon",lat,lon thrs.append(dict(pos=mapper.parse_coords(lat,lon),thr=rwy.groups()[0])) addummy=dict() for h4 in tree.xpath(".//h4"): txt=alltext(h4) if txt.count("CHARTS"): par=h4.getparent() for table in par.xpath(".//table"): prevnametxt="" for idx,tr in enumerate(table.xpath(".//tr")): namepage=tr nametxt=alltext(tr) print "nametxt:",nametxt,"link:"
def parse_page(parser,pagenr):
    """Parse one page of the Finnish eAIP ENR airspace PDF into sections.

    Duplicate (compact-style) copy of ``parse_page`` found earlier in this
    file; see that copy for the same algorithm.  Builds a y-sorted heading
    list (major/minor areas, airways, delegation rows), then attributes
    every coordinate block on the page to the heading above it.  Airways
    go to ``atsout`` with an outline built from centreline + width; areas
    collect lateral-limit text, vertical limits and frequencies.

    NOTE(review): this copy too appears truncated by the chunking — it
    ends right after the frequency scan (``if len(freqs): break``) without
    emitting the collected area into ``out``.
    """
    page=parser.parse_page_to_items(pagenr)
    items=page.items
    # Left margin of the content; bounds the text column below.
    minx=min([item.x1 for item in items])
    headings=[]
    majorre=ur"\s*([A-ZÅÄÖ ][A-ZÅÄÖ]{3,})\s+(?:TMA|MIL CTA)\s*(?:-.*)?$"
    minorre=ur"\s*(?:TMA|MIL CTA [SN]?)\s*[A-ZÅÄÖ ]*\s*"
    airwayre=ur"(AWY\s+EF\s+[-A-Z]+)"
    delegre=ur".*(Delegation\s+of\s+responsibility).*"
    for item in page.get_by_regex(majorre):
        m,=re.match(majorre,item.text).groups()
        assert m!=None
        assert m.strip()!=""
        headings.append(('major',item.text.strip(),m,item))
    for item in page.get_by_regex(airwayre):
        m,=re.match(airwayre,item.text).groups()
        assert m!=None
        assert m.strip()!=""
        headings.append(('airway',item.text.strip(),m,item))
    for item in page.get_by_regex(minorre):
        m=re.match(minorre,item.text).group()
        assert m!=None
        assert m.strip()!=""
        #print "Heading %d: %s"%(item.y1,m)
        headings.append(('minor',item.text.strip(),m,item))
    for item in page.get_by_regex(delegre):
        m,=re.match(delegre,item.text).groups()
        assert m!=None
        assert m.strip()!=""
        headings.append(('deleg',item.text.strip(),m,item))
    #print headings
    # Order headings by vertical page position.
    headings.sort(key=lambda x:x[3].y1)
    def findheadingfor(y,meta=None):
        # Nearest heading above y; airway/deleg win, else major+minor combo.
        minor=None
        major=None
        #print "HEadings:",headings
        for (kind,full,name,item) in reversed(headings):
            #print "Checking %s,%s (state: minor %s / major %s)"%(kind,item.y1,minor,major)
            if kind=='airway' and item.y1<y:
                return name,"airway"
            if kind=='deleg' and item.y1<y:
                return name,"deleg"
            if minor==None and kind=="minor" and item.y1<y:
                minor=name.strip()
                if meta!=None:
                    meta['minor_y']=item.y1
            if major==None and kind=="major" and item.y1<y:
                major=name.strip()
                fullname=full
                if meta!=None:
                    meta['major_y']=item.y1
                break
        assert major!=None and major.strip()!=""
        if minor!=None:
            return major+" "+minor,"area"
        return fullname,"area"
    cury=0
    coordstrs=page.get_by_regex(ur".*\d{6}N \d{7}E.*")
    # Column extents of the "WID NM" / "vertical limits" table headers.
    airway_width=None
    airway_vlim=None
    for item in page.get_partially_in_rect(0,0,100,15):
        if item.text.upper().count("WID NM"):
            airway_width=(item.x1,item.x2)
        if item.text.lower().count("vertical limits"):
            airway_vlim=(item.x1,item.x2)
    out=[]
    atsout=[]
    # Sweep down the page, one coordinate block at a time.
    while True:
        found=False
        #print "Looking for coords, y= %d"%(cury,)
        for titem in coordstrs:
            #print "Considering coordstr: ",titem.y1
            if titem.y1<=cury:
                continue
            if titem.x1<40:
                item=titem
                found=True
                break
        if not found:
            break
        cury=item.y1
        headmeta=dict()
        name,hkind=findheadingfor(item.y1,headmeta)
        if hkind=='airway':
            assert airway_width and airway_vlim
            lines=page.get_lines(page.get_partially_in_rect(0,cury,minx+35,100),order_fudge=6)
            y1=cury
            y2=100
            # Centreline coordinates run until the next AWY row.
            coordlines=[]
            for idx,line in enumerate(lines):
                if line.count("AWY") and line.count("EF"):
                    y2=line.y1
                    break
                coordlines.append(line.strip())
            coordstr=" ".join(coordlines)
            inpoints=[mapper.parse_coords(lat,lon) for lat,lon in re.findall(r"(\d+N) (\d+E)",coordstr)]
            # Width (NM) from its own column within this row span.
            for wcand in page.get_partially_in_rect(airway_width[0],y1+0.05,airway_width[1],y2-0.05):
                width_nm=float(re.match(r"(\d+\.?\d*)",wcand.text).groups()[0])
            # Two FL values from the vertical-limits column.
            elevs=[]
            for vcand in page.get_partially_in_rect(airway_vlim[0],y1+0.05,airway_vlim[1],y2-0.05):
                elevs.append(re.match(r"(FL\s*\d+)",vcand.text).groups()[0])
            elevs.sort(key=lambda x:mapper.parse_elev(x))
            floor,ceiling=elevs
            atsout.append(dict(
                floor=floor,
                ceiling=ceiling,
                freqs=[],
                type="RNAV",
                name=name,
                points=ats_routes.get_latlon_outline(inpoints,
                    width_nm)))
            cury=y2
            continue
        elif hkind=='deleg':
            # Nothing extracted from delegation rows; skip past.
            y2=cury+1
            continue
        else:
            # Area block: lateral limits until vertical limits / blank line.
            areaspec=[]
            #print "Rect: ",0,cury,minx+35,100
            y1=cury
            lines=page.get_lines(page.get_partially_in_rect(0,cury,minx+35,100),order_fudge=10)
            for idx,line in enumerate(lines):
                if re.search(ur"FL \d+",line) or line.count("FT MSL"):
                    vertidx=idx
                    break
                #print "Line:",line.encode('utf8')
                if line.strip()=="":
                    vertidx=idx
                    break
                cury=max(cury,line.y2+0.5)
                # Normalize en-dash so ranges parse uniformly.
                line=line.replace(u"–","-")
                if not (line.endswith("-") or line.endswith(" ")):
                    line+=" "
                areaspec.append(line)
            # Pick up the two vertical-limit entries below the area text.
            verts=[]
            for idx in xrange(vertidx,len(lines)):
                #print "Looking for alt:",lines[idx],"y2:",lines[idx].y2
                m=re.search(ur"(FL\s+\d+)",lines[idx].strip())
                if m:
                    verts.append((m.groups()[0],lines[idx].y1))
                m=re.search(ur"(\d+ FT (?:MSL|GND|SFC))",lines[idx].strip())
                if m:
                    verts.append((m.groups()[0],lines[idx].y1))
                if len(verts)>=2:
                    break
            y2=verts[-1][1]
            # Frequency scan: attempt 0 = this area's span only,
            # attempt 1 = widen to the enclosing major heading.
            freqs=[]
            for attempt in xrange(2):
                for freqcand in page.get_by_regex(ur".*\d{3}\.\d{3}.*"):
                    #print "headmeta:",headmeta
                    #print "attempt:",attempt
                    #print "freqy1:",freqcand.y1
                    if freqcand.x1<30:
                        continue
                    if attempt==0:
                        if freqcand.y1<y1:
                            continue
                    else:
                        if 'major_y' in headmeta:
                            if freqcand.y1<headmeta['major_y']:
                                continue
                        else:
                            if freqcand.y1<y1:
                                continue
                    if freqcand.y1>y2:
                        continue
                    x,y=freqcand.x1,freqcand.y1
                    # Station name = nearest all-caps line just above.
                    lines=page.get_lines(page.get_partially_in_rect(x+0.1,y-10,x+5,y-0.1))
                    freq,=re.match(ur".*(\d{3}\.\d{3}).*",freqcand.text).groups()
                    fname=None
                    for line in reversed(lines):
                        if re.match(ur"[A-ZÅÄÖ ]{3,}",line):
                            #print "freqname Matched:",line
                            fname=line.strip()
                            break
                    if not fname:
                        raise Exception("Found no frequency name for freq: "+freq)
                    freqs.append((fname,float(freq)))
                # Stop after the first attempt that yielded frequencies.
                if len(freqs):
                    break
def get_notam_objs(kind=None):
    """Classify all active NOTAMs into obstacles, points and areas.

    Queries every NotamUpdate that has not been superseded
    (``disappearnotam == null``), then for each NOTAM text:

    * splits it into "coordinate groups" (consecutive digit-bearing lines),
    * extracts explicit RADIUS-around-point specifications as circle areas,
    * parses each coordinate group via ``mapper.parse_lfv_area`` and sorts
      the result into obstacles (OBST + ELEV), single points, or polygons.

    :param kind: optional filter — one of None (everything), "obstacle",
                 "notam" (points) or "notamarea".
    NOTE(review): no ``return`` is visible in this chunk; the function
    presumably returns the four lists further down — confirm against the
    full file.
    """
    notamupdates = meta.Session.query(NotamUpdate).filter(
        NotamUpdate.disappearnotam == sa.null()).all()
    obstacles = []
    others = []
    spaces = []
    areas = []
    for u in notamupdates:
        text = u.text.strip()
        # Known data error in one NOTAM: leading 'W' instead of '6'.
        if text.count("W52355N0234942E"):
            text = text.replace("W52355N0234942E", "652355N0234942E")
        # Group consecutive digit-bearing lines into coordinate groups;
        # a line without any digit terminates the current group.
        coordgroups = []
        for line in text.split("\n"):
            dig = False
            for char in line:
                if char.isdigit():
                    dig = True
            if dig == False:
                if len(coordgroups) and coordgroups[-1] != "":
                    coordgroups.append("")
            else:
                if len(coordgroups) == 0:
                    coordgroups = [""]
                coordgroups[-1] += line + "\n"
        if (kind == None or kind == "notamarea"):
            # Three phrasings of "circle of radius R around point P".
            for radius, unit, lat, lon in chain(
                    re.findall(
                        r"RADIUS\s*(?:OF)?\s*(\d+)\s*(NM|M)\s*(?:CENT[ERD]+|FR?O?M)?\s*(?:ON)?\s*(?:AT)?\s*(\d+[NS])\s*(\d+[EW])",
                        text),
                    re.findall(
                        r"(\d+)\s*(NM|M)\s*RADIUS\s*(?:CENT[ERD]+)?\s*(?:ON|AT|FROM)?\s*(\d+[NS])\s*(\d+[EW])",
                        text),
                    re.findall(
                        r"(\d+)\s*(NM|M)\s*RADIUS.*?[^0-9](\d+[NS])\s*(\d+[EW])",
                        text, re.DOTALL)):
                try:
                    radius = float(radius)
                    if unit == "M":
                        # Convert metres to nautical miles.
                        radius = radius / 1852.0
                    else:
                        assert unit == "NM"
                    centre = mapper.parse_coords(lat, lon)
                    coords = mapper.create_circle(centre, radius)
                    areas.append(
                        dict(points=coords,
                             kind="notamarea",
                             name=text,
                             type="notamarea",
                             notam_ordinal=u.appearnotam,
                             notam_line=u.appearline,
                             notam=text))
                except Exception, cause:
                    print "Invalid notam coords: %s,%s" % (lat, lon)
        for coordgroup in coordgroups:
            try:
                coords = list(mapper.parse_lfv_area(coordgroup, False))
            except Exception, cause:
                # Unparseable group: log and treat as empty.
                print "Parsing,", coordgroup
                print "Exception parsing lfv area from notam:%s" % (cause, )
                coords = []
            if len(coords) == 0:
                continue
            if text.count("OBST") and (kind == None or kind == "obstacle"):
                # Obstacle NOTAM: take the highest listed elevation.
                elevs = re.findall(r"ELEV\s*(\d+)\s*FT", text)
                elevs = [int(x) for x in elevs if x.isdigit()]
                if len(elevs) != 0:
                    elev = max(elevs)
                    for coord in coords:
                        obstacles.append(
                            dict(pos=coord,
                                 elev=elev,
                                 elevf=mapper.parse_elev(elev),
                                 kind='notam',
                                 notam_ordinal=u.appearnotam,
                                 notam_line=u.appearline,
                                 name=text.split("\n")[0],
                                 notam=text))
                    continue
            # Heuristic: too few coords, or mostly PSN-labelled positions,
            # means these are individual points rather than a polygon.
            couldbearea = True
            if len(coords) <= 2:
                couldbearea = False
            if text.count("PSN") >= len(coords) - 2:
                couldbearea = False
            if couldbearea == False and (kind == None or kind == "notam"):
                for coord in coords:
                    others.append(
                        dict(pos=coord,
                             kind='notam',
                             name=text,
                             notam_ordinal=u.appearnotam,
                             notam_line=u.appearline,
                             notam=text))
            if couldbearea == True and (kind == None or kind == "notamarea"):
                if len(coords) > 2:
                    if text.startswith("AREA: "):
                        continue #These aren't real notams, they're area-specifications for all other notams... make this better some day.
                    areas.append(
                        dict(points=coords,
                             kind="notamarea",
                             name=text,
                             type="notamarea",
                             notam_ordinal=u.appearnotam,
                             notam_line=u.appearline,
                             notam=text))
def ee_parse_airfields2():
    """Scrape the Estonian eAIP (HTML) for airfields and their airspace.

    Reads the AD overview page for the current AIRAC cycle to collect
    EExx ICAO codes (heliports excluded), then for each airfield parses
    its AD 2 page: name, ARP position, CTR/TIZ/FIZ lateral and vertical
    limits, chart links, runway thresholds and ATS frequencies.

    Results accumulate into ``ads`` (airfield dicts) and a per-airfield
    ``space`` dict appended to ``spaces``.
    NOTE(review): no ``return`` is visible in this chunk; presumably the
    function returns ``ads``/``spaces`` further down — confirm in the
    full file.
    """
    ads=[]
    spaces=[]
    airac_date=get_airac_date()
    print "airac",airac_date
    overview_url="/%s/html/eAIP/EE-AD-0.6-en-GB.html"%(airac_date,)
    parser=lxml.html.HTMLParser()
    data,date=fetchdata.getdata(overview_url,country='ee')
    parser.feed(data)
    tree=parser.close()
    # Collect ICAO codes from the overview page's <h3> link texts.
    icaos=[]
    for cand in tree.xpath(".//h3"):
        txts=alltexts(cand.xpath(".//a"))
        aps=re.findall(r"EE[A-Z]{2}"," ".join(txts))
        if aps:
            icao,=aps
            if alltext(cand).count("HELIPORT"):
                print "Ignore heliport",icao
                continue
            icaos.append(icao)
    for icao in icaos:
        ad=dict(icao=icao)
        url="/%s/html/eAIP/EE-AD-2.%s-en-GB.html"%(airac_date,icao)
        data,date=fetchdata.getdata(url,country='ee')
        parser.feed(data)
        tree=parser.close()
        thrs=[]
        # Airfield name from the "<ICAO> — <name>" heading.
        for h3 in tree.xpath(".//h3"):
            txt=alltext(h3)
            print repr(txt)
            ptrn=ur"\s*%s\s+[—-]\s+(.*)"%(unicode(icao.upper()),)
            m=re.match(ptrn,txt,re.UNICODE)
            if m:
                assert not 'name' in ad
                ad['name']=m.groups()[0]
        # ARP position from the "coordinates and site" table row.
        for tr in tree.xpath(".//tr"):
            txt=alltext(tr)
            m=re.match(r".*coordinates\s*and\s*site.*(\d{6}N\s*\d{7}E).*",txt)
            #print "Matching,",txt,":",m
            if m:
                crds,=m.groups()
                ad['pos']=mapper.anyparse(crds)
        # CTR/TIZ/FIZ lateral + vertical limits from the airspace table.
        space=dict()
        for table in tree.xpath(".//table"):
            for tr in table.xpath(".//tr"):
                trtxt=alltext(tr)
                if trtxt.count("Designation and lateral limits"):
                    space=dict()
                    coords=tr.getchildren()[2]
                    lines=alltext(coords).split("\n")
                    if lines[0].strip()=='NIL':
                        continue
                    zname,what,spill=re.match(ur"(.*)\s+(CTR|TIZ|FIZ)(.*)",lines[0]).groups()
                    # Text after the zone keyword on the first line belongs
                    # to the coordinate spec.
                    if spill and spill.strip():
                        rest=[spill]+lines[1:]
                    else:
                        rest=lines[1:]
                    what=what.strip()
                    assert ad['name'].upper().strip().count(zname.upper().strip())
                    assert what in ['FIZ','TIZ','CTR']
                    space['type']=what
                    space['points']=mapper.parse_coord_str("\n".join(rest))
                    space['name']=zname+" "+what
                    space['date']=date
                    space['url']=fetchdata.getrawurl(url,'ee')
                if trtxt.count("Vertical limits"):
                    vlim=alltext(tr.getchildren()[2])
                    if vlim.strip()=='NIL':
                        continue
                    space['floor'],space['ceiling']=vlim.split(" to ")
                #space['freqs']=x
        # Chart links (aerodrome / landing / parking / VAC).
        #hlc=False
        for h4 in tree.xpath(".//h4"):
            txt=alltext(h4)
            if txt.lower().count("charts"):
                par=h4.getparent()
                for table in par.xpath(".//table"):
                    for idx,tr in enumerate(table.xpath(".//tr")):
                        name,page=\
                            tr.getchildren()
                        nametxt=alltext(name)
                        print "nametxt:",nametxt,"link:"
                        for reg,variant in [
                                (r"Aerodrome.*Chart.*",""),
                                (r"Landing.*Chart.*","landing"),
                                (r".*Parking.*Chart.*","parking"),
                                (r".*Visual.*Approach.*|.*\bVAC\b.*","vac")
                                ]:
                            if re.match(reg,nametxt):
                                for a in page.xpath(".//a"):
                                    print "linklabel",a.text
                                    print "attrib:",a.attrib
                                    href=a.attrib['href']
                                    print "Bef repl",href
                                    if href.lower().endswith("pdf"):
                                        # Rewrite relative graphics path to
                                        # the AIRAC-dated absolute path.
                                        href=href.replace("../../graphics","/%s/graphics"%(airac_date,))
                                        print "href:",href,airac_date
                                        assert href
                                        parse_landing_chart.help_plc(ad,href,
                                            icao,ad['pos'],"ee",variant=variant)
                                        """arp=ad['pos']
                                        lc=parse_landing_chart.parse_landing_chart(
                                                href,
                                                icao=icao,
                                                arppos=arp,country="ee")
                                        assert lc
                                        if lc:
                                            ad['adcharturl']=lc['url']
                                            ad['adchart']=lc
                                            hlc=True
                                            #chartblobnames.append(lc['blobname'])
                                        """
        #assert hlc
        # Runway thresholds from the RUNWAY PHYSICAL CHARACTERISTICS table.
        for h4 in tree.xpath(".//h4"):
            txt=alltext(h4)
            if txt.count("RUNWAY PHYSICAL"):
                par=h4.getparent()
                for table in par.xpath(".//table"):
                    prevnametxt=""
                    for idx,tr in enumerate(table.xpath(".//tr")):
                        if idx==0:
                            fc=alltext(tr.getchildren()[0])
                            print "FC",fc
                            if not fc.count("Designations"):
                                break #skip table
                        if idx<2:continue
                        if len(tr.getchildren())==1:continue
                        print "c:",tr.getchildren(),alltexts(tr.getchildren())
                        desig,trubrg,dims,strength,thrcoord,threlev=tr.getchildren()
                        rwy=re.match(r"(\d{2}[LRC]?)",alltext(desig))
                        altc=alltext(thrcoord)
                        print "Matching",altc
                        print "rwymatch:",alltext(desig)
                        m=re.match(r"\s*(\d+\.?\d*N)[\s\n]*(\d+\.?\d*E).*",altc,re.DOTALL|re.MULTILINE)
                        if m:
                            lat,lon=m.groups()
                            print "Got latlon",lat,lon
                            thrs.append(dict(pos=mapper.parse_coords(lat,lon),thr=rwy.groups()[0]))
        # ATS frequencies (5-column table: service/callsign/freq/hours/remarks).
        space['freqs']=[]
        for h4 in tree.xpath(".//h4"):
            txt=alltext(h4)
            if txt.count("ATS COMMUNICATION"):
                par=h4.getparent()
                for table in par.xpath(".//table"):
                    for idx,tr in enumerate(table.xpath(".//tr")):
                        print "cs",repr(tr.getchildren()),alltexts(tr.getchildren())
                        print len(tr.getchildren())
                        if len(tr.getchildren())!=5:
                            if "".join(alltexts(tr.getchildren())).count(u"EMERG"):
                                continue #Sometimes emergency freq is listed, and then it is without callsign
                        service,callsign,frequency,hours,remarks=\
                            tr.getchildren()
                        callsigntxt=alltext(callsign)
                        if idx<2:
                            # First two rows are the header and column numbers.
                            if idx==0:
                                assert callsigntxt.strip()=="Call sign"
                            if idx==1:
                                assert callsigntxt.strip()=="2"
                            continue
                        ftext=alltext(frequency)
                        print "matching freq",ftext
                        for freq in re.findall(ur"\b\d{3}\.\d{1,3}",ftext):
                            freqmhz=float(freq)
                            space['freqs'].append((callsigntxt.strip(),freqmhz))
        # Only keep the airspace if a complete definition was assembled.
        if space and 'points' in space:
            assert 'freqs' in space
            assert 'points' in space
            assert 'floor' in space
            assert 'ceiling' in space
            assert 'type' in space
            spaces.append(space)
        if thrs:
            ad['runways']=rwy_constructor.get_rwys(thrs)
        aip_text_documents.help_parse_doc(ad,url,
            icao,"ee",title="General Information",category="general")
        ad['date']=date
        ad['url']=fetchdata.getrawurl(url,'ee')
        print "AD:",ad
        assert 'pos' in ad
        assert 'name' in ad
        ads.append(ad)
def extract_airfields(filtericao=lambda x:True,purge=True):
    """Extract Swedish AIP airfields, holding points and ATS airspace.

    Pipeline:
      1. Parse the AD 1 aerodrome directory for names/ICAO/pos/elev.
      2. Airfields missing a position ("big" airfields) get their VAC PDF
         parsed for holding and entry/exit points, and their AD 2 text
         PDF parsed for ARP, elevation, runway thresholds, COM
         frequencies and ATS airspace polygons.
      3. Charts and text documents are registered, extra airfields and
         flygkartan.csv cross-references merged in, and two regression
         files are written.

    :param filtericao: predicate on the airfield dict; False skips it.
    :param purge: accepted for interface compatibility; the purge call
                  below is commented out, so it is currently unused.
    :returns: (ads, points) — list of airfield dicts and list of
              holding/entry-point dicts.
    """
    #print getxml("/AIP/AD/AD 1/ES_AD_1_1_en.pdf")
    ads=[]
    p=Parser("/AIP/AD/AD 1/ES_AD_1_1_en.pdf")
    points=dict()
    startpage=None
    for pagenr in xrange(p.get_num_pages()):
        page=p.parse_page_to_items(pagenr)
        if page.count("Aerodrome directory"):
            startpage=pagenr
            break
    if startpage==None:
        raise Exception("Couldn't find aerodrome directory in file")
    #print "Startpage: %d"%(startpage,)
    #nochartf=open("nochart.txt","w")
    # --- 1. Aerodrome directory: a row starts with an uppercase name
    # line followed by a 4-letter ICAO line.
    for pagenr in xrange(startpage,p.get_num_pages()):
        row_y=[]
        page=p.parse_page_to_items(pagenr)
        allines=[x for x in (page.get_lines(page.get_partially_in_rect(0,0,15,100))) if x.strip()]
        for item,next in zip(allines,allines[1:]+[""]):
            #print "item:",item
            m=re.match(ur"^\s*[A-ZÅÄÖ]{3,}(?:/.*)?\b.*",item)
            if m:
                #print "Candidate, next is:",next
                if re.match(r"^\s*[A-Z]{4}\b.*",next):
                    #print "Matched:",item
                    #print "y1:",item.y1
                    row_y.append(item.y1)
        for y1,y2 in zip(row_y,row_y[1:]+[100.0]):
            #print "Extacting from y-range: %f-%f"%(y1,y2)
            items=list(page.get_partially_in_rect(0,y1-0.25,5.0,y2+0.25,ysort=True))
            if len(items)>=2:
                #print "Extract items",items
                ad=dict(name=unicode(items[0].text).strip(),
                        icao=unicode(items[1].text).strip()
                        )
                #print "Icao:",ad['icao']
                assert re.match(r"[A-Z]{4}",ad['icao'])
                if not filtericao(ad): continue
                if len(items)>=3:
                    #print "Coord?:",items[2].text
                    m=re.match(r".*(\d{6}N)\s*(\d{7}E).*",items[2].text)
                    if m:
                        lat,lon=m.groups()
                        ad['pos']=parse_coords(lat,lon)
                        #print "Items3:",items[3:]
                        elev=re.findall(r"(\d{1,5})\s*ft"," ".join(t.text for t in items[3:]))
                        #print "Elev:",elev
                        assert len(elev)==1
                        ad['elev']=int(elev[0])
                ads.append(ad)
    # Airfields listed WITHOUT a position have their own AD 2 chapter.
    big_ad=set()
    for ad in ads:
        if not ad.has_key('pos'):
            big_ad.add(ad['icao'])
    # --- 2a. VAC PDFs: holding points and entry/exit points.
    for ad in ads:
        icao=ad['icao']
        if icao in big_ad:
            if icao in ['ESIB','ESNY','ESCM','ESPE']:
                continue
            # Chart file naming varies: try _6_1 then _6-1.
            try:
                p=Parser("/AIP/AD/AD 2/%s/ES_AD_2_%s_6_1_en.pdf"%(icao,icao))
            except:
                p=Parser("/AIP/AD/AD 2/%s/ES_AD_2_%s_6-1_en.pdf"%(icao,icao))
            ad['aipvacurl']=p.get_url()
            for pagenr in xrange(p.get_num_pages()):
                page=p.parse_page_to_items(pagenr)
                """
                for altline in exitlines:
                    m=re.match(r"(\w+)\s+(\d+N)\s*(\d+E.*)",altline)
                    if not m: continue
                    name,lat,lon=m.groups()
                    try:
                        coord=parse_coords(lat,lon)
                    except Exception:
                        continue
                    points.append(dict(name=name,pos=coord))
                """
                # kind 0 = holding points, kind 1 = entry/exit points;
                # the int is replaced by its label string below.
                for kind in xrange(2):
                    if kind==0:
                        hits=page.get_by_regex(r"H[Oo][Ll][Dd][Ii][Nn][Gg]")
                        kind="holding point"
                    if kind==1:
                        hits=page.get_by_regex(r"[Ee]ntry.*[Ee]xit.*point")
                        kind="entry/exit point"
                    if len(hits)==0: continue
                    for holdingheading in hits:
                        items=sorted(page.get_partially_in_rect(holdingheading.x1+2.0,holdingheading.y2+0.1,holdingheading.x1+0.5,100),
                                     key=lambda x:x.y1)
                        items=[x for x in items if not x.text.startswith(" ")]
                        #print "Holding items:",items
                        for idx,item in enumerate(items):
                            print "Holding item",item
                            y1=item.y1
                            if idx==len(items)-1:
                                y2=100
                            else:
                                y2=items[idx+1].y1
                            items2=[x for x in page.get_partially_in_rect(item.x1+1,y1+0.3,item.x1+40,y2-0.1) if x.x1>=item.x1-0.25 and x.y1>=y1-0.05 and x.y1<y2-0.05]
                            s=(" ".join(page.get_lines(items2))).strip()
                            print "Holding lines:",repr(page.get_lines(items2))
                            #if s.startswith("ft Left/3"): #Special case for ESOK
                            #    s,=re.match("ft Left/3.*?([A-Z]{4,}.*)",s).groups()
                            #m=re.match("ft Left/\d+.*?([A-Z]{4,}.*)",s)
                            #if m:
                            #    s,=m.groups()
                            if s.startswith("LjUNG"): #Really strange problem with ESCF
                                s=s[0]+"J"+s[2:]
                            if s.lower().startswith("holding"):
                                sl=s.split(" ",1)
                                if len(sl)>1:
                                    s=sl[1]
                            s=s.strip()
                            if kind=="entry/exit point" and s.startswith("HOLDING"):
                                continue #reached HOLDING-part of VAC
                            #Check for other headings
                            #Fixup strange formatting of points in some holding items: (whitespace between coord and 'E')
                            s=re.sub(ur"(\d+)\s*(N)\s*(\d+)\s*(E)",lambda x:"".join(x.groups()),s)
                            m=re.match(r"([A-Z]{2,}).*?(\d+N)\s*(\d+E).*",s)
                            if not m:
                                # No name before the coordinate: only ESKN
                                # has known special cases.
                                m=re.match(r".*?(\d+N)\s*(\d+E).*",s)
                                if not m: continue
                                assert m
                                lat,lon=m.groups()
                                #skavsta
                                if icao=="ESKN":
                                    if s.startswith(u"Hold north of T"):
                                        name="NORTH"
                                    elif s.startswith(u"Hold south of B"):
                                        name="SOUTH"
                                    else:
                                        assert 0 #add more specials here
                                else:
                                    continue
                            else:
                                name,lat,lon=m.groups()
                            try:
                                coord=parse_coords(lat,lon)
                            except Exception:
                                print "Couldn't parse:",lat,lon
                                continue
                            #print name,lat,lon,mapper.format_lfv(*mapper.from_str(coord))
                            if name.count("REMARK") or len(name)<=2:
                                print "Suspicious name: ",name
                                #sys.exit(1)
                                continue
                            points[icao+' '+name]=dict(name=icao+' '+name,icao=icao,pos=coord,kind=kind)
    #for point in points.items():
    #    print point
    #sys.exit(1)
    def fixhex11(s):
        """Replace control chars (except TAB/LF/CR) with spaces — loadhook
        for PDFs containing stray low bytes."""
        out=[]
        for c in s:
            i=ord(c)
            if i>=0x20:
                out.append(c)
                continue
            if i in [0x9,0xa,0xd]:
                out.append(c)
                continue
            out.append(' ')
        return "".join(out)
    # --- 2b. AD 2 text PDFs: ARP, elevation, runways, freqs, airspace.
    for ad in ads:
        icao=ad['icao']
        if icao in big_ad:
            #print "Parsing ",icao
            p=Parser("/AIP/AD/AD 2/%s/ES_AD_2_%s_en.pdf"%(icao,icao),loadhook=fixhex11)
            ad['aiptexturl']=p.get_url()
            firstpage=p.parse_page_to_items(0)
            te="\n".join(firstpage.get_all_lines())
            #print te
            coords=re.findall(r"ARP.*(\d{6}N)\s*(\d{7}E)",te)
            if len(coords)>1:
                raise Exception("First page of airport info (%s) does not contain exactly ONE set of coordinates"%(icao,))
            if len(coords)==0:
                print "Couldn't find coords for ",icao
            #print "Coords:",coords
            # NOTE(review): if len(coords)==0 the line below raises
            # IndexError despite the print above — confirm whether
            # missing-ARP airports can actually occur here.
            ad['pos']=parse_coords(*coords[0])
            elev=re.findall(r"Elevation.*?(\d{1,5})\s*ft",te,re.DOTALL)
            if len(elev)>1:
                raise Exception("First page of airport info (%s) does not contain exactly ONE elevation in ft"%(icao,))
            if len(elev)==0:
                print "Couldn't find elev for ",icao
            # NOTE(review): same IndexError concern as for coords above.
            ad['elev']=int(elev[0])
            freqs=[]
            found=False
            thrs=[]
            #uprint("-------------------------------------")
            for pagenr in xrange(p.get_num_pages()):
                page=p.parse_page_to_items(pagenr)
                #uprint("Looking on page %d"%(pagenr,))
                if 0: #opening hours are no longer stored in a separate document for any airports. No need to detect which any more (since none are).
                    for item in page.get_by_regex(".*OPERATIONAL HOURS.*"):
                        lines=page.get_lines(page.get_partially_in_rect(0,item.y2+0.1,100,100))
                        for line in lines:
                            things=["ATS","Fuelling","Operating"]
                            if not line.count("AIP SUP"): continue
                            for thing in things:
                                if line.count(thing):
                                    ad['aipsup']=True
                # Runway thresholds: a lat line followed by a lon line,
                # with the runway designator in the left column.
                for item in page.get_by_regex(".*\s*RUNWAY\s*PHYSICAL\s*CHARACTERISTICS\s*.*"):
                    #uprint("Physical char on page")
                    lines=page.get_lines(page.get_partially_in_rect(0,item.y2+0.1,100,100))
                    seen_end_rwy_text=False
                    for line,nextline in izip(lines,lines[1:]+[None]):
                        #uprint("MAtching: <%s>"%(line,))
                        if re.match(ur"AD\s+2.13",line): break
                        if line.count("Slope of"): break
                        if line.lower().count("end rwy:"): seen_end_rwy_text=True
                        if line.lower().count("bgn rwy:"): seen_end_rwy_text=True
                        m=re.match(ur".*(\d{6}\.\d+)[\s\(\)\*]*(N).*",line)
                        if not m:continue
                        m2=re.match(ur".*(\d{6,7}\.\d+)\s*[\s\(\)\*]*(E).*",nextline)
                        if not m2:continue
                        latd,n=m.groups()
                        lond,e=m2.groups()
                        assert n=="N"
                        assert e=="E"
                        lat=latd+n
                        lon=lond+e
                        rwytxts=page.get_lines(page.get_partially_in_rect(0,line.y1+0.05,12,nextline.y2-0.05))
                        uprint("Rwytxts:",rwytxts)
                        rwy=None
                        for rwytxt in rwytxts:
                            #uprint("lat,lon:%s,%s"%(lat,lon))
                            #uprint("rwytext:",rwytxt)
                            m=re.match(ur"\s*(\d{2}[LRCM]?)\b.*",rwytxt)
                            if m:
                                assert rwy==None
                                rwy=m.groups()[0]
                        if rwy==None and seen_end_rwy_text:
                            continue
                        print "Cur airport:",icao
                        already=False
                        assert rwy!=None
                        seen_end_rwy_text=False
                        for thr in thrs:
                            if thr['thr']==rwy:
                                raise Exception("Same runway twice on airfield:"+icao)
                        thrs.append(dict(pos=mapper.parse_coords(lat,lon),thr=rwy))
            assert len(thrs)>=2
            # COM frequencies: "<name> <freq> MHz" rows under the
            # ATS COMMUNICATION FACILITIES heading.
            for pagenr in xrange(0,p.get_num_pages()):
                page=p.parse_page_to_items(pagenr)
                matches=page.get_by_regex(r".*ATS\s+COMMUNICATION\s+FACILITIES.*")
                #print "Matches of ATS COMMUNICATION FACILITIES on page %d: %s"%(pagenr,matches)
                if len(matches)>0:
                    commitem=matches[0]
                    curname=None
                    callsign=page.get_by_regex_in_rect(ur"Call\s*sign",0,commitem.y1,100,commitem.y2+8)[0]
                    for idx,item in enumerate(page.get_lines(page.get_partially_in_rect(callsign.x1-0.5,commitem.y1,100,100),fudge=0.3,order_fudge=15)):
                        if item.strip()=="":
                            curname=None
                        if re.match(".*RADIO\s+NAVIGATION\s+AND\s+LANDING\s+AIDS.*",item):
                            break
                        #print "Matching:",item
                        m=re.match(r"(.*?)\s*(\d{3}\.\d{1,3})\s*MHz.*",item)
                        #print "MHZ-match:",m
                        if not m: continue
                        #print "MHZ-match:",m.groups()
                        who,sfreq=m.groups()
                        freq=float(sfreq)
                        if abs(freq-121.5)<1e-4:
                            if who.strip():
                                curname=who
                            continue #Ignore emergency frequency, it is understood
                        # A nameless row continues the previous station.
                        if not who.strip():
                            if curname==None: continue
                        else:
                            curname=who
                        freqs.append((curname.strip().rstrip("/"),freq))
            # ATS airspace: subspace names + coordinate strings in the
            # designation column, vertical limits per subspace.
            for pagenr in xrange(0,p.get_num_pages()):
                page=p.parse_page_to_items(pagenr)
                matches=page.get_by_regex(r".*ATS\s*AIRSPACE.*")
                #print "Matches of ATS_AIRSPACE on page %d: %s"%(pagenr,matches)
                if len(matches)>0:
                    heading=matches[0]
                    desigitem,=page.get_by_regex("Designation and lateral limits")
                    vertitem,=page.get_by_regex("Vertical limits")
                    airspaceclass,=page.get_by_regex("Airspace classification")
                    lastname=None
                    subspacelines=dict()
                    subspacealts=dict()
                    for idx,item in enumerate(page.get_lines(page.get_partially_in_rect(desigitem.x2+1,desigitem.y1,100,vertitem.y1-1))):
                        if item.count("ATS airspace not established"):
                            assert idx==0
                            break
                        if item.strip()=="": continue
                        m=re.match(r"(.*?)(\d{6}N\s+.*)",item)
                        if m:
                            name,coords=m.groups()
                            name=name.strip()
                        else:
                            name=item.strip()
                            coords=None
                        if name:
                            lastname=name
                        if coords:
                            subspacelines.setdefault(lastname,[]).append(coords)
                        assert lastname
                    lastname=None
                    #print "Spaces:",subspacelines
                    #print "ICAO",ad['icao']
                    #altlines=page.get_lines(page.get_partially_in_rect(vertitem.x2+1,vertitem.y1,100,airspaceclass.y1-0.2))
                    #print "Altlines:",altlines
                    subspacealts=dict()
                    subspacekeys=subspacelines.keys()
                    allaltlines=" ".join(page.get_lines(page.get_partially_in_rect(vertitem.x1+0.5,vertitem.y1+0.5,100,airspaceclass.y1-0.2)))
                    single_vertlim=False
                    totalts=list(mapper.parse_all_alts(allaltlines))
                    #print "totalts:",totalts
                    if len(totalts)==2:
                        single_vertlim=True
                    for subspacename in subspacekeys:
                        ceil=None
                        floor=None
                        subnames=[subspacename]
                        # Also try matching by the bare zone suffix.
                        if subspacename.split(" ")[-1].strip() in ["TIA","TIZ","CTR","CTR/TIZ"]:
                            subnames.append(subspacename.split(" ")[-1].strip())
                        #print "Parsing alts for ",subspacename,subnames
                        try:
                            for nametry in subnames:
                                if single_vertlim: #there's only one subspace, parse all of vertical limits field for this single one.
                                    items=[vertitem]
                                else:
                                    items=page.get_by_regex_in_rect(nametry,vertitem.x2+1,vertitem.y1,100,airspaceclass.y1-0.2)
                                for item in items:
                                    alts=[]
                                    for line in page.get_lines(page.get_partially_in_rect(item.x1+0.5,item.y1+0.5,100,airspaceclass.y1-0.2)):
                                        #print "Parsing:",line
                                        line=line.replace(nametry,"").lower().strip()
                                        parsed=list(mapper.parse_all_alts(line))
                                        if len(parsed):
                                            alts.append(mapper.altformat(*parsed[0]))
                                        if len(alts)==2: break
                                    if alts:
                                        #print "alts:",alts
                                        ceil,floor=alts
                                        # StopIteration used as a labelled
                                        # break out of the nested loops.
                                        raise StopIteration
                        except StopIteration:
                            pass
                        assert ceil and floor
                        subspacealts[subspacename]=dict(ceil=ceil,floor=floor)
                    spaces=[]
                    for spacename in subspacelines.keys():
                        altspacename=spacename
                        #print "Altspacename: %s, subspacesalts: %s"%(altspacename,subspacealts)
                        space=dict(
                            name=spacename,
                            ceil=subspacealts[altspacename]['ceil'],
                            floor=subspacealts[altspacename]['floor'],
                            points=parse_coord_str(" ".join(subspacelines[spacename])),
                            freqs=list(set(freqs))
                            )
                        # Sanity check: polygon area in mercator pixels
                        # must exceed 30x30.
                        if True:
                            vs=[]
                            for p in space['points']:
                                x,y=mapper.latlon2merc(mapper.from_str(p),13)
                                vs.append(Vertex(int(x),int(y)))
                            p=Polygon(vvector(vs))
                            if p.calc_area()<=30*30:
                                pass#print space
                                pass#print "Area:",p.calc_area()
                            assert p.calc_area()>30*30
                            #print "Area: %f"%(p.calc_area(),)
                        spaces.append(space)
                        #print space
                    ad['spaces']=spaces
                    found=True
                if found:
                    break
            assert found
            ad['runways']=rwy_constructor.get_rwys(thrs)
    # --- 3. Charts + text documents for the big airfields.
    #Now find any ATS-airspace
    chartblobnames=[]
    for ad in ads:
        icao=ad['icao']
        if icao in big_ad:
            parse_landing_chart.help_plc(ad,"/AIP/AD/AD 2/%s/ES_AD_2_%s_2-1_en.pdf"%(icao,icao),
                            icao,ad['pos'],"se",variant="")
            parse_landing_chart.help_plc(ad,"/AIP/AD/AD 2/%s/ES_AD_2_%s_6-1_en.pdf"%(icao,icao),
                            icao,ad['pos'],"se",variant="vac")
            parse_landing_chart.help_plc(ad,"/AIP/AD/AD 2/%s/ES_AD_2_%s_2-3_en.pdf"%(icao,icao),
                            icao,ad['pos'],"se",variant="parking")
            #aip_text_documents.help_parse_doc(ad,"/AIP/AD/AD 2/%s/ES_AD_2_%s_6_1_en.pdf"%(icao,icao),
            #                icao,"se",title="General Information",category="general")
            aip_text_documents.help_parse_doc(ad,"/AIP/AD/AD 2/%s/ES_AD_2_%s_en.pdf"%(icao,icao),
                            icao,"se",title="General Information",category="general")
    #if purge:
    #    parse_landing_chart.purge_old(chartblobnames,country="se")
    #sys.exit(1)
    for extra in extra_airfields.extra_airfields:
        if filtericao(extra):
            ads.append(extra)
    print
    print
    for k,v in sorted(points.items()):
        print k,v,mapper.format_lfv(*mapper.from_str(v['pos']))
    #print "Num points:",len(points)
    # Cross-reference flygkartan.csv: match by mercator distance (<120 px
    # at zoom 13), else add as an unlisted 'ZZZZ' airfield.
    origads=list(ads)
    for flygkartan_id,name,lat,lon,dummy in csv.reader(open("fplan/extract/flygkartan.csv"),delimiter=";"):
        found=None
        lat=float(lat)
        lon=float(lon)
        if type(name)==str:
            name=unicode(name,'utf8')
        mercf=mapper.latlon2merc((lat,lon),13)
        for a in origads:
            merca=mapper.latlon2merc(mapper.from_str(a['pos']),13)
            dist=math.sqrt((merca[0]-mercf[0])**2+(merca[1]-mercf[1])**2)
            if dist<120:
                found=a
                break
        if found:
            found['flygkartan_id']=flygkartan_id
        else:
            d=dict(
                icao='ZZZZ',
                name=name,
                pos=mapper.to_str((lat,lon)),
                elev=int(get_terrain_elev((lat,lon))),
                flygkartan_id=flygkartan_id)
            if filtericao(d):
                ads.append(d)
    minor_ad_charts=extra_airfields.minor_ad_charts
    for ad in ads:
        if ad['name'].count(u"Långtora"):
            ad['pos']=mapper.to_str(mapper.from_aviation_format("5944.83N01708.20E"))
        if ad['name'] in minor_ad_charts:
            charturl=minor_ad_charts[ad['name']]
            arp=ad['pos']
            if 'icao' in ad and ad['icao'].upper()!='ZZZZ':
                icao=ad['icao'].upper()
            else:
                icao=ad['fake_icao']
            parse_landing_chart.help_plc(ad,charturl,icao,arp,country='raw',variant="landing")
            """
            assert icao!=None
            lc=parse_landing_chart.parse_landing_chart(
                            charturl,
                            icao=icao,
                            arppos=arp,country="raw")
            assert lc
            if lc:
                ad['adcharturl']=lc['url']
                ad['adchart']=lc
            """
    #print ads
    for ad in ads:
        print "%s: %s - %s (%s ft) (%s)"%(ad['icao'],ad['name'],ad['pos'],ad['elev'],ad.get('flygkartan_id','inte i flygkartan'))
        for space in ad.get('spaces',[]):
            for freq in space.get('freqs',[]):
                print "    ",freq
        #if 'spaces' in ad:
        #    print "    spaces: %s"%(ad['spaces'],)
        #if 'aiptext' in ad:
        #    print "Aip texts:",ad['aiptext']
        #else:
        #    print "No aiptext"
    print "Points:"
    for point in sorted(points.values(),key=lambda x:x['name']):
        print point
    # Regression outputs: one file with md5 digests, one with full repr.
    f=codecs.open("extract_airfields.regress.txt","w",'utf8')
    for ad in ads:
        r=repr(ad)
        d=md5.md5(r).hexdigest()
        f.write("%s - %s - %s\n"%(ad['icao'],ad['name'],d))
    f.close()
    f=codecs.open("extract_airfields.regress-details.txt","w",'utf8')
    for ad in ads:
        r=repr(ad)
        f.write(u"%s - %s - %s\n"%(ad['icao'],ad['name'],r))
    f.close()
    return ads,points.values()
def extract_airfields(filtericao=lambda x: True, purge=True):
    """Scrape the Swedish AIP for airfields, holding/entry-exit points and airspace.

    Pass 1 parses the aerodrome directory (ES_AD_1_1) into ``ads`` dicts
    (name, icao, and - when printed in the directory - pos and elev).
    Airfields listed WITHOUT a position are treated as "big" airfields and
    get a second pass over their per-airport AD 2 documents: VAC charts for
    holding / entry-exit points, and the textual AD 2 sheet for ARP
    position, elevation, runway thresholds, COM frequencies and ATS airspace.

    :param filtericao: predicate over the partially-built airfield dict;
        airfields for which it returns False are skipped.
    :param purge: not used anywhere in this block -- presumably consumed by
        a part of this routine outside the visible chunk; TODO confirm.

    NOTE(review): no ``return`` statement is visible in this block; the
    accumulated ``ads``/``points`` strongly suggest the function continues
    (or originally continued) with chart/document post-processing and a
    ``return ads, points.values()`` -- confirm against the full file.
    """
    # print getxml("/AIP/AD/AD 1/ES_AD_1_1_en.pdf")
    ads = []
    p = Parser("/AIP/AD/AD 1/ES_AD_1_1_en.pdf")
    points = dict()  # keyed "<ICAO> <name>" to de-duplicate holding/entry points
    startpage = None
    # Locate the first page of the aerodrome directory.
    for pagenr in xrange(p.get_num_pages()):
        page = p.parse_page_to_items(pagenr)
        if page.count("Aerodrome directory"):
            startpage = pagenr
            break
    if startpage == None:
        raise Exception("Couldn't find aerodrome directory in file")
    # print "Startpage: %d"%(startpage,)
    # nochartf=open("nochart.txt","w")
    for pagenr in xrange(startpage, p.get_num_pages()):
        row_y = []
        page = p.parse_page_to_items(pagenr)
        # Text items in the leftmost column (x 0..15) form the directory rows.
        allines = [x for x in (page.get_lines(page.get_partially_in_rect(0, 0, 15, 100))) if x.strip()]
        # NB: 'next' shadows the builtin; it is the following line, used to
        # confirm a candidate name row is followed by a 4-letter ICAO row.
        for item, next in zip(allines, allines[1:] + [""]):
            # print "item:",item
            m = re.match(ur"^\s*[A-ZÅÄÖ]{3,}(?:/.*)?\b.*", item)
            if m:
                # print "Candidate, next is:",next
                if re.match(r"^\s*[A-Z]{4}\b.*", next):
                    # print "Matched:",item
                    # print "y1:",item.y1
                    row_y.append(item.y1)
        # Slice the page vertically between successive row tops; 100.0 closes
        # the last slice at the page bottom.
        for y1, y2 in zip(row_y, row_y[1:] + [100.0]):
            # print "Extacting from y-range: %f-%f"%(y1,y2)
            items = list(page.get_partially_in_rect(0, y1 - 0.25, 5.0, y2 + 0.25, ysort=True))
            if len(items) >= 2:
                # print "Extract items",items
                ad = dict(name=unicode(items[0].text).strip(),
                          icao=unicode(items[1].text).strip())
                # print "Icao:",ad['icao']
                assert re.match(r"[A-Z]{4}", ad["icao"])
                if not filtericao(ad):
                    continue
                if len(items) >= 3:
                    # Optional third item holds the printed coordinates.
                    # print "Coord?:",items[2].text
                    m = re.match(r".*(\d{6}N)\s*(\d{7}E).*", items[2].text)
                    if m:
                        lat, lon = m.groups()
                        ad["pos"] = parse_coords(lat, lon)
                        # print "Items3:",items[3:]
                        elev = re.findall(r"(\d{1,5})\s*ft",
                                          " ".join(t.text for t in items[3:]))
                        # print "Elev:",elev
                        assert len(elev) == 1
                        ad["elev"] = int(elev[0])
                ads.append(ad)
    # Airfields without a directory position get full AD 2 document parsing.
    big_ad = set()
    for ad in ads:
        if not ad.has_key("pos"):
            big_ad.add(ad["icao"])
    # --- Pass over VAC (visual approach chart) PDFs: holding / entry-exit points.
    for ad in ads:
        icao = ad["icao"]
        if icao in big_ad:
            if icao in ["ESIB", "ESNY", "ESCM", "ESPE"]:
                continue  # these airports have no parseable VAC -- skip
            # Chart file name uses either "_6_1" or "_6-1" depending on airport.
            try:
                p = Parser("/AIP/AD/AD 2/%s/ES_AD_2_%s_6_1_en.pdf" % (icao, icao))
            except:
                p = Parser("/AIP/AD/AD 2/%s/ES_AD_2_%s_6-1_en.pdf" % (icao, icao))
            ad["aipvacurl"] = p.get_url()
            for pagenr in xrange(p.get_num_pages()):
                page = p.parse_page_to_items(pagenr)
                """
                for altline in exitlines:
                    m=re.match(r"(\w+)\s+(\d+N)\s*(\d+E.*)",altline)
                    if not m: continue
                    name,lat,lon=m.groups()
                    try:
                        coord=parse_coords(lat,lon)
                    except Exception:
                        continue
                    points.append(dict(name=name,pos=coord))
                """
                # kind starts as the loop index 0/1 and is immediately
                # rebound to the matching point-kind string.
                for kind in xrange(2):
                    if kind == 0:
                        hits = page.get_by_regex(r"H[Oo][Ll][Dd][Ii][Nn][Gg]")
                        kind = "holding point"
                    if kind == 1:
                        hits = page.get_by_regex(r"[Ee]ntry.*[Ee]xit.*point")
                        kind = "entry/exit point"
                    if len(hits) == 0:
                        continue
                    for holdingheading in hits:
                        # Items below (and slightly right of) the heading.
                        items = sorted(
                            page.get_partially_in_rect(
                                holdingheading.x1 + 2.0, holdingheading.y2 + 0.1,
                                holdingheading.x1 + 0.5, 100),
                            key=lambda x: x.y1)
                        items = [x for x in items if not x.text.startswith(" ")]
                        # print "Holding items:",items
                        for idx, item in enumerate(items):
                            print "Holding item", item
                            y1 = item.y1
                            if idx == len(items) - 1:
                                y2 = 100  # last item: scan to page bottom
                            else:
                                y2 = items[idx + 1].y1
                            items2 = [
                                x for x in page.get_partially_in_rect(
                                    item.x1 + 1, y1 + 0.3, item.x1 + 40, y2 - 0.1)
                                if x.x1 >= item.x1 - 0.25
                                and x.y1 >= y1 - 0.05 and x.y1 < y2 - 0.05
                            ]
                            s = (" ".join(page.get_lines(items2))).strip()
                            print "Holding lines:", repr(page.get_lines(items2))
                            # if s.startswith("ft Left/3"): #Special case for ESOK
                            #     s,=re.match("ft Left/3.*?([A-Z]{4,}.*)",s).groups()
                            # m=re.match("ft Left/\d+.*?([A-Z]{4,}.*)",s)
                            # if m:
                            #     s,=m.groups()
                            if s.startswith("LjUNG"):  # Really strange problem with ESCF
                                s = s[0] + "J" + s[2:]
                            if s.lower().startswith("holding"):
                                sl = s.split(" ", 1)
                                if len(sl) > 1:
                                    s = sl[1]
                            s = s.strip()
                            if kind == "entry/exit point" and s.startswith("HOLDING"):
                                continue  # reached HOLDING-part of VAC
                            # Check for other headings
                            # Fixup strange formatting of points in some holding
                            # items: (whitespace between coord and 'E')
                            s = re.sub(ur"(\d+)\s*(N)\s*(\d+)\s*(E)",
                                       lambda x: "".join(x.groups()), s)
                            m = re.match(r"([A-Z]{2,}).*?(\d+N)\s*(\d+E).*", s)
                            if not m:
                                # No name before coords -- try coords alone and
                                # synthesize names for known special cases.
                                m = re.match(r".*?(\d+N)\s*(\d+E).*", s)
                                if not m:
                                    continue
                                assert m
                                lat, lon = m.groups()
                                # skavsta
                                if icao == "ESKN":
                                    if s.startswith(u"Hold north of T"):
                                        name = "NORTH"
                                    elif s.startswith(u"Hold south of B"):
                                        name = "SOUTH"
                                    else:
                                        assert 0  # add more specials here
                                else:
                                    continue
                            else:
                                name, lat, lon = m.groups()
                            try:
                                coord = parse_coords(lat, lon)
                            except Exception:
                                print "Couldn't parse:", lat, lon
                                continue
                            # print name,lat,lon,mapper.format_lfv(*mapper.from_str(coord))
                            if name.count("REMARK") or len(name) <= 2:
                                print "Suspicious name: ", name
                                # sys.exit(1)
                                continue
                            points[icao + " " + name] = dict(
                                name=icao + " " + name, icao=icao,
                                pos=coord, kind=kind)
    # for point in points.items():
    #     print point
    # sys.exit(1)

    def fixhex11(s):
        # Replace control characters (other than tab/newline/CR) with spaces;
        # some AD 2 PDFs contain stray 0x11 bytes that break text extraction.
        out = []
        for c in s:
            i = ord(c)
            if i >= 0x20:
                out.append(c)
                continue
            if i in [0x9, 0xA, 0xD]:
                out.append(c)
                continue
            out.append(" ")
        return "".join(out)

    # --- Pass over the textual AD 2 sheet for each "big" airfield.
    for ad in ads:
        icao = ad["icao"]
        if icao in big_ad:
            # print "Parsing ",icao
            p = Parser("/AIP/AD/AD 2/%s/ES_AD_2_%s_en.pdf" % (icao, icao),
                       loadhook=fixhex11)
            ad["aiptexturl"] = p.get_url()
            firstpage = p.parse_page_to_items(0)
            te = "\n".join(firstpage.get_all_lines())
            # print te
            # ARP position must appear exactly once on the first page.
            coords = re.findall(r"ARP.*(\d{6}N)\s*(\d{7}E)", te)
            if len(coords) > 1:
                raise Exception(
                    "First page of airport info (%s) does not contain exactly ONE set of coordinates" % (icao,))
            if len(coords) == 0:
                print "Couldn't find coords for ", icao
            # print "Coords:",coords
            ad["pos"] = parse_coords(*coords[0])
            elev = re.findall(r"Elevation.*?(\d{1,5})\s*ft", te, re.DOTALL)
            if len(elev) > 1:
                raise Exception(
                    "First page of airport info (%s) does not contain exactly ONE elevation in ft" % (icao,))
            if len(elev) == 0:
                print "Couldn't find elev for ", icao
            ad["elev"] = int(elev[0])
            freqs = []
            found = False
            thrs = []
            # uprint("-------------------------------------")
            # Runway physical characteristics -> threshold positions.
            for pagenr in xrange(p.get_num_pages()):
                page = p.parse_page_to_items(pagenr)
                # uprint("Looking on page %d"%(pagenr,))
                if (0):  # opening hours are no longer stored in a separate document for any airports. No need to detect which any more (since none are).
                    for item in page.get_by_regex(".*OPERATIONAL HOURS.*"):
                        lines = page.get_lines(
                            page.get_partially_in_rect(0, item.y2 + 0.1, 100, 100))
                        for line in lines:
                            things = ["ATS", "Fuelling", "Operating"]
                            if not line.count("AIP SUP"):
                                continue
                            for thing in things:
                                if line.count(thing):
                                    ad["aipsup"] = True
                for item in page.get_by_regex(".*\s*RUNWAY\s*PHYSICAL\s*CHARACTERISTICS\s*.*"):
                    # uprint("Physical char on page")
                    lines = page.get_lines(
                        page.get_partially_in_rect(0, item.y2 + 0.1, 100, 100))
                    seen_end_rwy_text = False
                    # Latitude and longitude of a threshold appear on
                    # consecutive lines; pair each line with its successor.
                    for line, nextline in izip(lines, lines[1:] + [None]):
                        # uprint("MAtching: <%s>"%(line,))
                        if re.match(ur"AD\s+2.13", line):
                            break  # next AIP section -- stop scanning
                        if line.count("Slope of"):
                            break
                        if line.lower().count("end rwy:"):
                            seen_end_rwy_text = True
                        if line.lower().count("bgn rwy:"):
                            seen_end_rwy_text = True
                        m = re.match(ur".*(\d{6}\.\d+)[\s\(\)\*]*(N).*", line)
                        if not m:
                            continue
                        m2 = re.match(ur".*(\d{6,7}\.\d+)\s*[\s\(\)\*]*(E).*", nextline)
                        if not m2:
                            continue
                        latd, n = m.groups()
                        lond, e = m2.groups()
                        assert n == "N"
                        assert e == "E"
                        lat = latd + n
                        lon = lond + e
                        # Runway designator sits in the left column (x 0..12).
                        rwytxts = page.get_lines(
                            page.get_partially_in_rect(0, line.y1 + 0.05, 12,
                                                       nextline.y2 - 0.05))
                        uprint("Rwytxts:", rwytxts)
                        rwy = None
                        for rwytxt in rwytxts:
                            # uprint("lat,lon:%s,%s"%(lat,lon))
                            # uprint("rwytext:",rwytxt)
                            m = re.match(ur"\s*(\d{2}[LRCM]?)\b.*", rwytxt)
                            if m:
                                assert rwy == None
                                rwy = m.groups()[0]
                        if rwy == None and seen_end_rwy_text:
                            continue  # coordinates belonged to a rwy end row
                        print "Cur airport:", icao
                        already = False  # NOTE(review): assigned but never read
                        assert rwy != None
                        seen_end_rwy_text = False
                        for thr in thrs:
                            if thr["thr"] == rwy:
                                raise Exception("Same runway twice on airfield:" + icao)
                        thrs.append(dict(pos=mapper.parse_coords(lat, lon), thr=rwy))
            assert len(thrs) >= 2
            # ATS communication facilities -> (callsign, MHz) pairs.
            for pagenr in xrange(0, p.get_num_pages()):
                page = p.parse_page_to_items(pagenr)
                matches = page.get_by_regex(r".*ATS\s+COMMUNICATION\s+FACILITIES.*")
                # print "Matches of ATS COMMUNICATION FACILITIES on page %d: %s"%(pagenr,matches)
                if len(matches) > 0:
                    commitem = matches[0]
                    curname = None
                    callsign = page.get_by_regex_in_rect(
                        ur"Call\s*sign", 0, commitem.y1, 100, commitem.y2 + 8)[0]
                    for idx, item in enumerate(
                            page.get_lines(
                                page.get_partially_in_rect(
                                    callsign.x1 - 0.5, commitem.y1, 100, 100),
                                fudge=0.3, order_fudge=15)):
                        if item.strip() == "":
                            curname = None  # blank row ends the callsign group
                        if re.match(".*RADIO\s+NAVIGATION\s+AND\s+LANDING\s+AIDS.*", item):
                            break  # next AIP section
                        # print "Matching:",item
                        m = re.match(r"(.*?)\s*(\d{3}\.\d{1,3})\s*MHz.*", item)
                        # print "MHZ-match:",m
                        if not m:
                            continue
                        # print "MHZ-match:",m.groups()
                        who, sfreq = m.groups()
                        freq = float(sfreq)
                        if abs(freq - 121.5) < 1e-4:
                            if who.strip():
                                curname = who
                            continue  # Ignore emergency frequency, it is understood
                        if not who.strip():
                            # Frequency row without a callsign belongs to the
                            # previous named row, if any.
                            if curname == None:
                                continue
                        else:
                            curname = who
                        freqs.append((curname.strip().rstrip("/"), freq))
            # ATS airspace -> named subspaces with lateral and vertical limits.
            for pagenr in xrange(0, p.get_num_pages()):
                page = p.parse_page_to_items(pagenr)
                matches = page.get_by_regex(r".*ATS\s*AIRSPACE.*")
                # print "Matches of ATS_AIRSPACE on page %d: %s"%(pagenr,matches)
                if len(matches) > 0:
                    heading = matches[0]
                    desigitem, = page.get_by_regex("Designation and lateral limits")
                    vertitem, = page.get_by_regex("Vertical limits")
                    airspaceclass, = page.get_by_regex("Airspace classification")
                    lastname = None
                    subspacelines = dict()
                    subspacealts = dict()
                    # Collect coordinate strings per subspace name from the
                    # "Designation and lateral limits" column.
                    for idx, item in enumerate(
                            page.get_lines(
                                page.get_partially_in_rect(
                                    desigitem.x2 + 1, desigitem.y1, 100,
                                    vertitem.y1 - 1))):
                        if item.count("ATS airspace not established"):
                            assert idx == 0
                            break
                        if item.strip() == "":
                            continue
                        m = re.match(r"(.*?)(\d{6}N\s+.*)", item)
                        if m:
                            name, coords = m.groups()
                            name = name.strip()
                        else:
                            name = item.strip()
                            coords = None
                        if name:
                            lastname = name
                        if coords:
                            subspacelines.setdefault(lastname, []).append(coords)
                        assert lastname
                    lastname = None
                    # print "Spaces:",subspacelines
                    # print "ICAO",ad['icao']
                    # altlines=page.get_lines(page.get_partially_in_rect(vertitem.x2+1,vertitem.y1,100,airspaceclass.y1-0.2))
                    # print "Altlines:",altlines
                    subspacealts = dict()
                    subspacekeys = subspacelines.keys()
                    allaltlines = " ".join(
                        page.get_lines(
                            page.get_partially_in_rect(
                                vertitem.x1 + 0.5, vertitem.y1 + 0.5, 100,
                                airspaceclass.y1 - 0.2)))
                    single_vertlim = False
                    totalts = list(mapper.parse_all_alts(allaltlines))
                    # print "totalts:",totalts
                    if len(totalts) == 2:
                        # Exactly one ceiling+floor pair: shared by all subspaces.
                        single_vertlim = True
                    for subspacename in subspacekeys:
                        ceil = None
                        floor = None
                        subnames = [subspacename]
                        # Also try matching by bare zone type (e.g. "CTR").
                        if subspacename.split(" ")[-1].strip() in ["TIA", "TIZ", "CTR", "CTR/TIZ"]:
                            subnames.append(subspacename.split(" ")[-1].strip())
                        # print "Parsing alts for ",subspacename,subnames
                        # StopIteration is (ab)used as a multi-level break once
                        # a ceiling/floor pair has been found.
                        try:
                            for nametry in subnames:
                                if (single_vertlim):  # there's only one subspace, parse all of vertical limits field for this single one.
                                    items = [vertitem]
                                else:
                                    items = page.get_by_regex_in_rect(
                                        nametry, vertitem.x2 + 1, vertitem.y1,
                                        100, airspaceclass.y1 - 0.2)
                                for item in items:
                                    alts = []
                                    for line in page.get_lines(
                                            page.get_partially_in_rect(
                                                item.x1 + 0.5, item.y1 + 0.5,
                                                100, airspaceclass.y1 - 0.2)):
                                        # print "Parsing:",line
                                        line = line.replace(nametry, "").lower().strip()
                                        parsed = list(mapper.parse_all_alts(line))
                                        if len(parsed):
                                            alts.append(mapper.altformat(*parsed[0]))
                                        if len(alts) == 2:
                                            break
                                    if alts:
                                        # print "alts:",alts
                                        ceil, floor = alts
                                        raise StopIteration
                        except StopIteration:
                            pass
                        assert ceil and floor
                        subspacealts[subspacename] = dict(ceil=ceil, floor=floor)
                    spaces = []
                    for spacename in subspacelines.keys():
                        altspacename = spacename
                        # print "Altspacename: %s, subspacesalts: %s"%(altspacename,subspacealts)
                        space = dict(
                            name=spacename,
                            ceil=subspacealts[altspacename]["ceil"],
                            floor=subspacealts[altspacename]["floor"],
                            points=parse_coord_str(" ".join(subspacelines[spacename])),
                            freqs=list(set(freqs)),
                        )
                        if True:
                            # Sanity-check: the polygon must have a non-trivial
                            # area in mercator units.
                            # NOTE(review): 'p' (the Parser) is shadowed below by
                            # the Polygon; harmless only because 'found' is set
                            # and the page loop breaks right after.
                            vs = []
                            for p in space["points"]:
                                x, y = mapper.latlon2merc(mapper.from_str(p), 13)
                                vs.append(Vertex(int(x), int(y)))
                            p = Polygon(vvector(vs))
                            if p.calc_area() <= 30 * 30:
                                pass  # print space
                                pass  # print "Area:",p.calc_area()
                            assert p.calc_area() > 30 * 30
                            # print "Area: %f"%(p.calc_area(),)
                        spaces.append(space)
                        # print space
                    ad["spaces"] = spaces
                    found = True
                if found:
                    break
            assert found
            ad["runways"] = rwy_constructor.get_rwys(thrs)
def parse_sig_points():
    """Parse Swedish AIP ENR 4 documents into a list of point dicts.

    Two sources are combined:
      * ES_ENR_4_4 (significant points): rows of "NAME LAT LON ..." are
        detected by trying to parse columns 1-2 as coordinates; yields
        dicts with kind='sig. point'.
      * ES_ENR_4_1 (radio navigation aids): items are read pairwise under
        the "Name of station" column heading (name item + kind item), and
        the frequency / coordinate columns are sampled in the same vertical
        band; yields dicts with kind='nav-aid'.

    :return: list of dicts with keys name, kind, pos (and short/freq for
        nav-aids).
    """
    p = Parser("/AIP/ENR/ENR 4/ES_ENR_4_4_en.pdf")
    points = []
    for pagenr in xrange(p.get_num_pages()):
        # print "Processing page %d"%(pagenr,)
        page = p.parse_page_to_items(pagenr)
        lines = page.get_lines(page.get_all_items(), order_fudge=20)
        for line in lines:
            cols = line.split()
            if len(cols) > 2:
                # Columns 1-2 should hold "DDDDDDN DDDDDDDE" for a point row.
                coordstr = " ".join(cols[1:3])
                # print cols
                if len(mapper.parsecoords(coordstr)) > 0:
                    crd = mapper.parsecoord(coordstr)
                    # print "Found %s: %s"%(cols[0],crd)
                    points.append(
                        dict(name=cols[0], kind='sig. point', pos=crd))
    p = Parser("/AIP/ENR/ENR 4/ES_ENR_4_1_en.pdf")
    for pagenr in xrange(p.get_num_pages()):
        page = p.parse_page_to_items(pagenr)
        # Exactly one of each column heading is expected per page.
        nameheading, = page.get_by_regex(r".*Name of station.*")
        freqheading, = page.get_by_regex(r".*Frequency.*")
        coordheading, = page.get_by_regex(r".*Coordinates.*")
        # Non-empty items in the narrow band under the name heading, top-down.
        items = sorted(list(x for x in page.get_partially_in_rect(
            nameheading.x1, nameheading.y2 + 2, nameheading.x1 + 1, 100)
            if x.text.strip()), key=lambda x: x.y1)
        idx = 0
        # Items come in (name, kind) pairs; walk them two at a time.
        while True:
            # NOTE(review): items are pre-filtered to non-empty text, so this
            # branch looks unreachable; also idx is not bounds-checked here.
            if items[idx].text.strip() == "":
                idx += 1
                continue
            if idx + 1 >= len(items):
                break
            name = items[idx]
            kind = items[idx + 1]
            diffy = kind.y1 - name.y2
            # print "Name, kind:",name,kind
            # print name.text,kind.text,diffy
            assert kind.text.count("VOR") or kind.text.count(
                "DME") or kind.text.count("NDB")
            assert diffy < 0.5  # kind line must sit directly under the name
            # print "Frq cnt: <%s>"%(page.get_partially_in_rect(freqheading.x1,name.y1+0.05,freqheading.x2,kind.y2-0.05),)
            # Sample the frequency column over the same vertical band.
            freqraw = " ".join(
                page.get_lines(
                    page.get_partially_in_rect(freqheading.x1, name.y1 + 0.05,
                                               freqheading.x2, kind.y2 - 0.05)))
            short, freq = re.match(
                r"\s*([A-Z]{2,3})?\s*(\d+(?:\.?\d+)\s+(?:MHz|kHz))\s*(?:H24)?\s*",
                freqraw).groups()
            # Sample the coordinates column over the same vertical band.
            posraw = " ".join(
                page.get_lines(
                    page.get_partially_in_rect(coordheading.x1, name.y1 + 0.05,
                                               coordheading.x2, kind.y2 - 0.05)))
            # print "Rawpos<%s>"%(posraw,)
            pos = mapper.parse_coords(*re.match(
                r".*?(\d+\.\d+[NS]).*?(\d+\.\d+[EW]).*", posraw).groups())
            # print "Name: %s, Shortname: %s, Freq: %s,pos: %s"%(name.text,short,freq,pos)
            points.append(
                dict(name=short + " " + kind.text.strip() + " " +
                     name.text.strip(),
                     short=short,
                     kind="nav-aid",
                     pos=pos,
                     freq=freq))
            idx += 2
    return points
def get_notam_objs(kind=None):
    """Extract geometric objects (obstacles, points, areas) from active NOTAMs.

    Queries all NOTAM updates that have not been superseded
    (``disappearnotam IS NULL``) and classifies each NOTAM's coordinate
    content into:
      * ``obstacles`` -- OBST notams with an ELEV ... FT figure,
      * ``others``    -- point-like notams (too few coords, or PSN-listed),
      * ``areas``     -- polygon or circle ("RADIUS ... NM/M") notams.

    :param kind: optional filter; one of None (all), "notam", "notamarea",
        "obstacle".

    NOTE(review): the visible block never returns the accumulated lists, and
    ``spaces`` is never appended to -- presumably the original function ends
    with something like ``return dict(obstacles=..., others=..., spaces=...,
    areas=...)``; confirm against the full file.
    """
    notamupdates = meta.Session.query(NotamUpdate).filter(
        NotamUpdate.disappearnotam == sa.null()).all()
    obstacles = []
    others = []
    spaces = []
    areas = []
    for u in notamupdates:
        text = u.text.strip()
        # Work around a known typo in one NOTAM (W -> 6).
        if text.count("W52355N0234942E"):
            text = text.replace("W52355N0234942E", "652355N0234942E")
        # Split the NOTAM text into groups of consecutive digit-bearing
        # lines; each group is a candidate coordinate list.
        coordgroups = []
        for line in text.split("\n"):
            dig = False
            for char in line:
                if char.isdigit():
                    dig = True
            if dig == False:
                # Digit-free line terminates the current group.
                if len(coordgroups) and coordgroups[-1] != "":
                    coordgroups.append("")
            else:
                if len(coordgroups) == 0:
                    coordgroups = [""]
                coordgroups[-1] += line + "\n"
        if (kind == None or kind == "notamarea"):
            # Circle areas: several phrasings of "RADIUS x NM/M at LAT LON".
            for radius, unit, lat, lon in chain(
                    re.findall(r"RADIUS\s*(?:OF)?\s*(\d+)\s*(NM|M)\s*(?:CENT[ERD]+|FR?O?M)?\s*(?:ON)?\s*(?:AT)?\s*(\d+[NS])\s*(\d+[EW])", text),
                    re.findall(r"(\d+)\s*(NM|M)\s*RADIUS\s*(?:CENT[ERD]+)?\s*(?:ON|AT|FROM)?\s*(\d+[NS])\s*(\d+[EW])", text),
                    re.findall(r"(\d+)\s*(NM|M)\s*RADIUS.*?[^0-9](\d+[NS])\s*(\d+[EW])", text, re.DOTALL)):
                try:
                    radius = float(radius)
                    if unit == "M":
                        radius = radius / 1852.0  # metres -> nautical miles
                    else:
                        assert unit == "NM"
                    centre = mapper.parse_coords(lat, lon)
                    coords = mapper.create_circle(centre, radius)
                    areas.append(dict(
                        points=coords,
                        kind="notamarea",
                        name=text,
                        type="notamarea",
                        notam_ordinal=u.appearnotam,
                        notam_line=u.appearline,
                        notam=text))
                except Exception, cause:
                    print "Invalid notam coords: %s,%s" % (lat, lon)
        for coordgroup in coordgroups:
            try:
                coords = list(mapper.parse_lfv_area(coordgroup, False))
            except Exception, cause:
                print "Parsing,", coordgroup
                print "Exception parsing lfv area from notam:%s" % (cause,)
                coords = []
            if len(coords) == 0:
                continue
            if text.count("OBST") and (kind == None or kind == "obstacle"):
                # Obstacle NOTAM: use the highest listed elevation for every
                # listed position.
                elevs = re.findall(r"ELEV\s*(\d+)\s*FT", text)
                elevs = [int(x) for x in elevs if x.isdigit()]
                if len(elevs) != 0:
                    elev = max(elevs)
                    for coord in coords:
                        obstacles.append(dict(
                            pos=coord,
                            elev=elev,
                            elevf=mapper.parse_elev(elev),
                            kind='notam',
                            notam_ordinal=u.appearnotam,
                            notam_line=u.appearline,
                            name=text.split("\n")[0],
                            notam=text))
                    continue
            # Heuristic: a coordinate group is an area only if it has more
            # than two points and they are not individual PSN positions.
            couldbearea = True
            if len(coords) <= 2:
                couldbearea = False
            if text.count("PSN") >= len(coords) - 2:
                couldbearea = False
            if couldbearea == False and (kind == None or kind == "notam"):
                for coord in coords:
                    others.append(dict(
                        pos=coord,
                        kind='notam',
                        name=text,
                        notam_ordinal=u.appearnotam,
                        notam_line=u.appearline,
                        notam=text))
            if couldbearea == True and (kind == None or kind == "notamarea"):
                if len(coords) > 2:
                    if text.startswith("AREA: "):
                        continue  # These aren't real notams, they're area-specifications for all other notams... make this better some day.
                    areas.append(dict(
                        points=coords,
                        kind="notamarea",
                        name=text,
                        type="notamarea",
                        notam_ordinal=u.appearnotam,
                        notam_line=u.appearline,
                        notam=text))
def load_finland(): zipname = getzip() zf = zipfile.ZipFile(zipname) areas = [] points = [] for fname in zf.namelist(): # print "File:",fname txt = zf.open(fname).read() if fname == "WaypointImport.txt": for row in txt.split("\n"): if row.strip() == "" or row.startswith("*"): continue # print repr(row) lat, lon, name = re.match(r"(\d+N) (\d+E)\s*(\w+)", row).groups() points.append(dict(name=unicode(name, "utf8"), kind="sig. point", pos=mapper.parse_coords(lat, lon))) else: t = "TMA" if fname.count("D_Areas") or fname.count("TRA") or fname.count("R_Areas"): t = "R" if fname.count("CTR"): t = "CTR" if fname.lower().count("finland_fir"): t = "FIR" areas.extend(list(parse_areas(txt, t))) for area in points: print "Point: %s: %s" % (area["name"], area["pos"]) for area in areas: print "Area: %s - %s-%s: %s" % (area["name"], area["floor"], area["ceiling"], area["points"]) return areas, points
if not m: continue rwyend, lat, lon = m.groups() rwytxts = page.get_lines( page.get_partially_in_rect(0, line.y1, 12, line.y2)) print "Rwytxts:", rwytxts rwytxt, = rwytxts uprint("rwytext:", rwytxt) rwy, = re.match(ur"\s*(\d{2}[LRCM]?)\s*[\d.]*\s*", rwytxt).groups() have_thr = False for thr in thrs: if thr['thr'] == rwy: have_thr = True if rwyend != None and have_thr: continue thrs.append(dict(pos=mapper.parse_coords(lat, lon), thr=rwy)) for item in page.get_by_regex("ATS AIRSPACE"): lines = iter( page.get_lines( page.get_partially_in_rect(0, item.y2 + 0.1, 100, 100))) spaces = [] line = lines.next() while True: while line.strip() == "": line = lines.next() print "Read line:", line if line.count("Vertical limits"): break m = re.match( ur".*?/\s+Designation and lateral limits\s*(.*\b(?:CTR|FIZ)\b.*?)\s*:?\s*$",
def ee_parse_airfields2(): ads = [] spaces = [] airac_date = get_airac_date() print "airac", airac_date overview_url = "/%s/html/eAIP/EE-AD-0.6-en-GB.html" % (airac_date, ) parser = lxml.html.HTMLParser() data, date = fetchdata.getdata(overview_url, country='ee') parser.feed(data) tree = parser.close() icaos = [] for cand in tree.xpath(".//h3"): txts = alltexts(cand.xpath(".//a")) aps = re.findall(r"EE[A-Z]{2}", " ".join(txts)) if aps: icao, = aps if alltext(cand).count("HELIPORT"): print "Ignore heliport", icao continue icaos.append(icao) for icao in icaos: ad = dict(icao=icao) url = "/%s/html/eAIP/EE-AD-2.%s-en-GB.html" % (airac_date, icao) data, date = fetchdata.getdata(url, country='ee') parser.feed(data) tree = parser.close() thrs = [] for h3 in tree.xpath(".//h3"): txt = alltext(h3) print repr(txt) ptrn = ur"\s*%s\s+[—-]\s+(.*)" % (unicode(icao.upper()), ) m = re.match(ptrn, txt, re.UNICODE) if m: assert not 'name' in ad ad['name'] = m.groups()[0] for tr in tree.xpath(".//tr"): txt = alltext(tr) m = re.match(r".*coordinates\s*and\s*site.*(\d{6}N\s*\d{7}E).*", txt) #print "Matching,",txt,":",m if m: crds, = m.groups() ad['pos'] = mapper.anyparse(crds) space = dict() for table in tree.xpath(".//table"): for tr in table.xpath(".//tr"): trtxt = alltext(tr) if trtxt.count("Designation and lateral limits"): space = dict() coords = tr.getchildren()[2] lines = alltext(coords).split("\n") if lines[0].strip() == 'NIL': continue zname, what, spill = re.match(ur"(.*)\s+(CTR|TIZ|FIZ)(.*)", lines[0]).groups() if spill and spill.strip(): rest = [spill] + lines[1:] else: rest = lines[1:] what = what.strip() assert ad['name'].upper().strip().count( zname.upper().strip()) assert what in ['FIZ', 'TIZ', 'CTR'] space['type'] = what space['points'] = mapper.parse_coord_str("\n".join(rest)) space['name'] = zname + " " + what space['date'] = date space['url'] = fetchdata.getrawurl(url, 'ee') if trtxt.count("Vertical limits"): vlim = alltext(tr.getchildren()[2]) if vlim.strip() == 
'NIL': continue space['floor'], space['ceiling'] = vlim.split(" to ") #space['freqs']=x #hlc=False for h4 in tree.xpath(".//h4"): txt = alltext(h4) if txt.lower().count("charts"): par = h4.getparent() for table in par.xpath(".//table"): for idx, tr in enumerate(table.xpath(".//tr")): name,page=\ tr.getchildren() nametxt = alltext(name) print "nametxt:", nametxt, "link:" for reg, variant in [ (r"Aerodrome.*Chart.*", ""), (r"Landing.*Chart.*", "landing"), (r".*Parking.*Chart.*", "parking"), (r".*Visual.*Approach.*|.*\bVAC\b.*", "vac") ]: if re.match(reg, nametxt): for a in page.xpath(".//a"): print "linklabel", a.text print "attrib:", a.attrib href = a.attrib['href'] print "Bef repl", href if href.lower().endswith("pdf"): href = href.replace( "../../graphics", "/%s/graphics" % (airac_date, )) print "href:", href, airac_date assert href parse_landing_chart.help_plc( ad, href, icao, ad['pos'], "ee", variant=variant) """arp=ad['pos'] lc=parse_landing_chart.parse_landing_chart( href, icao=icao, arppos=arp,country="ee") assert lc if lc: ad['adcharturl']=lc['url'] ad['adchart']=lc hlc=True #chartblobnames.append(lc['blobname']) """ #assert hlc for h4 in tree.xpath(".//h4"): txt = alltext(h4) if txt.count("RUNWAY PHYSICAL"): par = h4.getparent() for table in par.xpath(".//table"): prevnametxt = "" for idx, tr in enumerate(table.xpath(".//tr")): if idx == 0: fc = alltext(tr.getchildren()[0]) print "FC", fc if not fc.count("Designations"): break #skip table if idx < 2: continue if len(tr.getchildren()) == 1: continue print "c:", tr.getchildren(), alltexts( tr.getchildren()) desig, trubrg, dims, strength, thrcoord, threlev = tr.getchildren( ) rwy = re.match(r"(\d{2}[LRC]?)", alltext(desig)) altc = alltext(thrcoord) print "Matching", altc print "rwymatch:", alltext(desig) m = re.match(r"\s*(\d+\.?\d*N)[\s\n]*(\d+\.?\d*E).*", altc, re.DOTALL | re.MULTILINE) if m: lat, lon = m.groups() print "Got latlon", lat, lon thrs.append( dict(pos=mapper.parse_coords(lat, lon), 
thr=rwy.groups()[0])) space['freqs'] = [] for h4 in tree.xpath(".//h4"): txt = alltext(h4) if txt.count("ATS COMMUNICATION"): par = h4.getparent() for table in par.xpath(".//table"): for idx, tr in enumerate(table.xpath(".//tr")): print "cs", repr(tr.getchildren()), alltexts( tr.getchildren()) print len(tr.getchildren()) if len(tr.getchildren()) != 5: if "".join(alltexts( tr.getchildren())).count(u"EMERG"): continue #Sometimes emergency freq is listed, and then it is without callsign service,callsign,frequency,hours,remarks=\ tr.getchildren() callsigntxt = alltext(callsign) if idx < 2: if idx == 0: assert callsigntxt.strip() == "Call sign" if idx == 1: assert callsigntxt.strip() == "2" continue ftext = alltext(frequency) print "matching freq", ftext for freq in re.findall(ur"\b\d{3}\.\d{1,3}", ftext): freqmhz = float(freq) space['freqs'].append( (callsigntxt.strip(), freqmhz)) if space and 'points' in space: assert 'freqs' in space assert 'points' in space assert 'floor' in space assert 'ceiling' in space assert 'type' in space spaces.append(space) if thrs: ad['runways'] = rwy_constructor.get_rwys(thrs) aip_text_documents.help_parse_doc(ad, url, icao, "ee", title="General Information", category="general") ad['date'] = date ad['url'] = fetchdata.getrawurl(url, 'ee') print "AD:", ad assert 'pos' in ad assert 'name' in ad ads.append(ad)