Exemplo n.º 1
0
def ep_parse_wikipedia_airports(url):
    parser = lxml.html.HTMLParser()
    data, date = fetchdata.getdata(url, country="wikipedia")
    parser.feed(data)
    tree = parser.close()
    res = []
    for table in tree.xpath("//table"):
        for nr, row in enumerate(table.xpath(".//tr")):
            cols = list([alltext(x) for x in row.xpath(".//td")])
            print "#", nr, ": ", cols
            if nr == 0:
                if len(cols) == 0 or cols[0].strip() != "Airport":
                    break
                assert cols[3].strip() == "ICAO"
                assert cols[4].strip() == "Purpose"
                assert cols[5].strip().count("El")
                assert cols[9].strip() == "Coordinates"
            else:
                purpose = cols[4].strip()
                if purpose.count("Unused"): continue
                if purpose.count("Closed"): continue
                if purpose.count("Liquidated"): continue
                if purpose == "Military": continue  #Just military
                icao = cols[3].strip()
                if icao == "": icao = "ZZZZ"
                name = cols[0].strip()
                #print "lats:",row.xpath(".//span[@class='latitude']")
                lat, = alltexts(row.xpath(".//span[@class='latitude']"))
                lon, = alltexts(row.xpath(".//span[@class='longitude']"))
                coords = fixup(lat.strip() + " " + lon.strip())
                elevft = float(cols[5].strip())
                res.append(
                    dict(pos=mapper.parsecoord(coords),
                         name=name,
                         elev=elevft / 0.3048,
                         icao=icao,
                         date=date,
                         url=url))

    return res
Exemplo n.º 2
0
                            continue
                        if not seenreal and re.match(
                                ur".*The\s*line\s*joining.*", text):
                            continue
                        if not seenreal and text.endswith("following points:"):
                            continue
                        if not seenreal and text == "points:":
                            continue
                    if text.endswith("E"):
                        text = text + " - "
                    seenreal = True
                    coords.append(text)
                    last = sub
                pass
            assert points == None
            coordstr = fixup(" ".join(coords))
            print "Raw coords:", coordstr
            points = mapper.parse_coord_str(coordstr)
            assert ceiling
            assert floor
            assert ctrname
            spaces.append(
                dict(name=ctrname,
                     points=points,
                     type="CTR",
                     ceiling=ceiling,
                     floor=floor,
                     freqs=freqs))

            #not first page:
    assert points != None