示例#1
0
def tratar_gob_es(total, visto, id, raiz, padre):
    """Parse one administracion.gob.es unit page and recurse into its children.

    Reads ``fuentes/administracion.gob.es/id_<id>.html``, extracts the unit
    code, address, heading text and the value captured by ``re_map`` from the
    map image URL, stores the resulting ``Organismo`` in ``visto`` and then
    processes every child unit linked from the "Estructura orgánica" field.

    Args:
        total: Expected number of units; only used for the progress display.
        visto: Dict mapping unit id to the ``Organismo`` already built for
            it. Updated in place.
        id: Numeric id of the unit to process.
        raiz: Passed to ``Organismo`` as ``idRaiz`` and forwarded unchanged
            to every child.
        padre: Code of the parent unit, or a falsy value for a root.

    Returns:
        None. A unit is silently skipped when its HTML file does not exist
        or when the page carries no "Código de unidad orgánica" field.

    Raises:
        IndexError: If the page has a unit code but no ``h1.ppg-heading``.
    """
    if id in visto:
        # Already parsed through another path: only record the extra parent.
        if padre:
            visto[id].idPadres.add(padre)
        return

    # The progress line is cosmetic; total == 0 must not abort the crawl.
    avance = len(visto) * 100 / total if total else 0
    print("%3d%% completado: %6d" % (avance, id),
          end="\r")

    fn = "fuentes/administracion.gob.es/id_%06d.html" % id
    if not os.path.isfile(fn):
        return
    soup = soup_from_file(fn)
    for n in soup.select(".hideAccessible"):
        n.extract()

    codigo = None
    deDireccion = None
    hijos = set()
    for div in soup.select("section div"):
        # Turn line breaks into spaces so "key: value" stays on one line.
        for br in div.find_all("br"):
            br.replace_with(" ")
        txt = re_bk.sub(" ", div.get_text()).strip()
        if ":" not in txt:
            continue
        key, value = [i.strip() for i in txt.split(":", 1)]
        if key == "Código de unidad orgánica":
            codigo = value
        elif key == "Estructura orgánica":
            # Read the value of the idUnidOrganica parameter itself instead
            # of assuming it is the first parameter of the query string.
            hijos = {
                int(a.attrs["href"]
                    .split("idUnidOrganica=", 1)[1]
                    .split("&", 1)[0])
                for a in div.select("a[href]")
                if "idUnidOrganica=" in a.attrs["href"]
            }
        elif key == "Dirección":
            deDireccion = value
    if not codigo:
        return

    deOrganismo = re_bk.sub(
        " ",
        soup.select("h1.ppg-heading")[0].get_text()).strip()

    latlon = None
    img = soup.find("img", attrs={"src": re_map})
    if img is not None:
        latlon = re_map.search(img.attrs["src"]).group(1)

    org = Organismo(codigo,
                    deOrganismo,
                    deDireccion,
                    latlon=latlon,
                    idRaiz=raiz,
                    idUnidOrganica=id)
    # Record the parent on first discovery too; previously it was only added
    # when the unit was reached a second time, so the first parent was lost.
    if padre:
        org.idPadres.add(padre)
    # Register before recursing so a cycle in the hierarchy hits the early
    # return above instead of recursing forever.
    visto[id] = org
    for h in hijos:
        tratar_gob_es(total, visto, h, raiz, codigo)
示例#2
0
    convocatorias = (
        (2016, 'L', 'BOE-A-2018-991'),
        (2015, 'L', 'BOE-A-2016-12467'),
    )

    total = 1 + len(xlss) + len(pdfs) + len(convocatorias)
    count = 1
    print ("Leyendo puestos")
    print("%3d%% completado: cod_provincia.htm" %
          (count * 100 / total,), end="\r")

    idde = {}
    idde["provincias"] = {}

    soup = soup_from_file("fuentes/cod_provincia.htm")
    for tr in soup.select("table.miTabla tr"):
        tds = [td.get_text().strip() for td in tr.findAll("td")]
        if len(tds) == 2 and tds[0].isdigit():
            cod, prov = tds
            idde["provincias"][int(cod)] = prov

    todos = []
    organismos = {}

    for xls in xlss:
        count = count + 1
        print("%3d%% completado: %-30s" %
              (count * 100 / total, os.path.basename(xls)), end="\r")
        wb = xlrd.open_workbook(xls)
        sh = wb.sheet_by_index(0)