示例#1
0
def update_catalog():
    """Regenerate the XLSX catalog from the cached data.json file.

    Builds the data.json cache first if it does not exist yet, loads it as a
    ``DataJson`` catalog, makes sure the ``themeTaxonomy`` key is present,
    writes the catalog as XLSX into a temporary directory created under
    ``CACHE_DIRECTORY`` and finally moves the result over ``XLSX_FILENAME``.

    The temporary directory is always removed, including when writing or
    renaming fails, so failed runs do not leave stray directories behind.
    """
    import shutil

    from pydatajson import writers, DataJson

    # The cache path is handed to DataJson, so make sure the file exists;
    # generate it when it is missing.
    if not os.path.isfile(CACHE_FILENAME):
        update_datajson_cache()

    catalog = DataJson(CACHE_FILENAME)
    catalog['themeTaxonomy'] = catalog.get('themeTaxonomy', [])

    # Write into a scratch directory next to the final destination and only
    # rename once the file is complete.
    tmp_dir = tempfile.mkdtemp(dir=CACHE_DIRECTORY)
    new_catalog_filename = os.path.join(tmp_dir, 'catalog.xlsx')
    try:
        writers.write_xlsx_catalog(catalog, new_catalog_filename)
        os.rename(new_catalog_filename, XLSX_FILENAME)
    finally:
        # Best-effort cleanup: never let a cleanup problem mask the original
        # error, and never leave the scratch directory behind.
        shutil.rmtree(tmp_dir, ignore_errors=True)
示例#2
0
def nodes_to_df(input_path):
    """Read the catalogs of the node network into a DataFrame.

    Args:
        input_path: Path to a JSON file with a top-level ``"jurisdictions"``
            list. Each jurisdiction has ``"id"``, ``"title"`` and a
            ``"catalogs"`` list; each catalog has ``"id"``, ``"url_json"``
            and optionally ``"url_xlsx"`` / ``"url_datosgobar"``.

    Returns:
        A ``pandas.DataFrame`` with one row per catalog and a fixed set of
        columns (see ``fields`` below). When no catalog is found the frame
        is empty but still carries those columns.

    Catalogs whose ``url_json`` cannot be loaded are still reported: the
    error is printed and their title/homepage are left empty.
    """
    fields = [
        "jurisdiction_id", "jurisdiction_title", "catalog_id", "catalog_title",
        "catalog_homepage", "catalog_url_json", "catalog_url_xlsx",
        "catalog_url_datosgobar"
    ]

    with open(input_path) as f:
        nodes = json.load(f)

    rows = []
    for jurisdiction in nodes["jurisdictions"]:
        for catalog in jurisdiction["catalogs"]:
            print("Leyendo catálogo '{}' de la jurisdiccion '{}' ({})".format(
                catalog["id"], jurisdiction["id"], jurisdiction["title"]),
                  end=" ")
            try:
                dj = DataJson(catalog["url_json"])
                print("...OK")
            except Exception as e:
                # Deliberate best-effort: a broken catalog must not abort
                # the whole scan, so fall back to empty metadata.
                dj = {}
                print("...ERROR")
                print(e)

            rows.append({
                "jurisdiction_id": jurisdiction["id"],
                "jurisdiction_title": jurisdiction["title"],
                "catalog_id": catalog["id"],
                "catalog_title": dj.get("title"),
                "catalog_homepage": dj.get("homepage"),
                "catalog_url_json": catalog["url_json"],
                "catalog_url_xlsx": catalog.get("url_xlsx"),
                "catalog_url_datosgobar": catalog.get("url_datosgobar")
            })

    # Passing ``columns`` (instead of indexing afterwards) fixes the column
    # order and also works when ``rows`` is empty, where indexing a
    # column-less frame would raise KeyError.
    return pd.DataFrame(rows, columns=fields)
示例#3
0
    def __init__(self, node: Node, task: IndexMetadataTask, index: str):
        """Prepare the indexer for one node.

        Stores the node, task and index name, grabs the connection returned
        by ``connections.get_connection()``, creates the index via
        ``init_index`` when it does not exist yet, initialises the fields
        metadata cache and loads the catalog themes.

        Args:
            node: Node whose ``catalog_url`` is read to obtain the themes.
            task: Task object kept on the instance as ``self.task``.
            index: Name of the index to work against.

        Raises:
            ValueError: If the catalog cannot be read or its themes cannot
                be processed; the underlying error is chained as the cause.
        """
        self.node = node
        self.task = task
        self.index_name = index
        self.elastic: Elasticsearch = connections.get_connection()

        if not self.elastic.indices.exists(self.index_name):
            init_index(self.index_name)

        self.fields_meta = {}
        self.init_fields_meta_cache()
        try:
            data_json = DataJson(node.catalog_url)
            themes = data_json.get('themeTaxonomy', [])
            self.themes = self.get_themes(themes)
        except Exception as err:
            # Keep the public exception type and message, but chain the
            # original error so the real cause shows up in tracebacks.
            raise ValueError(
                "Error de lectura de los themes del catálogo") from err