예제 #1
0
def build_files(df, config):
    """Write per-country data and metadata CSV files plus an index file.

    For every ISO3 code yielded by ``us.get_index_set(df)`` the rows of ``df``
    labelled with that code are reduced to the year/value/source/notes
    columns, scaled by ``config["multiplier"]`` and written to
    ``<gen_2_dir><prefix>_<iso3>_<suffix>.csv``. A companion
    ``<gen_2_dir><prefix>_<iso3>_<suffix>_meta.csv`` holding key/value
    metadata is written alongside it. A country whose processing raises is
    logged and skipped, so one bad country does not abort the whole run.

    Args:
        df: DataFrame whose index labels are ISO3 codes and which has the
            columns "Year", "Value", "Source" and "Notes".
        config: Mapping providing "multiplier", "source", "prefix",
            "suffix", "gen_2_dir", "indicator", "indicator_id",
            "definition", "indicator_category" and "indicator_type".
            "gen_2_dir" is concatenated directly with file names, so it has
            to end with a path separator.

    Returns:
        DataFrame with one row per successfully built country (the file
        stem), indexed by country name and sorted by that index. The same
        frame is written, without index or header, to
        ``<gen_2_dir>_<prefix>.csv``.
    """
    filelist = []
    countrylist = []
    for iso3 in us.get_index_set(df):
        try:
            # Label-based lookup; ``.ix`` was removed in pandas 1.0.
            idf = df.loc[iso3]
            if isinstance(idf, pd.Series):
                # A single matching row comes back as a Series, but the rest
                # of the pipeline expects a DataFrame.
                idf = pd.DataFrame([idf])
            # Work on an explicit copy so the column assignments below never
            # write through to (or warn about) a slice of ``df``.
            idf = idf[["Year", "Value", "Source", "Notes"]].copy()
            # The last column is "notes" so the CSV header matches the
            # "columns" entry recorded in the metadata file below.
            idf.columns = ["year", "value", "source", "notes"]

            mult = config["multiplier"]
            if mult:
                if -1 <= mult <= 1 or not isinstance(mult, int):
                    # Fractional or non-integer multiplier: keep full precision.
                    idf["value"] = idf["value"].apply(lambda x: x * mult)
                else:
                    # Integer multiplier with magnitude > 1: truncate to int and
                    # store as objects so the CSV shows integers, not floats.
                    idf["value"] = (
                        idf["value"].apply(lambda x: int(x * mult)).astype(object)
                    )

            # Every row gets the configured source, whatever the input held.
            idf["source"] = config["source"]
            idf["notes"] = idf["notes"].apply(lambda x: get_notes(str(x), config))

            filestem = config["prefix"] + "_" + iso3.lower() + "_" + config["suffix"]
            filename = filestem + ".csv"
            filepath = config["gen_2_dir"] + filename
            us.log(filepath)
            idf.to_csv(filepath, encoding="utf8", index=False)

            country = us.get_country_by_iso3(iso3)
            meta = [
                ("name", "%s - %s [CEPALStat]" % (country, config["indicator"])),
                ("originalsource", config["source"]),
                ("proximatesource", "CEPALStat"),
                ("dataset", config["indicator"] + " [" + config["indicator_id"] + "]"),
                ("description", config["definition"]),
                ("category", config["indicator_category"]),
                ("type", config["indicator_type"]),
                ("file", filename),
                # Hash is taken after the data file has been written.
                ("filehash", us.githash(filepath)),
                ("columns", "year,value,source,notes"),
            ]

            metafile = config["gen_2_dir"] + filestem + "_meta.csv"
            pd.DataFrame(meta, columns=["key", "value"]).to_csv(
                metafile, encoding="utf8", float_format="%.3f", index=False
            )
            # Only countries that were written completely reach the index.
            filelist.append([filestem])
            countrylist.append(country)
        except Exception:
            # Deliberate best effort: report the failure and move on to the
            # next country.
            us.log("ERROR: Failed to build data for %s" % iso3)
            us.log(sys.exc_info())
            traceback.print_tb(sys.exc_info()[2])

    fldf = pd.DataFrame(filelist, index=countrylist).sort_index()
    fldf.to_csv(
        config["gen_2_dir"] + "_" + config["prefix"] + ".csv",
        encoding="utf8",
        float_format="%.1f",
        index=False,
        header=False,
    )
    return fldf
예제 #2
0
def build_files(df, config):
    """Build one data CSV and one metadata CSV per country, then an index CSV.

    Each ISO3 code returned by ``us.get_index_set(df)`` is looked up in
    ``df`` by label. The selected rows are trimmed to four columns, the
    values are scaled by ``config["multiplier"]``, and the result is saved
    under ``config["gen_2_dir"]`` together with a ``*_meta.csv`` file of
    key/value metadata. Failures for a single country are logged and do not
    stop the remaining countries from being processed.

    Args:
        df: DataFrame with ISO3 codes as index labels and the columns
            "Year", "Value", "Source" and "Notes".
        config: Mapping with the keys "multiplier", "source", "prefix",
            "suffix", "gen_2_dir", "indicator", "indicator_id",
            "definition", "indicator_category" and "indicator_type".
            "gen_2_dir" is prepended to file names by plain string
            concatenation, so it needs a trailing path separator.

    Returns:
        DataFrame of file stems, one row per country that was written
        successfully, indexed by country name and sorted by index. It is
        also saved as ``<gen_2_dir>_<prefix>.csv`` without index or header.
    """
    filelist = []
    countrylist = []
    for iso3 in us.get_index_set(df):
        try:
            # ``.ix`` no longer exists in pandas >= 1.0; ``.loc`` performs the
            # same label-based selection.
            idf = df.loc[iso3]
            if isinstance(idf, pd.Series):
                # Exactly one row matched, so pandas returned a Series; wrap
                # it back into a one-row DataFrame.
                idf = pd.DataFrame([idf])
            # Explicit copy avoids chained-assignment warnings when the
            # columns are overwritten below.
            idf = idf[["Year", "Value", "Source", "Notes"]].copy()
            idf.columns = ["year", "value", "source", "notes"]

            mult = config["multiplier"]
            if mult:
                if -1 <= mult <= 1 or not isinstance(mult, int):
                    # Fractions and non-int multipliers keep float results.
                    idf["value"] = idf["value"].apply(lambda x: x * mult)
                else:
                    # Larger integer multipliers yield whole numbers; the
                    # object dtype keeps them from being written as floats.
                    idf["value"] = idf["value"].apply(lambda x: int(x * mult)).astype(object)

            # The source column is overwritten with the configured source.
            idf["source"] = config["source"]
            idf["notes"] = idf["notes"].apply(lambda x: get_notes(str(x), config))

            filestem = config["prefix"] + "_" + iso3.lower() + "_" + config["suffix"]
            filename = filestem + ".csv"
            filepath = config["gen_2_dir"] + filename
            us.log(filepath)
            idf.to_csv(filepath, encoding="utf8", index=False)

            country = us.get_country_by_iso3(iso3)
            meta = [
                ("name", "%s - %s [CEPALStat]" % (country, config["indicator"])),
                ("originalsource", config["source"]),
                ("proximatesource", "CEPALStat"),
                ("dataset", config["indicator"] + " [" + config["indicator_id"] + "]"),
                ("description", config["definition"]),
                ("category", config["indicator_category"]),
                ("type", config["indicator_type"]),
                ("file", filename),
                # The data file must already be on disk when it is hashed.
                ("filehash", us.githash(filepath)),
                ("columns", "year,value,source,notes"),
            ]

            metafile = config["gen_2_dir"] + filestem + "_meta.csv"
            pd.DataFrame(meta, columns=["key", "value"]).to_csv(
                metafile, encoding="utf8", float_format='%.3f', index=False
            )
            # Register the country only after both files were written.
            filelist.append([filestem])
            countrylist.append(country)
        except Exception:
            # Best effort by design: log the problem and continue with the
            # next country.
            us.log("ERROR: Failed to build data for %s" % iso3)
            us.log(sys.exc_info())
            traceback.print_tb(sys.exc_info()[2])

    fldf = pd.DataFrame(filelist, index=countrylist).sort_index()
    fldf.to_csv(
        config["gen_2_dir"] + "_" + config["prefix"] + ".csv",
        encoding="utf8",
        float_format='%.1f',
        index=False,
        header=False,
    )
    return fldf
예제 #3
0
# Export one data CSV per (iso3, df, mf) entry of `dataset` into
# config["gen_2_dir"] and assemble the key/value metadata that describes it.
# NOTE(review): nothing in this excerpt appends to `filelist`; presumably it
# is filled further down the loop body — confirm.
filelist = []
us.mkdirs(config["gen_2_dir"])
for (iso3, df, mf) in dataset:
    us.log(iso3)
    try:
        # Relabel the frame; this raises when df does not have exactly two
        # columns, which is how malformed inputs are detected and skipped.
        df.columns = ["description", "value"]
    except Exception:  # The data for St Kitts only has one column. We are excluding it for now.
        us.log(sys.exc_info())
        continue
    # File name pattern: <prefix>_<lower-cased iso3>_<suffix>.csv
    filestem = config["prefix"] + "_" + iso3.lower() + "_" + config["suffix"]
    filename = filestem + ".csv"
    # gen_2_dir is concatenated as-is, so it has to end with a path separator.
    filepath = config["gen_2_dir"] + filename
    df.to_csv(filepath, encoding="utf8", float_format='%.3f', index=False)

    country = us.get_country_by_iso3(iso3)
    # Source, note and unit are read from the "Capacity" row of mf.
    # NOTE(review): DataFrame.ix was removed in pandas 1.0; on current pandas
    # these lookups raise AttributeError. Consider mf.loc["Capacity"][...].
    meta = [("name", "%s - %s [SIDS RCM]" % (country, config["indicator"])),
            ("originalsource", mf.ix["Capacity"]["Source"]),
            ("proximatesource", "SIDS RCM"), ("dataset", config["indicator"]),
            ("description", config["description"]),
            ("note", mf.ix["Capacity"]["Note"]),
            ("unit", mf.ix["Capacity"]["Unit"]),
            ("category", config["indicator_category"]),
            ("type", config["indicator_type"]), ("file", filename),
            # Hash of the data file that was written just above.
            ("filehash", us.githash(filepath)),
            ("columns", "description,value")]

    # The metadata file sits next to the data file with a "_meta" suffix.
    metafile = config["gen_2_dir"] + filestem + "_meta.csv"
    pd.DataFrame(meta, columns=["key", "value"]).to_csv(metafile,
                                                        encoding="utf8",
                                                        float_format='%.3f',
    # Loop-body fragment: relabel the country's frame, write it to CSV and
    # build the metadata rows for it. The enclosing loop header is not part
    # of this excerpt.
    us.log(iso3)
    try:
        # Two-column frames are year/value; anything else is labelled as
        # year/value/notes, which raises unless there are exactly three columns.
        if len(df.columns) == 2:
            df.columns = ["year", "value"]
        else:
            df.columns = ["year", "value", "notes"]

    except Exception: # The data for St Kitts only has one column. We are excluding it for now.
        us.log(sys.exc_info())
        continue
    # File name pattern: <prefix>_<lower-cased iso3>_<suffix>.csv
    filestem = config["prefix"] + "_" + iso3.lower() + "_" + config["suffix"]
    filename = filestem + ".csv"
    # gen_2_dir is concatenated as-is, so it has to end with a path separator.
    filepath = config["gen_2_dir"] + filename
    df.to_csv(filepath, encoding="utf8", float_format='%.3f', index=False)

    country = us.get_country_by_iso3(iso3)
    # The per-indicator source/note/unit lookups from mf are disabled below;
    # a fixed "SIDS RCM" original source is recorded instead.
    # NOTE(review): "columns" says "description,value", but the frame was
    # labelled year/value[/notes] above — confirm which one is intended.
    meta = [("name", "%s - %s [SIDS RCM]" % (country, config["indicator"])),
      #  ("originalsource", mf.ix[keycol]["Source"]),
        ("originalsource", "SIDS RCM"),
        ("proximatesource", "SIDS RCM"),
        ("dataset", config["indicator"]),
        ("description", config["description"]),
      #  ("note", mf.ix[keycol]["Note"]),
      #  ("unit", mf.ix[keycol]["Unit"]),
        ("category", config["indicator_category"]),
        ("type", config["indicator_type"]),
        ("file", filename),
        # Hash of the data file that was written just above.
        ("filehash", us.githash(filepath)),
        ("columns", "description,value")
        ]