def build_files(df, config):
    """Write one data CSV and one metadata CSV per country found in ``df``.

    For every ISO3 code returned by ``us.get_index_set(df)`` the matching
    rows are selected, reduced to the Year/Value/Source/Notes columns,
    renamed, optionally scaled by ``config["multiplier"]`` and written to
    ``config["gen_2_dir"]``.  A ``*_meta.csv`` file describing the series is
    written next to it.  Finally an index file listing every file stem that
    was produced is written to the same directory.

    Args:
        df: DataFrame indexed by ISO3 code, with at least the columns
            ``Year``, ``Value``, ``Source`` and ``Notes``.
        config: Mapping read for the keys ``multiplier``, ``source``,
            ``prefix``, ``suffix``, ``gen_2_dir``, ``indicator``,
            ``indicator_id``, ``definition``, ``indicator_category`` and
            ``indicator_type``.

    Returns:
        A one-column DataFrame of file stems, indexed by country name and
        sorted by that index.

    A failure while processing a single country is logged and that country
    is skipped; it does not abort the remaining countries.
    """
    # Column names used in the written data file.  The metadata "columns"
    # entry is derived from this list so the two can never disagree.
    out_columns = ["year", "value", "source", "note"]

    filelist = []
    countrylist = []
    for iso3 in us.get_index_set(df):
        try:
            # ``.ix`` was removed in pandas 1.0; ``.loc`` performs the same
            # label-based lookup.
            idf = df.loc[iso3]
            if isinstance(idf, pd.Series):
                # A single matching row comes back as a Series, but the rest
                # of the pipeline expects a DataFrame.
                idf = pd.DataFrame([idf])

            # Explicit copy so the column assignments below operate on an
            # independent frame rather than on a selection of ``df``.
            idf = idf[["Year", "Value", "Source", "Notes"]].copy()
            idf.columns = out_columns

            mult = config["multiplier"]
            if mult:
                # Exact ``int`` type check is kept on purpose: only a plain
                # int multiplier with magnitude above 1 yields integer output.
                if -1 <= mult <= 1 or type(mult) is not int:
                    idf["value"] = idf["value"].apply(lambda x: x * mult)
                else:
                    # astype(object) keeps the Python ints from being
                    # converted back to a float column.
                    idf["value"] = (
                        idf["value"].apply(lambda x: int(x * mult)).astype(object)
                    )

            # Every row gets the configured source, whatever the input held.
            idf["source"] = idf["source"].apply(lambda x: config["source"])
            idf["note"] = idf["note"].apply(lambda x: get_notes(str(x), config))

            filestem = config["prefix"] + "_" + iso3.lower() + "_" + config["suffix"]
            filename = filestem + ".csv"
            filepath = config["gen_2_dir"] + filename
            us.log(filepath)
            idf.to_csv(filepath, encoding="utf8", index=False)

            country = us.get_country_by_iso3(iso3)
            meta = [
                ("name", "%s - %s [CEPALStat]" % (country, config["indicator"])),
                ("originalsource", config["source"]),
                ("proximatesource", "CEPALStat"),
                ("dataset", config["indicator"] + " [" + config["indicator_id"] + "]"),
                ("description", config["definition"]),
                ("category", config["indicator_category"]),
                ("type", config["indicator_type"]),
                ("file", filename),
                # Hash is taken after the data file has been written.
                ("filehash", us.githash(filepath)),
                ("columns", ",".join(out_columns)),
            ]
            metafile = config["gen_2_dir"] + filestem + "_meta.csv"
            pd.DataFrame(meta, columns=["key", "value"]).to_csv(
                metafile, encoding="utf8", float_format="%.3f", index=False
            )

            filelist.append([filestem])
            countrylist.append(country)
        except Exception:
            # Best effort per country: report the failure and carry on.
            us.log("ERROR: Failed to build data for %s" % iso3)
            us.log(sys.exc_info())
            traceback.print_tb(sys.exc_info()[2])

    fldf = pd.DataFrame(filelist, index=countrylist).sort_index()
    fldf.to_csv(
        config["gen_2_dir"] + "_" + config["prefix"] + ".csv",
        encoding="utf8",
        float_format="%.1f",
        index=False,
        header=False,
    )
    return fldf
def _scale_values(values, mult):
    """Return ``values`` multiplied element-wise by ``mult``.

    A plain ``int`` multiplier whose magnitude exceeds 1 produces Python
    ints stored in an object column; any other multiplier leaves the
    products as computed.
    """
    # Exact type check is deliberate: only a true ``int`` selects the
    # integer-truncating branch.
    if -1 <= mult <= 1 or type(mult) is not int:
        return values.apply(lambda x: x * mult)
    return values.apply(lambda x: int(x * mult)).astype(object)


def _cepalstat_meta(country, config, filename, filepath):
    """Build the (key, value) metadata rows describing one written data file."""
    return [
        ("name", "%s - %s [CEPALStat]" % (country, config["indicator"])),
        ("originalsource", config["source"]),
        ("proximatesource", "CEPALStat"),
        ("dataset", config["indicator"] + " [" + config["indicator_id"] + "]"),
        ("description", config["definition"]),
        ("category", config["indicator_category"]),
        ("type", config["indicator_type"]),
        ("file", filename),
        # The data file must already exist on disk when this is computed.
        ("filehash", us.githash(filepath)),
        ("columns", "year,value,source,notes"),
    ]


def build_files(df, config):
    """Export each country's rows of ``df`` as a data CSV plus a metadata CSV.

    The ISO3 codes to process come from ``us.get_index_set(df)``.  For each
    code the rows are selected by label, restricted to the
    Year/Value/Source/Notes columns, renamed to lower case, scaled by
    ``config["multiplier"]`` when it is truthy, and saved under
    ``config["gen_2_dir"]``.  An index CSV listing all produced file stems
    is written last.

    Args:
        df: DataFrame indexed by ISO3 code containing the columns ``Year``,
            ``Value``, ``Source`` and ``Notes``.
        config: Mapping providing ``multiplier``, ``source``, ``prefix``,
            ``suffix``, ``gen_2_dir``, ``indicator``, ``indicator_id``,
            ``definition``, ``indicator_category`` and ``indicator_type``.

    Returns:
        DataFrame of file stems indexed by country name, sorted by index.

    Errors raised while handling one country are logged and that country is
    skipped, so a single bad series does not stop the export.
    """
    filelist = []
    countrylist = []
    out_dir = config["gen_2_dir"]

    for iso3 in us.get_index_set(df):
        try:
            # ``DataFrame.ix`` no longer exists in pandas >= 1.0; ``.loc``
            # is the label-based equivalent.
            idf = df.loc[iso3]
            if isinstance(idf, pd.Series):
                # A lone row is returned as a Series; normalise to a frame.
                idf = pd.DataFrame([idf])
            idf = idf[["Year", "Value", "Source", "Notes"]]
            idf.columns = ["year", "value", "source", "notes"]

            mult = config["multiplier"]
            if mult:
                idf["value"] = _scale_values(idf["value"], mult)

            # The source column is overwritten with the configured source.
            idf["source"] = idf["source"].apply(lambda x: config["source"])
            idf["notes"] = idf["notes"].apply(lambda x: get_notes(str(x), config))

            filestem = config["prefix"] + "_" + iso3.lower() + "_" + config["suffix"]
            filename = filestem + ".csv"
            filepath = out_dir + filename
            us.log(filepath)
            idf.to_csv(filepath, encoding="utf8", index=False)

            country = us.get_country_by_iso3(iso3)
            meta = _cepalstat_meta(country, config, filename, filepath)
            metafile = out_dir + filestem + "_meta.csv"
            pd.DataFrame(meta, columns=["key", "value"]).to_csv(
                metafile, encoding="utf8", float_format='%.3f', index=False
            )

            filelist.append([filestem])
            countrylist.append(country)
        except Exception:
            # Deliberately broad: log and move on to the next country.
            us.log("ERROR: Failed to build data for %s" % iso3)
            us.log(sys.exc_info())
            traceback.print_tb(sys.exc_info()[2])

    fldf = pd.DataFrame(filelist, index=countrylist).sort_index()
    fldf.to_csv(
        out_dir + "_" + config["prefix"] + ".csv",
        encoding="utf8",
        float_format='%.1f',
        index=False,
        header=False,
    )
    return fldf
continue filestem = config["prefix"] + "_" + iso3.lower() + "_" + config["suffix"] filename = filestem + ".csv" filepath = config["gen_2_dir"] + filename df.to_csv(filepath, encoding="utf8", float_format='%.3f', index=False) country = us.get_country_by_iso3(iso3) meta = [("name", "%s - %s [SIDS RCM]" % (country, config["indicator"])), ("originalsource", mf.ix["Capacity"]["Source"]), ("proximatesource", "SIDS RCM"), ("dataset", config["indicator"]), ("description", config["description"]), ("note", mf.ix["Capacity"]["Note"]), ("unit", mf.ix["Capacity"]["Unit"]), ("category", config["indicator_category"]), ("type", config["indicator_type"]), ("file", filename), ("filehash", us.githash(filepath)), ("columns", "description,value")] metafile = config["gen_2_dir"] + filestem + "_meta.csv" pd.DataFrame(meta, columns=["key", "value"]).to_csv(metafile, encoding="utf8", float_format='%.3f', index=False) filelist.append(filestem) pd.DataFrame(filelist).to_csv(config["gen_2_dir"] + "_" + config["prefix"] + ".csv", encoding="utf8", index=False, header=False) us.log("%i series saved to %s" % (len(filelist), config["gen_2_dir"]))
filepath = config["gen_2_dir"] + filename df.to_csv(filepath, encoding="utf8", float_format='%.3f', index=False) country = us.get_country_by_iso3(iso3) meta = [("name", "%s - %s [SIDS RCM]" % (country, config["indicator"])), # ("originalsource", mf.ix[keycol]["Source"]), ("originalsource", "SIDS RCM"), ("proximatesource", "SIDS RCM"), ("dataset", config["indicator"]), ("description", config["description"]), # ("note", mf.ix[keycol]["Note"]), # ("unit", mf.ix[keycol]["Unit"]), ("category", config["indicator_category"]), ("type", config["indicator_type"]), ("file", filename), ("filehash", us.githash(filepath)), ("columns", "description,value") ] metafile = config["gen_2_dir"] + filestem + "_meta.csv" try: pd.DataFrame(meta,columns = ["key","value"]).to_csv(metafile, encoding="utf8", float_format='%.3f',index=False) except Exception: pd.DataFrame(meta,columns = ["key","value","notes"]).to_csv(metafile, encoding="utf8", float_format='%.3f',index=False) filelist.append(filestem) pd.DataFrame(filelist).to_csv(config["gen_2_dir"] + "_" + config["prefix"] + ".csv", encoding="utf8", index=False, header=False) us.log("%i series saved to %s" % (len(filelist), config["gen_2_dir"])) # <markdowncell>
cf["value"] = cf["value"].apply(lambda x: config["multiplier"] * x) #print (cf) cc = fix_country(country) iso3 = us.get_iso3_by_country(cc) filestem = config["prefix"] + "_" + iso3.lower() + "_" + config["suffix"] filename = filestem + ".csv" filepath = config["gen_2_dir"] + filename cf.to_csv(filepath, encoding="utf8", float_format='%.3f', index=False) meta = [ ("name", "%s - Percentage of population with Internet access [ITU]" % (cc)), ("dataset", "International Telecommunications Union database"), ("description", ("Pecentage of population of %s from the International Telecommunications Union" ) % (cc)), ("file", filename), ("filehash", us.githash(filepath)), ("category", config["indicator_category"]), ("type", config["indicator_type"]), ("originalsource", "International Telecommunications Union"), ("proximatesource", "UN Data"), ("columns", "year,value") ] mf = pd.DataFrame(meta, columns=["key", "value"]) metafile = config["gen_2_dir"] + filestem + "_meta.csv" mf.to_csv(metafile, encoding="utf8", index=False) filelist.append(filestem) pd.DataFrame(filelist).to_csv(config["gen_2_dir"] + "_" + config["prefix"] + ".csv", encoding="utf8", index=False, header=False)