def build_files(df, config):
    """Write per-country data and metadata CSV files and an index of them.

    For every key returned by ``us.get_index_set(df)`` the matching rows of
    ``df`` are selected, reduced to the ``Year``, ``Value``, ``Source`` and
    ``Notes`` columns (renamed to ``year``, ``value``, ``source``, ``notes``),
    optionally scaled by ``config["multiplier"]`` and written to
    ``<gen_2_dir><prefix>_<iso3>_<suffix>.csv``.  A companion
    ``..._meta.csv`` file with key/value metadata is written next to it.
    Finally ``<gen_2_dir>_<prefix>.csv`` is written, listing the file stems
    ordered by country name.

    Args:
        df: DataFrame whose index labels are the keys produced by
            ``us.get_index_set`` and which has the columns ``Year``,
            ``Value``, ``Source`` and ``Notes``.
        config: Mapping providing ``multiplier``, ``source``, ``prefix``,
            ``suffix``, ``gen_2_dir``, ``indicator``, ``indicator_id``,
            ``definition``, ``indicator_category`` and ``indicator_type``.

    Returns:
        A DataFrame with one row per successfully written country, holding
        the file stem and indexed (and sorted) by country name.

    Any exception raised while processing one country is logged and that
    country is skipped; processing continues with the next one.
    """
    filelist = []
    countrylist = []
    for iso3 in us.get_index_set(df):
        try:
            # ``.ix`` was removed in pandas 1.0; ``.loc`` is the label-based
            # lookup that this code relies on.
            idf = df.loc[iso3]
            if isinstance(idf, pd.Series):
                # idf is a Series if there is only one element in it, but we
                # want a DataFrame always.
                idf = pd.DataFrame([idf])
            # Explicit copy so the column assignments below do not trigger
            # pandas' SettingWithCopyWarning.
            idf = idf[["Year", "Value", "Source", "Notes"]].copy()
            # Header must agree with the "columns" metadata entry written
            # below ("year,value,source,notes").
            idf.columns = ["year", "value", "source", "notes"]

            mult = config["multiplier"]
            if mult:
                if -1 <= mult <= 1 or not isinstance(mult, int):
                    # Fractional or non-int multiplier: keep the raw product.
                    idf["value"] = idf["value"].apply(lambda x: x * mult)
                else:
                    # Integer multiplier of magnitude > 1: truncate to int and
                    # keep object dtype so the values stay Python ints.
                    idf["value"] = (
                        idf["value"].apply(lambda x: int(x * mult)).astype(object)
                    )

            # Every row gets the configured source, whatever the input held.
            idf["source"] = config["source"]
            idf["notes"] = idf["notes"].apply(lambda x: get_notes(str(x), config))

            filestem = config["prefix"] + "_" + iso3.lower() + "_" + config["suffix"]
            filename = filestem + ".csv"
            filepath = config["gen_2_dir"] + filename
            us.log(filepath)
            idf.to_csv(filepath, encoding="utf8", index=False)

            country = us.get_country_by_iso3(iso3)
            meta = [
                ("name", "%s - %s [CEPALStat]" % (country, config["indicator"])),
                ("originalsource", config["source"]),
                ("proximatesource", "CEPALStat"),
                ("dataset", config["indicator"] + " [" + config["indicator_id"] + "]"),
                ("description", config["definition"]),
                ("category", config["indicator_category"]),
                ("type", config["indicator_type"]),
                ("file", filename),
                # Hash is computed from the data file that was just written.
                ("filehash", us.githash(filepath)),
                ("columns", "year,value,source,notes"),
            ]
            metafile = config["gen_2_dir"] + filestem + "_meta.csv"
            pd.DataFrame(meta, columns=["key", "value"]).to_csv(
                metafile, encoding="utf8", float_format="%.3f", index=False
            )

            # Only record the country once both of its files exist.
            filelist.append([filestem])
            countrylist.append(country)
        except Exception:
            # Best effort: report the failure and carry on with the next key.
            us.log("ERROR: Failed to build data for %s" % iso3)
            us.log(sys.exc_info())
            traceback.print_tb(sys.exc_info()[2])

    fldf = pd.DataFrame(filelist, index=countrylist).sort_index()
    fldf.to_csv(
        config["gen_2_dir"] + "_" + config["prefix"] + ".csv",
        encoding="utf8",
        float_format="%.1f",
        index=False,
        header=False,
    )
    return fldf
def build_files(df, config):
    """Export one data CSV and one metadata CSV per country, plus a file list.

    The keys to process come from ``us.get_index_set(df)``.  For each key the
    rows of ``df`` stored under that index label are taken, restricted to the
    ``Year``/``Value``/``Source``/``Notes`` columns (renamed to lower-case
    ``year``/``value``/``source``/``notes``), scaled by
    ``config["multiplier"]`` when it is truthy, and saved as
    ``<gen_2_dir><prefix>_<iso3>_<suffix>.csv``.  A ``..._meta.csv`` file of
    key/value pairs describing the data file is saved alongside.  After the
    loop, ``<gen_2_dir>_<prefix>.csv`` is written with the stems of all files
    produced, ordered by country name.

    Args:
        df: DataFrame indexed by the keys that ``us.get_index_set`` returns,
            with columns ``Year``, ``Value``, ``Source`` and ``Notes``.
        config: Mapping with the entries ``multiplier``, ``source``,
            ``prefix``, ``suffix``, ``gen_2_dir``, ``indicator``,
            ``indicator_id``, ``definition``, ``indicator_category`` and
            ``indicator_type``.

    Returns:
        DataFrame of file stems, indexed by country name and sorted by that
        index; countries whose processing failed are absent.

    A failure for one country is logged (message, ``sys.exc_info()`` and the
    traceback) and does not stop the remaining countries.
    """
    filelist = []
    countrylist = []
    for iso3 in us.get_index_set(df):
        try:
            # Label-based lookup; the former ``.ix`` indexer no longer exists
            # in pandas >= 1.0.
            idf = df.loc[iso3]
            if isinstance(idf, pd.Series):
                # idf is a Series if there is only one element in it, but we
                # want a DataFrame always.
                idf = pd.DataFrame([idf])
            # Take an explicit copy before assigning columns in place, which
            # keeps pandas from emitting SettingWithCopyWarning.
            idf = idf[["Year", "Value", "Source", "Notes"]].copy()
            idf.columns = ["year", "value", "source", "notes"]

            mult = config["multiplier"]
            if mult:
                if -1 <= mult <= 1 or not isinstance(mult, int):
                    # Fractional or non-int factor: store the plain product.
                    idf["value"] = idf["value"].apply(lambda x: x * mult)
                else:
                    # Whole-number factor beyond +/-1: truncate to int; object
                    # dtype keeps the values as Python ints.
                    idf["value"] = (
                        idf["value"].apply(lambda x: int(x * mult)).astype(object)
                    )

            # The per-row source is overwritten with the configured one.
            idf["source"] = config["source"]
            idf["notes"] = idf["notes"].apply(lambda x: get_notes(str(x), config))

            filestem = config["prefix"] + "_" + iso3.lower() + "_" + config["suffix"]
            filename = filestem + ".csv"
            filepath = config["gen_2_dir"] + filename
            us.log(filepath)
            idf.to_csv(filepath, encoding="utf8", index=False)

            country = us.get_country_by_iso3(iso3)
            meta = [
                ("name", "%s - %s [CEPALStat]" % (country, config["indicator"])),
                ("originalsource", config["source"]),
                ("proximatesource", "CEPALStat"),
                ("dataset", config["indicator"] + " [" + config["indicator_id"] + "]"),
                ("description", config["definition"]),
                ("category", config["indicator_category"]),
                ("type", config["indicator_type"]),
                ("file", filename),
                # Must run after the data file is written: hashes that file.
                ("filehash", us.githash(filepath)),
                ("columns", "year,value,source,notes"),
            ]
            metafile = config["gen_2_dir"] + filestem + "_meta.csv"
            pd.DataFrame(meta, columns=["key", "value"]).to_csv(
                metafile, encoding="utf8", float_format="%.3f", index=False
            )

            # Register the country only after both files were written.
            filelist.append([filestem])
            countrylist.append(country)
        except Exception:
            # Deliberately broad: one bad country must not abort the export.
            us.log("ERROR: Failed to build data for %s" % iso3)
            us.log(sys.exc_info())
            traceback.print_tb(sys.exc_info()[2])

    fldf = pd.DataFrame(filelist, index=countrylist).sort_index()
    fldf.to_csv(
        config["gen_2_dir"] + "_" + config["prefix"] + ".csv",
        encoding="utf8",
        float_format="%.1f",
        index=False,
        header=False,
    )
    return fldf
filelist = [] us.mkdirs(config["gen_2_dir"]) for (iso3, df, mf) in dataset: us.log(iso3) try: df.columns = ["description", "value"] except Exception: # The data for St Kitts only has one column. We are excluding it for now. us.log(sys.exc_info()) continue filestem = config["prefix"] + "_" + iso3.lower() + "_" + config["suffix"] filename = filestem + ".csv" filepath = config["gen_2_dir"] + filename df.to_csv(filepath, encoding="utf8", float_format='%.3f', index=False) country = us.get_country_by_iso3(iso3) meta = [("name", "%s - %s [SIDS RCM]" % (country, config["indicator"])), ("originalsource", mf.ix["Capacity"]["Source"]), ("proximatesource", "SIDS RCM"), ("dataset", config["indicator"]), ("description", config["description"]), ("note", mf.ix["Capacity"]["Note"]), ("unit", mf.ix["Capacity"]["Unit"]), ("category", config["indicator_category"]), ("type", config["indicator_type"]), ("file", filename), ("filehash", us.githash(filepath)), ("columns", "description,value")] metafile = config["gen_2_dir"] + filestem + "_meta.csv" pd.DataFrame(meta, columns=["key", "value"]).to_csv(metafile, encoding="utf8", float_format='%.3f',
us.log(iso3) try: if len(df.columns) == 2: df.columns = ["year", "value"] else: df.columns = ["year", "value", "notes"] except Exception: # The data for St Kitts only has one column. We are excluding it for now. us.log(sys.exc_info()) continue filestem = config["prefix"] + "_" + iso3.lower() + "_" + config["suffix"] filename = filestem + ".csv" filepath = config["gen_2_dir"] + filename df.to_csv(filepath, encoding="utf8", float_format='%.3f', index=False) country = us.get_country_by_iso3(iso3) meta = [("name", "%s - %s [SIDS RCM]" % (country, config["indicator"])), # ("originalsource", mf.ix[keycol]["Source"]), ("originalsource", "SIDS RCM"), ("proximatesource", "SIDS RCM"), ("dataset", config["indicator"]), ("description", config["description"]), # ("note", mf.ix[keycol]["Note"]), # ("unit", mf.ix[keycol]["Unit"]), ("category", config["indicator_category"]), ("type", config["indicator_type"]), ("file", filename), ("filehash", us.githash(filepath)), ("columns", "description,value") ]