import sys
import traceback

import pandas as pd

import utils_statmart as us


def build_files(df, config):
    """Write one gen-2 data file and one metadata file per country, then an index of all files."""
    filelist = []
    countrylist = []
    for iso3 in us.get_index_set(df):
        try:
            idf = df.loc[iso3]
            # idf is a Series if there is only one row for the country, but we always want a DataFrame
            if isinstance(idf, pd.Series):
                idf = pd.DataFrame([idf])
            idf = idf[["Year", "Value", "Source", "Notes"]]
            idf.columns = ["year", "value", "source", "notes"]
            mult = config["multiplier"]
            if mult:
                # Fractional multipliers keep floats; integer scale factors are cast back to int
                if (-1 <= mult <= 1) or not isinstance(mult, int):
                    idf["value"] = idf["value"].apply(lambda x: x * mult)
                else:
                    idf["value"] = idf["value"].apply(lambda x: int(x * mult)).astype(object)
            idf["source"] = idf["source"].apply(lambda x: config["source"])
            idf["notes"] = idf["notes"].apply(lambda x: get_notes(str(x), config))
            filestem = config["prefix"] + "_" + iso3.lower() + "_" + config["suffix"]
            filename = filestem + ".csv"
            filepath = config["gen_2_dir"] + filename
            us.log(filepath)
            idf.to_csv(filepath, encoding="utf8", index=False)
            country = us.get_country_by_iso3(iso3)
            meta = [
                ("name", "%s - %s [CEPALStat]" % (country, config["indicator"])),
                ("originalsource", config["source"]),
                ("proximatesource", "CEPALStat"),
                ("dataset", config["indicator"] + " [" + config["indicator_id"] + "]"),
                ("description", config["definition"]),
                ("category", config["indicator_category"]),
                ("type", config["indicator_type"]),
                ("file", filename),
                ("filehash", us.githash(filepath)),
                ("columns", "year,value,source,notes"),
            ]
            metafile = config["gen_2_dir"] + filestem + "_meta.csv"
            pd.DataFrame(meta, columns=["key", "value"]).to_csv(
                metafile, encoding="utf8", float_format="%.3f", index=False
            )
            filelist.append([filestem])
            countrylist.append(country)
        except Exception:
            us.log("ERROR: Failed to build data for %s" % iso3)
            us.log(sys.exc_info())
            traceback.print_tb(sys.exc_info()[2])
    # Index of all generated files, keyed by country name
    fldf = pd.DataFrame(filelist, index=countrylist).sort_index()
    fldf.to_csv(
        config["gen_2_dir"] + "_" + config["prefix"] + ".csv",
        encoding="utf8",
        float_format="%.1f",
        index=False,
        header=False,
    )
    return fldf
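# For reference, build_files expects a config dict carrying the output directory and the
# indicator metadata used above. A minimal sketch of a call follows; the values are purely
# illustrative placeholders (hypothetical paths and indicator fields, not real CEPALStat
# metadata), and df is assumed to be a gen-1 DataFrame indexed by ISO3 code with
# Year/Value/Source/Notes columns.

example_config = {
    "gen_2_dir": "gen2/cepalstat/",   # hypothetical output directory
    "prefix": "cepal",
    "suffix": "example",
    "multiplier": 1000,               # e.g. source data reported in thousands
    "source": "CEPALStat",
    "indicator": "Example indicator",
    "indicator_id": "0000",
    "definition": "Placeholder definition.",
    "indicator_category": "Economy",
    "indicator_type": "float",
}
# index_df = build_files(df, example_config)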
def get_meta_map(metafile):
    """Read a gen-1 metadata CSV and return its source, indicator, definition, and notes as a dict."""
    metamap = {}
    mf = pd.read_csv(metafile, encoding="utf-8")
    source = mf.loc[mf["Key"] == "source"]["Value"]
    if len(source) > 0:
        metamap["source"] = clip_period(source[source.index[0]].strip())
    indicator = mf.loc[mf["Key"] == "indicator"]["Value"]
    metamap["indicator"] = clip_period(indicator[indicator.index[0]].strip())
    definition = mf.loc[mf["Key"] == "definition"]["Value"]
    if len(definition) > 0:
        metamap["definition"] = clip_period(definition[definition.index[0]].strip())
    # Notes are keyed by their numeric ID so they can be looked up from the data rows
    nf = mf.loc[mf["Key"] == "note"][["ID", "Value"]]
    nf = nf.set_index(["ID"])
    for index in us.get_index_set(nf):
        note = nf.loc[index]["Value"].strip()
        note = clip_period(note)
        metamap[str(index)] = note
    return metamap
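# get_meta_map relies on a clip_period helper that is not shown in this excerpt; from its use
# here it appears to trim a trailing period so metadata strings compose cleanly. A minimal
# sketch under that assumption (the project's actual helper may differ):

def clip_period(s):
    # Assumed behaviour: drop a single trailing period from a metadata string.
    s = s.strip()
    return s[:-1] if s.endswith(".") else s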
import http.client
import base64

from settings_statmart import *
import utils_statmart as us

config = {
    'gen_1_dir': statmart_facts_gen1 + 'profiles/',
    'gen_2_dir': statmart_facts_gen2 + 'profiles/',
    'prefix': 'profile',
    'suffix': ''
}

# <codecell>

countries = us.get_index_set(
    pd.DataFrame(us.load_carib_country_dict(key_column="name")).T)

chatter = True


def elog(s):
    if chatter:
        print(s)


def formatComputerReadableString(strn):
    # Normalize a country name into a lowercase, hyphenated slug for use in file names
    if strn == "U.S. Virgin Islands":
        strn = "United States Virgin Islands"
    return strn.strip().lower().replace(" ", "-")


countries = sorted(list(map(lambda x: (x, formatComputerReadableString(x)), countries)))
countries

# <codecell>
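# As a quick illustration of the slug format produced by formatComputerReadableString
# (inputs chosen for illustration; expected outputs shown as comments):

formatComputerReadableString("Antigua and Barbuda")   # -> "antigua-and-barbuda"
formatComputerReadableString("U.S. Virgin Islands")   # -> "united-states-virgin-islands"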
# <codecell>

statfile = config["gen_1_dir"] + indicator_id + "_all.csv"
df = pd.read_csv(statfile, encoding="utf-8", index_col=["Item"])
df = df.transpose()
sample = df["Construction"].transpose()
sample[11:20]

# <markdowncell>

# We need a list of sectors so we can build a map out of them. __Should any sectors be added to the data in the future, the additional fields will need to be added to sector_map, below.__ If the additional sector fields are not added, an error will occur during the file generation step, when the sector name is not found in sector_map.

# <codecell>

sectors = us.get_index_set(df.transpose())
sectors

# <markdowncell>

# Here is sector_map, which maps every long sector name (see the set listing above) to a short one for use in the file system. We also ensure that a directory exists corresponding to each sector in the map.

# <codecell>

sector_map = {'Agriculture, hunting and forestry': 'aghufo',
              'Agriculture, hunting, forestry and fishing': 'aghufofi',
              'Construction': 'cstrn',
              'Electricity, gas and water supply': 'egws',
              'Financial intermediation services indirectly measured (FISIM)': 'fisim',
              'Financial intermediation, real estate, renting and business activities': 'firerba',
              'Fishing': 'fish',