def create_datapackage(path, update):
    """create datapackage.json"""
    import os
    import json

    import click
    from ddf_utils.index import get_datapackage

    if not update:
        if os.path.exists(os.path.join(path, 'datapackage.json')):
            click.echo('datapackage.json already exists. skipping')
            return
        res = get_datapackage(path)
        with open(os.path.join(path, 'datapackage.json'), 'w', encoding='utf8') as f:
            json.dump(res, f, indent=4, ensure_ascii=False)
    else:
        get_datapackage(path, update_existing=True)
    click.echo('Done.')
def run_recipe(recipe, outdir, update, dry_run, show_tree):
    """generate new ddf dataset with recipe"""
    import os
    import json

    import click
    import ddf_utils.chef as chef
    from ddf_utils.index import get_datapackage

    click.echo('building recipe...')
    recipe = chef.build_recipe(recipe)

    if show_tree:
        dag = chef.cook.build_dag(recipe)
        dag.tree_view()
        return

    if update:
        pass  # update mode is currently a no-op

    res = chef.run_recipe(recipe)
    if not dry_run:
        click.echo('saving result to disk...')
        chef.dish_to_csv(res, outdir)
        click.echo('creating datapackage file...')
        res = get_datapackage(outdir)
        with open(os.path.join(outdir, 'datapackage.json'), 'w', encoding='utf8') as f:
            json.dump(res, f, indent=4, ensure_ascii=False)
    click.echo("Done.")
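
# The two functions above read like Click command bodies. Below is a minimal
# sketch of how they might be wired into a CLI; the group name, command names,
# option names, and defaults here are assumptions, not taken from the original
# source.
import click


@click.group()
def ddf():
    """ddf_utils command-line helpers (hypothetical entry point)."""


@ddf.command(name='create_datapackage')
@click.argument('path')
@click.option('--update', is_flag=True, default=False)
def create_datapackage_cmd(path, update):
    create_datapackage(path, update)


@ddf.command(name='run_recipe')
@click.argument('recipe')
@click.option('--outdir', default='.')
@click.option('--update', is_flag=True, default=False)
@click.option('--dry-run', is_flag=True, default=False)
@click.option('--show-tree', is_flag=True, default=False)
def run_recipe_cmd(recipe, outdir, update, dry_run, show_tree):
    run_recipe(recipe, outdir, update, dry_run, show_tree)


if __name__ == '__main__':
    ddf()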
concs = data['Element'].unique()

cdf = pd.DataFrame([], columns=['concept', 'name', 'concept_type'])
cdf['name'] = ['Name', 'Country', 'Item', 'Year', *concs]
cdf['concept'] = cdf['name'].map(to_concept_id)
cdf.concept_type = 'measure'
cdf.loc[0, 'concept_type'] = 'string'
cdf.loc[1, 'concept_type'] = 'entity_domain'
cdf.loc[2, 'concept_type'] = 'entity_domain'
cdf.loc[3, 'concept_type'] = 'time'
cdf.to_csv(os.path.join(out_path, 'ddf--concepts.csv'), index=False)

# datapoints
data_ = data[['Country Code', 'Item Code', 'Element', 'Year Code', 'Value']]
gs = data_.groupby('Element').groups

for k, idx in gs.items():
    cid = to_concept_id(k)
    df = data_.loc[idx].copy()  # .loc replaces the deprecated .ix indexer
    df = df.drop('Element', axis=1)
    df.columns = ['country', 'item', 'year', cid]
    path = os.path.join(
        out_path,
        'ddf--datapoints--{}--by--country--item--year.csv'.format(cid))
    df.to_csv(path, index=False)

get_datapackage(out_path, use_existing=True, to_disk=True)
print('Done.')
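
# For reference, to_concept_id (from ddf_utils.str) slugifies display names
# into DDF concept ids. A rough illustration of the expected behaviour; the
# exact normalization rules belong to the library, so treat this as a sketch:
from ddf_utils.str import to_concept_id

print(to_concept_id('Country Code'))   # -> 'country_code'
print(to_concept_id('Yield (hg/ha)'))  # -> something like 'yield_hg_ha'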
#
# concept_discrete = extract_concept_discrete(country, series)
# concept_discrete.to_csv(
#     os.path.join(output_dir, 'ddf--concepts--discrete.csv'),
#     index=False, encoding='utf8')
#
# print('creating entities files...')
# entities_country = extract_entities_country(country, series)
# entities_country.to_csv(
#     os.path.join(output_dir, 'ddf--entities--country.csv'),
#     index=False, encoding='utf8')
#
# print('creating datapoints...')
# datapoints = extract_datapoints_country_year(data)
# for k, v in datapoints.items():
#     v[k] = pd.to_numeric(v[k])
#     v.to_csv(
#         os.path.join(output_dir,
#                      'ddf--datapoints--'+k+'--by--country--year.csv'),
#         index=False,
#         encoding='utf8',
#         # keep 10 digits: this avoids pandas using scientific
#         # notation in the datapoints while keeping precision.
#         # There are really small/big numbers in this dataset.
#         float_format='%.10f'
#     )

print('generating datapackage file...')
datapackage = get_datapackage(output_dir, to_disk=True)
# -*- coding: utf-8 -*-

import os
import json
import logging

from ddf_utils import chef
from ddf_utils.index import get_datapackage

import patch

logging.basicConfig(level=logging.DEBUG,
                    format='%(asctime)s -%(levelname)s- %(message)s',
                    datefmt="%H:%M:%S")

out_dir = '../../'
recipe_file = '../recipes/recipe_main.yaml'

if __name__ == '__main__':
    # removing old files
    for f in os.listdir(out_dir):
        if f.startswith("ddf--"):
            os.remove(os.path.join(out_dir, f))

    recipe = chef.build_recipe(recipe_file)
    res = chef.run_recipe(recipe, serve=True, outpath=out_dir)

    patch.do_all_changes()

    datapackage = get_datapackage(out_dir, use_existing=True, to_disk=True)

    print('Done.')
def copy_other_files():
    for f in os.listdir(source_path):
        if 'entities' in f or 'concepts' in f or f == 'datapackage.json':
            shutil.copy(os.path.join(source_path, f), out_path)


def apply_path_concepts():
    from ddf_utils.patch import apply_patch

    new_concepts = apply_patch(os.path.join(source_path, 'ddf--concepts.csv'),
                               './concept_patch.csv')
    new_concepts.to_csv(os.path.join(out_path, 'ddf--concepts.csv'), index=False)


if __name__ == '__main__':
    datapoints_by_basomrade_gender()
    datapoints_by_municipality()
    entity_gender()
    copy_other_files()
    apply_path_concepts()

    get_datapackage(out_path, to_disk=True, use_existing=True)

    print('Done.')
    print(
        'Please manually edit the entries in datapackage.json for '
        'ddf--datapoints--indicators--by--basomrade--gender--year.csv and '
        'ddf--datapoints--indicators--by--gender--municipality--year.csv')
concept_discrete = extract_concept_discrete(country, series)
concept_discrete.to_csv(
    os.path.join(output_dir, 'ddf--concepts--discrete.csv'),
    index=False, encoding='utf8')

print('creating entities files...')
entities_country = extract_entities_country(country, series)
entities_country.to_csv(
    os.path.join(output_dir, 'ddf--entities--country.csv'),
    index=False, encoding='utf8')

print('creating datapoints...')
datapoints = extract_datapoints_country_year(data)
for k, v in datapoints.items():
    v[k] = pd.to_numeric(v[k])
    v.to_csv(
        os.path.join(output_dir,
                     'ddf--datapoints--'+k+'--by--country--year.csv'),
        index=False,
        encoding='utf8',
        # keep 10 digits: this avoids pandas using scientific
        # notation in the datapoints while keeping precision.
        # There are really small/big numbers in this dataset.
        float_format='%.10f'
    )

print('generating datapackage file...')
datapackage = get_datapackage(output_dir, to_disk=True)
import os
import json
import logging

from ddf_utils import chef
from ddf_utils.index import get_datapackage

import patch

logging.basicConfig(level=logging.DEBUG,
                    format='%(asctime)s -%(levelname)s- %(message)s',
                    datefmt="%H:%M:%S")

out_dir = '../../'
recipe_file = '../recipes/recipe_main.yaml'

if __name__ == '__main__':
    # removing old files
    for f in os.listdir(out_dir):
        if f.startswith("ddf--"):
            os.remove(os.path.join(out_dir, f))

    recipe = chef.build_recipe(recipe_file)
    res = chef.run_recipe(recipe)

    print('saving result to disk...')
    chef.dish_to_csv(res, out_dir)

    patch.do_all_changes()

    # TODO: keep the older datapackage's basic info (author etc.)
    # -- see the sketch after this script.
    datapackage = get_datapackage(out_dir)
    with open(os.path.join(out_dir, 'datapackage.json'), 'w', encoding='utf8') as f:
        json.dump(datapackage, f, indent=4, ensure_ascii=False)

    print('Done.')
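
# A possible way to address the TODO above: carry basic metadata over from the
# previous datapackage.json before regenerating it. The helper name and the
# list of fields to preserve are assumptions, not part of the original script.
import os
import json

from ddf_utils.index import get_datapackage


def regenerate_datapackage_keeping_info(out_dir,
                                        fields=('author', 'title', 'license')):
    """Rebuild datapackage.json but keep selected fields from the old one."""
    dp_path = os.path.join(out_dir, 'datapackage.json')
    old = {}
    if os.path.exists(dp_path):
        with open(dp_path, encoding='utf8') as f:
            old = json.load(f)
    datapackage = get_datapackage(out_dir)
    for field in fields:  # hypothetical field list; adjust to your metadata
        if field in old:
            datapackage[field] = old[field]
    with open(dp_path, 'w', encoding='utf8') as f:
        json.dump(datapackage, f, indent=4, ensure_ascii=False)
    return datapackage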