示例#1
0
def _get_denpendencies(dataset, all_datasets, include_indirect=False):
    try:
        etl_type, fn = all_datasets[dataset]
    except KeyError:  # not open_numbers datasets
        return list()

    if etl_type == 'recipe':
        dataset_path = osp.join(datasets_dir, dataset)
        etl_dir = osp.join(dataset_path, 'etl/scripts')
        recipe = osp.join(etl_dir, fn)
        logging.info("using recipe file: " + fn)
        chef = Chef.from_recipe(recipe, ddf_dir=datasets_dir)
        dependencies = list()
        for i in chef.ingredients:
            if i.dataset is not None:
                dependencies.append(i.dataset)
                if include_indirect:
                    for d in _get_denpendencies(i.dataset,
                                                all_datasets,
                                                include_indirect=True):
                        dependencies.append(d)
        dependencies = list(set(dependencies))
        logging.info("dependencies: {}".format(dependencies))
        return dependencies
    else:
        return list()
示例#2
0
def test_chef_load_recipe():
    """Smoke test: the flatten recipe can be loaded into a Chef.

    Validation is attempted, but a ``ChefRuntimeError`` from it is tolerated;
    any other exception fails the test.
    """
    chef = Chef.from_recipe(os.path.join(wd, 'recipes/test_flatten.yml'))
    try:
        chef.validate()
    except ChefRuntimeError:
        # a validation failure is acceptable here, only loading is under test
        pass
    assert 1
示例#3
0
def test_build_dictionary():
    """Check ``build_dictionary`` with an inline dict and with a file name.

    An inline dictionary is expected to come back unchanged. A file name is
    expected to be resolved after ``dictionaries_dir`` has been configured on
    the chef, and to produce the mapping stored in that file.
    """
    from ddf_utils.chef.api import Chef
    from ddf_utils.chef.helpers import build_dictionary

    chef = Chef()

    # Case 1: a plain dict is passed through as-is.
    inline = {'China': 'chn', 'USA': 'usa'}
    assert build_dictionary(chef, inline) == inline

    # Case 2: a file name, looked up once dictionaries_dir is configured.
    chef.add_config(
        dictionaries_dir=os.path.join(wd, 'chef', 'translation_dictionaries'))
    expected = {
        "imr_lower": "infant_mortality_lower",
        "imr_median": "infant_mortality_median",
        "imr_upper": "infant_mortality_upper"
    }
    assert build_dictionary(chef, 'indicators_cme_to_sg.json') == expected
示例#4
0
文件: cli.py 项目: semio/ddf_utils
def build_recipe(recipe, format):
    """create a complete recipe by expanding all includes in the input recipe."""
    # The docstring above is kept verbatim: it may be surfaced as command help.
    #
    # recipe: passed to Chef.from_recipe and also written to the output.
    # format: 'json' or 'yaml'; any other value writes nothing.
    from ddf_utils.chef.api import Chef

    # Load the recipe; the resulting Chef object is not used afterwards.
    Chef.from_recipe(recipe)

    # '-' asks click for the standard output stream.
    out = click.open_file('-', 'w')

    # NOTE(review): what gets dumped is the `recipe` argument itself, not
    # anything taken from the loaded Chef. That looks unintended given the
    # docstring -- confirm which Chef API should provide the expanded recipe.
    if format == 'yaml':
        import yaml
        yaml.dump(recipe, out)
    elif format == 'json':
        import json
        json.dump(recipe, out, indent=4, ensure_ascii=False)
示例#5
0
文件: cli.py 项目: semio/ddf_utils
def run_recipe(recipe, outdir, ddf_dir, update, dry_run, gen_dp, show_tree):
    """generate new ddf dataset with recipe"""
    # The docstring above is kept verbatim: it may be surfaced as command help.
    #
    # recipe:    recipe file handed to Chef.from_recipe.
    # outdir:    output path given to chef.run and used for datapackage.json.
    # ddf_dir:   forwarded to Chef.from_recipe when truthy.
    # update:    accepted for interface compatibility; has no effect here.
    # dry_run:   when true, chef.run is called with serve=False and no
    #            datapackage is written.
    # gen_dp:    when true (and not a dry run), (re)create datapackage.json.
    # show_tree: when true, only print the DAG tree view and return.
    from ddf_utils.chef.api import Chef
    from ddf_utils.package import create_datapackage
    from ddf_utils.io import dump_json
    import json

    coloredlogs.install(logger=logging.getLogger('Chef'),
                        fmt='%(asctime)s %(name)s %(levelname)s %(message)s',
                        level=LOG_LEVEL)

    click.echo('building recipe...')
    if ddf_dir:
        chef = Chef.from_recipe(recipe, ddf_dir=ddf_dir)
    else:
        chef = Chef.from_recipe(recipe)

    if show_tree:
        chef.dag.tree_view()
        return

    serve = not dry_run
    chef.run(serve=serve, outpath=outdir)

    if serve and gen_dp:
        click.echo('creating datapackage file...')
        datapackage_path = os.path.join(outdir, 'datapackage.json')
        if os.path.exists(datapackage_path):
            click.echo('backup old datapackage.json to datapackage.json.bak')
            shutil.copyfile(datapackage_path, os.path.join(outdir, 'datapackage.json.bak'))
            # Close the old datapackage promptly instead of leaking the handle.
            with open(datapackage_path) as old_file:
                dp_old = json.load(old_file)
            # copy translations info. other info should be in the recipe.
            if 'translations' in dp_old:
                chef = chef.add_metadata(translations=dp_old['translations'])
        dump_json(datapackage_path,
                  create_datapackage(outdir, gen_schema=True, **chef.metadata))
    click.echo("Done.")
# -*- coding: utf-8 -*-

import os
from ddf_utils.chef.api import Chef

# Paths used when this file is run as a script.
recipe_file = '../recipes/recipe_main.yaml'
out_dir = '../../'

# The DATASETS_DIR environment variable, when set, overrides the default
# datasets location.
datasets_dir = os.environ.get('DATASETS_DIR', '../../../')

if __name__ == '__main__':
    # Load the recipe against the datasets directory, then run and serve
    # the result into out_dir.
    chef = Chef.from_recipe(recipe_file, ddf_dir=datasets_dir)
    chef.run(serve=True, outpath=out_dir)
示例#7
0
def test_ingredients():
    """Check value filters (``$in`` / ``$nin``) when building ingredients.

    Each case builds an ingredient on the ``ddf--cme`` dataset with a
    different ``value`` filter and compares the keys of ``get_data()`` with
    the expected set. A filter that excludes everything may raise
    ``IngredientError``, which is tolerated. The last case builds an
    ingredient without a ``value`` entry on another dataset.
    """
    chef = Chef().add_config(ddf_dir=os.path.join(wd, 'datasets'))

    def cme_ingredient(value_filter):
        # All ddf--cme cases share everything except the value filter.
        spec = {
            'id': 'ddf--cme',
            'dataset': 'ddf--cme',
            'key': 'country, year',
            'value': value_filter,
        }
        return ingredient_from_dict(dictionary=spec, **chef.config)

    # (value filter, expected keys of get_data()), checked in this order
    cases = [
        ({'$in': ['*lower']}, {'imr_lower'}),
        ({'$nin': ['*lower']}, {'imr_upper', 'imr_median'}),
        ({'$nin': ['imr_lower', 'imr_upper']}, {'imr_median'}),
        ({'$in': ['imr_lower', 'imr_upper']}, {'imr_lower', 'imr_upper'}),
        ({'$in': ['imr_lower', 'lsdf']}, {'imr_lower'}),
    ]
    for value_filter, expected in cases:
        assert set(cme_ingredient(value_filter).get_data().keys()) == expected

    # Excluding every indicator: an IngredientError is acceptable here.
    nothing_left = cme_ingredient({'$nin': ['imr_*']})
    try:
        nothing_left.get_data()
    except IngredientError:
        pass

    # No 'value' entry at all, on a different dataset.
    dummy = ingredient_from_dict(dictionary={
        'id': 'ddf--dummy',
        'dataset': 'ddf--gapminder--dummy_companies',
        'key': 'synonym, region'
    }, **chef.config)
    assert set(dummy.get_data().keys()) == {'region'}
示例#8
0
def test_chef_api_call():
    """Smoke test that drives the Chef API directly instead of via a recipe.

    Builds a chef through chained ``add_*`` calls, registers a custom
    procedure, then calls the graph/recipe/tree/validate/run entry points.
    Nothing about the results is asserted; the test passes when no call
    raises.
    """
    from ddf_utils.chef.model.dag import DAG
    from ddf_utils.chef.model.ingredient import DataPointIngredient
    # create empty chef
    dag = DAG()
    Chef(dag=dag, metadata={}, config={}, cooking={}, serving=[])

    # create chef and add config
    chef = Chef()

    # The add_* calls are chained; the value of the chain is discarded and
    # `chef` itself is used afterwards.
    (chef.add_config(ddf_dir=os.path.join(wd, 'datasets'))
         .add_metadata(id='test_dataset',
                       base=['ddf--bp--energy'])
         .add_ingredient(id='bp-datapoints', dataset='ddf--bp--energy', key='geo, year', value='*')
         .add_procedure(collection='datapoints',
                        procedure='translate_header',
                        ingredients=['bp-datapoints'],
                        result='bp-datapoints-translate',
                        options={'dictionary': {'geo': 'country'}}))

    def multiply_1000(chef, ingredients, result, **options):
        """Custom procedure: multiply each indicator column by 1000.

        Only the first ingredient is used. For every ``(k, df)`` pair in its
        data, column ``k`` of a copy of ``df`` is multiplied by 1000.
        """
        # ingredients = [chef.dag.get_node(x) for x in ingredients]
        ingredient = ingredients[0]

        new_data = dict()
        for k, df in ingredient.get_data().items():
            # work on a copy so the source ingredient's data is left untouched
            df_ = df.copy()
            df_[k] = df_[k] * 1000
            new_data[k] = df_

        return DataPointIngredient.from_procedure_result(result, ingredient.key, new_data)

    # Register the function, then refer to it by name in add_procedure.
    chef.register_procedure(multiply_1000)
    chef.add_procedure(collection='datapoints',
                       procedure='multiply_1000',
                       ingredients=['bp-datapoints-translate'],
                       result='res')

    # Bare attribute access: only checks that `serving` can be read.
    chef.serving
    chef.add_dish(['bp-datapoints-translate'], options={})
    # Exercise the remaining entry points; return values are not inspected.
    chef.to_graph()
    chef.to_graph(node='res')
    chef.to_recipe()
    chef.dag.tree_view()
    chef.validate()
    res = chef.run()

    assert 1
示例#9
0
# -*- coding: utf-8 -*-

import os

from ddf_utils.chef.api import Chef

# Recipe that is run when this file is executed as a script.
recipe_file = '../recipes/etl.yml'

if __name__ == '__main__':
    # Look the environment variable up on its own. Wrapping the recipe
    # loading in the same `except KeyError` would hide a KeyError raised
    # while loading and silently retry without ddf_dir.
    ddf_dir = os.environ.get('DATASETS_DIR')

    if ddf_dir is None:
        # DATASETS_DIR is not set: load without an explicit ddf_dir
        chef = Chef.from_recipe(recipe_file)
    else:
        chef = Chef.from_recipe(recipe_file, ddf_dir=ddf_dir)

    chef.run(serve=True, outpath='../../')
示例#10
0
# coding: utf8

import os
from ddf_utils.chef.api import Chef

# Script settings. recipe_file is empty in this template.
out_dir = '../../'
recipe_file = ''

# The DATASETS_DIR environment variable, when set, overrides the default
# datasets location.
datasets_dir = os.environ.get('DATASETS_DIR', '../../../')

if __name__ == '__main__':
    # Load the recipe first, then point the chef at the datasets directory
    # before running and serving into out_dir.
    chef = Chef.from_recipe(recipe_file)
    chef.add_config(ddf_dir=datasets_dir)
    chef.run(serve=True, outpath=out_dir)
示例#11
0
def chef_fn(fn):
    """Build a Chef from the recipe file ``fn`` in the test ``recipes`` folder.

    The chef is created with ``ddf_dir`` and ``procedure_dir`` pointing at
    the ``datasets`` and ``procedures`` folders under ``wd``.
    """
    recipe_path = os.path.join(wd, 'recipes', fn)
    locations = {
        'ddf_dir': os.path.join(wd, 'datasets'),
        'procedure_dir': os.path.join(wd, 'procedures'),
    }
    return Chef.from_recipe(recipe_path, **locations)