Python draws示例，gopher.draws Python示例

示例#1

0

显示文件

def test_risk_id():
    ''' draws can accept risk_id or rei_id

    Identifies the risk through the 'risk_ids' key (rather than 'rei_ids')
    and requests 'rr' draws from the 'risk' source for location 1.
    No assertion is made; the result of draws() is returned as-is.
    '''
    gbd_ids = {'risk_ids': [84]}
    query = {
        'source': 'risk',
        'location_ids': [1],
        'draw_type': 'rr',
    }
    return draws(gbd_ids, **query)

示例#2

0

显示文件

def test_big_hdf():
    ''' some epi draws are stored as one big hdf

    Requests 'exposure' draws for rei 94 from the 'risk' source, passing the
    location, year and sex filters as bare scalars instead of lists.
    No assertion is made; the result of draws() is returned as-is.
    '''
    gbd_ids = {'rei_ids': [94]}
    filters = {
        'source': 'risk',
        'location_ids': 101,
        'year_ids': 1990,
        'sex_ids': 1,
        'draw_type': 'exposure',
    }
    result = draws(gbd_ids, **filters)
    return result

示例#3

0

显示文件

def test_exposure_draws():
    ''' exposure draws should hold exactly one row per modelable entity

    Requests 'exposure' draws for rei 84 narrowed to a single year,
    location, age group and sex, then checks that the number of rows equals
    the number of distinct modelable_entity_id values.

    Returns the frame produced by draws().
    '''
    filters = {
        'source': 'risk',
        'year_ids': [1990],
        'location_ids': [102],
        'age_group_ids': [6],
        'sex_ids': [2],
        'draw_type': 'exposure',
    }
    df = draws({'rei_ids': [84]}, **filters)

    # there should be one row per categorical risk
    row_count = len(df)
    entity_count = len(df.modelable_entity_id.unique())
    assert row_count == entity_count, 'more than one row per ME returned'
    return df

示例#4

0

显示文件

def test_string_format():
    ''' some identifier variables were returning as strings

    Pulls one frame from the 'risk' source and one from the 'epi' source,
    then asserts that every column whose name ends in 'id' has a dtype
    other than object ('O').

    Returns the two frames as a (risk, epi) tuple.
    '''
    risk_frame = draws(
        {'rei_ids': [114]},
        source='risk',
        location_ids=101,
        year_ids=1990,
        sex_ids=1,
        draw_type='rr',
        verbose=True,
    )
    epi_frame = draws(
        {'modelable_entity_ids': [1449]},
        source='epi',
        location_ids=[132],
        verbose=True,
    )

    for frame in (risk_frame, epi_frame):
        for name in frame.columns:
            # only identifier columns are subject to the dtype check
            if not name.endswith('id'):
                continue
            assert frame[name].dtype != 'O'

    return risk_frame, epi_frame

示例#5

0

显示文件

def test_rr_draws():
    ''' rr draws should hold exactly one row per parameter value

    Requests 'rr' draws for rei 128 narrowed to a single year, location,
    age group and sex, then checks that the number of rows equals the
    number of distinct values in the 'parameter' column.

    Returns the frame produced by draws().
    '''
    filters = {
        'source': 'risk',
        'year_ids': [1990],
        'location_ids': [102],
        'age_group_ids': [15],
        'sex_ids': [2],
        'draw_type': 'rr',
    }
    df = draws({'rei_ids': [128]}, **filters)

    # there should be one row per categorical risk
    row_count = len(df)
    category_count = len(df.parameter.unique())
    assert row_count == category_count, (
        'more than one row per category returned')
    return df

示例#6

0

显示文件

def test_como_version():
    ''' version should override status

    Sends a 'como' request for sequela 1 that carries both status='best'
    and version=86 at the same time. No assertion is made here; the frame
    is returned so the caller can inspect which one was honoured.
    '''
    gbd_ids = {'sequela_ids': [1]}
    request = {
        'source': 'como',
        'location_id': [1],
        'sex_id': [1],
        'year_id': [1990],
        'verbose': True,
        'age_group_id': [12],
        'metric_id': [1],
        'status': 'best',
        'version': 86,
    }
    return draws(gbd_ids, **request)

示例#7

0

显示文件

def test_missing_files():
    ''' get_draws should fall back on the original csv when an h5 is missing

    Some risk exposure h5 files were missing because save results called
    the h5 conversion from csv, and that conversion occasionally fails. When
    get_draws hits a file-not-found error it should retry with the csv.

    Requests 'epi' draws for modelable entity 8946 with status=56300.
    No assertion is made; the result of draws() is returned as-is.
    '''
    gbd_ids = {'modelable_entity_ids': [8946]}
    request = {
        'location_ids': [44553],
        'sex_ids': [2],
        'year_ids': [2010],
        'status': 56300,
        'verbose': True,
        'source': 'epi',
    }
    result = draws(gbd_ids, **request)
    return result

示例#8

0

显示文件

def test_como_gopher_interface():
    ''' instead of calling como.draws, call gopher.draws for como results

    Requests 'como' draws for sequela 1 with status='latest' and checks
    that the frame is non-empty and that the number of
    (sequela_id, measure_id) groups equals the number of distinct
    measure_id values.

    Returns the frame produced by draws().
    '''
    request = {
        'source': 'como',
        'location_id': [1],
        'sex_id': [1],
        'year_id': [1990],
        'verbose': True,
        'age_group_id': [12],
        'metric_id': [1],
        'status': 'latest',
    }
    df = draws({'sequela_ids': [1]}, **request)

    assert not df.empty

    group_count = len(df.groupby(['sequela_id', 'measure_id']))
    measure_count = len(df.measure_id.unique())
    assert group_count == measure_count
    return df

示例#9

0

显示文件

def test_risk_dups():
    ''' some risk exposure draws had duplicate cat4 rows

    Requests 'exposure' draws for rei 94 narrowed to one location, year,
    sex and age group, then asserts that no two rows share the same values
    across every column whose name does not contain 'draw'.

    Returns the frame produced by draws().
    '''
    df = draws({'rei_ids': [94]},
               source='risk',
               location_ids=101,
               year_ids=1990,
               sex_ids=1,
               age_group_ids=4,
               draw_type='exposure')

    # everything except the draw columns identifies a row
    id_cols = [col for col in df.columns if 'draw' not in col]
    has_duplicates = df.duplicated(subset=id_cols).any()

    # Use logical `not`, not bitwise `~`: `~` on a plain Python bool yields
    # -1 / -2, both truthy, which would make this assertion unable to fail.
    assert not has_duplicates, 'duplicates found'

    return df

示例#10

0

显示文件

    status = args['status']
# 'status' is passed to draws() explicitly below, so it must be removed from
# args before the remaining entries are forwarded with **args
args.pop('status')

# convert kwargs from a list of single key dicts to one dict with
# multiple keys, if any specified from get_draws.ado
# (each key/value pair is copied straight into args, so these extras reach
# draws() through the **args expansion below)
for d in args.pop('kwargs'):
    for k, v in d.iteritems():
        args[k] = v

# Get draws
# Each named argument is popped out of args so that it is not supplied a
# second time by **args; whatever is left in args is forwarded unchanged.
try:
    df = draws(gbd_id_dict,
               measure_ids=args.pop('measure_ids'),
               location_ids=args.pop('location_ids'),
               year_ids=args.pop('year_ids'),
               age_group_ids=args.pop('age_group_ids'),
               sex_ids=args.pop('sex_ids'),
               status=status,
               source=args.pop('source'),
               include_risks=args.pop('include_risks'),
               **args)

except Exception as e:
    # catch all exceptions, because we need to write something to stdout
    # no matter what error. Get_draws.ado creates a pipe and reads from
    # it -- if nothing is written to the pipe, stata hangs
    print "Encountered error while reading draws: {}".format(e)
    # re-raise after reporting so the failure still propagates
    raise

# stream results to sys.stdout for get_draws.ado to read in
# Use a dct because stata is faster at reading those
to_dct(df=df, fname=sys.stdout, include_header=True)