def test_risk_id():
    """Request draws using the ``risk_ids`` key.

    ``draws`` is meant to accept either ``risk_ids`` or ``rei_ids`` as the
    identifier key; this exercises the ``risk_ids`` spelling.

    Returns whatever ``draws`` hands back, so the result can be inspected.
    """
    gbd_ids = {'risk_ids': [84]}
    options = dict(source='risk', location_ids=[1], draw_type='rr')
    return draws(gbd_ids, **options)
def test_big_hdf():
    """Request exposure draws that are stored as a single large hdf.

    Some epi draws live in one big hdf file; the request below is intended
    to go through that storage layout. Note the demographic filters are
    passed as scalars here rather than lists.

    Returns whatever ``draws`` hands back.
    """
    filters = dict(location_ids=101, year_ids=1990, sex_ids=1)
    result = draws({'rei_ids': [94]}, source='risk',
                   draw_type='exposure', **filters)
    return result
def test_exposure_draws():
    """Exposure draws for a single demographic cell have one row per ME.

    Requests exposure draws for rei 84 restricted to one year, location,
    age group and sex, then checks that the number of rows equals the
    number of distinct ``modelable_entity_id`` values.

    Returns the result of ``draws``.
    """
    single_cell = {
        'year_ids': [1990],
        'location_ids': [102],
        'age_group_ids': [6],
        'sex_ids': [2],
    }
    exposure = draws({'rei_ids': [84]}, source='risk',
                     draw_type='exposure', **single_cell)

    # there should be one row per categorical risk
    row_count = len(exposure)
    distinct_mes = exposure.modelable_entity_id.unique()
    assert row_count == len(distinct_mes), 'more than one row per ME returned'
    return exposure
def test_string_format():
    """Identifier columns must not come back with object dtype.

    Some identifier variables were being returned as strings. This pulls
    one risk result and one epi result and asserts that every column whose
    name ends in ``'id'`` has a dtype other than ``'O'``.

    Returns the two results as a ``(risk, epi)`` tuple.
    """
    risk_result = draws({'rei_ids': [114]}, source='risk', location_ids=101,
                        year_ids=1990, sex_ids=1, draw_type='rr',
                        verbose=True)
    epi_result = draws({'modelable_entity_ids': [1449]}, source='epi',
                       location_ids=[132], verbose=True)

    for frame in (risk_result, epi_result):
        for name in frame.columns:
            if not name.endswith('id'):
                continue
            assert frame[name].dtype != 'O'

    return risk_result, epi_result
def test_rr_draws():
    """``rr`` draws for a single demographic cell have one row per category.

    Requests ``draw_type='rr'`` for rei 128 restricted to one year,
    location, age group and sex, then checks that the number of rows
    equals the number of distinct ``parameter`` values.

    Returns the result of ``draws``.
    """
    single_cell = {
        'year_ids': [1990],
        'location_ids': [102],
        'age_group_ids': [15],
        'sex_ids': [2],
    }
    rr = draws({'rei_ids': [128]}, source='risk', draw_type='rr',
               **single_cell)

    # there should be one row per categorical risk
    row_count = len(rr)
    categories = rr.parameter.unique()
    assert row_count == len(categories), ('more than one row per '
                                          'category returned')
    return rr
def test_como_version():
    """An explicit ``version`` should take precedence over ``status``.

    Both ``status='best'`` and ``version=86`` are supplied in the same
    request against the ``como`` source. No assertion is made here; the
    call itself is the check.

    Returns whatever ``draws`` hands back.
    """
    # Singular keyword names are intentional: they are passed exactly as
    # written to ``draws`` for this source.
    request = dict(
        source='como',
        location_id=[1],
        sex_id=[1],
        year_id=[1990],
        verbose=True,
        age_group_id=[12],
        metric_id=[1],
        status='best',
        version=86,
    )
    return draws({'sequela_ids': [1]}, **request)
def test_missing_files():
    """Draws should still be readable when the h5 file is missing.

    Some risk exposure h5 files were missing because save results called
    h5 conversion from csv, and that conversion occasionally fails. When
    get_draws hits a file-not-found error it should fall back on reading
    the original csv. No assertion is made here; the call itself is the
    check.

    Returns whatever ``draws`` hands back.
    """
    gbd_ids = {'modelable_entity_ids': [8946]}
    result = draws(gbd_ids,
                   location_ids=[44553],
                   sex_ids=[2],
                   year_ids=[2010],
                   status=56300,
                   verbose=True,
                   source='epi')
    return result
def test_como_gopher_interface():
    """Como results should be retrievable through ``draws`` directly.

    Instead of calling como.draws, gopher.draws is called for como results.
    The result must be non-empty, and the number of
    ``(sequela_id, measure_id)`` groups must equal the number of distinct
    ``measure_id`` values.

    Returns the result of ``draws``.
    """
    request = dict(
        source='como',
        location_id=[1],
        sex_id=[1],
        year_id=[1990],
        verbose=True,
        age_group_id=[12],
        metric_id=[1],
        status='latest',
    )
    result = draws({'sequela_ids': [1]}, **request)

    assert not result.empty

    group_count = len(result.groupby(['sequela_id', 'measure_id']))
    measure_count = len(result.measure_id.unique())
    assert group_count == measure_count
    return result
def test_risk_dups():
    """Risk exposure draws must not contain duplicated identifier rows.

    Some risk exposure draws had duplicate cat4 rows. This requests
    exposure draws for rei 94 in a single demographic cell and asserts
    that no two rows share the same values across every column whose name
    does not contain ``'draw'``.

    Returns the result of ``draws``.

    Raises:
        AssertionError: if any duplicated row is found.
    """
    df = draws({'rei_ids': [94]}, source='risk', location_ids=101,
               year_ids=1990, sex_ids=1, age_group_ids=4,
               draw_type='exposure')

    non_draw_cols = [col for col in df.columns if 'draw' not in col]
    has_duplicates = df.duplicated(subset=non_draw_cols).any()

    # Use logical `not`, not bitwise `~`: on a plain Python bool `~True`
    # is -2 (truthy), which would make this assertion impossible to fail.
    assert not has_duplicates, 'duplicates found'
    return df
# Script body: fetch draws with the parsed arguments and stream them to
# stdout for get_draws.ado to read.

# Take status out of args so it is passed to draws() explicitly and is not
# duplicated by the **args expansion below.
status = args.pop('status')

# convert kwargs from a list of single key dicts to one dict with
# multiple keys, if any specified from get_draws.ado
for d in args.pop('kwargs'):
    # .items() behaves the same on Python 2 and also works on Python 3
    for k, v in d.items():
        args[k] = v

# Get draws
try:
    # Each named argument is popped from args so that whatever remains can
    # be forwarded through **args without a duplicate-keyword error.
    df = draws(gbd_id_dict,
               measure_ids=args.pop('measure_ids'),
               location_ids=args.pop('location_ids'),
               year_ids=args.pop('year_ids'),
               age_group_ids=args.pop('age_group_ids'),
               sex_ids=args.pop('sex_ids'),
               status=status,
               source=args.pop('source'),
               include_risks=args.pop('include_risks'),
               **args)
except Exception as e:
    # catch all exceptions, because we need to write something to stdout
    # no matter what error. Get_draws.ado creates a pipe and reads from
    # it -- if nothing is written to the pipe, stata hangs
    print("Encountered error while reading draws: {}".format(e))
    raise

# stream results to sys.stdout for get_draws.ado to read in
# Use a dct because stata is faster at reading those
to_dct(df=df, fname=sys.stdout, include_header=True)