def read_helper_files(parent_dir, location_id):
    """Read in and return helper DataFrames.

    Returns:
        Config
        DataFrame containing age_weights for age-standardized rate
            calculation
        Most detailed locations (bool: whether location_id is an
            estimate location)
    """
    # Config file
    config = read_json(parent_dir + r'FILEPATH.json')
    # Age weights
    age_weights = pd.read_csv(parent_dir + r'FILEPATH.csv')
    # Most-detailed location.  .loc replaces the deprecated .ix indexer,
    # which was removed in pandas 1.0.
    location_hierarchy = pd.read_csv(parent_dir + r'FILEPATH.csv')
    estimate_locations = location_hierarchy.loc[
        location_hierarchy['is_estimate'] == 1, 'location_id'].tolist()
    # Membership test replaces the verbose if/else boolean assignment.
    most_detailed_location = int(location_id) in estimate_locations
    return age_weights, most_detailed_location, config
def read_helper_files(parent_dir):
    """Read in and return helper DataFrames.

    Returns:
        DataFrame containing config file.
    """
    # This step only needs the run configuration.
    config_path = os.path.join(parent_dir, '_temp/config.json')
    config = read_json(config_path)
    return config
def read_helper_files(parent_dir):
    """Read in and return helper DataFrames.

    Returns:
        DataFrame containing cause hierarchy used for aggregation
    """
    # Only the config file is read by this variant.
    return read_json(parent_dir + r'/FILEPATH.json')
def read_helper_files(parent_dir):
    """Return the config dictionary from json.

    Arguments:
        parent_dir (str):

    Returns:
        A dictionary containing the configuration specifications for
        this run.
    """
    config_path = os.path.join(parent_dir, 'FILEPATH')
    return read_json(config_path)
def read_helper_files(parent_dir, location_id):
    """Read in and return helper DataFrames.

    Returns:
        config dictionary
        DataFrame containing cause hierarchy used for aggregation
    """
    # Run configuration
    config = read_json(os.path.join(parent_dir, 'FILEPATH'))
    # Cause hierarchy used when aggregating up the cause tree
    cause_hierarchy = pd.read_csv(os.path.join(parent_dir, 'FILEPATH'))
    return config, cause_hierarchy
def read_helper_files(parent_dir):
    '''
    Read in and return helper DataFrames.

    Returns:
        config
        list of causes
    '''
    logger = logging.getLogger('correct.read_helper_files')

    # Config file
    logger.info('Reading config file')
    config = read_json(parent_dir + r'FILEPATH.json')

    # Unique cause ids, sorted numerically
    unique_cause_ids = pd.read_csv(
        parent_dir + 'FILEPATH.csv').cause_id.unique()
    causes = sorted(unique_cause_ids, key=int)
    return config, causes
def read_helper_files(parent_dir):
    """Read in and return helper DataFrames.

    Returns:
        config dictionary
        list of location_ids from the location hierarchy
    """
    # Config file
    config = read_json(parent_dir + r'/_temp/config.json')

    # Location hierarchy -> de-duplicated list of location ids
    location_hierarchy = pd.read_csv(
        parent_dir + r'/_temp/location_hierarchy.csv')
    location_ids = (location_hierarchy['location_id']
                    .drop_duplicates()
                    .tolist())
    return config, location_ids
def read_helper_files(parent_dir, location_id, sex_id):
    """Read in and return helper DataFrames.

    Returns:
        best_models: DataFrame containing all best model ids and relevant
            cause metadata for a given sex
        eligible_data: a DataFrame containing all demographics and their
            restriction status
    """
    logger = logging.getLogger('correct.read_helper_files')

    # Config file
    logger.info('Reading config file')
    config = read_json(parent_dir + r'FILEPATH')

    # List of best models (excluding shocks): keep the requested sex and
    # model_version_type_ids 0-4 only.
    logger.info('Reading best models')
    best_models = pd.read_csv(parent_dir + r'FILEPATH')
    sex_matches = best_models['sex_id'] == int(sex_id)
    non_shock = best_models['model_version_type_id'].isin(list(range(0, 5)))
    best_models = best_models.loc[sex_matches & non_shock]

    # List of eligible data
    logger.info('Reading eligible models')
    eligible_data = pd.read_csv(parent_dir + r'FILEPATH.csv')

    # Space-time restrictions
    spacetime_restriction_data = pd.read_csv(parent_dir + 'FILEPATH')

    # Envelope draws, renamed env_* -> draw_* for downstream consistency
    logger.info('Reading envelope draws')
    envelope_data = read_envelope_draws(parent_dir + r'FILEPATH.h5',
                                        location_id)
    rename_columns = {'env_{}'.format(i): 'draw_{}'.format(i)
                      for i in range(1000)}
    envelope_data = envelope_data.rename(columns=rename_columns)

    # Envelope summary
    envelope_summ = read_envelope_draws(parent_dir + r'FILEPATH.h5',
                                        location_id, key='summary')
    return (config, best_models, eligible_data, spacetime_restriction_data,
            envelope_data, envelope_summ)
def read_helper_files(parent_dir):
    """Read in and return helper DataFrames.

    Returns:
        config
        list of causes
    """
    logger = logging.getLogger('correct.read_helper_files')

    # Config file
    logger.info('Reading config file')
    config = read_json(os.path.join(parent_dir, '_temp/config.json'))

    # Unique cause ids from the aggregation hierarchy, sorted numerically
    hierarchy_path = os.path.join(
        parent_dir, '_temp/cause_aggregation_hierarchy.csv')
    causes = sorted(pd.read_csv(hierarchy_path).cause_id.unique(), key=int)
    return config, causes
def read_helper_files(parent_dir, location):
    """Read in and return helper DataFrames.

    Returns:
        DataFrame config
        most_detailed_location (bool): whether `location` is an
            estimate location
    """
    # Config file
    config = read_json(parent_dir + r'FILEPATH.json')
    location_hierarchy = pd.read_csv(parent_dir + r'FILEPATH.csv')
    # .loc replaces the deprecated .ix indexer (removed in pandas 1.0).
    estimate_locations = location_hierarchy.loc[
        location_hierarchy['is_estimate'] == 1, 'location_id'].tolist()
    # Membership test replaces the verbose if/else boolean assignment.
    most_detailed_location = int(location) in estimate_locations
    return config, most_detailed_location
def read_helper_files(parent_dir, location):
    """Read in and return helper DataFrames.

    Returns:
        DataFrame config, most_detailed_location bool
    """
    # Config file
    config = read_json(os.path.join(parent_dir, '_temp/config.json'))

    # Determine whether this location is flagged as an estimate location
    location_hierarchy = pd.read_csv(
        os.path.join(parent_dir, '_temp/location_hierarchy.csv'))
    is_estimate = location_hierarchy['is_estimate'] == 1
    estimate_locations = location_hierarchy.loc[
        is_estimate, 'location_id'].tolist()
    most_detailed_location = int(location) in estimate_locations
    return config, most_detailed_location
def read_helper_files(parent_dir, location_id, sex_name):
    '''
    Read in and return helper DataFrames.

    Returns:
        best_models: DataFrame containing all best model ids and relevant
            cause metadata for a given sex
        eligible_data: a DataFrame containing all demographics and their
            restriction status
    '''
    logger = logging.getLogger('correct.read_helper_files')
    sex_dict = {1: 'male', 2: 'female'}

    # Config file
    logger.info('Reading config file')
    config = read_json(parent_dir + r'/_temp/config.json')

    # List of best models (excluding shocks)
    logger.info('Reading best models')
    best_models = pd.read_csv(parent_dir + r'/_temp/best_models.csv')
    best_models['sex_name'] = best_models['sex_id'].map(lambda x: sex_dict[x])
    # .loc replaces the deprecated .ix indexer (removed in pandas 1.0);
    # model_version_type_ids 0-4 are the non-shock models.
    best_models = best_models.loc[
        (best_models['sex_name'] == sex_name) &
        (best_models['model_version_type_id'].isin(range(0, 5)))]

    # List of eligible data
    logger.info('Reading eligible models')
    eligible_data = pd.read_csv(parent_dir + r'/_temp/eligible_data.csv')

    # Space-time restrictions
    spacetime_restriction_data = pd.read_csv(
        parent_dir + '/_temp/spacetime_restrictions.csv')

    # Envelope draws; rename env_* columns to draw_* for downstream code.
    # range replaces Python-2-only xrange.
    logger.info('Reading envelope draws')
    envelope_data = read_envelope_draws(parent_dir + r'/_temp/envelope.h5',
                                        location_id)
    rename_columns = {'env_{}'.format(x): 'draw_{}'.format(x)
                      for x in range(1000)}
    envelope_data = envelope_data.rename(columns=rename_columns)
    return (config, best_models, eligible_data, spacetime_restriction_data,
            envelope_data)
def read_helper_files(parent_dir):
    """Read in and return helper DataFrames.

    Returns:
        config dictionary
        list of all location_ids
        list of estimate location_ids
    """
    # Config file
    config = read_json(os.path.join(parent_dir, '_temp/config.json'))

    # Location hierarchy, trimmed to the columns used downstream
    keep_columns = [
        'location_id', 'parent_id', 'level', 'is_estimate',
        'most_detailed', 'sort_order'
    ]
    location_hierarchy = get_location_metadata(
        gbd_round_id=5, location_set_id=35)[keep_columns]

    location_ids = (location_hierarchy['location_id']
                    .drop_duplicates()
                    .tolist())
    estimate_locations = location_hierarchy.loc[
        location_hierarchy['is_estimate'] == 1, 'location_id'].tolist()
    return config, location_ids, estimate_locations
def read_helper_files(parent_dir, location_id, sex_name):
    '''
    Read in and return helper DataFrames.

    Returns:
        best_models: DataFrame containing all best model ids and relevant
            cause metadata for a given sex
        eligible_data: a DataFrame containing all demographics and their
            restriction status
    '''
    logger = logging.getLogger('shocks.read_helper_files')
    sex_dict = {1: 'male', 2: 'female'}

    # Config file
    logger.info('Reading config file')
    config = read_json(parent_dir + r'/_temp/config.json')

    # List of best models for shocks
    logger.info('Reading best models')
    best_models = pd.read_csv(parent_dir + r'/_temp/best_models.csv')
    best_models['sex_name'] = best_models['sex_id'].map(lambda x: sex_dict[x])
    # .loc replaces the deprecated .ix indexer (removed in pandas 1.0);
    # model_version_type_ids 5-7 are the shock-type models.
    best_models = best_models.loc[
        (best_models['sex_name'] == sex_name) &
        (best_models['model_version_type_id'].isin(range(5, 8)))]
    return config, best_models
def read_helper_files(parent_dir, location_id, sex_id):
    """Read in and return helper DataFrames.

    Returns:
        best_models: DataFrame containing all best model ids and relevant
            cause metadata for a given sex
        eligible_data: a DataFrame containing all demographics and their
            restriction status
    """
    logger = logging.getLogger('shocks.read_helper_files')

    # Config file
    logger.info('Reading config file')
    config = read_json(os.path.join(parent_dir, '_temp/config.json'))

    # List of best models for shocks/imported_cases/hiv: keep the requested
    # sex and model_version_type_ids 5-7 only.
    logger.info('Reading best models')
    best_models = pd.read_csv(parent_dir + r'/_temp/best_models.csv')
    sex_matches = best_models['sex_id'] == int(sex_id)
    shock_types = best_models['model_version_type_id'].isin(list(range(5, 8)))
    best_models = best_models.loc[sex_matches & shock_types]
    return config, best_models
def read_helper_files(parent_dir, location_id, sex_id):
    '''
    Read in and return helper DataFrames.

    Returns:
        best_models: DataFrame containing all best model ids and relevant
            cause metadata for a given sex
        eligible_data: a DataFrame containing all demographics and their
            restriction status
    '''
    logger = logging.getLogger('shocks.read_helper_files')

    # Config file
    logger.info('Reading config file')
    config = read_json(parent_dir + r'FILEPATH.json')

    # List of best models for shocks
    logger.info('Reading best models')
    best_models = pd.read_csv(parent_dir + r'FILEPATH.csv')
    # .loc replaces the deprecated .ix indexer (removed in pandas 1.0);
    # model_version_type_ids 5-7 are the shock-type models.
    best_models = best_models.loc[
        (best_models['sex_id'] == int(sex_id)) &
        (best_models['model_version_type_id'].isin(range(5, 8)))]
    return config, best_models
config['eligible_sex_ids'] = eligible_sex_ids config['eligible_cause_ids'] = eligible_cause_ids config['eligible_year_ids'] = codcorrect_years config['eligible_location_ids'] = eligible_location_ids config['diagnostic_year_ids'] = [1990, 2005, 2017] config['change_years'] = change_years config['process_version_id'] = process_version_id write_json(config, parent_dir + r'/_temp/config.json') else: # Read in location data location_data = pd.read_csv( os.path.join(parent_dir, '_temp/location_hierarchy.csv')) # Read in config file config = read_json(os.path.join(parent_dir, '_temp/config.json')) # Read in variables eligible_location_ids = config['eligible_location_ids'] envelope_version_id = config['envelope_version_id'] pop_version_id = config['pop_version_id'] process_version_id = config['process_version_id'] change_years = config['change_years'] # if eligible_year_ids do not match, then do not resume jobs if config['eligible_year_ids'] != codcorrect_years: logging.info("CoDCorrect years do not match!") logging.info("Can't just resume jobs") config['eligible_year_ids'] != codcorrect_years write_json(config, os.path.join(parent_dir, '_temp/config.json')) resume = False
def read_helper_files(parent_dir):
    """Return the config read from the run's _temp/config.json."""
    config_path = os.path.join(parent_dir, '_temp/config.json')
    return read_json(config_path)
config['eligible_cause_ids'] = eligible_cause_ids config['eligible_year_ids'] = codcorrect_years config['eligible_location_ids'] = eligible_location_ids config['dalynator_export_years_ids'] = codcorrect_years config['diagnostic_year_ids'] = [ 1990, 1995, 2000, 2005, 2010, 2013, 2015 ] write_json(config, parent_dir + r'/_temp/config.json') else: # Read in location data location_data = pd.read_csv(parent_dir + '/_temp/location_hierarchy.csv') # Read in config file config = read_json(parent_dir + r'/_temp/config.json') # Read in variables eligible_location_ids = config['eligible_location_ids'] envelope_version_id = config['envelope_version_id'] # if eligible_year_ids do not match, then do not resume jobs if config['eligible_year_ids'] != codcorrect_years: print "CoDCorrect years do not match!" print "Can't just resume jobs" config['eligible_year_ids'] != codcorrect_years write_json(config, parent_dir + r'/_temp/config.json') resume = False # Generate CoDCorrect jobs codcorrect_job_list = TaskList()
def prep_upload(parent_dir):
    """Open permissions on the run directories and return upload metadata."""
    # Parent directory itself: non-recursive; the _temp subtree: recursive.
    change_permission(parent_dir, recursively=False)
    change_permission(parent_dir + r'/_temp/', recursively=True)
    return read_json(parent_dir + r'/_temp/output_upload.json')
config['eligible_age_group_ids'] = eligible_age_group_ids config['eligible_sex_ids'] = eligible_sex_ids config['eligible_cause_ids'] = eligible_cause_ids config['eligible_year_ids'] = codcorrect_years config['eligible_location_ids'] = eligible_location_ids config['diagnostic_year_ids'] = [ 1990, 1995, 2000, 2005, 2006, 2010, 2016] write_json(config, parent_dir + r'FILEPATH.json') else: # Read in location data location_data = pd.read_csv( parent_dir + 'FILEPATH.csv') # Read in config file config = read_json(parent_dir + r'FILEPATH.json') # Read in variables eligible_location_ids = config['eligible_location_ids'] envelope_version_id = config['envelope_version_id'] lifetable_version_id = config['lifetable_version_id'] pop_version_id = config['pop_version_id'] # if eligible_year_ids do not match, then do not resume jobs if config['eligible_year_ids'] != codcorrect_years: logging.info("CoDCorrect years do not match!") logging.info("Can't just resume jobs") config['eligible_year_ids'] != codcorrect_years write_json(config, parent_dir + r'/FILEPATH.json') resume = False