Пример #1
0
def import_cod_model_draws(model_version_id, location_id, cause_id, sex_id,
                           required_columns, filter_years=None, db_env=None,
                           envelope=None):
    """Import and validate model draws for one cause/location/sex.

    Draws are fetched with ``draws`` (first against ``db_env``; if that raises
    ``DirectoryNotFoundException`` the read is retried with
    ``db_env='prod'``), restricted to age_group_ids 2-21, 30, 31, 32 and 235,
    passed through ``add_envelope`` together with ``envelope`` and finally
    checked with ``check_data_format``.

    Args:
        model_version_id: Model version, forwarded to ``draws`` as ``status``.
        location_id: Location to read; coerced with ``int``.
        cause_id: Cause whose draws are requested.
        sex_id: Sex to read; coerced with ``int``.
        required_columns: Columns validated by ``check_data_format`` and kept
            in the returned frame.
        filter_years: Optional iterable of year ids forwarded as ``yids``.
        db_env: Database environment forwarded to ``draws``.
        envelope: Envelope object forwarded to ``add_envelope``.

    Returns:
        The draws restricted to ``required_columns``, or ``None`` when
        ``check_data_format`` returns a falsy result.

    Raises:
        SystemExit: With status 1 when the initial read fails with anything
            other than ``DirectoryNotFoundException``.
    """
    logger = logging.getLogger('io.import_cod_model_draws')
    try:
        data = draws(cause_id, lids=[int(location_id)], sids=[int(sex_id)],
                     yids=filter_years, status=model_version_id, n_draws='max',
                     db_env=db_env)
    except DirectoryNotFoundException:
        # NOTE: an exception raised by this fallback read is not covered by
        # the generic handler below; it propagates to the caller.
        data = draws(cause_id, lids=[int(location_id)], sids=[int(sex_id)],
                     yids=filter_years, status=model_version_id, n_draws='max',
                     db_env='prod')
    except Exception:
        # filter_years defaults to None; joining over None would raise a
        # TypeError here and hide the read failure being reported.
        if filter_years is None:
            years = 'None'
        else:
            years = ','.join(str(y) for y in filter_years)
        logger.exception('Failed to read\n'
                         'Problem demographics were mvid %s cause %s, '
                         'location %s, sex %s, and years %s',
                         model_version_id, cause_id, location_id, sex_id,
                         years)
        sys.exit(1)

    keep_age_group_ids = list(range(2, 22)) + [30, 31, 32, 235]
    data = data[data.age_group_id.isin(keep_age_group_ids)]
    data = add_envelope(data, envelope)

    r = check_data_format(data, required_columns)
    if not r:
        print(model_version_id, r)
        return None
    return data.loc[:, required_columns]
Пример #2
0
def import_cod_model_draws(model_version_id, location_id, cause_id, sex_id,
                           required_columns, filter_years=None):
    """Import and validate CODEm/custom model draws.

    Draws are fetched with ``draws`` (``source='codem'``) for a single
    cause/location/sex, restricted to age_group_ids 2-21, 30, 31, 32 and 235,
    and then checked with ``check_data_format``.

    Args:
        model_version_id: Model version, forwarded to ``draws`` as ``status``.
        location_id: Location to read; coerced with ``int``.
        cause_id: Cause whose draws are requested.
        sex_id: Sex to read; coerced with ``int``.
        required_columns: Columns validated by ``check_data_format`` and kept
            in the returned frame.
        filter_years: Optional iterable of year ids forwarded as ``year_ids``.

    Returns:
        The draws restricted to ``required_columns``, or ``None`` when
        ``check_data_format`` returns a falsy result.

    Raises:
        SystemExit: With status 1 when reading or age-filtering the draws
            fails for any reason.
    """
    logger = logging.getLogger('io.import_cod_model_draws')
    try:
        data = draws(gbd_ids={'cause_ids': [cause_id]}, source='codem',
                     location_ids=[int(location_id)], sex_ids=[int(sex_id)],
                     year_ids=filter_years, status=model_version_id)
        # list(...) keeps the concatenation valid on Python 3, where range()
        # is no longer a list.
        keep_age_group_ids = list(range(2, 22)) + [30, 31, 32, 235]
        # .loc replaces the deprecated .ix indexer; with a boolean mask the
        # two select the same rows.
        data = data.loc[data.age_group_id.isin(keep_age_group_ids)]
    except Exception:
        # filter_years defaults to None; joining over None would raise a
        # TypeError here and hide the failure being reported.
        if filter_years is None:
            years = 'None'
        else:
            years = ','.join(str(y) for y in filter_years)
        logger.exception('Failed to read\n'
                         'Problem demographics were mvid {} cause {}, '
                         'location {}, sex {}, and years {}'
                         .format(model_version_id, cause_id, location_id,
                                 sex_id, years))
        # Exit non-zero so the failure is visible to whatever launched us;
        # a bare sys.exit() reports success (status 0).
        sys.exit(1)
    r = check_data_format(data, required_columns)
    if not r:
        # Single pre-formatted argument prints identically on Python 2 and 3.
        print('{} {}'.format(model_version_id, r))
        return None
    # Label-based column selection; assumes required_columns are column
    # labels already verified by check_data_format -- TODO confirm.
    data = data.loc[:, required_columns]
    return data
Пример #3
0
def import_cod_model_draws(model_version_id,
                           location_id,
                           acause,
                           sex_name,
                           required_columns,
                           filter_years=None):
    """Import and validate CODEm/custom model draws from an HDF file.

    Reads the file at ``DRAWS_PATH`` with ``read_hdf_draws`` (key ``"data"``)
    for one location and sex, restricted to ages 2-21 and ``filter_years``,
    tags the rows with ``model_version_id`` and checks the result with
    ``check_data_format``.

    Args:
        model_version_id: Stored in the ``model_version_id`` column of the
            returned data.
        location_id: Location forwarded to ``read_hdf_draws``.
        acause: Not used by this function.
        sex_name: ``'male'`` or ``'female'``; any other value raises
            ``KeyError``.
        required_columns: Columns validated by ``check_data_format`` and kept
            in the returned frame.
        filter_years: Optional years forwarded to ``read_hdf_draws``.

    Returns:
        The draws restricted to ``required_columns``, or ``None`` when the
        file cannot be read (``IOError``) or ``check_data_format`` returns a
        falsy result.
    """
    sex_dict = {'male': 1, 'female': 2}
    logger = logging.getLogger('io.import_cod_model_draws')
    # Get file path for CoD model
    draws_filepath = DRAWS_PATH
    try:
        # Read in file
        data = read_hdf_draws(draws_filepath,
                              location_id,
                              key="data",
                              filter_sexes=[sex_dict[sex_name]],
                              # list(...) keeps this a list on Python 3 too.
                              filter_ages=list(range(2, 22)),
                              filter_years=filter_years)
        data['model_version_id'] = model_version_id
    except IOError:
        # Logger.warn is a deprecated alias of Logger.warning.
        logger.warning('Failed to read {}'.format(draws_filepath))
        print(model_version_id)
        return None
    logger.info('Reading {}'.format(draws_filepath))
    r = check_data_format(data, required_columns)
    if not r:
        # Single pre-formatted argument prints identically on Python 2 and 3.
        print('{} {}'.format(model_version_id, r))
        return None
    # .loc replaces the deprecated .ix indexer; assumes required_columns are
    # column labels already verified by check_data_format -- TODO confirm.
    data = data.loc[:, required_columns]
    return data
Пример #4
0
                                              'parent_id', 'year_id',
                                              'location_id']]

        # Make envelope object
        logging.info("Make envelope object")
        envelope = Envelope(envelope_data, envelope_index_columns,
                            data_columns)

        # Read in draw files
        logging.info("Reading in best model draws")
        raw_data = read_all_model_draws(best_models, raw_data_columns,
                                        filter_years=eligible_year_ids,
                                        env_df=envelope_summ)
        # Check formatting
        logging.info("Checking in best model draws")
        check_data_format(raw_data, raw_data_columns, fail=True)

        # Filter out zeros
        logging.info("Filtering out zeroes")
        data, zeroes = filter_zeros(raw_data, data_columns)

        # Format data for rescale
        logging.info("Formatting data for rescale")
        formatted_data, model_data = format_for_rescale(
            data,
            eligible_data,
            index_columns,
            data_columns,
            envelope_column,
            parent_dir + '/debug',
            '{}_{}_{}'.format(output_version_id, location_id, sex_id))