Example #1
def get_locations():
    '''
    Description: get the list of most-detailed location_ids to iterate
    through for every part of the maternal custom process, down to one
    level of subnationals

    Args: None

    Output: (list) location_ids
    '''
    enginer = dbapis.engine_factory()
    loc_set_version_query = '''
    SELECT location_set_version_id FROM shared.location_set_version
    WHERE location_set_id = 35 AND end_date IS NULL'''
    location_set_version = query_tools.query_2_df(
        loc_set_version_query,
        engine=enginer.engines["cod_prod"]).loc[
        0, 'location_set_version_id']

    query = ('call shared.view_location_hierarchy_history(%s)'
             % location_set_version)
    locations_df = query_tools.query_2_df(query,
                                          engine=enginer.engines["cod_prod"])
    locations = (locations_df[locations_df['most_detailed'] == 1][
                 'location_id'].tolist())

    return locations
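
A minimal usage sketch for the function above, assuming the internal db_tools package and the "cod_prod" engine are reachable:

# iterate the maternal custom process over each most-detailed location
locations = get_locations()
for loc in locations:
    print('processing location_id %s' % loc)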
Example #2
def create_tables(gbd_env):
    code_version = subprocess.check_output(
        ["git", "rev-parse", "HEAD"]).decode('utf-8').strip()
    query = ('SELECT MAX(kit_version_id) AS kit_vers '
             'FROM gbd.kit_version '
             'WHERE kit_id = 2')
    vers_df = query_tools.query_2_df(query, engine=enginer.engines[gbd_env])
    kit_id = vers_df.loc[0, 'kit_vers']
    kit_id = int(kit_id) + 1

    query = ('CALL gbd.new_gbd_process_version(3, 12, "MMR Upload", "%s", '
             'NULL, %s)' % (code_version, kit_id))
    gbd_process = query_tools.query_2_df(query,
                                         engine=enginer.engines[gbd_env])
    return gbd_process
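
A short usage sketch; the 'gbd_prod' engine key is taken from the other snippets here, and the module-level enginer is assumed to be defined as in Example #15:

gbd_process = create_tables('gbd_prod')
print(gbd_process)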
Example #3
def get_bundle_id_from_crosswalk(
        crosswalk_version_id: Optional[int],
        session: orm.Session
) -> Optional[int]:
    """
    Gets a bundle ID from a crosswalk version ID.

    Args:
        crosswalk_version_id: the crosswalk version ID linked to a bundle ID
        session: an epi database session

    Returns:
        Bundle ID linked to the passed crosswalk version ID.

    Raises:
        ValueError: if no bundle is associated with the given crosswalk
            version ID
    """
    if not crosswalk_version_id:
        return None

    bundle_id_df = query_tools.query_2_df(
        queries.GET_BUNDLE_FROM_CROSSWALK,
        session=session,
        parameters={'crosswalk_version_id': crosswalk_version_id}
    )
    if bundle_id_df.empty:
        raise ValueError(
            'Could not retrieve a bundle associated with crosswalk version '
            f'{crosswalk_version_id}'
        )

    return bundle_id_df.at[0, columns.BUNDLE_ID]
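
A minimal calling sketch with a plain SQLAlchemy session; the DSN and the crosswalk version ID are placeholders:

from sqlalchemy import create_engine, orm

engine = create_engine('mysql://USER:PASSWORD@HOST/epi')  # placeholder DSN
session = orm.sessionmaker(bind=engine)()
try:
    bundle_id = get_bundle_id_from_crosswalk(12345, session)  # placeholder ID
finally:
    session.close()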
Example #4
def get_gbd_covariate_ids(
        gbd_covariates: List[str],
        session: orm.Session
) -> pd.DataFrame:
    """
    Gets all GBD covariate IDs and covariate short names.

    Args:
        gbd_covariates: List of GBD covariate short names for which to
            pull covariate IDs
        session: session with the epi database server

    Returns:
        Dataframe containing GBD covariate IDs and covariate short names.

    Raises:
        ValueError: if a covariate short name was specified that does not
            exist in the database
    """
    covariate_df = query_tools.query_2_df(queries.GET_COVARIATE_IDS,
                                          session=session)
    covariates = set(covariate_df[columns.COVARIATE_NAME_SHORT].unique())
    missing_covariates = set(gbd_covariates) - covariates
    if missing_covariates:
        raise ValueError(
            f'Invalid {parameters.GBD_COVARIATES}: '
            f'{", ".join(sorted(missing_covariates))}'
        )
    return covariate_df
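
Usage might look like the following, reusing an open epi session; the covariate short names are illustrative, and columns.COVARIATE_ID is an assumed constant alongside columns.COVARIATE_NAME_SHORT:

covariate_df = get_gbd_covariate_ids(['haqi', 'sdi'], session)  # illustrative names
covariate_ids = covariate_df[columns.COVARIATE_ID].tolist()  # assumed constant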
Example #5
def test_insert_defaults(covariate_db, cov_conn_def):
    """Insert a new row into the covariate.model_version without a best_start,
    best_end, best_user, best_description column to check the default value."""
    covariate_db.add_conn_def(cov_conn_def)
    sesh = get_session(cov_conn_def)

    insert_q = """
        INSERT INTO covariate.model_version (model_version_id, covariate_id, 
            description, code_version, status, is_best, gbd_round_id)
        VALUES ('1', '1', 'testing defaults', 'version 1', '1', '0', '5');
    """
    exec_query(insert_q, session=sesh)
    sesh.commit()

    select_q = """
        SELECT * FROM covariate.model_version WHERE model_version_id = 1;
    """
    res = query_2_df(select_q, session=sesh)

    assert not res.empty
    assert not res.at[0, 'best_user']
    assert not res.at[0, 'best_description']
    assert not res.at[0, 'best_start']
    assert not res.at[0, 'best_end']
    # we need to close the session or the next test will hang
    sesh.close()
Example #6
def test_insert_new_row(covariate_db, cov_conn_def):
    """Tests the addition of a single row to the covariate.model table."""
    initialize_and_fill_covariate_db(covariate_db, cov_conn_def)
    sesh = get_session(cov_conn_def)

    insert_q = """
        INSERT INTO covariate.model (model_version_id, year_id, location_id, 
            sex_id, age_group_id, mean_value, upper_value, lower_value)
        VALUES ('1', '2017', '1', '2', '22', '10.4', '11.2', '9.6'),
               ('1', '2016', '1', '2', '22', '10.5', '11.4', '9.4'),
               ('1', '2015', '1', '2', '22', '10.6', '11.6', '9.2'),
               ('1', '2014', '1', '2', '22', '10.7', '11.8', '9'),
               ('1', '2013', '1', '2', '22', '10.8', '12', '8.8');
    """
    exec_query(insert_q, session=sesh)
    sesh.commit()

    select_q = """
        SELECT * FROM covariate.model WHERE model_version_id = 1;
    """
    res = query_2_df(select_q, session=sesh)

    assert not res.empty
    # We added five entries associated with model_version_id 1
    assert len(res) == 5
    # We have to close the session or the next test will hang
    sesh.close()
Example #7
def get_locations(self, location_set_id):
    query = ('SELECT location_id, most_detailed FROM shared.'
             'location_hierarchy_history WHERE location_set_version_id=('
             'SELECT location_set_version_id FROM shared.location_set_'
             'version WHERE location_set_id = %s AND end_date IS NULL) '
             'AND most_detailed = 1' % location_set_id)
    loc_df = query_tools.query_2_df(
        query, engine=self.enginer.engines["cod_prod"])
    return loc_df
Example #8
def _verify_no_data_present(table, schema, model_version_id, session):
    qry = f"""
    SELECT count(*) as count from {schema}.{table}
    WHERE
    model_version_id = :model_version_id"""
    count = query_tools.query_2_df(
        qry, session=session,
        parameters={'model_version_id': model_version_id})['count'].iat[0]
    return count == 0
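
Unlike the %-interpolated queries in the older snippets, this helper binds :model_version_id through the parameters dict. A hedged usage sketch; the table and schema names are placeholders:

# abort an upload if draws already exist for this model version
if not _verify_no_data_present('model_estimate_final', 'gbd',  # placeholder names
                               model_version_id=1234, session=session):
    raise RuntimeError('data already present for model_version_id 1234')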
Example #9
def get_model_version_ids(sesh):
    """Return a list of model_version_ids.

    Arguments:
        sesh (sqlalchemy.orm.session.Session): connection to our database
            server.
    """
    q = """
        SELECT model_version_id
        FROM covariate.model_version_new
        ORDER BY model_version_id
    """
    res = query_2_df(q, session=sesh)
    return res.model_version_id.tolist()
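
A short usage sketch, reusing the get_session helper from the tests above:

sesh = get_session(cov_conn_def)
mvids = get_model_version_ids(sesh)
sesh.close()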
Example #10
def data_rich_data_poor(self, df):
    '''Splits a given dataframe into two dataframes, based on
    data-rich or data-poor status, and returns the two dfs'''
    query = ('SELECT location_id, parent_id FROM shared.'
             'location_hierarchy_history WHERE location_set_version_id=('
             'SELECT location_set_version_id FROM shared.location_set_'
             'version WHERE location_set_id = 43 AND end_date IS NULL)')
    loc_df = query_tools.query_2_df(
        query, engine=self.enginer.engines["cod_prod"])
    merged = df.merge(loc_df, on='location_id', how='inner')
    data_rich = merged.query("parent_id==44640").copy()
    data_rich.drop('parent_id', axis=1, inplace=True)
    data_poor = merged.query("parent_id==44641").copy()
    data_poor.drop('parent_id', axis=1, inplace=True)
    return data_rich, data_poor
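
Inside the owning class, a call might look like this; draws_df is an illustrative dataframe that must carry a location_id column:

data_rich, data_poor = self.data_rich_data_poor(draws_df)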
Example #11
def get_asfr(self):
    query = ('SELECT '
             'model.location_id, model.year_id, model.age_group_id, '
             'model.sex_id, model.mean_value AS asfr FROM covariate.model '
             'JOIN covariate.model_version ON model.model_version_id='
             'model_version.model_version_id JOIN covariate.data_version '
             'ON model_version.data_version_id=data_version.'
             'data_version_id JOIN shared.covariate ON data_version.'
             'covariate_id=covariate.covariate_id '
             'AND covariate.last_updated_action!="DELETE" AND is_best=1 '
             'AND covariate.covariate_id= 13 AND model.age_group_id '
             'BETWEEN 7 AND 15 AND model.year_id = %s' % self.year_id)
    asfr = query_tools.query_2_df(query,
                                  engine=self.enginer.engines["cov_prod"])
    asfr['sex_id'] = 2
    loc_df = self.get_locations(35)
    asfr = asfr.merge(loc_df, on='location_id', how='inner')
    asfr.drop('most_detailed', axis=1, inplace=True)
    return asfr
Example #12
def execute_select(query, conn_def='epi', params=None):
    '''Run a SQL query and return a dataframe. Uses the ENVIRONMENT_NAME
    environment variable to determine the prod/dev database server.

    Args:
        query(str): sql statement to execute
        conn_def(str, 'epi'): one of 'epi' or 'cod'
        params(dict, None): optional bind parameters for the query

    Returns:
        Pandas DataFrame
    '''
    engine = get_engine(conn_def=conn_def,
                        env=settings['env_variables']['ENVIRONMENT_NAME'])
    # dispose of any pooled connections so the session below opens fresh ones
    engine.dispose()
    Session = sessionmaker(bind=engine)
    sesh = Session()
    try:
        df = qt.query_2_df(query, session=sesh, parameters=params)
    finally:
        sesh.close()
    return df
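
A hedged usage sketch, borrowing the epi.model_version table from the other examples; the modelable_entity_id value is illustrative:

df = execute_select(
    'SELECT model_version_id FROM epi.model_version '
    'WHERE modelable_entity_id = :me_id',
    conn_def='epi',
    params={'me_id': 1234})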
Example #13
def test_insert_new_row(covariate_db, cov_conn_def):
    """Inserts a new row into the model_version table and queries it to make
    sure that the model version ids match."""
    covariate_db.add_conn_def(cov_conn_def)
    sesh = get_session(cov_conn_def)

    dummy_mvid = 1
    insert_q = """
        INSERT INTO covariate.model_version (model_version_id, covariate_id, 
            description, code_version, status, is_best, gbd_round_id)
        VALUES ('{mvid}', '1', 'best dummy round 5', 'vers 1', '1', '0', '5');
    """.format(mvid=dummy_mvid)
    exec_query(insert_q, session=sesh)
    sesh.commit()

    select_q = """
        SELECT * FROM covariate.model_version WHERE model_version_id = {mvid};
    """.format(mvid=dummy_mvid)

    res = query_2_df(select_q, session=sesh)
    assert not res.empty
    assert res.at[0, 'model_version_id'] == dummy_mvid
    # we need to close the session or the next test will hang
    sesh.close()
Example #14
only_subnats = adjust_df[adjust_df.subnationals == 1].copy()
only_subnats.rename(columns={'location_id': 'parent_id'}, inplace=True)

query = '''SELECT
    location_id, parent_id
FROM
    shared.location_hierarchy_history
WHERE
    location_set_id = 35
        AND location_set_version_id = (SELECT
            location_set_version_id
        FROM
            shared.location_set_version
        WHERE
            location_set_id = 35 AND end_date IS NULL)'''
lhh = query_tools.query_2_df(query, engine=enginer.engines['cod_prod'])
only_subnats = only_subnats.merge(lhh, on='parent_id', how='inner')
only_subnats.drop('parent_id', axis=1, inplace=True)
adjust_df = adjust_df[adjust_df.subnationals == 0]
adjust_df = pd.concat([adjust_df, only_subnats])

# get original codem envelope
env_fname = '%s/deaths_female.h5' % envelope_dir
rlog.log('envelope is %s' % env_fname)
env = pd.read_hdf(env_fname, 'data')

# we only want maternal age groups and most-detailed locations
env = env[(env.age_group_id.isin(range(7, 16))) & (env.location_id.isin(locs))]

# get prop from Late dismod model
query = ('''SELECT
Example #15
from db_tools import dbapis, query_tools
import subprocess
import time

# create enginer
enginer = dbapis.engine_factory()

query = ('''
         SELECT
            *
         FROM
            cod.model_version
         WHERE
            cause_id = 366 AND model_version_type_id = 3 AND is_best = 1''')
while True:
    codem_df = query_tools.query_2_df(query,
                                      engine=enginer.engines["cod_prod"])
    if len(codem_df) == 1:
        break
    else:
        print "CODEm model still running"
        time.sleep(600)

call = ('qsub -cwd -P proj_custom_models -o FILEPATH '
        '-e FILEPATH -pe multi_slot 10 '
        '-N master_maternal cluster_shell.sh 00_master_maternal.py')
subprocess.call(call, shell=True)
Example #16

import fnmatch
import os

import pandas as pd

# look up the best dalynator version id (same query as in Example #20)
query = ('SELECT '
         'distinct(val) AS daly_id '
         'FROM '
         'gbd.gbd_process_version_metadata gpvm '
         'JOIN '
         'gbd.gbd_process_version USING (gbd_process_version_id) '
         'JOIN '
         'gbd.compare_version_output USING (compare_version_id) '
         'WHERE '
         'compare_version_id = (SELECT '
         'compare_version_id '
         'FROM '
         'gbd.compare_version '
         'WHERE '
         'compare_version_status_id = 1 '
         'AND gbd_round_id = 3) '
         'AND gpvm.metadata_type_id = 5')
model_vers = query_tools.query_2_df(
    query, engine=enginer.engines["gbd_prod"]).loc[0, 'daly_id']

# load dalynator draws for the appropriate cause
dalynator_dir = '/ihme/centralcomp/dalynator/%s/draws/hdfs/' % model_vers
files = []
for root, dirnames, filenames in os.walk(dalynator_dir):
    for filename in fnmatch.filter(filenames, '*%s.h5' % year_id):
        files.append(os.path.join(root, filename))


def read_file(f):
    return pd.read_hdf(f, 'data', where=[("cause_id==%d & measure_id==1"
                                          " & metric_id==1 & sex_id==2"
                                          " & rei_id==0") % cause_id])

draw_list = []
Example #17
import os
import re
import sys
from datetime import datetime

try:
    from db_tools import dbapis, query_tools
    import db_process_upload
    import maternal_fns
except ImportError:
    sys.path.append(str(os.getcwd()).rstrip('/mmr'))
    from db_tools import dbapis, query_tools
    import db_process_upload
    import maternal_fns

enginer = dbapis.engine_factory()
query = ('SELECT cause_id FROM shared.cause_hierarchy_history '
         'WHERE cause_set_id = 8 AND cause_set_version_id = '
         '(SELECT cause_set_version_id FROM shared.cause_set_version '
         'WHERE cause_set_id = 8 and end_date IS NULL)')
causes = (query_tools.query_2_df(
    query, engine=enginer.engines["cod_prod"])['cause_id'].tolist())

# set out directory
date_regex = re.compile(r'\W')
date_unformatted = str(datetime.now())[0:13]
date_str = date_regex.sub('_', date_unformatted)
out_dir = '/ihme/centralcomp/maternal_mortality/mmr/%s' % date_str
arc_out_dir = '%s/multi_year' % out_dir
mmr_out_dir = '%s/single_year' % out_dir
for directory in (out_dir, arc_out_dir, mmr_out_dir):
    if not os.path.exists(directory):
        os.makedirs(directory)
Example #18
        c = read_json(credential_path)
        return c[key]['user'], c[key]['password']

    user, password = get_credentials(
        "save_results_cod",
        credential_path=('/ihme/code/central_comp/secrets/'
                         'db_credentials.json'))
    enginer.define_engine(engine_name='cod_prod',
                          server_name='cod',
                          default_schema='cod',
                          envr='prod',
                          user=user,
                          password=password,
                          replace=True)
    try:
        unmark = query_tools.query_2_df(unmark_sql_statement,
                                        engine=enginer.engines['cod_prod'])
    except Exception:
        print("caught the 'no rows returned' bug error of sqlalchemy")

# -------------------------------------------------------------------------

##############################################################
# 03: GET FINAL DEATH COUNTS
# For each subcause, multiply the cause
# fractions from step 1 with the envelope from the late correction of codem,
# giving estimates of death counts for each subtype.
##############################################################

# wait for cause fractions to be saved to the database if it was run this round
maternal_fns.wait('adjustment_save_results', 300)
Example #19
def get_best_date(enginer, step, dep_type, dep_id=""):
    '''
    Description: Queries the database for the best_start date of the most
    recent model version for the given cause_id/modelable_entity_id/process.
    Also pulls the most recent date of timestamped files.
    See dependency_map.csv for context.

    Args:
        enginer: connection to the db
        step: step of the process we're on
        dep_type: one of "cause_id", "modelable_entity_id", "filename",
            or "process"
        dep_id: the modelable_entity_id, codcorrect id, codem id, or file
            directory for which you want to get the best_start date.
            NOTE: dep_id is REQUIRED for the dismod process

    Output: (datetime) best start date
    '''

    if dep_type == 'cause_id':
        query = '''
            SELECT best_start from cod.model_version where
            best_end IS NULL AND best_start IS NOT NULL AND
            best_start > "2015-01-01 00:00:01" AND cause_id = %s''' % dep_id
        most_recent_best_df = query_tools.query_2_df(
            query, engine=enginer.engines["cod_prod"])
        if len(most_recent_best_df) == 0:
            most_recent_best = datetime(1800, 1, 1)
        else:
            most_recent_best = (most_recent_best_df.loc[0, 'best_start']
                                .to_pydatetime())

    elif dep_type == 'filename':
        fdict = {}
        time = ' 00:00:00'
        for filename in os.listdir(dep_id):
            if 'pafs_draws' in filename:
                if 'hdf' not in filename and not filename.startswith('.'):
                    d = filename.split('_')[2:5]
                    d[2] = d[2].rstrip(".csv")
                    ord_date = date(year=int(d[0]), month=int(d[1]),
                                    day=int(d[2])).toordinal()
                    fdict[ord_date] = d
        ord_dates = sorted(fdict)
        most_recent_str = '-'.join(fdict[ord_dates[-1]]) + time
        most_recent_best = datetime.strptime(
            most_recent_str, "%Y-%m-%d %H:%M:%S")

    elif dep_type == 'modelable_entity_id':
        query = '''
            SELECT best_start from epi.model_version where is_best = 1 AND
            best_end IS NULL AND modelable_entity_id = %s''' % dep_id
        most_recent_best_df = query_tools.query_2_df(
            query, engine=enginer.engines["epi_prod"])
        if len(most_recent_best_df) == 0:
            most_recent_best = datetime(1800, 1, 1)
        else:
            most_recent_best = (most_recent_best_df.loc[0, 'best_start']
                                .to_pydatetime())

    elif dep_type == 'process':
        query = '''
            SELECT best_start FROM cod.output_version WHERE
            is_best = 1 AND env_version = (SELECT MAX(env_version)
            FROM cod.output_version)'''
        most_recent_best_df = query_tools.query_2_df(
            query, engine=enginer.engines["cod_prod"])
        if len(most_recent_best_df) == 0:
            most_recent_best = datetime(1800, 1, 1)
        elif most_recent_best_df.loc[0, 'best_start'] is None:
            most_recent_best = datetime(1800, 1, 1)
        else:
            most_recent_best = (most_recent_best_df.loc[0, 'best_start']
                                .to_pydatetime())
    else:
        raise ValueError(
            'dep_type must be "cause_id", "modelable_entity_id", '
            '"filename", or "process"'
        )

    return most_recent_best
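
A minimal calling sketch; cause_id 366 is borrowed from the polling script in Example #15, and the step value is illustrative:

enginer = dbapis.engine_factory()
best = get_best_date(enginer, step=2, dep_type='cause_id', dep_id=366)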
Example #20
def get_model_vers(process, model_id=None, step=None):
    '''
    Description: Queries the database for the best model_version for the given
    model_id. Can do this for Dismod, Codem, or Codcorrect outputs.

    Args:
        process: (str) 'dismod', 'codem', or 'codcorrect'
        model_id: modelable_entity_id for dismod, cause_id for codem, or
            None for codcorrect
        step: (int, optional) selects the codem query variant for step 2

    Output: (int) best model_version
    '''
    enginer = dbapis.engine_factory()
    enginer.servers["gbd"] = {"prod": "modeling-gbd-db.ihme.washington.edu"}
    enginer.define_engine(strConnection)
    if model_id is not None:
        model_id = int(model_id)

    if process == 'dismod':
        if model_id is None:
            raise ValueError('Must specify a me_id')
        else:
            query = '''SELECT model_version_id from epi.model_version WHERE
            is_best = 1 AND best_end IS NULL AND modelable_entity_id =
            %d''' % model_id
            model_vers_df = query_tools.query_2_df(
                query, engine=enginer.engines['epi_prod'])
            if len(model_vers_df) > 0:
                model_vers = model_vers_df.loc[0, 'model_version_id']
            else:
                model_vers = None
    elif process == 'codem':
        if model_id is None:
            raise ValueError('Must specify a cause_id')
        if step == 2:
            query = '''SELECT MAX(model_version_id) as model_version_id from
              cod.model_version where best_start IS NOT NULL AND
              best_start > '2015-01-01 00:00:01'
              AND cause_id = %d and model_version_type_id = 3''' % model_id
        else:
            query = '''SELECT model_version_id from cod.model_version where
              best_end IS NULL AND best_start IS NOT NULL AND
              best_start > '2015-01-01 00:00:01'
              AND cause_id = %d''' % model_id
        model_vers = query_tools.query_2_df(query, engine=enginer.engines[
            "cod_prod"]).loc[0, 'model_version_id']
    else:
        query = ('SELECT  '
                 'distinct(val) AS daly_id '
                 'FROM '
                 'gbd.gbd_process_version_metadata gpvm '
                 'JOIN '
                 'gbd.gbd_process_version USING (gbd_process_version_id) '
                 'JOIN '
                 'gbd.compare_version_output USING (compare_version_id) '
                 'WHERE '
                 'compare_version_id = (SELECT '
                 'compare_version_id '
                 'FROM '
                 'gbd.compare_version '
                 'WHERE '
                 'compare_version_status_id = 1 '
                 'AND gbd_round_id = 3) '
                 'AND gpvm.metadata_type_id = 5')
        model_vers = query_tools.query_2_df(
            query, engine=enginer.engines["gbd_prod"]).loc[0, 'daly_id']
    return model_vers
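
Usage for the three processes might look like this; the me_id is illustrative, while cause_id 366 appears in the polling script of Example #15:

dismod_vers = get_model_vers('dismod', model_id=1234)  # illustrative me_id
codem_vers = get_model_vers('codem', model_id=366, step=2)
codcorrect_vers = get_model_vers('codcorrect')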
Example #21
def __init__(self, cluster_dir, year_id, input_me, output_me,
             infertile_me):
    Base.__init__(self, cluster_dir, year_id, input_me, output_me)
    self.infertile_me = infertile_me
    k_cols, i_cols, d_cols = self.keep_cols()
    # get incidence draws
    draws = self.get_draws()
    # create new incidence
    asfr = self.get_asfr()
    new_inc = self.get_new_incidence(draws, asfr)
    self.output(new_inc, output_me, 6)
    # create new prevalence
    duration = self.create_draws(0.01918, 0.0137, 0.0274)
    new_prev = self.mul_draws(new_inc, duration)
    self.output(new_prev, output_me, 5)
    # create incidence of infertility & output as input data for that model
    infertility_sev = self.create_draws(0.09, 0.077, 0.104)
    infert_inc = self.mul_draws(new_inc, infertility_sev)
    # get mean/upper/lower
    infert_inc.set_index(i_cols, inplace=True)
    infert_inc = infert_inc.transpose().describe(
        percentiles=[.025, .975]).transpose()[['mean', '2.5%', '97.5%']]
    infert_inc.rename(columns={'2.5%': 'lower', '97.5%': 'upper'},
                      inplace=True)
    infert_inc.index.rename(i_cols, inplace=True)
    infert_inc.reset_index(inplace=True)
    # get year_start, year_end
    infert_inc['year_start'] = infert_inc['year_id']
    infert_inc['year_end'] = infert_inc['year_id']
    # get age_start and age_end
    query = "SELECT age_group_id, age_group_name FROM shared.age_group"
    age_df = query_tools.query_2_df(
        query, engine=self.enginer.engines['cod_prod'])
    age_df = age_df.query("age_group_id < 22")
    age_df = age_df[age_df.age_group_name.str.contains('to')]
    age_df['age_start'], age_df['age_end'] = zip(
        *age_df['age_group_name'].apply(lambda x: x.split(' to ', 1)))
    age_df['age_start'] = age_df['age_start'].astype(int)
    age_df['age_end'] = age_df['age_end'].astype(int)
    infert_inc = infert_inc.merge(age_df, on='age_group_id', how='left')
    # get location_name
    query = ('SELECT location_id, location_ascii_name AS location_name '
             'FROM shared.location_hierarchy_history LEFT JOIN shared.'
             'location USING(location_id) WHERE '
             'location_set_version_id=(SELECT location_set_version_id '
             'FROM shared.location_set_version WHERE location_set_id = 9 '
             'and end_date IS NULL) AND most_detailed=1')
    loc_df = query_tools.query_2_df(
        query, engine=self.enginer.engines['cod_prod'])
    infert_inc = infert_inc.merge(loc_df, on='location_id', how='inner')
    # get sex
    infert_inc['sex'] = infert_inc['sex_id'].map({2: 'Female'})
    infert_inc.drop(
        ['year_id', 'sex_id', 'age_group_id', 'age_group_name'],
        axis=1,
        inplace=True)
    # add other necessary cols for the epi uploader
    infert_inc['modelable_entity_id'] = infertile_me
    query = ('SELECT modelable_entity_name FROM epi.modelable_entity '
             'WHERE modelable_entity_id = %s' % infertile_me)
    infert_inc['modelable_entity_name'] = (query_tools.query_2_df(
        query, engine=self.enginer.engines['epi_prod']).loc[
            0, 'modelable_entity_name'])
    infert_inc['nid'] = 254237
    empty_cols = [
        'row_num', 'parent_id', 'input_type', 'underlying_nid',
        'underlying_field_citation_value', 'field_citation_value',
        'file_path', 'page_num', 'table_num', 'ihme_loc_id',
        'smaller_site_unit', 'site_memo', 'age_demographer',
        'standard_error', 'effective_sample_size', 'cases', 'sample_size',
        'design_effect', 'measure_adjustment', 'recall_type_value',
        'sampling_type', 'response_rate', 'case_name', 'case_definition',
        'case_diagnostics', 'group', 'specificity', 'group_review',
        'note_modeler', 'note_SR', 'extractor', 'data_sheet_filepath'
    ]
    for col in empty_cols:
        infert_inc[col] = np.nan
    infert_inc['sex_issue'] = 0
    infert_inc['year_issue'] = 0
    infert_inc['age_issue'] = 0
    infert_inc['measure'] = "incidence"
    infert_inc['measure_issue'] = 0
    infert_inc['representative_name'] = "Unknown"
    infert_inc['urbanicity_type'] = "Unknown"
    infert_inc['unit_type'] = "Person"
    infert_inc['unit_value_as_published'] = 1
    infert_inc['is_outlier'] = 0
    infert_inc['recall_type'] = "Point"
    infert_inc['uncertainty_type'] = "Confidence interval"
    infert_inc['uncertainty_type_value'] = 95
    infert_inc['source_type'] = "Mixed or estimation"
    self.output_for_epiuploader(infert_inc, infertile_me)
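
Instantiation goes through the Base signature shown at the top of the constructor; the subclass name, path, and IDs below are placeholders rather than values from this snippet:

# hypothetical subclass and argument values, for illustration only
job = InfertilityAdjuster('FILEPATH', 2016, input_me=1111,
                          output_me=2222, infertile_me=3333)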