Example No. 1
def insert_new_sites(df, db):
    '''Insert new entries into cod.site, raising an AssertionError if any of
       the given site names already exist.
    '''

    # check for overlaps before uploading 
    name_df = df[['site_name']].drop_duplicates().dropna()
    unique_names = name_df['site_name'].unique()
    names_clause = "','".join(unique_names)
    names_query = """
        SELECT *
        FROM cod.site
        WHERE site_name IN ('{names_clause}')
    """.format(names_clause=names_clause)
    overlap = ez.query(names_query, conn_def=db)
    if len(overlap) > 0:
        raise AssertionError(
            "Conflicting site_name's already present: \n{overlap}".format(
            overlap=overlap)
        )
    engine = ez.get_engine(db)
    conn = engine.connect()
    df.to_sql('site', conn, if_exists='append', index=False)
    conn.close()
    print("Uploaded new {name_col}s: \n{name_df}".format(
        name_col='site', name_df=df))
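
A minimal usage sketch for the function above. The `ez` helper is an internal module, so the connection-definition name 'cod' below is an assumption, as are the example site names:

import pandas as pd

# Hypothetical new sites; the column name must match cod.site.site_name.
new_sites = pd.DataFrame({'site_name': ['Example Clinic A', 'Example Clinic B']})

# 'cod' is an assumed connection definition; the function raises if any of
# these names are already present in cod.site.
insert_new_sites(new_sites, db='cod')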
Example No. 2
def unmark_current_best(self):
    now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    q = """
        UPDATE epi.output_version
        SET best_end='{be}', is_best=0
        WHERE is_best=1""".format(be=now)
    eng = ezfuncs.get_engine(conn_def="como-epi")
    res = eng.execute(q)
    return res
Example No. 3
def insert_extract_type(extract_type, conn_def='ADDRESS'):
    """Insert a new extract type."""
    assert pull_extract_type_id(extract_type, conn_def=conn_def) is None, \
        "Already exists"
    engine = ezfuncs.get_engine(conn_def)
    conn = engine.connect()
    conn.execute("""
        INSERT INTO cod.claude_extract_type
            (extract_type)
            VALUES
            ("{}")
    """.format(extract_type))
    conn.close()
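
For reference, a sketch of the same insert using a SQLAlchemy bound parameter instead of str.format, which sidesteps quoting problems in the extract_type value. This variant is not from the original codebase; it assumes the same ezfuncs engine helper:

from sqlalchemy import text

def insert_extract_type_parameterized(extract_type, conn_def='ADDRESS'):
    """Sketch: same insert as above, but with a bound parameter."""
    engine = ezfuncs.get_engine(conn_def)
    # engine.begin() opens a connection and commits on successful exit
    with engine.begin() as conn:
        conn.execute(
            text("INSERT INTO cod.claude_extract_type (extract_type) "
                 "VALUES (:extract_type)"),
            {"extract_type": extract_type})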
Example No. 4
def activate_sequela_set_version(sequela_set_version_id,
                                 gbd_round_id=GBD_ROUND_ID,
                                 validate=True,
                                 conn_def=None):

    if conn_def is not None:
        config.engine = get_engine(conn_def=conn_def)

    with session_scope() as session:
        activate = ActivateSequelaVersion(session, sequela_set_version_id,
                                          gbd_round_id)
        if validate:
            activate.validate_version()
        activate.activate_version()
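
session_scope here is an internal helper; the usual shape of that pattern is roughly the following. This is a sketch only: the real implementation presumably binds its sessionmaker to config.engine.

from contextlib import contextmanager
from sqlalchemy.orm import sessionmaker

Session = sessionmaker()  # assumed to be bound to config.engine in the real code

@contextmanager
def session_scope():
    """Provide a transactional scope around a series of operations."""
    session = Session()
    try:
        yield session
        session.commit()
    except Exception:
        session.rollback()
        raise
    finally:
        session.close()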
Example No. 5
def insert_source_id(source, conn_def='ADDRESS'):
    """Insert a new source_id."""
    if pull_source_id(source, conn_def=conn_def) is None:
        engine = ezfuncs.get_engine(conn_def)
        conn = engine.connect()
        print('\nInserting new source to cod.source table')
        conn.execute("""
            INSERT INTO cod.source
                (source_name)
                VALUES
                ("{}")
        """.format(source))
        conn.close()
    else:
        print("Source already exists")
Example No. 6
def get_engine(conn_def, env):
    '''Retrieve a SQLAlchemy engine.
    '''
    conn_def = conn_def.lower().strip()
    if conn_def not in ['epi', 'cod']:
        raise ValueError("Expected epi or cod, got {}".format(conn_def))

    env = env.lower().strip()
    if env not in ['prod', 'dev']:
        raise ValueError("Expected prod or dev, got {}".format(env))

    # always read prod cod data
    if conn_def == 'cod':
        true_conn_def = 'cod'
    else:
        true_conn_def = "cascade-{}".format(env)
    eng = ezfuncs.get_engine(conn_def=true_conn_def)
    return eng
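
A usage sketch for the wrapper above; whether these connection definitions resolve depends on the local ezfuncs configuration:

# cod data is always read from prod, regardless of env
cod_engine = get_engine('cod', env='dev')

# epi resolves to the environment-specific definition, here 'cascade-dev'
epi_engine = get_engine('epi', env='dev')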
Example No. 7
def create_gbd_process_version(self, gbd_round_id):
    q = """
    CALL gbd.new_gbd_process_version (
        {}, 1, 'Como run', 'fix epi.ov table to accept hash', NULL, NULL)
    """.format(gbd_round_id)
    eng = ezfuncs.get_engine(conn_def="como-gbd")
    res = eng.execute(q)
    row = res.fetchone()
    # the stored procedure returns its metadata as a JSON string in the
    # first column; pull the new process version id out of it
    pv_meta = row[0]
    self.gbd_process_version_id = int(json.loads(
        pv_meta)[0]["gbd_process_version_id"])
    q = """
        INSERT INTO gbd.gbd_process_version_metadata
            (`gbd_process_version_id`, `metadata_type_id`, `val`)
        VALUES
            ({gpvid}, 4, '{cv}')
    """.format(gpvid=self.gbd_process_version_id, cv=self.como_version_id)
    eng.execute(q)
    return pv_meta
Example No. 8
 def mark_best(self, description=""):
     self.unmark_current_best()
     now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
     q = """
         UPDATE epi.output_version
         SET
             best_start='{best_start}',
             best_end=NULL,
             is_best=1,
             best_description='{best_description}',
             best_user='******'
         WHERE output_version_id={ovid}
     """.format(
         best_start=now,
         best_description=description,
         bu=getpass.getuser(),
         ovid=self.como_version_id)
     eng = ezfuncs.get_engine(conn_def="como-epi")
     res = eng.execute(q)
     return res
Example No. 9
def compute_global_ratios(year_id, drawcols):
    eng = ezfuncs.get_engine(conn_def="cod")
    ccv = pd.read_sql("""
        SELECT output_version_id FROM cod.output_version
        WHERE code_version=5 AND is_best=1""", eng).squeeze()
    sg = SuperGopher({
        'file_pattern': '{measure_id}_{location_id}.h5',
        'h5_tablename': 'draws'},
        'FILEPATH/{ccv}/draws'.format(ccv=ccv))
    ylls = sg.content(location_id=1, year_id=year_id, sex_id=[1, 2],
                      measure_id=4)

    ratios = []
    for resid_cid, yldmap in rkey.groupby('input_cause_id'):
        # get the ylls
        these_ylls = ylls[ylls.cause_id == resid_cid]
        ratio_ylls = ylls[ylls.cause_id.isin(yldmap.ratio_cause_id.unique())]

        # aggregate the inputs to the appropriate level
        group_cols = ['age_group_id', 'year_id']
        these_ylls = these_ylls.groupby(group_cols)
        these_ylls = these_ylls[drawcols].sum().mean(axis=1)
        ratio_ylls = ratio_ylls.groupby(group_cols)
        ratio_ylls = ratio_ylls[drawcols].sum().mean(axis=1)

        # compute the ratio
        ratio = these_ylls / ratio_ylls
        ratio = ratio.reset_index()
        ratio = ratio.replace(np.inf, 0)
        ratio = ratio.replace(np.nan, 0)

        ratio["cause_id"] = resid_cid
        ratios.append(ratio)

    df = pd.concat(ratios)
    df_male = df.copy()
    df_male["sex_id"] = 1
    df_female = df.copy()
    df_female["sex_id"] = 2

    return pd.concat([df_male, df_female])
Example No. 10
def upload_sequela_year_summaries(como_dir, process_id, location_id,
                                  measure_id):
    eng = ezfuncs.get_engine(conn_def="como-gbd")
    for tn in ['single_year', 'multi_year']:
        try:
            if tn == 'single_year':
                cols = ",".join([
                    'location_id', 'year_id', 'age_group_id', 'sex_id',
                    'measure_id', 'metric_id', 'sequela_id', 'val', 'lower',
                    'upper'
                ])
            elif tn == 'multi_year':
                cols = ",".join([
                    'location_id', 'year_start_id', 'year_end_id',
                    'age_group_id', 'sex_id', 'measure_id', 'sequela_id',
                    'metric_id', 'val', 'lower', 'upper'
                ])

            summdir = os.path.join(como_dir, 'summaries', "sequela")
            summary_file = os.path.join(
                summdir, "%s_%s_%s.csv" % (measure_id, location_id, tn))
            ldstr = """
                LOAD DATA INFILE '{sf}'
                INTO TABLE gbd.output_sequela_{tn}_v{pid}
                FIELDS
                    TERMINATED BY ","
                    OPTIONALLY ENCLOSED BY '"'
                LINES
                    TERMINATED BY "\\n"
                IGNORE 1 LINES
                    ({cols})""".format(sf=summary_file,
                                       pid=process_id,
                                       tn=tn,
                                       cols=cols)
            res = eng.execute(ldstr)
            print('Uploaded %s %s %s' % (location_id, measure_id, tn))
        except Exception as e:
            print(e)
            res = None
    return res
Example No. 11
def clear_prev_data_version_status(database, table):
    """Update the data_version table.
    """
    date = make_db_datestamp()  
    can_nid = utils.get_gbd_parameter('generic_cancer_nid')
    update_query = """
        UPDATE cod.{tbl}
        SET status_end = "{dt}", status='0'
        WHERE nid={nid}
        AND status=1
    """
    # connect using the given database's connection definition
    engine = get_engine(conn_def=database)
    conn = engine.connect()

    res = conn.execute(update_query.format(
        tbl=table,
        dt=date,
        nid=can_nid 
    ))

    conn.close()
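
A usage sketch; both argument values are assumptions ('cod' must be a valid connection definition and data_version an existing table in the cod schema):

clear_prev_data_version_status(database='cod', table='data_version')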
Example No. 12
def insert_names(name_table, name_df, df_name_col=None, conn_def='ADDRESS'):
    """Insert the name in the df to cod.name_table table."""
    # make sure the column name in the df matches that in the db
    name_tables_to_col_name = {'site': 'site_name', 'source': 'source_name'}
    assert name_table in name_tables_to_col_name.keys(), \
        "Invalid name table: {}".format(name_table)
    name_col = name_tables_to_col_name[name_table]

    assert set(name_df.columns) == set([name_col]), \
        "Pass a df with one column: '{}'. You gave a df with these " \
        "columns: {}".format(name_col, name_df.columns)

    # verify that sources are ok to upload
    if name_table == "source":
        assert_is_valid_source(name_df[name_col].unique())

    # restrict data to just that
    name_df = name_df[[name_col]].drop_duplicates().dropna()
    unique_names = name_df[name_col].unique()
    names_clause = "','".join(unique_names)
    names_query = """
        SELECT *
        FROM cod.{name_table}
        WHERE {name_col} IN ('{names_clause}')
    """.format(name_table=name_table,
               name_col=name_col,
               names_clause=names_clause)
    overlap = ezfuncs.query(names_query, conn_def=conn_def)
    if len(overlap) > 0:
        raise AssertionError(
            "Conflicting {name_col}s already present: \n{overlap}".format(
                name_col=name_col, overlap=overlap))
    engine = ezfuncs.get_engine(conn_def)
    conn = engine.connect()
    name_df.to_sql(name_table, conn, if_exists='append', index=False)
    conn.close()
    print("Uploaded new {name_col}s: \n{name_df}".format(name_col=name_col,
                                                         name_df=name_df))
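
A usage sketch for insert_names. The source name is made up, and 'ADDRESS' mirrors the redacted default conn_def used throughout these examples:

import pandas as pd

# one-column frame whose column name matches cod.source.source_name
sources = pd.DataFrame({'source_name': ['EXAMPLE_SURVEY_2015']})

insert_names('source', sources, conn_def='ADDRESS')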
Example No. 13
    def upload_to_db(self, user_input=True):
        """uploads the package_id to the database

        Assumes input_data global contains
        meta data and weights for the given package id. Saves farthest_step,
        list of dataframes containing uploaded tables.

        this method ensures the correct order of operations for the upload.

        KEY ASSUMPTIONS
            - You have one cause group per target
            - You do not have any OR clauses in your weight group logic

        [OPEN CONNECTION]
        [BEGIN TRANSACTION]
        1. add a new version using the given package id and description
        2. add new target cause groups to the database based on the targets
           in the input data (WILL ASSUME ONE CAUSE GROUP PER TARGET)
            a. add the actual targets using a 1:1 mapping to the new cause
               group ids created in the database (essentially, each target gets
               a random cause group id)
        3. add new weight groups to the database based on input data,
           associated with the version id in (1) in the database
            a. add new weight group logic sets that bridge between weight
               groups and weight group logic using version id from (1) to find
               weight group ids created in (3) (WILL ASSUME THERE ARE NO
               'OR' LOGIC CLAUSES IN THE WEIGHT GROUP)
            b. add new weight group logic using version id
               from (1) to find weight group logic sets created in (3a)
        4. map the input data to newly created weight group ids and cause
           group ids using the cause group names and the
           weight group names in the input data, and using the version id
           created in (1) to find both. Then upload to weights
           table. Do all of this with version id from (1)
        5. mark version metadata so that new version is best
        [COMMIT CHANGES]
        [CLOSE CONNECTION]

        """
        name_function_order = [
            ('1', 'Versions table', self.prep_version,
             'rdp_sharedpackageversion'),
            ('2', 'Cause groups table', self.prep_cause_groups,
             'rdp_sharedcausegroup'),
            ('2a', 'Targets table', self.prep_targets, 'rdp_sharedtarget'),
            ('3', 'Weight groups table', self.prep_weight_groups,
             'rdp_sharedwgtgroup'),
            ('3a', 'Weight logic set table', self.prep_weight_group_logic_set,
             'rdp_sharedwgtgrouplogicset'),
            ('3b', 'Weight logic table', self.prep_weight_group_logic,
             'rdp_sharedwgtgrouplogic'),
            ('4', 'Weights table', self.prep_weights, 'rdp_sharedwgt')
        ]
        # rows_expected = printExpectedRowAdditions(do_print=user_input)
        self.farthest_step = 'Nowhere'
        self.tables_uploaded = {}
        # start a transaction - if anything fails from this point out,
        # rollback all changes
        engine = get_engine(self.conn_def)
        conn = engine.connect()
        trans = conn.begin()
        try:
            for step_number, step_name, prep_function, table_name in \
                    name_function_order:

                print(
                    "[{t}] ({no}): {nm}".format(
                        t=str(datetime.now()), no=step_number, nm=step_name
                    )
                )
                # prep the data to upload
                df = prep_function()
                # upload it to the given table name
                RegressionUploader.upload_table_to_db(
                    df, table_name, conn
                )
                if step_number == '1':
                    self.new_version_id = self.get_new_pvid()
                    print("VERSION ID: {}".format(self.new_version_id))
                # add to appended tables
                self.tables_uploaded[step_number] = df
                self.farthest_step = step_name

            print('uploading...')
            continue_upload = 'unknown'
            if user_input:
                while continue_upload != 'Y' and continue_upload != 'N':
                    continue_upload = input(
                        'Should the new version be accepted? '
                        'Check that everything looks right above [Y/N]'
                    )
                    if continue_upload == 'Y':
                        print(
                            'Ok. Setting the old version '
                            'to old and the new version {n} to '
                            'best status'.format(n=self.new_version_id)
                        )
                        self.switch_best_flag(conn)
                        self.farthest_step = 'Flag switch'
                    elif continue_upload == 'N':
                        print(
                            'Got it. Check that out and in the '
                            'meantime I\'ll rollback everything '
                            'that was just uploaded.'
                        )
                        self.rollback_everything(engine)
                    else:
                        print(
                            'I\'m dumb and didn\'t understand your '
                            'input of \'{u}\'. Press either \'Y\' '
                            'or \'N\'. I\'ll keep asking until '
                            'either the end of time or you give me '
                            'a good answer.'.format(u=continue_upload)
                        )
            else:
                # The rows-uploaded vs rows-expected check is stubbed out,
                # so non-interactive runs currently always roll back.
                if False:
                    self.switch_best_flag(conn)
                else:
                    # print(rows_expected)
                    # print(rows_uploaded)
                    print(
                        'rows uploaded did not equal rows expected. '
                        'rolling back.'
                    )
                    self.rollback_everything(engine)
                    raise RuntimeError(
                        'rows uploaded did not equal rows expected')
            trans.commit()
            conn.close()
        except Exception as e:
            trans.rollback()
            conn.close()
            print(
                "ROLLING BACK: Got an {et}: {m}".format(et=type(e), m=str(e))
            )
            self.rollback_everything(engine)
            raise
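
The transaction pattern the method is built around, reduced to a minimal, self-contained sketch (the SQLite in-memory database and table name are placeholders):

from sqlalchemy import create_engine, text

engine = create_engine("sqlite:///:memory:")  # placeholder URL
conn = engine.connect()
trans = conn.begin()
try:
    # each prep/upload step runs inside the same transaction
    conn.execute(text("CREATE TABLE demo (id INTEGER)"))
    conn.execute(text("INSERT INTO demo (id) VALUES (1)"))
    trans.commit()
except Exception:
    # any failure rolls back everything uploaded so far
    trans.rollback()
    raise
finally:
    conn.close()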
Example No. 14
def engine(self):
    if not self._engine:
        self._engine = get_engine(conn_def=self.conn_def,
                                  connectable=False)
        self._engine.pool_recycle = 40.0
    return self._engine
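
For context, a self-contained sketch of the lazy-engine property pattern this accessor implements. The class, constructor, and URL are hypothetical, and pool_recycle is normally passed to create_engine rather than set on the engine afterwards:

from sqlalchemy import create_engine

class LazyEngineHolder(object):
    """Hypothetical holder that builds its engine on first access."""

    def __init__(self, url):
        self.url = url
        self._engine = None

    @property
    def engine(self):
        if self._engine is None:
            # recycle pooled connections after 40 seconds of idle time
            self._engine = create_engine(self.url, pool_recycle=40)
        return self._engine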