Example No. 1
def test_insert_new_row(covariate_db, cov_conn_def):
    """Tests the addition of a single row to the covariate.model table."""
    initialize_and_fill_covariate_db(covariate_db, cov_conn_def)
    sesh = get_session(cov_conn_def)

    insert_q = """
        INSERT INTO covariate.model (model_version_id, year_id, location_id, 
            sex_id, age_group_id, mean_value, upper_value, lower_value)
        VALUES ('1', '2017', '1', '2', '22', '10.4', '11.2', '9.6'),
               ('1', '2016', '1', '2', '22', '10.5', '11.4', '9.4'),
               ('1', '2015', '1', '2', '22', '10.6', '11.6', '9.2'),
               ('1', '2014', '1', '2', '22', '10.7', '11.8', '9'),
               ('1', '2013', '1', '2', '22', '10.8', '12', '8.8');
    """
    exec_query(insert_q, session=sesh)
    sesh.commit()

    select_q = """
        SELECT * FROM covariate.model WHERE model_version_id = 1;
    """
    res = query_2_df(select_q, session=sesh)

    assert not res.empty
    # We added five entries associated with model_version_id 1
    assert len(res) == 5
    # We have to close the session or the next test will hang
    sesh.close()
Example No. 2
def exec_query(call, conn_def):
    """
    Executes a raw SQL statement with db_tools
    """
    session = ezfuncs.get_session(conn_def)
    call = clean_string(call)
    return query_tools.exec_query(call, session=session, close=True)
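A minimal usage sketch for the wrapper above; 'cod-test' is a placeholder conn_def and the UPDATE statement is illustrative only, not taken from the original code.

# Hypothetical call to the exec_query wrapper above; 'cod-test' is a
# placeholder conn_def and the UPDATE is illustrative only.
exec_query(
    "UPDATE cod.output_version SET status = 1 WHERE output_version_id = 42",
    conn_def="cod-test")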
Example No. 3
def write_data(df, db, table, conn_def):
    """
    Writes a pd.DataFrame in bulk to a specified table/database.
    """
    session = ezfuncs.get_session(conn_def)
    loaders.Inserts(table=table, schema=db, insert_df=df).insert(session,
                                                                 commit=True)
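A short sketch of how write_data might be called; the DataFrame uses a subset of the covariate.model columns from Example No. 1, and the conn_def name is a placeholder.

import pandas as pd

# Hypothetical call to write_data; 'covariate-test' is a placeholder conn_def
# and the DataFrame holds only a subset of the real table's columns.
df = pd.DataFrame({
    "model_version_id": [1, 1],
    "year_id": [2016, 2017],
    "mean_value": [10.5, 10.4],
})
write_data(df, db="covariate", table="model", conn_def="covariate-test")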
Example No. 4
def test_insert_defaults(covariate_db, cov_conn_def):
    """Insert a new row into the covariate.model_version without a best_start,
    best_end, best_user, best_description column to check the default value."""
    covariate_db.add_conn_def(cov_conn_def)
    sesh = get_session(cov_conn_def)

    insert_q = """
        INSERT INTO covariate.model_version (model_version_id, covariate_id, 
            description, code_version, status, is_best, gbd_round_id)
        VALUES ('1', '1', 'testing defaults', 'version 1', '1', '0', '5');
    """
    exec_query(insert_q, session=sesh)
    sesh.commit()

    select_q = """
        SELECT * FROM covariate.model_version WHERE model_version_id = 1;
    """
    res = query_2_df(select_q, session=sesh)

    assert not res.empty
    assert not res.at[0, 'best_user']
    assert not res.at[0, 'best_description']
    assert not res.at[0, 'best_start']
    assert not res.at[0, 'best_end']
    # we need to close the session or the next test will hang
    sesh.close()
Example No. 5
 def generate_new_version(cls):
     session = ezfuncs.get_session(conn_def="como-epi")
     try:
         q = """
             INSERT INTO epi.output_version (
                 username,
                 description,
                 code_version,
                 status,
                 is_best
             )
             VALUES(
                 '{user}',
                 'Central COMO run',
                 '{code_version}',
                 0,
                 0)""".format(user=getpass.getuser(), code_version=4)
         session.execute(q)
         session.flush()
         como_version_id = session.execute("select LAST_INSERT_ID()"
                                           ).scalar()
         session.commit()
     except Exception:
         session.rollback()
         raise
     return como_version_id
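The INSERT above interpolates values with str.format. The sketch below shows the same statement with bound parameters instead; insert_output_version is a hypothetical helper name, and it assumes a plain SQLAlchemy session like the one ezfuncs.get_session returns.

import getpass

from sqlalchemy import text

def insert_output_version(session):
    # Hypothetical helper: the same insert with bound parameters instead of
    # str.format; `session` is assumed to be a SQLAlchemy session such as
    # the one returned by ezfuncs.get_session.
    q = text(
        "INSERT INTO epi.output_version "
        "(username, description, code_version, status, is_best) "
        "VALUES (:user, 'Central COMO run', :code_version, 0, 0)")
    session.execute(q, {"user": getpass.getuser(), "code_version": 4})
    session.commit()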
Example No. 6
def run_parent_adjust(cause_id, children, mvt, vers, detail, sex):
    ##########################################
    # Add oldCorrect model and its children
    # into the cod.model_version_relation table
    # for codCorrect
    ##########################################
    session = get_session('cod')
    parent = pull_mvid(cause_id, mvt, sex=sex)
    assert parent, "No parent saved"
    parent = parent[0]
    if mvt == 9:
        desc = 'oldC v%s hybrid of %s (%s)' % (vers, children, detail)
    elif mvt == 10:
        desc = 'oldC v%s target (%s)' % (vers, detail)
    try:
        for child in children:
            query = """INSERT INTO cod.model_version_relation
                    (parent_id, child_id, model_version_relation_note)
                    VALUES ({parent}, {child}, "{desc}")
                    """.format(parent=parent, child=child, desc=desc)
            exec_query(query, session=session)
        session.commit()
    except Exception:
        session.rollback()
        session.close()
        raise
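The query above formats parent, child, and desc directly into the SQL. Example No. 17 below passes a parameters dict to exec_query, so a parameterized version of this loop could look like the following sketch; insert_children is a hypothetical helper, and it assumes exec_query accepts SQLAlchemy-style named placeholders.

def insert_children(session, parent, children, desc):
    # Hedged sketch: the same inserts via exec_query's `parameters` argument,
    # assuming named :placeholders are supported as in Example No. 17.
    query = """INSERT INTO cod.model_version_relation
            (parent_id, child_id, model_version_relation_note)
            VALUES (:parent, :child, :note)"""
    for child in children:
        exec_query(query, session=session,
                   parameters={"parent": parent, "child": child,
                               "note": desc})
    session.commit()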
Example No. 7
    def upload(self):
        """
        Attempts to infile any .csv files found in the
        /{output_dir}/{version_id} directory to
        gbd.output_le_decomp_v{self.process_version_id}. Any
        .csv files not to be uploaded should be stashed in subfolders.
        """
        if not self._process_version_id:
            raise RuntimeError("A process version must be created "
                               "before results can be uploaded.")
        table_name = list(
            GBDProcessVersion(self._process_version_id,
                              env=self.env).tables)[0]
        session = get_session(conn_def=self.conn_def)

        infiler = Infiles(table=table_name,
                          schema=self._schema,
                          session=session)

        print(f"Starting upload at {datetime.now().time()}")
        infiler.indir(path=self.write_dir,
                      partial_commit=True,
                      sort_files=True)
        session.close()
        print(f"Finished upload at {datetime.now().time()}")
Example No. 8
def create_new_output_version_row(output_version_id,
                                  description,
                                  envelope_version_id,
                                  conn_def,
                                  status=2):
    logging.info("database.create_new_output_version_row")
    if not is_existing_output_version(output_version_id, conn_def):
        # Create a row to input
        now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        ins_df = pd.DataFrame(
            {
                'output_version_id': output_version_id,
                'description': description,
                'code_version': 5,
                'env_version': envelope_version_id,
                'status': status,
                'is_best': 0,
                'date_inserted': now,
                'inserted_by': getpass.getuser(),
                'last_updated': now,
                'last_updated_by': getpass.getuser(),
                'last_updated_action': "INSERT"
            },
            columns=[
                'output_version_id', 'description', 'code_version',
                'env_version', 'status', 'is_best', 'date_inserted',
                'inserted_by', 'last_updated', 'last_updated_by',
                'last_updated_action'
            ],
            index=[0])
        # Insert row
        ins = Inserts(table='output_version', schema='cod', insert_df=ins_df)
        sesh = get_session(conn_def)
        ins.insert(sesh, commit=True)
Example No. 9
def upload_to_db(table, db):
    """Upload a csv to the cod database."""
    # determine the filepath of the csv to upload
    upload_path = _get_upload_path(table)
    sesh = get_session(db)
    inf = Infiles(table=table, schema='cod', session=sesh)
    inf.infile(path=upload_path, with_replace=False, commit=True)
Example No. 10
def upload_to_table(file_list, table_name, env, raise_on_failure):
    if env == DBEnv.PROD:
        conn_def = _ConnDefs.PROD.value
    elif env == DBEnv.DEV:
        conn_def = _ConnDefs.DEV.value
    sesh = get_session(conn_def=conn_def, connectable=True)
    infiler = Infiles(table=table_name, schema="gbd", session=sesh)

    logger.info("Beginning infile, time = {}".format(time.time()))
    for f in file_list:
        try:
            logger.info("infiling {}, time = {}".format(f, time.time()))
            # summaries are saved with column mean; the table needs them
            # uploaded as val
            infiler.infile(f,
                           with_replace=False,
                           commit=True,
                           rename_cols={'mean': 'val'})
        except Exception:
            if raise_on_failure:
                raise
            tb = traceback.format_exc()
            logger.error("failed to infile {} with exception {}; "
                         "skipping to next file".format(f, tb))
    logger.info("Done infile, time = {}".format(time.time()))
Example No. 11
def mark_gbd_best(output_version_id, process_version_id, gbd_conn_def):
    gbd_sesh = get_session(gbd_conn_def)
    update_gbd_cmd = """UPDATE
                        gbd.gbd_process_version
                        SET gbd_process_version_status_id = 1
                        WHERE gbd_process_version_id = {v}
                        """.format(v=process_version_id)
    exec_query(update_gbd_cmd, session=gbd_sesh, close=True)
Example No. 12
def activate_new_process_version(process_version_id, conn_def):
    sesh = get_session(conn_def)
    q = """
        UPDATE gbd_process_version
        SET gbd_process_version_status_id = 2
        WHERE gbd_process_version_id = {process_version_id}
    """.format(process_version_id=process_version_id)
    exec_query(q, session=sesh, close=True)
Example No. 13
def add_process_version_to_compare_version_output(process_version_id,
                                                  compare_version_id,
                                                  conn_def):
    sesh = get_session(conn_def)
    add_process_vers_query = """
        CALL gbd.add_process_version_to_compare_version_output ({}, {})
    """.format(compare_version_id, process_version_id)
    exec_query(add_process_vers_query, session=sesh, close=True)
Example No. 14
def get_parallelization_location_group(location_set_id, nparallel,
                                       location_group, gbd_round_id,
                                       decomp_step,
                                       national_level=NATIONAL_LEVEL):
    session = ezfuncs.get_session(conn_def='epi')
    locs = query.get_locations(location_set_id, gbd_round_id, decomp_step,
                               session)[0]
    locs = locs.sort_values(by=['level_{}'.format(national_level)])
    parallel_groups = np.array_split(
        locs.loc[locs.level >= national_level][SPACEVAR].values, nparallel)
    prediction_location_ids = parallel_groups[location_group].tolist()

    return prediction_location_ids
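For reference, a standalone illustration of the np.array_split partitioning that this function (and find_parallelization_group_for_loc below) relies on; the location_ids here are arbitrary example values.

import numpy as np

# Ten arbitrary location_ids split into 3 roughly equal groups, mirroring
# np.array_split(values, nparallel) above.
location_ids = np.array([6, 7, 8, 33, 34, 62, 71, 90, 95, 102])
groups = np.array_split(location_ids, 3)
print([g.tolist() for g in groups])
# [[6, 7, 8, 33], [34, 62, 71], [90, 95, 102]]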
Example No. 15
def upload_cod_summaries(directories, conn_def):
    sesh = get_session(conn_def)
    exec_query("set unique_checks= 0", sesh)
    inf = Infiles(table='output', schema='cod', session=sesh)
    for directory in directories:
        inf.indir(path=directory,
                  with_replace=True,
                  partial_commit=True,
                  commit=True)
Example No. 16
def wipe_diagnostics():
    # Create a connection
    sesh = get_session("codcorrect")
    # First clear the old previous-best rows (is_best = 2)
    clear_previous = ('UPDATE codcorrect.diagnostic_version SET is_best = 0 '
                      'WHERE is_best = 2;')
    exec_query(clear_previous, session=sesh, close=True)
    # Then demote the current best (is_best = 1) to previous best (is_best = 2)
    demote_best = ('UPDATE codcorrect.diagnostic_version SET is_best = 2 '
                   'WHERE is_best = 1;')
    exec_query(demote_best, session=sesh, close=True)
Example No. 17
def create_output_partition(codcorrect_version_id) -> None:
    exec_query(CoDCorrectVersion.CREATE_OUTPUT_PARTITION,
               session=get_session(ConnectionDefinitions.COD),
               close=True,
               parameters={
                   'schema': COD.DataBase.SCHEMA,
                   'table': COD.DataBase.TABLE,
                   Columns.OUTPUT_VERSION_ID: codcorrect_version_id
               })
Example No. 18
 def add_kit_vers(self):
     sesh = get_session(self.conn_def)
     query = ('INSERT INTO gbd.kit_version '
              '(kit_id, kit_version_note, '
              'gbd_round_id, kit_version_status_id) '
              'VALUES '
              '(2, "MMR run for codcorrect version {}", {}, -1)'
              .format(self.codcorrect_vers, self.gbd_round_id))
     exec_query(query, session=sesh, close=True)
Example No. 19
def activate_new_compare_version(compare_version_id, conn_def):
    sesh = get_session(conn_def)
    q = """
        UPDATE compare_version
        SET compare_version_status_id = 2
        WHERE compare_version_id = {}
        AND compare_version_status_id = -1
    """.format(compare_version_id)
    exec_query(q, session=sesh, close=True)
Example No. 20
def update_status(output_version_id, status, conn_def):
    # Update status
    sesh = get_session(conn_def)
    update_cmd = """UPDATE
                        cod.output_version
                    SET
                        status = {s}
                    WHERE
                        output_version_id = {v}""".format(s=status,
                                                          v=output_version_id)
    exec_query(update_cmd, session=sesh, close=True)
Example No. 21
def session_scope(conn_def: str) -> Generator[orm.Session, None, None]:
    """Provides a transactional scope around a series of operations."""
    session = ezfuncs.get_session(conn_def)
    try:
        yield session
        session.commit()
    except Exception:
        session.rollback()
        raise
    finally:
        session.close()
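A usage sketch for session_scope; it assumes the function is wrapped with contextlib.contextmanager (the decorator does not appear in the snippet), and 'cod-test' and the UPDATE are placeholders. The raw-string execute mirrors Example No. 5; newer SQLAlchemy releases would need sqlalchemy.text().

# Hypothetical usage; session_scope is assumed to be decorated with
# @contextlib.contextmanager, and the conn_def and UPDATE are placeholders.
with session_scope("cod-test") as session:
    session.execute(
        "UPDATE cod.output_version SET status = 1 "
        "WHERE output_version_id = 42")
# commit, rollback, and close are all handled inside session_scope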
Example No. 22
def new_diagnostic_version(output_version_id):
    # Create values to insert
    ins_df = pd.DataFrame([{
        'output_version_id': output_version_id,
        'is_best': 1
    }])
    # Insert row
    ins = Inserts(table='diagnostic_version',
                  schema='codcorrect',
                  insert_df=ins_df)
    sesh = get_session('codcorrect')
    ins.insert(sesh, commit=True)
Example No. 23
def unmark_cod_best(output_version_id, cod_conn_def):
    now = datetime.datetime.now()
    cod_sesh = get_session(cod_conn_def)
    update_cod_cmd = """UPDATE cod.output_version SET
                        is_best = 0,
                        best_end = '{now}'
                    WHERE
                        output_version_id != {v}
                        AND best_start IS NOT NULL
                        AND best_end IS NULL""".format(now=now,
                                                       v=output_version_id)
    exec_query(update_cod_cmd, session=cod_sesh, close=True)
Example No. 24
def mark_cod_best(output_version_id, cod_conn_def):
    now = datetime.datetime.now()
    cod_sesh = get_session(cod_conn_def)
    update_cod_cmd = """UPDATE
                        cod.output_version
                    SET
                        is_best = 1,
                        best_start = '{now}'
                    WHERE
                        output_version_id = {v}""".format(now=now,
                                                          v=output_version_id)
    exec_query(update_cod_cmd, session=cod_sesh, close=True)
Example No. 25
    def upload_emr(self):
        session = ezfuncs.get_session(envs.Environment.get_odbc_key())

        path = ("FILEPATH").format(
            cascade_root=envs.Environment.get_cascade_root(),
            mv=self.model_version_id)
        infiler = loaders.ToInfile(path)
        infiler.stage_2_stagedir(self.emr)
        infiler.upload_stagedir(table="t3_model_version_emr",
                                schema="epi",
                                session=session,
                                commit=True)
Example No. 26
 def add_metadata_to_process_version(self, process_version_id):
     sesh = get_session(self.conn_def)
     query = ('INSERT INTO gbd.gbd_process_version_metadata ('
              'gbd_process_version_id, '
              'metadata_type_id, '
              'val) '
              'VALUES '
              '({pv_id}, '
              '1, '
              '"{ov_id}")').format(pv_id=process_version_id,
                                   ov_id=self.codcorrect_vers)
     exec_query(query, session=sesh, close=True)
Example No. 27
    def upload_emr(self):
        session = ezfuncs.get_session(envs.Environment.get_odbc_key())

        path = (
            "{cascade_root}/{mv}/full/locations/1/outputs/both/2000").format(
                cascade_root=envs.Environment.get_cascade_root(),
                mv=self.model_version_id)
        infiler = loaders.ToInfile(path)
        infiler.stage_2_stagedir(self.emr)
        infiler.upload_stagedir(table="t3_model_version_emr",
                                schema="epi",
                                session=session,
                                commit=True)
Example No. 28
 def upload(self, sm, process_vers, in_dir, with_replace=True):
     '''Args:
         sm: 'single' or 'multi', referring to which table to use
         process_vers: process version id produced by the step above
         in_dir: filepath of csvs to be uploaded
         with_replace: passed through to Infiles.indir
     The connection uses self.conn_def ('gbd' or 'gbd_test').
     '''
     table = 'output_mmr_%s_year_v%s' % (sm, process_vers)
     sesh = get_session(self.conn_def)
     inf = Infiles(table=table, schema='gbd', session=sesh)
     inf.indir(path=in_dir, with_replace=with_replace,
               partial_commit=True, commit=True)
     activate_process_version(process_version_id=process_vers, session=sesh)
     print("Uploaded! You win at life.")
Example No. 29
def find_parallelization_group_for_loc(loc, gbd_round_id, decomp_step,
                                       location_set_id=22, nparallel=50,
                                       national_level=NATIONAL_LEVEL):
    session = ezfuncs.get_session(conn_def='epi')
    locs = query.get_locations(location_set_id, gbd_round_id, decomp_step,
                               session)[0]
    locs = locs.sort_values(by=['level_{}'.format(national_level)])
    parallel_groups = np.array_split(
        locs.loc[locs.level >= national_level][SPACEVAR].values, nparallel)

    # Find the index of the parallel group that contains this location_id
    location_group = None
    for i, group in enumerate(parallel_groups):
        if loc in list(group):
            location_group = i
    if location_group is None:
        raise ValueError(
            'You sure that is a location_id? No values here: {}'.format(loc))

    return location_group
Example No. 30
 def load_component_location(self, component, location_id, year_type):
     table_tmp = getattr(self, year_type)[component]
     table = table_tmp.format(self.como_version.gbd_process_version_id)
     sesh = get_session("gbd")
     infiler = Infiles(table, "gbd", sesh)
     if year_type == "single_year":
         indir_glob = f"{self.como_version.como_dir}/summaries/{component}/*/{year_type}/{location_id}/*.csv"
     else:
         indir_glob = f"{self.como_version.como_dir}/summaries/{component}/*/{year_type}/{location_id}.csv"
     infiler.indir(path=indir_glob,
                   commit=True,
                   partial_commit=True,
                   rename_cols={"mean": "val"},
                   no_raise=(IntegrityError))