Example #1
def main():
    # Parse command-line arguments and load experiment configuration
    cmd_args = parse_arguments()
    config = common.load_config(cmd_args.ini_file_initial)
    # TRAPID and ref. db data (lists containing all needed parameters for `common.db_connect()`)
    trapid_db_data = common.get_db_connection_data(config, 'trapid_db')
    ref_db_data = common.get_db_connection_data(config, 'reference_db')
    exp_id = config['experiment']['exp_id']

    try:
        # Clean TRAPID db (in case of previous results)
        db_conn = common.db_connect(*trapid_db_data)
        cleanup_db(db_conn, exp_id)
        db_conn.close()

        # Parse eggNOG-mapper's output file
        emapper_output = os.path.join(config['experiment']['tmp_exp_dir'], "emapper_%s.emapper.annotations" % exp_id)
        if not os.path.exists(emapper_output):
            sys.stderr.write("Error: emapper output file (%s) not found!\n" % emapper_output)
        emapper_results = parse_emapper_output(emapper_output)

        # Perform GF assignment
        db_conn = common.db_connect(*trapid_db_data)
        perform_gf_assignment(emapper_results, db_conn, exp_id)
        common.update_experiment_log(exp_id, "infer_genefamilies", "", 3, db_conn)
        db_conn.close()

        # Get GO data
        db_conn = common.db_connect(*ref_db_data)
        go_data = get_go_data(db_conn)
        db_conn.close()
        # Perform GO annotation
        # Read RFAM GO data
        rfam_go_file = os.path.join(config['experiment']['tmp_exp_dir'], "rfam_go_data.tsv")
        rfam_transcript_gos = read_rfam_go_data(rfam_go_file)
        db_conn = common.db_connect(*trapid_db_data)
        perform_go_annotation(emapper_results, rfam_transcript_gos, db_conn, go_data, exp_id)
        db_conn.close()

        # Perform KO annotation
        db_conn = common.db_connect(*trapid_db_data)
        perform_ko_annotation(emapper_results, db_conn, exp_id)
        common.update_experiment_log(exp_id, "infer_functional_annotation", "", 3, db_conn)
        db_conn.close()
    # If any exception was raised, update the experiment's log, set status to 'error', and exit
    except Exception:
        print_exc()
        common.stop_initial_processing_error(exp_id, trapid_db_data)
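
Every TRAPID snippet in this collection unpacks `trapid_db_data` (or `ref_db_data`) straight into `common.db_connect()`. A minimal sketch of that convention, assuming the connection data is an ordered list of MySQL parameters and that `MS` is `MySQLdb` (both are assumptions; the real `common` module defines the actual signature):

# Hedged sketch, not the real `common.db_connect()`
import MySQLdb as MS  # the `MS.cursors.DictCursor` calls in other examples suggest MySQLdb

def db_connect(server, login, password, db_name):
    # Open a MySQL connection from positional parameters (field order is an assumption)
    return MS.connect(host=server, user=login, passwd=password, db=db_name)

trapid_db_data = ["db.example.org", "trapid", "secret", "db_trapid_01"]  # hypothetical values
db_conn = db_connect(*trapid_db_data)
cursor = db_conn.cursor(MS.cursors.DictCursor)  # DictCursor, as used in several examples below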
Example #2
def flag_rna_genes(exp_id, infernal_results, trapid_db_data):
    """Flag a set of transcripts as RNA genes in TRAPID's database.

    :param exp_id: TRAPID experiment id
    :param infernal_results: parsed (filtered) infernal results
    :param trapid_db_data: TRAPID db connection data

    """
    # Before updating `transcripts` with the current Infernal results, clear previous content
    clear_transcripts_table(exp_id, trapid_db_data)
    sys.stderr.write('[Message] Flag RNA genes in `transcripts` table. \n')
    query_str = "UPDATE `transcripts` SET `is_rna_gene`=1, `rf_ids`='{rf_ids}' WHERE `experiment_id`='{exp_id}' and transcript_id='{transcript_id}';"
    db_conn = common.db_connect(*trapid_db_data)
    cursor = db_conn.cursor()
    # Get Rfam families associated to each transcript
    transcript_families = {}
    for rec in infernal_results:
        exp_cm_acc = "%s_%s" % (exp_id, rec["cm_acc"])
        if rec["query"] not in transcript_families:
            transcript_families[rec["query"]] = set([exp_cm_acc])
        else:
            transcript_families[rec["query"]].add(exp_cm_acc)
    for transcript_id in sorted(transcript_families):
        rf_ids = ",".join(sorted(list(transcript_families[transcript_id])))
        formatted_query = query_str.format(exp_id=exp_id,
                                           rf_ids=rf_ids,
                                           transcript_id=transcript_id)
        cursor.execute(formatted_query)
    db_conn.commit()
    db_conn.close()
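
A hypothetical call to `flag_rna_genes()`, assuming the parsed Infernal records are dictionaries carrying at least a "query" (transcript id) and "cm_acc" (Rfam model accession) key, which is all the loop above reads:

# Hypothetical input records and experiment id, for illustration only
infernal_results = [
    {"query": "transcript_1", "cm_acc": "RF00005"},
    {"query": "transcript_1", "cm_acc": "RF01852"},
    {"query": "transcript_2", "cm_acc": "RF00005"},
]
flag_rna_genes(exp_id=42, infernal_results=infernal_results, trapid_db_data=trapid_db_data)
# transcript_1 would be flagged with rf_ids '42_RF00005,42_RF01852'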
Example #3
def main(args):
    # database init
    path = "sqlite:///{}".format(args.database_path)
    conn = db_connect(path, args.debug)

    market_data = get_market_data()
    populate_db(conn, market_data)
Example #4
def store_tree_db(tree_file,
                  tree_program,
                  exp_id,
                  gf_id,
                  trapid_db_data,
                  verbose=False):
    """Read a gene family's newick tree file and upload its content to TRAPID database.

    :param tree_file: path to newick tree file to upload
    :param tree_program: program used to generate the tree (also stored in the database)
    :param exp_id: TRAPID experiment id
    :param gf_id: gene family id
    :param trapid_db_data: TRAPID database connection data (parameters for common.db_connect())
    :param verbose: whether to be verbose (print extra information to stderr if set to True)

    """
    query_str = "UPDATE `gene_families` SET `tree`='{tree_data}', `tree_params`='{tree_params}'  WHERE `experiment_id`='{exp_id}' AND `gf_id`='{gf_id}';"
    if verbose:
        sys.stderr.write(
            "[Message] Store newick tree data ('%s') for GF '%s' of experiment '%d' to TRAPID database.\n"
            % (tree_file, gf_id, exp_id))
    with open(tree_file, 'r') as in_file:
        tree_data = in_file.read().replace('\n', '')
        db_conn = common.db_connect(*trapid_db_data)
        cursor = db_conn.cursor()
        cursor.execute(
            query_str.format(tree_data=tree_data,
                             tree_params=tree_program,
                             exp_id=exp_id,
                             gf_id=gf_id))
        db_conn.commit()
        db_conn.close()
Example #5
def get_transcript_sqces(gf_data,
                         transl_tables,
                         trapid_db_data,
                         verbose=False):
    """Retrieve gene family transcript sequences for MSA/tree creation, translate them & return them as a dictionary.

    :param gf_data: all gene family data retrieved from the TRAPID database (output of `get_gf_data()`)
    :param transl_tables: translation tables dictionary
    :param trapid_db_data: TRAPID database connection data (parameters for common.db_connect())
    :param verbose: whether to be verbose (print extra information to stderr if set to True)
    :return: translated transcript sequences as a sequence_id:sequence dictionary

    """
    trs_sqces = {}
    query_str = "SELECT `transcript_id`, UNCOMPRESS(`orf_sequence`) as `orf_sequence`, `transl_table` FROM `transcripts` WHERE `experiment_id`='{exp_id}' AND `gf_id`='{gf_id}';"
    excluded_trs = set([])
    if gf_data['exclude_transcripts']:
        excluded_trs = set(gf_data['exclude_transcripts'].split(','))
    if verbose:
        sys.stderr.write(
            "[Message] These transcripts will be excluded: %s.\n" %
            ", ".join(excluded_trs))
    db_conn = common.db_connect(*trapid_db_data)
    cursor = db_conn.cursor(MS.cursors.DictCursor)
    cursor.execute(
        query_str.format(exp_id=gf_data['experiment_id'],
                         gf_id=gf_data['gf_id']))
    for record in common.ResultIter(cursor):
        if record['transcript_id'] not in excluded_trs:
            trs_sqces[record['transcript_id']] = translate_dna_to_aa(
                dna_string=record['orf_sequence'],
                transl_tables=transl_tables,
                transl_table_idx=record['transl_table'])
    return trs_sqces
Example #6
def get_gf_data(exp_id, gf_id, trapid_db_data, verbose=False):
    """Retrieve all gene family data for a TRAPID experiment's gene family.


    :param exp_id: TRAPID experiment id
    :param gf_id: gene family id, for which data is retrieved
    :param trapid_db_data: TRAPID database connection data (parameters for common.db_connect())
    :param verbose: whether to be verbose (print extra information to stderr if set to True)
    :return: gene family data as a field:value dictionary

    """
    if verbose:
        sys.stderr.write(
            "[Message] Retrieve data for GF '%s' (experiment '%d') from TRAPID DB. \n"
            % (gf_id, exp_id))
    query_str = "SELECT * FROM `gene_families` WHERE `experiment_id`='{exp_id}' AND `gf_id`='{gf_id}';"
    db_conn = common.db_connect(*trapid_db_data)
    cursor = db_conn.cursor(MS.cursors.DictCursor)
    cursor.execute(query_str.format(exp_id=exp_id, gf_id=gf_id))
    gf_data = cursor.fetchone()
    db_conn.close()
    if not gf_data:
        sys.stderr.write(
            "[Error] No GF data could be retrieved (GF '%s', experiment '%d')!\n"
            % (gf_id, exp_id))
        sys.exit(1)
    return gf_data
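
The functions in Examples #5 and #10 read several columns from the returned row. A hypothetical `gf_data` value limited to those columns (the real row contains every `gene_families` column, and all values below are made up):

gf_data = {
    'experiment_id': 42,
    'gf_id': '42_HOM000123',
    'plaza_gf_id': 'HOM000123',
    'used_species': '3702,4577',            # comma-separated tax ids
    'gf_content': '',                       # comma-separated gene ids (used for IORTHO families)
    'exclude_transcripts': 'transcript_7',  # comma-separated transcript ids
}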
Example #7
def get_ref_db_sqce_type(trapid_db_data, ref_db_name, verbose=False):
    """Get sequence type (`DNA` or `AA`) of a reference database.

    :param trapid_db_data: TRAPID database connection data (parameters for common.db_connect())
    :param ref_db_name: reference database for which to retrieve sequence type
    :param verbose: whether to be verbose (print extra information to stderr if set to True)
    :return: reference database sequence type

    """
    if verbose:
        sys.stderr.write(
            "[Message] Retrieve sequence type for reference database '%s' from TRAPID DB. \n"
            % ref_db_name)
    query_str = "SELECT `seq_type` FROM `data_sources` WHERE `db_name`='{ref_db_name}';"
    db_conn = common.db_connect(*trapid_db_data)
    cursor = db_conn.cursor()
    cursor.execute(query_str.format(ref_db_name=ref_db_name))
    sqce_type = cursor.fetchone()[0]
    db_conn.close()
    if not sqce_type:
        sys.stderr.write(
            "[Error] Impossible to retrieve sequence type for reference database '%s'!\n"
            % (ref_db_name))
        sys.exit(1)
    if sqce_type not in ['DNA', 'AA']:
        sys.stderr.write(
            "[Error] Invalid sequence type for reference DB '%s'!\n" %
            ref_db_name)
        sys.exit(1)
    return sqce_type
Example #8
def get_exp_data(exp_id, trapid_db_data, verbose=False):
    """Get reference database name and gene family type used for a TRAPID experiment.

    :param exp_id: TRAPID experiment id
    :param trapid_db_data: TRAPID database connection data (parameters for common.db_connect())
    :param verbose: whether to be verbose (print extra information to stderr if set to True)
    :return: experiment's reference database and gene family type

    """
    if verbose:
        sys.stderr.write(
            "[Message] Retrieve data for experiment '%d' from TRAPID DB. \n" %
            exp_id)
    query_str = "SELECT `used_plaza_database`, `genefamily_type` FROM `experiments` WHERE `experiment_id`='{exp_id}';"
    db_conn = common.db_connect(*trapid_db_data)
    cursor = db_conn.cursor(MS.cursors.DictCursor)
    cursor.execute(query_str.format(exp_id=exp_id))
    exp_data = cursor.fetchone()
    db_conn.close()
    if not exp_data:
        sys.stderr.write(
            "[Error] Impossible to retrieve experiment data (experiment '%d')!\n"
            % (exp_id))
        sys.exit(1)
    return exp_data
Example #9
def main():
    # Parse command-line arguments
    cmd_args = parse_arguments()
    # Read enrichment configuration file
    config = common.load_config(
        cmd_args.ini_file_enrichment,
        {'trapid_db', 'reference_db', 'experiment', 'enrichment'})
    # TRAPID db data (list containing all needed parameters for `common.db_connect()`)
    trapid_db_data = common.get_db_connection_data(config, 'trapid_db')
    # Ref. db data (list containing all needed parameters for `common.db_connect()`)
    ref_db_data = common.get_db_connection_data(config, 'reference_db')
    exp_id = config['experiment']['exp_id']
    tmp_dir = config['experiment']['tmp_exp_dir']
    enricher_bin = config['enrichment']['enricher_bin']
    # Check existence of the enricher binary
    fe.check_enricher_bin(enricher_bin, cmd_args.verbose)
    # Get GO data from reference database if `fa_type` is GO
    go_data = {}
    if cmd_args.fa_type == 'go':
        db_conn = common.db_connect(*ref_db_data)
        go_data = fe.get_go_data(db_conn)
        db_conn.close()
    # Delete previous enrichment results from TRAPID DB
    db_conn = common.db_connect(*trapid_db_data)
    fe.delete_previous_results(db_conn, exp_id, cmd_args.fa_type,
                               cmd_args.subset, cmd_args.max_pval,
                               cmd_args.verbose)
    db_conn.close()
    # Run enricher
    enrichment_data = fe.run_enricher(trapid_db_data, exp_id, cmd_args.fa_type,
                                      cmd_args.subset, cmd_args.max_pval,
                                      go_data, enricher_bin, tmp_dir,
                                      cmd_args.keep_tmp, cmd_args.verbose)
    # Create result records and upload them to TRAPID DB
    enrichment_rows = fe.create_enrichment_rows(enrichment_data['results'],
                                                enrichment_data['gf_data'],
                                                exp_id, cmd_args.subset,
                                                cmd_args.fa_type,
                                                cmd_args.max_pval, go_data)
    db_conn = common.db_connect(*trapid_db_data)
    fe.upload_results_to_db(db_conn, enrichment_rows, cmd_args.verbose)
    db_conn.close()
Example #10
def get_gf_sqces(gf_data, ref_db_data, gf_type, ref_sqce_type, transl_tables):
    """Retrieve gene family reference/external protein sequences for MSA and tree creation.

    :param gf_data: all gene family data retrieved from the TRAPID database (output of `get_gf_data()`)
    :param ref_db_data: reference database data (parameters for `common.db_connect()`)
    :param gf_type: gene family type of the current TRAPID experiment
    :param ref_sqce_type: sequence type of reference database sequences (DNA or AA)
    :param transl_tables: translation tables dictionary
    :return: reference protein sequences as a sequence_id:sequence dictionary

    """
    if gf_type not in ['HOM', 'IORTHO']:
        sys.stderr.write(
            "[Error] Invalid GF type ('%s'), cannot retrieve GF sequences!\n" %
            gf_type)
        sys.exit(1)
    # Get used species (species names) based on `gf_data` tax ids
    species_query_str = "SELECT `species` FROM `annot_sources` WHERE `tax_id` IN ({tax_id_str});"
    tax_id_str = "'%s'" % "','".join(gf_data['used_species'].split(','))
    used_species = set([])
    used_genes = set([])
    used_sqces = {}
    db_conn = common.db_connect(*ref_db_data)
    cursor = db_conn.cursor(MS.cursors.DictCursor)
    cursor.execute(species_query_str.format(tax_id_str=tax_id_str))
    for rec in cursor.fetchall():
        used_species.add(rec['species'])  # To replace (update)?
    # Would it be better to fetch filtered data directly?
    gf_data_query_str = "SELECT gene_id, species FROM `gf_data` WHERE `gf_id` = '{gf_id}';"
    cursor.execute(gf_data_query_str.format(gf_id=gf_data['plaza_gf_id']))
    for record in common.ResultIter(cursor):
        if record['species'] in used_species:
            used_genes.add(record['gene_id'])
    # If we are dealing with an IORTHO GF, further filtering is required (only keep sequences that are in `gf_content`)
    if gf_type == "IORTHO" and gf_data['gf_content']:
        to_remove = set(gf_data['gf_content'].split(','))
        used_genes = used_genes - to_remove
    # Retrieve and translate sequences for genes in `used_genes`
    sqce_query_str = "SELECT `gene_id`, `seq`, `transl_table` FROM annotation  WHERE `gene_id` IN ({gene_id_str});"
    gene_id_str = "'%s'" % "','".join(sorted(list(used_genes)))
    # print sqce_query_str.format(gene_id_str=gene_id_str)
    cursor.execute(sqce_query_str.format(gene_id_str=gene_id_str))
    # Store & return translated sequences
    if ref_sqce_type == 'DNA':
        for record in common.ResultIter(cursor):
            used_sqces[record['gene_id']] = translate_dna_to_aa(
                dna_string=record['seq'],
                transl_tables=transl_tables,
                transl_table_idx=record['transl_table'])
    else:
        for record in common.ResultIter(cursor):
            used_sqces[record['gene_id']] = record['seq']
    db_conn.close()
    return used_sqces
Example #11
def main(args):
  # Initialize database
  path = "sqlite:///{}".format(args.database_path)
  conn = db_connect(path, args.debug)

  # query SQL
  s = select([ms.c.Last, ms.c.PrevDay, ms.c.MarketName])
  rp = conn.execute(s)

  # show report
  for record in rp:
    daily_return = record.Last / record.PrevDay - 1
    daily_return = format(daily_return * 100, '.2f')  # convert to a percentage before printing
    print("{} {}%".format(record.MarketName, daily_return))
Example #12
def clear_transcripts_table(exp_id, trapid_db_data):
    """Clear content from the `transcripts` table of the TRAPID database, which is necessary prior to updating the table
    with new Infernal results.

    :param exp_id: TRAPID experiment id
    :param trapid_db_data: TRAPID db connection data

    """
    sys.stderr.write('[Message] Clear content in `transcripts` table. \n')
    clear_query_str = "UPDATE `transcripts` SET `is_rna_gene`=0, `rf_ids`=NULL WHERE `experiment_id`='{exp_id}';"
    db_conn = common.db_connect(*trapid_db_data)
    cursor = db_conn.cursor()
    formatted_query = clear_query_str.format(exp_id=exp_id)
    cursor.execute(formatted_query)
    db_conn.commit()
    db_conn.close()
Example #13
def get_infernal_z_value(exp_id, trapid_db_data):
    """Retrieve value needed for cmscan/cmsearch `-Z` parameter (search space size in megabase, here the total length of
    query sequences in Mb multiplied by 2).

    :param exp_id: TRAPID experiment id
    :param trapid_db_data: TRAPID db connection data
    :return: value for `-Z` parameter

    """
    query_str = "SELECT SUM(`len`) FROM (SELECT CHAR_LENGTH(UNCOMPRESS(`transcript_sequence`)) AS len FROM `transcripts` WHERE experiment_id ='{exp_id}') tr;"
    db_conn = common.db_connect(*trapid_db_data)
    cursor = db_conn.cursor()
    cursor.execute(query_str.format(exp_id=exp_id))
    total_nts = float(cursor.fetchone()[0])
    db_conn.close()
    return (total_nts / 1e6) * 2  # total query length in Mb, multiplied by 2
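
A quick sanity check with hypothetical numbers: an experiment totalling 5,000,000 nt of transcript sequence gives (5,000,000 / 1e6) * 2 = 10.0, i.e. a `-Z` search space of 10 Mb.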
Example #14
def cleanup_table(exp_id, table_name, trapid_db_data):
    """Cleanup a table from the db for an experiment.

    :param exp_id: TRAPID experiment id
    :param table_name: the table to cleanup
    :param trapid_db_data: TRAPID db connection data

    """
    query_str = "DELETE FROM `{table_name}` WHERE `experiment_id`='{exp_id}'"
    # Cleanup previous Infernal results for the experiment
    sys.stderr.write(
        '[Message] Cleanup previous data from `{table_name}`. \n'.format(
            table_name=table_name))
    db_conn = common.db_connect(*trapid_db_data)
    cursor = db_conn.cursor()
    cursor.execute(query_str.format(table_name=table_name, exp_id=exp_id))
    db_conn.commit()
    db_conn.close()
Example #15
def retrieve_rfam_go_data(trapid_db_data):
    """Retrieve Rfam GO annotation stored the `configuration` table of TRAPID's database. Return it as cm:gos dictionary.

    :param trapid_db_data: TRAPID db connection data
    :return: Rfam RNA family GO annotations as cm:gos dictionary.

    """
    sys.stderr.write(
        "[Message] Retrieve Rfam GO annotation from `configuration`.\n")
    rfam_go = {}
    query_str = "SELECT `key`, `value` FROM `configuration` WHERE `method`='rfam_annotation' AND `attr`='go'"
    db_conn = common.db_connect(*trapid_db_data)
    cursor = db_conn.cursor(MS.cursors.DictCursor)
    cursor.execute(query_str)
    for record in cursor.fetchall():
        rfam_go[record['key']] = record['value'].split(',')
    db_conn.close()
    return rfam_go
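
The returned mapping associates each covariance model accession with its GO terms, e.g. (hypothetical values): {'RF00005': ['GO:0030533'], 'RF00177': ['GO:0003735', 'GO:0005840']}.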
Example #16
def get_ref_db(exp_id, trapid_db_data, verbose=False):
    """Get reference database name used for a TRAPID experiment from TRAPID DB.

    :param exp_id: TRAPID experiment id
    :param trapid_db_data: TRAPID database connection data (parameters for common.db_connect())
    :param verbose: whether to be verbose (print extra information to stderr if set to True)
    :return: the experiment's reference database name

    """
    if verbose:
        sys.stderr.write("[Message] Retrieve reference database name of experiment '%d' from TRAPID DB. \n" % exp_id)
    query_str = "SELECT `used_plaza_database` FROM `experiments` WHERE `experiment_id`='{exp_id}';"
    db_conn = common.db_connect(*trapid_db_data)
    cursor = db_conn.cursor()
    cursor.execute(query_str.format(exp_id=exp_id))
    ref_db_name = cursor.fetchone()[0]
    db_conn.close()
    return ref_db_name
Example #17
def store_rna_similarities(exp_id, infernal_results, trapid_db_data):
    """Store Infernal tabulated output data in the `rna_similarities` table of TRAPID's db.

    :param exp_id: TRAPID experiment id
    :param infernal_results: parsed infernal results to store
    :param trapid_db_data: TRAPID db connection data

    """
    # First cleanup the table
    cleanup_table(exp_id=exp_id,
                  table_name="rna_similarities",
                  trapid_db_data=trapid_db_data)
    sorted_infernal_results = sorted(infernal_results,
                                     key=lambda k: float(k['score']),
                                     reverse=True)
    sys.stderr.write(
        '[Message] Store Infernal results in `rna_similarities`. \n')
    query_str = "INSERT INTO `rna_similarities` (`experiment_id`,`transcript_id`,`similarity_data`) VALUES ('{exp_id}','{transcript_id}', COMPRESS(\"{infernal_data}\"))"
    # Get and format similarity data
    fields_to_keep = [
        "cm_acc", "cm_id", "clan", "e_value", "score", "bias", "mdl_from",
        "mdl_to", "trunc", "seq_from", "seq_to"
    ]
    rna_sim_data = {}
    for rec in sorted_infernal_results:
        if rec["query"] not in rna_sim_data:
            sim_str = ",".join([rec[f] for f in fields_to_keep])
            rna_sim_data[rec["query"]] = [sim_str]
        else:
            sim_str = ",".join([rec[f] for f in fields_to_keep])
            rna_sim_data[rec["query"]].append(sim_str)
    for transcript_id in rna_sim_data:
        rna_sim_data[transcript_id] = ";".join(rna_sim_data[transcript_id])
    # Store Infernal results in `rna_similarities`
    db_conn = common.db_connect(*trapid_db_data)
    cursor = db_conn.cursor()
    for transcript_id in sorted(rna_sim_data.keys()):
        cursor.execute(
            query_str.format(exp_id=exp_id,
                             transcript_id=transcript_id,
                             infernal_data=rna_sim_data[transcript_id]))
    db_conn.commit()
    db_conn.close()
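
For illustration, each (hypothetical) Infernal record becomes one comma-joined field string, and multiple hits for the same transcript are then joined with ';' before being compressed into `similarity_data`:

# Hypothetical record, for illustration of the serialisation above
rec = {"cm_acc": "RF00005", "cm_id": "tRNA", "clan": "CL00001", "e_value": "1.2e-10",
       "score": "55.3", "bias": "0.0", "mdl_from": "1", "mdl_to": "71",
       "trunc": "no", "seq_from": "12", "seq_to": "83"}
fields_to_keep = ["cm_acc", "cm_id", "clan", "e_value", "score", "bias",
                  "mdl_from", "mdl_to", "trunc", "seq_from", "seq_to"]
print(",".join(rec[f] for f in fields_to_keep))
# RF00005,tRNA,CL00001,1.2e-10,55.3,0.0,1,71,no,12,83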
Example #18
def get_tax_data(ref_db_data, verbose=False):
    """Get species, common name and tax ids from the `annot_sources` table of a reference database.

    :param ref_db_data: reference database connection data (parameters for common.db_connect())
    :param verbose: whether to be verbose (print extra information to stderr if set to True)
    :return: return fetched `annot_sources` data as a dictionary (species:{tax_id, common_name})

    """
    if verbose:
        sys.stderr.write("[Message] Retrieve taxonomy data from reference DB '%s'. \n" % ref_db_data[-1])
    tax_data = {}
    query_str = "SELECT `species`, `tax_id`, `common_name` FROM `annot_sources`;"
    db_conn = common.db_connect(*ref_db_data)
    cursor = db_conn.cursor(MS.cursors.DictCursor)
    cursor.execute(query_str)
    for rec in cursor.fetchall():
        tax_data[rec['species']] = {'tax_id': rec['tax_id'], 'common_name': rec['common_name']}
    db_conn.close()
    return tax_data
Example #19
def create_infernal_files(exp_id, tmp_exp_dir, rfam_dir, exp_clans,
                          trapid_db_data):
    """Create `cm` and `clanin` files needed by Infernal for user-selected Rfam clans.

    :param exp_id: TRAPID experiment id
    :param tmp_exp_dir: experiment's directory
    :param rfam_dir: directory with Rfam data
    :param exp_clans: user-selected Rfam clans
    :param trapid_db_data: TRAPID db connection data

    """
    # individual_cms = "individual_cms"  # Name of directory containing individual CMs (in `rfam_dir`)
    rfam_clans_file = "Rfam_%s.clanin" % exp_id
    rfam_cm_file = os.path.join(tmp_exp_dir, "Rfam_%s.cm" % exp_id)
    exp_cms_file = "rfam_cms_%s.lst" % exp_id
    sys.stderr.write(
        "[Message] Create Rfam `cm` and `clanin` files for Infernal ('%s' and '%s').\n"
        % (rfam_cm_file, rfam_clans_file))
    clan_members = {}
    exp_cms = set()
    # Get clan membership information from `configuration` table
    # Since there are only 111 clans, we can retrieve this information for all of them
    query_str = "SELECT `key`, `value` FROM `configuration` WHERE `method`='rfam_clans' AND `attr`='families'"
    db_conn = common.db_connect(*trapid_db_data)
    cursor = db_conn.cursor(MS.cursors.DictCursor)
    cursor.execute(query_str)
    for record in cursor.fetchall():
        clan_members[record['key']] = record['value'].split(',')
    db_conn.close()
    # Create `clanin` file
    with open(os.path.join(tmp_exp_dir, rfam_clans_file), "w") as out_file:
        for clan in exp_clans:
            clanin_str = "{clan}\t{members}\n"
            out_file.write(
                clanin_str.format(clan=clan,
                                  members="\t".join(clan_members[clan])))
            # Also retrieve Rfam models (update `exp_cms`, later used to retrieve individual models)
            exp_cms.update(clan_members[clan])
    # Create file listing the experiment's CM (input file for `cmfetch`)
    with open(os.path.join(tmp_exp_dir, exp_cms_file), "w") as out_file:
        out_file.write('\n'.join(sorted(list(exp_cms))) + '\n')
    # Create `cm` file using Infernal's `cmfetch` command
    run_cmfetch(exp_id, tmp_exp_dir, rfam_dir)
Example #20
def get_gene_species(gf_id, ref_db_data, verbose=False):
    """Retrieve the species of the members of a gene family from a reference database.

    :param gf_id: gene family for which to retrieve reference data
    :param ref_db_data: reference database connection data (parameters for common.db_connect())
    :param verbose: whether to be verbose (print extra information to stderr if set to True)
    :return: gene family species data as a dictionary (gene:species).

    """
    ref_gf_id = gf_id.split('_')[1]
    if verbose:
        sys.stderr.write("[Message] Retrieve species for members of GF '%s' ('%s' reference DB). \n" % (ref_gf_id, ref_db_data[-1]))
    gene_species = {}
    query_str = "SELECT `gene_id`, `species` FROM `gf_data` WHERE `gf_id`='{ref_gf_id}';"
    db_conn = common.db_connect(*ref_db_data)
    cursor = db_conn.cursor(MS.cursors.DictCursor)
    cursor.execute(query_str.format(ref_gf_id=ref_gf_id))
    for rec in cursor.fetchall():
        gene_species[rec['gene_id']] = rec['species']
    db_conn.close()
    return gene_species
Example #21
def store_msa_db(msa_file,
                 msa_program,
                 exp_id,
                 gf_id,
                 trapid_db_data,
                 verbose=False):
    """Read faln MSA file generated for a GF and upload its content to TRAPID database.

    :param msa_file: path to faln MSA file to upload
    :param msa_program: program used to perform the MSA (also stored in the database)
    :param exp_id: TRAPID experiment id
    :param gf_id: gene family id
    :param trapid_db_data: TRAPID database connection data (parameters for common.db_connect())
    :param verbose: whether to be verbose (print extra information to stderr if set to True)

    """
    query_str = "UPDATE `gene_families` SET `msa`='{msa_data}', `msa_params`='{msa_params}'  WHERE `experiment_id`='{exp_id}' AND `gf_id`='{gf_id}';"
    if verbose:
        sys.stderr.write(
            "[Message] Store MSA data ('%s') for GF '%s' of experiment '%d' to TRAPID database.\n"
            % (msa_file, gf_id, exp_id))
    msa_data = []
    with open(msa_file, 'r') as in_file:
        for line in in_file:
            if line.startswith(">"):
                msa_data.append(line.strip() + ";")
            else:
                msa_data[-1] += line.strip()
    msa_data_str = ''.join(msa_data)
    if msa_data_str:
        db_conn = common.db_connect(*trapid_db_data)
        cursor = db_conn.cursor()
        cursor.execute(
            query_str.format(msa_data=msa_data_str,
                             msa_params=msa_program,
                             exp_id=exp_id,
                             gf_id=gf_id))
        db_conn.commit()
        db_conn.close()
Example #22
def upload_phyloxml_tree(exp_id, gf_id, phyloxml_tree, trapid_db_data, verbose=False):
    """Upload a PhyloXML tree generated for a gene family to TRAPID db.

    :param exp_id: TRAPID experiment id
    :param gf_id: gene family for which a PhyloXML tree is generated
    :param phyloxml_tree: PhyloXML tree string
    :param trapid_db_data: TRAPID database connection data (parameters for common.db_connect())
    :param verbose: whether to be verbose (print extra information to stderr if set to True)

    """
    if verbose:
        sys.stderr.write("[Message] Upload PhyloXML tree for GF '%s' (experiment '%d'). \n" % (gf_id, exp_id))
    # Minify XML tree string
    minified_tree_xml = etree.XML(phyloxml_tree, parser=etree.XMLParser(remove_blank_text=True))
    minified_tree_str = etree.tostring(minified_tree_xml)
    # Upload minified XML tree
    query_str = "UPDATE `gene_families` SET `xml_tree`='{xml_str}' WHERE `experiment_id`='{exp_id}' AND `gf_id`='{gf_id}';"
    db_conn = common.db_connect(*trapid_db_data)
    cursor = db_conn.cursor()
    cursor.execute(query_str.format(xml_str=minified_tree_str, exp_id=exp_id, gf_id=gf_id))
    db_conn.commit()
    db_conn.close()
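
The minification step above can be reproduced standalone with lxml; a trivial XML string stands in for a real PhyloXML document here:

from lxml import etree

pretty_xml = '<phyloxml>\n  <phylogeny rooted="true">\n    <name>GF1</name>\n  </phylogeny>\n</phyloxml>'
minified = etree.tostring(etree.XML(pretty_xml, parser=etree.XMLParser(remove_blank_text=True)))
# Whitespace-only text between tags is dropped:
# <phyloxml><phylogeny rooted="true"><name>GF1</name></phylogeny></phyloxml>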
Example #23
def delete_db_job(exp_id, gf_id, trapid_db_data, msa_only, verbose=False):
    """Delete MSA/tree creation job from TRAPID db.

    :param exp_id: TRAPID experiment id
    :param gf_id: gene family id
    :param trapid_db_data: TRAPID database connection data (parameters for common.db_connect())
    :param msa_only: whether only an MSA, and no tree, was generated
    :param verbose: whether to be verbose (print extra information to stderr if set to True)

    """
    if verbose:
        sys.stderr.write(
            "[Message] Delete MSA/tree creation job from TRAPID database...\n")
    job_name = "create_tree"
    if msa_only:
        job_name = "create_msa"
    comment_str = "{} {}".format(job_name, gf_id)
    db_conn = common.db_connect(*trapid_db_data)
    common.delete_experiment_job(experiment_id=exp_id,
                                 job_name=comment_str,
                                 db_conn=db_conn)
    db_conn.close()
Example #24
def send_end_email(exp_id, gf_id, trapid_db_data, msa_only):
    """Send an email to the owner of an experiment to warn them of job completion.

    :param exp_id: TRAPID experiment id
    :param gf_id: gene family id for which an MSA/tree was generated
    :param trapid_db_data: TRAPID database connection data (parameters for common.db_connect())
    :param msa_only: whether only an MSA, and no tree, was generated

    """
    # Get title & email address associated to the experiment
    query_str = "SELECT a.`title`, b.`email` FROM `experiments` a,`authentication` b WHERE a.`experiment_id`='{exp_id}' AND b.`user_id`=a.`user_id`;"
    page_url = '/'.join(
        [common.TRAPID_BASE_URL, 'tools', 'create_tree',
         str(exp_id), gf_id])
    db_conn = common.db_connect(*trapid_db_data)
    cursor = db_conn.cursor()
    cursor.execute(query_str.format(exp_id=exp_id))
    exp_data = cursor.fetchone()
    db_conn.close()
    # Send email
    if not exp_data:
        sys.stderr.write(
            "[Error] Impossible to retrieve experiment title/email address (experiment '%d')!\n"
            % (exp_id))
        sys.exit(1)
    email_subject = "TRAPID phylogenetic tree finished (%s)\n" % gf_id
    if msa_only:
        email_subject = "TRAPID MSA finished (%s)\n" % gf_id
    email_content = (
        "Dear user,\n\n"
        "The phylogenetic tree for gene family '{gf_id}' in experiment '{exp_title}' has been created.\n\n"
        "You can now view it at this URL: {page_url}\n\n"
        "Thank you for using TRAPID.\n").format(gf_id=gf_id,
                                                page_url=page_url,
                                                exp_title=exp_data[0])
    common.send_mail(to=[exp_data[1]],
                     subject=email_subject,
                     text=email_content)
Example #25
def get_subsets(exp_id, gf_id, trapid_db_data, verbose=False):
    """Retrieve subset information for transcripts assigned to a gene family from the TRAPID database.

    :param exp_id: TRAPID experiment id
    :param gf_id: gene family containing transcripts for which to retrieve subset information
    :param trapid_db_data: TRAPID database connection data (parameters for common.db_connect())
    :param verbose: whether to be verbose (print extra information to stderr if set to True)
    :return: transcript subset information as a dictionary (transcript_id:subsets)

    """
    if verbose:
        sys.stderr.write("[Message] Retrieve subsets for transcripts of GF '%s' (experiment '%d'). \n" % (gf_id, exp_id))
    tr_subsets = {}
    transcripts = set()
    tr_query_str = "SELECT `transcript_id` FROM `transcripts` WHERE `experiment_id`='{exp_id}' AND `gf_id`='{gf_id}';"
    subset_query_str = "SELECT `transcript_id`,`label` FROM `transcripts_labels` WHERE `experiment_id`='{exp_id}' AND `transcript_id` IN ({transcripts});"
    db_conn = common.db_connect(*trapid_db_data)
    cursor = db_conn.cursor()
    # Get transcript ids
    cursor.execute(tr_query_str.format(exp_id=exp_id, gf_id=gf_id))
    # OK to use `fetchall()` since we are getting data for a single GF only...
    for rec in cursor.fetchall():
        transcripts.add(rec[0])
    # Create a string for `IN` clause of SQL query
    transcripts_str = "'%s'" % "', '".join(sorted(list(transcripts)))
    # Get subset information for transcripts
    cursor.execute(subset_query_str.format(exp_id=exp_id, transcripts=transcripts_str))
    for rec in cursor.fetchall():
        if rec[0] in tr_subsets:
            tr_subsets[rec[0]].append(rec[1])
        else:
            tr_subsets[rec[0]] = [rec[1]]
    db_conn.close()
    if verbose:
        sys.stderr.write("[Message] Retrieved subset information: %s\n" % str(tr_subsets))
    return tr_subsets
Example #26
def get_meta_annotation(exp_id, gf_id, trapid_db_data, verbose=False):
    """Retrieve meta-annotation for transcripts assigned to a gene family from the TRAPID database.

    :param exp_id: TRAPID experiment id
    :param gf_id: gene family containing transcripts for which to retrieve meta-annotation
    :param trapid_db_data: TRAPID database connection data (parameters for common.db_connect())
    :param verbose: whether to be verbose (print extra information to stderr if set to True)
    :return: GF transcript meta-annotation as a dictionary (transcript_id:meta_annotation)

    """
    if verbose:
        sys.stderr.write("[Message] Retrieve meta-annotation for transcripts of GF '%s' (experiment '%d'). \n" % (gf_id, exp_id))
    tr_meta_annotation = {}
    query_str = "SELECT `transcript_id`, `meta_annotation` FROM `transcripts` WHERE `experiment_id`='{exp_id}' AND `gf_id`='{gf_id}';"
    db_conn = common.db_connect(*trapid_db_data)
    cursor = db_conn.cursor()
    cursor.execute(query_str.format(exp_id=exp_id, gf_id=gf_id))
    # OK to use `fetchall()` since we are getting data for a single GF only...
    for rec in cursor.fetchall():
        tr_meta_annotation[rec[0]] = rec[1]
    db_conn.close()
    if verbose:
        sys.stderr.write("[Message] Retrieved meta-annotation: %s\n" % str(tr_meta_annotation))
    return tr_meta_annotation
Example #27
def store_rna_families(exp_id, infernal_results, trapid_db_data):
    """Populate `rna_families` for the experiment from Infernal results.

    :param exp_id: TRAPID experiment id
    :param infernal_results: parsed infernal results
    :param trapid_db_data: TRAPID db connection data

    """
    # First cleanup the table
    cleanup_table(exp_id=exp_id,
                  table_name="rna_families",
                  trapid_db_data=trapid_db_data)
    sys.stderr.write('[Message] Store Infernal results in `rna_families`. \n')
    query_str = "INSERT INTO `rna_families` (`experiment_id`,`rf_id`,`rfam_rf_id`,`rfam_clan_id`, `num_transcripts`) VALUES ('{e}','{f}','{rf}','{c}','{n}')"
    # Get and format data from Infernal results
    rna_fam_data = {}
    for rec in infernal_results:
        if rec["cm_acc"] not in rna_fam_data:
            rna_fam_data[rec["cm_acc"]] = {
                "clan": rec["clan"],
                "n_transcripts": 1
            }
        else:
            rna_fam_data[rec["cm_acc"]]["n_transcripts"] += 1
    db_conn = common.db_connect(*trapid_db_data)
    cursor = db_conn.cursor()
    for rf_id in sorted(rna_fam_data.keys()):
        exp_rf_id = "%s_%s" % (exp_id, rf_id)
        cursor.execute(
            query_str.format(e=exp_id,
                             f=exp_rf_id,
                             rf=rf_id,
                             c=rna_fam_data[rf_id]["clan"],
                             n=rna_fam_data[rf_id]["n_transcripts"]))
    db_conn.commit()
    db_conn.close()
Example #28
def main():
    sys.stderr.write('[Message] Starting Kaiju procedure: %s\n' %
                     time.strftime('%Y/%m/%d %H:%M:%S'))
    cmd_args = parse_arguments()
    # sys.stderr.write(str(cmd_args)+'\n')  # Debug
    # Read experiment's initial processing configuration file
    config = common.load_config(cmd_args.ini_file_initial,
                                {"tax_binning", "trapid_db", "experiment"})
    # A list containing all needed parameters for `common.db_connect()`
    trapid_db_data = common.get_db_connection_data(config, 'trapid_db')
    exp_id = config['experiment']['exp_id']
    # Input / output
    input_file = os.path.join(config['experiment']['tmp_exp_dir'],
                              "transcripts_%s.fasta" % exp_id)
    output_dir = os.path.join(config['experiment']['tmp_exp_dir'], "kaiju")
    output_script = os.path.join(config['experiment']['tmp_exp_dir'],
                                 "run_kaiju_split.sh")
    # Paths to files needed for taxonomic classification and post-processing
    split_db_dir = config['tax_binning']['splitted_db_dir']
    kaiju_parameters = config['tax_binning']['kaiju_parameters']
    names_dmp_file = config['tax_binning']['names_dmp_file']
    nodes_dmp_file = config['tax_binning']['nodes_dmp_file']
    # Update experiment log
    db_connection = common.db_connect(*trapid_db_data)
    common.update_experiment_log(exp_id, 'start_tax_classification',
                                 'kaiju_mem', 2, db_connection)
    db_connection.close()
    # Create and run shell script that will run Kaiju.
    kaiju_split_script = create_shell_script(
        get_split_db_files(split_db_dir),
        os.path.join(output_dir, "split_results"), output_script,
        kaiju_parameters, nodes_dmp_file, input_file)
    # Impossible to qsub directly from the webcluster... So run Kaiju shell script from here
    # qsub_and_wait(script_name=kaiju_split_script, n_cores=1, mem_per_core=8)
    os.chmod(kaiju_split_script, 0o755)
    job = subprocess.Popen(kaiju_split_script)
    job.communicate()
    # Merge Kaiju results
    merge_kaiju_split_results.merge_results(
        os.path.join(output_dir, "split_results"), nodes_dmp_file,
        os.path.abspath(os.path.join(output_dir, "kaiju_merged.out")))
    # Process output file to generate graphical outputs
    kaiju_data_dir = os.path.abspath(output_dir)
    data_dict = {
        'names_dmp': names_dmp_file,
        'nodes_dmp': nodes_dmp_file,
        'kaiju_output': os.path.join(kaiju_data_dir, "kaiju_merged.out"),
        # Domain, phylum, order, genus composition files
        'domain_comp': os.path.join(kaiju_data_dir, 'top_tax.domain.tsv'),
        'phylum_comp': os.path.join(kaiju_data_dir, 'top_tax.phylum.tsv'),
        'order_comp': os.path.join(kaiju_data_dir, 'top_tax.order.tsv'),
        'genus_comp': os.path.join(kaiju_data_dir, 'top_tax.genus.tsv'),
        # Krona HTML
        'kaiju_tsv_output': os.path.join(kaiju_data_dir, 'kaiju_merged.to_krona.out'),
        'krona_html_file': os.path.join(kaiju_data_dir, 'kaiju_merged.krona.html'),
        # Treeview JSON
        'treeview_json': os.path.join(kaiju_data_dir, 'kaiju_merged.to_treeview.json')
    }
    # Generate all visualization output files
    # 1. Krona
    try:
        kaiju_viz.kaiju_to_krona(data_dict['kaiju_output'],
                                 data_dict['kaiju_tsv_output'],
                                 data_dict['krona_html_file'],
                                 data_dict['names_dmp'],
                                 data_dict['nodes_dmp'])
    except Exception as e:
        print(e)
        sys.stderr.write("[Error] Unable to produce Krona output. \n")
    # 2. Treeview JSON data
    try:
        kaiju_viz.kaiju_to_treeview(data_dict['kaiju_output'],
                                    data_dict['treeview_json'],
                                    data_dict['names_dmp'],
                                    data_dict['nodes_dmp'])
    except Exception as e:
        print(e)
        sys.stderr.write("[Error] Unable to produce Treeview JSON file. \n")
    # 3. Pie/barcharts summaries data
    try:
        # At superkingdom/domain level
        kaiju_viz.kaiju_to_domain_summary_data(data_dict['kaiju_tsv_output'],
                                               data_dict['domain_comp'],
                                               data_dict['names_dmp'],
                                               data_dict['nodes_dmp'])
        # At selected taxonomic ranks (iterating over a rank->rank composition file dictionary)
        tax_ranks = {
            'phylum': 'phylum_comp',
            'order': 'order_comp',
            'genus': 'genus_comp'
        }
        for rank, rank_comp in tax_ranks.items():
            kaiju_viz.kaiju_to_tax_summary_data(data_dict['kaiju_output'],
                                                data_dict[rank_comp],
                                                data_dict['names_dmp'],
                                                data_dict['nodes_dmp'],
                                                rank_limit=rank)
        # kaiju_viz.kaiju_to_tax_summary_data(
        #     kaiju_output_file=data_dict['kaiju_output'],
        #     names_tax_file=data_dict['names_dmp'],
        #     nodes_tax_file=data_dict['nodes_dmp'],
        #     output_data_table=data_dict['order_comp'],
        #     rank_limit='order')
        # kaiju_viz.kaiju_to_tax_summary_data(
        #     kaiju_output_file=data_dict['kaiju_output'],
        #     names_tax_file=data_dict['names_dmp'],
        #     nodes_tax_file=data_dict['nodes_dmp'],
        #     output_data_table=data_dict['genus_comp'],
        #     rank_limit='genus')
    except Exception as e:
        print(e)
        sys.stderr.write(
            "[Error] Unable to produce domain/taxonomic rank summaries. \n")
    # Now, let's cleanup existing results and store current results in the TRAPID database
    db_connection = common.db_connect(*trapid_db_data)
    clear_db_content(exp_id, db_connection)
    db_connection.close()
    db_connection = common.db_connect(*trapid_db_data)
    kaiju_output_to_db(exp_id, data_dict['kaiju_output'], db_connection)
    db_connection.close()
    # Also update experiment log
    db_connection = common.db_connect(*trapid_db_data)
    common.update_experiment_log(exp_id, 'stop_tax_classification',
                                 'kaiju_mem', 2, db_connection)
    db_connection.close()
    sys.stderr.write('[Message] Finished Kaiju procedure: %s\n' %
                     time.strftime('%Y/%m/%d %H:%M:%S'))
Example #29
File: app.py Project: duchenerc/books.db
from flask import Flask, render_template
from common import db_connect, DATABASE_FILENAME, db_exec

db = db_connect(DATABASE_FILENAME)
app = Flask(__name__)


@app.route("/")
def index():
    c_books = db_exec(
        db, """
        select b.id, b.title, b.isbn
        from books b
        order by b.title;
    """)

    c_authors = db_exec(
        db, """
        select ba.book_id, a.id, a.surname, a.given_name
        from books_authors ba, authors a
        where ba.author_id = a.id;
    """)

    books = {x[0]: {"title": x[1], "isbn": x[2]} for x in c_books}

    authors = dict()

    for book_id, author_id, surname, given_name in c_authors:
        if book_id not in authors.keys():
            authors[book_id] = [{
                "id": int(author_id),
                "surname": surname,
                "given_name": given_name,
            }]
        else:
            authors[book_id].append({
                "id": int(author_id),
                "surname": surname,
                "given_name": given_name,
            })

    # The original snippet is truncated here; the ending below is an assumed completion
    # (template name and context variables are guesses, not from the source).
    return render_template("index.html", books=books, authors=authors)
Example #30
	def run(self):
		"""
		03-18-05
			mapping_dict all changed to haiyan_no2gene_no
		04-12-05
			use min_cluster_size to cut off some small clusters
		07-03-05
			construct graph_modeling's cor_cut_off vector first
		10-14-05
			add calculate_unknown_gene_ratio()
		12-06-05
			add gene_no2incidence_array to parser_type ==4
		05-31-06
			add type 5 (haifeng's output)
			
			--db_connect()
			--get_haiyan_no2gene_no()
			--get_known_genes_dict()
			--get_gene_id2gene_no()
			--create_tables()
			--graph_modeling.cor_cut_off_vector_construct()
			(loop over inf)
				--parser_dict[parser_type]() (codense_parser(), copath_parser() )
					--get_combined_cor_vector
					--parse_recurrence
					--parse_connectivity
					--get_vertex_set_gim_array() (parser_type=4 only)
				--calculate_unknown_gene_ratio()
				--db_submit()
		"""
		
		inf = csv.reader(open(self.infname, 'r'), delimiter=self.delimiter)
		(conn, curs) = db_connect(self.hostname, self.dbname, self.schema)
		
		#setup the haiyan_no2gene_no
		if self.mapping_file != None:
			haiyan_no2gene_no = get_haiyan_no2gene_no(self.mapping_file)
		else:
			haiyan_no2gene_no = {}	#a blank dictionary, 
		known_gene_no2go_no_set = get_known_genes_dict(curs)	#10-14-05	used to get unknown_gene_ratio
		
		if self.parser_type == 4 or self.parser_type==5:	#12-06-05
			if self.gim_inputfname == None:
				sys.stderr.write("\n parser_type = 4 needs gim_inputfname.\n")
				sys.exit(3)
			gene_id2gene_no = get_gene_id2gene_no(curs)
			gene_no2incidence_array = get_gene_no2incidence_array(self.gim_inputfname, gene_id2gene_no)
		else:
			gene_no2incidence_array = None
		
		mapping_dict = {1:haiyan_no2gene_no,
			2:haiyan_no2gene_no,
			3:None,
			4:gene_no2incidence_array,
			5:gene_no2incidence_array}
		self.create_tables(curs, self.table, self.mcl_table, self.pattern_table)
		no = 0
		
		graph_modeling.cor_cut_off_vector_construct(0, 0.8)	#07-03-05 compute the cor cutoff vector for graph_modeling, use 0.8 as cutoff
			#graph_modeling.ind_min_cor() requires the cor_cut_off vector to be constructed ahead.
		graph_modeling.set_jk_cut_off(6)	#07-03-05 haiyan's cutoff is 6, different from my default value, 7.
		for row in inf:
			cluster_list = self.parser_dict[self.parser_type](row, mapping_dict[self.parser_type], curs)
			for cluster in cluster_list:
				if self.parser_type!=5 and len(cluster.vertex_set)<self.min_cluster_size:
					#too small, ignore, 2006-08-29 if it's haifeng_output_parser, no restriction for cluster size, haifeng imposes 4
					continue
				#10-14-05 unknown_gene_ratio to submit to pattern_table
				cluster.unknown_gene_ratio = self.calculate_unknown_gene_ratio(cluster.vertex_set, known_gene_no2go_no_set)
				self.db_submit(curs, cluster, self.pattern_table)
				no+=1
				if self.report and no%1000==0:
					sys.stderr.write('%s%d'%('\x08'*20, no))
		if self.report:
			sys.stderr.write('%s%d'%('\x08'*20, no))
		if self.needcommit:
			conn.commit()
		sys.stderr.write('\n\tTotal patterns: %d\n'%no)
Example #31
def main():
    """
    Main function: run Infernal, filter results and flag RNA genes in TRAPID db.
    """
    cmd_args = parse_arguments()
    # Read experiment's initial processing configuration file
    config = common.load_config(cmd_args.ini_file_initial,
                                {"infernal", "trapid_db", "experiment"})
    # The web application sets the Rfam clan string to 'None' in case the user chose no clans
    # If this is the case, exit the script with an information message
    if config["infernal"]["rfam_clans"] == "None":
        sys.stderr.write(
            "[Message] No Rfam clans selected: skip ncRNA annotation step.\n")
        sys.exit()
    try:
        # Run Infernal, parse and export results to DB
        sys.stderr.write(
            '[Message] Starting ncRNA annotation procedure: %s\n' %
            time.strftime('%Y/%m/%d %H:%M:%S'))
        exp_id = config["experiment"]["exp_id"]
        tmp_exp_dir = config["experiment"]["tmp_exp_dir"]
        rfam_dir = config["infernal"]["rfam_dir"]
        exp_clans = config["infernal"]["rfam_clans"].split(",")
        # Lists containing all needed parameters for `common.db_connect()` (TRAPID + reference DB)
        trapid_db_data = common.get_db_connection_data(config, 'trapid_db')
        reference_db_data = common.get_db_connection_data(
            config, 'reference_db')
        db_connection = common.db_connect(*trapid_db_data)
        common.update_experiment_log(exp_id, 'start_nc_rna_search', 'Infernal',
                                     2, db_connection)
        db_connection.close()
        create_infernal_files(exp_id, tmp_exp_dir, rfam_dir, exp_clans,
                              trapid_db_data)
        # run_cmpress(exp_id=exp_id, tmp_exp_dir=tmp_exp_dir)
        total_m_nts = get_infernal_z_value(exp_id, trapid_db_data)
        infernal_tblout = run_infernal(exp_id, tmp_exp_dir, total_m_nts)
        # Filter Infernal tabulated output (keep best non-overlapping matches)
        # infernal_tblout_filtered = filter_out_overlaps(exp_id=exp_id, tmp_exp_dir=tmp_exp_dir, tblout_file=infernal_tblout)
        infernal_tblout_filtered = keep_best_results(exp_id, tmp_exp_dir,
                                                     infernal_tblout)
        # Get filtered results as list of dict and add clan information
        # Read Rfam clan information from `clanin` file. Would it make more sense to retrieve it when creating it?
        cm_clans = get_exp_cm_clans(exp_id, tmp_exp_dir)
        filtered_infernal_results = infernal_tblout_to_list(
            infernal_tblout_filtered, cm_clans)
        infernal_results = infernal_tblout_to_list(infernal_tblout, cm_clans)
        # Flag potential rna genes (set `is_rna_gene` value to 1 and `rf_ids` in `transcripts` table)
        flag_rna_genes(exp_id, filtered_infernal_results, trapid_db_data)
        # Store filtered results in `rna_similarities` ...
        store_rna_similarities(exp_id, infernal_results, trapid_db_data)
        # ... and `rna_families`
        store_rna_families(exp_id, filtered_infernal_results, trapid_db_data)
        # Annotate transcripts using GO terms from Rfam
        rfam_go = retrieve_rfam_go_data(trapid_db_data)
        go_data = get_go_data(reference_db_data)
        # perform_go_annotation(exp_id, infernal_results, rfam_go, go_data, tmp_exp_dir)
        perform_go_annotation(exp_id, filtered_infernal_results, rfam_go,
                              go_data, tmp_exp_dir)
        # That's it for now
        db_connection = common.db_connect(*trapid_db_data)
        common.update_experiment_log(exp_id, 'stop_nc_rna_search', 'Infernal',
                                     2, db_connection)
        db_connection.close()
        sys.stderr.write(
            '[Message] Finished ncRNA annotation procedure: %s\n' %
            time.strftime('%Y/%m/%d %H:%M:%S'))
    # If any exception was raised, update the experiment's log, set status to 'error', and exit
    except Exception:
        print_exc()
        common.stop_initial_processing_error(exp_id, trapid_db_data)