Exemplo n.º 1
0
    def test_fill_seed2kegg_mappings_table(self):
        """Populate UniRef, SEED and KEGG tables and check the SEED-to-KEGG mappings.

        The test loads the UniRef proteins, the SEED genomes/genes with their
        DIAMOND hits, and the KEGG genomes/genes with their DIAMOND hits, then
        fills ``seed2kegg_mappings``.  Expected row counts are 3 for
        ``seed2uniref_mappings``, 2 for ``kegg2uniref_mappings`` and 4 for
        ``seed2kegg_mappings``.

        NOTE(review): the ``95.0`` and ``5`` arguments are forwarded to the
        loaders as-is; their exact meaning is defined by those helpers.
        """
        cursor = self.cursor

        # --- UniRef reference proteins ---
        uniref_data_util.create_uniref_proteins_table(cursor)
        uniref_data_util.import_uniref_fasta(cursor, uniref_fasta_file)
        uniref_data_util.create_uniref_proteins_indices(cursor)

        # --- SEED genomes, genes and SEED-to-UniRef mappings ---
        seed_data_util.create_tables(cursor)
        seed_data_util.import_seed_genomes(cursor, seed_genome_file)
        seed_data_util.import_seed_genes(cursor, seed_gene_dir)
        seed_data_util.create_seed2uniref_mappings_table(cursor)
        seed_data_util.load_diamond_search_results(
            cursor, seed_diamond_output, 95.0, 5)
        cursor.execute('SELECT COUNT(*) FROM seed2uniref_mappings')
        seed2uniref_row = cursor.fetchone()
        self.assertEqual(seed2uniref_row[0], 3)

        # --- KEGG genomes, genes and KEGG-to-UniRef mappings ---
        kegg_data_util.create_kegg_genomes_table(cursor)
        kegg_genomes_path = os.path.join(test_kegg_dir, 'kegg_genomes.txt')
        kegg_data_util.import_kegg_genomes_list(cursor, kegg_genomes_path)
        kegg_data_util.create_kegg_genes_table(cursor)
        kegg_proteins_path = os.path.join(test_kegg_dir,
                                          'ko_proteins_nr.fasta')
        kegg_data_util.import_kegg_genes(cursor, kegg_proteins_path)
        kegg_data_util.create_kegg2uniref_mappings_table(cursor)
        kegg_data_util.load_diamond_search_results(
            cursor, kegg_diamond_output, 95.0, 5)
        cursor.execute('SELECT COUNT(*) FROM kegg2uniref_mappings')
        kegg2uniref_row = cursor.fetchone()
        self.assertEqual(kegg2uniref_row[0], 2)

        # --- SEED-to-KEGG mappings (the function under test) ---
        seed_data_util.create_seed2kegg_mappings_table(cursor)
        data_analysis.fill_seed2kegg_mappings_table(
            cursor, seed2kegg_diamond_output, 95.0, 5)
        cursor.execute('SELECT COUNT(*) FROM seed2kegg_mappings')
        seed2kegg_row = cursor.fetchone()
        self.assertEqual(seed2kegg_row[0], 4)
 def test_uniref_import(self):
     """Import the UniRef FASTA fixture and verify one hash and the row count.

     After creating the table, importing ``uniref_fasta_file`` and building
     the indices, the entry ``UniRef100_Q92AT0`` must have the expected
     ``protein_hash`` and ``uniref_proteins`` must contain 7 rows.
     """
     cursor = self.cursor

     uniref_data_util.create_uniref_proteins_table(cursor)
     uniref_data_util.import_uniref_fasta(cursor, uniref_fasta_file)
     uniref_data_util.create_uniref_proteins_indices(cursor)

     # Spot-check the stored hash of a single known protein.
     cursor.execute(
         'SELECT protein_hash FROM uniref_proteins WHERE uniref_id = ?',
         ('UniRef100_Q92AT0',))
     hash_row = cursor.fetchone()
     self.assertEqual(hash_row[0], u'B2E2EDF5A1AA957ADBAA08384F6BFB9D')

     # Every record of the fixture must have been imported.
     cursor.execute('SELECT COUNT(*) FROM uniref_proteins')
     count_row = cursor.fetchone()
     self.assertEqual(count_row[0], 7)
Exemplo n.º 3
0
 def setUp(self):
     """Open the SEED test database and load the SEED and UniRef fixtures.

     Stores the connection in ``self.conn`` and its cursor in
     ``self.cursor``.  SEED genomes and genes are imported first; the UniRef
     database file is then attached and the UniRef proteins table is
     created, filled and indexed.

     NOTE(review): the third argument of ``attach_local_database``
     (``'uniref_proteins'``) is presumably the name the attached database
     is registered under -- confirm against ``db_utils``.
     """
     self.conn = db_utils.connect_local_database(seed_db_file)
     cursor = self.cursor = self.conn.cursor()

     # SEED genomes and genes.
     seed_data_util.create_seed_genomes_table(cursor)
     seed_genomes_path = os.path.join(data_dir, 'test_seed_genomes.txt')
     seed_data_util.import_seed_genomes(cursor, seed_genomes_path)
     seed_data_util.create_seed_genes_table(cursor)
     seed_data_util.import_seed_genes(cursor, test_seed_dir)

     # UniRef proteins, kept in a separately attached database file.
     db_utils.attach_local_database(
         cursor, uniref_db_file, 'uniref_proteins')
     uniref_data_util.create_uniref_proteins_table(cursor)
     uniref_data_util.import_uniref_fasta(cursor, uniref_fasta_file)
     uniref_data_util.create_uniref_proteins_indices(cursor)
Exemplo n.º 4
0
    def test_find_seed2uniref_identical_mappings(self):
        """Check that identical-protein matching yields two SEED-to-UniRef rows.

        The SEED tables (including ``seed2uniref_mappings``) and the UniRef
        proteins table are created and filled from the fixtures, after which
        ``find_seed2uniref_identical_mappings`` is run and the mapping table
        is expected to contain 2 rows.
        """
        cursor = self.cursor

        # Schema plus UniRef reference data.
        seed_data_util.create_tables(cursor)
        seed_data_util.create_seed2uniref_mappings_table(cursor)
        uniref_data_util.create_uniref_proteins_table(cursor)
        uniref_data_util.import_uniref_fasta(cursor, uniref_fasta_file)
        uniref_data_util.create_uniref_proteins_indices(cursor)

        # SEED data, then the function under test.
        seed_data_util.import_seed_genomes(cursor, seed_genome_file)
        seed_data_util.import_seed_genes(cursor, seed_gene_dir)
        data_analysis.find_seed2uniref_identical_mappings(cursor)

        cursor.execute('SELECT COUNT(*) FROM seed2uniref_mappings')
        mapping_row = cursor.fetchone()
        self.assertEqual(mapping_row[0], 2)
Exemplo n.º 5
0
    def test_load_diamond_search_results(self):
        """Check that loading SEED DIAMOND hits yields three mapping rows.

        The SEED tables and the UniRef proteins table are created and filled
        from the fixtures, the DIAMOND output in ``seed_diamond_file`` is
        loaded, and ``seed2uniref_mappings`` is expected to contain 3 rows.

        NOTE(review): ``95.0`` and ``5`` are forwarded to the loader as-is;
        their exact meaning is defined by ``seed_data_util``.
        """
        cursor = self.cursor

        # Schema plus UniRef reference data.
        seed_data_util.create_tables(cursor)
        seed_data_util.create_seed2uniref_mappings_table(cursor)
        uniref_data_util.create_uniref_proteins_table(cursor)
        uniref_data_util.import_uniref_fasta(cursor, uniref_fasta_file)
        uniref_data_util.create_uniref_proteins_indices(cursor)

        # SEED data, then the function under test.
        seed_data_util.import_seed_genomes(cursor, seed_genome_file)
        seed_data_util.import_seed_genes(cursor, seed_gene_dir)
        seed_data_util.load_diamond_search_results(
            cursor, seed_diamond_file, 95.0, 5)

        cursor.execute('SELECT COUNT(*) FROM seed2uniref_mappings')
        mapping_row = cursor.fetchone()
        self.assertEqual(mapping_row[0], 3)
Exemplo n.º 6
0
def main():
    """Rebuild the UniRef proteins table from a FASTA file.

    Reads the command-line arguments via ``get_args()``, connects to the
    database given by ``args.db``, drops the existing UniRef tables and
    indices, recreates the proteins table, imports ``args.fasta`` and
    rebuilds the indices.  Changes are committed only if every step
    succeeds; the connection is closed in all cases.
    """
    args = get_args()

    conn = db_utils.connect_local_database(args.db)
    try:
        cursor = conn.cursor()

        # Start from a clean slate so a re-run does not duplicate data.
        uniref_data_util.drop_tables(cursor)
        uniref_data_util.drop_indices(cursor)

        uniref_data_util.create_uniref_proteins_table(cursor)
        uniref_data_util.import_uniref_fasta(cursor, args.fasta)
        # Indices are built after the bulk import, as in the original order.
        uniref_data_util.create_uniref_proteins_indices(cursor)

        conn.commit()
    finally:
        # Release the connection even when a step above raises.
        conn.close()
 def setUp(self):
     """Open the KEGG test database and load the KEGG and UniRef fixtures.

     Stores the connection in ``self.conn`` and its cursor in
     ``self.cursor``.  The UniRef database file is attached first, then the
     KEGG genomes and genes are imported, and finally the UniRef proteins
     table is created, filled and indexed.

     NOTE(review): the third argument of ``attach_local_database``
     (``'uniref_proteins'``) is presumably the name the attached database
     is registered under -- confirm against ``db_utils``.
     """
     kegg_db_path = os.path.join(data_dir, kegg_db_file)
     self.conn = db_utils.connect_local_database(kegg_db_path)
     cursor = self.cursor = self.conn.cursor()

     uniref_db_path = os.path.join(data_dir, uniref_db_file)
     db_utils.attach_local_database(
         cursor, uniref_db_path, 'uniref_proteins')

     # KEGG genomes and genes.
     kegg_data_util.create_kegg_genomes_table(cursor)
     kegg_genomes_path = os.path.join(test_kegg_dir, 'kegg_genomes.txt')
     kegg_data_util.import_kegg_genomes_list(cursor, kegg_genomes_path)
     kegg_data_util.create_kegg_genes_table(cursor)
     kegg_proteins_path = os.path.join(test_kegg_dir, 'ko_proteins_nr.fasta')
     kegg_data_util.import_kegg_genes(cursor, kegg_proteins_path)

     # UniRef proteins.
     uniref_data_util.create_uniref_proteins_table(cursor)
     uniref_data_util.import_uniref_fasta(cursor, uniref_fasta_file)
     uniref_data_util.create_uniref_proteins_indices(cursor)
Exemplo n.º 8
0
    def setUp(self):
        """Build the complete test database: UniRef, SEED, KEGG, mappings, collection.

        Stores the connection in ``self.conn`` and its cursor in
        ``self.cursor``, then loads, in order: the UniRef proteins; the SEED
        functional roles, genomes, genes, gene-to-role mappings and DIAMOND
        hits; the KEGG orthologs, genomes, genes, gene-to-KO mappings and
        DIAMOND hits; the SEED-to-KEGG mappings; and finally the collection
        tables with the ``nitrogen_test`` collection.

        NOTE(review): ``95.0`` and ``5`` are forwarded to the DIAMOND loaders
        as-is; their exact meaning is defined by those helpers.
        """
        self.conn = db_utils.connect_local_database(db_file)
        cursor = self.cursor = self.conn.cursor()

        # --- UniRef reference proteins ---
        uniref_data_util.create_uniref_proteins_table(cursor)
        uniref_data_util.import_uniref_fasta(cursor, uniref_fasta_file)
        uniref_data_util.create_uniref_proteins_indices(cursor)

        # --- SEED data and SEED-to-UniRef mappings ---
        seed_data_util.create_tables(cursor)
        seed_data_util.import_seed_functional_roles_table(cursor,
                                                          seed_roles_file)
        seed_data_util.import_seed_genomes(cursor, seed_genome_file)
        seed_data_util.import_seed_genes(cursor, seed_gene_dir)
        seed_data_util.import_seed_gene2roles_mapping(
            cursor, seed_gene2roles_dir, 'test')
        seed_data_util.create_seed2uniref_mappings_table(cursor)
        seed_data_util.load_diamond_search_results(
            cursor, seed_diamond_output, 95.0, 5)

        # --- KEGG data and KEGG-to-UniRef mappings ---
        kegg_data_util.create_kegg_orthologs_table(cursor)
        ko_list_path = os.path.join(test_kegg_dir, 'kegg_ko_list.txt')
        kegg_data_util.import_kegg_orthologs_list(cursor, ko_list_path)
        kegg_data_util.create_kegg_genomes_table(cursor)
        kegg_genomes_path = os.path.join(test_kegg_dir, 'kegg_genomes.txt')
        kegg_data_util.import_kegg_genomes_list(cursor, kegg_genomes_path)
        kegg_data_util.create_kegg_genes_table(cursor)
        kegg_proteins_path = os.path.join(test_kegg_dir,
                                          'ko_proteins_nr.fasta')
        kegg_data_util.import_kegg_genes(cursor, kegg_proteins_path)
        kegg_data_util.create_kegg_genes2ko_table(cursor)
        kegg_data_util.import_genes2ko_mappings(cursor, test_kegg_dir)
        kegg_data_util.create_kegg2uniref_mappings_table(cursor)
        kegg_data_util.load_diamond_search_results(
            cursor, kegg_diamond_output, 95.0, 5)

        # --- SEED-to-KEGG mappings ---
        seed_data_util.create_seed2kegg_mappings_table(cursor)
        data_analysis.fill_seed2kegg_mappings_table(
            cursor, seed2kegg_diamond_output, 95.0, 5)

        # --- Function collection used by the tests ---
        db_utils.create_collections_table(cursor)
        db_utils.create_collection2function_table(cursor)
        data_analysis.import_collection_tsv(
            cursor, collection_file, 'nitrogen_test', 'test info', '0')
    def test_export_kegg_unmapped_proteins(self):
        """Export KEGG proteins without a mapping and check the first FASTA header.

        The UniRef, SEED and KEGG tables and all three mapping tables are
        built from the fixtures (with intermediate row-count checks of 3, 2
        and 4), then ``export_kegg_unmapped_proteins`` writes ``out.fasta``
        into ``data_dir``.  The first line of that file must start with
        ``>dml:Dmul_28240``.

        NOTE(review): ``95.0`` and ``5`` are forwarded to the DIAMOND loaders
        as-is; their exact meaning is defined by those helpers.
        """
        # --- UniRef reference proteins ---
        uniref_data_util.create_uniref_proteins_table(self.cursor)
        uniref_data_util.import_uniref_fasta(self.cursor, uniref_fasta_file)
        uniref_data_util.create_uniref_proteins_indices(self.cursor)

        # --- SEED data and SEED-to-UniRef mappings ---
        seed_data_util.create_tables(self.cursor)
        seed_data_util.import_seed_genomes(self.cursor, seed_genome_file)
        seed_data_util.import_seed_genes(self.cursor, seed_gene_dir)
        seed_data_util.create_seed2uniref_mappings_table(self.cursor)
        seed_data_util.load_diamond_search_results(self.cursor,
                                                   seed_diamond_output, 95.0,
                                                   5)
        self.cursor.execute('SELECT COUNT(*) FROM seed2uniref_mappings')
        self.assertEqual(self.cursor.fetchone()[0], 3)

        # --- KEGG data and KEGG-to-UniRef mappings ---
        kegg_proteins_file = os.path.join(test_kegg_dir,
                                          'ko_proteins_nr.fasta')
        kegg_data_util.create_kegg_genomes_table(self.cursor)
        kegg_data_util.import_kegg_genomes_list(
            self.cursor, os.path.join(test_kegg_dir, 'kegg_genomes.txt'))
        kegg_data_util.create_kegg_genes_table(self.cursor)
        kegg_data_util.import_kegg_genes(self.cursor, kegg_proteins_file)
        kegg_data_util.create_kegg2uniref_mappings_table(self.cursor)
        kegg_data_util.load_diamond_search_results(self.cursor,
                                                   kegg_diamond_output, 95.0,
                                                   5)
        self.cursor.execute('SELECT COUNT(*) FROM kegg2uniref_mappings')
        self.assertEqual(self.cursor.fetchone()[0], 2)

        # --- SEED-to-KEGG mappings ---
        seed_data_util.create_seed2kegg_mappings_table(self.cursor)
        data_analysis.fill_seed2kegg_mappings_table(self.cursor,
                                                    seed2kegg_diamond_output,
                                                    95.0, 5)
        self.cursor.execute('SELECT COUNT(*) FROM seed2kegg_mappings')
        self.assertEqual(self.cursor.fetchone()[0], 4)

        # --- Export (the function under test) and verify the output ---
        out_file = os.path.join(data_dir, 'out.fasta')
        data_analysis.export_kegg_unmapped_proteins(
            self.cursor, kegg_proteins_file, out_file)
        # The ``with`` block closes the file; no explicit check is needed.
        with open(out_file, 'r') as f:
            line = f.readline()
        self.assertEqual(line[:15], '>dml:Dmul_28240')