Пример #1
0
    def test_acba_simple_linear(self):
        """
        Run integron_finder with ``--linear`` on the acba.007.p01.13 replicon and
        compare the produced .integrons and .summary files with the expected ones
        located through ``self.find_data``.
        """
        replicon_name = 'acba.007.p01.13'
        expected_dirname = 'Results_Integron_Finder_acba.007.p01.13.linear'
        produced_dir = os.path.join(self.out_dir, 'Results_Integron_Finder_{}'.format(replicon_name))

        fasta_path = self.find_data(os.path.join('Replicons', replicon_name + '.fst'))
        cmd_template = "integron_finder --outdir {out_dir} --linear {replicon}"
        command = cmd_template.format(out_dir=self.out_dir, replicon=fasta_path)

        # the first token is the program name, main only receives the arguments
        argv = command.split()[1:]
        with self.catch_io(out=True, err=True):
            main(argv, loglevel='WARNING')

        # .integrons file
        integrons_name = '{}.integrons'.format(replicon_name)
        expected_integrons = self.find_data(os.path.join(expected_dirname, integrons_name))
        produced_integrons = os.path.join(produced_dir, integrons_name)
        self.assertIntegronResultEqual(expected_integrons, produced_integrons)

        # .summary file, compared as tab separated tables
        summary_name = '{}.summary'.format(replicon_name)
        expected_summary = pd.read_csv(self.find_data(os.path.join(expected_dirname, summary_name)),
                                       sep="\t")
        produced_summary = pd.read_csv(os.path.join(produced_dir, summary_name), sep="\t")
        pdt.assert_frame_equal(expected_summary, produced_summary)
Пример #2
0
    def test_acba_annot(self):
        """
        Run integron_finder with ``--func-annot``, ``--promoter-attI``, ``--gbk`` and
        ``--keep-tmp`` and check three outputs against the expected data:
        the GenBank record, the .integrons file and the Resfams table kept in the
        tmp directory.
        """
        replicon_name = 'acba.007.p01.13'
        seq_id = 'ACBA.007.P01_13'
        fasta_path = self.find_data(os.path.join('Replicons', '{}.fst'.format(replicon_name)))
        cmd_template = ("integron_finder --outdir {out_dir} --func-annot --path-func-annot {annot_bank} "
                        "--promoter-attI --gbk --keep-tmp {replicon}")
        command = cmd_template.format(out_dir=self.out_dir,
                                      annot_bank=self.resfams_dir,
                                      replicon=fasta_path)

        # the first token is the program name, main only receives the arguments
        argv = command.split()[1:]
        with self.catch_io(out=True, err=False):
            main(argv, loglevel='WARNING')

        produced_dir = os.path.join(self.out_dir, 'Results_Integron_Finder_{}'.format(replicon_name))
        expected_dirname = 'Results_Integron_Finder_{}.annot'.format(replicon_name)

        # GenBank output, compared as SeqRecord objects
        gbk_name = '{}.gbk'.format(seq_id)
        expected_gbk_path = self.find_data(os.path.join(expected_dirname, gbk_name))
        produced_gbk_path = os.path.join(produced_dir, gbk_name)
        expected_record = SeqIO.read(expected_gbk_path, 'gb')
        produced_record = SeqIO.read(produced_gbk_path, 'gb')
        self.assertSeqRecordEqual(expected_record, produced_record)

        # .integrons file
        integrons_name = '{}.integrons'.format(replicon_name)
        expected_integrons = self.find_data(os.path.join(expected_dirname, integrons_name))
        produced_integrons = os.path.join(produced_dir, integrons_name)
        self.assertIntegronResultEqual(expected_integrons, produced_integrons)

        # Resfams table kept in the tmp directory thanks to --keep-tmp
        resfams_rel_path = os.path.join('tmp_{}'.format(seq_id), seq_id + '_Resfams_fa_table.res')
        expected_resfams = self.find_data(os.path.join(expected_dirname, resfams_rel_path))
        produced_resfams = os.path.join(produced_dir, resfams_rel_path)
        self.assertHmmEqual(expected_resfams, produced_resfams)
Пример #3
0
    def test_acba_simple_with_gbk_without_promoter(self):
        """
        Run integron_finder with ``--gbk`` only (no ``--promoter-attI``) and compare
        the GenBank record and the .integrons file with the expected ``.wo_promoter`` data.
        """
        replicon_name = 'acba.007.p01.13'
        seq_id = 'ACBA.007.P01_13'
        fasta_path = self.find_data(os.path.join('Replicons', '{}.fst'.format(replicon_name)))
        cmd_template = "integron_finder --outdir {out_dir} --gbk {replicon}"
        command = cmd_template.format(out_dir=self.out_dir, replicon=fasta_path)

        # the first token is the program name, main only receives the arguments
        argv = command.split()[1:]
        with self.catch_io(out=True, err=True):
            main(argv, loglevel='WARNING')

        result_dirname = 'Results_Integron_Finder_{}'.format(replicon_name)
        produced_dir = os.path.join(self.out_dir, result_dirname)
        expected_dirname = result_dirname + ".wo_promoter"

        # GenBank output, compared as SeqRecord objects
        gbk_name = '{}.gbk'.format(seq_id)
        expected_gbk_path = self.find_data(os.path.join(expected_dirname, gbk_name))
        produced_gbk_path = os.path.join(produced_dir, gbk_name)
        expected_record = SeqIO.read(expected_gbk_path, 'gb')
        produced_record = SeqIO.read(produced_gbk_path, 'gb')
        self.assertSeqRecordEqual(expected_record, produced_record)

        # .integrons file
        integrons_name = '{}.integrons'.format(replicon_name)
        expected_integrons = self.find_data(os.path.join(expected_dirname, integrons_name))
        produced_integrons = os.path.join(produced_dir, integrons_name)
        self.assertIntegronResultEqual(expected_integrons, produced_integrons)
Пример #4
0
    def test_acba_simple_gembase(self):
        """
        Run integron_finder with ``--gembase`` on the ACBA.0917.00019 replicon file and
        compare the produced .integrons and .summary files with the expected
        ``.gembase`` data.
        """
        replicon_name = 'ACBA.0917.00019'
        contig_id = 'ACBA.0917.00019.0001'  # NOTE(review): not used by the assertions below
        produced_dir = os.path.join(self.out_dir, 'Results_Integron_Finder_{}'.format(replicon_name))

        fasta_path = self.find_data(os.path.join('Gembase', 'Replicons', replicon_name + '.fna'))
        cmd_template = "integron_finder --outdir {out_dir} --gembase {replicon}"
        command = cmd_template.format(out_dir=self.out_dir, replicon=fasta_path)

        # the first token is the program name, main only receives the arguments
        argv = command.split()[1:]
        with self.catch_io(out=True, err=True):
            main(argv, loglevel='WARNING')

        # .integrons file
        integrons_name = '{}.integrons'.format(replicon_name)
        expected_integrons = self.find_data(
            os.path.join('Results_Integron_Finder_{}.gembase'.format(replicon_name), integrons_name))
        produced_integrons = os.path.join(produced_dir, integrons_name)
        self.assertIntegronResultEqual(expected_integrons, produced_integrons)

        # .summary file, compared as tab separated tables
        summary_name = '{}.summary'.format(replicon_name)
        expected_summary_path = self.find_data(
            os.path.join('Results_Integron_Finder_{}.gembase'.format(replicon_name), summary_name))
        expected_summary = pd.read_csv(expected_summary_path, sep="\t")
        produced_summary = pd.read_csv(os.path.join(produced_dir, summary_name), sep="\t")
        pdt.assert_frame_equal(expected_summary, produced_summary)
Пример #5
0
    def test_acba_simple_no_gbk_no_pdf(self):
        """
        Run integron_finder with ``--promoter-attI`` but without ``--gbk`` nor ``--pdf``.

        The .integrons and .summary files must match the expected data and
        neither a GenBank file nor a pdf must be written in the result directory.
        """
        replicon_filename = 'acba.007.p01.13'
        replicon_id = 'ACBA.007.P01_13'
        output_filename = 'Results_Integron_Finder_{}'.format(replicon_filename)
        test_result_dir = os.path.join(self.out_dir, output_filename)
        command = "integron_finder --outdir {out_dir} --promoter-attI {replicon}".format(
            out_dir=self.out_dir,
            replicon=self.find_data(os.path.join('Replicons', '{}.fst'.format(replicon_filename)))
        )

        with self.catch_io(out=True, err=True):
            # the first token is the program name, main only receives the arguments
            main(command.split()[1:], loglevel='WARNING')

        output_filename = '{}.integrons'.format(replicon_filename)
        expected_result_path = self.find_data(os.path.join('Results_Integron_Finder_acba.007.p01.13',
                                                           output_filename))
        test_result_path = os.path.join(test_result_dir, output_filename)
        self.assertIntegronResultEqual(expected_result_path, test_result_path)

        summary_file_name = '{}.summary'.format(replicon_filename)
        exp_summary_path = self.find_data(os.path.join('Results_Integron_Finder_acba.007.p01.13', summary_file_name))
        exp_summary = pd.read_csv(exp_summary_path, sep="\t")
        test_summary_path = os.path.join(test_result_dir, summary_file_name)
        test_summary = pd.read_csv(test_summary_path, sep="\t")
        pdt.assert_frame_equal(exp_summary, test_summary)

        # The tests which request --gbk / --pdf look for files named after the
        # replicon id, not after the input file name. Checking only the file name
        # based path could never fail, so both candidates are checked.
        for basename in (replicon_id, replicon_filename):
            gbk_test = os.path.join(test_result_dir, '{}.gbk'.format(basename))
            self.assertFalse(os.path.exists(gbk_test))

            pdf_test = os.path.join(test_result_dir, '{}_1.pdf'.format(basename))
            self.assertFalse(os.path.exists(pdf_test))
    def test_contig_with_empty_prot(self):
        """
        Run integron_finder with default options on the ``empty_prot_file`` replicon and
        compare the produced .integrons and .summary files with the expected ones.
        Lines starting with ``#`` are ignored when the summaries are parsed.
        """
        replicon_name = 'empty_prot_file'
        result_dirname = 'Results_Integron_Finder_{}'.format(replicon_name)
        produced_dir = os.path.join(self.out_dir, result_dirname)

        fasta_path = self.find_data(os.path.join('Replicons', replicon_name + '.fna'))
        cmd_template = "integron_finder --outdir {out_dir} {replicon}"
        command = cmd_template.format(out_dir=self.out_dir, replicon=fasta_path)

        # the first token is the program name, main only receives the arguments
        argv = command.split()[1:]
        with self.catch_io(out=True, err=True):
            main(argv, loglevel='WARNING')

        expected_dir = self.find_data('Results_Integron_Finder_{}'.format(replicon_name))

        # .integrons file
        integrons_name = '{}.integrons'.format(replicon_name)
        self.assertIntegronResultEqual(os.path.join(expected_dir, integrons_name),
                                       os.path.join(produced_dir, integrons_name))

        # .summary file, the produced one is parsed first
        summary_name = '{}.summary'.format(replicon_name)
        produced_summary = pd.read_csv(os.path.join(produced_dir, summary_name), sep='\t', comment="#")
        expected_summary = pd.read_csv(os.path.join(expected_dir, summary_name), sep='\t', comment="#")
        pdt.assert_frame_equal(produced_summary, expected_summary)
Пример #7
0
    def test_acba_sequential_eq_isolate(self):
        """
        Check that analysing the file ACBA.0917.00019 as a whole gives the same results
        as analysing the contig ACBA.0917.00019.0001 alone with ``--lin``.

        - the .integrons files of the two runs must be identical
        - the summary row of contig 0001 must equal the summary of the isolated run
        - the summary row of contig 0002 must hold 0 CALIN, 0 complete and 0 In0
        """
        whole_name = 'ACBA.0917.00019'
        first_contig = 'ACBA.0917.00019.0001'
        second_contig = 'ACBA.0917.00019.0002'

        # run on the whole file
        whole_result_dir = os.path.join(self.out_dir, 'Results_Integron_Finder_{}'.format(whole_name))
        whole_fasta = self.find_data(os.path.join('Gembase', 'Replicons', whole_name + '.fna'))
        whole_template = "integron_finder --outdir {out_dir} --keep-tmp {replicon}"
        whole_cmd = whole_template.format(out_dir=self.out_dir, replicon=whole_fasta)
        with self.catch_io(out=True, err=True):
            main(whole_cmd.split()[1:], loglevel='WARNING')

        # run on the first contig only
        single_result_dir = os.path.join(self.out_dir, 'Results_Integron_Finder_{}'.format(first_contig))
        single_fasta = self.find_data(os.path.join('Replicons', first_contig + '.fst'))
        single_template = "integron_finder --outdir {out_dir} --keep-tmp --lin {replicon}"
        single_cmd = single_template.format(out_dir=self.out_dir, replicon=single_fasta)
        with self.catch_io(out=True, err=True):
            main(single_cmd.split()[1:], loglevel='WARNING')

        # .integrons files
        self.assertIntegronResultEqual(os.path.join(whole_result_dir, whole_name + '.integrons'),
                                       os.path.join(single_result_dir, first_contig + '.integrons'))

        # .summary files, lines starting with '#' are skipped
        whole_summary = pd.read_csv(os.path.join(whole_result_dir, whole_name + '.summary'),
                                    sep="\t", comment='#')
        single_summary = pd.read_csv(os.path.join(single_result_dir, first_contig + '.summary'),
                                     sep="\t", comment='#')
        first_contig_rows = whole_summary.loc[whole_summary['ID_replicon'] == first_contig]
        pdt.assert_frame_equal(first_contig_rows, single_summary)

        # The row of the second contig keeps its positional label from the whole
        # summary, so both frames are indexed by 'ID_replicon' before comparison.
        second_contig_rows = whole_summary.loc[whole_summary['ID_replicon'] == second_contig]
        second_contig_rows = second_contig_rows.set_index(['ID_replicon'])
        expected_rows = pd.DataFrame([['ACBA.0917.00019.0002', 0, 0, 0]],
                                     columns=['ID_replicon', 'CALIN', 'complete', 'In0'])
        expected_rows = expected_rows.set_index(['ID_replicon'])
        pdt.assert_frame_equal(second_contig_rows, expected_rows)
    def test_get_version(self):
        """
        test on having the version message when integron_finder is installed
        """
        real_exit = sys.exit
        sys.exit = self.fake_exit

        from numpy import __version__ as np_vers
        from pandas import __version__ as pd_vers
        from matplotlib import __version__ as mplt_vers
        from Bio import __version__ as bio_vers
        from integron_finder import __version__ as if_vers

        with self.catch_io(out=True, err=False):
            try:
                finder.main(['integron_finder', '-V'])
            except TypeError as err:
                version = sys.stdout.getvalue()
                # program exit with returncode = 0
                self.assertEqual(str(err), '0')
            finally:
                sys.exit = real_exit

        exp_version = """integron_finder version {i_f}
Using:    
 - Python {py}
 - numpy {np}
 - pandas {pd}
 - matplolib {mplt}
 - biopython {bio}

Authors:
 - Jean Cury, Bertrand Neron, Eduardo Rocha,

Citation:

 Identification and analysis of integrons and cassette arrays in bacterial genomes
 Jean Cury; Thomas Jove; Marie Touchon; Bertrand Neron; Eduardo PC Rocha
 Nucleic Acids Research 2016; doi: 10.1093/nar/gkw319
 """.format(i_f=if_vers,
            py=sys.version.replace('\n', ' '),
            np=np_vers,
            pd=pd_vers,
            mplt=mplt_vers,
            bio=bio_vers
            )
        self.assertEqual(exp_version.strip(), version.strip())
Пример #9
0
    def test_acba_simple_with_pdf(self):
        """
        Run integron_finder with ``--pdf`` and ``--promoter-attI`` and check that the
        file ``<replicon id>_1.pdf`` exists in the result directory.
        """
        replicon_name = 'acba.007.p01.13'
        seq_id = 'ACBA.007.P01_13'
        produced_dir = os.path.join(self.out_dir, 'Results_Integron_Finder_{}'.format(replicon_name))

        fasta_path = self.find_data(os.path.join('Replicons', '{}.fst'.format(replicon_name)))
        cmd_template = "integron_finder --outdir {out_dir} --pdf --promoter-attI {replicon}"
        command = cmd_template.format(out_dir=self.out_dir, replicon=fasta_path)

        # the first token is the program name, main only receives the arguments
        argv = command.split()[1:]
        with self.catch_io(out=True, err=True):
            main(argv, loglevel='WARNING')

        pdf_path = os.path.join(produced_dir, '{}_1.pdf'.format(seq_id))
        self.assertTrue(os.path.exists(pdf_path))
Пример #10
0
 def test_outdir_is_file(self):
     """
     Give integron_finder an ``--outdir`` which already exists as a regular file:
     ``main`` must raise IsADirectoryError with an explicit message.
     """
     replicon_name = 'acba.007.p01.13'
     # create an empty regular file at the place of the output directory
     file_as_outdir = os.path.join(self.out_dir, 'bad_out_dir')
     with open(file_as_outdir, 'w'):
         pass

     fasta_path = self.find_data(os.path.join('Replicons', '{}.fst'.format(replicon_name)))
     cmd_template = "integron_finder --outdir {out_dir} {replicon}"
     command = cmd_template.format(out_dir=file_as_outdir, replicon=fasta_path)
     with self.assertRaises(IsADirectoryError) as ctx:
         # stdout is captured even if the error is not raised, to keep the
         # test output clean: the loggers are reinitialized in main so the log
         # cannot be caught directly, and it is written on stdout
         with self.catch_io(out=True):
             main(command.split()[1:])
     expected_msg = "outdir '{}' already exists and is not a directory".format(file_as_outdir)
     self.assertEqual(expected_msg, str(ctx.exception))
Пример #11
0
    def test_no_integron(self):
        """
        Run integron_finder with default options on the ``fake_seq`` replicon and
        compare the produced .integrons file with the expected one.
        """
        replicon_name = 'fake_seq'
        replicon_id = 'fake_seq'  # NOTE(review): not used by the assertions below
        produced_dir = os.path.join(self.out_dir, 'Results_Integron_Finder_{}'.format(replicon_name))

        fasta_path = self.find_data(os.path.join('Replicons', '{}.fst'.format(replicon_name)))
        cmd_template = "integron_finder --outdir {out_dir} {replicon}"
        command = cmd_template.format(out_dir=self.out_dir, replicon=fasta_path)

        # the first token is the program name, main only receives the arguments
        argv = command.split()[1:]
        with self.catch_io(out=True, err=True):
            main(argv, loglevel='WARNING')

        integrons_name = 'fake_seq.integrons'
        expected_path = self.find_data(os.path.join('Results_Integron_Finder_fake_seq', integrons_name))
        produced_path = os.path.join(produced_dir, integrons_name)
        self.assertFileEqual(expected_path, produced_path)
Пример #12
0
    def test_acba_local_max(self):
        """
        Run integron_finder with ``--local-max``, functional annotation, ``--gbk``,
        ``--keep-tmp`` and ``--promoter-attI`` and check against the expected
        ``.local_max`` data: the GenBank record, the .integrons file, the Resfams
        table and the beginning of the attC table of the sub-sequence.
        """
        replicon_name = 'acba.007.p01.13'
        seq_id = 'ACBA.007.P01_13'
        fasta_path = self.find_data(os.path.join('Replicons', '{}.fst'.format(replicon_name)))
        cmd_template = ("integron_finder --outdir {out_dir} --func-annot --path-func-annot {annot_bank} "
                        "--local-max --gbk --keep-tmp --promoter-attI {replicon}")
        command = cmd_template.format(out_dir=self.out_dir,
                                      annot_bank=self.resfams_dir,
                                      replicon=fasta_path)

        # the first token is the program name, main only receives the arguments
        argv = command.split()[1:]
        with self.catch_io(out=True, err=True):
            main(argv, loglevel='WARNING')

        produced_dir = os.path.join(self.out_dir, 'Results_Integron_Finder_{}'.format(replicon_name))
        tmp_dirname = 'tmp_{}'.format(seq_id)

        # GenBank output, compared as SeqRecord objects
        gbk_name = '{}.gbk'.format(seq_id)
        expected_gbk_path = self.find_data(
            os.path.join('Results_Integron_Finder_{}.local_max'.format(replicon_name), gbk_name))
        produced_gbk_path = os.path.join(produced_dir, gbk_name)
        expected_record = SeqIO.read(expected_gbk_path, 'gb')
        produced_record = SeqIO.read(produced_gbk_path, 'gb')
        self.assertSeqRecordEqual(expected_record, produced_record)

        # .integrons file
        integrons_name = '{}.integrons'.format(replicon_name)
        expected_integrons = self.find_data(
            os.path.join('Results_Integron_Finder_{}.local_max'.format(replicon_name), integrons_name))
        produced_integrons = os.path.join(produced_dir, integrons_name)
        self.assertIntegronResultEqual(expected_integrons, produced_integrons)

        # Resfams table kept in the tmp directory
        resfams_rel_path = os.path.join(tmp_dirname, '{}_Resfams_fa_table.res'.format(seq_id))
        expected_resfams = self.find_data(
            os.path.join('Results_Integron_Finder_{}.local_max'.format(replicon_name), resfams_rel_path))
        produced_resfams = os.path.join(produced_dir, resfams_rel_path)
        self.assertHmmEqual(expected_resfams, produced_resfams)

        # attC table of the sub-sequence: the lines are compared pairwise until
        # the produced file reaches its '# Program: ' line
        attc_rel_path = os.path.join(tmp_dirname, '{}_13825_1014_subseq_attc_table.res'.format(seq_id))
        expected_attc = self.find_data(
            os.path.join('Results_Integron_Finder_{}.local_max'.format(replicon_name), attc_rel_path))
        produced_attc = os.path.join(produced_dir, attc_rel_path)
        with open(expected_attc) as expected_file, open(produced_attc) as produced_file:
            for expected_line, produced_line in zip(expected_file, produced_file):
                if produced_line.startswith('# Program: '):
                    break
                self.assertEqual(expected_line, produced_line)
Пример #13
0
    def test_acba_sequential_eq_isolate(self):
        """
        Check that analysing the file ACBA.0917.00019 as a whole gives the same
        .integrons and .summary files as analysing the contig
        ACBA.0917.00019.0001 alone with ``--lin``.
        """
        whole_name = 'ACBA.0917.00019'
        first_contig = 'ACBA.0917.00019.0001'

        # run on the whole file
        whole_result_dir = os.path.join(self.out_dir, 'Results_Integron_Finder_{}'.format(whole_name))
        whole_fasta = self.find_data(os.path.join('Gembase', 'Replicons', whole_name + '.fna'))
        whole_template = "integron_finder --outdir {out_dir} --keep-tmp {replicon}"
        whole_cmd = whole_template.format(out_dir=self.out_dir, replicon=whole_fasta)
        with self.catch_io(out=True, err=True):
            main(whole_cmd.split()[1:], loglevel='WARNING')

        # run on the first contig only
        single_result_dir = os.path.join(self.out_dir, 'Results_Integron_Finder_{}'.format(first_contig))
        single_fasta = self.find_data(os.path.join('Replicons', first_contig + '.fst'))
        single_template = "integron_finder --outdir {out_dir} --keep-tmp --lin {replicon}"
        single_cmd = single_template.format(out_dir=self.out_dir, replicon=single_fasta)
        with self.catch_io(out=True, err=True):
            main(single_cmd.split()[1:], loglevel='WARNING')

        # .integrons files
        self.assertIntegronResultEqual(os.path.join(whole_result_dir, whole_name + '.integrons'),
                                       os.path.join(single_result_dir, first_contig + '.integrons'))

        # .summary files, compared as tab separated tables
        whole_summary = pd.read_csv(os.path.join(whole_result_dir, whole_name + '.summary'), sep="\t")
        single_summary = pd.read_csv(os.path.join(single_result_dir, first_contig + '.summary'), sep="\t")
        pdt.assert_frame_equal(whole_summary, single_summary)
Пример #14
0
 def test_acba_no_cmsearch(self):
     """
     Check that ``main`` raises a RuntimeError with an explicit message when
     'cmsearch' cannot be found.

     ``finder.distutils.spawn.find_executable`` is wrapped with
     ``hide_executable('cmsearch')`` for the duration of the run (presumably this
     makes the lookup of 'cmsearch' fail -- see the helper) and is restored
     afterwards so that the patch does not leak into the other tests.
     """
     replicon_filename = 'acba.007.p01.13'
     real_find_executable = finder.distutils.spawn.find_executable
     decorator = hide_executable('cmsearch')
     finder.distutils.spawn.find_executable = decorator(real_find_executable)
     try:
         command = "integron_finder --outdir {out_dir} {replicon}".format(
             out_dir=self.out_dir,
             replicon=self.find_data(os.path.join('Replicons', '{}.fst'.format(replicon_filename)))
         )
         with self.assertRaises(RuntimeError) as ctx:
             with self.catch_io(out=True):
                 # stdout is captured even if the error is not raised, to keep
                 # the test output clean: the loggers are reinitialized in main
                 # so the log cannot be caught directly, and it is written on stdout
                 main(command.split()[1:])
     finally:
         # always put the genuine function back, whatever the outcome of the run
         finder.distutils.spawn.find_executable = real_find_executable
     err_msg = "cannot find 'cmsearch' in PATH.\n" \
               "Please install infernal package or setup 'cmsearch' binary path with --cmsearch option"
     self.assertEqual(err_msg, str(ctx.exception))
Пример #15
0
    def test_no_integron(self):
        """
        Run integron_finder with default options on the ``fake_seq`` replicon and check
        the first two lines of the produced .integrons file: the command line
        header, then the '# No Integron found' message.
        """
        replicon_name = 'fake_seq'
        replicon_id = 'fake_seq'  # NOTE(review): not used by the assertions below
        produced_dir = os.path.join(self.out_dir, 'Results_Integron_Finder_{}'.format(replicon_name))

        fasta_path = self.find_data(os.path.join('Replicons', '{}.fst'.format(replicon_name)))
        cmd_template = "integron_finder --outdir {out_dir} {replicon}"
        command = cmd_template.format(out_dir=self.out_dir, replicon=fasta_path)

        # the first token is the program name, main only receives the arguments
        argv = command.split()[1:]
        with self.catch_io(out=True, err=True):
            main(argv, loglevel='WARNING')

        produced_path = os.path.join(produced_dir, 'fake_seq.integrons')
        with open(produced_path) as produced_file:
            header_line = next(produced_file)
            self.assertTrue(header_line.startswith('# cmd: integron_finder'))
            message_line = next(produced_file)
            self.assertEqual(message_line.strip(), '# No Integron found')
Пример #16
0
 def test_resultdir_not_writable(self):
     """
     Create the result directory beforehand with mode 0o500, then check that
     ``main`` raises a PermissionError with an explicit message.
     """
     replicon_name = 'acba.007.p01.13'
     fasta_rel_path = os.path.join('Replicons', '{}.fst'.format(replicon_name))

     # build a Config only to learn where the result directory will be located
     parsed_args = argparse.Namespace()
     parsed_args.replicon = self.find_data(fasta_rel_path)
     parsed_args.outdir = self.out_dir
     cfg = config.Config(parsed_args)
     os.mkdir(cfg.result_dir, mode=0o500)

     cmd_template = "integron_finder --outdir {out_dir} {replicon}"
     command = cmd_template.format(out_dir=self.out_dir, replicon=self.find_data(fasta_rel_path))
     with self.assertRaises(PermissionError) as ctx:
         # stdout is captured even if the error is not raised, to keep the
         # test output clean: the loggers are reinitialized in main so the log
         # cannot be caught directly, and it is written on stdout
         with self.catch_io(out=True):
             main(command.split()[1:])
     expected_msg = "result dir '{}' already exists and is not writable".format(self.out_dir)
     self.assertEqual(expected_msg, str(ctx.exception))