Exemplo n.º 1
0
    def test_merge_with_extract(self):
        """
            Test streaming repack with intermediate extraction to disk.

            Calls file_utils.merge_tarballs with extract_to_disk_path set,
            then compares both the re-extracted merged tarball and the
            extraction directory against the raw input files.
        """
        temp_dir = tempfile.gettempdir()
        out_tarball_file = os.path.join(temp_dir, "out.tar.gz")
        out_extracted_path = os.path.join(temp_dir, "extracted")

        file_utils.merge_tarballs(out_tarball_file,
                                  self.input_tgz_files,
                                  extract_to_disk_path=out_extracted_path)

        # use a context manager so the archive handle is closed after extraction
        with tarfile.open(out_tarball_file) as tb:
            tb.extractall(path=temp_dir)

        raw_input_dir = os.path.join(self.input_dir, "raw-input")

        # inspect merged
        for raw_file in self.raw_files:
            inf = os.path.join(raw_input_dir, raw_file)
            outf = os.path.join(temp_dir, raw_file)

            assert_equal_contents(self, inf, outf)

        # inspect extracted
        for raw_file in self.raw_files:
            inf = os.path.join(raw_input_dir, raw_file)
            outf = os.path.join(out_extracted_path, raw_file)

            assert_equal_contents(self, inf, outf)
Exemplo n.º 2
0
    def test_deplete_blastn(self):
        """
            Build BLAST databases for the two human chromosome subsets, run
            deplete_blastn on in.fastq against them, and compare the result
            with expected.fastq.
        """
        work_dir = tempfile.mkdtemp()
        input_dir = util.file.get_test_input_path(self)

        # Make blast databases
        makeblastdb = tools.blast.MakeblastdbTool().install_and_get_path()
        db_paths = []
        for fasta_name in ('humanChr1Subset.fa', 'humanChr9Subset.fa'):
            db_path = os.path.join(work_dir, fasta_name)
            # the database is built next to a symlink of the input fasta
            os.symlink(os.path.join(input_dir, fasta_name), db_path)
            db_paths.append(db_path)
            subprocess.check_call(
                [makeblastdb, '-dbtype', 'nucl', '-in', db_path])

        # Run deplete_blastn
        depleted_fastq = os.path.join(work_dir, 'out.fastq')
        parser = taxon_filter.parser_deplete_blastn(argparse.ArgumentParser())
        cli_args = [os.path.join(input_dir, 'in.fastq'), depleted_fastq,
                    db_paths[0], db_paths[1]]
        args = parser.parse_args(cli_args)
        args.func_main(args)

        # Compare to expected
        expected_fastq = os.path.join(input_dir, 'expected.fastq')
        assert_equal_contents(self, depleted_fastq, expected_fastq)
Exemplo n.º 3
0
    def test_blastn_db_build(self):
        """
            Build a BLAST nucleotide database from ebola.fasta and compare
            the header and sequence files with the stored expected copies.
        """
        ref_fasta = os.path.join(util.file.get_test_input_path(), 'ebola.fasta')
        expected_dir = os.path.join(util.file.get_test_input_path(self), "expected")
        out_dir = tempfile.mkdtemp()

        # the test class name is used as the output file prefix
        prefix = self.__class__.__name__

        parser = taxon_filter.parser_blastn_build_db(argparse.ArgumentParser())
        # positional arguments: input fasta, output directory
        args = parser.parse_args(
            [ref_fasta, out_dir, "--outputFilePrefix", prefix])
        args.func_main(args)

        # nhr=header, nsq=sequence; nin (index) is skipped because it can change
        for ext in (".nhr", ".nsq"):
            db_file = prefix + ext
            assert_equal_contents(self, os.path.join(out_dir, db_file),
                                  os.path.join(expected_dir, db_file))
Exemplo n.º 4
0
    def test_bmtagger_db_build(self):
        """
            Build a bmtagger database from ebola.fasta and check its files:
            most are compared byte-for-byte with expected copies, while the
            .srprism.map file is checked against a stored md5 line.
        """
        ref_fasta = os.path.join(util.file.get_test_input_path(), 'ebola.fasta')
        expected_dir = os.path.join(util.file.get_test_input_path(self), "expected")
        out_dir = tempfile.mkdtemp()

        # the test class name is used as the output file prefix
        prefix = self.__class__.__name__

        parser = taxon_filter.parser_bmtagger_build_db(argparse.ArgumentParser())
        # positional arguments: input fasta, output directory
        args = parser.parse_args(
            [ref_fasta, out_dir, "--outputFilePrefix", prefix])
        args.func_main(args)

        content_checked_exts = (
            ".bitmask", ".srprism.amp", ".srprism.idx", ".srprism.imp",
            ".srprism.pmp", ".srprism.rmp", ".srprism.ss", ".srprism.ssa",
            ".srprism.ssd",
        )
        for ext in content_checked_exts:
            db_file = prefix + ext
            assert_equal_contents(self, os.path.join(out_dir, db_file),
                                  os.path.join(expected_dir, db_file))

        # the map file is verified through its md5 instead of its contents
        map_file = prefix + ".srprism.map"
        assert_md5_equal_to_line_in_file(
            self, os.path.join(out_dir, map_file),
            os.path.join(expected_dir, map_file + ".md5"))
Exemplo n.º 5
0
    def test_merge_with_extract_repack_from_disk(self):
        """
            Test with repack from disk source after extraction.

            Calls util.file.repack_tarballs with extract_to_disk_path set and
            avoid_disk_roundtrip=False, then compares both the re-extracted
            merged tarball and the extraction directory against the raw
            input files.
        """
        temp_dir = tempfile.gettempdir()
        out_tarball_file = os.path.join(temp_dir, "out.tar.gz")
        out_extracted_path = os.path.join(temp_dir, "extracted")

        util.file.repack_tarballs(out_tarball_file,
                                  self.input_tgz_files,
                                  extract_to_disk_path=out_extracted_path,
                                  avoid_disk_roundtrip=False)

        # use a context manager so the archive handle is closed after extraction
        with tarfile.open(out_tarball_file) as tb:
            tb.extractall(path=temp_dir)

        raw_input_dir = os.path.join(self.input_dir, "raw-input")

        # inspect merged
        for raw_file in self.raw_files:
            inf = os.path.join(raw_input_dir, raw_file)
            outf = os.path.join(temp_dir, raw_file)

            assert_equal_contents(self, inf, outf)

        # inspect extracted
        for raw_file in self.raw_files:
            inf = os.path.join(raw_input_dir, raw_file)
            outf = os.path.join(out_extracted_path, raw_file)

            assert_equal_contents(self, inf, outf)
Exemplo n.º 6
0
    def test_deplete_blastn(self):
        """
            Build BLAST databases for the two human chromosome subsets, run
            deplete_blastn on in.fastq against them, and compare the result
            with expected.fastq.
        """
        work_dir = tempfile.mkdtemp()
        input_dir = util.file.get_test_input_path(self)

        # Make blast databases
        makeblastdb = tools.blast.MakeblastdbTool().install_and_get_path()
        db_paths = []
        for fasta_name in ('humanChr1Subset.fa', 'humanChr9Subset.fa'):
            db_path = os.path.join(work_dir, fasta_name)
            # the database is built next to a symlink of the input fasta
            os.symlink(os.path.join(input_dir, fasta_name), db_path)
            db_paths.append(db_path)
            build_cmd = [makeblastdb, '-dbtype', 'nucl', '-in', db_path]
            util.misc.run_and_print(build_cmd, check=True)

        # Run deplete_blastn
        depleted_fastq = os.path.join(work_dir, 'out.fastq')
        parser = taxon_filter.parser_deplete_blastn(argparse.ArgumentParser())
        cli_args = [os.path.join(input_dir, 'in.fastq'), depleted_fastq,
                    db_paths[0], db_paths[1]]
        args = parser.parse_args(cli_args)
        args.func_main(args)

        # Compare to expected
        expected_fastq = os.path.join(input_dir, 'expected.fastq')
        assert_equal_contents(self, depleted_fastq, expected_fastq)
Exemplo n.º 7
0
    def test_blastn_db_build(self):
        """
            Build a BLAST nucleotide database from ebola.fasta and compare
            the header and sequence files with the stored expected copies.
        """
        ref_fasta = os.path.join(util.file.get_test_input_path(), 'ebola.fasta')
        expected_dir = os.path.join(util.file.get_test_input_path(self), "expected")
        out_dir = tempfile.mkdtemp()

        # the test class name is used as the output file prefix
        prefix = self.__class__.__name__

        build_parser = taxon_filter.parser_blastn_build_db(argparse.ArgumentParser())
        # positional arguments: input fasta, output directory
        cli_args = [ref_fasta, out_dir, "--outputFilePrefix", prefix]
        args = build_parser.parse_args(cli_args)
        args.func_main(args)

        # nhr=header, nsq=sequence; nin (index) is skipped because it can change
        for ext in (".nhr", ".nsq"):
            produced = os.path.join(out_dir, prefix + ext)
            expected = os.path.join(expected_dir, prefix + ext)
            assert_equal_contents(self, produced, expected)
Exemplo n.º 8
0
    def test_merge_with_extract(self):
        """
            Test streaming repack with intermediate extraction to disk.

            Calls file_utils.merge_tarballs with extract_to_disk_path set,
            then compares both the re-extracted merged tarball and the
            extraction directory against the raw input files.
        """
        temp_dir = tempfile.gettempdir()
        out_tarball_file = os.path.join(temp_dir, "out.tar.gz")
        out_extracted_path = os.path.join(temp_dir, "extracted")

        file_utils.merge_tarballs(out_tarball_file,
                                  self.input_tgz_files,
                                  extract_to_disk_path=out_extracted_path)

        # use a context manager so the archive handle is closed after extraction
        with tarfile.open(out_tarball_file) as tb:
            tb.extractall(path=temp_dir)

        raw_input_dir = os.path.join(self.input_dir, "raw-input")

        # inspect merged
        for raw_file in self.raw_files:
            inf = os.path.join(raw_input_dir, raw_file)
            outf = os.path.join(temp_dir, raw_file)

            assert_equal_contents(self, inf, outf)

        # inspect extracted
        for raw_file in self.raw_files:
            inf = os.path.join(raw_input_dir, raw_file)
            outf = os.path.join(out_extracted_path, raw_file)

            assert_equal_contents(self, inf, outf)
Exemplo n.º 9
0
    def test_merge_piped_in_and_out(self):
        """
            Test with streamed input and output.

            The input tarballs are concatenated by a `cat` subprocess whose
            stdout is patched in as sys.stdin, and sys.stdout is patched to
            the output tarball file; "-" is passed to merge_tarballs for both
            the output and the input.
        """
        temp_dir = tempfile.gettempdir()

        out_tarball_file = os.path.join(temp_dir, "out.tar.gz")

        # Pass the paths as an argument list instead of splitting a formatted
        # string, so paths containing whitespace reach cat intact.
        ps = subprocess.Popen(["cat"] + list(self.input_tgz_files),
                              stdout=subprocess.PIPE)
        try:
            with patch('sys.stdin', ps.stdout):
                with open(out_tarball_file, "wb", 0) as outf:
                    # temporarily disable pytest's capture of sys.stdout
                    with self.capsys.disabled():
                        with patch('sys.stdout', outf):
                            file_utils.merge_tarballs("-",
                                                      ["-"],
                                                      pipe_hint_out="gz",
                                                      pipe_hint_in="gz")
        finally:
            # release the pipe and reap the child even if the merge fails
            ps.stdout.close()
            ps.wait()

        # use a context manager so the archive handle is closed after extraction
        with tarfile.open(out_tarball_file) as tb:
            tb.extractall(path=temp_dir)

        for raw_file in self.raw_files:
            inf = os.path.join(self.input_dir, "raw-input", raw_file)
            outf = os.path.join(temp_dir, raw_file)

            assert_equal_contents(self, inf, outf)
Exemplo n.º 10
0
    def test_bmtagger_db_build(self):
        """
            Build a bmtagger database from ebola.fasta and check its files:
            most are compared byte-for-byte with expected copies, while the
            .srprism.map file is checked against a stored md5 line.
        """
        ref_fasta = os.path.join(util.file.get_test_input_path(), 'ebola.fasta')
        expected_dir = os.path.join(util.file.get_test_input_path(self), "expected")
        out_dir = tempfile.mkdtemp()

        # the test class name is used as the output file prefix
        prefix = self.__class__.__name__

        build_parser = taxon_filter.parser_bmtagger_build_db(argparse.ArgumentParser())
        # positional arguments: input fasta, output directory
        cli_args = [ref_fasta, out_dir, "--outputFilePrefix", prefix]
        args = build_parser.parse_args(cli_args)
        args.func_main(args)

        content_checked_exts = (
            ".bitmask", ".srprism.amp", ".srprism.idx", ".srprism.imp",
            ".srprism.pmp", ".srprism.rmp", ".srprism.ss", ".srprism.ssa",
            ".srprism.ssd",
        )
        for ext in content_checked_exts:
            produced = os.path.join(out_dir, prefix + ext)
            expected = os.path.join(expected_dir, prefix + ext)
            assert_equal_contents(self, produced, expected)

        # the map file is verified through its md5 instead of its contents
        map_name = prefix + ".srprism.map"
        produced_map = os.path.join(out_dir, map_name)
        expected_md5 = os.path.join(expected_dir, map_name + ".md5")
        assert_md5_equal_to_line_in_file(self, produced_map, expected_md5)
Exemplo n.º 11
0
    def test_lastal_db_build(self):
        """
            Build a lastal database from ebola.fasta and compare each
            generated database file with its stored expected copy.
        """
        ref_fasta = os.path.join(util.file.get_test_input_path(), 'ebola.fasta')
        expected_dir = os.path.join(util.file.get_test_input_path(self), "expected")
        out_dir = tempfile.mkdtemp()

        # the test class name is used as the output file prefix
        prefix = self.__class__.__name__

        build_parser = taxon_filter.parser_lastal_build_db(argparse.ArgumentParser())
        # positional arguments: input fasta, output directory
        cli_args = [ref_fasta, out_dir, "--outputFilePrefix", prefix]
        args = build_parser.parse_args(cli_args)
        args.func_main(args)

        for ext in (".bck", ".des", ".prj", ".sds", ".ssp", ".suf", ".tis"):
            produced = os.path.join(out_dir, prefix + ext)
            expected = os.path.join(expected_dir, prefix + ext)
            assert_equal_contents(self, produced, expected)
Exemplo n.º 12
0
    def test_merge_piped_in_and_out(self):
        """
            Test with streamed input and output.

            The input tarballs are concatenated by a `cat` subprocess whose
            stdout is patched in as sys.stdin, and sys.stdout is patched to
            the output tarball file; "-" is passed to merge_tarballs for both
            the output and the input.
        """
        temp_dir = tempfile.gettempdir()

        out_tarball_file = os.path.join(temp_dir, "out.tar.gz")

        # Pass the paths as an argument list instead of splitting a formatted
        # string, so paths containing whitespace reach cat intact.
        ps = subprocess.Popen(["cat"] + list(self.input_tgz_files),
                              stdout=subprocess.PIPE)
        try:
            with patch('sys.stdin', ps.stdout):
                with open(out_tarball_file, "wb", 0) as outf:
                    # temporarily disable pytest's capture of sys.stdout
                    with self.capsys.disabled():
                        with patch('sys.stdout', outf):
                            file_utils.merge_tarballs("-", ["-"],
                                                      pipe_hint_out="gz",
                                                      pipe_hint_in="gz")
        finally:
            # release the pipe and reap the child even if the merge fails
            ps.stdout.close()
            ps.wait()

        # use a context manager so the archive handle is closed after extraction
        with tarfile.open(out_tarball_file) as tb:
            tb.extractall(path=temp_dir)

        for raw_file in self.raw_files:
            inf = os.path.join(self.input_dir, "raw-input", raw_file)
            outf = os.path.join(temp_dir, raw_file)

            assert_equal_contents(self, inf, outf)
Exemplo n.º 13
0
    def test_deplete_blastn(self):
        """
            Build BLAST databases for the two human chromosome subsets, run
            deplete_blastn on in.fastq against them, and compare the result
            with expected.fastq.
        """
        # imported locally so this method does not depend on the module's
        # import block
        import subprocess

        tempDir = tempfile.mkdtemp()
        myInputDir = util.file.get_test_input_path(self)

        # Make blast databases
        makeblastdbPath = tools.blast.MakeblastdbTool().install_and_get_path()
        dbnames = ['humanChr1Subset.fa', 'humanChr9Subset.fa']
        refDbs = []
        for dbname in dbnames:
            refDb = os.path.join(tempDir, dbname)
            os.symlink(os.path.join(myInputDir, dbname), refDb)
            refDbs.append(refDb)
            # Run makeblastdb from an argument list (no shell), so paths with
            # spaces or shell metacharacters are passed through unchanged and
            # a non-zero exit raises even when asserts are disabled.
            subprocess.check_call(
                [makeblastdbPath, '-dbtype', 'nucl', '-in', refDb])

        # Run deplete_blastn
        outFile = os.path.join(tempDir, 'out.fastq')
        args = taxon_filter.parser_deplete_blastn().parse_args(
            [os.path.join(myInputDir, 'in.fastq'),
             outFile,
             refDbs[0],
             refDbs[1]])
        taxon_filter.main_deplete_blastn(args)

        # Compare to expected
        assert_equal_contents(self, outFile,
                              os.path.join(myInputDir, 'expected.fastq'))
Exemplo n.º 14
0
    def test_snpeff(self):
        """
            Run the chain MAFFT alignment -> merged VCF -> snpEff annotation
            -> tabular iSNV output on the rabies test inputs.

            The MSA and the final table are compared against expected files.
            The intermediate VCFs are read, but their row comparisons are
            currently commented out.
        """
        temp_dir = tempfile.gettempdir()
        input_dir = util.file.get_test_input_path(self)

        ref_fasta      = os.path.join(input_dir,"ref-rabies-JQ685920.fasta")
        assembly_fasta = os.path.join(input_dir,"RBV16.fasta")
        isnv_calls     = os.path.join(input_dir,"vphaser2.RBV16.mapped.txt.gz")

        # align sample to reference to create MSA
        msa_fasta = util.file.mkstempfname('.fasta')
        expected_msa_fasta = os.path.join(input_dir,"msa.fasta")
        args = [ref_fasta, assembly_fasta, msa_fasta, "--localpair", "--preservecase"]
        args = interhost.parser_align_mafft(argparse.ArgumentParser()).parse_args(args)
        args.func_main(args)
        test.assert_equal_contents(self, msa_fasta, expected_msa_fasta)

        # merge (one) VCF to merged vcf
        merged_vcf = os.path.join(temp_dir,"merged.vcf.gz")
        expected_merged_vcf = os.path.join(input_dir,"merged.vcf.gz")
        args = [ref_fasta, merged_vcf, "--isnvs", isnv_calls, "--alignments", msa_fasta, "--strip_chr_version", "--parse_accession"]
        args = intrahost.parser_merge_to_vcf(argparse.ArgumentParser()).parse_args(args)
        args.func_main(args)
        vcf = util.vcf.VcfReader(merged_vcf)
        expected_vcf = util.vcf.VcfReader(expected_merged_vcf)
        rows = list(vcf.get())
        expected_rows = list(expected_vcf.get())
        # NOTE(review): row comparison is disabled; both files are only read
        #self.assertEqual(rows, expected_rows)

        # run snpEff against merged VCF to predict SNP effects
        eff_vcf = os.path.join(temp_dir,"ann_eff.vcf.gz")
        expected_eff_vcf = os.path.join(input_dir,"ann_eff.vcf.gz")
        args = [merged_vcf, "JQ685920", eff_vcf, "[email protected]"]
        with self.capsys.disabled():
            args = interhost.parser_snpEff(argparse.ArgumentParser()).parse_args(args)
            args.func_main(args)
            vcf = util.vcf.VcfReader(eff_vcf)
            expected_vcf = util.vcf.VcfReader(expected_eff_vcf)
            rows = list(vcf.get())
            expected_rows = list(expected_vcf.get())
        # NOTE(review): row comparison is disabled; both files are only read
        #self.assertEqual(rows, expected_rows)

        # create tabular iSNV output
        eff_txt = os.path.join(temp_dir,"ann_eff.txt.gz")
        expected_eff_txt = os.path.join(input_dir,"ann_eff.txt.gz")
        args = [eff_vcf, eff_txt]
        args = intrahost.parser_iSNV_table(argparse.ArgumentParser()).parse_args(args)
        args.func_main(args)
        for outrow, expectedrow in zip(util.file.read_tabfile(eff_txt),util.file.read_tabfile(expected_eff_txt)):
            for colout, colexpected in zip(outrow, expectedrow):
                # if both values cast to float, perform approx comparison
                try:
                    f1 = float(colout)
                    f2 = float(colexpected)
                except ValueError:
                    # non-numeric: compare as order-insensitive comma-separated lists
                    self.assertEqual(sorted(colout.split(",")), sorted(colexpected.split(",")))
                else:
                    # compare actual against expected (not the value with itself)
                    self.assertAlmostEqual(f1, f2)
Exemplo n.º 15
0
    def test_snpeff(self):
        """
            Run the chain MAFFT alignment -> merged VCF -> snpEff annotation
            -> tabular iSNV output on the rabies test inputs.

            The MSA and the final table are compared against expected files.
            The intermediate VCFs are read, but their row comparisons are
            currently commented out.
        """
        temp_dir = tempfile.gettempdir()
        input_dir = util.file.get_test_input_path(self)

        ref_fasta      = os.path.join(input_dir,"ref-rabies-JQ685920.fasta")
        assembly_fasta = os.path.join(input_dir,"RBV16.fasta")
        isnv_calls     = os.path.join(input_dir,"vphaser2.RBV16.mapped.txt.gz")

        # align sample to reference to create MSA
        msa_fasta = util.file.mkstempfname('.fasta')
        expected_msa_fasta = os.path.join(input_dir,"msa.fasta")
        args = [ref_fasta, assembly_fasta, msa_fasta, "--localpair", "--preservecase"]
        args = interhost.parser_align_mafft(argparse.ArgumentParser()).parse_args(args)
        args.func_main(args)
        test.assert_equal_contents(self, msa_fasta, expected_msa_fasta)

        # merge (one) VCF to merged vcf
        merged_vcf = os.path.join(temp_dir,"merged.vcf.gz")
        expected_merged_vcf = os.path.join(input_dir,"merged.vcf.gz")
        args = [ref_fasta, merged_vcf, "--isnvs", isnv_calls, "--alignments", msa_fasta, "--strip_chr_version", "--parse_accession"]
        args = intrahost.parser_merge_to_vcf(argparse.ArgumentParser()).parse_args(args)
        args.func_main(args)
        vcf = util.vcf.VcfReader(merged_vcf)
        expected_vcf = util.vcf.VcfReader(expected_merged_vcf)
        rows = list(vcf.get())
        expected_rows = list(expected_vcf.get())
        # NOTE(review): row comparison is disabled; both files are only read
        #self.assertEqual(rows, expected_rows)

        # run snpEff against merged VCF to predict SNP effects
        eff_vcf = os.path.join(temp_dir,"ann_eff.vcf.gz")
        expected_eff_vcf = os.path.join(input_dir,"ann_eff.vcf.gz")
        args = [merged_vcf, "JQ685920", eff_vcf, "[email protected]"]
        args = interhost.parser_snpEff(argparse.ArgumentParser()).parse_args(args)
        args.func_main(args)
        vcf = util.vcf.VcfReader(eff_vcf)
        expected_vcf = util.vcf.VcfReader(expected_eff_vcf)
        rows = list(vcf.get())
        expected_rows = list(expected_vcf.get())
        # NOTE(review): row comparison is disabled; both files are only read
        #self.assertEqual(rows, expected_rows)

        # create tabular iSNV output
        eff_txt = os.path.join(temp_dir,"ann_eff.txt.gz")
        expected_eff_txt = os.path.join(input_dir,"ann_eff.txt.gz")
        args = [eff_vcf, eff_txt]
        args = intrahost.parser_iSNV_table(argparse.ArgumentParser()).parse_args(args)
        args.func_main(args)
        for outrow, expectedrow in zip(util.file.read_tabfile(eff_txt),util.file.read_tabfile(expected_eff_txt)):
            for colout, colexpected in zip(outrow, expectedrow):
                # if both values cast to float, perform approx comparison
                try:
                    f1 = float(colout)
                    f2 = float(colexpected)
                except ValueError:
                    # non-numeric: compare as order-insensitive comma-separated lists
                    self.assertEqual(sorted(colout.split(",")), sorted(colexpected.split(",")))
                else:
                    # compare actual against expected (not the value with itself)
                    self.assertAlmostEqual(f1, f2)
Exemplo n.º 16
0
    def test_deplete_bmtagger(self):
        """
            Run partition_bmtagger with only --outNoMatch on the paired input
            fastqs and compare both depleted files with the expected NoMatch
            outputs.
        """
        input_dir = util.file.get_test_input_path(self)
        parser = taxon_filter.parser_partition_bmtagger(argparse.ArgumentParser())
        cli_args = [
            os.path.join(input_dir, 'in1.fastq'),
            os.path.join(input_dir, 'in2.fastq'),
            os.path.join(self.tempDir, 'humanChr1Subset'),
            os.path.join(self.tempDir, 'humanChr9Subset'),
            '--outNoMatch',
            os.path.join(self.tempDir, 'deplete.1.fastq'),
            os.path.join(self.tempDir, 'deplete.2.fastq'),
        ]
        args = parser.parse_args(cli_args)
        args.func_main(args)

        # Compare to expected
        for mate in ('1', '2'):
            produced = os.path.join(self.tempDir, 'deplete.' + mate + '.fastq')
            expected = os.path.join(input_dir, 'expected.NoMatch.' + mate + '.fastq')
            assert_equal_contents(self, produced, expected)
Exemplo n.º 17
0
    def test_lasv_oob_clip(self):
        """
            Run ncbi.tbl_transfer_prealigned with oob_clip=True on the two
            LASV alignments and compare every listed output feature table
            with its expected counterpart.
        """
        lasv_dir = os.path.join(self.input_dir, "lasv")
        input_dir = os.path.join(lasv_dir, "input")
        expected_dir = os.path.join(lasv_dir, "expected")
        temp_dir = tempfile.gettempdir()

        alignment_names = [
            "align_mafft-ref-lasv-ISTH2376_1.fasta",
            "align_mafft-ref-lasv-ISTH2376_2.fasta"
        ]
        infastas = [os.path.join(input_dir, name) for name in alignment_names]

        intables = [
            os.path.join(input_dir, name)
            for name in ["KM821997.1.tbl", "KM821998.1.tbl"]
        ]

        out_table_names = [
            "LASV_NGA_2018_0026-1.tbl", "LASV_NGA_2018_0026-2.tbl",
            "LASV_NGA_2018_0097-1.tbl", "LASV_NGA_2018_0097-2.tbl",
            "LASV_NGA_2018_0541-1.tbl", "LASV_NGA_2018_0541-2.tbl",
            "LASV_NGA_2018_0611-1.tbl", "LASV_NGA_2018_0611-2.tbl",
            "LASV_NGA_2018_0664-1.tbl", "LASV_NGA_2018_0664-2.tbl",
            "LASV_NGA_2018_0959-1.tbl", "LASV_NGA_2018_0959-2.tbl",
            "LASV_NGA_2018_0998-1.tbl", "LASV_NGA_2018_0998-2.tbl",
            "LASV_NGA_2018_1024-1.tbl", "LASV_NGA_2018_1024-2.tbl",
            "LASV_NGA_2018_1079-1.tbl", "LASV_NGA_2018_1079-2.tbl",
            "LASV_NGA_2018_1177-1.tbl", "LASV_NGA_2018_1177-2.tbl",
            "LASV_NGA_2018_1375-1.tbl", "LASV_NGA_2018_1375-2.tbl",
            "LASV_NGA_2018_1381-1.tbl", "LASV_NGA_2018_1381-2.tbl",
            "LASV_NGA_2018_1392-1.tbl", "LASV_NGA_2018_1392-2.tbl",
            "LASV_NGA_2018_1643-1.tbl", "LASV_NGA_2018_1643-2.tbl"
        ]

        # the same reference fasta is used for every alignment
        ref_fasta = os.path.join(input_dir, "ref-lasv-ISTH2376.fasta")
        for infasta in infastas:
            ncbi.tbl_transfer_prealigned(infasta,
                                         ref_fasta,
                                         intables,
                                         temp_dir,
                                         oob_clip=True)

        for table_name in out_table_names:
            out_tbl = os.path.join(temp_dir, table_name)
            expected_tbl = os.path.join(expected_dir, table_name)
            assert_equal_contents(self, out_tbl, expected_tbl)
Exemplo n.º 18
0
    def test_partition_bmtagger(self):
        """
            Run partition_bmtagger with both --outMatch and --outNoMatch on
            the paired input fastqs and compare all four outputs with the
            expected files.
        """
        outMatch = []
        for n in '12':
            outMatch.append(os.path.join(self.tempDir, 'outMatch.{}.fastq'.format(n)))
        outNoMatch = []
        for n in '12':
            outNoMatch.append(os.path.join(self.tempDir, 'outNoMatch.{}.fastq'.format(n)))

        input_dir = util.file.get_test_input_path(self)
        parser = taxon_filter.parser_partition_bmtagger(argparse.ArgumentParser())
        cli_args = [
            os.path.join(input_dir, 'in1.fastq'),
            os.path.join(input_dir, 'in2.fastq'),
            os.path.join(self.tempDir, 'humanChr1Subset'),
            os.path.join(self.tempDir, 'humanChr9Subset'),
            '--outMatch', outMatch[0], outMatch[1],
            '--outNoMatch', outNoMatch[0], outNoMatch[1],
        ]
        args = parser.parse_args(cli_args)
        args.func_main(args)

        # Compare to expected
        for case in ('Match.1', 'Match.2', 'NoMatch.1', 'NoMatch.2'):
            produced = os.path.join(self.tempDir, 'out' + case + '.fastq')
            expected = os.path.join(input_dir, 'expected.' + case + '.fastq')
            assert_equal_contents(self, produced, expected)
Exemplo n.º 19
0
    def test_synthetic_feature_table(self):
        """
            Run ncbi.tbl_transfer_prealigned on the synthetic alignment and
            compare sample.tbl in the temp directory with the expected
            mapped.tbl.
        """
        synthetic_dir = os.path.join(self.input_dir, "synthetic")
        input_dir = os.path.join(synthetic_dir, "input")
        expected_dir = os.path.join(synthetic_dir, "expected")
        temp_dir = tempfile.gettempdir()

        aligned_fasta = os.path.join(input_dir, "aligned_1.fasta")
        ref_fasta = os.path.join(input_dir, "ref.fasta")
        ref_tables = [os.path.join(input_dir, "ref.tbl")]

        ncbi.tbl_transfer_prealigned(aligned_fasta, ref_fasta, ref_tables, temp_dir)

        assert_equal_contents(self,
                              os.path.join(temp_dir, "sample.tbl"),
                              os.path.join(expected_dir, "mapped.tbl"))
    def test_trimmomatic_paired_maxinfo(self):
        """
            Run TrimmomaticTool on the paired input fastqs with the maxinfo
            options set and compare both paired outputs with the expected
            maxinfo files.
        """
        input_dir = util.file.get_test_input_path(self)
        in_fastqs = [os.path.join(input_dir, name) for name in ('in1.fastq', 'in2.fastq')]
        clip_fasta = os.path.join(input_dir, 'clip.fasta')
        out_suffixes = ('.out1.fastq', '.out2.fastq')
        with util.file.tempfnames(out_suffixes) as (out_fastq1, out_fastq2):
            trimmer = tools.trimmomatic.TrimmomaticTool()
            trimmer.execute(in_fastqs[0], in_fastqs[1], out_fastq1, out_fastq2, clip_fasta,
                            maxinfo_target_length=30, maxinfo_strictness=.3)

            # Check that results match expected (while the temp files exist)
            expected_pairs = (
                (out_fastq1, os.path.join(input_dir, 'expected1.maxinfo.fastq')),
                (out_fastq2, os.path.join(input_dir, 'expected2.maxinfo.fastq')),
            )
            for produced, expected in expected_pairs:
                assert_equal_contents(self, produced, expected)
    def test_trimmomatic_paired(self):
        """
            Run TrimmomaticTool on the paired input fastqs and compare both
            paired outputs with the expected files.
        """
        input_dir = util.file.get_test_input_path(self)
        in_fastqs = [os.path.join(input_dir, name) for name in ('in1.fastq', 'in2.fastq')]
        out_fastq1 = util.file.mkstempfname('.out1.fastq')
        out_fastq2 = util.file.mkstempfname('.out2.fastq')
        clip_fasta = os.path.join(input_dir, 'clip.fasta')

        trimmer = tools.trimmomatic.TrimmomaticTool()
        trimmer.execute(in_fastqs[0], in_fastqs[1], out_fastq1, out_fastq2, clip_fasta)

        # Check that results match expected
        expected_pairs = (
            (out_fastq1, os.path.join(input_dir, 'expected1.fastq')),
            (out_fastq2, os.path.join(input_dir, 'expected2.fastq')),
        )
        for produced, expected in expected_pairs:
            assert_equal_contents(self, produced, expected)
Exemplo n.º 22
0
    def test_trimmomatic_paired(self):
        """
            Run TrimmomaticTool on the paired input fastqs and compare both
            paired outputs with the expected files.
        """
        input_dir = util.file.get_test_input_path(self)
        reads1 = os.path.join(input_dir, 'in1.fastq')
        reads2 = os.path.join(input_dir, 'in2.fastq')
        trimmed1 = util.file.mkstempfname('.out1.fastq')
        trimmed2 = util.file.mkstempfname('.out2.fastq')
        adapters = os.path.join(input_dir, 'clip.fasta')

        tools.trimmomatic.TrimmomaticTool().execute(
            reads1, reads2, trimmed1, trimmed2, adapters)

        # Check that results match expected
        assert_equal_contents(self, trimmed1,
                              os.path.join(input_dir, 'expected1.fastq'))
        assert_equal_contents(self, trimmed2,
                              os.path.join(input_dir, 'expected2.fastq'))
Exemplo n.º 23
0
    def test_synthetic_feature_table(self):
        """
            Run ncbi.tbl_transfer_prealigned on the synthetic alignment and
            compare sample.tbl in the temp directory with the expected
            mapped.tbl.
        """
        synthetic_dir = os.path.join(self.input_dir, "synthetic")
        input_dir = os.path.join(synthetic_dir, "input")
        expected_dir = os.path.join(synthetic_dir, "expected")
        temp_dir = tempfile.gettempdir()

        produced_tbl = os.path.join(temp_dir, "sample.tbl")
        expected_tbl = os.path.join(expected_dir, "mapped.tbl")

        ncbi.tbl_transfer_prealigned(
            os.path.join(input_dir, "aligned_1.fasta"),
            os.path.join(input_dir, "ref.fasta"),
            [os.path.join(input_dir, "ref.tbl")],
            temp_dir)

        assert_equal_contents(self, produced_tbl, expected_tbl)
Exemplo n.º 24
0
    def test_trimmomatic(self):
        """
            Run trim_trimmomatic through its argument parser on the paired
            input fastqs and compare both outputs with the expected files.
        """
        input_dir = util.file.get_test_input_path(self)
        in_fastqs = [os.path.join(input_dir, name) for name in ('in1.fastq', 'in2.fastq')]
        out_fastq1 = util.file.mkstempfname()
        out_fastq2 = util.file.mkstempfname()
        clip_fasta = os.path.join(input_dir, 'clip.fasta')

        parser = taxon_filter.parser_trim_trimmomatic(argparse.ArgumentParser())
        cli_args = in_fastqs + [out_fastq1, out_fastq2, clip_fasta]
        args = parser.parse_args(cli_args)
        args.func_main(args)

        # Check that results match expected
        expected_pairs = (
            (out_fastq1, os.path.join(input_dir, 'expected1.fastq')),
            (out_fastq2, os.path.join(input_dir, 'expected2.fastq')),
        )
        for produced, expected in expected_pairs:
            assert_equal_contents(self, produced, expected)
Exemplo n.º 25
0
    def test_trimmomatic(self):
        """
            Run trim_trimmomatic through its argument parser on the paired
            input fastqs and compare both outputs with the expected files.
        """
        input_dir = util.file.get_test_input_path(self)
        reads1 = os.path.join(input_dir, 'in1.fastq')
        reads2 = os.path.join(input_dir, 'in2.fastq')
        trimmed1 = util.file.mkstempfname()
        trimmed2 = util.file.mkstempfname()
        adapters = os.path.join(input_dir, 'clip.fasta')

        trim_parser = taxon_filter.parser_trim_trimmomatic(argparse.ArgumentParser())
        args = trim_parser.parse_args(
            [reads1, reads2, trimmed1, trimmed2, adapters])
        args.func_main(args)

        # Check that results match expected
        assert_equal_contents(self, trimmed1,
                              os.path.join(input_dir, 'expected1.fastq'))
        assert_equal_contents(self, trimmed2,
                              os.path.join(input_dir, 'expected2.fastq'))
Exemplo n.º 26
0
    def test_deplete_blastn_bam(self):
        """
            Build BLAST databases for the two human chromosome subsets,
            convert the paired input fastqs to a BAM, run deplete_blastn_bam
            on it, and compare the SAM view of the result with expected.sam.
        """
        work_dir = tempfile.mkdtemp()
        input_dir = util.file.get_test_input_path(self)

        # Make blast databases
        makeblastdb = tools.blast.MakeblastdbTool().install_and_get_path()
        db_paths = []
        for fasta_name in ('humanChr1Subset.fa', 'humanChr9Subset.fa'):
            db_path = os.path.join(work_dir, fasta_name)
            # the database is built next to a symlink of the input fasta
            os.symlink(os.path.join(input_dir, fasta_name), db_path)
            db_paths.append(db_path)
            subprocess.check_call(
                [makeblastdb, '-dbtype', 'nucl', '-in', db_path])

        # convert the input fastq's to a bam
        in_fastq1 = os.path.join(input_dir, "in1.fastq")
        in_fastq2 = os.path.join(input_dir, "in2.fastq")
        in_bam = os.path.join(work_dir, 'in.bam')
        to_bam_parser = read_utils.parser_fastq_to_bam(argparse.ArgumentParser())
        to_bam_args = [
            in_fastq1, in_fastq2, in_bam,
            '--sampleName', 'FreeSample',
            '--JVMmemory', '1g',
            '--picardOptions',
            'LIBRARY_NAME=Alexandria',
            'PLATFORM=9.75',
            'SEQUENCING_CENTER=KareemAbdul-Jabbar',
        ]
        args = to_bam_parser.parse_args(to_bam_args)
        args.func_main(args)

        # Run deplete_blastn_bam
        out_bam = os.path.join(work_dir, 'out.bam')
        deplete_parser = taxon_filter.parser_deplete_blastn_bam(argparse.ArgumentParser())
        args = deplete_parser.parse_args(
            [in_bam, db_paths[0], db_paths[1], out_bam, "--chunkSize", "1"])
        args.func_main(args)

        # samtools view for out.sam and compare to expected
        out_sam = os.path.join(work_dir, 'out.sam')
        tools.samtools.SamtoolsTool().view(['-h'], out_bam, out_sam)
        assert_equal_contents(self, out_sam,
                              os.path.join(input_dir, 'expected.sam'))
Exemplo n.º 27
0
    def test_simple_merge(self):
        """
            Simple repack test: merge the input tarballs into one archive,
            unpack it, and compare every raw file with its original.
        """
        temp_dir = tempfile.gettempdir()
        out_tarball_file = os.path.join(temp_dir, "out.tar.gz")

        file_utils.merge_tarballs(out_tarball_file, self.input_tgz_files)

        # Use the archive as a context manager so its file handle is closed
        # even when extraction fails.
        with tarfile.open(out_tarball_file) as tb:
            tb.extractall(path=temp_dir)

        for raw_file in self.raw_files:
            inf = os.path.join(self.input_dir, "raw-input", raw_file)
            outf = os.path.join(temp_dir, raw_file)

            assert_equal_contents(self, inf, outf)
Exemplo n.º 28
0
    def test_filter_lastal(self):
        """
            Build a LAST database from the ebola reference, run the
            filter_lastal command on in.fastq, and compare the output
            with expected.fastq.
        """
        # Build the reference database in a fresh temp dir
        shared_inputs = util.file.get_test_input_path()
        test_inputs = util.file.get_test_input_path(self)
        ebola_fasta = os.path.join(shared_inputs, 'ebola.fasta')
        db_prefix = os.path.join(tempfile.mkdtemp(), 'ebola')
        lastdb = tools.last.Lastdb().install_and_get_path()
        subprocess.check_call([lastdb, db_prefix, ebola_fasta])

        # Drive filter_lastal through its argparse entry point
        reads_in = os.path.join(test_inputs, 'in.fastq')
        reads_out = util.file.mkstempfname('.fastq')
        parser = taxon_filter.parser_filter_lastal(argparse.ArgumentParser())
        cli_args = parser.parse_args([reads_in, db_prefix, reads_out])
        cli_args.func_main(cli_args)

        # The filtered reads must match the stored expectation
        assert_equal_contents(
            self, reads_out, os.path.join(test_inputs, 'expected.fastq'))
Exemplo n.º 29
0
    def test_filter_lastal(self):
        """
            Exercise the filter_lastal command: create a LAST database for
            ebola.fasta, run the command on in.fastq, and check the output
            against expected.fastq.
        """
        common_dir = util.file.get_test_input_path()
        case_dir = util.file.get_test_input_path(self)

        # Reference database, written under a new temp dir
        reference = os.path.join(common_dir, 'ebola.fasta')
        db_dir = tempfile.mkdtemp()
        last_db = os.path.join(db_dir, 'ebola')
        lastdb_exe = tools.last.Lastdb().install_and_get_path()
        build_cmd = [lastdb_exe, last_db, reference]
        subprocess.check_call(build_cmd)

        # Parse the command line and dispatch to the command's main function
        source_fastq = os.path.join(case_dir, 'in.fastq')
        filtered_fastq = util.file.mkstempfname('.fastq')
        argv = [source_fastq, last_db, filtered_fastq]
        parsed = taxon_filter.parser_filter_lastal(
            argparse.ArgumentParser()).parse_args(argv)
        parsed.func_main(parsed)

        # Compare with the stored expectation
        wanted_fastq = os.path.join(case_dir, 'expected.fastq')
        assert_equal_contents(self, filtered_fastq, wanted_fastq)
Exemplo n.º 30
0
    def test_simple_merge(self):
        """
            Simple repack test: the merged tarball must unpack to files
            identical to the raw inputs.
        """
        temp_dir = tempfile.gettempdir()
        out_tarball_file = os.path.join(temp_dir, "out.tar.gz")

        file_utils.merge_tarballs(out_tarball_file, self.input_tgz_files)

        # The with-block closes the archive handle, which the bare
        # tarfile.open() call previously left open.
        with tarfile.open(out_tarball_file) as tb:
            tb.extractall(path=temp_dir)

        for raw_file in self.raw_files:
            inf = os.path.join(self.input_dir, "raw-input", raw_file)
            outf = os.path.join(temp_dir, raw_file)

            assert_equal_contents(self, inf, outf)
Exemplo n.º 31
0
    def test_filter_lastal(self):
        """
            Build a LAST database from the ebola reference, run
            main_filter_lastal on in.fastq, and compare the output with
            expected.fastq.
        """
        # Imported locally: this block did not previously depend on subprocess.
        import subprocess

        # Create refDbs
        commonInputDir = util.file.get_test_input_path()
        myInputDir = util.file.get_test_input_path(self)
        refFasta = os.path.join(commonInputDir, 'ebola.fasta')
        dbsDir = tempfile.mkdtemp()
        refDbs = os.path.join(dbsDir, 'ebola')
        lastdbPath = tools.last.Lastdb().install_and_get_path()
        # Run lastdb from an argument list with no shell, so paths with
        # spaces or shell metacharacters stay intact. check_call raises on a
        # non-zero exit, whereas the old `assert` is stripped under -O.
        subprocess.check_call([lastdbPath, refDbs, refFasta])

        # Call main_filter_lastal
        inFastq = os.path.join(myInputDir, 'in.fastq')
        outFastq = util.file.mkstempfname('.fastq')
        args = taxon_filter.parser_filter_lastal().parse_args(
            [inFastq, refDbs, outFastq])
        taxon_filter.main_filter_lastal(args)

        # Check that results match expected
        expectedFastq = os.path.join(myInputDir, 'expected.fastq')
        assert_equal_contents(self, outFastq, expectedFastq)
Exemplo n.º 32
0
    def test_piped_in_merge(self):
        """
            Test with streamed input: the concatenated input tarballs are
            fed to merge_tarballs through a patched sys.stdin, and the
            merged archive must unpack to files identical to the raw inputs.
        """
        temp_dir = tempfile.gettempdir()
        out_tarball_file = os.path.join(temp_dir, "out.tar.gz")

        # Pass the paths as an argument list instead of splitting a formatted
        # string, so a path containing whitespace stays a single argument.
        ps = subprocess.Popen(["cat"] + list(self.input_tgz_files),
                              stdout=subprocess.PIPE)
        try:
            with patch('sys.stdin', ps.stdout):
                file_utils.merge_tarballs(out_tarball_file,
                                          ["-"],
                                          pipe_hint_in="gz")
        finally:
            # Close our end of the pipe and reap the child even when the
            # merge raises, so no descriptor or zombie process is left behind.
            ps.stdout.close()
            ps.wait()

        # The with-block closes the archive handle after extraction.
        with tarfile.open(out_tarball_file) as tb:
            tb.extractall(path=temp_dir)

        for raw_file in self.raw_files:
            inf = os.path.join(self.input_dir, "raw-input", raw_file)
            outf = os.path.join(temp_dir, raw_file)

            assert_equal_contents(self, inf, outf)
Exemplo n.º 33
0
    def test_piped_out_merge(self):
        """
            Test with streamed output: merge_tarballs writes to "-" while
            sys.stdout is patched to an unbuffered binary file, and that
            file must unpack to files identical to the raw inputs.
        """
        temp_dir = tempfile.gettempdir()
        out_tarball_file = os.path.join(temp_dir, "out.tar.gz")

        with open(out_tarball_file, "wb", 0) as out_stream:
            # temporarily disable pytest's capture of sys.stdout
            with self.capsys.disabled():
                with patch('sys.stdout', out_stream):
                    file_utils.merge_tarballs("-",
                                              self.input_tgz_files,
                                              pipe_hint_out="gz")

        # The with-block closes the archive handle, which the bare
        # tarfile.open() call previously left open.
        with tarfile.open(out_tarball_file) as tb:
            tb.extractall(path=temp_dir)

        for raw_file in self.raw_files:
            inf = os.path.join(self.input_dir, "raw-input", raw_file)
            outf = os.path.join(temp_dir, raw_file)

            assert_equal_contents(self, inf, outf)
Exemplo n.º 34
0
    def test_piped_out_merge(self):
        """
            Test with streamed output: the archive that merge_tarballs emits
            on a patched sys.stdout is captured in a file, unpacked, and
            compared with the raw inputs.
        """
        temp_dir = tempfile.gettempdir()
        out_tarball_file = os.path.join(temp_dir, "out.tar.gz")

        # The stream gets its own name so it is not confused with the
        # per-file output path used in the comparison loop below.
        with open(out_tarball_file, "wb", 0) as stdout_file:
            # temporarily disable pytest's capture of sys.stdout
            with self.capsys.disabled():
                with patch('sys.stdout', stdout_file):
                    file_utils.merge_tarballs("-",
                                              self.input_tgz_files,
                                              pipe_hint_out="gz")

        # Close the archive deterministically once extraction is done.
        with tarfile.open(out_tarball_file) as tb:
            tb.extractall(path=temp_dir)

        for raw_file in self.raw_files:
            inf = os.path.join(self.input_dir, "raw-input", raw_file)
            outf = os.path.join(temp_dir, raw_file)

            assert_equal_contents(self, inf, outf)
Exemplo n.º 35
0
    def test_piped_in_merge(self):
        """
            Test with streamed input: `cat` streams the input tarballs into
            a patched sys.stdin, merge_tarballs reads from "-", and the
            merged archive must unpack to files identical to the raw inputs.
        """
        temp_dir = tempfile.gettempdir()
        out_tarball_file = os.path.join(temp_dir, "out.tar.gz")

        # Build argv directly; formatting a string and calling .split() would
        # break any input path that contains whitespace.
        cat_argv = ["cat"] + list(self.input_tgz_files)
        ps = subprocess.Popen(cat_argv, stdout=subprocess.PIPE)
        try:
            with patch('sys.stdin', ps.stdout):
                file_utils.merge_tarballs(out_tarball_file, ["-"],
                                          pipe_hint_in="gz")
        finally:
            # Always release the pipe and reap the child, including on failure.
            ps.stdout.close()
            ps.wait()

        # Close the archive deterministically once extraction is done.
        with tarfile.open(out_tarball_file) as tb:
            tb.extractall(path=temp_dir)

        for raw_file in self.raw_files:
            inf = os.path.join(self.input_dir, "raw-input", raw_file)
            outf = os.path.join(temp_dir, raw_file)

            assert_equal_contents(self, inf, outf)
Exemplo n.º 36
0
    def test_trimmomatic_paired_maxinfo(self):
        """
            Run TrimmomaticTool on a read pair with the maxinfo options set
            and compare both paired outputs with the expected fastq files.
        """
        input_dir = util.file.get_test_input_path(self)
        forward_reads = os.path.join(input_dir, 'in1.fastq')
        reverse_reads = os.path.join(input_dir, 'in2.fastq')
        adapters = os.path.join(input_dir, 'clip.fasta')

        out_suffixes = ('.out1.fastq', '.out2.fastq')
        with util.file.tempfnames(out_suffixes) as (trimmed1, trimmed2):
            trimmer = tools.trimmomatic.TrimmomaticTool()
            trimmer.execute(
                forward_reads, reverse_reads,
                trimmed1, trimmed2,
                adapters,
                maxinfo_target_length=30,
                maxinfo_strictness=.3)

            # Each paired output is checked against its stored expectation
            # while the temp file names are still in scope.
            expected_for_output = (
                (trimmed1, 'expected1.maxinfo.fastq'),
                (trimmed2, 'expected2.maxinfo.fastq'),
            )
            for produced, expected_name in expected_for_output:
                assert_equal_contents(
                    self, produced, os.path.join(input_dir, expected_name))
Exemplo n.º 37
0
    def test_lastal_db_build(self):
        """
            Run the lastal_build_db command on the ebola reference and
            compare each generated database file with the stored copy
            under the test's "expected" directory.
        """
        ebola_fasta = os.path.join(
            util.file.get_test_input_path(), 'ebola.fasta')
        expected_dir = os.path.join(
            util.file.get_test_input_path(self), "expected")
        out_dir = tempfile.mkdtemp()

        # Output files are named after the test class
        db_prefix = self.__class__.__name__

        # Positional args: input fasta, then output directory
        argv = [ebola_fasta, out_dir, "--outputFilePrefix", db_prefix]
        parser = taxon_filter.parser_lastal_build_db(argparse.ArgumentParser())
        build_args = parser.parse_args(argv)
        build_args.func_main(build_args)

        for suffix in (".bck", ".des", ".prj", ".sds", ".ssp", ".suf", ".tis"):
            db_file = db_prefix + suffix
            assert_equal_contents(
                self,
                os.path.join(out_dir, db_file),
                os.path.join(expected_dir, db_file))
    def test_trimmomatic_single(self):
        """
            Run TrimmomaticTool with one input fastq and no mate (None).
            Both paired outputs must equal empty.fastq and the first
            unpaired output must equal expected1.fastq.
        """
        input_dir = util.file.get_test_input_path(self)
        reads = os.path.join(input_dir, 'in1.fastq')
        adapters = os.path.join(input_dir, 'clip.fasta')

        # Temp output names, created in the order paired1, paired2,
        # unpaired1, unpaired2
        paired1, paired2, unpaired1, unpaired2 = [
            util.file.mkstempfname(suffix)
            for suffix in ('.out1.fastq', '.out2.fastq',
                           '.out3.fastq', '.out4.fastq')
        ]

        trimmer = tools.trimmomatic.TrimmomaticTool()
        trimmer.execute(
            reads, None, paired1, paired2, adapters,
            unpairedOutFastq1=unpaired1,
            unpairedOutFastq2=unpaired2)

        # Check that results match expected; unpaired2 is passed to the
        # tool but not compared here.
        empty = os.path.join(input_dir, 'empty.fastq')
        expected = os.path.join(input_dir, 'expected1.fastq')
        for paired_out in (paired1, paired2):
            assert_equal_contents(self, paired_out, empty)
        assert_equal_contents(self, unpaired1, expected)
Exemplo n.º 39
0
    def test_trimmomatic_single(self):
        """
            Single-input Trimmomatic run: in1.fastq is trimmed with no
            second read file. The paired outputs are compared with
            empty.fastq and the first unpaired output with expected1.fastq.
        """
        case_dir = util.file.get_test_input_path(self)

        def case_file(name):
            # Path of a fixture inside this test's input directory
            return os.path.join(case_dir, name)

        single_fastq = case_file('in1.fastq')
        out_paired_1 = util.file.mkstempfname('.out1.fastq')
        out_paired_2 = util.file.mkstempfname('.out2.fastq')
        out_unpaired_1 = util.file.mkstempfname('.out3.fastq')
        out_unpaired_2 = util.file.mkstempfname('.out4.fastq')
        unpaired_kwargs = {
            'unpairedOutFastq1': out_unpaired_1,
            'unpairedOutFastq2': out_unpaired_2,
        }
        tools.trimmomatic.TrimmomaticTool().execute(
            single_fastq, None, out_paired_1, out_paired_2,
            case_file('clip.fasta'), **unpaired_kwargs)

        # Compare outputs with the fixtures; the second unpaired output is
        # not compared.
        blank_fastq = case_file('empty.fastq')
        wanted_fastq = case_file('expected1.fastq')
        assert_equal_contents(self, out_paired_1, blank_fastq)
        assert_equal_contents(self, out_paired_2, blank_fastq)
        assert_equal_contents(self, out_unpaired_1, wanted_fastq)
Exemplo n.º 40
0
    def test_lasv_oob_clip(self):
        """
            Run ncbi.tbl_transfer_prealigned with oob_clip=True on both
            LASV alignments, then compare every expected output table in
            the temp dir with its counterpart in the "expected" directory.
        """
        lasv_dir = os.path.join(self.input_dir, "lasv")
        input_dir = os.path.join(lasv_dir, "input")
        expected_dir = os.path.join(lasv_dir, "expected")
        temp_dir = tempfile.gettempdir()

        alignment_names = (
            "align_mafft-ref-lasv-ISTH2376_1.fasta",
            "align_mafft-ref-lasv-ISTH2376_2.fasta",
        )
        alignments = [os.path.join(input_dir, name) for name in alignment_names]

        table_names = ("KM821997.1.tbl", "KM821998.1.tbl")
        feature_tables = [os.path.join(input_dir, name) for name in table_names]

        out_table_names = [
            "LASV_NGA_2018_0026-1.tbl",
            "LASV_NGA_2018_0026-2.tbl",
            "LASV_NGA_2018_0097-1.tbl",
            "LASV_NGA_2018_0097-2.tbl",
            "LASV_NGA_2018_0541-1.tbl",
            "LASV_NGA_2018_0541-2.tbl",
            "LASV_NGA_2018_0611-1.tbl",
            "LASV_NGA_2018_0611-2.tbl",
            "LASV_NGA_2018_0664-1.tbl",
            "LASV_NGA_2018_0664-2.tbl",
            "LASV_NGA_2018_0959-1.tbl",
            "LASV_NGA_2018_0959-2.tbl",
            "LASV_NGA_2018_0998-1.tbl",
            "LASV_NGA_2018_0998-2.tbl",
            "LASV_NGA_2018_1024-1.tbl",
            "LASV_NGA_2018_1024-2.tbl",
            "LASV_NGA_2018_1079-1.tbl",
            "LASV_NGA_2018_1079-2.tbl",
            "LASV_NGA_2018_1177-1.tbl",
            "LASV_NGA_2018_1177-2.tbl",
            "LASV_NGA_2018_1375-1.tbl",
            "LASV_NGA_2018_1375-2.tbl",
            "LASV_NGA_2018_1381-1.tbl",
            "LASV_NGA_2018_1381-2.tbl",
            "LASV_NGA_2018_1392-1.tbl",
            "LASV_NGA_2018_1392-2.tbl",
            "LASV_NGA_2018_1643-1.tbl",
            "LASV_NGA_2018_1643-2.tbl"
        ]

        # Every alignment is transferred against the same reference fasta
        # and the same list of input tables, writing into temp_dir.
        reference = os.path.join(input_dir, "ref-lasv-ISTH2376.fasta")
        for alignment in alignments:
            ncbi.tbl_transfer_prealigned(
                alignment,
                reference,
                feature_tables,
                temp_dir,
                oob_clip=True)

        # Compare each produced table with the stored expectation
        for table_name in out_table_names:
            produced = os.path.join(temp_dir, table_name)
            wanted = os.path.join(expected_dir, table_name)
            assert_equal_contents(self, produced, wanted)