def test_translated_search_unaligned_reads_annotations_bug(self):
    """
    Test the unaligned reads and the store alignments
    Test with a rapsearch2 output file
    Test the different annotation formats are recognized for bug
    Test without the coverage filter
    """

    # create a set of alignments
    alignments = store.Alignments()
    unaligned_reads_store = store.Reads()

    # set the coverage threshold to zero so as to not test with filter on
    current_coverage_threshold = config.translated_subject_coverage_threshold
    config.translated_subject_coverage_threshold = 0

    try:
        # load the rapsearch2 output with the unaligned reads function
        unaligned_file_fasta = translated.unaligned_reads(
            unaligned_reads_store, cfg.rapsearch_file_annotations, alignments)

        # remove temp file
        utils.remove_temp_file(unaligned_file_fasta)
    finally:
        # always reset the coverage threshold, even when the load raises,
        # so the modified module-level config does not leak into other tests
        config.translated_subject_coverage_threshold = current_coverage_threshold

    # there should be one bug name and the other should be unclassified
    self.assertEqual(
        sorted(alignments.bug_list()),
        sorted(["s__Bacteroides_xylanisolvens", "unclassified"]))
Пример #2
0
    def test_determine_file_format_fastq_gzip(self):
        """
        Test the determine_file_format function with a fastq file that is gzipped
        """

        # create a temp file to hold the gzipped copy of the small fastq file
        file_out, gzip_fastq_file = tempfile.mkstemp(suffix=".gz")
        os.close(file_out)

        try:
            # write the gzipped file; the context managers close both handles
            # even if the copy fails part way through
            with open(cfg.small_fastq_file, "rt") as file_handle:
                with gzip.open(gzip_fastq_file, "wt") as file_handle_gzip:
                    shutil.copyfileobj(file_handle, file_handle_gzip)

            # named file_format so the builtin format() is not shadowed
            file_format = utilities.determine_file_format(gzip_fastq_file)
        finally:
            # remove the temp gzipped file whether or not the steps above succeeded
            utils.remove_temp_file(gzip_fastq_file)

        self.assertEqual(file_format, "fastq.gz")
 def test_nucleotide_search_unaligned_reads_read_count_unaligned_minimize_memory_use(self):
     """
     Test the unaligned reads and the store alignments
     Test with a bowtie2/sam output file
     Test for unaligned read counts
     Test with minimize memory use
     """

     # create a set of alignments
     alignments = store.Alignments()
     unaligned_reads_store = store.Reads(minimize_memory_use=True)

     # turn off query/subject filtering
     config.nucleotide_subject_coverage_threshold = 0
     config.nucleotide_query_coverage_threshold = 0

     try:
         # read in the aligned and unaligned reads
         unaligned_reads_file_fasta, reduced_aligned_reads_file = nucleotide.unaligned_reads(
             cfg.sam_file_unaligned_reads, alignments, unaligned_reads_store, keep_sam=True)
     finally:
         # reset query/subject filtering even if the read fails so the
         # zeroed thresholds do not leak into other tests
         config.nucleotide_subject_coverage_threshold = self.default_nucleotide_subject_coverage_threshold
         config.nucleotide_query_coverage_threshold = self.default_nucleotide_query_coverage_threshold

     # remove temp files
     utils.remove_temp_file(unaligned_reads_file_fasta)
     utils.remove_temp_file(reduced_aligned_reads_file)

     # check the unaligned reads count
     self.assertEqual(unaligned_reads_store.count_reads(), cfg.sam_file_unaligned_reads_total_unaligned)
Пример #4
0
    def test_PathwaysDatabase_print_flat_file_reactions_list(self):
        """
        Pathways database class: Test the printing of a flat file from a recursive file
        Test the reactions list
        """

        pathways_database_store = store.PathwaysDatabase(cfg.pathways_file, True)
        pathways_database_flat_store = store.PathwaysDatabase(cfg.pathways_flat_file, True)

        # write the flat file created from a recursive file to a temp file
        # os.write() only accepts bytes on Python 3, so the descriptor is closed
        # and the database is written through a text-mode file handle instead
        # NOTE(review): assumes get_database() returns a text string -- confirm
        file_out, new_file = tempfile.mkstemp()
        os.close(file_out)
        with open(new_file, "w") as file_handle:
            file_handle.write(pathways_database_store.get_database())

        # load in the flat file so it can be compared with the correct flat file
        pathways_database_flat_store_write = store.PathwaysDatabase(new_file, True)

        # remove the temp file
        utils.remove_temp_file(new_file)

        # check that the reactions list for each pathway is identical
        for pathway in pathways_database_flat_store_write.pathway_list():
            self.assertEqual(
                sorted(pathways_database_flat_store_write.find_reactions(pathway)),
                sorted(pathways_database_flat_store.find_reactions(pathway)))
Пример #5
0
    def test_PathwaysDatabase_print_flat_file_pathways_count(self):
        """
        Pathways database class: Test the printing of a flat file from a recursive file
        Test for the total number of pathways
        """

        pathways_database_store = store.PathwaysDatabase(cfg.pathways_file, True)
        pathways_database_flat_store = store.PathwaysDatabase(cfg.pathways_flat_file, True)

        # write the flat file created from a recursive file to a temp file
        # os.write() only accepts bytes on Python 3, so the descriptor is closed
        # and the database is written through a text-mode file handle instead
        # NOTE(review): assumes get_database() returns a text string -- confirm
        file_out, new_file = tempfile.mkstemp()
        os.close(file_out)
        with open(new_file, "w") as file_handle:
            file_handle.write(pathways_database_store.get_database())

        # load in the flat file so it can be compared with the correct flat file
        pathways_database_flat_store_write = store.PathwaysDatabase(new_file, True)

        # remove the temp file
        utils.remove_temp_file(new_file)

        # check for the same number of pathways
        pathway_list = pathways_database_flat_store_write.pathway_list()
        pathway_list_flat = pathways_database_flat_store.pathway_list()

        self.assertEqual(len(pathway_list), len(pathway_list_flat))
Пример #6
0
    def test_translated_search_unaligned_reads_annotations_gene_length(self):
        """
        Load a rapsearch2 output file with translated_search.unaligned_reads
        and check the gene length stored with each hit: hits whose reference
        is "UniRef50" must have length 2000, all other hits length 1000
        """

        # containers filled in by the unaligned reads function
        hit_store = store.Alignments()
        read_store = store.Reads()

        # parse the rapsearch2 output, then discard the fasta file it produces
        fasta_path = translated_search.unaligned_reads(
            read_store, cfg.rapsearch_file_annotations, hit_store)
        utils.remove_temp_file(fasta_path)

        # exactly four hits are expected
        recorded_hits = hit_store.get_hit_list()
        self.assertEqual(len(recorded_hits), 4)

        # each hit is a five-item record; only reference and length are checked
        for _query, _bug, reference, _evalue, length in recorded_hits:
            wanted_length = 2000 if reference == "UniRef50" else 1000
            self.assertEqual(length, wanted_length)
Пример #7
0
    def test_PathwaysDatabase_print_flat_file_pathways_list(self):
        """
        Pathways database class: write the database loaded from the structured
        file out as a flat file, reload it, and check every pathway id in the
        reloaded database is also present in the reference flat database
        """

        structured_db = store.PathwaysDatabase(cfg.pathways_file)
        reference_flat_db = store.PathwaysDatabase(cfg.pathways_flat_file)

        # dump the structured database to a temp file
        descriptor, flat_copy_path = tempfile.mkstemp()
        os.close(descriptor)
        with open(flat_copy_path, "w") as flat_copy:
            flat_copy.write(structured_db.get_database())

        # reload the dumped file, after which the temp file is no longer needed
        rewritten_db = store.PathwaysDatabase(flat_copy_path)
        utils.remove_temp_file(flat_copy_path)

        written_ids = rewritten_db.pathway_list()
        reference_ids = reference_flat_db.pathway_list()

        # every pathway id that was written must exist in the reference list
        for pathway_id in written_ids:
            self.assertTrue(pathway_id in reference_ids)
    def test_nucleotide_search_unaligned_reads_output_fasta_format(self):
        """
        Test the unaligned reads and the store alignments
        Test with a bowtie2/sam output file
        Test output file is of fasta format
        Test sam file is not removed
        """

        # create a set of alignments
        alignments = store.Alignments()
        unaligned_reads_store = store.Reads()

        # turn off query/subject filtering
        config.nucleotide_subject_coverage_threshold = 0
        config.nucleotide_query_coverage_threshold = 0

        try:
            # read in the aligned and unaligned reads
            unaligned_reads_file_fasta, reduced_aligned_reads_file = nucleotide.unaligned_reads(
                cfg.sam_file_unaligned_reads, alignments, unaligned_reads_store, keep_sam=True)
        finally:
            # reset query/subject filtering even if the read fails so the
            # zeroed thresholds do not leak into other tests
            config.nucleotide_subject_coverage_threshold = self.default_nucleotide_subject_coverage_threshold
            config.nucleotide_query_coverage_threshold = self.default_nucleotide_query_coverage_threshold

        try:
            # determine the output file format before the file is removed
            file_format = utilities.determine_file_format(unaligned_reads_file_fasta)
        finally:
            # remove temp files before asserting so a failure does not leave them behind
            utils.remove_temp_file(unaligned_reads_file_fasta)
            utils.remove_temp_file(reduced_aligned_reads_file)

        # check for fasta output file format
        self.assertEqual("fasta", file_format)
    def test_gene_families_tsv_output_with_names(self):
        """
        Test the gene families function and the blast config indexes
        Test UniRef50_unknown is read in and used for gene scores but not printed
        Test the tsv output
        Test that gene families have names applied to them
        Test unmapped reads total is written with the same precision as other lines
        """

        # update the max decimals to allow for rounding
        config.output_max_decimals = 7

        # set to a smaller mapping file
        original_gene_family_mapping_file = config.gene_family_name_mapping_file
        config.gene_family_name_mapping_file = cfg.gene_families_to_names_file

        gene_families_file = None
        try:
            # create a set of alignments
            alignments = store.Alignments()

            # load the usearch output, skipping comment lines
            with open(cfg.usearch_uniref50_file) as file_handle:
                for line in file_handle:
                    if line.startswith("#"):
                        continue

                    data = line.strip().split(config.blast_delimiter)

                    referenceids = data[config.blast_reference_index].split("|")
                    queryid = data[config.blast_query_index]
                    identity = float(data[config.blast_identity_index])

                    alignments.add(referenceids[1], 1, queryid, identity,
                                   referenceids[0])

            # set the output format
            config.output_format = "tsv"

            # set the location of the file to write to as a temp file
            file_out, gene_families_file = tempfile.mkstemp()
            os.close(file_out)
            config.genefamilies_file = gene_families_file

            # create gene_scores instance
            gene_scores = store.GeneScores()

            # obtain the gene families
            gene_families_file = families.gene_families(alignments, gene_scores, 1)

            # compare now, assert after cleanup so a mismatch does not skip it
            files_match = filecmp.cmp(gene_families_file,
                                      cfg.gene_familes_uniref50_with_names_file,
                                      shallow=False)
        finally:
            # reset the mapping file
            config.gene_family_name_mapping_file = original_gene_family_mapping_file

            # delete the temp file if one was created
            if gene_families_file is not None:
                utils.remove_temp_file(gene_families_file)

        # check the gene families output is as expected
        self.assertTrue(files_match)
Пример #10
0
    def test_nucleotide_search_unaligned_reads_read_count_aligned(self):
        """
        Load a bowtie2/sam output file with nucleotide.unaligned_reads and
        check the number of stored hits equals the expected aligned total
        """

        # containers filled in by the unaligned reads function
        hit_store = store.Alignments()
        read_store = store.Reads()

        # parse the sam file; two temp file paths are returned
        fasta_path, reduced_path = nucleotide.unaligned_reads(
            cfg.sam_file_unaligned_reads, hit_store, read_store, keep_sam=True)

        # neither temp file is needed for the count check
        utils.remove_temp_file(fasta_path)
        utils.remove_temp_file(reduced_path)

        # compare the stored hit count with the expected aligned total
        aligned_total = len(hit_store.get_hit_list())
        self.assertEqual(aligned_total,
                         cfg.sam_file_unaligned_reads_total_aligned)
Пример #11
0
    def test_nucleotide_search_unaligned_reads_read_count_aligned_evalue_threshold(
            self):
        """
        Test the unaligned reads and the store alignments
        Test with a bowtie2/sam output file
        Test for aligned read counts
        Test the evalue threshold does not filter alignments
        """

        # create a set of alignments
        alignments = store.Alignments()
        unaligned_reads_store = store.Reads()

        # update the evalue threshold to a number less than those for the alignment file
        original_evalue_threshold = config.evalue_threshold
        config.evalue_threshold = 1e-15

        try:
            # read in the aligned and unaligned reads
            unaligned_reads_file_fasta, reduced_aligned_reads_file = nucleotide.unaligned_reads(
                cfg.sam_file_unaligned_reads,
                alignments,
                unaligned_reads_store,
                keep_sam=True)

            # remove temp files
            utils.remove_temp_file(unaligned_reads_file_fasta)
            utils.remove_temp_file(reduced_aligned_reads_file)
        finally:
            # reset the evalue threshold back to the original, even on error,
            # so the modified config does not leak into other tests
            config.evalue_threshold = original_evalue_threshold

        # check the aligned reads count (all reads should be aligned even though they do not
        # meet the threshold as the evalue threshold is not applied for this type of alignment)
        self.assertEqual(len(alignments.get_hit_list()),
                         cfg.sam_file_unaligned_reads_total_aligned)
Пример #12
0
    def test_nucleotide_search_unaligned_reads_read_count_aligned_identity_threshold(
            self):
        """
        Test the unaligned reads and the store alignments
        Test with a bowtie2/sam output file
        Test for aligned read counts
        Test the identity threshold does filter alignments
        """

        # create a set of alignments
        alignments = store.Alignments()
        unaligned_reads_store = store.Reads()

        # update the identity threshold to a number larger than those in the alignments
        original_identity_threshold = config.identity_threshold
        config.identity_threshold = 101.0

        try:
            # read in the aligned and unaligned reads
            unaligned_reads_file_fasta, reduced_aligned_reads_file = nucleotide.unaligned_reads(
                cfg.sam_file_unaligned_reads,
                alignments,
                unaligned_reads_store,
                keep_sam=True)

            # remove temp files
            utils.remove_temp_file(unaligned_reads_file_fasta)
            utils.remove_temp_file(reduced_aligned_reads_file)
        finally:
            # reset the identity threshold back to the original, even on error,
            # so the modified config does not leak into other tests
            config.identity_threshold = original_identity_threshold

        # check the aligned reads count (it should be zero as none should pass the threshold)
        self.assertEqual(len(alignments.get_hit_list()), 0)
Пример #13
0
    def test_nucleotide_search_unaligned_reads_scores(self):
        """
        Load a bowtie2/sam output file with nucleotide.unaligned_reads and
        check the score stored with every hit equals 151.0 raised to the
        power config.match_power
        """

        # containers filled in by the unaligned reads function
        hit_store = store.Alignments()
        read_store = store.Reads()

        # parse the sam file; two temp file paths are returned
        fasta_path, reduced_path = nucleotide.unaligned_reads(
            cfg.sam_file_annotations, hit_store, read_store, keep_sam=True)

        # neither temp file is needed for the score check
        for temp_path in (fasta_path, reduced_path):
            utils.remove_temp_file(temp_path)

        recorded_hits = hit_store.get_hit_list()

        # the single score value every hit is expected to carry
        wanted_score = math.pow(151.0, config.match_power)

        # each hit is a five-item record; only the score is checked
        for _query, _bug, _reference, score, _length in recorded_hits:
            self.assertEqual(score, wanted_score)
Пример #14
0
    def test_humann_unpack_pathways_remove_taxonomy_tsv(self):
        """
        Test the tsv gene families and pathway abundance file entries with humann_unpack_pathways
        Test with the remove taxonomy option which stratifies by pathway then gene instead of
        stratifying by pathway, taxonomy, then gene
        """
        # local import: only needed to close the descriptor returned by mkstemp
        import os

        # create a temp file; mkstemp returns an open descriptor which must be
        # closed explicitly or it is leaked for the life of the test process
        file_out, new_file = tempfile.mkstemp(prefix="humann_temp")
        os.close(file_out)

        try:
            # run the command
            utils.run_command([
                "humann_unpack_pathways", "--input-genes",
                cfg.merge_abundance_genefamilies_input, "--input-pathways",
                cfg.merge_abundance_pathways_input, "--output", new_file,
                "--remove-taxonomy"
            ])

            # compare now, assert after cleanup so a mismatch does not skip it
            # allow for varying precision in the calculations with almost equal
            files_match = utils.files_almost_equal(
                new_file, cfg.merge_abundance_remove_taxonomy_output)
        finally:
            # remove the temp file
            utils.remove_temp_file(new_file)

        # check the output file is as expected
        self.assertTrue(files_match)
 def test_nucleotide_search_unaligned_reads_annotations_bug(self):
     """
     Test the unaligned reads and the store alignments
     Test with a bowtie2/sam output file
     Test the different annotation formats are recognized for bug
     """

     # create a set of alignments
     alignments = store.Alignments()
     unaligned_reads_store = store.Reads()

     # turn off query/subject filtering
     config.nucleotide_subject_coverage_threshold = 0
     config.nucleotide_query_coverage_threshold = 0

     try:
         # read in the aligned and unaligned reads
         unaligned_reads_file_fasta, reduced_aligned_reads_file = nucleotide.unaligned_reads(
             cfg.sam_file_annotations, alignments, unaligned_reads_store, keep_sam=True)
     finally:
         # reset query/subject filtering even if the read fails so the
         # zeroed thresholds do not leak into other tests
         config.nucleotide_subject_coverage_threshold = self.default_nucleotide_subject_coverage_threshold
         config.nucleotide_query_coverage_threshold = self.default_nucleotide_query_coverage_threshold

     # remove temp files
     utils.remove_temp_file(unaligned_reads_file_fasta)
     utils.remove_temp_file(reduced_aligned_reads_file)

     # there should be one bug which is unclassified
     self.assertEqual(alignments.bug_list(), ["unclassified"])
 def test_nucleotide_search_unaligned_reads_output_blast_format(self):
     """
     Test the unaligned reads and the store alignments
     Test with a bowtie2/sam output file
     Test the aligned reads file created is of the blastm8 format
     """

     # create a set of alignments
     alignments = store.Alignments()
     unaligned_reads_store = store.Reads()

     # turn off query/subject filtering
     config.nucleotide_subject_coverage_threshold = 0
     config.nucleotide_query_coverage_threshold = 0

     config.file_basename = "TEST"

     try:
         # read in the aligned and unaligned reads
         unaligned_reads_file_fasta, reduced_aligned_reads_file = nucleotide.unaligned_reads(
             cfg.sam_file_annotations, alignments, unaligned_reads_store, keep_sam=True)
     finally:
         # reset query/subject filtering even if the read fails so the
         # zeroed thresholds do not leak into other tests
         config.nucleotide_subject_coverage_threshold = self.default_nucleotide_subject_coverage_threshold
         config.nucleotide_query_coverage_threshold = self.default_nucleotide_query_coverage_threshold

     try:
         # determine the format of the aligned reads file before it is removed
         file_format = utilities.determine_file_format(reduced_aligned_reads_file)
     finally:
         # remove temp files
         utils.remove_temp_file(unaligned_reads_file_fasta)
         utils.remove_temp_file(reduced_aligned_reads_file)

     # test file is of the blastm8 format
     self.assertEqual(file_format, "blastm8")
    def test_translated_search_unaligned_reads_annotations_reference(self):
        """
        Test the unaligned reads and the store alignments
        Test with a rapsearch2 output file
        Test the different annotation formats are recognized for reference
        Test without the coverage filter
        """

        # create a set of alignments
        alignments = store.Alignments()
        unaligned_reads_store = store.Reads()

        # set the coverage threshold to zero so as to not test with filter on
        current_coverage_threshold = config.translated_subject_coverage_threshold
        config.translated_subject_coverage_threshold = 0

        try:
            # load the rapsearch2 output with the unaligned reads function
            unaligned_file_fasta = translated.unaligned_reads(
                unaligned_reads_store, cfg.rapsearch_file_annotations, alignments)

            # remove temp file
            utils.remove_temp_file(unaligned_file_fasta)
        finally:
            # always reset the coverage threshold, even when the load raises,
            # so the modified module-level config does not leak into other tests
            config.translated_subject_coverage_threshold = current_coverage_threshold

        # three of the hits should be for gene "UniRef50"
        hits = alignments.hits_for_gene("UniRef50")
        self.assertEqual(len(hits), 3)
 def test_nucleotide_search_unaligned_reads_annotations_reference(self):
     """
     Test the unaligned reads and the store alignments
     Test with a bowtie2/sam output file
     Test the different annotation formats are recognized for reference
     """

     # create a set of alignments
     alignments = store.Alignments()
     unaligned_reads_store = store.Reads()

     # turn off query/subject filtering
     config.nucleotide_subject_coverage_threshold = 0
     config.nucleotide_query_coverage_threshold = 0

     try:
         # read in the aligned and unaligned reads
         unaligned_reads_file_fasta, reduced_aligned_reads_file = nucleotide.unaligned_reads(
             cfg.sam_file_annotations, alignments, unaligned_reads_store, keep_sam=True)
     finally:
         # reset query/subject filtering even if the read fails so the
         # zeroed thresholds do not leak into other tests
         config.nucleotide_subject_coverage_threshold = self.default_nucleotide_subject_coverage_threshold
         config.nucleotide_query_coverage_threshold = self.default_nucleotide_query_coverage_threshold

     # remove temp files
     utils.remove_temp_file(unaligned_reads_file_fasta)
     utils.remove_temp_file(reduced_aligned_reads_file)

     # two of the hits should be for gene "UniRef50"
     hits = alignments.hits_for_gene("UniRef50")
     self.assertEqual(len(hits), 2)
 def test_nucleotide_search_unaligned_reads_read_count_aligned_subject_coverage(self):
     """
     Test the unaligned reads and the store alignments
     Test with a bowtie2/sam output file
     Test for aligned read counts
     Test with subject coverage filtering
     """

     # create a set of alignments
     alignments = store.Alignments()
     unaligned_reads_store = store.Reads()

     # turn off query coverage filtering only; the subject coverage
     # threshold is left untouched as it is the filter under test
     config.nucleotide_query_coverage_threshold = 0

     try:
         # read in the aligned and unaligned reads
         unaligned_reads_file_fasta, reduced_aligned_reads_file = nucleotide.unaligned_reads(
             cfg.sam_file_unaligned_reads, alignments, unaligned_reads_store, keep_sam=True)
     finally:
         # reset query coverage filtering even if the read fails so the
         # zeroed threshold does not leak into other tests
         config.nucleotide_query_coverage_threshold = self.default_nucleotide_query_coverage_threshold

     # remove temp files
     utils.remove_temp_file(unaligned_reads_file_fasta)
     utils.remove_temp_file(reduced_aligned_reads_file)

     # check the aligned reads count
     self.assertEqual(len(alignments.get_hit_list()),
                      cfg.sam_file_unaligned_reads_total_aligned_subject_coverage)
Пример #20
0
    def test_pathways_abundance_with_names(self):
        """
        Test the pathways abundance computation (xipe and minpath are off)
        Test the pathways print function
        Test the pathways mapping to names
        Test the unmapped and unintegrated values are printed
        """

        # update the max decimals to allow for rounding
        config.output_max_decimals = 7

        # Load in the pathways databases
        reactions_database = store.ReactionsDatabase(
            config.pathways_database_part1)
        pathways_database = store.PathwaysDatabase(
            config.pathways_database_part2, reactions_database)

        # Load in the gene scores from the file
        # This file has the gene names included
        gene_scores = store.GeneScores()
        gene_scores.add_from_file(
            cfg.larger_gene_families_uniref50_with_names_file)

        # Turn off xipe and minpath
        minpath_toggle_original = config.minpath_toggle
        config.minpath_toggle = "off"
        xipe_toggle_original = config.xipe_toggle
        config.xipe_toggle = "off"

        try:
            pathways_and_reactions_store = modules.identify_reactions_and_pathways(
                gene_scores, reactions_database, pathways_database)

            # set the locations to write as temp files
            file_out, abundance_file = tempfile.mkstemp()
            os.close(file_out)
            config.pathabundance_file = abundance_file

            file_out, coverage_file = tempfile.mkstemp()
            os.close(file_out)
            config.pathcoverage_file = coverage_file

            unaligned_reads_count = 10
            abundance_file, coverage_file = modules.compute_pathways_abundance_and_coverage(
                gene_scores, reactions_database, pathways_and_reactions_store,
                pathways_database, unaligned_reads_count)
        finally:
            # Reset xipe and minpath even if the computation fails so the
            # toggles do not leak into other tests
            config.minpath_toggle = minpath_toggle_original
            config.xipe_toggle = xipe_toggle_original

        try:
            # compare now, assert after cleanup so a mismatch does not skip it
            files_match = filecmp.cmp(abundance_file,
                                      cfg.demo_pathabundance_file,
                                      shallow=False)
        finally:
            utils.remove_temp_file(abundance_file)
            utils.remove_temp_file(coverage_file)

        # check the output is as expected
        self.assertTrue(files_match)
Пример #21
0
    def test_fastq_to_fasta(self):
        """
        Test the fastq_to_fasta function
        """

        new_fasta_file = utilities.fastq_to_fasta(cfg.convert_fastq_file)

        try:
            # compare now, assert after cleanup so a mismatch does not
            # leave the temp fasta file behind
            files_match = filecmp.cmp(
                new_fasta_file, cfg.convert_fasta_file, shallow=False)
        finally:
            utils.remove_temp_file(new_fasta_file)

        self.assertTrue(files_match)
Пример #22
0
 def test_fastq_to_fasta_with_pick_frames(self):
     """
     Test the fastq_to_fasta function with pick frames
     """

     new_fasta_file = utilities.fastq_to_fasta(
         cfg.convert_fastq_file, apply_pick_frames=True)

     try:
         # compare now, assert after cleanup so a mismatch does not
         # leave the temp fasta file behind
         files_match = filecmp.cmp(
             new_fasta_file, cfg.convert_fasta_pick_frames_file, shallow=False)
     finally:
         utils.remove_temp_file(new_fasta_file)

     self.assertTrue(files_match)
Пример #23
0
 def test_pick_frames_from_fasta(self):
     """
     Test the pick_frames_from_fasta function
     """

     new_fasta_file = utilities.pick_frames_from_fasta(
         cfg.convert_fasta_multiline_file)

     try:
         # compare now, assert after cleanup so a mismatch does not
         # leave the temp fasta file behind
         files_match = filecmp.cmp(
             new_fasta_file, cfg.convert_fasta_pick_frames_file, shallow=False)
     finally:
         utils.remove_temp_file(new_fasta_file)

     self.assertTrue(files_match)
                  
    def test_translated_search_unaligned_reads_blastm8(self):
        """
        Test the unaligned reads and the store alignments
        Test with a blastm8-like output file
        Test with empty reads structure
        Test that function does not require gene lengths in reference id
        Test without the coverage filter
        """

        # create a set of alignments
        alignments = store.Alignments()

        # set the coverage threshold to zero so as to not test with filter on
        current_coverage_threshold = config.translated_subject_coverage_threshold
        config.translated_subject_coverage_threshold = 0

        try:
            # load the blastm8-like output by hand, skipping comment lines,
            # to build the alignments the function under test should reproduce
            with open(cfg.rapsearch2_output_file_without_header) as file_handle:
                for line in file_handle:
                    if line.startswith("#"):
                        continue

                    data = line.strip().split(config.blast_delimiter)

                    referenceid = data[config.blast_reference_index]
                    queryid = data[config.blast_query_index]
                    identity = float(data[config.blast_identity_index])
                    alignment_length = float(
                        data[config.blast_aligned_length_index])

                    alignments.add(referenceid, 0, queryid,
                                   identity / 100.0 * alignment_length,
                                   "unclassified", alignment_length)

            alignments_test = store.Alignments()
            unaligned_reads_store = store.Reads()

            # load the blastm8-like output with the unaligned reads function
            unaligned_file_fasta = translated.unaligned_reads(
                unaligned_reads_store, cfg.rapsearch2_output_file_without_header,
                alignments_test)

            # remove temp file
            utils.remove_temp_file(unaligned_file_fasta)
        finally:
            # always reset the coverage threshold, even on error, so the
            # modified module-level config does not leak into other tests
            config.translated_subject_coverage_threshold = current_coverage_threshold

        # check the values are unchanged
        self.assertEqual(sorted(alignments.get_hit_list()),
                         sorted(alignments_test.get_hit_list()))
Пример #25
0
    def test_fastq_to_fasta(self):
        """
        Test the fastq_to_fasta function with a set of sequences
        which have the @ quality score as the first score
        This tests that the sequence id and sequence are selected correctly
        even though the @ starts a sequence id line and a quality score line
        """

        new_fasta_file = utilities.fastq_to_fasta(
            cfg.convert_fastq_at_character_file)

        try:
            # compare now, assert after cleanup so a mismatch does not
            # leave the temp fasta file behind
            files_match = filecmp.cmp(
                new_fasta_file, cfg.convert_fasta_file, shallow=False)
        finally:
            utils.remove_temp_file(new_fasta_file)

        self.assertTrue(files_match)
Пример #26
0
    def test_break_up_fasta_file(self):
        """
        Test the break_up_fasta_file function
        """

        # Break up the file into smaller files each containing a single read
        new_fasta_files = utilities.break_up_fasta_file(
            cfg.small_fasta_file, 1)

        # count the reads in every file and remove each file as it is counted,
        # so a failing assertion below cannot leave later temp files behind
        sequence_counts = []
        for fasta_file in new_fasta_files:
            try:
                sequence_counts.append(utilities.count_reads(fasta_file))
            finally:
                utils.remove_temp_file(fasta_file)

        # each of the smaller files should hold exactly one read
        for sequence_count in sequence_counts:
            self.assertEqual(sequence_count, 1)
    def test_gene_families_tsv_output(self):
        """
        Test the gene families function and the blast config indexes
        Test UniRef50_unknown is read in and used for gene scores but not printed
        Test the tsv output
        """

        # create a set of alignments
        alignments = store.Alignments()

        # load the usearch output, skipping comment lines
        with open(cfg.usearch_file) as file_handle:
            for line in file_handle:
                if line.startswith("#"):
                    continue

                data = line.strip().split(config.blast_delimiter)

                referenceids = data[config.blast_reference_index].split("|")
                queryid = data[config.blast_query_index]
                evalue = float(data[config.blast_evalue_index])

                alignments.add(referenceids[1], 1, queryid, evalue,
                               referenceids[0])

        # set the output format
        config.output_format = "tsv"

        # set the location of the file to write to as a temp file
        file_out, gene_families_file = tempfile.mkstemp()
        os.close(file_out)
        config.genefamilies_file = gene_families_file

        try:
            # create gene_scores instance
            gene_scores = store.GeneScores()

            # obtain the gene families
            gene_families_file = quantify_families.gene_families(
                alignments, gene_scores)

            # compare now, assert after cleanup so a mismatch does not skip it
            files_match = filecmp.cmp(gene_families_file,
                                      cfg.gene_familes_file,
                                      shallow=False)
        finally:
            # delete the temp file
            utils.remove_temp_file(gene_families_file)

        # check the gene families output is as expected
        self.assertTrue(files_match)
Пример #28
0
    def test_remove_spaces_from_file(self):
        """
        Run remove_spaces_from_file on the small fastq file with spaces and
        check the resulting lines equal those of the fastq file without spaces
        """

        spaceless_path = utilities.remove_spaces_from_file(
            cfg.small_fastq_spaces_file)

        # lines of the reference file that has no spaces
        with open(cfg.small_fastq_no_spaces_file) as reference_handle:
            lines_wanted = list(reference_handle)

        # lines of the file produced by the function under test
        with open(spaceless_path) as produced_handle:
            lines_found = list(produced_handle)

        # the produced file is no longer needed once its lines are read
        utils.remove_temp_file(spaceless_path)

        self.assertEqual(lines_wanted, lines_found)
Пример #29
0
    def test_sam_to_fastq(self):
        """
        Test the sam to fastq function
        Test sam file contains one read with two mappings (to test it is only
        written once to the fastq output file)
        """
        # local import: only needed to close the descriptor returned by mkstemp
        import os

        # mkstemp returns an open descriptor which must be closed explicitly
        # or it is leaked for the life of the test process
        file_descriptor, temp_output_file = tempfile.mkstemp(
            prefix="kneaddata_test")
        os.close(file_descriptor)

        try:
            utilities.sam_to_fastq(cfg.file_sam, temp_output_file)

            # compare now, assert after cleanup so a mismatch does not
            # leave the temp output file behind
            files_match = filecmp.cmp(temp_output_file,
                                      cfg.fastq_file_matches_sam_and_bam,
                                      shallow=False)
        finally:
            utils.remove_temp_file(temp_output_file)

        self.assertTrue(files_match)
 def test_nucleotide_search_unaligned_reads_annotations_gene_length(self):
     """
     Test the unaligned reads and the store alignments
     Test with a bowtie2/sam output file
     Test the different annotation formats are recognized for gene length
     Test the gene length uses the read length from the sam file
     """

     # create a set of alignments
     alignments = store.Alignments()
     unaligned_reads_store = store.Reads()

     # turn off query/subject filtering
     config.nucleotide_subject_coverage_threshold = 0
     config.nucleotide_query_coverage_threshold = 0

     try:
         # read in the aligned and unaligned reads
         unaligned_reads_file_fasta, reduced_aligned_reads_file = nucleotide.unaligned_reads(
             cfg.sam_file_annotations, alignments, unaligned_reads_store, keep_sam=True)
     finally:
         # reset query/subject filtering even if the read fails so the
         # zeroed thresholds do not leak into other tests
         config.nucleotide_subject_coverage_threshold = self.default_nucleotide_subject_coverage_threshold
         config.nucleotide_query_coverage_threshold = self.default_nucleotide_query_coverage_threshold

     # remove temp files
     utils.remove_temp_file(unaligned_reads_file_fasta)
     utils.remove_temp_file(reduced_aligned_reads_file)

     # there should be 4 hits identified
     all_hits = alignments.get_hit_list()
     self.assertEqual(len(all_hits), 4)

     # check for set and default gene lengths
     # expected length is (|gene length - read length| + 1) / 1000 with gene
     # lengths of 2000 for "UniRef50" hits and 1000 for all other hits
     read_length = 151
     expected_length_uniref50 = (abs(2000 - read_length) + 1) / 1000.0
     expected_length_other = (abs(1000 - read_length) + 1) / 1000.0

     for hit in all_hits:
         query, bug, reference, score, length = hit
         if reference == "UniRef50":
             self.assertEqual(length, expected_length_uniref50)
         else:
             self.assertEqual(length, expected_length_other)