Exemplo n.º 1
0
    def test_dna_strange_character_replace(self):
        '''Check masking of unexpected letters in nucleotide sequences.

        A sequence made only of A/T/G/C is expected to come back untouched,
        while 'ATGCRTWU' is expected to be rewritten in place to 'ATGCNTNT'.
        In both cases the list must still hold exactly one sequence.
        '''
        create = Create(prerequisites)
        # (input letters, letters expected after masking)
        scenarios = (
            ('ATGC', 'ATGC'),
            ('ATGCRTWU', 'ATGCNTNT'),
        )
        for raw_letters, masked_letters in scenarios:
            seqs = [Sequence('namer', raw_letters)]
            # The method's return value is not used; the test inspects the
            # list it was handed.
            create._mask_strange_sequence_letters(
                seqs, Create._NUCLEOTIDE_PACKAGE_TYPE)
            self.assertEqual(1, len(seqs))
            self.assertEqual(masked_letters, str(seqs[0].seq))
Exemplo n.º 2
0
    def test_strange_character_replace(self):
        '''Check masking of unexpected letters in protein sequences.

        'SEQWENCE' is expected to come back untouched, while 'SEQUENCE' is
        expected to be rewritten in place to 'SEQXENCE'. In both cases the
        list must still hold exactly one sequence.
        '''
        create = Create(prerequisites)
        # (input letters, letters expected after masking)
        scenarios = (
            ('SEQWENCE', 'SEQWENCE'),
            ('SEQUENCE', 'SEQXENCE'),
        )
        for raw_letters, masked_letters in scenarios:
            seqs = [Sequence('namer', raw_letters)]
            # The method's return value is not used; the test inspects the
            # list it was handed.
            create._mask_strange_sequence_letters(
                seqs, Create._PROTEIN_PACKAGE_TYPE)
            self.assertEqual(1, len(seqs))
            self.assertEqual(masked_letters, str(seqs[0].seq))
Exemplo n.º 3
0
    def extract_and_read(self, reads_to_extract, database_fasta_file):
        '''Extract the reads_to_extract from the database_fasta_file and return them.

        The read names are written, one per line, to the standard input of
        an fxtract process, and the FASTA text it prints is parsed into
        Sequence objects.

        Parameters
        ----------
        reads_to_extract: Iterable of str
            IDs of reads to be extracted
        database_fasta_file: str
            path to the fasta file containing the reads

        Returns
        -------
        A list of graftm.sequence_io.Sequence objects

        Raises
        ------
        Exception
            if fxtract exits with a non-zero status
        OSError
            if the fxtract executable cannot be started
        '''
        # An argument list (rather than a "bash -c" string) means the path is
        # handed to fxtract verbatim: quotes or shell metacharacters in
        # database_fasta_file can neither break nor inject into the command.
        cmd = ["fxtract", "-XH", "-f", "/dev/stdin", database_fasta_file]

        # universal_newlines=True opens the pipes in text mode, so the joined
        # str of read names can be written and the output read back as str on
        # Python 3 as well as Python 2.
        process = subprocess.Popen(cmd,
                                   stdin=subprocess.PIPE,
                                   stdout=subprocess.PIPE,
                                   universal_newlines=True)
        # stderr is not piped, so the second element returned is always None.
        output, _stderr = process.communicate('\n'.join(reads_to_extract))

        if process.returncode != 0:
            raise Exception(
                "Extraction command '%s' failed with exitstatus %i" %
                (' '.join(cmd), process.returncode))

        # SequenceIO().each yields 3-tuples; only the first two fields (name
        # and sequence) are needed here.
        return [Sequence(name, seq)
                for name, seq, _ in SequenceIO().each(StringIO(output))]
Exemplo n.º 4
0
 def s(self, seqs):
     '''Build Sequence objects from a "name1 seq1 name2 seq2 ..." string.

     Parameters
     ----------
     seqs: str
         names and sequences alternating, separated by single spaces

     Returns
     -------
     A list of Sequence objects, one per complete (name, sequence) pair, in
     input order. A trailing name with no sequence after it is dropped.
     '''
     # str.split replaces the bare split(seqs, ' ') call, which was
     # presumably the Python 2 string.split function; that function does not
     # exist in Python 3.
     tokens = seqs.split(' ')
     # Even positions are names and odd positions are sequences. zip() stops
     # at the shorter slice, so an unpaired final name is ignored.
     return [Sequence(name, seq)
             for name, seq in zip(tokens[0::2], tokens[1::2])]
Exemplo n.º 5
0
    def extract_and_read(self, reads_to_extract, database_fasta_file):
        '''Pull the requested reads out of a FASTA file using mfqe.

        The de-duplicated read names are fed, one per line, to mfqe on its
        standard input, and the text returned by extern.run (presumably
        mfqe's standard output) is parsed into Sequence objects.

        Parameters
        ----------
        reads_to_extract: Iterable of str
            IDs of reads to be extracted
        database_fasta_file: str
            path to the fasta file containing the reads

        Returns
        -------
        A list of graftm.sequence_io.Sequence objects, in the order the
        parser yields them
        '''
        command_template = (
            "mfqe --output-uncompressed --fasta-read-name-lists /dev/stdin "
            "--input-fasta '{}' --output-fasta-files /dev/stdout")
        cmd = command_template.format(database_fasta_file)

        # Each name is requested only once so that mfqe does not fail on
        # repeated names.
        unique_names = set(reads_to_extract)
        fasta_text = extern.run(cmd, stdin='\n'.join(unique_names))

        # SequenceIO().each yields 3-tuples; the third field is not needed.
        return [
            Sequence(name, seq)
            for name, seq, _ in SequenceIO().each(StringIO(fasta_text))
        ]
Exemplo n.º 6
0
    def test_basic_split(self):
        '''Check that jplace_split separates one placement by cluster.

        The input jplace holds a single placement shared by two reads whose
        names end in '_0' and '_1'. The test expects a dict keyed by '0' and
        '1', each holding the same placement rows with only its own read,
        named without that suffix.
        '''

        def placement_rows():
            # Build a fresh list on every call so the input and the expected
            # values never share objects that jplace_split could mutate.
            return [
                [
                    "p__Proteobacteria", 0.107586583111, 1, 0.970420466541,
                    -614.032176075, 0.22226616471
                ],
                [
                    "k__Bacteria", 0.220493270874, 0, 0.0147918928965,
                    -618.21582627, 0.248671444337
                ],
                [
                    "p__Proteobacteria", 8.77624511719e-06, 2,
                    0.0147876405626, -618.216113788, 0.248672441848
                ],
            ]

        mock_cluster_hash = {
            '0': {
                "test_read1": [Sequence("test_read1", "SEQUENCE")]
            },
            '1': {
                "test_read2": [Sequence("test_read2", "SEQUENCE")]
            }
        }
        test_json = {
            "fields": [
                "classification", "distal_length", "edge_num",
                "like_weight_ratio", "likelihood", "pendant_length"
            ],
            "tree":
            "((696036:0.2205{0},229854:0.20827{1})1.000:0.14379{2},3190878:0.23845{3},2107103:0.32104{4}){5};",
            "placements": [{
                "p": placement_rows(),
                "nm": [["test_read1_0", 1], ["test_read2_1", 1]]
            }],
            "version":
            3,
            "metadata": {
                # The backslashes are escaped explicitly: a bare "\/" is an
                # invalid escape sequence, although it produced this same
                # runtime text.
                "invocation":
                "pplacer -c test_16S.gpkg\\/test_16S.gpkg.refpkg\\/ GraftM_output\\/combined_alignment.aln.fa"
            }
        }

        pplacer = Pplacer("refpkg_decoy")

        observed_placement = pplacer.jplace_split(test_json, mock_cluster_hash)

        expected_placement = {
            '0': [{
                "p": placement_rows(),
                "nm": [["test_read1", 1]]
            }],
            '1': [{
                "p": placement_rows(),
                "nm": [["test_read2", 1]]
            }]
        }

        self.assertEqual(expected_placement, observed_placement)