def test_dna_strange_character_replace(self):
    # Each pair is (input nucleotide sequence, sequence expected after
    # masking). 'ATGC' is expected back unchanged; for 'ATGCRTWU' the
    # expectation is that R and W come back as N and U comes back as T.
    cases = (
        ('ATGC', 'ATGC'),
        ('ATGCRTWU', 'ATGCNTNT'),
    )
    masker = Create(prerequisites)
    for raw, masked in cases:
        batch = [Sequence('namer', raw)]
        # Masking is applied in place to the Sequence objects in the list.
        masker._mask_strange_sequence_letters(
            batch, Create._NUCLEOTIDE_PACKAGE_TYPE)
        self.assertEqual(1, len(batch))
        self.assertEqual(masked, str(batch[0].seq))
def test_strange_character_replace(self):
    # Each pair is (input protein sequence, sequence expected after
    # masking). 'SEQWENCE' is expected back unchanged; for 'SEQUENCE' the
    # expectation is that the U comes back as X.
    cases = (
        ('SEQWENCE', 'SEQWENCE'),
        ('SEQUENCE', 'SEQXENCE'),
    )
    masker = Create(prerequisites)
    for raw, masked in cases:
        batch = [Sequence('namer', raw)]
        # Masking is applied in place to the Sequence objects in the list.
        masker._mask_strange_sequence_letters(
            batch, Create._PROTEIN_PACKAGE_TYPE)
        self.assertEqual(1, len(batch))
        self.assertEqual(masked, str(batch[0].seq))
def extract_and_read(self, reads_to_extract, database_fasta_file):
    '''Extract the reads_to_extract from the database_fasta_file and return
    them.

    Parameters
    ----------
    reads_to_extract: Iterable of str
        IDs of reads to be extracted
    database_fasta_file: str
        path to the fasta file containing the reads

    Returns
    -------
    A list of graftm.sequence_io.Sequence objects, one per record parsed
    from the output of fxtract

    Raises
    ------
    Exception
        if fxtract exits with a non-zero status
    OSError
        if the fxtract executable cannot be started
    '''
    # Human-readable form of the command, used only in the error message.
    cmd = "fxtract -XH -f /dev/stdin '%s'" % database_fasta_file

    # Invoke fxtract with an argument list instead of a "bash -c" string, so
    # the fasta path is passed verbatim and never interpreted by a shell
    # (a path containing a single quote used to break the quoting).
    # universal_newlines=True opens the pipes in text mode, so the str read
    # list can be written to stdin and the captured output is a str that
    # StringIO accepts.
    process = subprocess.Popen(
        ["fxtract", "-XH", "-f", "/dev/stdin", database_fasta_file],
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        universal_newlines=True)

    # stderr is not piped, so the second element returned is always None.
    output, _stderr = process.communicate('\n'.join(reads_to_extract))

    if process.returncode != 0:
        raise Exception(
            "Extraction command '%s' failed with exitstatus %i" %
            (cmd, process.returncode))

    # SequenceIO().each yields 3-tuples; only the name and the sequence
    # are kept.
    return [Sequence(name, seq)
            for name, seq, _ in SequenceIO().each(StringIO(output))]
def s(self, seqs):
    '''Build Sequence objects from a single-space-separated string in which
    name tokens and sequence tokens alternate ("name1 seq1 name2 seq2").

    Parameters
    ----------
    seqs: str
        alternating names and sequences, separated by single spaces

    Returns
    -------
    A list of Sequence objects, in the order they appear in seqs. A
    trailing name that has no sequence token after it is not included.'''
    parsed = []
    pending = None
    for token in split(seqs, ' '):
        if not pending:
            # No record is open: this token is a name. The sequence is
            # filled in from the next token.
            pending = Sequence(token, '')
            continue
        # A record is open: this token is its sequence.
        pending.seq = token
        parsed.append(pending)
        pending = None
    return parsed
def extract_and_read(self, reads_to_extract, database_fasta_file):
    '''Pull the named reads out of a fasta file using mfqe and return them.

    Parameters
    ----------
    reads_to_extract: Iterable of str
        IDs of reads to be extracted
    database_fasta_file: str
        path to the fasta file containing the reads

    Returns
    -------
    A list of graftm.sequence_io.Sequence objects, one per record parsed
    from the output of mfqe'''
    mfqe_command = (
        "mfqe --output-uncompressed"
        " --fasta-read-name-lists /dev/stdin"
        " --input-fasta '{}'"
        " --output-fasta-files /dev/stdout"
    ).format(database_fasta_file)

    # Each read name is sent only once: duplicated names make mfqe fail.
    unique_names = set(reads_to_extract)
    fasta_text = extern.run(mfqe_command, stdin='\n'.join(unique_names))

    # SequenceIO().each yields 3-tuples; only the name and the sequence
    # are kept.
    return [Sequence(name, seq)
            for name, seq, _ in SequenceIO().each(StringIO(fasta_text))]
def test_basic_split(self):
    # Feeds Pplacer.jplace_split a jplace document holding one placement
    # shared by two reads ("test_read1_0" and "test_read2_1") and expects
    # one placement list per key of the cluster hash ('0' and '1'), with
    # the read names reported without their "_0" / "_1" suffix.

    def placement_rows():
        # Returns a fresh copy on every call so that the input document and
        # the expected output never share list objects.
        return [
            ["p__Proteobacteria", 0.107586583111, 1, 0.970420466541,
             -614.032176075, 0.22226616471],
            ["k__Bacteria", 0.220493270874, 0, 0.0147918928965,
             -618.21582627, 0.248671444337],
            ["p__Proteobacteria", 8.77624511719e-06, 2, 0.0147876405626,
             -618.216113788, 0.248672441848],
        ]

    mock_cluster_hash = {
        '0': {"test_read1": [Sequence("test_read1", "SEQUENCE")]},
        '1': {"test_read2": [Sequence("test_read2", "SEQUENCE")]},
    }

    test_json = {
        "fields": [
            "classification",
            "distal_length",
            "edge_num",
            "like_weight_ratio",
            "likelihood",
            "pendant_length",
        ],
        "tree": "((696036:0.2205{0},229854:0.20827{1})1.000:0.14379{2},3190878:0.23845{3},2107103:0.32104{4}){5};",
        "placements": [{
            "p": placement_rows(),
            "nm": [["test_read1_0", 1], ["test_read2_1", 1]],
        }],
        "version": 3,
        "metadata": {
            # The backslashes are part of the value (JSON-style "\/"); they
            # are escaped here so the literal is a valid Python string.
            "invocation": "pplacer -c test_16S.gpkg\\/test_16S.gpkg.refpkg\\/ GraftM_output\\/combined_alignment.aln.fa",
        },
    }

    pplacer = Pplacer("refpkg_decoy")
    observed_placement = pplacer.jplace_split(test_json, mock_cluster_hash)

    expected_placement = {
        '0': [{"p": placement_rows(), "nm": [["test_read1", 1]]}],
        '1': [{"p": placement_rows(), "nm": [["test_read2", 1]]}],
    }
    self.assertEqual(expected_placement, observed_placement)