def check_convert(in_filename, in_format, out_format, alphabet=None):
    """Check AlignIO.convert agrees with AlignIO.parse followed by AlignIO.write.

    The input file is converted three ways: parse + write, convert() given
    an input filename, and convert() given an open input handle.  All three
    must report the same alignment count and produce identical text.  A
    ValueError raised by a write or convert call is counted as zero
    alignments written.
    """
    # Reference result: load every alignment, then write them all out.
    alignments = list(AlignIO.parse(in_filename, in_format, None, alphabet))
    reference = StringIO()
    try:
        expected_count = AlignIO.write(alignments, reference, out_format)
    except ValueError:
        expected_count = 0

    # convert() given an input filename and an output handle.
    from_filename = StringIO()
    try:
        converted = AlignIO.convert(in_filename, in_format,
                                    from_filename, out_format, alphabet)
    except ValueError:
        converted = 0
    assert expected_count == converted
    assert reference.getvalue() == from_filename.getvalue()

    # convert() given an input handle and an output handle.
    from_handle = StringIO()
    try:
        with open(in_filename) as source:
            converted = AlignIO.convert(source, in_format,
                                        from_handle, out_format, alphabet)
    except ValueError:
        converted = 0
    assert expected_count == converted
    assert reference.getvalue() == from_handle.getvalue()
def check_convert(in_filename, in_format, out_format, alphabet=None):
    """Compare the three AlignIO conversion routes for one input file.

    Route one parses the file and writes the alignments; routes two and
    three call AlignIO.convert with a filename and with an open handle
    respectively.  Each route's count and output text must match route one.
    If a route raises ValueError its count is taken to be zero.
    """
    # Route 1: parse/write
    parse_write_out = StringIO()
    parsed = list(AlignIO.parse(in_filename, in_format, None, alphabet))
    try:
        baseline = AlignIO.write(parsed, parse_write_out, out_format)
    except ValueError:
        baseline = 0

    # Route 2: convert, passing filename and handle
    convert_out = StringIO()
    try:
        result = AlignIO.convert(in_filename, in_format, convert_out,
                                 out_format, alphabet)
    except ValueError:
        result = 0
    assert baseline == result
    assert parse_write_out.getvalue() == convert_out.getvalue()

    # Route 3: convert, passing handle and handle
    convert_out = StringIO()
    try:
        with open(in_filename) as in_handle:
            result = AlignIO.convert(in_handle, in_format, convert_out,
                                     out_format, alphabet)
    except ValueError:
        result = 0
    assert baseline == result
    assert parse_write_out.getvalue() == convert_out.getvalue()
def simple_check(self, base_name, in_variant):
    """Convert one FASTQ file to every variant and compare with references.

    Reads ``Quality/<base_name>_original_<in_variant>.fastq`` and, for each
    of the sanger, solexa and illumina output variants, checks that both
    SeqIO.convert and SeqIO.parse + SeqIO.write reproduce the contents of
    ``Quality/<base_name>_as_<out_variant>.fastq`` exactly.
    """
    in_filename = "Quality/%s_original_%s.fastq" % (base_name, in_variant)
    for out_variant in ("sanger", "solexa", "illumina"):
        self.assertTrue(os.path.isfile(in_filename))
        # Load the reference output...
        reference_name = "Quality/%s_as_%s.fastq" % (base_name, out_variant)
        with open(reference_name, _universal_read_mode) as reference:
            expected = reference.read()

        with warnings.catch_warnings():
            if out_variant != "sanger":
                # Ignore data loss warnings from max qualities
                for category in (BiopythonWarning, UserWarning):
                    warnings.simplefilter("ignore", category)
            source_format = "fastq-" + in_variant
            target_format = "fastq-" + out_variant

            # Check matches using convert...
            converted = StringIO()
            SeqIO.convert(in_filename, source_format, converted, target_format)
            self.assertEqual(expected, converted.getvalue())

            # Check matches using parse/write
            rewritten = StringIO()
            SeqIO.write(SeqIO.parse(in_filename, source_format),
                        rewritten, target_format)
            self.assertEqual(expected, rewritten.getvalue())
def simple_check(self, base_name, in_variant):
    """Check FASTQ variant conversion of one file against stored outputs.

    For each output variant (sanger, solexa, illumina) the original file
    ``Quality/<base_name>_original_<in_variant>.fastq`` is converted twice,
    once with SeqIO.convert and once with SeqIO.parse + SeqIO.write, and
    both results must equal ``Quality/<base_name>_as_<out_variant>.fastq``.
    """
    for out_variant in ["sanger", "solexa", "illumina"]:
        in_filename = "Quality/%s_original_%s.fastq" % (base_name, in_variant)
        self.assertTrue(os.path.isfile(in_filename))

        # Load the reference output...
        expected_path = "Quality/%s_as_%s.fastq" % (base_name, out_variant)
        with open(expected_path, _universal_read_mode) as expected_handle:
            expected = expected_handle.read()

        with warnings.catch_warnings():
            lossy = out_variant != "sanger"
            if lossy:
                # Ignore data loss warnings from max qualities
                warnings.simplefilter("ignore", BiopythonWarning)
                warnings.simplefilter("ignore", UserWarning)

            # Check matches using convert...
            out_handle = StringIO()
            SeqIO.convert(in_filename, "fastq-" + in_variant,
                          out_handle, "fastq-" + out_variant)
            self.assertEqual(expected, out_handle.getvalue())

            # Check matches using parse/write
            out_handle = StringIO()
            parsed = SeqIO.parse(in_filename, "fastq-" + in_variant)
            SeqIO.write(parsed, out_handle, "fastq-" + out_variant)
            self.assertEqual(expected, out_handle.getvalue())
def test_TaggingConsumer(self):
    """Check the text TaggingConsumer writes for section and data events."""
    buffer = StringIO()
    consumer = ParserSupport.TaggingConsumer(handle=buffer, colwidth=5)

    def reset_buffer():
        # Empty the buffer so each check sees only the latest event.
        buffer.seek(0)
        buffer.truncate(0)

    consumer.start_section()
    self.assertEqual(buffer.getvalue(), "***** start_section\n")
    reset_buffer()

    consumer.test1("myline")
    self.assertEqual(buffer.getvalue(), "test1: myline\n")
    reset_buffer()

    consumer.end_section()
    self.assertEqual(buffer.getvalue(), "***** end_section\n")
def test_widget(self):
    """Try widget derived functionality.

    Checks three things on a ChromosomeSegment: that dumpProperties()
    output mentions ``chr_percent = 0.25``, that getProperties() reports a
    ``label_size`` of 6, and that setProperties() updates
    ``start_x_position``.
    """
    test_widget = BasicChromosome.ChromosomeSegment()

    expected_string = "chr_percent = 0.25"

    # Trick to write the properties to a string: temporarily point
    # sys.stdout at a StringIO.  The try/finally guarantees the real stdout
    # is restored even if dumpProperties() raises, so a failure here cannot
    # swallow the output of later tests.
    save_stdout = sys.stdout
    new_stdout = StringIO()
    sys.stdout = new_stdout
    try:
        test_widget.dumpProperties()
    finally:
        sys.stdout = save_stdout
    properties = new_stdout.getvalue()

    self.assertIn(expected_string, properties,
                  "Unexpected results from dumpProperties: \n %s" % properties)

    properties = test_widget.getProperties()
    self.assertEqual(properties["label_size"], 6,
                     "Unexpected results from getProperties: %s" % properties)

    test_widget.setProperties({"start_x_position": 12})
    self.assertEqual(test_widget.start_x_position, 12,
                     "setProperties doesn't seem to work right: %s"
                     % test_widget.start_x_position)
def test_fastq_1000(self):
    """Read and write back simple example with mixed case 1000bp read."""
    title = "id descr goes here"
    sequence = "ACGTNncgta" * 100
    quality = "abcd!!efgh" * 100
    data = "@%s\n%s\n+\n%s\n" % (title, sequence, quality)

    output = StringIO()
    records = SeqIO.parse(StringIO(data), "fastq")
    written = SeqIO.write(records, output, "fastq")

    # Exactly one record, reproduced character for character.
    self.assertEqual(1, written)
    self.assertEqual(data, output.getvalue())
def test_write(self):
    """Check EnrichmentWriter output for a two-entry Enrichment."""
    corrections = ['bh_fdr', 'bonferroni']

    first = EnrichmentEntry("9951", "structure-specific DNA binding",
                            0.032301032301)
    first.corrections = {'bh_fdr': 1.0, 'bonferroni': 1.0}
    first.attrs = {'plot': [0.1, 0.2, 1.0, 0.1]}

    second = EnrichmentEntry("9916", "polysomal ribosome", 0.025)
    second.corrections = {'bh_fdr': 1.0, 'bonferroni': 1.0}
    second.attrs = {}

    enrichment = Enrichment("ranked parent-child", [first, second],
                            ["Cycles found..."], corrections)

    result = StringIO()
    EnrichmentWriter(result).write(enrichment)

    # Expected rows, one string per output line (CRLF terminated).
    expected_rows = [
        "# ranked parent-child\r\n",
        "# 2 1\r\n",
        "id\tname\tp-value\tbh_fdr|bonferroni\tattributes\r\n",
        "9951\tstructure-specific DNA binding\t0.032301032301\t1.0|1.0\t{'plot': [0.1, 0.2, 1.0, 0.1]}\r\n",
        "9916\tpolysomal ribosome\t0.025\t1.0|1.0\t{}\r\n",
        "!\tCycles found...\r\n",
    ]
    expected = "".join(expected_rows)
    self.assertEqual(expected, result.getvalue())
def test_fasta_out(self):
    """Check FASTQ to FASTA output."""
    fastq_records = SeqIO.parse("Quality/example.fastq", "fastq")
    buffer = StringIO()
    SeqIO.write(fastq_records, buffer, "fasta")
    actual_text = buffer.getvalue()

    # The stored FASTA file is the expected conversion result.
    with open("Quality/example.fasta") as reference:
        expected_text = reference.read()
    self.assertEqual(actual_text, expected_text)
def test_widget(self):
    """Try widget derived functionality.

    Exercises dumpProperties(), getProperties() and setProperties() on a
    ChromosomeSegment and checks the values each one reports or stores.
    """
    test_widget = BasicChromosome.ChromosomeSegment()

    expected_string = "chr_percent = 0.25"

    # Trick to write the properties to a string: swap sys.stdout for a
    # StringIO while dumpProperties() runs.  Restoring it in a finally
    # clause means an exception in dumpProperties() cannot leave stdout
    # redirected for the rest of the test run.
    save_stdout = sys.stdout
    new_stdout = StringIO()
    sys.stdout = new_stdout
    try:
        test_widget.dumpProperties()
    finally:
        sys.stdout = save_stdout
    properties = new_stdout.getvalue()

    # assertIn reports both operands when the check fails.
    self.assertIn(
        expected_string, properties,
        "Unexpected results from dumpProperties: \n %s" % properties)

    properties = test_widget.getProperties()
    self.assertEqual(
        properties["label_size"], 6,
        "Unexpected results from getProperties: %s" % properties)

    test_widget.setProperties({"start_x_position": 12})
    self.assertEqual(
        test_widget.start_x_position, 12,
        "setProperties doesn't seem to work right: %s"
        % test_widget.start_x_position)
def test_qual_negative(self):
    """Check QUAL negative scores mapped to PHRED zero."""
    # (record name, QUAL scores, expected FASTQ quality string)
    examples = [
        ("1117_10_107_F3",
         "23 31 -1 -1 -1 29 -1 -1 20 32 -1 18 25 7 -1 6 -1 -1 -1 30 -1 20 13 7 -1 -1 21 30 -1 24 -1 22 -1 -1 22 14 -1 12 26 21 -1 5 -1 -1 -1 20 -1 -1 12 28",
         "8@!!!>!!5A!3:(!'!!!?!5.(!!6?!9!7!!7/!-;6!&!!!5!!-="),
        ("1117_10_146_F3",
         "20 33 -1 -1 -1 29 -1 -1 28 28 -1 7 16 5 -1 30 -1 -1 -1 14 -1 4 13 4 -1 -1 11 13 -1 5 -1 7 -1 -1 10 16 -1 4 12 15 -1 8 -1 -1 -1 16 -1 -1 10 4",
         "5B!!!>!!==!(1&!?!!!/!%.%!!,.!&!(!!+1!%-0!)!!!1!!+%"),
        ("1117_10_1017_F3",
         "33 33 -1 -1 -1 27 -1 -1 17 16 -1 28 24 11 -1 6 -1 -1 -1 29 -1 8 29 24 -1 -1 8 8 -1 20 -1 13 -1 -1 8 13 -1 28 10 24 -1 10 -1 -1 -1 4 -1 -1 7 6",
         "BB!!!<!!21!=9,!'!!!>!)>9!!))!5!.!!).!=+9!+!!!%!!('"),
        ("1117_11_136_F3",
         "16 22 -1 -1 -1 33 -1 -1 30 27 -1 27 28 32 -1 29 -1 -1 -1 27 -1 18 9 6 -1 -1 23 16 -1 26 -1 5 7 -1 22 7 -1 18 14 8 -1 8 -1 -1 -1 11 -1 -1 4 24",
         "17!!!B!!?<!<=A!>!!!<!3*'!!81!;!&(!7(!3/)!)!!!,!!%9"),
    ]
    # The expected FASTQ uses a run of "?" as the sequence line.
    unknown_sequence = "??????????????????????????????????????????????????"

    # QUAL input: header line then score line, no trailing newline.
    data = "\n".join(">%s\n%s" % (name, scores)
                     for name, scores, _ in examples)
    expected = "".join("@%s\n%s\n+\n%s\n" % (name, unknown_sequence, quality)
                       for name, _, quality in examples)

    qual_handle = StringIO(data)
    fastq_handle = StringIO()
    self.assertEqual(4, SeqIO.convert(qual_handle, "qual",
                                      fastq_handle, "fastq"))
    self.assertEqual(fastq_handle.getvalue(), expected)
def test_fastq_2000(self):
    """Read and write back simple example with upper case 2000bp read."""
    sequence = "ACGT" * 500
    quality = "!@a~" * 500
    data = "@%s\n%s\n+\n%s\n" % ("id descr goes here", sequence, quality)

    round_tripped = StringIO()
    parsed = SeqIO.parse(StringIO(data), "fastq")
    count = SeqIO.write(parsed, round_tripped, "fastq")

    # One record in, one record out, with identical text.
    self.assertEqual(1, count)
    self.assertEqual(data, round_tripped.getvalue())
# Unit test for OboWriter: two OntologyTerm objects (GO:0009628 with one
# "is_a" parent, GO:0022627 with two) are written through
# OboWriter(f, version="1.2") into a StringIO, and the resulting text must
# equal expected_output exactly.
# NOTE(review): this definition appears collapsed onto a single physical
# line, so the line breaks inside the triple-quoted expected_output are not
# visible here - restore the original layout from version control before
# reformatting; do not re-wrap the string by hand.
def test_write(self): terms_to_write = [ OntologyTerm("GO:0009628", "response to abiotic stimulus", {"is_a": ["GO:0050896"]}), OntologyTerm("GO:0022627", "cytosolic small ribosomal subunit", {"is_a": ["GO:0015935", "GO:0044445"]}) ] f = StringIO() writer = OboWriter(f, version="1.2") writer.write(terms_to_write) expected_output = """format-version:1.2 [Term] id: GO:0009628 name: response to abiotic stimulus is_a: GO:0050896 [Term] id: GO:0022627 name: cytosolic small ribosomal subunit is_a: GO:0015935 is_a: GO:0044445 """ self.assertEqual(expected_output, f.getvalue())
def test_fastq_1000(self):
    """Read and write back simple example with mixed case 1000bp read."""
    # Build the four FASTQ lines separately, then join them.
    fastq_lines = [
        "@" + "id descr goes here",
        "ACGTNncgta" * 100,
        "+",
        "abcd!!efgh" * 100,
    ]
    data = "\n".join(fastq_lines) + "\n"

    out_handle = StringIO()
    record_count = SeqIO.write(SeqIO.parse(StringIO(data), "fastq"),
                               out_handle, "fastq")
    self.assertEqual(1, record_count)
    self.assertEqual(data, out_handle.getvalue())
def test_qual_negative(self):
    """Check QUAL negative scores mapped to PHRED zero."""
    # QUAL input: each record is a header line followed by its scores.
    data = (
        ">1117_10_107_F3\n"
        "23 31 -1 -1 -1 29 -1 -1 20 32 -1 18 25 7 -1 6 -1 -1 -1 30 -1 20 13 7 -1 -1 21 30 -1 24 -1 22 -1 -1 22 14 -1 12 26 21 -1 5 -1 -1 -1 20 -1 -1 12 28\n"
        ">1117_10_146_F3\n"
        "20 33 -1 -1 -1 29 -1 -1 28 28 -1 7 16 5 -1 30 -1 -1 -1 14 -1 4 13 4 -1 -1 11 13 -1 5 -1 7 -1 -1 10 16 -1 4 12 15 -1 8 -1 -1 -1 16 -1 -1 10 4\n"
        ">1117_10_1017_F3\n"
        "33 33 -1 -1 -1 27 -1 -1 17 16 -1 28 24 11 -1 6 -1 -1 -1 29 -1 8 29 24 -1 -1 8 8 -1 20 -1 13 -1 -1 8 13 -1 28 10 24 -1 10 -1 -1 -1 4 -1 -1 7 6\n"
        ">1117_11_136_F3\n"
        "16 22 -1 -1 -1 33 -1 -1 30 27 -1 27 28 32 -1 29 -1 -1 -1 27 -1 18 9 6 -1 -1 23 16 -1 26 -1 5 7 -1 22 7 -1 18 14 8 -1 8 -1 -1 -1 11 -1 -1 4 24"
    )
    # Expected FASTQ: every -1 score shows up as "!" in the quality line.
    expected = (
        "@1117_10_107_F3\n"
        "??????????????????????????????????????????????????\n"
        "+\n"
        "8@!!!>!!5A!3:(!'!!!?!5.(!!6?!9!7!!7/!-;6!&!!!5!!-=\n"
        "@1117_10_146_F3\n"
        "??????????????????????????????????????????????????\n"
        "+\n"
        "5B!!!>!!==!(1&!?!!!/!%.%!!,.!&!(!!+1!%-0!)!!!1!!+%\n"
        "@1117_10_1017_F3\n"
        "??????????????????????????????????????????????????\n"
        "+\n"
        "BB!!!<!!21!=9,!'!!!>!)>9!!))!5!.!!).!=+9!+!!!%!!('\n"
        "@1117_11_136_F3\n"
        "??????????????????????????????????????????????????\n"
        "+\n"
        "17!!!B!!?<!<=A!>!!!<!3*'!!81!;!&(!7(!3/)!)!!!,!!%9\n"
    )
    source = StringIO(data)
    target = StringIO()
    converted = SeqIO.convert(source, "qual", target, "fastq")
    self.assertEqual(4, converted)
    self.assertEqual(target.getvalue(), expected)
def test_write_species(self):
    """Test writing species from annotation tags.

    Loads SwissProt/sp016, checks its organism and NCBI taxonomy
    annotations, writes the record as SeqXML, and checks that a <species>
    element carrying both values appears in the output.
    """
    record = SeqIO.read("SwissProt/sp016", "swiss")
    self.assertEqual(record.annotations["organism"], "Homo sapiens (Human)")
    self.assertEqual(record.annotations["ncbi_taxid"], ["9606"])
    handle = StringIO()
    SeqIO.write(record, handle, "seqxml")
    # getvalue() returns the whole buffer regardless of position.
    output = handle.getvalue()
    self.assertIn("Homo sapiens (Human)", output)
    self.assertIn("9606", output)
    # Attribute order and empty-element style depend on the XML writer, so
    # any of these equivalent serializations is accepted.
    acceptable_tags = (
        # Good, but don't get this (do we?)
        '<species name="Homo sapiens (Human)" ncbiTaxID="9606"/>',
        # Not as concise, but fine (seen on C Python)
        '<species name="Homo sapiens (Human)" ncbiTaxID="9606"></species>',
        # Jython uses a different order
        '<species ncbiTaxID="9606" name="Homo sapiens (Human)"></species>',
        # This would be fine too, but don't get this (do we?)
        '<species ncbiTaxID="9606" name="Homo sapiens (Human)"/>',
    )
    if not any(tag in output for tag in acceptable_tags):
        self.fail("Missing expected <species> tag: %r" % output)
def test_fasta_out(self):
    """Check FASTQ to FASTA output."""
    converted = StringIO()
    SeqIO.write(SeqIO.parse("Quality/example.fastq", "fastq"),
                converted, "fasta")
    # Compare against the stored FASTA version of the same records.
    with open("Quality/example.fasta") as expected_handle:
        expected_fasta = expected_handle.read()
        self.assertEqual(converted.getvalue(), expected_fasta)
def test_fastq_2000(self):
    """Read and write back simple example with upper case 2000bp read."""
    # Assemble the record line by line: title, sequence, separator, quality.
    fastq_lines = [
        "@" + "id descr goes here",
        "ACGT" * 500,
        "+",
        "!@a~" * 500,
    ]
    data = "\n".join(fastq_lines) + "\n"

    out_handle = StringIO()
    record_count = SeqIO.write(SeqIO.parse(StringIO(data), "fastq"),
                               out_handle, "fastq")
    self.assertEqual(1, record_count)
    self.assertEqual(data, out_handle.getvalue())
def test_write_species(self):
    """Test writing species from annotation tags.

    Reads SwissProt/sp016, verifies the organism and taxonomy id
    annotations, writes the record as SeqXML, and verifies the output
    contains a <species> element with both values.
    """
    organism = "Homo sapiens (Human)"
    taxid = "9606"

    record = SeqIO.read("SwissProt/sp016", "swiss")
    self.assertEqual(record.annotations["organism"], organism)
    self.assertEqual(record.annotations["ncbi_taxid"], [taxid])

    handle = StringIO()
    SeqIO.write(record, handle, "seqxml")
    output = handle.getvalue()

    # assertIn shows the searched text when the check fails.
    self.assertIn(organism, output)
    self.assertIn(taxid, output)

    # The XML writer may emit the attributes in either order and may or may
    # not collapse the empty element, so accept all four spellings.
    acceptable_tags = (
        # Good, but don't get this (do we?)
        '<species name="Homo sapiens (Human)" ncbiTaxID="9606"/>',
        # Not as concise, but fine (seen on C Python)
        '<species name="Homo sapiens (Human)" ncbiTaxID="9606"></species>',
        # Jython uses a different order
        '<species ncbiTaxID="9606" name="Homo sapiens (Human)"></species>',
        # This would be fine too, but don't get this (do we?)
        '<species ncbiTaxID="9606" name="Homo sapiens (Human)"/>',
    )
    if not any(tag in output for tag in acceptable_tags):
        self.fail("Missing expected <species> tag: %r" % output)
def test_format_branch_length(self):
    """Custom format string for Newick branch length serialization.

    Writes the one-leaf tree ``A:0.1;`` with ``format_branch_length="%.0e"``
    and checks the branch length comes out in exponent notation.
    """
    tree = Phylo.read(StringIO("A:0.1;"), "newick")
    mem_file = StringIO()
    Phylo.write(tree, mem_file, "newick", format_branch_length="%.0e")
    # Py2.5 compat: Windows with Py2.5- represents this as 1e-001;
    # on all other platforms it's 1e-01
    # assertIn reports the actual output if it matches neither form.
    self.assertIn(mem_file.getvalue().strip(), ["A:1e-01;", "A:1e-001;"])
def test_format_phylip(self):
    """Check the line layout DistanceMatrix.format_phylip writes.

    The output must have one line more than the matrix has entries, the
    first line must end with the matrix size, and each following line must
    start with the corresponding name from ``self.names``.
    """
    matrix = DistanceMatrix(self.names, self.matrix)
    buffer = StringIO()
    matrix.format_phylip(buffer)
    output_lines = buffer.getvalue().splitlines()

    self.assertEqual(len(output_lines), len(matrix) + 1)

    header = output_lines[0]
    self.assertTrue(header.endswith(str(len(matrix))))

    rows = output_lines[1:]
    for name, row in zip(self.names, rows):
        self.assertTrue(row.startswith(name))
def test_fastq_rna(self):
    """Read and write back simple example with ambiguous RNA."""
    # One distinct quality character per letter, starting at chr(33).
    quality = "".join(chr(33 + q) for q in range(len(ambiguous_rna_letters)))

    # First in upper case, then in lower case...
    cased_letters = (ambiguous_rna_letters.upper(),
                     ambiguous_rna_letters.lower())
    for letters in cased_letters:
        data = "@%s\n%s\n+\n%s\n" % ("id descr goes here", letters, quality)
        handle = StringIO()
        written = SeqIO.write(SeqIO.parse(StringIO(data), "fastq"),
                              handle, "fastq")
        self.assertEqual(1, written)
        self.assertEqual(data, handle.getvalue())
def test_fastq_rna(self):
    """Read and write back simple example with ambiguous RNA."""

    def round_trip(letters):
        # Build a one-record FASTQ file for these letters, push it through
        # parse/write, and require the text to come back unchanged.
        scores = "".join(chr(33 + q)
                         for q in range(len(ambiguous_rna_letters)))
        data = "@%s\n%s\n+\n%s\n" % ("id descr goes here", letters, scores)
        out_handle = StringIO()
        count = SeqIO.write(SeqIO.parse(StringIO(data), "fastq"),
                            out_handle, "fastq")
        self.assertEqual(1, count)
        self.assertEqual(data, out_handle.getvalue())

    # First in upper case...
    round_trip(ambiguous_rna_letters.upper())
    # Now in lower case...
    round_trip(ambiguous_rna_letters.lower())
def test_format_branch_length(self):
    """Custom format string for Newick branch length serialization.

    The tree ``A:0.1;`` is written with ``format_branch_length='%.0e'`` and
    the result must be one of the two accepted exponent spellings.
    """
    tree = Phylo.read(StringIO('A:0.1;'), 'newick')
    mem_file = StringIO()
    Phylo.write(tree, mem_file, 'newick', format_branch_length='%.0e')
    written = mem_file.getvalue().strip()
    # Py2.5 compat: Windows with Py2.5- represents this as 1e-001;
    # on all other platforms it's 1e-01
    # assertIn (rather than assertTrue on an "in" test) puts the actual
    # string in the failure message.
    self.assertIn(written, ['A:1e-01;', 'A:1e-001;'])
def test_format_phylip(self):
    """Check the shape of the text produced by format_phylip.

    Expects a first line ending in the matrix size, followed by one line
    per entry, each beginning with the matching name from ``self.names``.
    """
    dm = DistanceMatrix(self.names, self.matrix)
    out = StringIO()
    dm.format_phylip(out)
    text_lines = out.getvalue().splitlines()

    # One header line plus one line per matrix entry.
    expected_line_count = len(dm) + 1
    self.assertEqual(len(text_lines), expected_line_count)

    size_suffix = str(len(dm))
    self.assertTrue(text_lines[0].endswith(size_suffix))

    for name, body_line in zip(self.names, text_lines[1:]):
        self.assertTrue(body_line.startswith(name))
def testParse(self):
    """Round-trip the SCOP test files through Scop and query the result.

    Loads the cla, des and hie test files, builds a Scop object from them,
    checks that writing each one back reproduces the input (line by line
    via ``_compare_cla_lines`` for cla, exactly for des and hie), then
    checks a few lookups including two that must return None.
    """
    # Read each input with its own context manager so every file is closed
    # as soon as it has been read, including when a read fails.
    with open("./SCOP/dir.cla.scop.txt_test") as f:
        cla = f.read()
    with open("./SCOP/dir.des.scop.txt_test") as f:
        des = f.read()
    with open("./SCOP/dir.hie.scop.txt_test") as f:
        hie = f.read()

    scop = Scop(StringIO(cla), StringIO(des), StringIO(hie))

    cla_out = StringIO()
    scop.write_cla(cla_out)
    # NOTE(review): zip() stops at the shorter sequence, so missing or extra
    # output lines would go unnoticed here - consider also asserting that
    # both sides have the same number of lines.
    lines = zip(cla.rstrip().split('\n'),
                cla_out.getvalue().rstrip().split('\n'))
    for expected_line, line in lines:
        self.assertTrue(self._compare_cla_lines(expected_line, line))

    des_out = StringIO()
    scop.write_des(des_out)
    self.assertEqual(des_out.getvalue(), des)

    hie_out = StringIO()
    scop.write_hie(hie_out)
    self.assertEqual(hie_out.getvalue(), hie)

    domain = scop.getDomainBySid("d1hbia_")
    self.assertEqual(domain.sunid, 14996)

    domains = scop.getDomains()
    self.assertEqual(len(domains), 14)
    self.assertEqual(domains[4].sunid, 14988)

    # Unknown identifiers must yield None rather than raising.
    dom = scop.getNodeBySunid(-111)
    self.assertIsNone(dom)
    dom = scop.getDomainBySid("no such domain")
    self.assertIsNone(dom)
def testParse(self):
    """Parse the SCOP test files, write them back, and run lookups.

    The cla output is compared with its input line by line using
    ``_compare_cla_lines``; the des and hie outputs must equal their inputs
    exactly.  Domain lookups by sid and sunid are then checked, including
    two lookups that must return None.
    """
    # A "with" block per file replaces the manual open/close sequence: each
    # handle is closed on exit, whether or not the read succeeded.
    with open("./SCOP/dir.cla.scop.txt_test") as cla_file:
        cla = cla_file.read()
    with open("./SCOP/dir.des.scop.txt_test") as des_file:
        des = des_file.read()
    with open("./SCOP/dir.hie.scop.txt_test") as hie_file:
        hie = hie_file.read()

    scop = Scop(StringIO(cla), StringIO(des), StringIO(hie))

    cla_out = StringIO()
    scop.write_cla(cla_out)
    expected_lines = cla.rstrip().split('\n')
    written_lines = cla_out.getvalue().rstrip().split('\n')
    # NOTE(review): zip() pairs lines only up to the shorter list, so a
    # difference in line count is not detected by this loop - confirm
    # whether that is intended.
    for expected_line, line in zip(expected_lines, written_lines):
        self.assertTrue(self._compare_cla_lines(expected_line, line))

    des_out = StringIO()
    scop.write_des(des_out)
    self.assertEqual(des_out.getvalue(), des)

    hie_out = StringIO()
    scop.write_hie(hie_out)
    self.assertEqual(hie_out.getvalue(), hie)

    domain = scop.getDomainBySid("d1hbia_")
    self.assertEqual(domain.sunid, 14996)

    domains = scop.getDomains()
    self.assertEqual(len(domains), 14)
    self.assertEqual(domains[4].sunid, 14988)

    # Lookups that find nothing are expected to return None.
    self.assertIsNone(scop.getNodeBySunid(-111))
    self.assertIsNone(scop.getDomainBySid("no such domain"))
# Unit test for GmlWriter: builds a DiGraph with node 1 (given {'a': 1}),
# an edge 1 -> 2 (given {'x': 'x'}) and an edge 2 -> 1 (given "zzzz"),
# writes it to a StringIO and asserts the text is one of two accepted
# outputs.  The two outputs differ only in which node is assigned id 0.
# NOTE(review): this definition appears collapsed onto a single physical
# line, so the line breaks and indentation inside the two triple-quoted
# expected strings are not visible here - restore the original layout from
# version control before reformatting; do not re-wrap the strings by hand.
def test_write(self): correct_output_a = """graph [ directed 1 node [ id 0 label "1" a 1 ] node [ id 1 label "2" ] edge [ source 0 target 1 x "x" ] edge [ source 1 target 0 label "zzzz" ] ]""" correct_output_b = """graph [ directed 1 node [ id 0 label "2" ] node [ id 1 label "1" a 1 ] edge [ source 1 target 0 x "x" ] edge [ source 0 target 1 label "zzzz" ] ]""" out = StringIO() writer = GmlWriter(out) graph = DiGraph() graph.add_node(1, {'a': 1}) graph.add_edge(1, 2, {'x': 'x'}) graph.add_edge(2, 1, "zzzz") writer.write(graph) self.assertIn(out.getvalue(), set([correct_output_a, correct_output_b]))
def test_locus_line_topogoly(self):
    """Test if chromosome topology is conserved."""
    genbank_path = 'GenBank/DS830848.gb'
    record = SeqIO.read(genbank_path, 'genbank')
    self.assertEqual(record.annotations['topology'], 'linear')

    # Write the record back out and keep only the first output line.
    buffer = StringIO()
    SeqIO.write([record], buffer, 'genbank')
    written_lines = buffer.getvalue().split('\n')
    first_line = written_lines[0]
    self.assertIn('linear', first_line)

    # It must match the first line of the original file.
    with open(genbank_path, 'r') as source:
        orig_first_line = source.readline().strip()
    self.assertEqual(first_line, orig_first_line)
def test_locus_line_topogoly(self):
    """Test if chromosome topology is conserved."""
    record = SeqIO.read('GenBank/DS830848.gb', 'genbank')
    topology = record.annotations['topology']
    self.assertEqual(topology, 'linear')

    out_handle = StringIO()
    SeqIO.write([record], out_handle, 'genbank')
    # Everything before the first newline is the first written line.
    written_first = out_handle.getvalue().partition('\n')[0]
    self.assertIn('linear', written_first)

    with open('GenBank/DS830848.gb', 'r') as original:
        original_first = original.readline().strip()
    self.assertEqual(written_first, original_first)
def check_convert(in_filename, in_format, out_format, alphabet=None):
    """Check SeqIO.convert agrees with SeqIO.parse followed by SeqIO.write.

    The records are parsed, written in out_format, re-parsed and compared
    with the originals via compare_records.  The same conversion is then
    done with SeqIO.convert and its text must equal the parse/write text.
    UserWarning is silenced while writing when truncation_expected() says
    the output format truncates qualities.
    """
    records = list(SeqIO.parse(in_filename, in_format, alphabet))
    qual_truncate = truncation_expected(out_format)

    # Write it out...
    written = StringIO()
    with warnings.catch_warnings():
        if qual_truncate:
            warnings.simplefilter('ignore', UserWarning)
        SeqIO.write(records, written, out_format)

    # Now load it back and check it agrees,
    written.seek(0)
    reloaded = list(SeqIO.parse(written, out_format, alphabet))
    compare_records(records, reloaded, qual_truncate)

    # Finally, use the convert function, and check that agrees:
    converted = StringIO()
    with warnings.catch_warnings():
        if qual_truncate:
            warnings.simplefilter('ignore', UserWarning)
        SeqIO.convert(in_filename, in_format, converted, out_format, alphabet)

    # We could re-parse this, but it is simpler and stricter:
    assert written.getvalue() == converted.getvalue()
# Unit test for GmlWriter: a DiGraph with node 1 (given {'a' : 1 }), an edge
# 1 -> 2 (given {'x' : 'x'}) and an edge 2 -> 1 (given "zzzz") is written to
# a StringIO; the text must equal correct_output_a or correct_output_b,
# which differ only in which node is assigned id 0.
# NOTE(review): this definition appears collapsed onto a single physical
# line, so the line breaks and indentation inside the two triple-quoted
# expected strings are not visible here - restore the original layout from
# version control before reformatting; do not re-wrap the strings by hand.
def test_write(self): correct_output_a = """graph [ directed 1 node [ id 0 label "1" a 1 ] node [ id 1 label "2" ] edge [ source 0 target 1 x "x" ] edge [ source 1 target 0 label "zzzz" ] ]""" correct_output_b = """graph [ directed 1 node [ id 0 label "2" ] node [ id 1 label "1" a 1 ] edge [ source 1 target 0 x "x" ] edge [ source 0 target 1 label "zzzz" ] ]""" out = StringIO() writer = GmlWriter(out) graph = DiGraph() graph.add_node(1, {'a' : 1 }) graph.add_edge(1, 2, {'x' : 'x'}) graph.add_edge(2, 1, "zzzz") writer.write(graph) self.assertIn(out.getvalue(), set([correct_output_a, correct_output_b]))
def check_convert(in_filename, in_format, out_format, alphabet=None):
    """Verify that SeqIO.convert and parse/write give the same output.

    Steps: parse the input, write it in out_format, parse that output again
    and compare with the original records (compare_records), then run
    SeqIO.convert and require its text to equal the parse/write text.
    BiopythonWarning is ignored during writing when truncation_expected()
    returns a true value for out_format.
    """
    original_records = list(SeqIO.parse(in_filename, in_format, alphabet))

    # Write it out...
    parse_write_out = StringIO()
    qual_truncate = truncation_expected(out_format)
    with warnings.catch_warnings():
        if qual_truncate:
            warnings.simplefilter("ignore", BiopythonWarning)
        SeqIO.write(original_records, parse_write_out, out_format)
    parse_write_out.seek(0)

    # Now load it back and check it agrees,
    round_tripped = list(SeqIO.parse(parse_write_out, out_format, alphabet))
    compare_records(original_records, round_tripped, qual_truncate)

    # Finally, use the convert function, and check that agrees:
    convert_out = StringIO()
    with warnings.catch_warnings():
        if qual_truncate:
            warnings.simplefilter("ignore", BiopythonWarning)
        SeqIO.convert(in_filename, in_format, convert_out,
                      out_format, alphabet)

    # We could re-parse this, but it is simpler and stricter:
    assert parse_write_out.getvalue() == convert_out.getvalue()
def __format__(self, format_spec):
    """Return the alignment as a string in the specified file format.

    This method supports the python format() function added in
    Python 2.6/3.0.  The format_spec should be a lower case string
    supported by Bio.AlignIO as an output file format.  An empty
    format_spec gives the same result as str() on the alignment.
    See also the alignment's format() method.
    """
    if not format_spec:
        # Follow python convention and default to using __str__
        return str(self)

    # Imported here rather than at module level, as in the original code.
    from Bio._py3k import StringIO
    from Bio import AlignIO

    buffer = StringIO()
    AlignIO.write([self], buffer, format_spec)
    return buffer.getvalue()
def __format__(self, format_spec):
    """Return the alignment as a string in the specified file format.

    Supports the python format() built-in (added in Python 2.6/3.0).
    format_spec should be a lower case string that Bio.AlignIO accepts as
    an output file format; when it is empty the result of str(self) is
    returned instead.  See also the alignment's format() method.
    """
    if format_spec:
        from Bio._py3k import StringIO
        from Bio import AlignIO

        # Write this single alignment into an in-memory handle.
        text_handle = StringIO()
        AlignIO.write([self], text_handle, format_spec)
        formatted = text_handle.getvalue()
    else:
        # Follow python convention and default to using __str__
        formatted = str(self)
    return formatted
def __format__(self, format_spec):
    """Serialize the tree as a string in the specified file format.

    This method supports the ``format`` built-in function added in Python
    2.6/3.0.  With an empty ``format_spec`` the result of ``str(self)`` is
    returned.

    :param format_spec: a lower-case string supported by `Bio.Phylo.write`
        as an output file format.
    """
    if not format_spec:
        # Follow python convention and default to using __str__
        return str(self)

    # Imported here rather than at module level, as in the original code.
    from Bio._py3k import StringIO
    from Bio.Phylo import _io

    buffer = StringIO()
    _io.write([self], buffer, format_spec)
    return buffer.getvalue()
def __format__(self, format_spec):
    """Serialize the tree as a string in the specified file format.

    Supports the ``format`` built-in function added in Python 2.6/3.0.
    An empty ``format_spec`` falls back to ``str(self)``.

    :param format_spec: a lower-case string supported by `Bio.Phylo.write`
        as an output file format.
    """
    if format_spec:
        from Bio._py3k import StringIO
        from Bio.Phylo import _io

        # Write this single tree into an in-memory handle.
        text_handle = StringIO()
        _io.write([self], text_handle, format_spec)
        serialized = text_handle.getvalue()
    else:
        # Follow python convention and default to using __str__
        serialized = str(self)
    return serialized
def write_to_string(self, output='PS', dpi=72):
    """Return the completed drawing as a string in a prescribed format.

    Arguments:
     - output - String indicating output format, one of PS, PDF, SVG,
       JPG, BMP, GIF, PNG or TIFF (as specified for the write method).
     - dpi - Resolution (dots per inch) for bitmap formats.
    """
    # The ReportLab drawToString method, which this function used to call,
    # just uses a cStringIO or StringIO handle with the drawToFile method.
    # In order to put all our complicated file format specific code in one
    # place we'll just use a StringIO handle here:
    from Bio._py3k import StringIO

    buffer = StringIO()
    self.write(buffer, output, dpi)
    return buffer.getvalue()
def write_to_string(self, output='PS', dpi=72):
    """Render the drawing with write() and return the result as a string.

    Arguments:
     - output - String indicating output format, one of PS, PDF, SVG,
       JPG, BMP, GIF, PNG or TIFF (as specified for the write method).
     - dpi - Resolution (dots per inch) for bitmap formats.

    Returns the completed drawing in the requested format.
    """
    from Bio._py3k import StringIO

    # The ReportLab drawToString method, which this function used to call,
    # just uses a cStringIO or StringIO handle with the drawToFile method.
    # Delegating to write() keeps the file format specific code in one
    # place.
    in_memory = StringIO()
    self.write(in_memory, output, dpi)
    drawing_text = in_memory.getvalue()
    return drawing_text
def test_write(self):
    """Check the text EnrichmentWriter produces for two entries."""
    shared_corrections = {'bh_fdr': 1.0, 'bonferroni': 1.0}

    e1 = EnrichmentEntry("9951", "structure-specific DNA binding",
                         0.032301032301)
    # Each entry gets its own dict so the two entries share no state.
    e1.corrections = dict(shared_corrections)
    e1.attrs = {'plot': [0.1, 0.2, 1.0, 0.1]}

    e2 = EnrichmentEntry("9916", "polysomal ribosome", 0.025)
    e2.corrections = dict(shared_corrections)
    e2.attrs = {}

    en = Enrichment("ranked parent-child", [e1, e2],
                    ["Cycles found..."], ['bh_fdr', 'bonferroni'])

    out_handle = StringIO()
    writer = EnrichmentWriter(out_handle)
    writer.write(en)

    # Header comments, column names, one row per entry, then the warning.
    header = ("# ranked parent-child\r\n"
              "# 2 1\r\n"
              "id\tname\tp-value\tbh_fdr|bonferroni\tattributes\r\n")
    rows = ("9951\tstructure-specific DNA binding\t0.032301032301\t1.0|1.0\t{'plot': [0.1, 0.2, 1.0, 0.1]}\r\n"
            "9916\tpolysomal ribosome\t0.025\t1.0|1.0\t{}\r\n")
    footer = "!\tCycles found...\r\n"
    expected = header + rows + footer

    self.assertEqual(expected, out_handle.getvalue())
# Unit test for OboWriter: two OntologyTerm objects (GO:0009628 with one
# "is_a" parent, GO:0022627 with two) are written through
# OboWriter(f, version = "1.2") into a StringIO, and the resulting text must
# equal expected_output exactly.
# NOTE(review): this definition appears collapsed onto a single physical
# line, so the line breaks inside the triple-quoted expected_output are not
# visible here - restore the original layout from version control before
# reformatting; do not re-wrap the string by hand.
def test_write(self): terms_to_write = [OntologyTerm("GO:0009628", "response to abiotic stimulus", {"is_a" : ["GO:0050896"]}), OntologyTerm("GO:0022627", "cytosolic small ribosomal subunit", {"is_a" : ["GO:0015935", "GO:0044445"]})] f = StringIO() writer = OboWriter(f, version = "1.2") writer.write(terms_to_write) expected_output = """format-version:1.2 [Term] id: GO:0009628 name: response to abiotic stimulus is_a: GO:0050896 [Term] id: GO:0022627 name: cytosolic small ribosomal subunit is_a: GO:0015935 is_a: GO:0044445 """ self.assertEqual(expected_output, f.getvalue())
# Reads Clustalw/opuntia.aln (relative to the current working directory) as a
# gapped unambiguous-DNA "clustal" alignment and checks: the 7 records'
# descriptions and sequences, the alignment length (156), the dumb consensus,
# the replacement dictionary, three position specific score matrices, the
# information content values, and the output of AlignInfo.print_info_content.
# NOTE(review): nothing visible here writes the alignment back out despite the
# "read_write" name, and the line breaks/column spacing inside the
# triple-quoted expected strings appear to have been collapsed in this copy -
# confirm against the original layout before relying on those comparisons.
def test_read_write_clustal(self): """Test the base alignment stuff.""" path = os.path.join(os.getcwd(), "Clustalw", "opuntia.aln") alignment = AlignIO.read(path, "clustal", alphabet=Alphabet.Gapped(IUPAC.unambiguous_dna)) self.assertEqual(len(alignment), 7) seq_record = alignment[0] self.assertEqual(seq_record.description, "gi|6273285|gb|AF191659.1|AF191") self.assertEqual(seq_record.seq, Seq("TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATA----------ATATATTTCAAATTTCCTTATATACCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCCATTGATTTAGTGTACCAGA")) seq_record = alignment[1] self.assertEqual(seq_record.description, "gi|6273284|gb|AF191658.1|AF191") self.assertEqual(seq_record.seq, "TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATATA--------ATATATTTCAAATTTCCTTATATACCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTGTACCAGA") seq_record = alignment[2] self.assertEqual(seq_record.description, "gi|6273287|gb|AF191661.1|AF191") self.assertEqual(seq_record.seq, "TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATA----------ATATATTTCAAATTTCCTTATATATCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTGTACCAGA") seq_record = alignment[3] self.assertEqual(seq_record.description, "gi|6273286|gb|AF191660.1|AF191") self.assertEqual(seq_record.seq, "TATACATAAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATA----------ATATATTTATAATTTCCTTATATATCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTGTACCAGA") seq_record = alignment[4] self.assertEqual(seq_record.description, "gi|6273290|gb|AF191664.1|AF191") self.assertEqual(seq_record.seq, "TATACATTAAAGGAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATATATA------ATATATTTCAAATTCCCTTATATATCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTGTACCAGA") seq_record = alignment[5] self.assertEqual(seq_record.description, "gi|6273289|gb|AF191663.1|AF191") self.assertEqual(seq_record.seq, 
"TATACATTAAAGGAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATATATA------ATATATTTCAAATTCCCTTATATATCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTATACCAGA") seq_record = alignment[6] self.assertEqual(seq_record.description, "gi|6273291|gb|AF191665.1|AF191") self.assertEqual(seq_record.seq, "TATACATTAAAGGAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATATATATATATAATATATTTCAAATTCCCTTATATATCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTGTACCAGA") self.assertEqual(alignment.get_alignment_length(), 156) align_info = AlignInfo.SummaryInfo(alignment) consensus = align_info.dumb_consensus() self.assertIsInstance(consensus, Seq) self.assertEqual(consensus, "TATACATTAAAGXAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATATATATATATAATATATTTCAAATTXCCTTATATATCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTGTACCAGA") dictionary = align_info.replacement_dictionary(["N"]) self.assertEqual(len(dictionary), 16) self.assertAlmostEqual(dictionary[("A", "A")], 1395.0, places=1) self.assertAlmostEqual(dictionary[("A", "C")], 3.0, places=1) self.assertAlmostEqual(dictionary[("A", "G")], 13.0, places=1) self.assertAlmostEqual(dictionary[("A", "T")], 6.0, places=1) self.assertAlmostEqual(dictionary[("C", "A")], 3.0, places=1) self.assertAlmostEqual(dictionary[("C", "C")], 271.0, places=1) self.assertAlmostEqual(dictionary[("C", "G")], 0, places=1) self.assertAlmostEqual(dictionary[("C", "T")], 16.0, places=1) self.assertAlmostEqual(dictionary[("G", "A")], 5.0, places=1) self.assertAlmostEqual(dictionary[("G", "C")], 0, places=1) self.assertAlmostEqual(dictionary[("G", "G")], 480.0, places=1) self.assertAlmostEqual(dictionary[("G", "T")], 0, places=1) self.assertAlmostEqual(dictionary[("T", "A")], 6.0, places=1) self.assertAlmostEqual(dictionary[("T", "C")], 12.0, places=1) self.assertAlmostEqual(dictionary[("T", "G")], 0, places=1) self.assertAlmostEqual(dictionary[("T", "T")], 874.0, places=1) matrix = align_info.pos_specific_score_matrix(consensus, ["N"]) 
self.assertEqual(str(matrix), """\ A C G T T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 C 0.0 7.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 T 1.0 0.0 0.0 6.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 G 0.0 0.0 7.0 0.0 X 4.0 0.0 3.0 0.0 A 7.0 0.0 0.0 0.0 G 0.0 0.0 7.0 0.0 G 0.0 0.0 7.0 0.0 G 0.0 0.0 7.0 0.0 G 0.0 0.0 7.0 0.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 G 0.0 0.0 7.0 0.0 C 0.0 7.0 0.0 0.0 G 0.0 0.0 7.0 0.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 G 0.0 0.0 7.0 0.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 G 0.0 0.0 7.0 0.0 G 0.0 0.0 7.0 0.0 C 0.0 7.0 0.0 0.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 4.0 A 4.0 0.0 0.0 0.0 T 0.0 0.0 0.0 3.0 A 3.0 0.0 0.0 0.0 T 0.0 0.0 0.0 1.0 A 1.0 0.0 0.0 0.0 T 0.0 0.0 0.0 1.0 A 1.0 0.0 0.0 0.0 T 0.0 0.0 0.0 1.0 A 1.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 T 0.0 0.0 0.0 7.0 T 0.0 0.0 0.0 7.0 C 1.0 6.0 0.0 0.0 A 6.0 0.0 0.0 1.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 T 0.0 0.0 0.0 7.0 X 0.0 3.0 0.0 4.0 C 0.0 7.0 0.0 0.0 C 0.0 7.0 0.0 0.0 T 0.0 0.0 0.0 7.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 2.0 0.0 5.0 C 0.0 7.0 0.0 0.0 C 0.0 7.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 C 0.0 7.0 0.0 0.0 T 0.0 0.0 
0.0 7.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 C 0.0 7.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 C 0.0 7.0 0.0 0.0 T 0.0 1.0 0.0 6.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 T 0.0 0.0 0.0 7.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 T 0.0 0.0 0.0 7.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 G 0.0 0.0 7.0 0.0 T 0.0 0.0 0.0 7.0 G 1.0 0.0 6.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 C 0.0 7.0 0.0 0.0 C 0.0 7.0 0.0 0.0 A 7.0 0.0 0.0 0.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 """) matrix = align_info.pos_specific_score_matrix(chars_to_ignore=["N"]) self.assertEqual(str(matrix), """\ A C G T T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 C 0.0 7.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 T 1.0 0.0 0.0 6.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 G 0.0 0.0 7.0 0.0 X 4.0 0.0 3.0 0.0 A 7.0 0.0 0.0 0.0 G 0.0 0.0 7.0 0.0 G 0.0 0.0 7.0 0.0 G 0.0 0.0 7.0 0.0 G 0.0 0.0 7.0 0.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 G 0.0 0.0 7.0 0.0 C 0.0 7.0 0.0 0.0 G 0.0 0.0 7.0 0.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 G 0.0 0.0 7.0 0.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 G 0.0 0.0 7.0 0.0 G 0.0 0.0 7.0 0.0 C 0.0 7.0 0.0 0.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 4.0 A 
4.0 0.0 0.0 0.0 T 0.0 0.0 0.0 3.0 A 3.0 0.0 0.0 0.0 T 0.0 0.0 0.0 1.0 A 1.0 0.0 0.0 0.0 T 0.0 0.0 0.0 1.0 A 1.0 0.0 0.0 0.0 T 0.0 0.0 0.0 1.0 A 1.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 T 0.0 0.0 0.0 7.0 T 0.0 0.0 0.0 7.0 C 1.0 6.0 0.0 0.0 A 6.0 0.0 0.0 1.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 T 0.0 0.0 0.0 7.0 X 0.0 3.0 0.0 4.0 C 0.0 7.0 0.0 0.0 C 0.0 7.0 0.0 0.0 T 0.0 0.0 0.0 7.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 2.0 0.0 5.0 C 0.0 7.0 0.0 0.0 C 0.0 7.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 C 0.0 7.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 C 0.0 7.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 C 0.0 7.0 0.0 0.0 T 0.0 1.0 0.0 6.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 T 0.0 0.0 0.0 7.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 T 0.0 0.0 0.0 7.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 G 0.0 0.0 7.0 0.0 T 0.0 0.0 0.0 7.0 G 1.0 0.0 6.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 C 0.0 7.0 0.0 0.0 C 0.0 7.0 0.0 0.0 A 7.0 0.0 0.0 0.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 """) second_seq = alignment[1].seq matrix = align_info.pos_specific_score_matrix(second_seq, ["N"]) self.assertEqual(str(matrix), """\ A C G T T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 C 
0.0 7.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 T 1.0 0.0 0.0 6.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 G 0.0 0.0 7.0 0.0 A 4.0 0.0 3.0 0.0 A 7.0 0.0 0.0 0.0 G 0.0 0.0 7.0 0.0 G 0.0 0.0 7.0 0.0 G 0.0 0.0 7.0 0.0 G 0.0 0.0 7.0 0.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 G 0.0 0.0 7.0 0.0 C 0.0 7.0 0.0 0.0 G 0.0 0.0 7.0 0.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 G 0.0 0.0 7.0 0.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 G 0.0 0.0 7.0 0.0 G 0.0 0.0 7.0 0.0 C 0.0 7.0 0.0 0.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 4.0 A 4.0 0.0 0.0 0.0 - 0.0 0.0 0.0 3.0 - 3.0 0.0 0.0 0.0 - 0.0 0.0 0.0 1.0 - 1.0 0.0 0.0 0.0 - 0.0 0.0 0.0 1.0 - 1.0 0.0 0.0 0.0 - 0.0 0.0 0.0 1.0 - 1.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 T 0.0 0.0 0.0 7.0 T 0.0 0.0 0.0 7.0 C 1.0 6.0 0.0 0.0 A 6.0 0.0 0.0 1.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 T 0.0 0.0 0.0 7.0 T 0.0 3.0 0.0 4.0 C 0.0 7.0 0.0 0.0 C 0.0 7.0 0.0 0.0 T 0.0 0.0 0.0 7.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 C 0.0 2.0 0.0 5.0 C 0.0 7.0 0.0 0.0 C 0.0 7.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 C 0.0 7.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 
0.0 0.0 0.0 7.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 C 0.0 7.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 C 0.0 7.0 0.0 0.0 T 0.0 1.0 0.0 6.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 T 0.0 0.0 0.0 7.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 T 0.0 0.0 0.0 7.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 G 0.0 0.0 7.0 0.0 T 0.0 0.0 0.0 7.0 G 1.0 0.0 6.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 C 0.0 7.0 0.0 0.0 C 0.0 7.0 0.0 0.0 A 7.0 0.0 0.0 0.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 """) value = align_info.information_content(5, 50, chars_to_ignore=["N"]) self.assertAlmostEqual(value, 88.42, places=2) value = align_info.information_content(chars_to_ignore=["N"]) self.assertAlmostEqual(value, 287.55, places=2) e_freq = {"G": 0.25, "C": 0.25, "A": 0.25, "T": 0.25} e_freq_table = FreqTable.FreqTable(e_freq, FreqTable.FREQ, IUPAC.unambiguous_dna) value = align_info.information_content(e_freq_table=e_freq_table, chars_to_ignore=["N"]) self.assertAlmostEqual(value, 287.55, places=2) self.assertEqual(align_info.get_column(1), "AAAAAAA") self.assertAlmostEqual(align_info.ic_vector[1], 2.00, places=2) self.assertEqual(align_info.get_column(7), "TTTATTT") self.assertAlmostEqual(align_info.ic_vector[7], 1.41, places=2) handle = StringIO() AlignInfo.print_info_content(align_info, fout=handle) self.assertEqual(handle.getvalue(), """\ 0 T 2.000 1 A 2.000 2 T 2.000 3 A 2.000 4 C 2.000 5 A 2.000 6 T 2.000 7 T 1.408 8 A 2.000 9 A 2.000 10 A 2.000 11 G 2.000 12 A 1.015 13 A 2.000 14 G 2.000 15 G 2.000 16 G 2.000 17 G 2.000 18 G 2.000 19 A 2.000 20 T 2.000 21 G 2.000 22 C 2.000 23 G 2.000 24 G 2.000 25 A 2.000 26 T 2.000 27 A 2.000 28 A 2.000 29 A 2.000 30 T 2.000 31 G 2.000 32 G 2.000 33 A 2.000 34 A 2.000 35 A 2.000 36 G 2.000 37 G 2.000 
38 C 2.000 39 G 2.000 40 A 2.000 41 A 2.000 42 A 2.000 43 G 2.000 44 A 2.000 45 A 2.000 46 A 2.000 47 G 2.000 48 A 2.000 49 A 2.000 50 T 2.000 51 A 2.000 52 T 2.000 53 A 2.000 54 T 2.000 55 A 2.000 56 - 0.682 57 - 0.682 58 - 0.333 59 - 0.333 60 - -0.115 61 - -0.115 62 - -0.115 63 - -0.115 64 - -0.115 65 - -0.115 66 A 2.000 67 T 2.000 68 A 2.000 69 T 2.000 70 A 2.000 71 T 2.000 72 T 2.000 73 T 2.000 74 C 1.408 75 A 1.408 76 A 2.000 77 A 2.000 78 T 2.000 79 T 2.000 80 T 1.015 81 C 2.000 82 C 2.000 83 T 2.000 84 T 2.000 85 A 2.000 86 T 2.000 87 A 2.000 88 T 2.000 89 A 2.000 90 C 1.137 91 C 2.000 92 C 2.000 93 A 2.000 94 A 2.000 95 A 2.000 96 T 2.000 97 A 2.000 98 T 2.000 99 A 2.000 100 A 2.000 101 A 2.000 102 A 2.000 103 A 2.000 104 T 2.000 105 A 2.000 106 T 2.000 107 C 2.000 108 T 2.000 109 A 2.000 110 A 2.000 111 T 2.000 112 A 2.000 113 A 2.000 114 A 2.000 115 T 2.000 116 T 2.000 117 A 2.000 118 G 2.000 119 A 2.000 120 T 2.000 121 G 2.000 122 A 2.000 123 A 2.000 124 T 2.000 125 A 2.000 126 T 2.000 127 C 2.000 128 A 2.000 129 A 2.000 130 A 2.000 131 G 2.000 132 A 2.000 133 A 2.000 134 T 2.000 135 C 2.000 136 C 1.408 137 A 2.000 138 T 2.000 139 T 2.000 140 G 2.000 141 A 2.000 142 T 2.000 143 T 2.000 144 T 2.000 145 A 2.000 146 G 2.000 147 T 2.000 148 G 1.408 149 T 2.000 150 A 2.000 151 C 2.000 152 C 2.000 153 A 2.000 154 G 2.000 155 A 2.000 """)
def check_simple_write_read(alignments, indent=" "):
    """Write the alignments in each test format and check they read back.

    For every format in test_write_read_align_with_seq_count the alignments
    are written to an in-memory handle with AlignIO.write, parsed back with
    AlignIO.parse (with a seq_count when all alignments have the same number
    of records, and without one when the format is also listed in
    test_write_read_alignment_formats) and compared with the originals via
    simple_alignment_comparison.

    Arguments:
     - alignments - list of alignment objects to round-trip.
     - indent - prefix for the progress messages that are printed.

    Raises ValueError if output this function wrote cannot be parsed back;
    the message carries the parser's error, the repr of the written text and
    the repr of whatever was parsed (None when parsing failed outright).
    A ValueError from writing is only reported and that format is skipped.
    """
    for fmt in test_write_read_align_with_seq_count:
        records_per_alignment = len(alignments[0])
        for a in alignments:
            if records_per_alignment != len(a):
                records_per_alignment = None
        # Can we expect this format to work?
        if not records_per_alignment \
                and fmt not in test_write_read_alignment_formats:
            continue

        print(indent + "Checking can write/read as '%s' format" % fmt)

        # Going to write to a handle...
        handle = StringIO()

        try:
            c = AlignIO.write(alignments, handle=handle, format=fmt)
            assert c == len(alignments)
        except ValueError as e:
            # This is often expected to happen, for example when we try and
            # write sequences of different lengths to an alignment file.
            print(indent + "Failed: %s" % str(e))
            # Carry on to the next format:
            continue

        # First, try with the seq_count
        if records_per_alignment:
            handle.flush()
            handle.seek(0)
            # Reset before parsing so the error report below never hits an
            # unbound name or shows a stale result from an earlier format.
            alignments2 = None
            try:
                alignments2 = list(AlignIO.parse(handle=handle, format=fmt,
                                                 seq_count=records_per_alignment))
            except ValueError as e:
                # This is BAD. We can't read our own output.
                # I want to see the output when called from the test harness,
                # run_tests.py (which can be funny about new lines on Windows)
                handle.seek(0)
                raise ValueError("%s\n\n%s\n\n%s"
                                 % (str(e), repr(handle.read()), repr(alignments2)))
            simple_alignment_comparison(alignments, alignments2, fmt)

        if fmt in test_write_read_alignment_formats:
            # Don't need the seq_count
            handle.flush()
            handle.seek(0)
            alignments2 = None  # same reasoning as above
            try:
                alignments2 = list(AlignIO.parse(handle=handle, format=fmt))
            except ValueError as e:
                # This is BAD. We can't read our own output.
                # I want to see the output when called from the test harness,
                # run_tests.py (which can be funny about new lines on Windows)
                handle.seek(0)
                raise ValueError("%s\n\n%s\n\n%s"
                                 % (str(e), repr(handle.read()), repr(alignments2)))
            simple_alignment_comparison(alignments, alignments2, fmt)

        if len(alignments) > 1:
            # Try writing just one alignment. Everything else in this check
            # goes through AlignIO, so write the single alignment the same
            # way (as a one-element list) before comparing with .format().
            handle = StringIO()
            AlignIO.write(alignments[0:1], handle, fmt)
            assert handle.getvalue() == alignments[0].format(fmt)
def check_simple_write_read(alignments, indent=" "):
    """Round-trip the alignments through each test format via AlignIO.

    For every format in test_write_read_align_with_seq_count the alignments
    are written to an in-memory handle, parsed back (with a seq_count when
    all alignments hold the same number of records, and without one when the
    format also appears in test_write_read_alignment_formats) and compared
    with the originals using simple_alignment_comparison.

    Arguments:
     - alignments - list of alignment objects to write and re-read.
     - indent - prefix for the progress messages that are printed.

    Raises ValueError when text written here cannot be parsed back; the
    message contains the parser's error, the repr of the written text and
    the repr of the parse result (None when parsing failed outright).
    A ValueError raised while writing is printed and that format skipped.
    """
    for fmt in test_write_read_align_with_seq_count:
        records_per_alignment = len(alignments[0])
        for a in alignments:
            if records_per_alignment != len(a):
                records_per_alignment = None
        # Can we expect this format to work?
        if not records_per_alignment \
                and fmt not in test_write_read_alignment_formats:
            continue

        print(indent + "Checking can write/read as '%s' format" % fmt)

        # Going to write to a handle...
        handle = StringIO()

        try:
            c = AlignIO.write(alignments, handle=handle, format=fmt)
            assert c == len(alignments)
        except ValueError as e:
            # This is often expected to happen, for example when we try and
            # write sequences of different lengths to an alignment file.
            print(indent + "Failed: %s" % str(e))
            # Carry on to the next format:
            continue

        # First, try with the seq_count
        if records_per_alignment:
            handle.flush()
            handle.seek(0)
            # Bind the name up front: if parsing fails, the error message
            # must not trip over an unbound variable or report a result left
            # over from a previous format.
            alignments2 = None
            try:
                alignments2 = list(
                    AlignIO.parse(handle=handle, format=fmt,
                                  seq_count=records_per_alignment))
            except ValueError as e:
                # This is BAD. We can't read our own output.
                # I want to see the output when called from the test harness,
                # run_tests.py (which can be funny about new lines on Windows)
                handle.seek(0)
                raise ValueError(
                    "%s\n\n%s\n\n%s"
                    % (str(e), repr(handle.read()), repr(alignments2)))
            simple_alignment_comparison(alignments, alignments2, fmt)

        if fmt in test_write_read_alignment_formats:
            # Don't need the seq_count
            handle.flush()
            handle.seek(0)
            alignments2 = None  # see the note on the seq_count branch
            try:
                alignments2 = list(AlignIO.parse(handle=handle, format=fmt))
            except ValueError as e:
                # This is BAD. We can't read our own output.
                # I want to see the output when called from the test harness,
                # run_tests.py (which can be funny about new lines on Windows)
                handle.seek(0)
                raise ValueError(
                    "%s\n\n%s\n\n%s"
                    % (str(e), repr(handle.read()), repr(alignments2)))
            simple_alignment_comparison(alignments, alignments2, fmt)

        if len(alignments) > 1:
            # Try writing just one Alignment (as a one-element list)
            handle = StringIO()
            AlignIO.write(alignments[0:1], handle, fmt)
            assert handle.getvalue() == alignments[0].format(fmt)
def CifAtomIterator(handle):
    """Yield a SeqRecord for each chain of the structure in an mmCIF file.

    The sequences are taken from the 3D structure (ATOM records) rather
    than from any sequence declared in the file header.

    Unrecognised three letter amino acid codes (e.g. "CSD") from HETATM
    entries are converted to "X" in the sequence.

    In addition to information from the header (which is the same for all
    records), the following chain specific information is placed in the
    annotation:

    record.annotations["residues"] = List of residue ID strings
    record.annotations["chain"] = Chain ID (typically A, B ,...)
    record.annotations["model"] = Model ID (typically zero)

    Where amino acids are missing from the structure, as indicated by
    residue numbering, the sequence is filled in with 'X' characters to
    match the size of the missing region, and None is included as the
    corresponding entry in the list record.annotations["residues"].

    The hard work is done by the Bio.PDB module. Raises ValueError if the
    handle yields no data at all, and issues a BiopythonParserWarning (using
    '????' as the ID) when the file has no '_entry.id' field.

    This gets called internally via Bio.SeqIO for the atom based
    interpretation of the mmCIF file format:

    >>> from Bio import SeqIO
    >>> for record in SeqIO.parse("PDB/1A8O.cif", "cif-atom"):
    ...     print("Record id %s, chain %s" % (record.id, record.annotations["chain"]))
    ...
    Record id 1A8O:A, chain A

    Equivalently,

    >>> with open("PDB/1A8O.cif") as handle:
    ...     for record in CifAtomIterator(handle):
    ...         print("Record id %s, chain %s" % (record.id, record.annotations["chain"]))
    ...
    Record id 1A8O:A, chain A

    """
    # TODO - Add record.annotations to the doctest, esp the residues (not working?)

    # Imported lazily so that SeqIO does not pull in NumPy until needed.
    from Bio.PDB.MMCIFParser import MMCIFParser
    from Bio.PDB.MMCIF2Dict import MMCIF2Dict

    # PdbAtomIterator can peek at the first line (via UndoHandle) to get the
    # PDB ID. For mmCIF the ID lives in the _entry.id field and, AFAIK, the
    # format does not fix the order of fields, so the whole file has to be
    # parsed with MMCIF2Dict. The handle is copied into an in-memory buffer
    # so that MMCIF2Dict and MMCIFParser can each read it from the start.
    cif_copy = StringIO()
    shutil.copyfileobj(handle, cif_copy)

    # check if file is empty
    if not cif_copy.getvalue():
        raise ValueError("Empty file.")

    cif_copy.seek(0)
    fields = MMCIF2Dict(cif_copy)
    try:
        pdb_id = fields["_entry.id"]
    except KeyError:
        warnings.warn("Could not find the '_entry.id' field; can't determine "
                      "PDB ID.", BiopythonParserWarning)
        pdb_id = '????'
    else:
        if isinstance(pdb_id, list):
            pdb_id = pdb_id[0]

    # Rewind so the structure parser sees the file from its first byte.
    cif_copy.seek(0)
    structure = MMCIFParser().get_structure(pdb_id, cif_copy)
    for chain_record in AtomIterator(pdb_id, structure):
        yield chain_record
def CifAtomIterator(handle):
    """Iterate over the chains of an mmCIF structure as SeqRecord objects.

    Each sequence is derived from the 3D structure (ATOM records), not from
    a sequence listed in the file header.

    Unrecognised three letter amino acid codes (e.g. "CSD") from HETATM
    entries are converted to "X" in the sequence.

    In addition to information from the header (which is the same for all
    records), the following chain specific information is placed in the
    annotation:

    record.annotations["residues"] = List of residue ID strings
    record.annotations["chain"] = Chain ID (typically A, B ,...)
    record.annotations["model"] = Model ID (typically zero)

    Where amino acids are missing from the structure, as indicated by
    residue numbering, the sequence is filled in with 'X' characters to
    match the size of the missing region, and None is included as the
    corresponding entry in the list record.annotations["residues"].

    Bio.PDB does most of the hard work. An empty input raises ValueError;
    a file without an '_entry.id' field triggers a BiopythonParserWarning
    and the ID "????" is used instead.

    This gets called internally via Bio.SeqIO for the atom based
    interpretation of the mmCIF file format:

    >>> from Bio import SeqIO
    >>> for record in SeqIO.parse("PDB/1A8O.cif", "cif-atom"):
    ...     print("Record id %s, chain %s" % (record.id, record.annotations["chain"]))
    ...
    Record id 1A8O:A, chain A

    Equivalently,

    >>> with open("PDB/1A8O.cif") as handle:
    ...     for record in CifAtomIterator(handle):
    ...         print("Record id %s, chain %s" % (record.id, record.annotations["chain"]))
    ...
    Record id 1A8O:A, chain A

    """
    # TODO - Add record.annotations to the doctest, esp the residues (not working?)

    # Only import parser when needed, to avoid/delay NumPy dependency in SeqIO
    from Bio.PDB.MMCIFParser import MMCIFParser
    from Bio.PDB.MMCIF2Dict import MMCIF2Dict

    # Unlike PdbAtomIterator, which peeks at the first line with UndoHandle,
    # the ID here comes from the _entry.id field. AFAIK mmCIF does not
    # constrain field order, so MMCIF2Dict must read the entire file; the
    # data is buffered in memory first so it can be read twice.
    in_memory = StringIO()
    shutil.copyfileobj(handle, in_memory)
    contents = in_memory.getvalue()
    if len(contents) == 0:
        # check if file is empty
        raise ValueError("Empty file.")

    in_memory.seek(0)
    cif_fields = MMCIF2Dict(in_memory)

    if "_entry.id" not in cif_fields:
        warnings.warn("Could not find the '_entry.id' field; can't determine "
                      "PDB ID.", BiopythonParserWarning)
        pdb_id = "????"
    else:
        entry_id = cif_fields["_entry.id"]
        # The field may hold a list; the first element is taken as the ID.
        pdb_id = entry_id[0] if isinstance(entry_id, list) else entry_id

    # Second pass over the same buffered data, this time building the structure.
    in_memory.seek(0)
    parsed_structure = MMCIFParser().get_structure(pdb_id, in_memory)
    for seq_record in AtomIterator(pdb_id, parsed_structure):
        yield seq_record