Exemplo n.º 1
0
def t_ensembl_locus():
    """Check name and size parsing of LOCUS lines for patch records.

    Every LOCUS line is fed on its own to a new ``GenBankScanner`` and
    ``_FeatureConsumer``; the consumer must then hold the expected record
    name and expected sequence size.  Prints "Done" after all cases pass.
    """
    # Each case: (LOCUS line, expected record name, expected size).
    cases = (
        ("LOCUS       HG531_PATCH 1000000 bp DNA HTG 18-JUN-2011\n",
         "HG531_PATCH", 1000000),
        ("LOCUS       HG531_PATCH 759984 bp DNA HTG 18-JUN-2011\n",
         "HG531_PATCH", 759984),
        ("LOCUS       HG506_HG1000_1_PATCH 814959 bp DNA HTG 18-JUN-2011\n",
         "HG506_HG1000_1_PATCH", 814959),
        ("LOCUS       HG506_HG1000_1_PATCH 1219964 bp DNA HTG 18-JUN-2011\n",
         "HG506_HG1000_1_PATCH", 1219964),
    )
    for locus_line, want_name, want_size in cases:
        # Each line gets its own scanner and consumer.
        scanner = GenBank.Scanner.GenBankScanner()
        consumer = GenBank._FeatureConsumer(True)
        scanner._feed_first_line(consumer, locus_line)
        assert consumer.data.name == want_name, consumer.data.name
        assert consumer._expected_size == want_size, consumer._expected_size

    print("Done")
Exemplo n.º 2
0
def t_ensembl_locus():
    """Check name and size parsing of LOCUS lines for patch records.

    Every LOCUS line is fed on its own to a new ``GenBankScanner`` and
    ``_FeatureConsumer``; the consumer must then hold the expected record
    name and expected sequence size.  Prints "Done" after all cases pass.
    """
    # Each case: (LOCUS line, expected record name, expected size).
    cases = (
        ("LOCUS       HG531_PATCH 1000000 bp DNA HTG 18-JUN-2011\n",
         "HG531_PATCH", 1000000),
        ("LOCUS       HG531_PATCH 759984 bp DNA HTG 18-JUN-2011\n",
         "HG531_PATCH", 759984),
        ("LOCUS       HG506_HG1000_1_PATCH 814959 bp DNA HTG 18-JUN-2011\n",
         "HG506_HG1000_1_PATCH", 814959),
        ("LOCUS       HG506_HG1000_1_PATCH 1219964 bp DNA HTG 18-JUN-2011\n",
         "HG506_HG1000_1_PATCH", 1219964),
    )
    for locus_line, want_name, want_size in cases:
        # Each line gets its own scanner and consumer.
        scanner = GenBank.Scanner.GenBankScanner()
        consumer = GenBank._FeatureConsumer(True)
        scanner._feed_first_line(consumer, locus_line)
        assert consumer.data.name == want_name, consumer.data.name
        assert consumer._expected_size == want_size, consumer._expected_size

    print("Done")
Exemplo n.º 3
0
 def test_topology_genbank(self):
     """Check topology, molecule type and division from GenBank LOCUS lines."""
     # Low-level test: only the LOCUS line is handed to the scanner.
     # Each case: (LOCUS line, topology, molecule_type, division).
     cases = [
         ("LOCUS       U00096",
          None, None, None),
         # This example is actually fungal, accession U49845 from Saccharomyces cerevisiae:
         ("LOCUS       SCU49845     5028 bp    DNA             PLN       21-JUN-1999",
          None, "DNA", "PLN"),
         ("LOCUS       AB070938                6497 bp    DNA     linear   BCT 11-OCT-2001",
          "linear", "DNA", "BCT"),
         ("LOCUS       NC_005816               9609 bp    DNA     circular BCT 21-JUL-2008",
          "circular", "DNA", "BCT"),
         ("LOCUS       SCX3_BUTOC                64 aa            linear   INV 16-OCT-2001",
          "linear", None, "INV"),
     ]
     for locus_line, want_topo, want_mol_type, want_div in cases:
         gb_scanner = Scanner.GenBankScanner()
         feature_consumer = GenBank._FeatureConsumer(
             1, GenBank.FeatureValueCleaner)
         gb_scanner._feed_first_line(feature_consumer, locus_line)
         # (annotation key, expected value, failure message template)
         checks = (
             ('topology', want_topo,
              "Wrong topology %r not %r from %r"),
             ('molecule_type', want_mol_type,
              "Wrong molecule_type %r not %r from %r"),
             ('data_file_division', want_div,
              "Wrong division %r not %r from %r"),
         )
         for key, expected, template in checks:
             found = feature_consumer.data.annotations.get(key, None)
             self.assertEqual(found, expected,
                              template % (found, expected, locus_line))
Exemplo n.º 4
0
 def test_topology_genbank(self):
     """Check topology, molecule type and division from GenBank LOCUS lines."""
     # Low-level test: only the LOCUS line is handed to the scanner.
     # Each case: (LOCUS line, topology, molecule_type, division).
     cases = [
         ("LOCUS       U00096",
          None, None, None),
         # This example is actually fungal, accession U49845 from Saccharomyces cerevisiae:
         ("LOCUS       SCU49845     5028 bp    DNA             PLN       21-JUN-1999",
          None, "DNA", "PLN"),
         ("LOCUS       AB070938                6497 bp    DNA     linear   BCT 11-OCT-2001",
          "linear", "DNA", "BCT"),
         ("LOCUS       NC_005816               9609 bp    DNA     circular BCT 21-JUL-2008",
          "circular", "DNA", "BCT"),
         ("LOCUS       SCX3_BUTOC                64 aa            linear   INV 16-OCT-2001",
          "linear", None, "INV"),
     ]
     for locus_line, want_topo, want_mol_type, want_div in cases:
         gb_scanner = Scanner.GenBankScanner()
         feature_consumer = GenBank._FeatureConsumer(
             1, GenBank.FeatureValueCleaner)
         gb_scanner._feed_first_line(feature_consumer, locus_line)
         # (annotation key, expected value, failure message template)
         checks = (
             ('topology', want_topo,
              "Wrong topology %r not %r from %r"),
             ('molecule_type', want_mol_type,
              "Wrong molecule_type %r not %r from %r"),
             ('data_file_division', want_div,
              "Wrong division %r not %r from %r"),
         )
         for key, expected, template in checks:
             found = feature_consumer.data.annotations.get(key, None)
             self.assertEqual(found, expected,
                              template % (found, expected, locus_line))
Exemplo n.º 5
0
 def test_topology_genbank(self):
     """Check the topology annotation parsed from GenBank LOCUS lines."""
     # Low-level test: only the LOCUS line is handed to the scanner.
     # Each case pairs a LOCUS line with the topology expected from it
     # (None when no 'topology' annotation should be set).
     cases = [
         ("LOCUS       U00096",
          None),
         ("LOCUS       SCU49845     5028 bp    DNA             PLN       21-JUN-1999",
          None),
         ("LOCUS       AB070938                6497 bp    DNA     linear   BCT 11-OCT-2001",
          "linear"),
         ("LOCUS       NC_005816               9609 bp    DNA     circular BCT 21-JUL-2008",
          "circular"),
         ("LOCUS       SCX3_BUTOC                64 aa            linear   INV 16-OCT-2001",
          "linear"),
     ]
     for locus_line, expected in cases:
         gb_scanner = Scanner.GenBankScanner()
         feature_consumer = GenBank._FeatureConsumer(
             1, GenBank.FeatureValueCleaner)
         gb_scanner._feed_first_line(feature_consumer, locus_line)
         found = feature_consumer.data.annotations.get('topology', None)
         failure = "Wrong topology %r not %r from %r" % (
             found, expected, locus_line)
         self.assertEqual(found, expected, failure)
Exemplo n.º 6
0
 def test_topology_embl(self):
     """Check the topology annotation parsed from EMBL ID lines."""
     # Low-level test: only the ID line is handed to the scanner.
     # Each case pairs an ID line with the topology expected from it
     # (None when no 'topology' annotation should be set).
     cases = [
         ("ID   X56734; SV 1; linear; mRNA; STD; PLN; 1859 BP.",
          "linear"),
         ("ID   CD789012; SV 4; linear; genomic DNA; HTG; MAM; 500 BP.",
          "linear"),
         ("ID   BSUB9999   standard; circular DNA; PRO; 4214630 BP.",
          "circular"),
         ("ID   SC10H5 standard; DNA; PRO; 4870 BP.",
          None),
         ("ID   NRP_AX000635; PRT; NR1; 15 SQ",
          None),
         ("ID   NRP0000016E; PRT; NR2; 5 SQ",
          None),
     ]
     for id_line, expected in cases:
         embl_scanner = Scanner.EmblScanner()
         feature_consumer = GenBank._FeatureConsumer(
             1, GenBank.FeatureValueCleaner)
         embl_scanner._feed_first_line(feature_consumer, id_line)
         found = feature_consumer.data.annotations.get('topology', None)
         failure = "Wrong topology %r not %r from %r" % (
             found, expected, id_line)
         self.assertEqual(found, expected, failure)
Exemplo n.º 7
0
 def test_topology_genbank(self):
     """Check annotations and warnings produced by GenBank LOCUS lines."""
     # Low-level test: only the LOCUS line is handed to the scanner.
     # Each case: (LOCUS line, topology, molecule_type, division,
     # expected warning categories or None when no warning is expected).
     cases = [
         ("LOCUS       U00096",
          None, None, None, None),
         # This example is actually fungal, accession U49845 from Saccharomyces cerevisiae:
         ("LOCUS       SCU49845     5028 bp    DNA             PLN       21-JUN-1999",
          None, "DNA", "PLN", None),
         ("LOCUS       AB070938                6497 bp    DNA     linear   BCT 11-OCT-2001",
          "linear", "DNA", "BCT", None),
         ("LOCUS       NC_005816               9609 bp    DNA     circular BCT 21-JUL-2008",
          "circular", "DNA", "BCT", None),
         ("LOCUS       SCX3_BUTOC                64 aa            linear   INV 16-OCT-2001",
          "linear", None, "INV", None),
         ("LOCUS       pEH010                  5743 bp    DNA     circular",
          "circular", "DNA", None, [BiopythonParserWarning]),
         # This is a test of the format > 80 chars long
         ("LOCUS       AZZZAA02123456789 1000000000 bp    DNA     linear   PRI 15-OCT-2018",
          "linear", "DNA", "PRI", None)
     ]
     for locus_line, want_topo, want_mol_type, want_div, want_warnings in cases:
         # Record every warning raised while this one line is processed.
         with warnings.catch_warnings(record=True) as caught:
             warnings.simplefilter("always")
             gb_scanner = Scanner.GenBankScanner()
             feature_consumer = GenBank._FeatureConsumer(
                 1, GenBank.FeatureValueCleaner)
             gb_scanner._feed_first_line(feature_consumer, locus_line)
             # (annotation key, expected value, failure message template)
             checks = (
                 ('topology', want_topo,
                  "Wrong topology %r not %r from %r"),
                 ('molecule_type', want_mol_type,
                  "Wrong molecule_type %r not %r from %r"),
                 ('data_file_division', want_div,
                  "Wrong division %r not %r from %r"),
             )
             for key, expected, template in checks:
                 found = feature_consumer.data.annotations.get(key, None)
                 self.assertEqual(found, expected,
                                  template % (found, expected, locus_line))
             # None means "no warnings expected", i.e. an empty list.
             categories = [] if want_warnings is None else want_warnings
             self.assertEqual(len(caught), len(categories))
             # Lengths match at this point, so zip pairs every warning.
             for record, category in zip(caught, categories):
                 self.assertEqual(record.category, category)
Exemplo n.º 8
0
 def test_topology_embl(self):
     """Check topology, molecule type and division from EMBL ID lines."""
     # Low-level test: only the ID line is handed to the scanner.
     # Each case: (ID line, topology, molecule_type, division).
     cases = [
         # Modern examples with sequence version
         ("ID   X56734; SV 1; linear; mRNA; STD; PLN; 1859 BP.",
          "linear", "mRNA", "PLN"),
         ("ID   CD789012; SV 4; linear; genomic DNA; HTG; MAM; 500 BP.",
          "linear", "genomic DNA", "MAM"),
         # Example to match GenBank example used above:
         ("ID   U49845; SV 1; linear; genomic DNA; STD; FUN; 5028 BP.",
          "linear", "genomic DNA", "FUN"),
         # Old examples:
         ("ID   BSUB9999   standard; circular DNA; PRO; 4214630 BP.",
          "circular", "DNA", "PRO"),
         ("ID   SC10H5 standard; DNA; PRO; 4870 BP.",
          None, "DNA", "PRO"),
         # Patent example from 2016-06-10
         # ftp://ftp.ebi.ac.uk/pub/databases/embl/patent/
         ("ID   A01679; SV 1; linear; unassigned DNA; PAT; MUS; 12 BP.",
          "linear", "unassigned DNA", "MUS"),
         # Old patent examples
         ("ID   NRP_AX000635; PRT; NR1; 15 SQ",
          None, None, "NR1"),
         ("ID   NRP0000016E; PRT; NR2; 5 SQ",
          None, None, "NR2"),
         # KIPO patent examples
         ("ID   DI500001       STANDARD;      PRT;   111 AA.",
          None, None, None),
         ("ID   DI644510   standard; PRT;  1852 AA.",
          None, None, None),
     ]
     for id_line, want_topo, want_mol_type, want_div in cases:
         embl_scanner = Scanner.EmblScanner()
         feature_consumer = GenBank._FeatureConsumer(
             1, GenBank.FeatureValueCleaner)
         embl_scanner._feed_first_line(feature_consumer, id_line)
         # (annotation key, expected value, failure message template)
         checks = (
             ('topology', want_topo,
              "Wrong topology %r not %r from %r"),
             ('molecule_type', want_mol_type,
              "Wrong molecule_type %r not %r from %r"),
             ('data_file_division', want_div,
              "Wrong division %r not %r from %r"),
         )
         for key, expected, template in checks:
             found = feature_consumer.data.annotations.get(key, None)
             self.assertEqual(found, expected,
                              template % (found, expected, id_line))
Exemplo n.º 9
0
 def test_topology_embl(self):
     """Check topology, molecule type and division from EMBL ID lines."""
     # Low-level test: only the ID line is handed to the scanner.
     # Each case: (ID line, topology, molecule_type, division).
     cases = [
         # Modern examples with sequence version
         ("ID   X56734; SV 1; linear; mRNA; STD; PLN; 1859 BP.",
          "linear", "mRNA", "PLN"),
         ("ID   CD789012; SV 4; linear; genomic DNA; HTG; MAM; 500 BP.",
          "linear", "genomic DNA", "MAM"),
         # Example to match GenBank example used above:
         ("ID   U49845; SV 1; linear; genomic DNA; STD; FUN; 5028 BP.",
          "linear", "genomic DNA", "FUN"),
         # Old examples:
         ("ID   BSUB9999   standard; circular DNA; PRO; 4214630 BP.",
          "circular", "DNA", "PRO"),
         ("ID   SC10H5 standard; DNA; PRO; 4870 BP.",
          None, "DNA", "PRO"),
         # Patent example from 2016-06-10
         # ftp://ftp.ebi.ac.uk/pub/databases/embl/patent/
         ("ID   A01679; SV 1; linear; unassigned DNA; PAT; MUS; 12 BP.",
          "linear", "unassigned DNA", "MUS"),
         # Old patent examples
         ("ID   NRP_AX000635; PRT; NR1; 15 SQ",
          None, None, "NR1"),
         ("ID   NRP0000016E; PRT; NR2; 5 SQ",
          None, None, "NR2"),
         # KIPO patent examples
         ("ID   DI500001       STANDARD;      PRT;   111 AA.",
          None, None, None),
         ("ID   DI644510   standard; PRT;  1852 AA.",
          None, None, None),
     ]
     for id_line, want_topo, want_mol_type, want_div in cases:
         embl_scanner = Scanner.EmblScanner()
         feature_consumer = GenBank._FeatureConsumer(
             1, GenBank.FeatureValueCleaner)
         embl_scanner._feed_first_line(feature_consumer, id_line)
         # (annotation key, expected value, failure message template)
         checks = (
             ('topology', want_topo,
              "Wrong topology %r not %r from %r"),
             ('molecule_type', want_mol_type,
              "Wrong molecule_type %r not %r from %r"),
             ('data_file_division', want_div,
              "Wrong division %r not %r from %r"),
         )
         for key, expected, template in checks:
             found = feature_consumer.data.annotations.get(key, None)
             self.assertEqual(found, expected,
                              template % (found, expected, id_line))
Exemplo n.º 10
0
 def test_first_line_imgt(self):
     """Check topology, molecule type and division from IMGT ID lines."""
     # Low-level test: only the ID line is handed to the scanner.
     # Each case: (ID line, topology, molecule_type, division).
     cases = [
         ("ID   HLA00001   standard; DNA; HUM; 3503 BP.",
          None, "DNA", "HUM"),
         ("ID   HLA00001; SV 1; standard; DNA; HUM; 3503 BP.",
          None, "DNA", "HUM"),
     ]
     for id_line, want_topo, want_mol_type, want_div in cases:
         imgt_scanner = Scanner._ImgtScanner()
         feature_consumer = GenBank._FeatureConsumer(
             1, GenBank.FeatureValueCleaner)
         imgt_scanner._feed_first_line(feature_consumer, id_line)
         # (annotation key, expected value, failure message template)
         checks = (
             ('topology', want_topo,
              "Wrong topology %r not %r from %r"),
             ('molecule_type', want_mol_type,
              "Wrong molecule_type %r not %r from %r"),
             ('data_file_division', want_div,
              "Wrong division %r not %r from %r"),
         )
         for key, expected, template in checks:
             found = feature_consumer.data.annotations.get(key, None)
             self.assertEqual(found, expected,
                              template % (found, expected, id_line))
Exemplo n.º 11
0
 def test_first_line_imgt(self):
     """Check topology, molecule type and division from IMGT ID lines."""
     # Low-level test: only the ID line is handed to the scanner.
     # Each case: (ID line, topology, molecule_type, division).
     cases = [
         ("ID   HLA00001   standard; DNA; HUM; 3503 BP.",
          None, "DNA", "HUM"),
         ("ID   HLA00001; SV 1; standard; DNA; HUM; 3503 BP.",
          None, "DNA", "HUM"),
     ]
     for id_line, want_topo, want_mol_type, want_div in cases:
         imgt_scanner = Scanner._ImgtScanner()
         feature_consumer = GenBank._FeatureConsumer(
             1, GenBank.FeatureValueCleaner)
         imgt_scanner._feed_first_line(feature_consumer, id_line)
         # (annotation key, expected value, failure message template)
         checks = (
             ('topology', want_topo,
              "Wrong topology %r not %r from %r"),
             ('molecule_type', want_mol_type,
              "Wrong molecule_type %r not %r from %r"),
             ('data_file_division', want_div,
              "Wrong division %r not %r from %r"),
         )
         for key, expected, template in checks:
             found = feature_consumer.data.annotations.get(key, None)
             self.assertEqual(found, expected,
                              template % (found, expected, id_line))