Пример #1
0
        def test_records_begin(self):
            """Compare the records fetched with the argument 34 to known data.

            The expected ``mm9.chr10`` and ``oryCun1.scaffold_133159`` records
            are built by hand and then checked, position by position, against
            what ``self.idx._get_record(34)`` returns.
            """
            expected = [
                SeqRecord(
                    Seq("TCATAGGTATTTATTTTTAAATATGGTTTGCTTTATGGCTAGAA"
                        "CACACCGATTACTTAAAATAGGATTAACC--CCCATACACTTTA"
                        "AAAATGATTAAACAACATTTCTGCTGCTCGCTCACATTCTTCAT"
                        "AGAAGATGACATAATGTATTTTCCTTTTGGTT"),
                    id="mm9.chr10",
                    name="mm9.chr10",
                    description="",
                    annotations={
                        "start": 3009319,
                        "srcSize": 129993255,
                        "strand": 1,
                        "size": 162,
                    }),
                SeqRecord(
                    Seq("TCACAGATATTTACTATTAAATATGGTTTGTTATATGGTTACGG"
                        "TTCATAGGTTACTTGGAATTGGATTAACCTTCTTATTCATTGCA"
                        "GAATTGGTTACACTGTGTTCTTGACCTTTGCTTGTTTTCTCCAT"
                        "GGAAACTGATGTCAAATACTTTCCCTTTGGTT"),
                    id="oryCun1.scaffold_133159",
                    name="oryCun1.scaffold_133159",
                    description="",
                    annotations={
                        "start": 11087,
                        "srcSize": 13221,
                        "strand": 1,
                        "size": 164,
                    }),
            ]

            fetched_recs = self.idx._get_record(34)

            # Index explicitly so that a short result still raises an error.
            for position, wanted in enumerate(expected):
                self.assertTrue(compare_record(wanted, fetched_recs[position]))
Пример #2
0
 def loop(self, filename, format):
     """Round-trip the records of ``filename`` through BioSQL and GenBank text.

     The records are parsed with ``SeqIO``, loaded into a newly created
     BioSQL namespace, looked up again by name and compared with the
     originals. The retrieved records are then written out in GenBank
     format, parsed back and compared once more.

     filename - path of the sequence file to parse.
     format - ``SeqIO`` format name used to parse ``filename``.
     """
     # Close the input file as soon as it has been parsed instead of
     # leaving the handle to the garbage collector.
     # NOTE(review): the "rU" mode is kept unchanged, but Python 3.11+
     # rejects it; switch to "r" once older interpreters are dropped.
     with open(filename, "rU") as source:
         original_records = list(SeqIO.parse(source, format))
     # now open a connection to load the database
     server = BioSeqDatabase.open_database(driver=DBDRIVER,
                                           user=DBUSER, passwd=DBPASSWD,
                                           host=DBHOST, db=TESTDB)
     try:
         db_name = "test_loop_%s" % filename  # new namespace!
         db = server.new_database(db_name)
         count = db.load(original_records)
         self.assertEqual(count, len(original_records))
         server.commit()
         # Now read them back...
         biosql_records = [db.lookup(name=rec.name)
                           for rec in original_records]
         # And check they agree
         self.assertTrue(compare_records(original_records, biosql_records))
         # Now write to a handle...
         handle = StringIO()
         SeqIO.write(biosql_records, handle, "gb")
         # Now read them back...
         handle.seek(0)
         new_records = list(SeqIO.parse(handle, "gb"))
         # And check they still agree
         self.assertEqual(len(new_records), len(original_records))
         for old, new in zip(original_records, new_records):
             # TODO - remove this hack once these annotations are written:
             for key in ["comment", "references", "db_source"]:
                 if key in old.annotations and key not in new.annotations:
                     del old.annotations[key]
             self.assertTrue(compare_record(old, new))
     finally:
         # Release the database connection even when a check above fails.
         server.close()
    def get_raw_check(self, filename, format, alphabet):
        """Check ``get_raw`` from ``SeqIO.index`` against the file contents.

        Every record identifier (lower-cased through ``key_function``) must
        be present in the index, its raw text must be a non-blank piece of
        the file, and parsing that raw text again must give a record equal
        to the one returned by the index.

        filename - path of the sequence file to index.
        format - ``SeqIO`` format name.
        alphabet - alphabet passed through to ``SeqIO``.
        """
        if format in SeqIO._BinaryFormats:
            # This means SFF at the moment, which does not
            # implement the get_raw method
            return
        with open(filename, "rU") as handle:
            raw_file = handle.read()
        # Also checking the key_function here
        id_list = [rec.id.lower()
                   for rec in SeqIO.parse(filename, format, alphabet)]
        rec_dict = SeqIO.index(filename, format, alphabet,
                               key_function=lambda x: x.lower())
        self.assertEqual(set(id_list), set(rec_dict.keys()))
        self.assertEqual(len(id_list), len(rec_dict))
        for key in id_list:
            self.assertTrue(key in rec_dict)
            self.assertEqual(key, rec_dict[key].id.lower())
            self.assertEqual(key, rec_dict.get(key).id.lower())
            raw = rec_dict.get_raw(key)
            self.assertTrue(raw.strip())
            self.assertTrue(raw in raw_file)
            if format in ["ig"]:
                # These have a header structure and can't be parsed
                # individually (at least, not right now).
                continue
            rec1 = rec_dict[key]
            rec2 = SeqIO.read(StringIO(raw), format, alphabet)
            self.assertEqual(True, compare_record(rec1, rec2))
 def get_raw_check(self, filename, format, alphabet):
     """Check ``get_raw`` from ``SeqIO.index`` against the file contents.

     For every record identifier (lower-cased through ``key_function``)
     the raw bytes returned by the index must be a non-blank piece of the
     file, and parsing those bytes again must give a record equal to the
     one returned by the index.

     filename - path of the sequence file to index.
     format - ``SeqIO`` format name.
     alphabet - alphabet passed through to ``SeqIO``.
     """
     with open(filename, "rb") as source:
         raw_file = source.read()
     # Also checking the key_function here
     id_list = [rec.id.lower()
                for rec in SeqIO.parse(filename, format, alphabet)]
     rec_dict = SeqIO.index(filename, format, alphabet,
                            key_function=lambda x: x.lower())
     try:
         self.assertEqual(set(id_list), set(rec_dict.keys()))
         self.assertEqual(len(id_list), len(rec_dict))
         for key in id_list:
             self.assertTrue(key in rec_dict)
             self.assertEqual(key, rec_dict[key].id.lower())
             self.assertEqual(key, rec_dict.get(key).id.lower())
             raw = rec_dict.get_raw(key)
             self.assertTrue(raw.strip())
             self.assertTrue(raw in raw_file)
             rec1 = rec_dict[key]
             # Following isn't very elegant, but it lets me test the
             # __getitem__ SFF code is working.
             if format in SeqIO._BinaryFormats:
                 handle = BytesIO(raw)
             else:
                 handle = StringIO(_bytes_to_string(raw))
             if format in ("sff", "sff-trim"):
                 # The two SFF variants differ only in the trim flag.
                 proxy = rec_dict._proxy
                 rec2 = SeqIO.SffIO._sff_read_seq_record(
                     handle,
                     proxy._flows_per_read,
                     proxy._flow_chars,
                     proxy._key_sequence,
                     proxy._alphabet,
                     trim=(format == "sff-trim"))
             elif format == "uniprot-xml":
                 self.assertTrue(raw.startswith(_as_bytes("<entry ")))
                 self.assertTrue(raw.endswith(_as_bytes("</entry>")))
                 # Currently the __getitem__ method uses this
                 # trick too, but we hope to fix that later.
                 # The continuation lines of the literal keep their
                 # original indentation so the string is unchanged.
                 raw = """<?xml version='1.0' encoding='UTF-8'?>
             <uniprot xmlns="http://uniprot.org/uniprot"
             xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
             xsi:schemaLocation="http://uniprot.org/uniprot
             http://www.uniprot.org/support/docs/uniprot.xsd">
             %s
             </uniprot>
             """ % _bytes_to_string(raw)
                 handle = StringIO(raw)
                 rec2 = SeqIO.read(handle, format, alphabet)
             else:
                 rec2 = SeqIO.read(handle, format, alphabet)
             self.assertEqual(True, compare_record(rec1, rec2))
     finally:
         # Close the index's file handle even when a check above fails.
         rec_dict._proxy._handle.close()  # TODO - Better solution
     del rec_dict
Пример #5
0
 def test_multi_ex_index(self):
     """Index SwissProt text and uniprot XML versions of several examples.

     The identifiers listed in ``SwissProt/multi_ex.list`` must match the
     keys of both indexes, and every record obtained with ``SeqIO.parse``
     is compared with the record the matching index returns for its id.
     """
     txt_list = list(SeqIO.parse("SwissProt/multi_ex.txt", "swiss"))
     xml_list = list(SeqIO.parse("SwissProt/multi_ex.xml", "uniprot-xml"))
     # Read the identifier list inside a with-block so the file is closed.
     with open("SwissProt/multi_ex.list") as handle:
         ids = [line.strip() for line in handle]
     txt_index = SeqIO.index("SwissProt/multi_ex.txt", "swiss")
     xml_index = SeqIO.index("SwissProt/multi_ex.xml", "uniprot-xml")
     self.assertEqual(sorted(txt_index), sorted(ids))
     self.assertEqual(sorted(xml_index), sorted(ids))
     # Check SeqIO.parse() versus SeqIO.index() for plain text "swiss"
     for old in txt_list:
         new = txt_index[old.id]
         compare_record(old, new)
     # Check SeqIO.parse() versus SeqIO.index() for XML "uniprot-xml"
     for old in xml_list:
         new = xml_index[old.id]
         compare_record(old, new)
Пример #6
0
    def check(self, t_format, t_filename, t_count=1):
        """Load a sequence file into ``self.db`` and look every record up again.

        The file is parsed twice: once to load the records (the number
        loaded must equal ``t_count``) and once to compare each parsed
        record with what the database returns for its name, display id,
        versioned id, first accession and GI number, where applicable.

        t_format - ``SeqIO`` format name of the file.
        t_filename - path of the sequence file.
        t_count - number of records expected to be loaded (default 1).
        """
        db = self.db

        # Use with-blocks so both file handles are closed deterministically.
        with open(t_filename, "r") as handle:
            count = db.load(SeqIO.parse(handle=handle, format=t_format))
        assert count == t_count, \
            "Loaded %r records, expected %r" % (count, t_count)
        self.server.commit()

        with open(t_filename, "r") as handle:
            for record in SeqIO.parse(handle=handle, format=t_format):
                # Retrieve by name/display_id
                key = record.name
                db_rec = db.lookup(name=key)
                compare_record(record, db_rec)
                db_rec = db.lookup(display_id=key)
                compare_record(record, db_rec)

                key = record.id
                if key.count(".") == 1 and key.split(".")[1].isdigit():
                    # Retrieve by version
                    db_rec = db.lookup(version=key)
                    compare_record(record, db_rec)

                if "accessions" in record.annotations:
                    # Only expect FIRST accession to work!
                    key = record.annotations["accessions"][0]
                    assert key, "Blank accession in annotation %s" % repr(record.annotations)
                    if key != record.id:
                        # Retrieve by accession
                        db_rec = db.lookup(accession=key)
                        compare_record(record, db_rec)

                if "gi" in record.annotations:
                    key = record.annotations['gi']
                    if key != record.id:
                        # Retrieve by GI
                        db_rec = db.lookup(primary_id=key)
                        compare_record(record, db_rec)
Пример #7
0
    def check_rewrite(self, filename):
        """Write an EMBL record out as EMBL again and compare the result.

        Before writing, the record's dbxrefs are emptied, only its first
        accession is kept and its references are removed.
        """
        original = SeqIO.read(filename, "embl")

        # TODO - Check these properties:
        original.dbxrefs = []
        first_accession = original.annotations['accessions'][:1]
        original.annotations['accessions'] = first_accession
        del original.annotations['references']

        handle = StringIO()
        written = SeqIO.write(original, handle, "embl")
        self.assertEqual(1, written)
        handle.seek(0)
        reloaded = SeqIO.read(handle, "embl")

        matches = compare_record(original, reloaded)
        self.assertTrue(matches)
Пример #8
0
    iterator = SeqIO.parse(handle=open(t_filename,"r"), format=t_format)
    count = db.load(iterator)
    assert count == t_count
    
    #print " - Committing %i records" % count
    server.commit()
    
    iterator = SeqIO.parse(handle=open(t_filename,"r"), format=t_format)
    for record in iterator :
        print " - %s, %s" % (checksum_summary(record), record.id)

        key = record.name
        print " - Retrieving by name/display_id '%s'," % key,
        db_rec = db.lookup(name=key)
        compare_record(record, db_rec)
        db_rec = db.lookup(display_id=key)
        compare_record(record, db_rec)
        print "OK"

        key = record.id
        if key.count(".")==1 and key.split(".")[1].isdigit() :
            print " - Retrieving by version '%s'," % key,
            db_rec = db.lookup(version=key)
            compare_record(record, db_rec)
            print "OK"
        
        if "accessions" in record.annotations :
            accs = set(record.annotations["accessions"])
            for key in accs :
                assert key, "Blank accession in annotation %s" % repr(accs)
Пример #9
0
    def get_raw_check(self, filename, format, alphabet, comp):
        """Check ``get_raw`` from ``SeqIO.index`` against the file contents.

        For every record identifier (lower-cased through ``key_function``)
        the raw data returned by the index must be a non-blank ``bytes``
        piece of the file, and parsing it again must give a record equal
        to the one returned by the index.

        filename - path of the sequence file to index.
        format - ``SeqIO`` format name.
        alphabet - alphabet passed through to ``SeqIO``.
        comp - true when the file has to be read through ``gzip``.
        """
        # Also checking the key_function here
        if comp:
            h = gzip.open(filename, "rb")
            try:
                raw_file = h.read()
            finally:
                h.close()
            h = gzip_open(filename, format)
            try:
                id_list = [
                    rec.id.lower() for rec in SeqIO.parse(h, format, alphabet)
                ]
            finally:
                h.close()
        else:
            with open(filename, "rb") as h:
                raw_file = h.read()
            id_list = [
                rec.id.lower()
                for rec in SeqIO.parse(filename, format, alphabet)
            ]

        def lower_key(name):
            """Return the lower-case form of a record identifier."""
            return name.lower()

        if format in ["sff"]:
            # Parser warnings are silenced only while building the SFF index.
            with warnings.catch_warnings():
                warnings.simplefilter('ignore', BiopythonParserWarning)
                rec_dict = SeqIO.index(filename,
                                       format,
                                       alphabet,
                                       key_function=lower_key)
        else:
            rec_dict = SeqIO.index(filename,
                                   format,
                                   alphabet,
                                   key_function=lower_key)

        try:
            self.assertEqual(set(id_list), set(rec_dict))
            self.assertEqual(len(id_list), len(rec_dict))
            for key in id_list:
                self.assertTrue(key in rec_dict)
                self.assertEqual(key, rec_dict[key].id.lower())
                self.assertEqual(key, rec_dict.get(key).id.lower())
                raw = rec_dict.get_raw(key)
                self.assertTrue(isinstance(raw, bytes),
                                "Didn't get bytes from %s get_raw" % format)
                self.assertTrue(raw.strip())
                self.assertTrue(raw in raw_file)
                rec1 = rec_dict[key]
                # Following isn't very elegant, but it lets me test the
                # __getitem__ SFF code is working.
                if format in SeqIO._BinaryFormats:
                    handle = BytesIO(raw)
                else:
                    handle = StringIO(_bytes_to_string(raw))
                if format in ("sff", "sff-trim"):
                    # The two SFF variants differ only in the trim flag.
                    proxy = rec_dict._proxy
                    rec2 = SeqIO.SffIO._sff_read_seq_record(
                        handle,
                        proxy._flows_per_read,
                        proxy._flow_chars,
                        proxy._key_sequence,
                        proxy._alphabet,
                        trim=(format == "sff-trim"))
                elif format == "uniprot-xml":
                    self.assertTrue(raw.startswith(_as_bytes("<entry ")))
                    self.assertTrue(raw.endswith(_as_bytes("</entry>")))
                    # Currently the __getitem__ method uses this
                    # trick too, but we hope to fix that later.
                    # The continuation lines of the literal keep their
                    # original indentation so the string is unchanged.
                    raw = """<?xml version='1.0' encoding='UTF-8'?>
                <uniprot xmlns="http://uniprot.org/uniprot"
                xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
                xsi:schemaLocation="http://uniprot.org/uniprot
                http://www.uniprot.org/support/docs/uniprot.xsd">
                %s
                </uniprot>
                """ % _bytes_to_string(raw)
                    handle = StringIO(raw)
                    rec2 = SeqIO.read(handle, format, alphabet)
                else:
                    rec2 = SeqIO.read(handle, format, alphabet)
                self.assertEqual(True, compare_record(rec1, rec2))
        finally:
            # Close the index even when a check above fails.
            rec_dict.close()
        del rec_dict
    iterator = SeqIO.parse(handle=open(t_filename,"r"), format=t_format)
    count = db.load(iterator)
    assert count == t_count
    db_count += count
    
    #print " - Committing %i records" % count
    server.commit()
    
    iterator = SeqIO.parse(handle=open(t_filename,"r"), format=t_format)
    for record in iterator:
        print " - %s, %s" % (checksum_summary(record), record.id)

        key = record.name
        print " - Retrieving by name/display_id '%s'," % key,
        db_rec = db.lookup(name=key)
        compare_record(record, db_rec)
        db_rec = db.lookup(display_id=key)
        compare_record(record, db_rec)
        print "OK"

        key = record.id
        if key.count(".")==1 and key.split(".")[1].isdigit():
            print " - Retrieving by version '%s'," % key,
            db_rec = db.lookup(version=key)
            compare_record(record, db_rec)
            print "OK"
        
        if "accessions" in record.annotations:
            accs = sorted(set(record.annotations["accessions"]))
            for key in accs:
                assert key, "Blank accession in annotation %s" % repr(accs)
Пример #11
0
    def get_raw_check(self, filename, format, alphabet, comp):
        """Check ``get_raw`` from ``SeqIO.index`` and ``SeqIO.index_db``.

        For every record identifier (lower-cased through ``key_function``)
        the raw data returned by the ``index`` dictionary must be a
        non-blank ``bytes`` piece of the file, must equal the raw data
        from the in-memory ``index_db`` dictionary, and must parse back
        into a record equal to the one returned by the index.

        filename - path of the sequence file to index.
        format - ``SeqIO`` format name.
        alphabet - alphabet passed through to ``SeqIO``.
        comp - true when the file has to be read through ``gzip``.
        """
        # Also checking the key_function here
        if comp:
            with gzip.open(filename, "rb") as h:
                raw_file = h.read()
            h = gzip_open(filename, format)
            try:
                id_list = [rec.id.lower() for rec in
                           SeqIO.parse(h, format, alphabet)]
            finally:
                h.close()
        else:
            with open(filename, "rb") as h:
                raw_file = h.read()
            id_list = [rec.id.lower() for rec in
                       SeqIO.parse(filename, format, alphabet)]

        def lower_key(name):
            """Return the lower-case form of a record identifier."""
            return name.lower()

        if format in ["sff"]:
            # Parser warnings are silenced only while building SFF indexes.
            with warnings.catch_warnings():
                warnings.simplefilter('ignore', BiopythonParserWarning)
                rec_dict = SeqIO.index(filename, format, alphabet,
                                       key_function=lower_key)
                rec_dict_db = SeqIO.index_db(":memory:", filename, format, alphabet,
                                             key_function=lower_key)
        else:
            rec_dict = SeqIO.index(filename, format, alphabet,
                                   key_function=lower_key)
            rec_dict_db = SeqIO.index_db(":memory:", filename, format, alphabet,
                                         key_function=lower_key)

        try:
            self.assertEqual(set(id_list), set(rec_dict))
            self.assertEqual(set(id_list), set(rec_dict_db))
            self.assertEqual(len(id_list), len(rec_dict))
            for key in id_list:
                self.assertIn(key, rec_dict)
                self.assertEqual(key, rec_dict[key].id.lower())
                self.assertEqual(key, rec_dict.get(key).id.lower())
                raw = rec_dict.get_raw(key)
                self.assertTrue(isinstance(raw, bytes),
                                "Didn't get bytes from %s get_raw" % format)
                self.assertTrue(raw.strip())
                self.assertIn(raw, raw_file)

                raw_db = rec_dict_db.get_raw(key)
                # Via index using format-specific get_raw which scans the file,
                # Via index_db in general using raw length found when indexing.
                self.assertEqual(raw, raw_db,
                                 "index and index_db .get_raw() different for %s" % format)

                rec1 = rec_dict[key]
                # Following isn't very elegant, but it lets me test the
                # __getitem__ SFF code is working.
                if format in SeqIO._BinaryFormats:
                    handle = BytesIO(raw)
                else:
                    handle = StringIO(_bytes_to_string(raw))
                if format in ("sff", "sff-trim"):
                    # The two SFF variants differ only in the trim flag.
                    proxy = rec_dict._proxy
                    rec2 = SeqIO.SffIO._sff_read_seq_record(
                        handle,
                        proxy._flows_per_read,
                        proxy._flow_chars,
                        proxy._key_sequence,
                        proxy._alphabet,
                        trim=(format == "sff-trim"))
                elif format == "uniprot-xml":
                    self.assertTrue(raw.startswith(b"<entry "))
                    self.assertTrue(raw.endswith(b"</entry>"))
                    # Currently the __getitem__ method uses this
                    # trick too, but we hope to fix that later.
                    # The continuation lines of the literal keep their
                    # original indentation so the string is unchanged.
                    raw = """<?xml version='1.0' encoding='UTF-8'?>
                <uniprot xmlns="http://uniprot.org/uniprot"
                xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
                xsi:schemaLocation="http://uniprot.org/uniprot
                http://www.uniprot.org/support/docs/uniprot.xsd">
                %s
                </uniprot>
                """ % _bytes_to_string(raw)
                    handle = StringIO(raw)
                    rec2 = SeqIO.read(handle, format, alphabet)
                else:
                    rec2 = SeqIO.read(handle, format, alphabet)
                self.assertEqual(True, compare_record(rec1, rec2))
        finally:
            # Close both indexes, even when a check above fails.
            try:
                rec_dict_db.close()
            finally:
                rec_dict.close()
        del rec_dict
Пример #12
0
        def test_records_end(self):
            """Compare the records fetched with the argument 99228 to known data.

            Six expected records are built by hand and then checked,
            position by position, against what
            ``self.idx._get_record(99228)`` returns.
            """

            def expected_record(sequence, identifier, start, src_size,
                                strand, size):
                """Build one expected record; id and name are the same."""
                return SeqRecord(Seq(sequence),
                                 id=identifier,
                                 name=identifier,
                                 description="",
                                 annotations={"start": start,
                                              "srcSize": src_size,
                                              "strand": strand,
                                              "size": size})

            expected = [
                expected_record(
                    "TGTTTAGTACC----ATGCTTAGGAATGATAAACTCACTTAGTGtt",
                    "mm9.chr10", 3021494, 129993255, 1, 42),
                expected_record(
                    "TGTTGCATGTCCTTTATTCTTTGGCGTGATAGGCTCACCCAATCTT",
                    "ponAbe2.chr6", 16173516, 174210431, -1, 46),
                expected_record(
                    "TGTTGCATATCCTTTATTCTTTGGCGTGATAGGCTCACCCAATCTT",
                    "panTro2.chr6", 16393864, 173908612, -1, 46),
                expected_record(
                    "TGTTGCATGTCGTTTATTCTTTGGCGTGATAGGCTCACCCAATCTT",
                    "hg18.chr6", 15875298, 170899992, -1, 46),
                expected_record(
                    "TGTTAAGTCTCACTTGCTGTTCAAAGTGATAGCTTCACTCCATCAT",
                    "canFam2.chr1", 78072287, 125616256, -1, 46),
                expected_record(
                    "TGTTTAAAATG----ATTGCTAGAACTTCTA--CTCACTGGA----",
                    "ornAna1.chr2", 14757144, 54797317, -1, 36),
            ]

            fetched_recs = self.idx._get_record(99228)

            # Index explicitly so that a short result still raises an error.
            for position, wanted in enumerate(expected):
                self.assertTrue(compare_record(wanted, fetched_recs[position]))
Пример #13
0
        def test_records_end(self):
            """Check the six records returned by ``_get_record(99228)``.

            The expected values are kept in a table, turned into
            ``SeqRecord`` objects and compared in order with the records
            fetched from ``self.idx``.
            """
            # Columns: sequence, id (also used as name), start, srcSize,
            # strand, size.
            table = (
                ("TGTTTAGTACC----ATGCTTAGGAATGATAAACTCACTTAGTGtt",
                 "mm9.chr10", 3021494, 129993255, 1, 42),
                ("TGTTGCATGTCCTTTATTCTTTGGCGTGATAGGCTCACCCAATCTT",
                 "ponAbe2.chr6", 16173516, 174210431, -1, 46),
                ("TGTTGCATATCCTTTATTCTTTGGCGTGATAGGCTCACCCAATCTT",
                 "panTro2.chr6", 16393864, 173908612, -1, 46),
                ("TGTTGCATGTCGTTTATTCTTTGGCGTGATAGGCTCACCCAATCTT",
                 "hg18.chr6", 15875298, 170899992, -1, 46),
                ("TGTTAAGTCTCACTTGCTGTTCAAAGTGATAGCTTCACTCCATCAT",
                 "canFam2.chr1", 78072287, 125616256, -1, 46),
                ("TGTTTAAAATG----ATTGCTAGAACTTCTA--CTCACTGGA----",
                 "ornAna1.chr2", 14757144, 54797317, -1, 36),
            )

            recs = [
                SeqRecord(
                    Seq(sequence),
                    id=identifier,
                    name=identifier,
                    description="",
                    annotations={
                        "start": start,
                        "srcSize": src_size,
                        "strand": strand,
                        "size": size
                    })
                for sequence, identifier, start, src_size, strand, size
                in table
            ]

            fetched_recs = self.idx._get_record(99228)

            # Index the fetched records directly: a result with fewer than
            # six records must still fail with an error.
            for index, wanted in enumerate(recs):
                found = fetched_recs[index]
                self.assertTrue(compare_record(wanted, found))
Пример #14
0
    iterator = SeqIO.parse(handle=open(t_filename, "r"), format=t_format)
    count = db.load(iterator)
    assert count == t_count
    db_count += count

    #print " - Committing %i records" % count
    server.commit()

    iterator = SeqIO.parse(handle=open(t_filename, "r"), format=t_format)
    for record in iterator:
        print " - %s, %s" % (checksum_summary(record), record.id)

        key = record.name
        print " - Retrieving by name/display_id '%s'," % key,
        db_rec = db.lookup(name=key)
        compare_record(record, db_rec)
        db_rec = db.lookup(display_id=key)
        compare_record(record, db_rec)
        print "OK"

        key = record.id
        if key.count(".") == 1 and key.split(".")[1].isdigit():
            print " - Retrieving by version '%s'," % key,
            db_rec = db.lookup(version=key)
            compare_record(record, db_rec)
            print "OK"

        if "accessions" in record.annotations:
            accs = set(record.annotations["accessions"])
            for key in accs:
                assert key, "Blank accession in annotation %s" % repr(accs)
Пример #15
0
    iterator = SeqIO.parse(handle=open(t_filename,"r"), format=t_format)
    count = db.load(iterator)
    assert count == t_count
    
    #print " - Committing %i records" % count
    server.commit()
    
    iterator = SeqIO.parse(handle=open(t_filename,"r"), format=t_format)
    for record in iterator :
        print " - %s, %s" % (checksum_summary(record), record.id)

        key = record.name
        print " - Retrieving by name/display_id '%s'," % key,
        db_rec = db.lookup(name=key)
        compare_record(record, db_rec)
        db_rec = db.lookup(display_id=key)
        compare_record(record, db_rec)
        print "OK"

        key = record.id
        if key.count(".")==1 and key.split(".")[1].isdigit() :
            print " - Retrieving by version '%s'," % key,
            db_rec = db.lookup(version=key)
            compare_record(record, db_rec)
            print "OK"
        
        if "accessions" in record.annotations :
            accs = set(record.annotations["accessions"])
            for key in accs :
                assert key, "Blank accession in annotation %s" % repr(accs)
Пример #16
0
    def get_raw_check(self, filename, format, alphabet, comp):
        """Check ``get_raw`` from ``SeqIO.index`` and ``SeqIO.index_db``.

        For every record identifier (lower-cased through ``key_function``)
        the raw data returned by the ``index`` dictionary must be a
        non-blank ``bytes`` piece of the file and must parse back into a
        record equal to the one returned by the index. When ``sqlite3``
        is available, the same raw data must also come back from an
        in-memory ``index_db`` dictionary.

        filename - path of the sequence file to index.
        format - ``SeqIO`` format name.
        alphabet - alphabet passed through to ``SeqIO``.
        comp - true when the file has to be read through ``gzip``.
        """
        # Also checking the key_function here
        if comp:
            with gzip.open(filename, "rb") as handle:
                raw_file = handle.read()
            with gzip_open(filename, format) as handle:
                id_list = [
                    rec.id.lower()
                    for rec in SeqIO.parse(handle, format, alphabet)
                ]
        else:
            with open(filename, "rb") as handle:
                raw_file = handle.read()
            id_list = [
                rec.id.lower()
                for rec in SeqIO.parse(filename, format, alphabet)
            ]

        def lower_key(name):
            """Return the lower-case form of a record identifier."""
            return name.lower()

        # Stays None when sqlite3 is unavailable, so no index_db is built.
        rec_dict_db = None
        if format in ["sff"]:
            # Parser warnings are silenced only while building SFF indexes.
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", BiopythonParserWarning)
                rec_dict = SeqIO.index(
                    filename, format, alphabet, key_function=lower_key)
                if sqlite3:
                    rec_dict_db = SeqIO.index_db(
                        ":memory:", filename, format, alphabet,
                        key_function=lower_key)
        else:
            rec_dict = SeqIO.index(
                filename, format, alphabet, key_function=lower_key)
            if sqlite3:
                rec_dict_db = SeqIO.index_db(
                    ":memory:", filename, format, alphabet,
                    key_function=lower_key)

        try:
            self.assertEqual(set(id_list), set(rec_dict))
            if rec_dict_db is not None:
                self.assertEqual(set(id_list), set(rec_dict_db))
            self.assertEqual(len(id_list), len(rec_dict))
            for key in id_list:
                self.assertIn(key, rec_dict)
                self.assertEqual(key, rec_dict[key].id.lower())
                self.assertEqual(key, rec_dict.get(key).id.lower())
                raw = rec_dict.get_raw(key)
                self.assertTrue(isinstance(raw, bytes),
                                "Didn't get bytes from %s get_raw" % format)
                self.assertTrue(raw.strip())
                self.assertIn(raw, raw_file)

                if rec_dict_db is not None:
                    raw_db = rec_dict_db.get_raw(key)
                    # Via index using format-specific get_raw which scans
                    # the file, via index_db in general using raw length
                    # found when indexing.
                    self.assertEqual(
                        raw, raw_db,
                        "index and index_db .get_raw() different for %s" % format)

                rec1 = rec_dict[key]
                # Following isn't very elegant, but it lets me test the
                # __getitem__ SFF code is working.
                if format in SeqIO._BinaryFormats:
                    handle = BytesIO(raw)
                else:
                    handle = StringIO(raw.decode())
                if format in ("sff", "sff-trim"):
                    # The two SFF variants differ only in the trim flag.
                    proxy = rec_dict._proxy
                    rec2 = SeqIO.SffIO._sff_read_seq_record(
                        handle,
                        proxy._flows_per_read,
                        proxy._flow_chars,
                        proxy._key_sequence,
                        proxy._alphabet,
                        trim=(format == "sff-trim"))
                elif format == "uniprot-xml":
                    self.assertTrue(raw.startswith(b"<entry "))
                    self.assertTrue(raw.endswith(b"</entry>"))
                    # Currently the __getitem__ method uses this
                    # trick too, but we hope to fix that later.
                    # The continuation lines of the literal keep their
                    # original indentation so the string is unchanged.
                    raw = """<?xml version='1.0' encoding='UTF-8'?>
                <uniprot xmlns="http://uniprot.org/uniprot"
                xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
                xsi:schemaLocation="http://uniprot.org/uniprot
                http://www.uniprot.org/support/docs/uniprot.xsd">
                %s
                </uniprot>
                """ % raw.decode()
                    handle = StringIO(raw)
                    rec2 = SeqIO.read(handle, format, alphabet)
                else:
                    rec2 = SeqIO.read(handle, format, alphabet)
                self.assertEqual(True, compare_record(rec1, rec2))
        finally:
            # Close every index that was opened, even when a check fails.
            try:
                if rec_dict_db is not None:
                    rec_dict_db.close()
            finally:
                rec_dict.close()
        del rec_dict
Пример #17
0
    def get_raw_check(self, filename, fmt, comp):
        """Check get_raw() of SeqIO.index and SeqIO.index_db against the file.

        For every record in ``filename`` this verifies that the lower-cased
        record id is a key of the index (so ``key_function`` is exercised too),
        that ``get_raw`` returns non-blank bytes found verbatim in the
        (decompressed) file contents, that ``index`` and ``index_db`` agree on
        those raw bytes, and that re-parsing the raw bytes gives a record
        matching the one returned by ``__getitem__``.

        Arguments:
         - filename - path of the sequence file to index.
         - fmt - SeqIO format name, e.g. "fasta", "sff" or "uniprot-xml".
         - comp - true if the file is gzip compressed; its reference contents
           and record ids are then read through ``gzip.open``.

        Raises RuntimeError if ``self.get_mode(fmt)`` returns anything other
        than "b" or "t" while there are records to check.
        """
        # Also checking the key_function here
        msg = "Test failure parsing file %s with format %s" % (filename, fmt)
        # "b" or "t"; assumed to depend only on fmt, so it is looked up once
        # rather than once per record.
        mode = self.get_mode(fmt)
        if comp:
            with gzip.open(filename, "rb") as handle:
                raw_file = handle.read()
            with gzip.open(filename, "r" + mode) as handle:
                id_list = [rec.id.lower() for rec in SeqIO.parse(handle, fmt)]
        else:
            with open(filename, "rb") as handle:
                raw_file = handle.read()
            id_list = [rec.id.lower() for rec in SeqIO.parse(filename, fmt)]

        def open_indexes():
            """Return (index, index_db); index_db is None without sqlite3."""
            index = SeqIO.index(filename, fmt, key_function=str.lower)
            if not sqlite3:
                return index, None
            try:
                index_db = SeqIO.index_db(
                    ":memory:",
                    filename,
                    fmt,
                    key_function=str.lower,
                )
            except Exception:
                # Do not leak the first index if building the second fails.
                index.close()
                raise
            return index, index_db

        if fmt == "sff":
            # Parser warnings are silenced only while indexing SFF files.
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", BiopythonParserWarning)
                rec_dict, rec_dict_db = open_indexes()
        else:
            rec_dict, rec_dict_db = open_indexes()

        try:
            self.assertEqual(set(id_list), set(rec_dict), msg=msg)
            if rec_dict_db is not None:
                self.assertEqual(set(id_list), set(rec_dict_db), msg=msg)
            self.assertEqual(len(id_list), len(rec_dict), msg=msg)
            for key in id_list:
                self.assertIn(key, rec_dict, msg=msg)
                self.assertEqual(key, rec_dict[key].id.lower(), msg=msg)
                self.assertEqual(key, rec_dict.get(key).id.lower(), msg=msg)
                raw = rec_dict.get_raw(key)
                self.assertIsInstance(raw, bytes, msg=msg)
                self.assertTrue(raw.strip(), msg=msg)
                self.assertIn(raw, raw_file, msg=msg)

                if rec_dict_db is not None:
                    raw_db = rec_dict_db.get_raw(key)
                    # Via index using format-specific get_raw which scans the
                    # file, via index_db in general using raw length found
                    # when indexing.
                    self.assertEqual(raw, raw_db, msg=msg)

                rec1 = rec_dict[key]
                # Following isn't very elegant, but it lets me test the
                # __getitem__ SFF code is working.
                if mode == "b":
                    handle = BytesIO(raw)
                elif mode == "t":
                    handle = StringIO(raw.decode())
                else:
                    raise RuntimeError("Unexpected mode %s" % mode)
                if fmt == "sff":
                    rec2 = SeqIO.SffIO._sff_read_seq_record(
                        handle,
                        rec_dict._proxy._flows_per_read,
                        rec_dict._proxy._flow_chars,
                        rec_dict._proxy._key_sequence,
                        trim=False,
                    )
                elif fmt == "sff-trim":
                    rec2 = SeqIO.SffIO._sff_read_seq_record(
                        handle,
                        rec_dict._proxy._flows_per_read,
                        rec_dict._proxy._flow_chars,
                        rec_dict._proxy._key_sequence,
                        trim=True,
                    )
                elif fmt == "uniprot-xml":
                    self.assertTrue(raw.startswith(b"<entry "), msg=msg)
                    self.assertTrue(raw.endswith(b"</entry>"), msg=msg)
                    # Currently the __getitem__ method uses this
                    # trick too, but we hope to fix that later.
                    # The whitespace inside this literal is deliberately left
                    # exactly as it was, so the wrapped document is unchanged.
                    wrapped = ("""<?xml version='1.0' encoding='UTF-8'?>
                <uniprot xmlns="http://uniprot.org/uniprot"
                xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
                xsi:schemaLocation="http://uniprot.org/uniprot
                http://www.uniprot.org/support/docs/uniprot.xsd">
                %s
                </uniprot>
                """ % raw.decode())
                    handle = StringIO(wrapped)
                    rec2 = SeqIO.read(handle, fmt)
                else:
                    rec2 = SeqIO.read(handle, fmt)
                self.assertEqual(True, compare_record(rec1, rec2), msg=msg)
        finally:
            # Release file handles (and the SQLite connection) even when an
            # assertion above fails.
            rec_dict.close()
            if rec_dict_db is not None:
                rec_dict_db.close()