Пример #1
 def test_crc_checksum_collision(self):
     # Known crc64 collision: the two light chain strings differ, yet
     # crc64 yields the same value for both while the other checksums
     # still tell them apart.
     first = self.str_light_chain_one
     second = self.str_light_chain_two
     self.assertNotEqual(first, second)
     # (assertion, checksum function) pairs, in the order they are checked.
     checks = (
         (self.assertNotEqual, crc32),
         (self.assertEqual, crc64),
         (self.assertNotEqual, gcg),
         (self.assertNotEqual, seguid),
     )
     for check, checksum in checks:
         check(checksum(first), checksum(second))
Пример #2
 def test_crc_checksum_collision(self):
     # Explicit check of a crc64 collision between two distinct sequences.
     chain_a, chain_b = self.str_light_chain_one, self.str_light_chain_two

     # The inputs themselves must not be equal ...
     self.assertNotEqual(chain_a, chain_b)

     # ... and crc32 distinguishes them ...
     self.assertNotEqual(crc32(chain_a), crc32(chain_b))

     # ... but crc64 maps both to the same checksum ...
     self.assertEqual(crc64(chain_a), crc64(chain_b))

     # ... while gcg and seguid again give different results.
     self.assertNotEqual(gcg(chain_a), gcg(chain_b))
     self.assertNotEqual(seguid(chain_a), seguid(chain_b))
Пример #3
    def _load_reference(self, reference, rank, bioentry_id):
        """Record a SeqRecord's annotated reference in the database (PRIVATE).

        Arguments:
         - reference - annotated reference object; its medline_id,
           pubmed_id, authors, title, journal and location attributes
           are read
         - rank - position of the reference; stored as ``rank + 1``
         - bioentry_id - corresponding database identifier

        An existing row of the ``reference`` table is reused when one
        matches, tried in this order: MEDLINE accession, PUBMED accession,
        CRC64 of the concatenated authors/title/journal text. Otherwise a
        new row is inserted. In both cases a ``bioentry_reference`` row
        linking the bioentry to the reference is then inserted.
        """
        refs = None
        if reference.medline_id:
            refs = self.adaptor.execute_and_fetch_col0(
                "SELECT reference_id"
                "  FROM reference JOIN dbxref USING (dbxref_id)"
                " WHERE dbname = 'MEDLINE' AND accession = %s",
                (reference.medline_id,),
            )
        if not refs and reference.pubmed_id:
            refs = self.adaptor.execute_and_fetch_col0(
                "SELECT reference_id"
                "  FROM reference JOIN dbxref USING (dbxref_id)"
                " WHERE dbname = 'PUBMED' AND accession = %s",
                (reference.pubmed_id,),
            )
        if not refs:
            # No match by accession: fall back to a checksum over the
            # citation text, with "<undef>" standing in for empty fields.
            s = []
            for f in reference.authors, reference.title, reference.journal:
                s.append(f or "<undef>")
            crc = crc64("".join(s))
            refs = self.adaptor.execute_and_fetch_col0(
                "SELECT reference_id FROM reference" r" WHERE crc = %s", (crc,)
            )
        if not refs:
            # Nothing matched, so create the reference row (and its dbxref,
            # when an external identifier is available).
            if reference.medline_id:
                dbxref_id = self._add_dbxref("MEDLINE", reference.medline_id, 0)
            elif reference.pubmed_id:
                dbxref_id = self._add_dbxref("PUBMED", reference.pubmed_id, 0)
            else:
                dbxref_id = None
            authors = reference.authors or None
            title = reference.title or None
            # The location/journal field cannot be Null, so default
            # to an empty string rather than None:
            journal = reference.journal or ""
            self.adaptor.execute(
                "INSERT INTO reference (dbxref_id, location,"
                " title, authors, crc)"
                " VALUES (%s, %s, %s, %s, %s)",
                (dbxref_id, journal, title, authors, crc),
            )
            reference_id = self.adaptor.last_id("reference")
        else:
            reference_id = refs[0]

        if reference.location:
            # Only the first location is recorded; the start is shifted by
            # one (presumably 0-based to 1-based -- confirm against schema).
            start = 1 + int(str(reference.location[0].start))
            end = int(str(reference.location[0].end))
        else:
            start = None
            end = None

        sql = (
            "INSERT INTO bioentry_reference (bioentry_id, reference_id,"
            " start_pos, end_pos, rank) VALUES (%s, %s, %s, %s, %s)"
        )
        self.adaptor.execute(sql, (bioentry_id, reference_id, start, end, rank + 1))
Пример #4
 def seq_checksums(self, seq_str, exp_crc32, exp_crc64, exp_gcg, exp_seguid,
                   exp_simple_LCC, exp_window_LCC):
     """Assert the expected checksum and LCC values for one sequence.

     The sequence is checked as the plain string, as a Seq and as a
     MutableSeq (both built with single_letter_alphabet); every form
     must give the same expected results.
     """
     variants = (
         seq_str,
         Seq(seq_str, single_letter_alphabet),
         MutableSeq(seq_str, single_letter_alphabet),
     )
     # (expected value, function under test), in the order they are checked.
     expectations = (
         (exp_crc32, u_crc32),
         (exp_crc64, crc64),
         (exp_gcg, gcg),
         (exp_seguid, seguid),
         (exp_simple_LCC, simple_LCC),
         (exp_window_LCC, windowed_LCC),
     )
     for candidate in variants:
         for expected, compute in expectations:
             self.assertEqual(expected, compute(candidate))
Пример #5
 def seq_checksums(self, seq_str, exp_crc32, exp_crc64, exp_gcg, exp_seguid,
                   exp_simple_LCC, exp_window_LCC):
     """Compare checksum/LCC results against the expected values.

     Runs the same assertions on seq_str itself and on Seq and MutableSeq
     objects created from it with single_letter_alphabet.
     """
     as_seq = Seq(seq_str, single_letter_alphabet)
     as_mutable = MutableSeq(seq_str, single_letter_alphabet)
     for form in (seq_str, as_seq, as_mutable):
         # Checksums first ...
         self.assertEqual(exp_crc32, u_crc32(form))
         self.assertEqual(exp_crc64, crc64(form))
         self.assertEqual(exp_gcg, gcg(form))
         self.assertEqual(exp_seguid, seguid(form))
         # ... then the two LCC results.
         self.assertEqual(exp_simple_LCC, simple_LCC(form))
         self.assertEqual(exp_window_LCC, windowed_LCC(form))
Пример #6
 def seq_checksums(self, seq_str, exp_crc32, exp_crc64, exp_gcg, exp_seguid,
                   exp_simple_LCC, exp_window_LCC):
     """Assert checksum and LCC results for a sequence in three forms.

     The string, a Seq and a MutableSeq (both built with
     single_letter_alphabet) are each checked. Checksums must match
     exactly; LCC values are compared to two decimal places, with
     lcc_mult called using a window argument of 20.
     """
     variants = (
         seq_str,
         Seq(seq_str, single_letter_alphabet),
         MutableSeq(seq_str, single_letter_alphabet),
     )
     for candidate in variants:
         # Exact checksum comparisons.
         self.assertEqual(exp_crc32, u_crc32(candidate))
         self.assertEqual(exp_crc64, crc64(candidate))
         self.assertEqual(exp_gcg, gcg(candidate))
         self.assertEqual(exp_seguid, seguid(candidate))
         # Floating point LCC comparisons, tolerant to two places.
         self.assertAlmostEqual(exp_simple_LCC, lcc_simp(candidate), places=2)
         windowed = lcc_mult(candidate, 20)
         self.assertEqual(len(exp_window_LCC), len(windowed))
         for expected, actual in zip(exp_window_LCC, windowed):
             self.assertAlmostEqual(expected, actual, places=2)
Пример #7

# Example of crc64 collision from Sebastian Bassi using the
# immunoglobulin lambda light chain variable region from Homo sapiens.
# Both sequences share the same CRC64 checksum: 44CAAD88706CC153

# Explicit testing of the crc64 collision: the two strings differ, and
# crc32, gcg and seguid tell them apart, but crc64 gives the same value.
assert str_light_chain_one != str_light_chain_two
assert crc32(str_light_chain_one) != crc32(str_light_chain_two)
assert crc64(str_light_chain_one) == crc64(str_light_chain_two)
assert gcg(str_light_chain_one) != gcg(str_light_chain_two)
assert seguid(str_light_chain_one) != seguid(str_light_chain_two)

# main checksum/LCC tests #

# Print some output, which the test harness will check.
# NOTE(review): the list literal below is not closed in this excerpt;
# its remaining entries appear to be missing.
examples = [str_light_chain_one, str_light_chain_two,

for i, seq_str in enumerate(examples):
    print "Example %i, length %i, %s..." % (i+1, len(seq_str), seq_str[:10])

    # Avoid cross-platform differences when printing floats by doing the
    # conversion explicitly.
    # NOTE(review): the rest of the loop body is not present in this excerpt.
Пример #8
    def _load_reference(self, reference, rank, bioentry_id):
        """Record a SeqRecord's annotated reference in the database (PRIVATE).

        Arguments:
         - reference - annotated reference object; its medline_id,
           pubmed_id, authors, title, journal and location attributes
           are read
         - rank - position of the reference; stored as ``rank + 1``
         - bioentry_id - corresponding database identifier

        An existing row of the ``reference`` table is reused when one
        matches, tried in this order: MEDLINE accession, PUBMED accession,
        CRC64 of the concatenated authors/title/journal text. Otherwise a
        new row is inserted. In both cases a ``bioentry_reference`` row
        linking the bioentry to the reference is then inserted.
        """
        refs = None
        if reference.medline_id:
            refs = self.adaptor.execute_and_fetch_col0(
                "SELECT reference_id"
                "  FROM reference JOIN dbxref USING (dbxref_id)"
                " WHERE dbname = 'MEDLINE' AND accession = %s",
                (reference.medline_id, ))
        if not refs and reference.pubmed_id:
            refs = self.adaptor.execute_and_fetch_col0(
                "SELECT reference_id"
                "  FROM reference JOIN dbxref USING (dbxref_id)"
                " WHERE dbname = 'PUBMED' AND accession = %s",
                (reference.pubmed_id, ))
        if not refs:
            # No match by accession: fall back to a checksum over the
            # citation text, with "<undef>" standing in for empty fields.
            s = []
            for f in reference.authors, reference.title, reference.journal:
                s.append(f or "<undef>")
            crc = crc64("".join(s))
            refs = self.adaptor.execute_and_fetch_col0(
                "SELECT reference_id FROM reference"
                r" WHERE crc = %s", (crc, ))
        if not refs:
            # Nothing matched, so create the reference row (and its dbxref,
            # when an external identifier is available).
            if reference.medline_id:
                dbxref_id = self._add_dbxref("MEDLINE", reference.medline_id,
                                             0)
            elif reference.pubmed_id:
                dbxref_id = self._add_dbxref("PUBMED", reference.pubmed_id, 0)
            else:
                dbxref_id = None
            authors = reference.authors or None
            title = reference.title or None
            # The location/journal field cannot be Null, so default
            # to an empty string rather than None:
            journal = reference.journal or ""
            self.adaptor.execute(
                "INSERT INTO reference (dbxref_id, location,"
                " title, authors, crc)"
                " VALUES (%s, %s, %s, %s, %s)",
                (dbxref_id, journal, title, authors, crc))
            reference_id = self.adaptor.last_id("reference")
        else:
            reference_id = refs[0]

        if reference.location:
            # Only the first location is recorded; the start is shifted by
            # one (presumably 0-based to 1-based -- confirm against schema).
            start = 1 + int(str(reference.location[0].start))
            end = int(str(reference.location[0].end))
        else:
            start = None
            end = None

        sql = "INSERT INTO bioentry_reference (bioentry_id, reference_id," \
              " start_pos, end_pos, rank)" \
              " VALUES (%s, %s, %s, %s, %s)"
        self.adaptor.execute(sql,
                             (bioentry_id, reference_id, start, end, rank + 1))
Пример #9

# Example of crc64 collision from Sebastian Bassi using the
# immunoglobulin lambda light chain variable region from Homo sapiens.
# Both sequences share the same CRC64 checksum: 44CAAD88706CC153

# Explicit testing of the crc64 collision: the two strings differ, and
# crc32, gcg and seguid tell them apart, but crc64 gives the same value.
assert str_light_chain_one != str_light_chain_two
assert crc32(str_light_chain_one) != crc32(str_light_chain_two)
assert crc64(str_light_chain_one) == crc64(str_light_chain_two)
assert gcg(str_light_chain_one) != gcg(str_light_chain_two)
assert seguid(str_light_chain_one) != seguid(str_light_chain_two)

# main checksum/LCC tests #

# Print some output, which the test harness will check.
# NOTE(review): the list literal below is not closed in this excerpt;
# its remaining entries appear to be missing.
examples = [str_light_chain_one, str_light_chain_two,

for i, seq_str in enumerate(examples) :
    print "Example %i, length %i, %s..." % (i+1, len(seq_str), seq_str[:10])

    # Avoid cross-platform differences when printing floats by doing the
    # conversion explicitly.
    # NOTE(review): the rest of the loop body is not present in this excerpt.