def setUpClass(cls):
    """Prepare the fixtures shared by the tests of this class.

    Stores two protein sequences used as a CRC64 collision example and a
    codon adaptation index generated from the file
    ``CodonUsage/HighlyExpressedGenes.txt``.
    """
    # CRC64 collision example from Sebastian Bassi, based on the
    # immunoglobulin lambda light chain variable region from Homo sapiens.
    # Both sequences share the same CRC64 checksum: 44CAAD88706CC153
    cls.str_light_chain_one = (
        "QSALTQPASVSGSPGQSITISCTGTSSDVGSYNLVSWYQQHPGKAPKLMIYEGSKRPSGV"
        "SNRFSGSKSGNTASLTISGLQAEDEADYYCSSYAGSSTLVFGGGTKLTVL"
    )
    cls.str_light_chain_two = (
        "QSALTQPASVSGSPGQSITISCTGTSSDVGSYNLVSWYQQHPGKAPKLMIYEGSKRPSGV"
        "SNRFSGSKSGNTASLTISGLQAEDEADYYCCSYAGSSTWVFGGGTKLTVL"
    )

    # Build the codon adaptation index once and keep it on the class.
    highly_expressed = os.path.join("CodonUsage", "HighlyExpressedGenes.txt")
    index = CodonAdaptationIndex()
    index.generate_index(highly_expressed)
    cls.X = index
def test_codon_usage_custom(self):
    """Test Codon Adaptation Index (CAI) using FASTA file for background.

    The single-part CDS features of ``GenBank/NC_005816.gb`` are written to
    a scratch FASTA file, a codon usage index is generated from that file,
    and the CAI of a short example gene is checked against a known value.
    The scratch file is removed even when a check fails.
    """
    # We need a FASTA file of CDS sequences to count the codon usage...
    dna_fasta_filename = "fasta.tmp"
    dna_genbank_filename = "GenBank/NC_005816.gb"
    record = SeqIO.read(dna_genbank_filename, "genbank")
    records = []
    for feature in record.features:
        # Only CDS features whose location has a single part are used.
        if feature.type != "CDS" or len(feature.location.parts) != 1:
            continue
        start = feature.location.start.position
        end = feature.location.end.position
        table = int(feature.qualifiers["transl_table"][0])
        if feature.strand == -1:
            seq = record.seq[start:end].reverse_complement()
        else:
            seq = record.seq[start:end]
        # Double check we have the CDS sequence expected
        # TODO - Use any cds_start option if/when added to deal with the met
        a = "M" + str(seq[3:].translate(table))
        b = feature.qualifiers["translation"][0] + "*"
        self.assertEqual(a, b, "%r vs %r" % (a, b))
        records.append(
            SeqRecord(
                seq,
                id=feature.qualifiers["protein_id"][0],
                description=feature.qualifiers["product"][0],
            )
        )

    try:
        with open(dna_fasta_filename, "w") as handle:
            SeqIO.write(records, handle, "fasta")

        CAI = CodonAdaptationIndex()
        # Note - this needs a FASTA file which contains non-ambiguous DNA
        # coding sequences - which should each be a whole number of codons.
        CAI.generate_index(dna_fasta_filename)
        # Now check codon usage index (CAI) using this species
        self.assertEqual(
            record.annotations["source"],
            "Yersinia pestis biovar Microtus str. 91001",
        )
        value = CAI.cai_for_gene("ATGCGTATCGATCGCGATACGATTAGGCGGATG")
        self.assertAlmostEqual(value, 0.67213, places=5)
    finally:
        # Clean up the scratch file even if writing, index generation or an
        # assertion above failed, so a failing run does not leave it behind.
        if os.path.isfile(dna_fasta_filename):
            os.remove(dna_fasta_filename)
def test_codon_usage_custom(self):
    """Test Codon Adaptation Index (CAI) using FASTA file for background.

    The CDS features of ``GenBank/NC_005816.gb`` that have no sub-features
    are written to a scratch FASTA file, a codon usage index is generated
    from that file, and the CAI of a short example gene is checked against
    a known value.  The scratch file is removed even when a check fails.
    """
    # We need a FASTA file of CDS sequences to count the codon usage...
    dna_fasta_filename = "fasta.tmp"
    dna_genbank_filename = "GenBank/NC_005816.gb"
    record = SeqIO.read(dna_genbank_filename, "genbank")
    records = []
    for feature in record.features:
        # Only CDS features without sub-features are used.
        if feature.type != "CDS" or feature.sub_features:
            continue
        start = feature.location.start.position
        end = feature.location.end.position
        table = int(feature.qualifiers["transl_table"][0])
        if feature.strand == -1:
            seq = record.seq[start:end].reverse_complement()
        else:
            seq = record.seq[start:end]
        # Double check we have the CDS sequence expected
        # TODO - Use any cds_start option if/when added to deal with the met
        a = "M" + str(seq[3:].translate(table))
        b = feature.qualifiers["translation"][0] + "*"
        self.assertEqual(a, b, "%r vs %r" % (a, b))
        records.append(SeqRecord(seq,
                                 id=feature.qualifiers["protein_id"][0],
                                 description=feature.qualifiers["product"][0]))

    try:
        with open(dna_fasta_filename, "w") as handle:
            SeqIO.write(records, handle, "fasta")

        CAI = CodonAdaptationIndex()
        # Note - this needs a FASTA file which contains non-ambiguous DNA
        # coding sequences - which should each be a whole number of codons.
        CAI.generate_index(dna_fasta_filename)
        # Now check codon usage index (CAI) using this species
        self.assertEqual(record.annotations["source"],
                         "Yersinia pestis biovar Microtus str. 91001")
        self.assertEqual(
            "%0.5f" % CAI.cai_for_gene("ATGCGTATCGATCGCGATACGATTAGGCGGATG"),
            "0.67213")
    finally:
        # Clean up the scratch file even if writing, index generation or an
        # assertion above failed, so a failing run does not leave it behind.
        if os.path.isfile(dna_fasta_filename):
            os.remove(dna_fasta_filename)
#TODO - Use any cds_start option if/when added to deal with the met assert "M" + str(seq[3:].translate(table)) \ == feature.qualifiers["translation"][0]+"*" records.append(SeqRecord(seq, id=feature.qualifiers["protein_id"][0], description=feature.qualifiers["product"][0])) del start, end, table, seq if os.path.isfile(dna_fasta_filename): os.remove(dna_fasta_filename) handle = open(dna_fasta_filename, "w") SeqIO.write(records, handle, "fasta") handle.close() CAI = CodonAdaptationIndex() # Note - this needs a FASTA file which contains non-ambiguous DNA coding # sequences - which should each be a whole number of codons. CAI.generate_index(dna_fasta_filename) print "Example CAI %0.5f using %s" \ % (CAI.cai_for_gene("ATGCGTATCGATCGCGATACGATTAGGCGGATG"), record.annotations["source"]) os.remove(dna_fasta_filename) del record, records del dna_genbank_filename del dna_fasta_filename print ################### # crc64 collision # ###################
#TODO - Use any cds_start option if/when added to deal with the met assert "M" + str(seq[3:].translate(table)) \ == feature.qualifiers["translation"][0]+"*" records.append(SeqRecord(seq, id=feature.qualifiers["protein_id"][0], description=feature.qualifiers["product"][0])) del start, end, table, seq if os.path.isfile(dna_fasta_filename) : os.remove(dna_fasta_filename) handle = open(dna_fasta_filename, "w") SeqIO.write(records, handle, "fasta") handle.close() CAI = CodonAdaptationIndex() # Note - this needs a FASTA file which contains non-ambiguous DNA coding # sequences - which should each be a whole number of codons. CAI.generate_index(dna_fasta_filename) print "Example CAI %0.5f using %s" \ % (CAI.cai_for_gene("ATGCGTATCGATCGCGATACGATTAGGCGGATG"), record.annotations["source"]) os.remove(dna_fasta_filename) del record, records del dna_genbank_filename del dna_fasta_filename print ################### # crc64 collision # ###################