def nested(seq):
    """Return the first sub-sequence of ``seq`` that passes every primer filter.

    Candidates are prefixes of ``seq`` with lengths 18 through 25. Once every
    length has been rejected for the current start position, the first base
    of ``seq`` is dropped and the scan restarts at length 18.

    A candidate ``pr`` is accepted when all of the following hold:

    * ``GC(pr)`` is strictly between 40 and 60;
    * ``GCend(pr)`` is non-zero;
    * ``mt.Tm_Wallace(pr)`` lies in the closed range 56..60;
    * the alignment of ``pr`` against its own reversal (``SWalig`` on the
      matrix from ``SWvlmtrx``) contains none of ``****``, ``***-**`` or
      ``**-***`` in its third element, and ``self_dimers(pr)`` is not 1;
    * ``hairpin(pr)`` is non-zero.

    Args:
        seq: Template sequence to scan (must support slicing and ``len``).

    Returns:
        The first accepted candidate, i.e. a slice of ``seq``.

    Raises:
        ValueError: If ``seq`` is used up before any candidate is accepted.
            Previously this case never terminated, because an empty
            candidate can never satisfy the filters.
    """
    min_len = 18
    max_len = 25
    length = min_len
    while True:
        if length == max_len + 1:
            # Every length failed at this start: slide the window by one base.
            length = min_len
            seq = seq[1:]
        if not seq:
            raise ValueError("no suitable primer found: sequence exhausted")

        pr = seq[:length]
        # Every rejection below moves on to the next longer candidate.
        length += 1

        gc_percent = GC(pr)
        if gc_percent >= 60 or gc_percent <= 40:
            continue
        if GCend(pr) == 0:
            continue

        tm = mt.Tm_Wallace(pr)
        if tm > 60 or tm < 56:
            continue

        pr_revers = pr[::-1]
        match_line = SWalig(SWvlmtrx(pr, pr_revers), pr, pr_revers)[2]
        if (match_line.find("****") >= 0
                or self_dimers(pr) == 1
                or match_line.find("***-**") >= 0
                or match_line.find("**-***") >= 0):
            continue

        if hairpin(pr) == 0:
            continue
        return pr
def select_primers(circRNA_seq):
    """Print candidate primer pairs for each junction offset from 7 to 13.

    For every offset ``i`` the function prints, in this order:

    1. ``i``;
    2. the forward primer (20 bases starting at index ``i - 150``, which is
       counted from the end of the sequence) and its Wallace Tm;
    3. the reverse primer (reverse complement of the junction window) and its
       Wallace Tm;
    4. the junction window itself: ``circRNA_seq[i - 20:]`` followed by the
       first ``i`` bases;
    5. the absolute Tm difference between the two primers.

    Args:
        circRNA_seq: Sequence string. The negative slice offsets only give
            20-base windows when the sequence is long enough.
    """
    for offset in range(7, 14):
        print(offset)

        fwd_start = offset - 150
        forward_primer = Seq(circRNA_seq[fwd_start:fwd_start + 20])

        # Tail of the sequence joined to its head.
        junction = Seq(circRNA_seq[(offset - 20):] + circRNA_seq[0:offset])
        reverse_primer = junction.reverse_complement()

        fwd_tm = mt.Tm_Wallace(forward_primer)
        print(forward_primer, fwd_tm)
        rev_tm = mt.Tm_Wallace(reverse_primer)
        print(reverse_primer, rev_tm)
        print(junction)
        print(abs(fwd_tm - rev_tm))
def repeat_finder(seq):
    """Find the longest self-repeat in ``seq`` and report where it starts.

    The upper-cased sequence is compared position by position against every
    left-shifted copy of itself. The longest run of consecutive matching
    positions over all shifts is taken as the repeat (the first such run wins
    ties).

    Args:
        seq: Sequence string; case is ignored.

    Returns:
        ``(location, length)`` when the repeat is at least 20 bases long, or
        when it is shorter but its Wallace Tm (truncated to an int) is above
        55. ``location`` is the index of the first occurrence of the repeat
        in the upper-cased sequence. Returns ``False`` otherwise.
    """
    string = seq.upper()
    longest_match = []

    for shift in range(1, len(string)):
        match = []
        # zip stops at the end of the (shorter) shifted copy.
        for base, shifted_base in zip(string, string[shift:]):
            if base == shifted_base:
                match.append(shifted_base)
                continue
            if len(longest_match) < len(match):
                longest_match = match
            match = []
        # A run that reaches the end of this shift must be counted here;
        # otherwise it is never compared and leaks into the next shift.
        if len(longest_match) < len(match):
            longest_match = match

    repeat_sequence = ''.join(longest_match)
    length = len(repeat_sequence)

    # Repeats of 20 bases or more are always reported (exactly 20 used to
    # fall between the "> 20" and "< 20" tests); shorter ones only when they
    # would still anneal strongly.
    if length >= 20 or int(mt.Tm_Wallace(Seq(repeat_sequence))) > 55:
        location = string.find(repeat_sequence)
        logger.debug("Found long {}bp repeat at location {}".format(
            length, location))
        return location, length
    return False
def get_quick_interaction_score(domains, domains_c, row, T=25, magnesium=0,
                                sodium=1.0, tm_target=60):
    """Score one domain by its target duplex energy versus cross reactions.

    The domain ``domains[row]`` is paired with ``domains_c[i]`` for every
    ``i`` in ``range(len(domains))`` and the duplex energy of each pair is
    obtained from ``get_duplex_energy``. The score is::

        E[row] - sum(E[i], i != row) * longest(d) * freq(d) * tm_penalty

    where ``d = domains[row]``, ``freq`` is
    ``get_most_represented_base_freq`` and
    ``tm_penalty = 1 + |tm_target - Tm_Wallace(d)|``.

    Args:
        domains: Indexable collection of domain sequences.
        domains_c: Indexable collection paired with ``domains`` by index.
            NOTE(review): indexed with ``range(len(domains))``, so it is
            assumed to be at least as long as ``domains`` - confirm.
        row: Index of the domain being scored.
        T, magnesium, sodium: Forwarded unchanged to ``get_duplex_energy``.
        tm_target: Wallace Tm the domain is scored against.

    Returns:
        The quick interaction score.
    """
    probe = domains[row]
    energies = np.array(
        [
            get_duplex_energy([probe, domains_c[idx]], T=T,
                              magnesium=magnesium, sodium=sodium)
            for idx in range(len(domains))
        ],
        dtype=float,
    )

    on_target = energies[row]
    off_target = np.sum(np.delete(energies, row))
    tm_penalty = 1 + np.abs(tm_target - mt.Tm_Wallace(probe))

    return (on_target
            - off_target * longest(probe)
            * get_most_represented_base_freq(probe) * tm_penalty)
def calc_tm_value(self):
    """Compute the primer pair's mean Tm with three methods and tabulate it.

    Reads ``self.primer_fw`` and ``self.primer_rv`` and sets:

    * ``primer_fw_seq`` / ``primer_rv_seq``: upper-cased ``Seq`` objects;
    * ``tm_value_Wallace``, ``tm_value_GC``, ``tm_value_NN``: mean of the
      forward and reverse primer Tm for each method;
    * ``tm_table_column``, ``tm_list``, ``tm_table``: a two-column table
      (method name, Tm rounded to one decimal) as a list and a DataFrame.
    """
    self.primer_fw_seq = Seq(self.primer_fw.upper())
    self.primer_rv_seq = Seq(self.primer_rv.upper())
    primer_pair = (self.primer_fw_seq, self.primer_rv_seq)

    # Wallace rule
    self.tm_value_Wallace = np.mean(
        [mt.Tm_Wallace(primer) for primer in primer_pair])
    # GC-content method
    self.tm_value_GC = np.mean(
        [mt.Tm_GC(primer, Na=50, valueset=7) for primer in primer_pair])
    # Nearest-neighbour method
    self.tm_value_NN = np.mean(
        [mt.Tm_NN(primer, Na=50, nn_table=mt.DNA_NN1)
         for primer in primer_pair])

    # Column headers and row labels are user-facing Japanese strings
    # ("calculation method", "Tm value (°C)", and the three method names).
    self.tm_table_column = ["計算手法", "Tm値 (°C)"]
    labelled_values = (
        ("Wallace法", self.tm_value_Wallace),
        ("GC法", self.tm_value_GC),
        ("最近接塩基法", self.tm_value_NN),
    )
    self.tm_list = [[label, round(value, 1)]
                    for label, value in labelled_values]
    self.tm_table = pd.DataFrame(self.tm_list, columns=self.tm_table_column)
def gen_nested_primers(end3, end5):
    """Pick a compatible forward/reverse primer pair from two template ends.

    Phase 1 collects up to ten candidates per end by calling ``nested``
    repeatedly, each time continuing after the previously found primer.
    Every failed round lowers the target count by one, so the loop always
    terminates.

    Phase 2 walks the reverse candidates (outer loop) and forward candidates
    (inner loop) and returns the first pair for which:

    * the Wallace Tm values differ by at most 3;
    * ``compare(prR, prF)`` is non-zero;
    * the template remaining after each primer differs by at most a factor
      of four between the two ends.

    Args:
        end3: Template from which forward primers are taken.
        end5: Template from which reverse primers are taken.

    Returns:
        ``(prF, prR, end3, end5)`` where the two templates are cut to start
        three bases before the end of the chosen primer, or ``None`` when no
        pair satisfies the filters.
    """
    list_prF = []
    list_prR = []
    re_end3 = end3
    re_end5 = end5
    wanted = 10
    found = 0
    while found != wanted:
        try:
            prF = nested(re_end3)
            prR = nested(re_end5)
            list_prF.append(prF)
            list_prR.append(prR)
            re_end3 = re_end3[re_end3.find(prF) + len(prF):]
            re_end5 = re_end5[re_end5.find(prR) + len(prR):]
            found += 1
        except Exception:
            # Best effort: a failed round just lowers the target. A bare
            # ``except:`` here also swallowed KeyboardInterrupt/SystemExit,
            # which made the search impossible to interrupt.
            wanted -= 1

    for prR in list_prR:
        for prF in list_prF:
            if abs(mt.Tm_Wallace(prR) - mt.Tm_Wallace(prF)) > 3:
                continue
            if compare(prR, prF) == 0:
                continue

            cut3 = end3.find(prF) + len(prF)
            cut5 = end5.find(prR) + len(prR)
            rest3_len = len(end3[cut3:])
            rest5_len = len(end5[cut5:])
            # Reject pairs that leave very unbalanced amounts of template.
            if rest3_len * 4 < rest5_len or rest3_len > 4 * rest5_len:
                continue

            return (prF, prR, end3[cut3 - 3:], end5[cut5 - 3:])

    # No acceptable pair: same result as the former implicit fall-through.
    return None
def _create_staple_max_melt_T(self) -> Dict[Strand, float]:
    """Map each staple to the highest melting temperature among its domains.

    Domains whose sequence contains "N" are ignored. Domains with more than
    14 bases use the nearest-neighbour method (``Tm_NN`` with ``Na=0``,
    ``Mg=17.5``); shorter ones use the Wallace rule. Staples with no usable
    domain do not appear in the result.
    """
    melt_t_by_staple: Dict[Strand, List[float]] = {}
    for staple in self.staples:
        for domain in staple.domain_list:
            if "N" in domain.sequence:
                continue
            domain_seq = Seq(domain.sequence)
            if len(domain.base_list) > 14:
                melt_t = MeltingTemp.Tm_NN(domain_seq, Na=0, Mg=17.5)
            else:
                melt_t = MeltingTemp.Tm_Wallace(domain_seq)
            melt_t_by_staple.setdefault(staple, []).append(melt_t)

    return {staple: max(temps) for staple, temps in melt_t_by_staple.items()}
def melt_temp_dict():
    """Build a lookup of Wallace melting temperatures for all trimers.

    Returns:
        A dict keyed by every three-letter word over ``A, C, G, T, N``.
        Trimers without ``N`` map to their ``mt.Tm_Wallace`` value; every
        trimer containing at least one ``N`` maps to the string ``"NA"``.
        The N-free trimers are inserted first.
    """
    trimers = ["".join(bases) for bases in product("ACGTN", repeat=3)]

    # Melting temperature for the unambiguous trimers.
    trimer_dict = {
        trimer: mt.Tm_Wallace(Seq(trimer))
        for trimer in trimers if "N" not in trimer
    }
    # Placeholder for anything containing an N.
    trimer_dict.update({trimer: "NA" for trimer in trimers if "N" in trimer})
    return trimer_dict
def Oligo(target_dna):
    """Summarise basic properties of an oligo.

    Args:
        target_dna: DNA sequence string.

    Returns:
        A dict with these keys:

        * ``GC_contents``: ``GC(dna)`` formatted with two decimals and " %";
        * ``Tm_value``: Wallace-rule melting temperature;
        * ``Complement_seq`` / ``Reverse_complement_seq``: as strings;
        * ``Length_of_oligo``: the length, as a string.
    """
    dna = Seq(target_dna)  # Biopython sequence object
    return {
        'GC_contents': '{:.2f} %'.format(GC(dna)),
        'Tm_value': MeltingTemp.Tm_Wallace(dna),
        'Complement_seq': str(dna.complement()),
        'Reverse_complement_seq': str(dna.reverse_complement()),
        'Length_of_oligo': str(len(dna)),
    }
def melting_temp(input_str):
    """Return the Wallace-rule melting temperature of ``input_str``.

    The string is wrapped in a ``Seq`` tagged with the unambiguous DNA
    alphabet before the calculation.

    NOTE(review): ``Bio.Alphabet`` (the source of ``IUPAC``) was removed in
    Biopython 1.78 - confirm the pinned Biopython version supports it.
    """
    return mt.Tm_Wallace(Seq(input_str, IUPAC.unambiguous_dna))
def main(sequences):
    """Generate gRNA cloning oligos for every exon and print plate layouts.

    This is Python 2 code (``print`` statements, ``xrange``, integer ``/``).

    For each label in ``sequences`` and each (exon, intron) pair, the exon
    with its two flanks is scanned on the forward strand and on the reverse
    complement. Wherever a ``GG`` dinucleotide is found, the 19 characters
    ending one position before it are taken as the target site. Sites that
    are empty, or whose Wallace Tm (with a leading ``G``) is not above
    ``mt_threshold``, are skipped. For every kept site two oligos are stored:
    ``overlap5 + 'G' + site`` and the reverse complement of
    ``'G' + site + overlap3``.

    Args:
        sequences: Mapping of label -> dict with keys ``'exons'`` and
            ``'introns'``, iterated in parallel. Each ``introns`` entry is
            indexed as ``[0]`` (placed before the exon) and ``[1]`` (placed
            after it).

    NOTE(review): the reporting section hard-codes the labels 'A', 'B1' and
    'B2' and will raise KeyError if ``sequences`` lacks any of them.
    """
    # container for good oligos
    my_oligos = {}
    my_sites = {}
    my_names = {}
    labels = sequences.keys()
    # annealing overlaps
    #overlap5 = 'cttgtggaaaggacgaaacacc'.upper() # TM: 58-59 C
    #overlap3 = 'gttttagagctagaaatagcaagttaaaataaggc'.upper() # TM: 58-59 C
    # pcr overlaps
    overlap5 = 'TTTCTTGGCTTTATATATCTTGTGGAAAGGACGAAACACC'.upper()  # TM: 58-59 C
    overlap3 = 'GTTTTAGAGCTAGAAATAGCAAGTTAAAATAAGGCTAGTC'.upper()  # TM: 58-59 C
    mt_threshold = 55.0  # sites at or below this Wallace Tm are discarded
    gRNA_minimum_spacing = 5  # index advance after an accepted site
    gRNA_5prime_spacing = 10  # first exon index that is examined
    print 'Starting oligo generation...'
    for label in labels:
        print 'Starting generation for {}...'.format(label)
        my_oligos[label] = []
        my_sites[label] = []
        my_names[label] = []
        name_index = 1
        for exon, intron in zip(sequences[label]['exons'],
                                sequences[label]['introns']):
            full_seq = intron[0] + exon + intron[1]
            full_seq_rev = Seq(full_seq).reverse_complement()
            shift = len(intron[0])  # shift for indexing correct regions
            shift_rev = len(intron[1])  # shift for indexing correct regions
            # Forward read
            ind = gRNA_5prime_spacing
            while ind < len(exon):
                if full_seq[shift + ind:shift + ind + 2] == 'GG':
                    # 19 characters ending one position before the 'GG'.
                    site = full_seq[ind + shift - 20:ind + shift - 1]
                    oligo = Seq(overlap5 + 'G' + site)
                    oligo_rev = (Seq('G') + site + overlap3).reverse_complement()
                    if site == '':
                        ind += 1
                        continue
                    if mt.Tm_Wallace('G' + site) <= mt_threshold:
                        ind += 1
                        continue
                    # Each accepted site stores two entries (oligo and its
                    # reverse-complement partner) in all three lists.
                    my_oligos[label].append(oligo)
                    my_sites[label].append(site)
                    my_names[label].append('gRNA-TCR{}C-{}F'.format(
                        label, name_index))
                    my_oligos[label].append(oligo_rev)
                    my_sites[label].append(site)
                    my_names[label].append('gRNA-TCR{}C-{}F-RC'.format(
                        label, name_index))
                    name_index += 1
                    ind += gRNA_minimum_spacing
                else:
                    ind += 1
            # Reverse read
            ind = gRNA_5prime_spacing
            while ind < len(exon):
                if full_seq_rev[shift_rev + ind:shift_rev + ind + 2] == 'GG':
                    site = full_seq_rev[shift_rev + ind - 20:shift_rev + ind - 1]
                    #oligo = overlap5 + 'G' + site + overlap3
                    # NOTE(review): unlike the forward read, this oligo is not
                    # wrapped in Seq(...) - confirm that is intended.
                    oligo = overlap5 + 'G' + site
                    oligo_rev = (Seq('G') + site + overlap3).reverse_complement()
                    if site == '':
                        ind += 1
                        continue
                    if mt.Tm_Wallace('G' + site) <= mt_threshold:
                        ind += 1
                        continue
                    my_oligos[label].append(oligo)
                    my_sites[label].append(site)
                    my_names[label].append('gRNA-TCR{}C-{}R'.format(
                        label, name_index))
                    my_oligos[label].append(oligo_rev)
                    my_sites[label].append(site)
                    # NOTE(review): the partner of an '...R' oligo is named
                    # '...F-RC' here - possibly a copy/paste slip; confirm.
                    my_names[label].append('gRNA-TCR{}C-{}F-RC'.format(
                        label, name_index))
                    name_index += 1
                    ind += gRNA_minimum_spacing
                else:
                    ind += 1
        #raw_input()
        # Two list entries per site, hence the division by 2.
        print 'Gene {}: {} sites'.format(label, len(my_oligos[label]) / 2)
    # 96 well names: A1..A12, B1..B12, ..., H1..H12.
    wells = ['{}{}'.format(a, b) for a in 'ABCDEFGH' for b in xrange(1, 13)]
    #'''#
    for label in ['A', 'B1', 'B2']:
        print '\nPlate; Well; Name; Sequence; MT (Wallace)'
        i = 0
        for name, site, oligo in zip(my_names[label], my_sites[label],
                                     my_oligos[label]):
            #print '{}; {}; {}'.format(wells[i%96],name,oligo)
            # Plate number is 1-based and advances every 96 oligos.
            print 'TCR{}-{}; {}; {}; {}; {}'.format(label, 1 + (i / 96),
                                                    wells[i % 96], name, oligo,
                                                    mt.Tm_Wallace('G' + site))
            i += 1
    #'''#
    print 'HERRREE'
    B_oligos = my_oligos['B1'] + my_oligos['B2']
    # Half the number of duplicate entries across the two B lists. As a
    # Python 2 print statement this prints the whole expression,
    # including the "/ 2".
    print(len(B_oligos) - len(list(set(B_oligos)))) / 2
    #'''#
    for label in ['A', 'B1', 'B2']:
        print '\nNew plate:'
        i = 0
        for name, site, oligo in zip(my_names[label], my_sites[label],
                                     my_oligos[label]):
            # List each site once by skipping the reverse-complement partners.
            if '-RC' in name:
                continue
            print '{}; {}; TM:{}'.format(name, site, mt.Tm_Wallace('G' + site))
tag_prefix = 'o'
default_strandedness = 1

def get_melting_temp(self):
    """Return the oligo's melting temperature as a float.

    If ``self.name`` contains a ``TM``/``Tm``/``tm`` tag preceded by ``-``,
    ``_`` or a space and followed by an optional ``=`` and digits, those
    digits are returned. Only the integer digits are captured, so a name
    such as ``..._Tm=60.5`` yields ``60.0``. Otherwise the Wallace-rule Tm
    of ``self.seq`` is returned.
    """
    # Imported here rather than at module level; the reason is not visible
    # in this section of the file.
    from Bio.SeqUtils import MeltingTemp

    # If the Tm is encoded in the oligo name, use that.
    if m := re.search(r'[-_ ](TM|Tm|tm)=?(\d+)', self.name):
        return float(m.group(2))

    # Otherwise, calculate a Tm using the Wallace rule.  This isn't a
    # particularly accurate method, but I chose it because it agrees most
    # closely with NEB's Tm calculator, which is what I've been using for
    # everything.
    else:
        return MeltingTemp.Tm_Wallace(self.seq)

def get_tm(self):
    """Return ``self.melting_temp``.

    NOTE(review): ``melting_temp`` is presumably the property generated from
    ``get_melting_temp`` (e.g. by autoprop) - confirm against the class
    decorator, which is outside this section.
    """
    return self.melting_temp

class MakerInterface:
    # Maker classes are not required to actually inherit from this class, but
    # they are expected to implement this interface.

    # Note that I don't use autoprop on this class, because I don't want
    # getters to be part of the interface.  Subclasses are free to use
    # autoprop, though.

    @classmethod
    def make(self, db, products):
from Bio.Seq import Seq
from Bio.SeqUtils import GC
from Bio.SeqUtils import MeltingTemp as mt

# Demo: GC percentage and Wallace-rule melting temperature of one short
# sequence, printed one value per line.
seq = Seq("AAGTGACAGGGATTG")
GC_per, mt_p = GC(seq), mt.Tm_Wallace(seq)

print(GC_per)
print(mt_p)
# Run three rounds of primer selection and append the results to file.txt.
# `i`, `n`, `end5`, `pcr_end3` and `pcr_end5` come from code outside this
# section.
end3 = end3[:n]
counter = 0

with open('file.txt', 'a') as file:
    file.write(i + '\n')

while counter != 3:
    # Each round continues from the templates returned by the previous one.
    primers_and_seq = primers(end3, end5)
    prF, prR, end3, end5 = (primers_and_seq[0], primers_and_seq[1],
                            primers_and_seq[2], primers_and_seq[3])

    # Keep every round's pair under its own name for the final drawing.
    if counter == 0:
        prF1, prR1 = prF, prR
    elif counter == 1:
        prF2, prR2 = prF, prR
    elif counter == 2:
        prF3, prR3 = prF, prR
    counter += 1

    # The file is reopened in append mode for every round, as before.
    with open('file.txt', 'a') as file:
        file.write(
            ' >Forward primer %s: \n%s: Melting temperature%0.2f\n'
            '>Revers primer %s: \n%s: Melting temperature%0.2f\n'
            % (str(counter), str(prF), mt.Tm_Wallace(prF),
               str(counter), str(prR), mt.Tm_Wallace(prR)))

with open('file.txt', 'a') as file:
    file.write(
        rcr_vis(prF1, prF2, prF3, prR1, prR2, prR3, pcr_end3, pcr_end5))
def temperatures(dic):
    """Return the Wallace, GC and nearest-neighbour Tm of ``dic``.

    Each value is computed with ``strict=False`` and rounded to two decimals.

    Args:
        dic: The sequence passed to the three ``MeltingTemp`` functions.

    Returns:
        A tuple ``(Tw, Tgc, Tnn)``.
    """
    methods = (mt.Tm_Wallace, mt.Tm_GC, mt.Tm_NN)
    Tw, Tgc, Tnn = (round(method(dic, strict=False), 2) for method in methods)
    return Tw, Tgc, Tnn
if __name__ == '__main__':
    # Print a tab-separated table of Tm_Wallace / Tm_GC / Tm_NN for every
    # record of a FASTA/FASTQ file (optionally gzipped) or of stdin.
    args = parse_args()
    file, seq_format, fh = args.infile, args.format, None
    if file:
        if not seq_format:
            # Infer the format from the suffix. The match is case-insensitive,
            # so the captured suffix is lower-cased before use (SeqIO format
            # names are lower case). Both dots are matched literally.
            found = re.search(r'(?i)\.(fasta|fa|fastq|fq)(\.gz)?$', file)
            if not found:
                print(
                    "invalid file name suffix.\nfile name should like this: infile.[fasta|fa|fastq|fq][.gz]",
                    file=sys.stderr)
                sys.exit(1)
            suffix = found.group(1).lower()
            seq_format = {'fa': 'fasta', 'fq': 'fastq'}.get(suffix, suffix)
        # Same case-insensitivity for the compression suffix.
        if file.lower().endswith('.gz'):
            fh = gzip.open(file, 'rt')
        else:
            fh = open(file, 'r')
    else:
        fh = sys.stdin
        seq_format = args.format

    try:
        sys.stdout.write('{}\t{}\t{}\t{}\n'.format('seq_id', 'Tm_Wallace',
                                                   'Tm_GC', 'Tm_NN'))
        for seq in SeqIO.parse(fh, seq_format):
            sys.stdout.write('{}\t{:0.2f}\t{:0.2f}\t{:0.2f}\n'.format(
                seq.id, mt.Tm_Wallace(seq.seq), mt.Tm_GC(seq.seq),
                mt.Tm_NN(seq.seq)))
    finally:
        # Close the input even when parsing or a Tm calculation fails.
        fh.close()
#!/usr/bin/env python
import sys
from Bio.SeqUtils import molecular_weight
from Bio.SeqUtils import MeltingTemp as mt

# Reads the tab-separated file named by the first argument and writes it to
# stdout comma-separated, with three melting-temperature columns appended.
# The sequence is taken from the fourth column; the row whose fourth column
# is the literal 'cdna' is treated as the header.
print("python: " + sys.version, end="\n", file=sys.stderr)
print(sys.argv[1], end="\n", file=sys.stderr)

with open(sys.argv[1]) as file:
    for line in file:
        row = line.rstrip('\n').split("\t")
        seq = row[3]
        if seq != 'cdna':
            # The weight is computed but not written out; the call is kept
            # because the original performs it for every data row.
            mw = molecular_weight(seq, 'DNA', False)
            row += ['%0.2f' % tm(seq)
                    for tm in (mt.Tm_NN, mt.Tm_GC, mt.Tm_Wallace)]
        else:
            row += ["tm_nn", "tm_gc", "tm_wallace"]
        print(",".join(row))
1/1 A L K C V C L E M C M P * N V * atgccttgaaatgtgtag 38 % tacggaactttacacatc G Q F T Y H R S I H L A K F H T """

# 9 : calculating Tm
# Wallace-rule melting temperature of a 22-base sequence.
# NOTE(review): Seq is not imported in this section (the import below is
# commented out) - presumably it is imported earlier in the file; confirm.
# from Bio.Seq import Seq
from Bio.SeqUtils import MeltingTemp as mt
myseq = Seq("AGTCTGGGACGCGGCAATCGCA")
print(mt.Tm_Wallace(myseq))  # 72.0

# 10 : amino acid 1 to 3
# seq1 turns three-letter amino acid codes into one-letter codes.
from Bio.SeqUtils import seq1
essential_amino_acid_3 = "LeuLysMetValIleThrTrpPhe"
print(seq1(essential_amino_acid_3))  # LKMVITWF
# seq3 does the reverse: one-letter codes to three-letter codes.
from Bio.SeqUtils import seq3
essential_amino_acid_1 = "LKMVITWF"
print(seq3(essential_amino_acid_1))  # LeuLysMetValIleThrTrpPhe
# 4.5.4.calc_melting_temperature.py
from Bio.SeqUtils import MeltingTemp as mt
from Bio.Seq import Seq

# Wallace-rule melting temperature of a 25-base example sequence.
myseq = Seq("AGTCTGGGACGGCGCGGCAATCGCA")
wallace_tm = mt.Tm_Wallace(myseq)
print(wallace_tm)  # prints 84.0
def getTmWallace(seq):
    """Return the Wallace-rule melting temperature of ``seq``.

    Thin wrapper around ``mt.Tm_Wallace`` with default arguments.
    """
    wallace_tm = mt.Tm_Wallace(seq)
    return wallace_tm