def forms_dimer(primer1, primer2, struct_configs, heterodimer_threshold):
    """Report whether ``primer1`` forms a heterodimer with ``primer2``.

    ``primer1.seq`` is checked against ``primer2.seq`` first and, only when
    that check is negative, against ``primer2.revcomp``.  A check is positive
    when primer3 reports a structure whose ``dg`` is below
    ``heterodimer_threshold``.

    Args:
        primer1: object exposing a ``seq`` attribute.
        primer2: object exposing ``seq`` and ``revcomp`` attributes.
        struct_configs: mapping of keyword arguments forwarded unchanged to
            ``primer3.calcHeterodimer``.
        heterodimer_threshold: bound compared against the raw ``dg`` value
            reported by primer3 (no scaling is applied here).

    Returns:
        ``True`` if either check is positive, otherwise ``False``.
    """

    def _below_threshold(partner_seq):
        outcome = primer3.calcHeterodimer(
            primer1.seq, partner_seq, **struct_configs)
        return outcome.structure_found and outcome.dg < heterodimer_threshold

    if _below_threshold(primer2.seq):
        return True
    return bool(_below_threshold(primer2.revcomp))
def SelfDimer(Seq):
    """Flag whether ``Seq`` pairs with itself more strongly than allowed.

    The sequence is passed as both partners to ``primer3.calcHeterodimer``.
    The absolute value of the reported ``dg`` divided by 1000 is compared with
    the module-level ``SelfDimerDeltaG``.

    Returns:
        ``0`` when the magnitude exceeds ``SelfDimerDeltaG``, otherwise ``1``.
    """
    self_pairing = primer3.calcHeterodimer(Seq, Seq)
    too_stable = abs(self_pairing.dg / 1000) > SelfDimerDeltaG
    return 0 if too_stable else 1
def findBestCommonPrimerIn3pEndRange(common_primer_end3p_range, strand,
                                     discriminatory_primer, genome_str,
                                     ref_genome_str, idx_lut, edge_lut,
                                     mismatch_lut, primer_finder_params):
    """Pick the highest-scoring common primer over a range of 3' ends.

    For every end position a candidate is requested from
    ``primercandidate.findCommonPrimer``.  Candidates whose heterodimer Tm
    with ``discriminatory_primer`` exceeds
    ``primer_finder_params['spurious_tm_clip']`` are discarded.  Among the
    remaining candidates the one with the greatest ``score`` wins; on a tie
    the earliest one encountered is kept.

    Returns:
        The winning candidate as returned by
        ``primercandidate.findCommonPrimer``, or ``None`` if no candidate
        survived.
    """
    winner = None
    for end3p in common_primer_end3p_range:
        contender = primercandidate.findCommonPrimer(
            end3p, strand, idx_lut, genome_str, ref_genome_str, edge_lut,
            mismatch_lut, primer_finder_params)
        if contender is None:
            continue

        cross_tm = primer3.calcHeterodimer(
            discriminatory_primer.seq, contender.seq,
            **primer_finder_params['thermo_params']).tm
        if cross_tm > primer_finder_params['spurious_tm_clip']:
            # Binds the discriminatory primer too well; skip it.
            continue

        if winner is None or winner.score < contender.score:
            winner = contender
    return winner
def evaluate(self, problem):
    """Score this location's sequence against the other primers.

    The score is ``self.tmax`` minus the largest heterodimer Tm found between
    the sequence at ``self.location`` and each entry of
    ``self.other_primers_sequences``, minus a small penalty proportional to
    the mean Tm.

    Returns:
        A ``SpecEvaluation``.  When there are no other primers the score is 0.

    Raises:
        ImportError: if ``PRIMER3_AVAILABLE`` is false.
    """
    if not PRIMER3_AVAILABLE:
        raise ImportError(
            "Using avoid_heterodimerization requires primer3"
            " installed (pip install primer3-py)"
        )

    def _report(score, message):
        return SpecEvaluation(
            specification=self,
            problem=problem,
            score=score,
            locations=[self.location],
            message=message,
        )

    if len(self.other_primers_sequences) == 0:
        return _report(0, "No existing primer")

    sequence = self.location.extract_sequence(problem.sequence)
    melting_temps = []
    for other_seq in self.other_primers_sequences:
        melting_temps.append(primer3.calcHeterodimer(sequence, other_seq).tm)

    largest_tm = max(melting_temps)
    # Hackish penalty: the mean Tm nudges the optimizer even when the
    # maximum does not move.
    penalty = 0.001 * sum(melting_temps) / len(melting_temps)
    return _report(
        self.tmax - largest_tm - penalty,
        "Largest Tm = %.1f " % largest_tm,
    )
def findBestCommonPrimerIn3pEndRange(
        common_primer_end3p_range, strand, discriminatory_primer, genome_str,
        ref_genome_str, idx_lut, edge_lut, mismatch_lut,
        primer_finder_params):
    """Pick the highest-scoring common primer over a range of 3' ends.

    Each end position yields at most one candidate from
    ``primercandidate.findCommonPrimer``.  A candidate is rejected when its
    heterodimer Tm with ``discriminatory_primer`` is above
    ``primer_finder_params['spurious_tm_clip']``.  The surviving candidate
    with the greatest ``score`` is returned; ties keep the earliest one.

    Returns:
        The best candidate as returned by
        ``primercandidate.findCommonPrimer``, or ``None`` when none
        qualifies.
    """
    best_so_far = None
    for end3p in common_primer_end3p_range:
        option = primercandidate.findCommonPrimer(
            end3p, strand, idx_lut, genome_str, ref_genome_str, edge_lut,
            mismatch_lut, primer_finder_params)
        if option is None:
            continue

        dimer_tm = primer3.calcHeterodimer(
            discriminatory_primer.seq, option.seq,
            **primer_finder_params['thermo_params']).tm
        if dimer_tm > primer_finder_params['spurious_tm_clip']:
            continue

        if best_so_far is None or best_so_far.score < option.score:
            best_so_far = option
    return best_so_far
def primer3_check_heterodimer(primer1, primer2):
    """Return ``False`` when the two primers dimerize too strongly.

    The ``dg`` reported by ``primer3.calcHeterodimer`` is divided by 1000 and
    compared with -10.

    Returns:
        ``False`` if the scaled value is below -10, otherwise ``True``.
    """
    duplex = primer3.calcHeterodimer(primer1, primer2)
    scaled_dg = duplex.dg / 1000
    return not (scaled_dg < -10)
def primer3_check_reverse_compliments(primer1, primer2):
    """Return ``False`` when ``primer2`` dimerizes too strongly with the
    reverse complement of ``primer1``.

    ``primer1`` is passed through ``reverse_complement`` before the
    heterodimer calculation.  The resulting ``dg`` is divided by 1000 and
    compared with -10.

    Returns:
        ``False`` if the scaled value is below -10, otherwise ``True``.
    """
    flipped_primer1 = reverse_complement(primer1)
    duplex = primer3.calcHeterodimer(primer2, flipped_primer1)
    scaled_dg = duplex.dg / 1000
    return not (scaled_dg < -10)
def pcalcHeteDimer(seq1, seq2):
    """Describe the heterodimer of ``seq1`` and ``seq2`` as a tagged string.

    Returns:
        A string starting with ``"HeteDimer:"`` followed by the two-decimal
        Tm and dg, when primer3 found a structure and the two-decimal dg is
        below -1; otherwise the empty string.
    """
    outcome = primer3.calcHeterodimer(seq1, seq2)
    dg_text = "{0:.2f}".format(outcome.dg)
    tm_text = "{0:.2f}".format(outcome.tm)

    # NOTE(review): the -1 cut-off is applied to dg as reported by primer3,
    # without the division by 1000 that other helpers use -- confirm intent.
    if not (outcome.structure_found == True and float(dg_text) < -1):
        return ""
    return "".join(("HeteDimer:", "+:tm:", tm_text, ";deltaG:", dg_text))
def find_structures(cls, folder, seq1, seq2=None, sodium=0.05, magnesium=0.0,
                    temperature=25, concentration=0.00000025, **kwargs):
    """Compute one ``Structure`` for one or two sequences using primer3.

    The calculation is chosen from the inputs, tested in this order:

    * Homodimer   -- ``seq1 == seq2``
    * Hairpin     -- ``seq2`` is ``None``
    * Heterodimer -- any other ``seq2``

    Args:
        folder: not used by this implementation.
        seq1: first sequence.
        seq2: optional second sequence.
        sodium: multiplied by 1000 and passed as ``mv_conc`` (mM).
        magnesium: multiplied by 1000 and passed as ``dv_conc`` (mM).
        temperature: passed as ``temp_c``.
        concentration: multiplied by 1e9 and passed as ``dna_conc`` (nM).
        **kwargs: accepted and ignored.

    Returns:
        A one-element list holding a ``Structure``.  When primer3 finds a
        structure it carries ``dg / 1000``, ``dh / 1000``, ``ds`` and ``tm``;
        otherwise those four fields are ``math.inf``.
    """
    import primer3

    # The same conditions apply to every calculation; build them once.
    conditions = {
        "mv_conc": sodium * 1000,  # in mM
        "dv_conc": magnesium * 1000,  # in mM
        "dntp_conc": 0.6,  # in mM
        "dna_conc": concentration * 1000 * 1000 * 1000,  # in nM
        "temp_c": temperature,
    }

    if seq1 == seq2:
        t = primer3.calcHomodimer(seq1, **conditions)
    elif seq2 is None:
        t = primer3.calcHairpin(seq1, **conditions)
    else:
        t = primer3.calcHeterodimer(seq1, seq2, **conditions)

    if t.structure_found:
        thermo_fields = (t.dg / 1000, t.dh / 1000, t.ds, t.tm)
    else:
        thermo_fields = (math.inf, math.inf, math.inf, math.inf)

    s = Structure(seq1, seq2, *thermo_fields, sodium, magnesium, temperature,
                  concentration)
    return [s]
def thermo(self):
    """Collect thermodynamic results for this primer pair.

    Returns:
        A dict with three entries, in this order:

        * ``"heterodimer"`` -- result of ``primer3.calcHeterodimer`` on the
          ``_safe_sequence`` of ``self.p1`` and ``self.p2``;
        * ``"left"`` -- ``self.p1.thermo()``;
        * ``"right"`` -- ``self.p2.thermo()``.
    """
    pair_result = primer3.calcHeterodimer(
        self.p1._safe_sequence, self.p2._safe_sequence)
    report = {"heterodimer": pair_result}
    report["left"] = self.p1.thermo()
    report["right"] = self.p2.thermo()
    return report
def heterodimer_dg(seq1, seq2, mv_cation=0, primer_conc=0):
    """Return a two-decimal heterodimer value for ``seq1`` and ``seq2``.

    NOTE(review): despite the function name, the attribute read from the
    primer3 result is ``.tm``, not ``.dg`` -- confirm which one callers
    expect before changing it.

    Args:
        seq1: first sequence.
        seq2: second sequence.
        mv_cation: forwarded as ``mv_conc``.
        primer_conc: forwarded as ``dna_conc``.

    The remaining primer3 arguments are fixed: ``dv_conc=0``,
    ``dntp_conc=0``, ``temp_c=60`` and ``max_loop=30``.
    """
    conditions = dict(
        mv_conc=mv_cation,
        dv_conc=0,
        dntp_conc=0,
        dna_conc=primer_conc,
        temp_c=60,
        max_loop=30,
    )
    value = primer3.calcHeterodimer(seq1, seq2, **conditions).tm
    return float("{0:.2f}".format(round(value, 2)))
def DeltaGsum(list):
    """Sum heterodimer ``dg`` over every ordered pair of distinct primers.

    For each primer at index ``i`` and every other primer at index ``j``,
    the last ``ParametersR`` bases of primer ``i`` are paired with the whole
    of primer ``j``.  For ``i`` equal to 1 or 4 the first ``ParametersR``
    bases are paired as well.
    NOTE(review): indices 1 and 4 presumably mark primers whose both ends
    matter; confirm against the caller.

    Args:
        list: sequence of primer sequences (the name shadows the builtin and
            is kept for callers).

    Returns:
        The accumulated ``dg`` divided by 1000.
    """
    total = 0
    for i, primer in enumerate(list):
        for j, partner in enumerate(list):
            if i == j:
                continue
            if i in (1, 4):
                head_dg = primer3.calcHeterodimer(
                    primer[:ParametersR], partner).dg
                tail_dg = primer3.calcHeterodimer(
                    primer[-ParametersR:], partner).dg
                total = head_dg + tail_dg + total
            else:
                total = primer3.calcHeterodimer(
                    primer[-ParametersR:], partner).dg + total
    return total / 1000
def max_dg(self, **kwargs) -> float:
    """Return the ``dg`` of this primer paired with its reverse complement.

    Primers with ``self.length`` above 60 are not evaluated and yield 0.
    The primer3 settings start from a copy of ``Primer.STRUCT_CONFIGS``;
    when ``self.conc`` is set it is used as ``dna_conc``.

    Args:
        **kwargs: accepted and ignored.

    Returns:
        ``dg`` rounded to one decimal, or 0 for primers longer than 60.
    """
    if self.length > 60:
        return 0

    settings = Primer.STRUCT_CONFIGS.copy()
    if self.conc is not None:
        settings["dna_conc"] = self.conc

    own_duplex = primer3.calcHeterodimer(self.seq, self.revcomp, **settings)
    return round(own_duplex.dg, 1)
def _cross_dimer_check(self, seq_tuple): package, oligo_a, oligo_b, seq1, seq2 = seq_tuple try: tr = primer3.calcHeterodimer(seq1, seq2, mv_conc = self.primer_monovalent_cations, dv_conc = self.primer_divalent_cations, dntp_conc = self.primer_dntps, dna_conc = self.primer_annealing_oligo, temp_c = self.primer_annealing_temp) deltaG = tr.dg Tm = tr.tm except: deltaG = "NaN" Tm = "NaN" df = pd.DataFrame(columns=['Package', 'Primer1', 'Primer2', 'dG', 'Tm']) df.loc[len(df)] = [str(package), str(oligo_a), str(oligo_b), str(deltaG), str(Tm)] return df
def DeltaGmax(list):
    """Return the largest absolute heterodimer ``dg`` over ordered pairs.

    For each primer at index ``i`` and every other primer at index ``j``,
    the last ``ParametersR`` bases of primer ``i`` are paired with the whole
    of primer ``j``.  For ``i`` equal to 1 or 4 the first ``ParametersR``
    bases are paired as well.
    NOTE(review): indices 1 and 4 presumably mark primers whose both ends
    matter; confirm against the caller.

    Args:
        list: sequence of primer sequences (the name shadows the builtin and
            is kept for callers).

    Returns:
        The maximum ``abs(dg)`` divided by 1000, or ``0.0`` when ``list``
        holds fewer than two primers and there is therefore no pair.
    """
    magnitudes = []
    for i, primer in enumerate(list):
        for j, partner in enumerate(list):
            if i == j:
                continue
            if i in (1, 4):
                head = primer3.calcHeterodimer(primer[:ParametersR], partner)
                magnitudes.append(abs(head.dg))
            tail = primer3.calcHeterodimer(primer[-ParametersR:], partner)
            magnitudes.append(abs(tail.dg))
    # default=0 avoids a ValueError from max() when no pair was evaluated.
    return max(magnitudes, default=0) / 1000
def get_misprime_tm_inner(self, seq1, seq2, temp):
    """Return the heterodimer Tm of ``seq1`` and ``seq2`` at ``temp``.

    Salt, dNTP and DNA concentrations come from the instance; the loop limit
    is the module-level ``MAX_LOOP``.

    Returns:
        The Tm reported by primer3, or ``-100.`` when no structure is found.
    """
    conditions = dict(
        mv_conc=self.mv_conc,
        dv_conc=self.dv_conc,
        dntp_conc=self.dntp_conc,
        dna_conc=self.dna_conc,
        temp_c=temp,
        max_loop=MAX_LOOP,
    )
    duplex = primer3.calcHeterodimer(seq1, seq2, **conditions)
    return duplex.tm if duplex.structure_found else -100.
def primer_pairing_inner(primer_position_index_result,
                         primer_reverse_complement_position_index_result,
                         min_amplicon_length,
                         max_amplicon_length,
                         reference_sequence,
                         Tm=47):
    """Pair forward and reverse primers that give an acceptable amplicon.

    Every forward primer is combined with every reverse primer.  A pair is
    kept when the amplicon length (reverse position minus forward position)
    lies within ``[min_amplicon_length, max_amplicon_length]`` and the
    heterodimer Tm of the two primers is below ``Tm``.

    Args:
        primer_position_index_result: three parallel sequences
            ``(IDs, sequences, positions)`` for the forward primers.
        primer_reverse_complement_position_index_result: the same layout for
            the reverse primers.
        min_amplicon_length: inclusive lower bound, converted with ``int``.
        max_amplicon_length: inclusive upper bound, converted with ``int``.
        reference_sequence: sequence sliced between the two positions to
            obtain the amplicon.
        Tm: heterodimer Tm limit, converted with ``float``.

    Returns:
        A list of six parallel lists, in this order: forward IDs, forward
        sequences, reverse IDs, reverse sequences, amplicons, amplicon
        lengths.  All entries except the amplicons are strings.
    """
    shortest = int(min_amplicon_length)
    longest = int(max_amplicon_length)
    tm_limit = float(Tm)

    primer_ID = []
    primer = []
    primer_reverse_complement_ID = []
    primer_reverse_complement = []
    amplicons_length = []
    amplicon = []

    # Forward primer records.
    for ID, primer_seq, position in zip(
            primer_position_index_result[0],
            primer_position_index_result[1],
            primer_position_index_result[2]):
        # Reverse primer records.
        for reverse_complement_ID, reverse_complement_primer_seq, \
                reverse_complement_position in zip(
                    primer_reverse_complement_position_index_result[0],
                    primer_reverse_complement_position_index_result[1],
                    primer_reverse_complement_position_index_result[2]):
            start = int(position)
            end = int(reverse_complement_position)
            amplicon_length = end - start  # product length
            if not shortest <= amplicon_length <= longest:
                continue

            # Length is acceptable: check forward/reverse primer interaction.
            heterodimer = primer3.calcHeterodimer(
                primer_seq, reverse_complement_primer_seq,
                mv_conc=50.0, dv_conc=3, dna_conc=200, temp_c=25)
            # Read Tm from the result object rather than parsing its repr.
            if heterodimer.tm < tm_limit:
                primer_ID.append(str(ID))
                primer.append(str(primer_seq))
                primer_reverse_complement_ID.append(
                    str(reverse_complement_ID))
                primer_reverse_complement.append(
                    str(reverse_complement_primer_seq))
                amplicons_length.append(str(amplicon_length))
                # Slice with the same int-converted positions used for the
                # length so string positions do not break the slice.
                amplicon.append(reference_sequence[start:end])

    return [
        primer_ID, primer, primer_reverse_complement_ID,
        primer_reverse_complement, amplicon, amplicons_length
    ]
def checkSetForHeterodimers(set_of_primer_sets, tm_max=40):
    """Check a selection of primer sets for cross-set heterodimers.

    ``set_of_primer_sets[k]`` is the index of the primer set chosen from
    ``combined_bins[k]``.  The ``d_primer``, ``w_primer`` and ``c_primer`` of
    every chosen set are pooled and all pairs are tested with primer3 using
    the module-level ``thermo_params``.

    Returns:
        ``(True,)`` when no pair has a heterodimer Tm above ``tm_max``;
        otherwise ``(False, bin_a, bin_b)`` with the bin indices of the first
        offending pair.
    """
    tagged_primers = []
    for bin_idx, primer_pair_idx in enumerate(set_of_primer_sets):
        chosen = combined_bins[bin_idx][primer_pair_idx]
        for member in (chosen.d_primer, chosen.w_primer, chosen.c_primer):
            tagged_primers.append((bin_idx, member))

    pairs = itertools.combinations(tagged_primers, 2)
    for (bin_a, primer_a), (bin_b, primer_b) in pairs:
        pair_tm = primer3.calcHeterodimer(
            primer_a.seq, primer_b.seq, **thermo_params).tm
        if pair_tm > tm_max:
            return (False, bin_a, bin_b)
    return (True, )
def analyze_heterostructures(primers, structures, heterodimer_threshold=-5000,
                             mv_conc=None, dv_conc=None):
    """Record heterodimers between every pair of primers.

    Each pair is tested twice: ``primer1.seq`` against ``primer2.seq`` and
    against ``primer2.revcomp``.  A hit (structure found and ``dg`` below
    ``heterodimer_threshold``) is reported through
    ``structures.add_struture`` with the pair label, the kind
    ``"Heterodimer"`` and ``dg``/``tm`` rounded to one decimal.  The label of
    a reverse-complement hit ends with an apostrophe.

    Pairs in which both primers are longer than 60 are skipped with a
    warning.

    Args:
        primers: iterable of primer objects providing ``name``, ``seq``,
            ``revcomp``, ``conc`` and ``len()``.
        structures: collector exposing ``add_struture`` (spelled as in the
            collector's API).
        heterodimer_threshold: bound compared against the raw ``dg``.
        mv_conc: optional ``mv_conc`` for primer3; omitted when ``None`` so
            that primer3 applies its own default.
        dv_conc: optional ``dv_conc`` for primer3; omitted when ``None``.

    Returns:
        ``None``; results are delivered through ``structures``.
    """
    # Only forward settings the caller actually supplied: passing None on to
    # primer3 is not a usable value for a concentration.
    salt_args = {}
    if mv_conc is not None:
        salt_args["mv_conc"] = mv_conc
    if dv_conc is not None:
        salt_args["dv_conc"] = dv_conc

    for primer1, primer2 in itertools.combinations(primers, 2):
        # At least one primer needs to be no longer than 60 bp.
        if len(primer1) > 60 and len(primer2) > 60:
            logging.warning(
                f"Primers {primer1.name} and {primer2.name} are too long (> 60 bp) for analysis of "
                f"heterostructures. ")
            continue

        keyword_args = dict(salt_args)
        # Use the larger known concentration; max() cannot compare None.
        known_concs = [conc for conc in (primer1.conc, primer2.conc)
                       if conc is not None]
        if known_concs:
            keyword_args["dna_conc"] = max(known_concs)

        checks = (
            (primer2.seq, f"{primer1.name} and {primer2.name}"),
            (primer2.revcomp, f"{primer1.name} and {primer2.name}'"),
        )
        for partner_seq, label in checks:
            heterodimer = primer3.calcHeterodimer(
                primer1.seq, partner_seq, **keyword_args)
            if (heterodimer.structure_found
                    and heterodimer.dg < heterodimer_threshold):
                structures.add_struture(
                    label,
                    "Heterodimer",
                    round(heterodimer.dg, 1),
                    round(heterodimer.tm, 1),
                )
def process(configd, desc_d, name, d_pos):
    """Print every forward/reverse primer pair with an acceptable spacing.

    Forward and reverse positions are de-duplicated and sorted.  A pair is
    printed when the distance ``reverse - forward`` lies in
    ``range(configd['LEN_LOWER'], configd['LEN_UPPER'])`` and, if
    ``configd['SS']`` is truthy, the heterodimer ``dg`` is not below
    ``configd['DI_DG_LIMIT']``.

    Args:
        configd: settings with keys ``LEN_LOWER``, ``LEN_UPPER``, ``SS`` and
            (when ``SS`` is truthy) ``DI_DG_LIMIT``.
        desc_d: ``desc_d['f']`` / ``desc_d['r']`` map ``"<name>__<pos>"`` to a
            record with ``seq``, ``tm``, ``gc`` (forward records also
            ``contiglen``).
        name: prefix used to build the record keys.
        d_pos: dict with position lists under ``'f'`` and ``'r'``.

    Returns:
        ``None``; one tab-separated line per accepted pair goes to stdout.
    """
    forward_positions = sorted(set(d_pos['f']))
    reverse_positions = sorted(set(d_pos['r']))
    forward_array = numpy.array(forward_positions)
    # Built as in the original; not used below.
    reverse_array = numpy.array(reverse_positions)
    allowed_spacing = range(configd['LEN_LOWER'], configd['LEN_UPPER'])

    # name, contiglen, fpos, rpos, spacing,
    # forward, f_tm, f_gc, reverse, r_tm, r_gc
    row_template = ('{}\t{}\t'
                    '{}\t{}\t{}\t'
                    '{}\t{:.1f}\t{:.2f}\t'
                    '{}\t{:.1f}\t{:.2f}\t')

    for n in forward_array:
        f_d = desc_d['f']['{}__{}'.format(name, n)]
        f = f_d['seq']
        for m in reverse_positions:
            diff = m - n
            if diff < 0:
                continue
            if diff not in allowed_spacing:
                # Positions are sorted, so nothing further can fit.
                if diff > configd['LEN_UPPER']:
                    break
                continue

            r_d = desc_d['r']['{}__{}'.format(name, m)]
            r = r_d['seq']
            if configd['SS']:
                # Reject pairs that dimerize too strongly.
                hetero = primer3.calcHeterodimer(
                    f.encode('utf-8'),
                    r.encode('utf-8'),
                )
                if hetero.dg < configd['DI_DG_LIMIT']:
                    continue

            print(
                row_template.format(
                    name,
                    f_d['contiglen'],
                    n,
                    m,
                    diff,
                    f,
                    float(f_d['tm']),
                    float(f_d['gc']),
                    r,
                    float(r_d['tm']),
                    float(r_d['gc']),
                ))
def thermo(self):
    """Return (and cache) primer3 thermodynamic results for this oligo.

    The results are computed on first use with ``self._thermo_settings`` and
    stored in ``self._thermo``; later calls return the stored dict.

    Returns:
        A dict with the keys ``"hairpin"``, ``"homodimer"``, ``"annealing"``
        (``self.anneal`` against its ``rc``), ``"sequence"``
        (``self._safe_sequence`` against its ``rc``) and ``"warning"`` (a
        message when ``self.sequence`` is longer than 60, else ``""``).
    """
    settings = self._thermo_settings
    if self._thermo is not None:
        return self._thermo

    warning = (
        "sequence length greater than 60. Thermo results are limited to 60bp."
        if len(self.sequence) > 60 else ""
    )

    results = {}
    results["hairpin"] = primer3.calcHairpin(self._safe_sequence, **settings)
    results["homodimer"] = primer3.calcHomodimer(
        self._safe_sequence, **settings)
    results["annealing"] = primer3.calcHeterodimer(
        self.anneal, rc(self.anneal), **settings)
    results["sequence"] = primer3.calcHeterodimer(
        self._safe_sequence, rc(self._safe_sequence), **settings)
    results["warning"] = warning

    self._thermo = results
    return self._thermo
def _calc_thermal_parameters(self, seq1, seq2, seqtype): if seqtype == "Primer" or seqtype == "Product": mv, dv, dntp, dna, temp = self.primer_monovalent_cations, self.primer_divalent_cations, self.primer_dntps, self.primer_annealing_oligo, self.primer_annealing_temp else: mv, dv, dntp, dna, temp = self.probe_monovalent_cations, self.probe_divalent_cations, self.probe_dntps, self.probe_annealing_oligo, self.probe_annealing_temp try: tr = primer3.calcHeterodimer(str(seq1), str(seq2), mv_conc = mv, dv_conc = dv, dntp_conc = dntp, dna_conc = dna, temp_c = temp) deltaG = round(tr.dg / 1000, 2) Tm = round(tr.tm, 2) except: deltaG = "NaN" Tm = "NaN" return Tm, deltaG
def checkDimerEnd(primer1, primer2=None):
    """Return the dimer dG of two primers, starred when a 3' end is involved.

    When ``primer2`` is falsy the primer is tested against itself.  The dG is
    the primer3 ``dg`` divided by 1000 and rounded to two decimals, computed
    with the salt settings ``args.mvConc`` and ``args.dvConc``.

    Returns:
        The dG as a number, or the string ``"<dG>*"`` when the dG is below -3
        and the reverse complement of the last five bases of one primer
        occurs in the other primer (ignoring its first base).
    """
    if not primer2:
        primer2 = primer1

    dimer_dg = round(
        primer3.calcHeterodimer(
            primer1, primer2, mv_conc=args.mvConc, dv_conc=args.dvConc
        ).dg / 1000,
        2)
    end1_rc = revComplement(primer1[-5:])
    end2_rc = revComplement(primer2[-5:])

    end_involved = end1_rc in primer2[1:] or end2_rc in primer1[1:]
    if dimer_dg < -3 and end_involved:
        return str(dimer_dg) + '*'
    return dimer_dg
def calcDimer(seq1, seq2, dg_Threshold=8, mv_conc=50.0, dv_conc=1.5,
              dntp_conc=0.25, dna_conc=50.0, temp_c=37, max_loop=30):
    """Return Tm and dG of a heterodimer if it is strong enough.

    The reaction settings are passed positionally to
    ``primer3.calcHeterodimer`` in the order ``mv_conc, dv_conc, dntp_conc,
    dna_conc, temp_c, max_loop``.

    Returns:
        ``(tm, dg / 1000)`` when ``|dg / 1000|`` is at least
        ``dg_Threshold``; otherwise ``(-1, -1)``.
    """
    outcome = primer3.calcHeterodimer(seq1, seq2, mv_conc, dv_conc,
                                      dntp_conc, dna_conc, temp_c, max_loop)
    scaled_dg = outcome.dg / 1000
    if math.fabs(scaled_dg) >= dg_Threshold:
        return outcome.tm, scaled_dg
    return -1, -1
def calc_dimer(df):
    '''
    Find, for every probe, the partner giving the lowest dimer dG.

    Each probe in ``df['sequence']`` is paired with every probe in the frame,
    itself included, so both homodimers and heterodimers are covered.  The dG
    is the primer3 ``dg`` divided by 1000, computed with ``mv_conc = 300``.

    Side effect: a column ``index_num`` holding the index is added to ``df``.

    Returns a DataFrame indexed like ``df`` whose two columns hold the lowest
    dG and the ``index_num`` of the partner that produced it.
    '''
    df['index_num'] = df.index
    probes = df[['index_num', 'sequence']].to_numpy()

    strongest = []
    for probe in probes:
        interactions = [
            (primer3.calcHeterodimer(probe[1], partner[1],
                                     mv_conc = 300).dg/1000,
             partner[0])
            for partner in probes
        ]
        # Lowest dG first; ties keep the earliest partner.
        strongest.append(min(interactions, key = lambda item: item[0]))

    return pd.DataFrame(strongest, index = df.index)
def judge_two_oligo(oligo_pair):
    """Report whether two oligos interact above their working Tm.

    The limit is the smallest of the two oligos' ``'Tm'`` values and
    ``oligo_pair['min_Tm']``.  The heterodimer is tested first and the end
    stability only if the heterodimer passes.

    Args:
        oligo_pair: dict with ``'oligo_1'`` and ``'oligo_2'`` (each a mapping
            with ``'seq'`` and ``'Tm'``) and ``'min_Tm'``.

    Returns:
        ``[oligo_1, oligo_2, tm, structure]`` for the first test that exceeds
        the limit, where ``tm`` is rounded to two decimals and ``structure``
        is the ASCII structure for a heterodimer hit or ``''`` for an end
        stability hit; ``None`` when neither test exceeds the limit.
    """
    first = oligo_pair['oligo_1']
    second = oligo_pair['oligo_2']
    tm_limit = min(first['Tm'], second['Tm'], oligo_pair['min_Tm'])

    duplex = primer3.calcHeterodimer(first['seq'], second['seq'],
                                     output_structure=True)
    if duplex.tm > tm_limit:
        return [first, second, round(duplex.tm, 2), duplex.ascii_structure]

    end_check = primer3.bindings.calcEndStability(first['seq'],
                                                  second['seq'])
    if end_check.tm > tm_limit:
        return [first, second, round(end_check.tm, 2), '']

    return None
def heterodimer(self, primer: str, other_primer: str) -> float:
    """
    Heterodimer melting temperature, optionally cached.

    When ``self.cached`` is truthy, results are stored in
    ``self.cache_hetero`` under the key ``primer + '_' + other_primer`` and
    reused on later calls.

    :param primer: [str] primer sequence
    :param other_primer: [str] sequence of the second primer
    :return: melting temperature, or 0 when ``primer`` is empty
    """
    if len(primer) == 0:
        return 0

    cache_key = primer + '_' + other_primer
    if self.cached and cache_key in self.cache_hetero:
        return self.cache_hetero[cache_key]

    melting_temp = calcHeterodimer(
        primer, other_primer, self.mv, self.dv, self.dntp).tm
    if self.cached:
        self.cache_hetero[cache_key] = melting_temp
    return melting_temp
def __init__(self, oligomer, target):
    """Compute and store the heterodimer of ``oligomer`` and ``target``.

    The primer3 result supplies ``tm``, ``dg``, ``dh`` and ``ds`` as well as
    the ASCII structure and its lines, all copied onto the instance.
    """
    self.oligomer = oligomer
    self.target = target

    duplex = calcHeterodimer(self.oligomer, self.target,
                             output_structure=True)
    for field in ("tm", "dg", "dh", "ds"):
        setattr(self, field, getattr(duplex, field))

    # Structure and structure lines are kept in the primer3 format.
    self.structure = duplex.ascii_structure
    self.structure_lines = duplex.ascii_structure_lines

    # Placeholders intended for getters, setters and 'magic' properties.
    self._formatted_structure_lines = None
    self._formatted_structure = None
    self._reduced_structure = None
def judge_two_site(site_pair):
    """List primer dimers between the primer pairs of two sites.

    Every ranked primer pair of ``site_1`` is combined with every ranked pair
    of ``site_2``.  For such a combination the four left/right sequence
    pairings are tested in order, heterodimer first and end stability second;
    the first pairing whose Tm exceeds the limit is recorded and the
    remaining pairings of that combination are skipped.

    The limit is the smaller ``'min_Tm'`` of the two primer pairs minus
    ``site_pair['Tm_diff']``.

    Returns:
        A list of rows ``[id_1, id_2, rank_1, rank_2, seq_1, seq_2, tm,
        structure]`` where ``structure`` is the ASCII structure for a
        heterodimer hit and ``''`` for an end stability hit.  An empty list
        is returned immediately when ``global_var.stop_run`` is ``True``.
    """
    if global_var.stop_run is True:
        return []

    dimers = []
    for rank_1, primers_1 in site_pair['site_1'].items():
        for rank_2, primers_2 in site_pair['site_2'].items():
            pairings = product(
                [primers_1['seq_L'], primers_1['seq_R']],
                [primers_2['seq_L'], primers_2['seq_R']])
            tm_limit = (min(primers_1['min_Tm'], primers_2['min_Tm'])
                        - site_pair['Tm_diff'])

            for seq_1, seq_2 in pairings:
                row_head = [site_pair['id_1'], site_pair['id_2'],
                            rank_1, rank_2, seq_1, seq_2]

                duplex = primer3.calcHeterodimer(seq_1, seq_2,
                                                 output_structure=True)
                if duplex.tm > tm_limit:
                    dimers.append(
                        row_head + [duplex.tm, duplex.ascii_structure])
                    break

                end_check = primer3.bindings.calcEndStability(seq_1, seq_2)
                if end_check.tm > tm_limit:
                    dimers.append(row_head + [end_check.tm, ''])
                    break
    return dimers
def test_primers_collection_example():
    """Build ten primers one at a time and check the whole collection.

    Each new primer is optimized to avoid heterodimerization with those
    already generated, to avoid the patterns ``3x3mer`` and ``4xG``, and to
    approach a GC content of 0.6.
    """

    def create_new_primer(existing_primers):
        """Create a new primer based on the primers created so far"""
        constraints = [
            AvoidHeterodimerization(existing_primers, tmax=3),
            AvoidPattern("3x3mer"),
            AvoidPattern("4xG"),
        ]
        problem = DnaOptimizationProblem(
            sequence=random_dna_sequence(length=20),
            constraints=constraints,
            objectives=[EnforceGCContent(target=0.6)],
            logger=None,
        )
        problem.resolve_constraints()
        problem.optimize()
        return problem.sequence

    # Main loop: primers are created one by one.
    existing_primers = []
    for _ in range(10):
        existing_primers.append(create_new_primer(existing_primers))

    print("PRIMERS GENERATED: \n\n%s\n" % "\n".join(existing_primers))

    # No runs of four G or four C in any primer.
    for sequence in existing_primers:
        assert "GGGG" not in sequence
        assert "CCCC" not in sequence

    # No pair of primers may have a heterodimer Tm of 3 or more.
    pair_tms = [
        primer3.calcHeterodimer(seq1, seq2).tm
        for seq1, seq2 in itertools.combinations(existing_primers, 2)
    ]
    assert max(pair_tms) < 3

    # GC content stays close to the 0.6 target.
    gc_contents = [gc_content(p) for p in existing_primers]
    assert min(gc_contents) > 0.55
    assert max(gc_contents) < 0.65
#### Reverse complement the primers and test them for TSO compatibility with
#### Primer3. Keep only primers with Tm > 50 whose reverse complement
#### contains neither "CC" nor "TTT"; the kept primers are prefixed with OH.
rc_pr_list = []
for candidate in full_list:
    reversed_seq = rc(candidate)
    if "CC" in reversed_seq or "TTT" in reversed_seq:
        continue
    if primer3.calcTm(reversed_seq) > 50:
        rc_pr_list.append(OH + reversed_seq)

#### Select the primers with the least tendency to form heterodimers with the
#### TSO. dG > -3000 was chosen according to Fabio's DENV2 primer.
dg_3000 = []
for oligo in rc_pr_list:
    tso_dimer = primer3.calcHeterodimer(oligo, TSO)
    hairpin = primer3.calcHairpin(oligo)
    if not tso_dimer.dg > -3000:
        continue
    # NOTE(review): oligo[23:] presumably skips the OH prefix -- verify that
    # OH is 23 bases long.
    dg_3000.append([
        oligo,
        primer3.calcTm(oligo[23:]),
        tso_dimer.tm,
        tso_dimer.dg,
        hairpin.tm,
        hairpin.dg,
    ])

column_names = [
    "Primer", "Annealing Tm", "HeteroDimer Tm", "HeteroDimer dG",
    "Hairpin Tm", "Hairpin dG"
]
dg_3000 = pd.DataFrame(np.array(dg_3000), columns=column_names)
# Everything after the primer column is numeric; round it to two decimals.
dg_3000.iloc[:, 1:] = dg_3000.iloc[:, 1:].astype(float).round(2)
def heterodimer_dg(seq1, seq2, mv_cation=0, primer_conc=0):
    """Return a two-decimal heterodimer value for ``seq1`` and ``seq2``.

    NOTE(review): despite the function name, the attribute read from the
    primer3 result is ``.tm``, not ``.dg`` -- confirm which one callers
    expect before changing it.

    Args:
        seq1: first sequence.
        seq2: second sequence.
        mv_cation: forwarded as ``mv_conc``.
        primer_conc: forwarded as ``dna_conc``.

    The remaining primer3 arguments are fixed: ``dv_conc=0``,
    ``dntp_conc=0``, ``temp_c=60`` and ``max_loop=30``.
    """
    fixed_conditions = {
        "dv_conc": 0,
        "dntp_conc": 0,
        "temp_c": 60,
        "max_loop": 30,
    }
    outcome = primer3.calcHeterodimer(
        seq1, seq2, mv_conc=mv_cation, dna_conc=primer_conc,
        **fixed_conditions)
    reading = outcome.tm
    return float("{0:.2f}".format(round(reading, 2)))