def calc_characteristics(infile):
    """Compute Tm and self-structure melting temperatures for FASTA records.

    Every record is analysed on its forward strand and on its reverse
    complement with ``calcHairpin`` and ``calcHomodimer``; ``calcTm`` is
    evaluated on the forward strand only.

    :param infile: FASTA source handed to ``SeqIO.parse``.
    :return: dict mapping each record id to a string that starts with a tab
        and holds, tab-separated: forward sequence, Tm, forward hairpin Tm,
        forward homodimer Tm, reverse-complement sequence, reverse hairpin
        Tm, reverse homodimer Tm.  A structure Tm is "NA" when primer3
        reports that no structure was found.
    """
    print("Using sequence file", infile, "to calculate Tm and structure potential")

    def structure_tm(result):
        # Only report a melting temperature when a structure actually exists.
        if result.structure_found:
            return result.tm
        return "NA"

    records = list(SeqIO.parse(infile, "fasta"))
    chars = {}
    for record in records:
        forward = str(record.seq)
        reverse = str(record.seq.reverse_complement())
        fields = [
            forward,
            calcTm(forward),
            structure_tm(calcHairpin(forward)),
            structure_tm(calcHomodimer(forward)),
            reverse,
            structure_tm(calcHairpin(reverse)),
            structure_tm(calcHomodimer(reverse)),
        ]
        chars[record.id] = "\t" + "\t".join(map(str, fields))
    return chars
def primer_homodimer_check_by_primer3_inner(primer_ID, primers, Tm):
    """Partition primers by whether their homodimer Tm is within a limit.

    Each primer is evaluated with ``primer3.calcHomodimer`` (mv_conc=50.0,
    dv_conc=3, dna_conc=200, temp_c=25).  A primer is "satisfied" when its
    homodimer Tm, rounded to two decimals, is less than or equal to ``Tm``.

    :param primer_ID: iterable of primer identifiers, parallel to ``primers``.
    :param primers: iterable of primer sequences (converted with ``str``).
    :param Tm: maximum accepted homodimer melting temperature; anything
        accepted by ``float``.
    :return: list of four lists:
        ``[satisfied_ids, satisfied_primers, rejected_ids, rejected_primers]``.
        Rejected ids are suffixed with ``"@HoD_<tm>"`` where ``<tm>`` is the
        two-decimal homodimer Tm.
    """
    primer_homodimer_satisfied_ID = []
    primer_homodimer_satisfied = []
    primer_homodimer_not_satisfied_ID = []
    primer_homodimer_not_satisfied = []
    for ID, primer in zip(primer_ID, primers):
        # Homodimer check and Tm calculation.
        homodimer = primer3.calcHomodimer(
            str(primer), mv_conc=50.0, dv_conc=3, dna_conc=200, temp_c=25)
        # Read the Tm from the result attribute rather than parsing the
        # object's repr; the two-decimal formatting keeps the labels and the
        # threshold comparison the same as the repr-based extraction.
        homodimer_Tm = "{:.2f}".format(homodimer.tm)
        if float(homodimer_Tm) <= float(Tm):
            primer_homodimer_satisfied_ID.append(ID)
            primer_homodimer_satisfied.append(primer)
        else:
            primer_homodimer_not_satisfied_ID.append(
                str(ID) + "@HoD_" + homodimer_Tm)
            primer_homodimer_not_satisfied.append(primer)
    return [
        primer_homodimer_satisfied_ID,
        primer_homodimer_satisfied,
        primer_homodimer_not_satisfied_ID,
        primer_homodimer_not_satisfied,
    ]
def primer3_check_homodimer(primer):
    """Return False when the primer's homodimer dG / 1000 is below -10.

    :param primer: sequence passed to ``primer3.calcHomodimer``.
    :return: ``True`` if the scaled dG is not below -10, ``False`` otherwise.
    """
    scaled_dg = primer3.calcHomodimer(primer).dg / 1000
    return not scaled_dg < -10
def _find_self_binding_ranges(self) -> List[Tuple[int, int, float]]:
    """
    Collect gene segments that can form a hairpin or a homodimer.

    Windows of nominal length 5 and 10 are tested at every start offset with
    calcHairpin; windows of nominal length 25 and 50 are tested at start
    offsets in steps of 10 with calcHomodimer.  A window is recorded when
    ``_is_new_hairpin_or_homodimer`` accepts it.

    :return: A list of triples (from_offset, to_offset, Tm), hairpin entries
             first, followed by homodimer entries.
    """
    temperature_config = self.config.temperature_config
    # Positional order expected by the primer3 calls below:
    # monovalent, divalent, dNTP concentrations.
    salts = (temperature_config.k, temperature_config.mg, temperature_config.dntp)
    gene_length = self.gene_length

    hairpins = []
    for window in (5, 10):
        for first in range(0, gene_length - window):
            last = first + window - 1
            # NOTE(review): the slice excludes `last`, so the evaluated
            # segment holds window - 1 bases -- confirm this is intended.
            folded = calcHairpin(self.gene[first:last], *salts)
            if self._is_new_hairpin_or_homodimer(first, last, folded, hairpins):
                hairpins.append((first, last, folded.tm))

    homodimers = []
    for window in (25, 50):
        for first in range(0, gene_length - window, 10):
            last = first + window - 1
            paired = calcHomodimer(self.gene[first:last], *salts)
            if not paired.structure_found:
                continue
            # NOTE(review): novelty is checked against the hairpin list, not
            # the homodimer list -- confirm this is intended.
            if self._is_new_hairpin_or_homodimer(first, last, paired, hairpins):
                homodimers.append((first, last, paired.tm))

    return hairpins + homodimers
def pcalcHomodimer(seq):
    """Describe the homodimer of *seq* as a short annotation string.

    :param seq: sequence passed to ``primer3.calcHomodimer``.
    :return: ``"Self_Dimer:+:tm:<tm>;deltaG:<dg>"`` (both values with two
        decimals) when a structure was found and the two-decimal dG is below
        -1; otherwise the empty string.
    """
    res = primer3.calcHomodimer(seq)
    dg = f"{res.dg:.2f}"
    tm = f"{res.tm:.2f}"
    # The threshold is applied to the rounded dG, exactly as it is reported.
    if not (res.structure_found and float(dg) < -1):
        return ""
    return f"Self_Dimer:+:tm:{tm};deltaG:{dg}"
def homodimer_Tm(primer_sequence, mv_cation=0, primer_conc=0):
    """Return the homodimer melting temperature of a primer as text.

    The calculation uses ``primer3.calcHomodimer`` with dv_conc=0,
    dntp_conc=0, temp_c=37 and max_loop=30.

    :param primer_sequence: primer sequence to evaluate.
    :param mv_cation: value forwarded as ``mv_conc``.
    :param primer_conc: value forwarded as ``dna_conc``.
    :return: the Tm rounded to two decimals, formatted as a string.
    """
    result = primer3.calcHomodimer(
        primer_sequence,
        mv_conc=mv_cation,
        dv_conc=0,
        dntp_conc=0,
        dna_conc=primer_conc,
        temp_c=37,
        max_loop=30,
    )
    return format(round(result.tm, 2), ".2f")
def find_structures(cls, folder, seq1, seq2=None, sodium=0.05, magnesium=0.0, temperature=25, concentration=0.00000025, **kwargs):
    """
    Return a one-element list holding a 'Structure' with delta-G, delta-H,
    delta-S and Tm values computed by primer3.

    Accepts 1 or 2 input sequences and selects the calculation as follows:
     * Homodimer   (seq1 == seq2)
     * Hairpin     (seq2 is None)
     * Heterodimer (any other seq1/seq2 pair)

    ``sodium`` and ``magnesium`` are multiplied by 1000 and
    ``concentration`` by 1e9 before being passed to primer3 as ``mv_conc``,
    ``dv_conc`` and ``dna_conc``; ``dntp_conc`` is fixed at 0.6.
    ``folder`` and ``**kwargs`` are accepted but not used here.

    When primer3 reports no structure, the four thermodynamic fields of the
    returned 'Structure' are ``math.inf``.
    """
    import primer3

    # The same conditions apply to all three calculations.
    conditions = {
        'mv_conc': sodium * 1000,  # in mM
        'dv_conc': magnesium * 1000,  # in mM
        'dntp_conc': 0.6,  # in mM
        'dna_conc': concentration * 1000 * 1000 * 1000,  # in nM
        'temp_c': temperature,
    }

    if seq1 == seq2:
        # Homodimer calculation
        t = primer3.calcHomodimer(seq1, **conditions)
    elif seq2 is None:
        # Hairpin calculation
        t = primer3.calcHairpin(seq1, **conditions)
    else:
        # Heterodimer calculation
        t = primer3.calcHeterodimer(seq1, seq2, **conditions)

    if t.structure_found:
        # dg and dh are scaled by 1/1000; ds and tm are passed through as-is.
        s = Structure(seq1, seq2, t.dg / 1000, t.dh / 1000, t.ds, t.tm,
                      sodium, magnesium, temperature, concentration)
    else:
        s = Structure(seq1, seq2, math.inf, math.inf, math.inf, math.inf,
                      sodium, magnesium, temperature, concentration)

    return [s]
def __init__(self, direction, name, seq):
    """Store the primer and precompute its Tm, self-structure Tms and GC%.

    :param direction: stored unchanged as ``self.direction``.
    :param name: stored unchanged as ``self.name``.
    :param seq: primer sequence; G and C are counted case-sensitively.
    """
    self.direction = direction
    self.name = name
    self.seq = seq

    # One set of ionic conditions is shared by all three primer3 calls.
    conditions = {'mv_conc': 50, 'dv_conc': 1.5, 'dntp_conc': 0.6}
    self.tm = calcTm(self.seq, **conditions)
    self.homodimer = calcHomodimer(self.seq, **conditions).tm
    self.hairpin = calcHairpin(self.seq, **conditions).tm

    gc_bases = seq.count('G') + seq.count('C')
    self.gc = 100.0 * gc_bases / len(seq)
def __init__(self, oligomer):
    """Run a homodimer analysis on *oligomer* and keep its results.

    :param oligomer: sequence passed to ``calcHomodimer`` with
        ``output_structure=True``.
    """
    self.oligomer = oligomer
    result = calcHomodimer(self.oligomer, output_structure=True)

    # Thermodynamic values copied from the primer3 result.
    self.tm, self.dg, self.dh, self.ds = (
        result.tm, result.dg, result.dh, result.ds)

    # Structure and structure lines are given in the primer3 format.
    self.structure = result.ascii_structure
    self.structure_lines = result.ascii_structure_lines

    # Placeholders, initially unset.
    self._formatted_structure_lines = None
    self._formatted_structure = None
def homodimer(self, primer: str) -> float:
    """
    Homodimer melting temperature from Primer3, optionally memoised.

    When ``self.cached`` is set, results are looked up in and stored to
    ``self.cache_homo`` keyed by the primer sequence.

    :param primer: [str] primer sequence
    :return: melting temperature, or 0 for an empty primer
    """
    if len(primer) == 0:
        return 0
    if self.cached and primer in self.cache_homo:
        return self.cache_homo[primer]
    melting_temp = calcHomodimer(primer, self.mv, self.dv, self.dntp).tm
    if self.cached:
        self.cache_homo[primer] = melting_temp
    return melting_temp
def structure_filter(df, hairpin_min, dimer_min, Na_conc, filter=True):
    '''
    Use primer3 to score hairpin and homodimer structures of each sequence.
    https://libnano.github.io/primer3-py/quickstart.html#thermodynamic-analysis

    Adds the columns 'hairpin_dG', 'homodimer_dG' (primer3 dG / 1000) and
    'passed_structure' to ``df`` in place.  A row passes when both dG values
    are greater than or equal to their minimum.

    df: DataFrame with a 'sequence' column.
    hairpin_min: lowest accepted hairpin dG.
    dimer_min: lowest accepted homodimer dG.
    Na_conc: forwarded to primer3 as mv_conc.
    filter: when equal to True, return a copy holding only passing rows;
        otherwise return ``df`` itself.
    '''
    def hairpin_dg(sequence):
        return primer3.calcHairpin(sequence, mv_conc=Na_conc).dg / 1000

    def homodimer_dg(sequence):
        return primer3.calcHomodimer(sequence, mv_conc=Na_conc).dg / 1000

    sequences = df['sequence']
    df['hairpin_dG'] = sequences.apply(hairpin_dg)
    df['homodimer_dG'] = sequences.apply(homodimer_dg)

    hairpin_ok = df['hairpin_dG'] >= hairpin_min
    dimer_ok = df['homodimer_dG'] >= dimer_min
    df['passed_structure'] = hairpin_ok & dimer_ok

    # Equality test (not truthiness) is kept so non-bool flags behave as before.
    if filter == True:
        return df[df['passed_structure']].copy()
    return df
def expanded_primer_stats(degen, oligo_DNA=50, dNTPs=0.2, salt_monovalent=50, salt_divalent=1.5):
    """ Summarise Tm, GC, hairpin Tm and homodimer Tm over expanded primers.

    ``degen`` is a three-level nested dict; each innermost entry holds an
    'expanded' collection of non-degenerate sequences.  For every entry the
    keys 'Tm', 'GC', 'hairpin' and 'homodimer' are set to
    ``[avg(values), sd(values)]``.  The input is modified in place.

    Returns ``degen`` (or None when ``degen`` is None).
    """
    logging.info('Calculating stats on primer sets...')
    if degen is None:
        return None

    # Conditions shared by every primer3 call below.
    conditions = {
        'dna_conc': oligo_DNA,
        'dntp_conc': dNTPs,
        'mv_conc': salt_monovalent,
        'dv_conc': salt_divalent,
    }
    for categories in degen.values():
        for primer_sets in categories.values():
            for entry in primer_sets.values():
                melting, gc, hairpins, homodimers = [], [], [], []
                # stats on each expanded (non-degenerate) primer
                for expanded_seq in list(entry['expanded']):
                    melting.append(primer3.calcTm(expanded_seq, **conditions))
                    gc.append(calc_GC(expanded_seq))
                    hairpins.append(
                        primer3.calcHairpin(expanded_seq, **conditions).tm)
                    homodimers.append(
                        primer3.calcHomodimer(expanded_seq, **conditions).tm)
                # summarizing stats (average & std)
                summary = (('Tm', melting), ('GC', gc),
                           ('hairpin', hairpins), ('homodimer', homodimers))
                for key, values in summary:
                    entry[key] = [avg(values), sd(values)]
    return degen
def write_probe_map_counts(self, fn):
    """Write a tab-separated table of per-probe mapping counts.

    One header line is followed by one row per entry of
    ``self.probe_map_counts``: identifier, sequence, count, and the melting,
    homodimer and hairpin temperatures rounded to two decimals.

    Args:
        fn: path to file to write to
    """
    # primer3 ionic concentrations from Primal Scheme
    ions = {'mv_conc': 50, 'dv_conc': 1.5, 'dntp_conc': 0.6}
    column_names = (
        "Probe identifier",
        "Probe sequence",
        "Number sequences mapped to",
        "Melt temp",
        "Homodimer Tm",
        "Hairpin Tm",
    )
    with open(fn, 'w') as out:
        out.write('\t'.join(column_names) + '\n')
        # One row for every probe
        for probe, mapped in self.probe_map_counts.items():
            fields = (
                probe.identifier(),
                probe.seq_str,
                mapped,
                round(primer3.calcTm(probe.seq_str, **ions), 2),
                round(primer3.calcHomodimer(probe.seq_str, **ions).tm, 2),
                round(primer3.calcHairpin(probe.seq_str, **ions).tm, 2),
            )
            out.write('\t'.join(map(str, fields)) + '\n')
def analyze_homostructures(
    primers,
    structures,
    homodimer_threshold=-5000,
    hairpin_threshold=-1000,
    mv_conc=None,
    dv_conc=None,
):
    """Report hairpins and self-dimers whose dG falls below a threshold.

    Primers longer than 60 bases are trimmed to their last 60 bases (bases
    are removed from the 5'-end) before analysis, and a warning is logged.
    Each structure found with a dG below its threshold is registered through
    ``structures.add_struture`` with the dG and Tm rounded to one decimal.

    :param primers: iterable of primer objects exposing ``seq``, ``name``,
        ``conc`` and ``len()``.
    :param structures: collector exposing ``add_struture(name, kind, dg, tm)``.
    :param homodimer_threshold: self-dimers with dG below this are reported.
    :param hairpin_threshold: hairpins with dG below this are reported.
    :param mv_conc: forwarded to primer3 as ``mv_conc``; ``None`` leaves the
        primer3 default in place.
    :param dv_conc: forwarded to primer3 as ``dv_conc``; ``None`` leaves the
        primer3 default in place.
    """
    for primer in primers:
        checked_seq = primer.seq
        if len(primer) > 60:
            logger.warning(
                f"Primer {primer.name} is too long ({len(primer)}bp > 60 bp) for analysis of "
                f"homostructures. ")
            logger.info(
                "Trimming primer to 60 bp by removing bases from 5'-end")
            checked_seq = primer.seq[-60:]

        conditions = {
            "mv_conc": mv_conc,
            "dv_conc": dv_conc,
            "dna_conc": primer.conc,
        }
        # primer3 expects numeric concentrations; omit unset values so the
        # library defaults are used instead of passing None through.
        keyword_args = {
            key: value for key, value in conditions.items()
            if value is not None
        }

        hairpin = primer3.calcHairpin(checked_seq, **keyword_args)
        if hairpin.structure_found and hairpin.dg < hairpin_threshold:
            structures.add_struture(
                primer.name,
                "Hairpin",
                round(hairpin.dg, 1),
                round(hairpin.tm, 1),
            )

        homodimer = primer3.calcHomodimer(checked_seq, **keyword_args)
        if homodimer.structure_found and homodimer.dg < homodimer_threshold:
            structures.add_struture(
                primer.name,
                "Self-dimer",
                round(homodimer.dg, 1),
                round(homodimer.tm, 1),
            )
def thermo(self):
    """Return the primer3 thermodynamic results, computing them on first use.

    The result dict is stored in ``self._thermo`` and reused afterwards.  It
    holds the hairpin and homodimer results of ``self._safe_sequence``, the
    heterodimer of ``self.anneal`` with its ``rc``, the heterodimer of
    ``self._safe_sequence`` with its ``rc``, and a warning string that is
    non-empty when ``self.sequence`` is longer than 60.
    """
    settings = self._thermo_settings
    if self._thermo is not None:
        return self._thermo

    warning = ""
    if len(self.sequence) > 60:
        warning = "sequence length greater than 60. Thermo results are limited to 60bp."

    results = {}
    results["hairpin"] = primer3.calcHairpin(self._safe_sequence, **settings)
    results["homodimer"] = primer3.calcHomodimer(self._safe_sequence, **settings)
    results["annealing"] = primer3.calcHeterodimer(
        self.anneal, rc(self.anneal), **settings)
    results["sequence"] = primer3.calcHeterodimer(
        self._safe_sequence, rc(self._safe_sequence), **settings)
    results["warning"] = warning
    self._thermo = results
    return self._thermo
def check(self, seq):
    """Return True when the homodimer Tm of *seq* is below ``self.max_tm``.

    The sequence is converted with ``str`` and upper-cased before it is
    passed to ``primer3.calcHomodimer``.
    """
    oligo = str(seq).upper()
    homodimer = primer3.calcHomodimer(oligo, output_structure=True)
    return homodimer.tm < self.max_tm
def main():
    """Screen candidate primer pairs on contigs and print the passing ones.

    Command line: ``size <check-kmer-distance-py-output> <contigs.fa>``.
    For every (forward, reverse) position pair returned by
    ``parse_kmer_distance`` the primers are cut from the contig, filtered on
    ambiguity, Tm, Tm difference and GC, and -- when ``SS`` is set -- on
    hairpin, homodimer and heterodimer dG.  Passing pairs are printed as
    tab-separated rows on stdout; the pass count goes to stderr.
    """
    if len(sys.argv) != 4:
        mes = '*** python {} size <check-kmer-distance-py-output> <contigs.fa>'
        print(mes.format(os.path.basename(sys.argv[0])), file=sys.stderr)
        sys.exit(1)

    size = int(sys.argv[1])
    infile = sys.argv[2]
    contigf = sys.argv[3]

    d = parse_kmer_distance(infile)
    print(('#contig_name\tcontig_len\tf_start\tr_start\tf_seq\tf_tm\tf_gc\t'
           'r_seq\tr_tm\tr_gc\tta\tamp_size'))
    pair_pass = 0
    for rec in screed.open(contigf):
        name = rec.name
        if name not in d:
            continue
        seq = rec.sequence
        for f_p, r_p in d[name]:
            assert len(seq) > r_p, '*** seq length < forward primer position'
            f = seq[f_p:(f_p + size)]
            r = RC(seq[r_p:(r_p + size)])
            # primer3 functions only accept byte-strings
            f = f.encode('utf-8')
            r = r.encode('utf-8')
            if has_ambiguous(f) or has_ambiguous(r):
                continue

            # check tm
            f_tm = primer3.calcTm(f)
            if f_tm < TM_LOWER or f_tm > TM_UPPER:
                continue
            r_tm = primer3.calcTm(r)
            if r_tm < TM_LOWER or r_tm > TM_UPPER:
                continue
            if abs(f_tm - r_tm) > TM_DIFF_MAX:
                continue

            # check gc
            f_gc = check_GC(f)
            if f_gc < GC_LOWER or f_gc > GC_UPPER:
                continue
            r_gc = check_GC(r)
            if r_gc < GC_LOWER or r_gc > GC_UPPER:
                continue

            amp = seq[f_p:(r_p + size)].encode('utf-8')
            amp_tm = primer3.calcTm(amp)
            # Annealing temperature, premierbiosoft formula.
            # (IDT recommendation would use - 25 instead of - 14.9.)
            ta = 0.3 * min(f_tm, r_tm) + 0.7 * amp_tm - 14.9

            # thermodynamics check (optional, loose filter)
            if SS:
                # check hairpin and homodimer of the forward primer
                f_hp = primer3.calcHairpin(f)
                f_ho = primer3.calcHomodimer(f)
                if f_hp.dg < HP_DG_LIMIT or f_hp.dg > 0:
                    continue
                if f_hp.tm > ta:
                    continue
                if f_ho.dg < DI_DG_LIMIT or f_ho.dg > 0:
                    continue

                # same checks for the reverse primer; the hairpin test must
                # look at the hairpin dG on both sides of the range, exactly
                # like the forward primer test above
                r_hp = primer3.calcHairpin(r)
                r_ho = primer3.calcHomodimer(r)
                if r_hp.dg < HP_DG_LIMIT or r_hp.dg > 0:
                    continue
                if r_hp.tm > ta:
                    continue
                if r_ho.dg < DI_DG_LIMIT or r_ho.dg > 0:
                    continue

                # check heterodimer
                # NOTE(review): nesting of this check under `if SS:` was
                # inferred from the surrounding comments -- confirm.
                hetero = primer3.calcHeterodimer(f, r)
                if hetero.dg < DI_DG_LIMIT:
                    continue

            pair_pass += 1
            # forward, f_tm, f_gc, reverse, r_tm, r_gc, ta, amp_size
            mes = ('{}\t{}\t{}\t{}\t{}\t{:.1f}\t{:.2f}\t{}\t{:.1f}\t'
                   '{:.2f}\t{}\t{}')
            print(
                mes.format(name, len(seq), f_p, r_p, f, f_tm, f_gc, r, r_tm,
                           r_gc, ta, len(amp)))

    print('*** Pairs passed: {}'.format(pair_pass), file=sys.stderr)
def main():
    """Filter primers, keeping those where at least one orientation is usable.

    Command line: ``params.config file.uniq2ref.primer``.  The YAML config
    supplies TM_LOWER/TM_UPPER, GC_LOWER/GC_UPPER, GC_CLAMP, SS and
    DI_DG_LIMIT.  A primer is dropped when both the sequence and its reverse
    complement fail a check (ambiguity, Tm, GC, optional GC clamp).  When
    ``SS`` is set, a primer is also dropped if the homodimer dG of the
    forward sequence is below DI_DG_LIMIT or above 0.  Survivors are printed
    to stdout in FASTA form.  An empty input file is reported on stderr.
    """
    if len(sys.argv) != 3:
        mes = '*** Usage: python {} params.config file.uniq2ref.primer'
        print(
            mes.format(os.path.basename(sys.argv[0])),
            file=sys.stderr,
        )
        sys.exit(1)

    configf = sys.argv[1]
    primerfile = sys.argv[2]

    # safe_load: plain-data config only; also works on PyYAML versions where
    # yaml.load() requires an explicit Loader.  The handle is closed promptly.
    with open(configf) as config_handle:
        d = yaml.safe_load(config_handle)

    pass_cnt = 0
    cnt = 0
    for rec in screed.open(primerfile):
        cnt += 1
        _name = rec.name
        name, _contig = _name.split(None, 1)
        contig_len = _contig.split('__', 1)[1]
        seq = rec.sequence
        # primer3 functions only accept byte-strings
        seq = seq.encode('utf-8')
        seq_rc = RC(seq)

        # Skip only when BOTH orientations fail a check.
        ambi = bool(has_ambiguous(seq))
        ambi_rc = bool(has_ambiguous(seq_rc))
        if ambi and ambi_rc:
            continue

        # check tm
        tm = primer3.calcTm(seq)
        tm_rc = primer3.calcTm(seq_rc)
        tm_bad = tm < d['TM_LOWER'] or tm > d['TM_UPPER']
        tm_rc_bad = tm_rc < d['TM_LOWER'] or tm_rc > d['TM_UPPER']
        if tm_bad and tm_rc_bad:
            continue

        # check gc
        gc = check_gc(seq)
        gc_rc = check_gc(seq_rc)
        gc_bad = gc < d['GC_LOWER'] or gc > d['GC_UPPER']
        gc_rc_bad = gc_rc < d['GC_LOWER'] or gc_rc > d['GC_UPPER']
        if gc_bad and gc_rc_bad:
            continue

        if d['GC_CLAMP']:
            c = end_gc_count(seq)
            c_rc = end_gc_count(seq_rc)
            clamp_bad = c > 3 or c < 1
            clamp_rc_bad = c_rc > 3 or c_rc < 1
            if clamp_bad and clamp_rc_bad:
                continue

        if d['SS']:
            # NOTE(review): earlier code also computed hairpin results and
            # combined per-orientation flags but never used them; only the
            # forward homodimer dG gates the primer.  Confirm whether a
            # both-orientation rule (as for the checks above) was intended.
            ho = primer3.calcHomodimer(seq)
            if ho.dg < d['DI_DG_LIMIT'] or ho.dg > 0:
                continue

        pass_cnt += 1
        mes = '>{} contiglen__{};tm__{};gc__{}\n{}'
        print(mes.format(name, contig_len, tm, gc, seq), file=sys.stdout)

    if cnt == 0:
        mes = '*** Empty file detected: {} (file.uniq2ref.primer), skipping..'
        print(
            mes.format(os.path.basename(primerfile)),
            file=sys.stderr,
        )
        sys.exit(0)
#print(i, primer3.calcHairpin(i)) tttt = [ i, primer3.calcTm(i[23:]), result.tm, result.dg, pin.tm, pin.dg ] dg_3000.append(tttt) dg_3000 = pd.DataFrame(np.array(dg_3000), columns=[ "Primer", "Annealing Tm", "HeteroDimer Tm", "HeteroDimer dG", "Hairpin Tm", "Hairpin dG" ]) dg_3000.iloc[:, 1:] = dg_3000.iloc[:, 1:].astype(float).round(2) print(dg_3000) # Create a Pandas Excel writer using XlsxWriter as the engine. writer = pd.ExcelWriter('dv4_primers.xlsx', engine='xlsxwriter') # Convert the dataframe to an XlsxWriter Excel object. dg_3000.to_excel(writer, sheet_name='Sheet1') # Close the Pandas Excel writer and output the Excel file. writer.save() ##### DENV primers used in eLife paper for comparison. DENV2 = 'AAGCAGTGGTATCAACGCAGAGTACGAACCTGTTGATTCAACAGC' print(DENV2, 'Heterodimer', primer3.calcHeterodimer(DENV2, TSO)) print(DENV2, 'Homodimer', primer3.calcHomodimer(DENV2)) print(DENV2, 'Hairpin', primer3.calcHairpin(DENV2)) print(DENV2, 'Tm', primer3.calcTm(DENV2[23:]))
def homodimer_Tm(primer_sequence, mv_cation=0, primer_conc=0):
    """Return the homodimer melting temperature of a primer as text.

    The calculation uses ``primer3.calcHomodimer`` with dv_conc=0,
    dntp_conc=0, temp_c=37 and max_loop=30.

    :param primer_sequence: primer sequence to evaluate.
    :param mv_cation: value forwarded as ``mv_conc``.
    :param primer_conc: value forwarded as ``dna_conc``.
    :return: the Tm rounded to two decimals, formatted as a string.
    """
    result = primer3.calcHomodimer(
        primer_sequence,
        mv_conc=mv_cation,
        dv_conc=0,
        dntp_conc=0,
        dna_conc=primer_conc,
        temp_c=37,
        max_loop=30,
    )
    return format(round(result.tm, 2), ".2f")
long_seq_list.append(seq_record.id) idx = 1 short = 0 for seq_probe in probe_list: probe = (prefix_nt * poly_nt) + str(seq_probe) probe_length = len(probe) #Tm_ori = primer3.calcTm(probe) #Tm_ori2 = ("%.2f" % Tm_ori) #GC_percent_ori = GC(probe) #GC_per_2 = ("%.2f" % GC_percent_ori) #print(seq_record.id + "_" + str(idx) + "\t" + probe + "\t" + str(GC_per_2) + "\t" + str(Tm_ori2) + "\t", end = '') #if(probe_length <=40): # short = short+1 # else: # print("\n") if (probe_length <= 60): Tm = primer3.calcTm(probe) Tm2 = ("%.2f" % Tm) GC_percent = GC(probe) GC_per = ("%.2f" % GC_percent) Homodimer = primer3.calcHomodimer(probe) Hairpin = primer3.calcHairpin(probe) print(seq_record.id + "_" + str(idx) + "\t" + probe + "\t" + "\t" + str(GC_per) + "\t" + str(Tm2) + "\t" + str(Hairpin.structure_found) + "\t" + str(Homodimer.structure_found)) idx = idx + 1 output.close()
def main():
    """Filter primers per direction and print every direction that passes.

    Command line: ``params.config file.uniq2ref.primer``.  The YAML config
    supplies TM_LOWER/TM_UPPER, GC_LOWER/GC_UPPER, GC_CLAMP, SS, HP_DG_LIMIT
    and DI_DG_LIMIT.  Each record is tested as given ('f') and as its
    reverse complement ('r'); a direction is printed to stdout in FASTA form
    when it passes the ambiguity, Tm, GC, optional GC-clamp and optional
    hairpin/homodimer dG checks.  The number of records with at least one
    passing direction is reported on stderr, as is an empty input file.
    """
    if len(sys.argv) != 3:
        mes = '*** Usage: python {} params.config file.uniq2ref.primer'
        print(
            mes.format(os.path.basename(sys.argv[0])),
            file=sys.stderr,
        )
        sys.exit(1)

    configf = sys.argv[1]
    primerfile = sys.argv[2]

    # safe_load: plain-data config only; also works on PyYAML versions where
    # yaml.load() requires an explicit Loader.  The handle is closed promptly.
    with open(configf) as config_handle:
        d = yaml.safe_load(config_handle)

    pass_cnt = 0
    total_cnt = 0
    for rec in screed.open(primerfile):
        total_cnt += 1
        _name = rec.name
        name, _contig = _name.split(None, 1)
        contig_len = _contig.split('__', 1)[1]
        seq = rec.sequence
        seq_rc = RC(seq)
        # primer3 functions only accept byte-strings
        seq = seq.encode('utf-8')
        seq_rc = seq_rc.encode('utf-8')

        trig = False
        for di, strand in zip(('f', 'r'), (seq, seq_rc)):
            if has_ambiguous(strand):
                continue

            # check tm
            tm = primer3.calcTm(strand)
            if tm < d['TM_LOWER'] or tm > d['TM_UPPER']:
                continue

            # check gc
            gc = check_gc(strand)
            if gc < d['GC_LOWER'] or gc > d['GC_UPPER']:
                continue

            if d['GC_CLAMP']:
                cnt = end_gc_count(strand)
                if cnt > 3 or cnt < 1:
                    continue

            if d['SS']:
                hp = primer3.calcHairpin(strand)
                ho = primer3.calcHomodimer(strand)
                if hp.dg < d['HP_DG_LIMIT'] or hp.dg > 0:
                    continue
                if ho.dg < d['DI_DG_LIMIT'] or ho.dg > 0:
                    continue

            trig = True
            mes = '>{}__{} contiglen__{};di__{};tm__{};gc__{}\n{}'
            print(
                mes.format(name, di, contig_len, di, tm, gc, strand),
                file=sys.stdout,
            )

        if trig:
            pass_cnt += 1

    mes = '*** # of primers (at least one direction) passed filter: {}'
    print(mes.format(pass_cnt), file=sys.stderr)

    if total_cnt == 0:
        mes = ('*** Empty file detected: {} (file.uniq2ref.primer), '
               'skipping..')
        print(
            mes.format(os.path.basename(primerfile)),
            file=sys.stderr,
        )
        sys.exit(0)
def __call__(self, primer_seq: str) -> SelfBindingTemps:
    """Return the hairpin and homodimer melting temperatures of a primer.

    Both primer3 calls receive ``self.mv``, ``self.dv`` and ``self.dntp``
    positionally after the sequence.

    :param primer_seq: primer sequence to analyse.
    :return: ``SelfBindingTemps(hairpin_tm, homodimer_tm)``.
    """
    salts = (self.mv, self.dv, self.dntp)
    hairpin = calcHairpin(primer_seq, *salts)
    homodimer = calcHomodimer(primer_seq, *salts)
    return SelfBindingTemps(hairpin.tm, homodimer.tm)
def HomodimerTm(primer, mv_cation=0, primer_conc=0):
    """Return the homodimer melting temperature of a primer as text.

    The calculation uses ``primer3.calcHomodimer`` with dv_conc=0,
    dntp_conc=0, temp_c=37 and max_loop=30.

    :param primer: primer sequence to evaluate.
    :param mv_cation: value forwarded as ``mv_conc``.
    :param primer_conc: value forwarded as ``dna_conc``.
    :return: the Tm rounded to two decimals, formatted as a string.
    """
    result = primer3.calcHomodimer(
        primer,
        mv_conc=mv_cation,
        dv_conc=0,
        dntp_conc=0,
        dna_conc=primer_conc,
        temp_c=37,
        max_loop=30,
    )
    return format(round(result.tm, 2), ".2f")
def run_filter(
    input: str,
    output: str,
    repeats_threshold: int,
    gc_upper_threshold: float,
    gc_lower_threshold: float,
    gc_clamp: bool,
    gc_uniformity_threshold: float,
    hairpin_threshold: int,
    homodimer_threshold: int,
    dna_conc: float,
    mv_conc: float,
    dv_conc: float,
):
    """Copy the primers of a FASTA file that pass all filters to *output*.

    Filters are applied in this order and the first failing one is counted:
    GC range, repeats, GC clamp (only when ``gc_clamp`` is set), GC
    uniformity, hairpin dG, homodimer dG.  A summary counter is passed to
    ``print_stats`` at the end.

    :param input: FASTA source opened with ``smart_open``.
    :param output: FASTA destination opened with ``smart_open``.
    :param hairpin_threshold: hairpins with dG below this reject the primer.
    :param homodimer_threshold: homodimers with dG below this reject it.
    :param dna_conc, mv_conc, dv_conc: forwarded to the primer3 calls.
    """
    summary = Counter()
    struct_configs = {
        "mv_conc": mv_conc,
        "dv_conc": dv_conc,
        "dna_conc": dna_conc,
    }

    def rejection_reason(primer):
        # Return the summary key of the first failed filter, or None.
        if not (gc_lower_threshold <= primer.gc <= gc_upper_threshold):
            return "Out of GC-range"
        if primer.has_repeats(threshold=repeats_threshold):
            return "Repeats"
        if gc_clamp and not primer.has_gc_clamp():
            return "No GC-clamp"
        # NOTE(review): a true has_uniform_gc() is counted as "Non-uniform
        # GC rate" -- confirm the helper's polarity.
        if primer.has_uniform_gc(window=5, threshold=gc_uniformity_threshold):
            return "Non-uniform GC rate"
        hairpin = primer3.calcHairpin(primer.seq, **struct_configs)
        if hairpin.structure_found and hairpin.dg < hairpin_threshold:
            return "Hairpin"
        homodimer = primer3.calcHomodimer(primer.seq, **struct_configs)
        if homodimer.structure_found and homodimer.dg < homodimer_threshold:
            return "Homodimer"
        return None

    with smart_open(input, mode="r") as infile, smart_open(output, mode="w") as outfile:
        for entry in tqdm(SeqIO.parse(infile, "fasta"), desc="Parsing primers"):
            summary["Primers reads"] += 1
            candidate = Primer(name=entry.name, seq=str(entry.seq))
            reason = rejection_reason(candidate)
            if reason is not None:
                summary[reason] += 1
                continue
            summary["Primers written"] += 1
            SeqIO.write(entry, outfile, format="fasta")

    print_stats(summary)
import time

import primer3

# Time the three primer3 dimer calculations for one primer pair.
start_time = time.time()

regionID = "SM_V7_1_r0"
left = "CCCCCAAACACAACATCGTC"
right = "ACATGACGTTTTGGGACAGT"

# Heterodimer of the pair, then the homodimer of each primer.
cross = primer3.calcHeterodimer(left, right)
print(cross.dg)

self1 = primer3.calcHomodimer(left)
print(self1.dg)

self2 = primer3.calcHomodimer(right)
print(self2.dg)

elapsed = round(time.time() - start_time, 3)
print(f"Runtime: {elapsed} s")
def scan_sequence(self, seq, primer_size=(18, 26), amplicon_size=(50, 60)):
    """Design primer pairs on *seq* with primer3 and analyse each pair.

    Up to 20 pairs are requested from ``primer3.bindings.designPrimers``.
    For the left and right primer of every returned pair, the hairpin, the
    homodimer and the duplex with its own reverse complement are computed,
    plus the heterodimer between the two primers.

    :param seq: template sequence.
    :param primer_size: (min, max) primer length in nt.
    :param amplicon_size: (min, max) product size in nt.
    :return: list of ``PrimerPair`` objects, one per returned pair, in the
        order primer3 reported them.
    """
    if (__name__ == "__main__"):
        from oligo import Primer, PrimerPair
    else:
        from .oligo import Primer, PrimerPair

    import primer3

    number_records = 20

    mv_conc = 50.0  # in mM
    dv_conc = 0.0  # in mM
    dntp_conc = 0.6  # in mM
    dna_conc = 250.0  # in nM
    temperature = 25

    seq_args = {'SEQUENCE_ID': 'TEST', 'SEQUENCE_TEMPLATE': seq}

    global_args = {
        # Parameters for design
        'PRIMER_PICK_LEFT_PRIMER': 1,
        'PRIMER_PICK_INTERNAL_OLIGO': 0,
        'PRIMER_PICK_RIGHT_PRIMER': 1,
        'PRIMER_NUM_RETURN': number_records,  # in output records
        'PRIMER_THERMODYNAMIC_OLIGO_ALIGNMENT': 1,

        # Parameters for LEFT/RIGHT oligos
        'PRIMER_OPT_SIZE': 20,  # in nt
        'PRIMER_MIN_SIZE': primer_size[0],  # in nt
        'PRIMER_MAX_SIZE': primer_size[1],  # in nt
        'PRIMER_MAX_POLY_X': 4,  # in nt
        'PRIMER_MAX_NS_ACCEPTED': 0,  # in nt
        'PRIMER_MAX_END_GC': 3,  # in nt
        'PRIMER_GC_CLAMP': 1,  # in nt (1 or more trailing G or C nt)
        'PRIMER_PRODUCT_OPT_SIZE': 0,  # in nt (0 means don't prefer any one size)
        'PRIMER_PRODUCT_SIZE_RANGE': [amplicon_size],  # in nt
        'PRIMER_OPT_TM': 60.0,  # in degrees C
        'PRIMER_MIN_TM': 55.0,  # in degrees C
        'PRIMER_MAX_TM': 65.0,  # in degrees C
        'PRIMER_PAIR_MAX_DIFF_TM': 2.0,  # in degrees C
        'PRIMER_OPT_GC_PERCENT': 50.0,  # in percent
        'PRIMER_MIN_GC': 40.0,  # in percent
        'PRIMER_MAX_GC': 60.0,  # in percent
        'PRIMER_SALT_MONOVALENT': mv_conc,  # in mM
        'PRIMER_SALT_DIVALENT': dv_conc,  # in mM
        # in nM (Not the concentration of oligos in the reaction mix but of
        # those annealing to template.)
        'PRIMER_DNA_CONC': dna_conc,
        'PRIMER_DNTP_CONC': dntp_conc,  # in mM
        'PRIMER_MAX_SELF_ANY': 8,  # alignment score
        'PRIMER_MAX_SELF_END': 3,  # alignment score
        'PRIMER_PAIR_MAX_COMPL_ANY': 8,  # alignment score
        'PRIMER_PAIR_MAX_COMPL_END': 3,  # alignment score
        'PRIMER_MAX_SELF_ANY_TH': 45.0,  # degrees C
        'PRIMER_MAX_SELF_END_TH': 35.0,  # degrees C
        'PRIMER_PAIR_MAX_COMPL_ANY_TH': 45.0,  # degrees C
        'PRIMER_PAIR_MAX_COMPL_END_TH': 35.0,  # degrees C
        'PRIMER_MAX_HAIRPIN_TH': 35.0,  # degrees C

        # Parameters for INTERNAL oligos
        'PRIMER_INTERNAL_OPT_SIZE': 20,  # in nt
        'PRIMER_INTERNAL_MIN_SIZE': primer_size[0],  # in nt
        'PRIMER_INTERNAL_MAX_SIZE': primer_size[1],  # in nt
        'PRIMER_INTERNAL_MAX_POLY_X': 4,  # in nt
        'PRIMER_INTERNAL_MAX_NS_ACCEPTED': 0,  # in nt
        'PRIMER_INTERNAL_OPT_TM': 60.0,  # in degrees C
        'PRIMER_INTERNAL_MIN_TM': 55.0,  # in degrees C
        'PRIMER_INTERNAL_MAX_TM': 65.0,  # in degrees C
        'PRIMER_INTERNAL_OPT_GC_PERCENT': 50.0,  # in percent
        'PRIMER_INTERNAL_MIN_GC': 40.0,  # in percent
        'PRIMER_INTERNAL_MAX_GC': 60.0,  # in percent
        'PRIMER_INTERNAL_SALT_MONOVALENT': mv_conc,  # in mM
        # Key must match the primer3 tag exactly (no trailing whitespace).
        'PRIMER_INTERNAL_SALT_DIVALENT': dv_conc,  # in mM
        'PRIMER_INTERNAL_DNTP_CONC': dntp_conc,  # in mM
        'PRIMER_INTERNAL_DNA_CONC': dna_conc,  # in nM
        'PRIMER_INTERNAL_MAX_SELF_ANY': 8,  # alignment score
        'PRIMER_INTERNAL_MAX_SELF_END': 3,  # alignment score
        'PRIMER_INTERNAL_MAX_SELF_ANY_TH': 45.0,  # degrees C
        'PRIMER_INTERNAL_MAX_SELF_END_TH': 35.0,  # degrees C
        'PRIMER_INTERNAL_MAX_HAIRPIN_TH': 24.0,  # degrees C
    }

    primers = primer3.bindings.designPrimers(seq_args, global_args)
    found_records = primers['PRIMER_PAIR_NUM_RETURNED']

    # Conditions shared by every thermodynamic call below.
    thermo_args = {
        'mv_conc': mv_conc,
        'dv_conc': dv_conc,
        'dntp_conc': dntp_conc,
        'dna_conc': dna_conc,
        'temp_c': temperature,
    }

    def build_primer(p_seq, strand):
        # Analyse one oligo and wrap it with its 0-based template position.
        # A '-' strand primer is located through its reverse complement.
        target = p_seq if strand == '+' else rc(p_seq)
        p_pos = seq.find(target)  # 0-based indexing
        p_hairpin = primer3.calcHairpin(p_seq, **thermo_args)
        p_homodimer = primer3.calcHomodimer(p_seq, **thermo_args)
        p_rc = primer3.calcHeterodimer(p_seq, rc(p_seq), **thermo_args)
        p_gc = (p_seq.count('C') + p_seq.count('G')) / len(p_seq)
        return Primer(p_seq, p_pos, strand, p_hairpin, p_homodimer, p_rc, p_gc)

    # ThermoResult object:
    #   dg               deltaG (Gibbs free energy) of the structure (cal/mol)
    #   dh               deltaH (enthalpy) of the structure (cal/mol)
    #   ds               deltaS (entropy) of the structure (cal/K*mol)
    #   structure_found  Whether or not a structure (hairpin, dimer, etc) was
    #                    found as a result of the calculation.
    #   tm               Melting temperature of the structure in deg. C
    outputs = []
    for i in range(min(found_records, number_records)):
        left_seq = primers['PRIMER_LEFT_' + str(i) + '_SEQUENCE']
        right_seq = primers['PRIMER_RIGHT_' + str(i) + '_SEQUENCE']

        o_left = build_primer(left_seq, '+')
        o_right = build_primer(right_seq, '-')

        p_heterodimer = primer3.calcHeterodimer(left_seq, right_seq, **thermo_args)
        outputs.append(PrimerPair(o_left, o_right, p_heterodimer))

    return outputs