def make_primers(query):
    '''Build primer dicts from user input, shaped like the design_primer result.

    Every non-blank line of ``query`` holds whitespace-separated columns, either
    ``id rank seq_F seq_R`` or ``id seq_F seq_R`` (rank then defaults to 0).

    Input:
        query: a string in multi-lines

    Returns:
        A dict mapping each id to a dict of primer3-style keys, or a dict of
        the form ``{'error': message}`` when the run was stopped or a line
        could not be used.
    '''
    if global_var.stop_run is True:
        return {'error': 'Stop running'}
    primers = {}
    for line in query.splitlines():
        if line.strip() == '':
            continue
        line_data = re.split(r'\s+', line.strip())
        if len(line_data) == 4:
            (primer_id, rank, seq_F, seq_R) = line_data
            # A malformed rank is a user input error, so report it the same
            # way as every other validation failure instead of raising.
            try:
                rank = int(rank)
            except ValueError:
                rank = -1
            if rank < 0:
                return {
                    'error':
                    f'Your input: {line} does not have a non-negative '
                    'integer rank in the second column'
                }
        elif len(line_data) == 3:
            (primer_id, seq_F, seq_R) = line_data
            rank = 0
        else:
            return {
                'error':
                f'Your input: {line} does not have three or four columns'
            }
        # whether seq_F and seq_R are valid primer seqs
        if check_primer_seq(seq_F) is False:
            return {
                'error':
                f'Your input: {seq_F} does not seem like a vaild primer seq'
            }
        if check_primer_seq(seq_R) is False:
            return {
                'error':
                f'Your input: {seq_R} does not seem like a vaild primer seq'
            }
        # generate primers
        entry = primers.setdefault(primer_id, {})
        # Track the highest rank seen so far, so the count stays right even
        # when the ranks of one id are not listed in ascending order.
        entry['PRIMER_PAIR_NUM_RETURNED'] = max(
            entry.get('PRIMER_PAIR_NUM_RETURNED', 0), rank + 1)
        # Expand degenerate bases once per primer and reuse for Tm and GC.
        plain_F = transform_degenerate(seq_F)
        plain_R = transform_degenerate(seq_R)
        entry[f'PRIMER_PAIR_{rank}_PENALTY'] = 0
        entry[f'PRIMER_LEFT_{rank}_SEQUENCE'] = seq_F
        entry[f'PRIMER_RIGHT_{rank}_SEQUENCE'] = seq_R
        # -1 marks positions/sizes that are unknown for user-supplied primers.
        entry[f'PRIMER_LEFT_{rank}'] = [-1, len(seq_F)]
        entry[f'PRIMER_RIGHT_{rank}'] = [-1, len(seq_R)]
        entry[f'PRIMER_LEFT_{rank}_TM'] = primer3.calcTm(plain_F)
        entry[f'PRIMER_RIGHT_{rank}_TM'] = primer3.calcTm(plain_R)
        entry[f'PRIMER_LEFT_{rank}_GC_PERCENT'] = calculate_GC(plain_F)
        entry[f'PRIMER_RIGHT_{rank}_GC_PERCENT'] = calculate_GC(plain_R)
        entry[f'PRIMER_PAIR_{rank}_PRODUCT_SIZE'] = -1
        entry['SEQUENCE_RELATIVE_TARGET_START'] = 0
    return primers
def writePadlocksToCSV(padlock_results: Dict[str, List[PadHit]],
                       filename: str):
    '''Write every padlock hit, plus the Tm of both arms, to a CSV-like file.

    Args:
        padlock_results: maps a gene name to its list of hits; each hit is
            concatenated as a tuple into the row and exposes ``seq_r`` and
            ``seq_l``.
        filename: path of the file to (over)write.

    The Tm values are computed with ``P_PARAMS['thermo_params']`` as keyword
    arguments to ``calcTm``. Fields are separated by ", ".
    '''
    thermo_kwargs = P_PARAMS['thermo_params']
    header = (
        'gene_name, name0, name1, strand_dir, genome_idx, index, '
        'gap_size, sequence, barcode, right_arm, scaffold, '
        'left_arm, right_tm, left_tm\n')
    row_format = '%s, %s, %s, %s, %d, %d, %d, %s, %s, %s, %s, %s, %2.3f, %2.3f\n'
    with io.open(filename, 'w') as out:
        out.write(header)
        for gene_name, hits in padlock_results.items():
            for hit in hits:
                right_tm = calcTm(hit.seq_r, **thermo_kwargs)
                left_tm = calcTm(hit.seq_l, **thermo_kwargs)
                # Row layout: gene name, the hit's own fields, then both Tms.
                out.write(row_format % ((gene_name,) + hit + (right_tm, left_tm)))
    print('Wrote padlocks to %s' % filename)
def get_tm(sequence):
    """Return primer3's melting temperature for ``sequence`` as a float.

    The salt, dNTP and DNA concentrations are read from the names
    ``monovalent``, ``divalent``, ``dntps`` and ``dna`` of the enclosing scope.
    """
    conditions = dict(mv_conc=monovalent,
                      dv_conc=divalent,
                      dntp_conc=dntps,
                      dna_conc=dna)
    melting = primer3.calcTm(sequence, **conditions)
    return float(melting)
def __init__(self, construct, start, end):
    """
    Define a primer by two indices into ``construct.dna``.

    When ``start`` is less than ``end`` the primer is the slice
    ``construct.dna[start:end]`` itself.  When ``start`` is greater than
    ``end`` the primer is the reverse complement of
    ``construct.dna[end:start]``.  Equal indices are rejected by assertion.

    Besides the sequence, the melting temperature (Breslauer method), the
    GC fraction and a GC-clamp flag are stored.
    """
    assert start != end

    def count_gc(fragment):
        # Number of G/C bases in the given piece of sequence.
        return sum(base in 'GC' for base in fragment)

    self._construct = construct
    self._start = start
    self._end = end

    if start < end:
        self._sequence = construct.dna[start:end]
    else:
        self._sequence = dna_reverse_complement(construct.dna[end:start])

    self._melting_temp = primer3.calcTm(self._sequence,
                                        tm_method='breslauer')
    self._gc_content = count_gc(self._sequence) / len(self)

    # The clamp requires 1-3 G/C bases within the first five and within the
    # last five bases of the primer.
    head_gc = count_gc(self._sequence[:5])
    tail_gc = count_gc(self._sequence[-5:])
    self.has_gc_clamp = (1 <= head_gc <= 3) and (1 <= tail_gc <= 3)
def GetPrimers(n_ind, genom, len_primer, GC, Tm, gnum):
    """Find primers in the genome.

    Slides a window of ``len_primer`` bases over ``genom`` starting at index
    ``n_ind`` and keeps the windows whose melting temperature lies within
    ``Tm[0]..Tm[1]`` and whose GC percentage lies within ``GC[0]..GC[1]``
    (bounds inclusive).  For ``gnum == 1`` the window is used as is, otherwise
    it is reversed (reversed only, not complemented).

    Returns a list of ``[end_index, [primer, len_primer, gc_percent_str,
    tm_str]]`` entries, where ``end_index`` is the index of the window's last
    base.

    NOTE(review): the scan stops before the window that starts at
    ``len(genom) - len_primer``, so the very last window is never tested --
    confirm whether that is intended.
    """
    hits = []
    stop = len(genom) - len_primer
    for start in range(n_ind, stop):
        window = genom[start:start + len_primer]
        primer = window if gnum == 1 else window[::-1]
        melting = calcTm(primer)
        gc_percent = (primer.count('G') + primer.count('C')) / len_primer * 100
        if not (Tm[0] <= melting <= Tm[1]):
            continue
        if not (GC[0] <= gc_percent <= GC[1]):
            continue
        hits.append([
            start + len_primer - 1,
            [primer, len_primer, str(gc_percent), str(melting)],
        ])
    return hits
def calc_characteristics(infile):
    """Compute Tm and secondary-structure Tm values for each FASTA record.

    For every record in ``infile`` the forward sequence and its reverse
    complement are analysed for hairpins and homodimers.  A structure's Tm is
    reported only when primer3 flags ``structure_found``; otherwise "NA".

    Returns a dict mapping the record id to a string that starts with a tab
    and holds, tab-separated: forward sequence, forward Tm, forward hairpin
    Tm, forward homodimer Tm, reverse sequence, reverse hairpin Tm, reverse
    homodimer Tm.
    """
    print("Using sequence file", infile,
          "to calculate Tm and structure potential")

    def tm_or_na(structure):
        # Only trust the Tm when a structure was actually predicted.
        return structure.tm if structure.structure_found else "NA"

    chars = dict()
    for record in list(SeqIO.parse(infile, "fasta")):
        # Forward strand
        forward = str(record.seq)
        forward_hairpin = calcHairpin(forward)
        forward_homodimer = calcHomodimer(forward)

        # Reverse complement
        reverse = str(record.seq.reverse_complement())
        reverse_hairpin = calcHairpin(reverse)
        reverse_homodimer = calcHomodimer(reverse)

        columns = [
            forward,
            calcTm(forward),
            tm_or_na(forward_hairpin),
            tm_or_na(forward_homodimer),
            reverse,
            tm_or_na(reverse_hairpin),
            tm_or_na(reverse_homodimer),
        ]
        chars[record.id] = "\t" + "\t".join(str(value) for value in columns)
    return chars
def writePadlocksToCSV(padlock_results: Dict[str, List[PadHit]],
                       filename: str):
    '''Write padlocks and the melting temperatures of their arms to a file.

    One header line is written, followed by one ", "-separated row per hit:
    the gene name, the fields of the hit tuple, then the Tm of ``seq_r`` and
    of ``seq_l`` (both computed with ``P_PARAMS['thermo_params']``).

    Args:
        padlock_results: gene name -> list of padlock hits.
        filename: destination path; an existing file is overwritten.
    '''
    tm_options = P_PARAMS['thermo_params']
    column_names = ('gene_name, name0, name1, strand_dir, genome_idx, index, '
                    'gap_size, sequence, barcode, right_arm, scaffold, '
                    'left_arm, right_tm, left_tm\n')
    line_template = '%s, %s, %s, %s, %d, %d, %d, %s, %s, %s, %s, %s, %2.3f, %2.3f\n'
    with io.open(filename, 'w') as handle:
        handle.write(column_names)
        for gene, padlocks in padlock_results.items():
            for padlock in padlocks:
                arm_tms = (calcTm(padlock.seq_r, **tm_options),
                           calcTm(padlock.seq_l, **tm_options))
                fields = (gene, ) + padlock + arm_tms
                handle.write(line_template % fields)
    print('Wrote padlocks to %s' % filename)
def __call__(self, primer: str) -> float:
    """
    Return an NEB-like melting temperature for ``primer``.

    We created NEB like calculation method from here:
    According to the article: https://tmcalculator.neb.com/#!/help
    we offset our results by 3 and it yields similar results as default
    NEB calculator for Q5 product group + High Fidelity.

    Results are rounded to ``self.precision`` digits and, when
    ``self.cached`` is set, memoised in ``self.cache``.

    :param primer: [str] primer sequence
    :return: melting temperature, or ``-inf`` for an empty primer
    """
    if len(primer) == 0:
        # TODO remove and let raise exception
        # (callers currently receive -inf for an empty primer)
        return -float("inf")

    if self.cached and primer in self.cache:
        return self.cache[primer]

    temp = calcTm(
        primer,
        dna_conc=(500 / 6) * 7,  # primer is assumed 6x template
        mv_conc=(60 + 20),
        dv_conc=2,
        tm_method='santalucia',
        salt_corrections_method='owczarzy'
    ) + 3  # +3 because NEB documentation recommends it and it is fairly close
    temp = round(temp, self.precision)

    if self.cached:
        self.cache[primer] = temp
    return temp
def get_tm(sequence, mv, dv, dntps, dna):
    """Return primer3's melting temperature for ``sequence`` as a float.

    ``mv``, ``dv``, ``dntps`` and ``dna`` are forwarded to ``primer3.calcTm``
    as ``mv_conc``, ``dv_conc``, ``dntp_conc`` and ``dna_conc``.
    """
    melting = primer3.calcTm(
        sequence,
        mv_conc=mv,
        dv_conc=dv,
        dntp_conc=dntps,
        dna_conc=dna,
    )
    return float(melting)
def __init__(self, seq, start, direction, name="", penalty=0):
    """Initialise the base object, then attach naming and thermodynamic data.

    ``tm`` is computed from ``self.seq`` with 50 mM monovalent, 1.5 mM
    divalent and 0.6 mM dNTP settings; ``gc`` is the G+C percentage of the
    ``seq`` argument.  ``identity`` starts at 0 and ``alignments`` empty.
    """
    super().__init__(seq, start, direction)
    self.name = name
    self.penalty = penalty
    self.identity = 0
    self.tm = calcTm(self.seq, mv_conc=50, dv_conc=1.5, dntp_conc=0.6)
    gc_bases = seq.count("G") + seq.count("C")
    self.gc = 100.0 * gc_bases / len(seq)
    self.alignments = []
def check(self, seq):
    """Return True unless the Tm of ``seq`` falls outside the allowed range.

    The sequence is converted to an upper-case string before primer3 is
    called; the limits are ``self.min_Tm`` and ``self.max_Tm`` (inclusive).
    """
    melting = primer3.calcTm(str(seq).upper())
    out_of_range = melting < self.min_Tm or melting > self.max_Tm
    return not out_of_range
def has_melting_temperature_between_50_and_55(location):
    """Tell whether the 20 bases around ``location`` melt strictly between
    50 and 55 Celsius.

    Locations closer than 20 bases to either end of ``sequence`` (a name
    taken from the enclosing scope) are accepted without any computation.
    """
    distance_to_edge = min(location, len(sequence) - location)
    if distance_to_edge < 20:
        return True
    window = sequence[location - 10:location + 10]
    return 50 < primer3.calcTm(window) < 55
def melting_temp(self, seq):
    """Return primer3's Tm for ``seq`` under this object's reaction conditions.

    Concentrations come from the instance; ``MAX_NN_LENGTH`` and
    ``PRIMER3_METHOD`` are module-level settings, the latter being used for
    both the Tm method and the salt-correction method.
    """
    settings = dict(
        mv_conc=self.mv_conc,
        dv_conc=self.dv_conc,
        dntp_conc=self.dntp_conc,
        dna_conc=self.dna_conc,
        max_nn_length=MAX_NN_LENGTH,
        tm_method=PRIMER3_METHOD,
        salt_corrections_method=PRIMER3_METHOD,
    )
    return primer3.calcTm(seq, **settings)
def set_annealing_ta(self):
    """Compute and store the annealing temperature for the product and primers.

    Uses the Rychlik et al. formula:
    ``0.3 * Tm(unst_primer) + 0.7 * Tm(product) - 14.9``.
    The result is stored in ``self._annealing_ta``.
    """
    product = self.get_product()
    primer_term = 0.3 * self.unst_primer.Tm
    product_term = 0.7 * calcTm(product)
    self._annealing_ta = (primer_term + product_term) - 14.9
def tmvalue(sequence):
    """Return the Tm of ``sequence`` truncated to two decimals.

    The degenerate codes R and Y are replaced by G and C respectively before
    primer3 is called with the Breslauer method and Schildkraut salt
    correction.
    """
    for degenerate, concrete in (("R", "G"), ("Y", "C")):
        if degenerate in sequence:
            sequence = sequence.replace(degenerate, concrete)
    melting = primer3.calcTm(sequence,
                             tm_method="breslauer",
                             salt_corrections_method="schildkraut")
    # Truncate (not round) to two decimal places.
    hundredths = int(melting * 100)
    return hundredths / 100
def pick_primer_with_best_tm(seqs, tm):
    """Return the ``(sequence, Tm)`` pair whose Tm is closest to ``tm``.

    Tm values use the Breslauer method.  On ties the earliest sequence in
    ``seqs`` wins.  An empty ``seqs`` raises IndexError.
    """
    import primer3

    def distance_to_target(pair):
        return abs(pair[1] - tm)

    # primer3 seems to produce garbage results if given lowercase
    # sequences, hence the upper-casing.
    scored = []
    for seq in seqs:
        scored.append((seq, primer3.calcTm(seq.upper(), tm_method='breslauer')))
    return sorted(scored, key=distance_to_target)[0]
def createPrimers(self, db, bowtie='bowtie2', delete=True, tags={},
                  tmThreshold=50.0, endMatch=6, maxAln=20):
    """Align the primers in ``self.file`` and return them with their target loci.

    Args:
        db: bowtie index passed to ``-x``.
        bowtie: aligner executable.
        delete: remove the SAM file once it has been read.
        tags: primer name -> tag (only read, never modified).
        tmThreshold: minimum heterodimer Tm for an alignment to count.
        endMatch: number of 3' bases that must match the reference exactly.
        maxAln: maximum alignments reported per primer (``-k``); primers that
            collect at least this many loci get their loci cleared.

    Returns:
        ``primers.values()`` -- the created Primer objects.

    Raises:
        subprocess.CalledProcessError: when the aligner exits non-zero.
    """
    # run bowtie (max 1000 alignments, allow for one gap/mismatch?)
    mapfile = self.file + '.sam'
    if not os.path.exists(mapfile):
        command = [bowtie, '-f', '--end-to-end', '-p', '2',
                   '-k', str(maxAln), '-L', '10', '-N', '1',
                   '-D', '20', '-R', '3',
                   '-x', db, '-U', self.file]
        # No shell is involved, so a '>' argument would not redirect anything;
        # capture the aligner's stdout in the SAM file explicitly.
        finished = False
        try:
            with open(mapfile, 'w') as samfile:
                subprocess.check_call(command, stdout=samfile)
            finished = True
        finally:
            # Never leave a partial SAM behind: it would be mistaken for a
            # finished alignment on the next call.
            if not finished and os.path.exists(mapfile):
                os.unlink(mapfile)

    # Read fasta file (Create Primer)
    primers = {}
    with pysam.FastaFile(self.file) as fasta:
        for s in fasta.references:
            # parse target locus from fasta file ("name|chrom:start-end:strand")
            try:
                primername, targetposition = s.split('|')
            except ValueError:
                primername, located = s, None
            else:
                located = re.match(r'(\w+):(\d+)-(\d+):([+-])', targetposition)
            if located is None:
                # no (parsable) target position given
                targetLocus = None
            else:
                # create stranded targetlocus
                reverse = located.group(4) == '-'
                tm = primer3.calcTm(fasta.fetch(s))  # assume targetlocus is full match
                locusStart = int(located.group(2))
                targetLocus = Locus(located.group(1), locusStart,
                                    int(located.group(3)) - locusStart,
                                    reverse, tm)
            # create primer (with target locus)
            primertag = tags[primername] if primername in tags else None
            primers[primername] = Primer(primername, fasta.fetch(s), targetLocus, tag=primertag)

    # read SAM OUTPUT and filter alignments
    mappings = pysam.Samfile(mapfile, 'r')
    try:
        for aln in mappings:
            if aln.is_unmapped:
                continue
            primername = aln.qname.split('|')[0]
            # get reference sequence
            qry = aln.query_sequence.upper()
            ref = aln.get_reference_sequence().upper()
            refrc = ref.translate(revcmp)[::-1]
            aln_tm = primer3.calcHeterodimerTm(qry, refrc)
            # TmThreshold and mismatches in 3'end check
            if aln_tm <= tmThreshold:
                continue
            if len(qry) > endMatch and len(ref) > endMatch:
                if all(q == r for q, r in zip(qry[-endMatch:], ref[-endMatch:])):
                    primers[primername].addTarget(
                        mappings.getrname(aln.reference_id), aln.pos,
                        aln.is_reverse, aln_tm)
    finally:
        mappings.close()

    # remove primer locations for those that have hit maximum
    for v in primers.values():
        if len(v.loci) >= maxAln:
            v.loci = []

    # cleanup
    if delete:
        os.unlink(mapfile)  # delete mapping FILE
    return primers.values()
def _calc_Tm(self, seq, seqtype):
    """Return the Tm of ``seq`` rounded to two decimals, or "NaN" on failure.

    ``seqtype`` "Primer" or "Product" selects the primer reaction conditions
    stored on the instance; any other value selects the probe conditions.
    """
    if seqtype in ("Primer", "Product"):
        mv = self.primer_monovalent_cations
        dv = self.primer_divalent_cations
        dntp = self.primer_dntps
        dna = self.primer_annealing_oligo
    else:
        mv = self.probe_monovalent_cations
        dv = self.probe_divalent_cations
        dntp = self.probe_dntps
        dna = self.probe_annealing_oligo
    try:
        tm = primer3.calcTm(str(seq), mv_conc=mv, dv_conc=dv,
                            dntp_conc=dntp, dna_conc=dna)
        tm = round(tm, 2)
    except Exception:
        # Best effort: any calculation problem yields the "NaN" marker, but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        tm = "NaN"
    return tm
def __init__(self, sequence):
    """Store the primer sequence and its primer3 melting temperature.

    The Tm is computed with 50 mM monovalent ions, 2.0 mM divalent ions and
    0.2 mM dNTPs.  If primer3 raises IndexError the offending sequence is
    printed and the process exits with status 69.
    """
    self.sequence = sequence
    # Primer3 is quite fast but BioPython provides more customization; the
    # BioPython alternative was mt.Tm_NN with nn_table=mt.DNA_NN4.
    conditions = dict(mv_conc=50, dv_conc=2.0, dntp_conc=0.2)
    try:
        self.Tm = primer3.calcTm(self.sequence, **conditions)
    except IndexError:
        print(self.sequence)
        exit(69)
def SMARTplex(right):
    """Grow an RT primer from ``settings.RLBseq`` plus the 3' end of ``right.seq``.

    Starting with the last 5 bases of the primer and adding one base per
    step, the candidate is compared against the reference of the first
    alignment; the loop stops as soon as the Tm of the trailing matching
    stretch exceeds 40.0 (75 mM monovalent, 3 mM divalent, 0.5 mM dNTP).

    Returns ``(RTprimer, thermo, subseq, lensubseq, lenmatch)`` for the last
    candidate examined.

    NOTE(review): the source layout was ambiguous; the three trailing
    assignments are taken to follow the loop -- confirm.  A sequence of
    length <= 5 never enters the loop and the function then fails on the
    unassigned names.
    """
    primer_seq = right.seq
    reference = right.alignments[0].aln_ref
    for tail_len in range(5, len(primer_seq)):
        RTprimer = settings.RLBseq + primer_seq[-tail_len:]
        match_len = LCSubStr(reference, RTprimer)
        thermo = calcTm(RTprimer[-match_len:],
                        mv_conc=75, dv_conc=3, dntp_conc=0.5)
        if thermo > 40.0:
            break
    matching_tail = RTprimer[-match_len:]
    return RTprimer, thermo, matching_tail, tail_len, match_len
def primer_generator(length, digestion_site, tests, end_CG=True):
    """Generate random primers that contain ``digestion_site`` exactly once.

    Each candidate is random DNA, then the site, then 5 more random bases
    (``length`` bases overall).  A candidate is kept when its Tm lies
    strictly between 53 and 55, the site (or its reverse complement) occurs
    exactly once, primer3 finds no hairpin, ``gc_counter`` is within 50..60,
    and ``runs_counter`` and ``repeat_counter`` pass.  With ``end_CG`` the
    ``end_3`` check must pass as well.

    The loop runs while the attempt counter (starting at 0) is <= ``tests``.
    Returns the list of accepted primers.
    """
    accepted = list()
    rc_site = str(Seq(digestion_site).reverse_complement())
    prefix_len = length - 5 - len(digestion_site)
    attempt = 0
    while attempt <= tests:
        attempt += 1
        candidate = (str(RandomDNA_without_site(prefix_len, digestion_site))
                     + digestion_site
                     + str(RandomDNA_without_site(5, digestion_site)))
        hairpin = primer3.calcHairpin(candidate)
        # Guard clauses, evaluated in the same order as the combined test.
        if not 53 < primer3.calcTm(candidate) < 55:
            continue
        if candidate.count(digestion_site) + candidate.count(rc_site) != 1:
            continue
        if hairpin.structure_found:
            continue
        if not 50 <= gc_counter(candidate) <= 60:
            continue
        if end_CG and not end_3(candidate):
            continue
        if not runs_counter(candidate):
            continue
        if not repeat_counter(candidate):
            continue
        accepted.append(candidate)
    return list(accepted)
def __init__(self,name,seq,targetposition=None,tag=None,loci=[],location=None):
    """Create a primer record from a name and a sequence.

    The sequence is upper-cased and stored as ``str``; ``tm`` is primer3's
    melting temperature and ``gc`` the G+C fraction (0..1).  ``loci``,
    ``snp`` and ``meta`` always start empty -- the ``loci`` argument is only
    tested for truthiness and otherwise ignored.
    """
    sequence = str(seq.upper())
    gc_bases = sequence.count('G') + sequence.count('C')

    self.rank = -1
    self.name = name
    self.seq = sequence
    self.tag = tag
    self.tm = primer3.calcTm(sequence)
    self.gc = gc_bases / float(len(sequence))
    self.loci = []  # genome matches
    self.snp = []  # same order as loci attribute
    self.meta = {}  # metadata
    self.targetposition = targetposition
    self.location = location  # storage location
    if loci:
        # placeholder: supplied loci are currently not used
        pass
def __init__(self, direction, name, seq):
    """Store the primer and its thermodynamic characteristics.

    ``tm``, ``homodimer`` and ``hairpin`` hold melting temperatures computed
    with 50 mM monovalent, 1.5 mM divalent and 0.6 mM dNTP; ``gc`` is the
    G+C percentage of ``seq``.
    """
    conditions = dict(mv_conc=50, dv_conc=1.5, dntp_conc=0.6)
    self.direction = direction
    self.name = name
    self.seq = seq
    self.tm = calcTm(self.seq, **conditions)
    self.homodimer = calcHomodimer(self.seq, **conditions).tm
    self.hairpin = calcHairpin(self.seq, **conditions).tm
    gc_bases = seq.count('G') + seq.count('C')
    self.gc = 100.0 * gc_bases / len(seq)
def tm(sequence):
    """Return the melting temperature of ``sequence`` truncated to an int.

    The input is converted to ``str``, stripped, freed of blanks, upper-cased
    and has U replaced by T, so RNA-style and lower-case input are accepted.
    Upper case is used because primer3 is reported to give unusable results
    for lower-case sequences -- NOTE(review): confirm for the installed
    primer3 version.

    When the computed value is negative a warning is printed and 0 is
    returned.
    """
    import primer3

    cleaned = str(sequence).strip().replace(' ', '').upper().replace('U', 'T')
    melting = int(primer3.calcTm(cleaned))
    if melting < 0:
        print('请检测序列是否输入正确')
        return 0
    return melting
def gen_training_set(n_seq: int, seq_len_min: int,
                     seq_len_max: int) -> List[tuple]:
    """
    Build ``n_seq`` training examples of ``(sequence, Tm)``.

    Each sequence is random DNA whose length is drawn with
    ``random.randrange(seq_len_min, seq_len_max)`` (so ``seq_len_max`` itself
    is never chosen); its Tm label is computed by primer3.
    """
    def one_example():
        length = random.randrange(seq_len_min, seq_len_max)
        sequence = random_dna_sequence(seq_length=length)
        return (sequence, primer3.calcTm(sequence))

    return [one_example() for _ in range(n_seq)]
def primerMatch(self,locus,seq,ampsize):
    """Find exact occurrences of the partner primer next to ``locus``.

    The search window lies ``ampsize[0]``..``ampsize[1]`` bases upstream of
    the locus offset for a reverse locus, or the same distances past the
    locus end otherwise.  ``seq`` is searched as given for a reverse locus
    and as its reverse complement otherwise.

    Returns a list of Locus objects on the opposite strand, each carrying the
    Tm of the searched sequence.
    """
    near, far = ampsize[0], ampsize[1]
    # get sequence with flank
    if locus.reverse:
        chromStart = locus.offset - far
        chromEnd = locus.offset - near
    else:
        locusEnd = locus.offset + locus.length
        chromStart = locusEnd + near
        chromEnd = locusEnd + far
    with pysam.FastaFile(self.file) as fasta:
        seqslice = fasta.fetch(locus.chrom, chromStart, chromEnd)
    # find sequence
    if locus.reverse:
        qrySeq = seq
    else:
        qrySeq = seq.translate(revcmp)[::-1]
    # create new loci
    hits = [match.start() for match in re.finditer(re.escape(qrySeq), seqslice)]
    loci = []
    for position in hits:
        tm = primer3.calcTm(qrySeq)
        loci.append(Locus(locus.chrom, chromStart + position, len(qrySeq),
                          not locus.reverse, tm))
    return loci
def compute_tm(self, sequence):
    """Return the melting temperature of ``sequence``.

    When ``self.params`` is an empty dict, the simple heuristic of 4 degrees
    per G/C and 2 degrees per any other base is used.  Otherwise Primer3's
    calculator is called with ``self.primer3_params`` as keyword arguments;
    ImportError is raised if Primer3 is not available.

    NOTE(review): the emptiness test reads ``self.params`` while the
    calculation uses ``self.primer3_params`` -- confirm both are intended.
    """
    if self.params == {}:
        return sum(4 if base in "GC" else 2 for base in sequence)
    if PRIMER3_AVAILABLE:
        return primer3.calcTm(sequence, **self.primer3_params)
    raise ImportError(
        "Melting temperature computation with '%s' "
        "Requires Primer3 installed." %
        self.primer3_params.get('method', "[unknown method]"))
def __init__(self, name, seq, targetposition=None, tag=None, loci=[], location=None):
    """Create a primer record from a name and a sequence.

    The upper-cased sequence is stored as ``str`` together with primer3's
    Tm and the G+C fraction (0..1).  ``loci``, ``snp`` and ``meta`` start
    empty; the ``loci`` argument is only tested for truthiness and otherwise
    ignored.  ``targetposition`` is stored unchanged (a former "chr" prefix
    stripping step is disabled).
    """
    self.rank = -1
    self.name = name
    self.tag = tag

    self.seq = str(seq.upper())
    self.tm = primer3.calcTm(self.seq)
    gc_bases = self.seq.count("G") + self.seq.count("C")
    self.gc = gc_bases / float(len(self.seq))

    self.loci = []  # genome matches
    self.snp = []  # same order as loci attribute
    self.meta = {}  # metadata

    self.targetposition = targetposition
    self.location = location  # storage location
    if loci:
        # placeholder: supplied loci are currently not used
        pass
def expanded_primer_stats(degen, oligo_DNA=50, dNTPs=0.2, salt_monovalent=50,
                          salt_divalent=1.5):
    """Attach averaged per-expansion statistics to every degenerate primer.

    ``degen`` is a three-level mapping (num -> category -> degenerate
    sequence) whose leaves hold an ``'expanded'`` collection of concrete
    sequences.  For each leaf, the Tm, GC, hairpin Tm and homodimer Tm of
    every expanded sequence are computed and stored on the leaf under the
    keys ``'Tm'``, ``'GC'``, ``'hairpin'`` and ``'homodimer'`` as
    ``[average, standard deviation]``.

    Returns the same (mutated) ``degen``, or None when ``degen`` is None.
    """
    logging.info('Calculating stats on primer sets...')
    if degen is None:
        return None

    conditions = dict(dna_conc=oligo_DNA,
                      dntp_conc=dNTPs,
                      mv_conc=salt_monovalent,
                      dv_conc=salt_divalent)

    for categories in degen.values():
        for primer_sets in categories.values():
            for entry in primer_sets.values():
                stats = {'Tm': [], 'GC': [], 'hairpin': [], 'homodimer': []}
                # stats on each expanded (non-degenerate) primer
                for seq in list(entry['expanded']):
                    stats['Tm'].append(primer3.calcTm(seq, **conditions))
                    stats['GC'].append(calc_GC(seq))
                    stats['hairpin'].append(
                        primer3.calcHairpin(seq, **conditions).tm)
                    stats['homodimer'].append(
                        primer3.calcHomodimer(seq, **conditions).tm)
                # summarizing stats (average & std)
                for stat_name, values in stats.items():
                    entry[stat_name] = [avg(values), sd(values)]
    return degen
def primerMatch(self, locus, seq, ampsize):
    """Locate exact matches of the partner primer in the flank of ``locus``.

    For a reverse locus the flank spans ``ampsize[1]``..``ampsize[0]`` bases
    before the locus offset and ``seq`` is searched as given; otherwise the
    flank spans ``ampsize[0]``..``ampsize[1]`` bases past the locus end and
    the reverse complement of ``seq`` is searched.

    Returns a list of Locus objects (strand opposite to ``locus``) annotated
    with the Tm of the searched sequence.
    """
    is_reverse = locus.reverse
    # get sequence with flank
    if is_reverse:
        chromStart, chromEnd = locus.offset - ampsize[1], locus.offset - ampsize[0]
    else:
        anchor = locus.offset + locus.length
        chromStart, chromEnd = anchor + ampsize[0], anchor + ampsize[1]
    with pysam.FastaFile(self.file) as fasta:
        seqslice = fasta.fetch(locus.chrom, chromStart, chromEnd)

    # find sequence
    qrySeq = seq if is_reverse else seq.translate(revcmp)[::-1]
    starts = [found.start() for found in re.finditer(re.escape(qrySeq), seqslice)]

    # create new loci
    loci = []
    for offset in starts:
        tm = primer3.calcTm(qrySeq)
        loci.append(
            Locus(locus.chrom, chromStart + offset, len(qrySeq), not locus.reverse, tm))
    return loci
def make_primers(n=200000):
    """Write ``n`` random primers with their Tm and hairpin dG to primers.csv.

    Each primer is a random A/T/G/C sequence of 10 to 30 bases (inclusive).
    The file ``primers.csv`` in the current directory is overwritten with a
    ``seq,tm,hairpin`` header followed by one line per primer.
    """
    alphabet = "ATGC"
    records = []  # (sequence, estimated tm, hairpin dG)
    for _ in range(n):
        length = random.randint(10, 30)
        bases = [alphabet[random.randint(0, 3)] for _ in range(length)]
        sequence = "".join(bases)
        melting = primer3.calcTm(sequence)
        hairpin_dg = primer3.calcHairpin(sequence).dg
        records.append((sequence, melting, hairpin_dg))

    with open("primers.csv", "w") as output:
        output.write("seq,tm,hairpin\n")
        for record in records:
            output.write(f"{record[0]},{record[1]},{record[2]}\n")
def GetPrimers(aut, len_primer, GC, Tm):
    """Add every acceptable primer of length ``len_primer`` to ``aut``.

    All A/G/C/T combinations are enumerated; a primer is registered via
    ``aut.add_word`` when its Tm lies within ``Tm[0]..Tm[1]`` and its GC
    percentage within ``GC[0]..GC[1]`` (bounds inclusive).  The stored value
    is ``[primer, len_primer, gc_percent_str, tm_str]``.

    Returns ``aut``.
    """
    for bases in itertools.product('AGCT', repeat=len_primer):
        primer = ''.join(bases)
        melting = calcTm(primer)
        gc_percent = (bases.count('G') + bases.count('C')) / len_primer * 100
        tm_ok = Tm[0] <= melting <= Tm[1]
        if tm_ok and GC[0] <= gc_percent <= GC[1]:
            payload = [primer, len_primer, str(gc_percent), str(melting)]
            aut.add_word(primer, payload)
    return aut
def primer_bindings(primers: list, template: str, min_bases=10) -> pd.DataFrame:
    """
    Collect the binding sites of a list of primers on a template.

    :param primers: list of loaded primers (with sequence in the 'seq' attribute)
    :type primers: list
    :param template: template sequence
    :type template: basestring
    :param min_bases: passed on to ``find_initial_bindings``
    :return: one ordered row (name, sequence, direction, overhang, annealing,
        start, end, abs_start, abs_end, Tm, match) per extended match
    :rtype: list of OrderedDict -- NOTE(review): the annotation promises a
        pandas.DataFrame, but the rows are returned as a plain list.
    """
    rows = []
    for binding in find_initial_bindings(primers, template, min_bases):
        forward = binding.direction == 1
        # Reverse bindings are matched against the reverse complement.
        strand = reverse_complement(template) if binding.direction == -1 else template
        for match in _extend_match(binding.matchseq, binding.primerseq, strand):
            if forward:
                abs_start, abs_end = match.start, match.end
            else:
                abs_start = len(template) - match.start
                abs_end = len(template) - match.end
            # Tm of at most the last 60 annealing bases.
            melting = round(
                primer3.calcTm(match.anneal[-60:].upper(), dv_conc=15), 2)
            rows.append(OrderedDict([
                ('name', binding.primer.name),
                ('sequence', binding.primerseq),
                ('direction', binding.direction),
                ('overhang', match.overhang),
                ('annealing', match.anneal),
                ('start', match.start),
                ('end', match.end),
                ('abs_start', abs_start),
                ('abs_end', abs_end),
                ('Tm', melting),
                ('match', binding.matchseq),
            ]))
    return rows
def __init__(self, oligomer, start=None, max_runs=4):
    """Characterise an oligomer: Tm, secondary structures, GC and base runs.

    Args:
        oligomer: the oligo sequence.
        start: optional start position, stored unchanged.
        max_runs: length of a single-base run (A, T, G or C) that sets the
            ``base_run`` flag.
    """
    # Raw strings keep "\w" a regex escape rather than an invalid string
    # escape; the resulting pattern is \w*(A{n}|T{n}|G{n}|C{n})\w*.
    runs = "|".join(base + "{" + str(max_runs) + "}" for base in "ATGC")
    base_runs_format = re.compile(r"\w*(" + runs + r")\w*")
    self.seq = oligomer
    self.Tm = calcTm(oligomer)
    self.hairpin = Hairpin(oligomer)
    self.homodimer = Homodimer(oligomer)
    self.GC = SeqUtils.GC(oligomer)
    self.base_run = (re.match(base_runs_format, oligomer) is not None)
    self.start = start
    self._rating = None
def primer3_cal(sequence, mintm=37, maxhtm=37, dtm=10):
    """Return ``(sequence, passed)`` for the primer3 thermodynamic checks.

    ``passed`` is False when any of these holds:
      * the duplex Tm is below ``mintm``;
      * the hairpin Tm is above ``maxhtm``;
      * the hairpin Tm is less than ``dtm`` below the duplex Tm.

    The last test previously used the opposite comparison, which failed every
    sequence whose hairpin Tm was far below its duplex Tm; it now applies the
    same criterion as ``primer3_filter``.
    """
    tm = primer3.calcTm(sequence)
    htm = primer3.calcHairpinTm(sequence)
    primer3ft = True
    if tm < mintm:
        primer3ft = False
    if htm > maxhtm:
        primer3ft = False
    if (tm - htm) < dtm:
        primer3ft = False
    return (sequence, primer3ft)
def is_good_primer(primer):
    """Judge a primer and return ``(ok, tm, message)``.

    ``primer`` is a sequence of items whose element 1 is a base.  The primer
    is rejected (with tm 0) when it matches the module patterns ``poly`` or
    ``tandem``, when it has ``ambiguous_base_n`` or more ambiguous bases, or
    when its hairpin or homodimer Tm exceeds its duplex Tm.
    """
    # ref1. http://www.premierbiosoft.com/tech_notes/PCR_Primer_Design.html
    seq = ''.join([item[1] for item in primer])

    if re.search(poly, seq) is not None:
        return False, 0, 'Poly(NNNNN) structure found'
    if re.search(tandem, seq) is not None:
        return False, 0, 'Tandom(NN*5) exist'

    # limit on the number of ambiguous bases
    ambiguous_count = len(re.findall(ambiguous_base, seq))
    if ambiguous_count >= ambiguous_base_n:
        return False, 0, 'More than 3 ambiguous base'

    # primer3.setGlobals seems have no effect on calcTm, so all ambiguous
    # bases are replaced by A to get an approximate value. Otherwise calcTm()
    # will generate -99999 if there is ambiguous base.
    pure_seq = re.sub(ambiguous_base, 'A', seq)
    tm = primer3.calcTm(pure_seq)
    structure_tms = (primer3.calcHairpinTm(pure_seq),
                     primer3.calcHomodimerTm(pure_seq))
    if max(tm, *structure_tms) != tm:
        return False, 0, 'Hairpin or homodimer found'
    return True, tm, 'Ok'
def primer3_filter(sequence, mintm=37, maxhtm=35, dtm=10):
    """Return True when ``sequence`` should be filtered out.

    A sequence is filtered when its duplex Tm is below ``mintm``, its hairpin
    Tm is above ``maxhtm``, or the hairpin Tm is less than ``dtm`` below the
    duplex Tm.
    """
    tm = primer3.calcTm(sequence)
    htm = primer3.calcHairpinTm(sequence)
    too_cold = tm < mintm
    hairpin_too_stable = htm > maxhtm
    hairpin_too_close = (tm - htm) < dtm
    return bool(too_cold or hairpin_too_stable or hairpin_too_close)
def primer3_filter_withRprimer(sequence, rprimer, mintm=37, maxhtm=35, dtm=10):
    """Return True when ``sequence`` should be filtered out, given ``rprimer``.

    The duplex Tm is that of ``sequence`` alone.  Hairpin Tms are computed
    for ``rprimer + sequence`` and for ``rprimer + revcom(sequence)``.  The
    sequence is filtered when the duplex Tm is below ``mintm`` or when either
    hairpin Tm is above ``maxhtm`` or less than ``dtm`` below the duplex Tm.
    """
    tm = primer3.calcTm(sequence)
    forward_hairpin_tm = primer3.calcHairpinTm(rprimer + sequence)
    reverse_hairpin_tm = primer3.calcHairpinTm(rprimer + revcom(sequence))

    rejected = tm < mintm
    for hairpin_tm in (forward_hairpin_tm, reverse_hairpin_tm):
        if hairpin_tm > maxhtm or (tm - hairpin_tm) < dtm:
            rejected = True
    return bool(rejected)
def query(self, query):
    '''Return the stored primer pairs matching ``query``.

    ``query`` is interpreted as
      * a date fragment when its string form consists only of digits,
        whitespace and hyphens (matched against ``pairs.dateadded``),
      * a primer pair name fragment when it is any other string,
      * an interval (object with ``chrom``, ``chromStart``, ``chromEnd``)
        otherwise; results are then ordered by distance to the interval
        midpoint.

    Returns a list of PrimerPair objects.  A warning is written to stderr
    for pairs whose orientation cannot be derived from the primer names.
    '''
    try:
        text_types = (str, unicode)
    except NameError:  # Python 3 has no separate unicode type
        text_types = (str,)

    def orientation_label(value):
        # Parsed orientation: 0 = unknown, negative = reverse, positive = forward.
        return '???' if value == 0 else 'rev' if value < 0 else 'fwd'

    columns = ('SELECT DISTINCT p.pairid, l.tag, r.tag, l.seq, r.seq, '
               'p.left, p.right, p.chrom, p.start, p.end, '
               'l.vessel, l.well, r.vessel, r.well, ')
    joins = (' FROM pairs AS p '
             'LEFT JOIN primer as l ON p.left = l.name '
             'LEFT JOIN primer as r ON p.right = r.name ')

    # Connect outside the try block: if connecting fails there is nothing to
    # close, and the connection error must not be masked by the cleanup.
    self.db = sqlite3.connect(self.sqlite)
    try:
        cursor = self.db.cursor()
        datematch = re.compile(r"([0-9\s-]+)$")
        if datematch.match(str(query)):  # query date
            subSearchName = '%' + str(query) + '%'
            cursor.execute(
                columns + '0' + joins
                + 'WHERE p.dateadded LIKE ? ORDER BY p.pairid;',
                (subSearchName,))
        elif isinstance(query, text_types):  # use primerpair name
            subSearchName = '%' + query + '%'
            cursor.execute(
                columns + '0' + joins
                + 'WHERE p.pairid LIKE ? ORDER BY p.pairid;',
                (subSearchName,))
        else:  # is interval
            midpoint = int(query.chromStart
                           + int(query.chromEnd - query.chromStart) / 2.0)
            cursor.execute(
                columns
                + 'abs(p.start+((p.end-p.start)/2) - ?) as midpointdistance'
                + joins
                + 'WHERE p.chrom = ? '
                'AND p.start + length(l.seq) <= ? '
                'AND p.end - length(r.seq) >= ? '
                'ORDER BY midpointdistance;',
                (midpoint, query.chrom, query.chromStart, query.chromEnd))
        rows = cursor.fetchall()
    finally:
        self.db.close()

    # return primer pairs that would match
    primerPairs = []
    for row in rows:
        # build targets
        leftTargetposition = Locus(row[7], row[8], len(row[3]), False,
                                   primer3.calcTm(str(row[3])))
        rightTargetposition = Locus(row[7], row[9] - len(row[4]), len(row[4]), True,
                                    primer3.calcTm(str(row[4])))
        # build storage locations (if available)
        leftLocation = Location(*row[10:12]) if all(row[10:12]) else None
        rightLocation = Location(*row[12:14]) if all(row[12:14]) else None
        # Build primers
        leftPrimer = Primer(row[5], row[3], targetposition=leftTargetposition,
                            tag=row[1], location=leftLocation)
        rightPrimer = Primer(row[6], row[4], targetposition=rightTargetposition,
                             tag=row[2], location=rightLocation)
        # get reverse status (from name)
        orientations = [x[1] for x in map(parsePrimerName, row[5:7])]
        if not any(orientations) or len(set(orientations)) == 1:
            # Each label now describes its own primer (the second label used
            # to test the first primer's orientation).
            sys.stderr.write(
                '\rWARNING: {} orientation is ambiguous ({},{}){}\r'.format(
                    row[0],
                    orientation_label(orientations[0]),
                    orientation_label(orientations[1]),
                    " " * 20) + '\n')
            reverse = False
        elif orientations[0] > 0 or orientations[1] < 0:
            reverse = False
        elif orientations[1] > 0 or orientations[0] < 0:
            reverse = True
        else:
            raise Exception('PrimerPairStrandError')
        # Build pair
        primerPairs.append(PrimerPair([leftPrimer, rightPrimer], name=row[0], reverse=reverse))
    return primerPairs  # ordered by midpoint distance
# Design primers for the prepared sequence/global arguments.
# NOTE(review): the identical call appears twice in a row; the second result
# simply replaces the first -- confirm whether one call can be dropped.
res = primer3.designPrimers(seq_arg, global_arg)
res = primer3.designPrimers(seq_arg, global_arg)
import pprint
# pprint.pprint(res)
# print res['PRIMER_LEFT_0_SEQUENCE']

# Prepend the forward/reverse tails to the best-ranked (index 0) primer pair.
fp = ftail53_s + Seq(res.get('PRIMER_LEFT_0_SEQUENCE'), IUPAC.unambiguous_dna)
rp = rtail53 + Seq(res.get('PRIMER_RIGHT_0_SEQUENCE'), IUPAC.unambiguous_dna)

# Report the tails, the full tailed primers and their melting temperatures
# (Python 2 print statements).
print ftail53_s, rtail53
print fp, rp
print primer3.calcTm(str(fp)), primer3.calcTm(str(rp))

# output = open('results/primers.txt', 'w')
# for k, v in ...
#     output.write(h + '\n')
# output.close()

# Sequence arguments for a second design run on test_seq with the left primer
# forced to start at index 3 and the right primer at the last index.
seq_arg_batch = {
    'SEQUENCE_ID': 'test_leg',
    'SEQUENCE_TEMPLATE': test_seq,
    # 'SEQUENCE_INCLUDED_REGION': [3, len(test_seq)],
    # 'SEQUENCE_PRIMER': str(fp),
    # 'SEQUENCE_PRIMER_REVCOMP': str(rp),
    'SEQUENCE_FORCE_LEFT_START': 3,
    'SEQUENCE_FORCE_RIGHT_START': len(test_seq)-1,
}
def screenPadlockArms(
        p_l_seq: str,
        p_r_seq: str,
        loop_seq: str,
        p_params: dict,
        do_print: bool = False) -> Tuple[bool, dict]:
    """Screen a padlock's two arms and loop against the design limits.

    Checks, in order: arm GC content, left-arm GC clamp, arm Tm, excluded
    sequences in the assembled padlock (right arm + loop + left arm), and
    heterodimer Tm between each pair of parts.

    Args:
        p_l_seq: left arm sequence.
        p_r_seq: right arm sequence.
        loop_seq: loop sequence between the arms.
        p_params: limits and ``thermo_params`` (keyword arguments for the
            primer3 calls).
        do_print: print a line for each failed numeric/clamp check.

    Returns:
        ``(is_good, report)``; ``report`` keeps its default entry for every
        passed check and records the offending value for every failed one.
    """
    def say(template, *values):
        # Format and print only when verbose output was requested.
        if do_print:
            print(template % values if values else template)

    is_good = True
    tp = p_params['thermo_params']
    report = {
        'arm_gc_min_l': 0,
        'arm_gc_max_l': 0,
        'arm_gc_min_r': 0,
        'arm_gc_max_r': 0,
        'l_clamp': True,
        'tm_arm_min_l': 0,
        'tm_arm_min_r': 0,
        'ex_seq': [],
        'tm_hairpin_l': 0,
        'tm_hairpin_r': 0,
        'tm_hetero_0': 0,
        'tm_hetero_1': 0,
        'tm_hetero_2': 0
    }

    # 1. GC content checks
    gc_left = gcContent(p_l_seq)
    gc_right = gcContent(p_r_seq)
    gc_checks = (
        # (value, limit key, limit is a minimum, message, report key)
        (gc_left, 'arm_gc_min', True, "\tgc content L min fail %0.3f", 'arm_gc_min_l'),
        (gc_right, 'arm_gc_min', True, "\tgc content R min fail %0.3f", 'arm_gc_min_r'),
        (gc_left, 'arm_gc_max', False, "\tgc content L max fail %0.3f", 'arm_gc_max_l'),
        (gc_right, 'arm_gc_max', False, "\tgc content R max fail %0.3f", 'arm_gc_max_r'),
    )
    for value, limit_key, is_minimum, message, report_key in gc_checks:
        limit = p_params[limit_key]
        failed = value < limit if is_minimum else value > limit
        if failed:
            say(message, value)
            is_good = False
            report[report_key] = value

    # 2. GC clamp checks
    if padlockLeftArmGCClamp(p_l_seq) > 3:
        say("\tl clamp fail")
        is_good = False
        report['l_clamp'] = False

    # 3. Arm Tm check
    tm_left = calcTm(p_l_seq, **tp)
    tm_right = calcTm(p_r_seq, **tp)
    arm_checks = (
        (tm_left, "\tArm L fail %2.3f", 'tm_arm_min_l'),
        (tm_right, "\tArm R fail %2.3f", 'tm_arm_min_r'),
    )
    for arm_tm, message, report_key in arm_checks:
        if arm_tm < p_params['arm_tm_min']:
            say(message, arm_tm)
            is_good = False
            report[report_key] = arm_tm

    # 4. Check for excluded seqs (only the first hit is recorded)
    padlock_seq = p_r_seq + loop_seq + p_l_seq
    for excluded in p_params['exclude_seqs']:
        if excluded in padlock_seq:
            report['ex_seq'].append(excluded)
            is_good = False
            break

    # 5. Secondary structure / primer dimer checks
    hetero_tms = (
        calcHeterodimerTm(p_l_seq, p_r_seq, **tp),
        calcHeterodimerTm(p_l_seq, loop_seq, **tp),
        calcHeterodimerTm(p_r_seq, loop_seq, **tp),
    )
    for index, hetero_tm in enumerate(hetero_tms):
        if hetero_tm > p_params['structure_tm_max']:
            say("\thetero %d fail" % index)
            is_good = False
            report['tm_hetero_%d' % index] = hetero_tm

    return is_good, report
def calcProperties(self):
    """Recompute ``self.tm`` (primer3) and ``self.gc`` (G+C fraction, 0..1)
    from ``self.seq``."""
    sequence = self.seq
    # get Tm via primer3
    self.tm = primer3.calcTm(sequence)
    # calc GC
    gc_bases = sequence.count('G') + sequence.count('C')
    self.gc = gc_bases / float(len(sequence))
def getTM(self):
    """Return primer3's melting temperature for this object's ``seq``."""
    return primer3.calcTm(self.seq)