def evalPrimerPairMT(fprimer, rprimer, ret_mt=False):
    """Check whether a primer pair has compatible melting temperatures.

    The optimal melting temperature of the primers is 60–64°C, with an ideal
    temperature of 62°C, which is based on typical cycling and reaction
    conditions and the optimum temperature for PCR enzyme function. Ideally,
    the melting temperatures of the 2 primers should not differ by more than
    2°C in order for both primers to bind simultaneously and efficiently
    amplify the product.

    NOTE(review): the filter below accepts a difference of up to 3°C, not the
    2°C quoted above -- confirm which value is intended.

    PCR parameters used are from IDT:
    Oligo 0.2 uM, Na 50 mM, Mg 3 mM, dNTPs 0.8 mM

    Both the GC-based (``Tm_GC``) and nearest-neighbour (``Tm_NN``) values are
    printed, but only the GC-based values take part in the pass/fail decision.

    :param fprimer: forward primer sequence
    :param rprimer: reverse primer sequence
    :param ret_mt: when truthy, a passing pair returns the two GC-based
        melting temperatures instead of ``True``
    :return: ``False`` when the pair fails; otherwise ``True`` or the tuple
        ``(fprimer_MT, rprimer_MT)`` depending on ``ret_mt``
    """
    conditions = {"Na": 50, "Mg": 3, "dNTPs": 0.8}

    fprimer_MT = MeltingTemp.Tm_GC(fprimer, **conditions)
    rprimer_MT = MeltingTemp.Tm_GC(rprimer, **conditions)
    fprimer_MT_NN = MeltingTemp.Tm_NN(fprimer, **conditions)
    # Fixed: this value used to be computed from the forward primer, so the
    # report printed the forward NN temperature on the reverse-primer line.
    rprimer_MT_NN = MeltingTemp.Tm_NN(rprimer, **conditions)

    print(
        f"forw primer: {fprimer}\nforw primer MT: {fprimer_MT} {fprimer_MT_NN} \n"
        f"rev primer: {rprimer}\nrev primer MT : {rprimer_MT} {rprimer_MT_NN} \n"
    )

    # Filter for primers that meet the MT standards.
    passed = (
        math.fabs(fprimer_MT - rprimer_MT) <= 3
        and max(fprimer_MT, rprimer_MT) <= 64
        and min(fprimer_MT, rprimer_MT) >= 60
    )
    if not passed:
        print("MT for the primer pairs did not meet standards\n")
        return False

    print("MT of primer pair passed.\n")
    if ret_mt:
        return fprimer_MT, rprimer_MT
    return True
def Tm_feature(data, pam_audit=True, learn_options=None):
    """Build a five-column table of melting temperatures for each 30mer.

    Column 0 is ``Tm.Tm_staluc`` of the whole 30mer; columns 1-4 are the same
    function applied to the four ``(start, end)`` slices in
    ``learn_options["Tm segments"]`` (defaults: ``(19, 24)``, ``(11, 19)``,
    ``(6, 11)``, ``(4, 24)``). All calls use ``rna=True``.

    :param data: table with a ``"30mer"`` column of sequences
    :param pam_audit: when true, rows whose characters 25-26 are not ``"GG"``
        are skipped and keep the placeholder value 1.0 in every column
    :param learn_options: optional mapping; only the ``"Tm segments"`` key is
        read
    :return: DataFrame indexed like ``data`` with one row per sequence
    """
    if learn_options is None or "Tm segments" not in learn_options:
        segments = [(19, 24), (11, 19), (6, 11), (4, 24)]
    else:
        segments = learn_options["Tm segments"]

    sequence = data["30mer"].values
    # Initialised to ones, so skipped rows are reported as 1.0 rather than NaN.
    featarray = np.ones((sequence.shape[0], 5))
    rna = True

    for i, seq in enumerate(sequence):
        if pam_audit and seq[25:27] != "GG":
            # The original code had a `raise` after this `continue`; it could
            # never execute, so the effective behaviour (skip the row) is kept
            # and the dead statement is gone.
            continue
        featarray[i, 0] = Tm.Tm_staluc(seq, rna=rna)  # 30mer
        # Columns 1..4: PAM-proximal 5mer, 8mer, 5mer, 20nt spacer (with the
        # default segments).
        for column in range(1, 5):
            segment = segments[column - 1]
            featarray[i, column] = Tm.Tm_staluc(seq[segment[0]:segment[1]],
                                                rna=rna)

    feat = pd.DataFrame(featarray,
                        index=data.index,
                        columns=[
                            "Tm global_30mer%s" % rna,
                            "5mer_end_%s" % rna,
                            "8mer_middle_%s" % rna,
                            "5mer_start_%s" % rna,
                            "Tm global_spacer_%s" % rna
                        ])
    return feat
def get_thermo(dict, guide_sequence, context_sequence):
    """Add nearest-neighbour melting temperatures to ``dict`` and return it.

    Uses Biopython's ``MeltingTemp.Tm_NN`` with default settings. Keys written:

    * ``'Tm, context'`` -- the whole ``context_sequence``
    * ``'Tm, 5mer-15'`` -- the last five bases of ``guide_sequence``
    * ``'Tm, 5mer-3'``  -- ``guide_sequence[2:7]``
    * ``'Tm, middle'``  -- ``guide_sequence[7:-5]``

    The mapping is modified in place; the same object is returned.
    """
    nearest_neighbour_tm = MeltingTemp.Tm_NN

    dict['Tm, context'] = nearest_neighbour_tm(context_sequence)

    guide_windows = (
        ('Tm, 5mer-15', slice(-5, None)),
        ('Tm, 5mer-3', slice(2, 7)),
        ('Tm, middle', slice(7, -5)),
    )
    for label, window in guide_windows:
        dict[label] = nearest_neighbour_tm(guide_sequence[window])

    return dict
def Tm_feature(data, feature_options=None):
    """Build a four-column table of melting temperatures for each sequence.

    Column 0 is ``Tm.Tm_staluc`` of the whole sequence; columns 1-3 are the
    same function applied to the three ``(start, end)`` slices in
    ``feature_options['Tm segments']`` (defaults: ``(15, 21)``, ``(4, 13)``,
    ``(0, 4)``). All calls use ``rna=False``.

    The column labels are fixed strings and do not change with the segment
    lengths actually used.

    :param data: object exposing ``.values`` (the sequences) and ``.index``
    :param feature_options: optional mapping; only ``'Tm segments'`` is read
    :return: DataFrame indexed like ``data``
    """
    if feature_options is None or 'Tm segments' not in feature_options:
        segments = [(15, 21), (4, 13), (0, 4)]
    else:
        segments = feature_options['Tm segments']

    sequence = data.values
    featarray = np.ones((sequence.shape[0], 4))
    # Defined before the loop: it is also needed for the column names, and
    # used to be unbound (NameError) whenever `data` was empty.
    rna = False

    for i, seq in enumerate(sequence):
        featarray[i, 0] = Tm.Tm_staluc(seq, rna=rna)  # whole-sequence Tm
        # Columns 1..3 follow the order of `segments`.
        for column in range(1, 4):
            segment = segments[column - 1]
            featarray[i, column] = Tm.Tm_staluc(seq[segment[0]:segment[1]],
                                                rna=rna)

    feat = pandas.DataFrame(featarray,
                            index=data.index,
                            columns=[
                                "Tm global_%s" % rna,
                                "5mer_end_%s" % rna,
                                "8mer_middle_%s" % rna,
                                "4mer_start_%s" % rna
                            ])
    return feat
def Tm_feature(data):
    '''
    assuming '30mer' is a key
    get melting temperature features from:
        0-the 30-mer ("global Tm")
        1-the Tm (melting temperature) of the DNA:RNA hybrid from positions 16 - 20 of the sgRNA, i.e. the 5nts immediately proximal of the NGG PAM
        2-the Tm of the DNA:RNA hybrid from position 8 - 15 (i.e. 8 nt)
        3-the Tm of the DNA:RNA hybrid from position 3 - 7 (i.e. 5 nt)

    Every temperature is computed with ``Tm.Tm_staluc(..., rna=False)``.

    Raises ``Exception`` if characters 25-26 of any 30mer are not "GG".
    Returns a DataFrame indexed like ``data`` with four columns.
    '''
    sequence = data['30mer'].values
    featarray = np.ones((sequence.shape[0], 4))
    # Defined before the loop: it is also needed for the column names, and
    # used to be unbound (NameError) whenever `data` had no rows.
    rna = False

    for i, seq in enumerate(sequence):
        if seq[25:27] != "GG":
            raise Exception("expected GG but found %s" % seq[25:27])
        featarray[i, 0] = Tm.Tm_staluc(seq, rna=rna)  # 30mer Tm
        featarray[i, 1] = Tm.Tm_staluc(seq[20:25], rna=rna)  # 5nts immediately proximal of the NGG PAM
        featarray[i, 2] = Tm.Tm_staluc(seq[12:20], rna=rna)  # 8-mer
        featarray[i, 3] = Tm.Tm_staluc(seq[7:12], rna=rna)  # 5-mer

    feat = pandas.DataFrame(featarray,
                            index=data.index,
                            columns=[
                                "Tm global_%s" % rna,
                                "5mer_end_%s" % rna,
                                "8mer_middle_%s" % rna,
                                "5mer_start_%s" % rna
                            ])
    return feat
def _primer_candidate_passes(pr):
    """Return True when ``pr`` clears every primer filter used by ``nested``."""
    gc_value = GC(pr)
    if gc_value >= 60 or gc_value <= 40:
        return False
    if GCend(pr) == 0:
        return False

    wallace_tm = mt.Tm_Wallace(pr)
    if wallace_tm > 60 or wallace_tm < 56:
        return False

    # Align the candidate against its own reversed string and reject it when
    # the alignment line contains any of the match patterns below.
    pr_revers = pr[::-1]
    complalig = SWalig(SWvlmtrx(pr, pr_revers), pr, pr_revers)
    alignment_marks = complalig[2]
    if ("****" in alignment_marks or self_dimers(pr) == 1
            or "***-**" in alignment_marks or "**-***" in alignment_marks):
        return False

    return hairpin(pr) != 0


def nested(seq):
    """Return the first acceptable primer found scanning ``seq`` left to right.

    For each start position, prefixes of 18 to 25 bases are tried in order of
    increasing length; when none passes, the first base is dropped and the
    scan restarts at 18. A candidate passes when ``GC(pr)`` is strictly
    between 40 and 60, ``GCend(pr)`` is non-zero, the Wallace Tm lies in
    56-60, the self-alignment and ``self_dimers`` checks are clean, and
    ``hairpin(pr)`` is non-zero.

    :param seq: sequence to search (sliced and reversed like a string)
    :return: the accepted primer, or ``None`` when the sequence is exhausted
        without finding one (this case used to loop forever)
    """
    shortest, longest = 18, 25
    length = shortest

    while True:
        if length == longest + 1:
            # Every length failed at this start position: move one base on.
            length = shortest
            seq = seq[1:]
        if not seq:
            return None

        candidate = seq[:length]
        if _primer_candidate_passes(candidate):
            return candidate
        length += 1
def BedprobeTm(self, seq7):
    """Tm calculation function for use with .bed output.

    Computes the nearest-neighbour Tm of ``seq7`` using ``self.sal`` as Na and
    ``self.conc1``/``self.conc2`` as the two strand concentrations, rounds it
    to two decimals, applies ``mt.chem_correction`` with ``fmd=self.form``,
    and returns the corrected value as a string with two decimals.
    """
    raw_tm = mt.Tm_NN(seq7, Na=self.sal, dnac1=self.conc1, dnac2=self.conc2)
    # Round before correcting, exactly as the value is later reported.
    rounded_tm = float('%0.2f' % raw_tm)
    corrected_tm = mt.chem_correction(rounded_tm, fmd=self.form)
    return '%0.2f' % corrected_tm
def probeTm(seq1, saltConc, formConc):
    """Return the formamide-corrected melting temperature of ``seq1``.

    The nearest-neighbour Tm is computed with ``Na=saltConc``, rounded to two
    decimals, passed through ``mt.chem_correction`` with ``fmd=formConc``, and
    returned as a string formatted to two decimals.
    """
    raw_tm = mt.Tm_NN(seq1, Na=saltConc)
    rounded_tm = float('%0.2f' % raw_tm)
    corrected_tm = mt.chem_correction(rounded_tm, fmd=formConc)
    return '%0.2f' % corrected_tm
def get_primer(seq, direction, name):
    """Grow a primer from one end of ``seq`` until it exceeds ``PRIMER_TM``.

    Starting at 15 bases, the primer is lengthened one base at a time while
    ``mt.Tm_staluc`` of the current slice is <= the module-level ``PRIMER_TM``
    and the length is <= 65. ``"fwd"`` primers are taken from the start of
    ``seq``; ``"rev"`` primers are taken from the end, passed through
    ``revcomplement`` and lower-cased.

    Tm_staluc is the 'old' nearest-neighbour calculation kept for
    compatibility; Biopython recommends Tm_NN, which with default parameters
    gives essentially the same results.

    Side effect: increments the module-level counter ``PRIMER_NUM``.

    :param seq: template sequence
    :param direction: ``"fwd"`` or ``"rev"``
    :param name: label embedded in the primer name
    :return: ``[primer_name, primer_seq, primer_tm, primer_length]``
    :raises ValueError: for any other ``direction`` (this used to fail with an
        UnboundLocalError further down)
    """
    global PRIMER_NUM

    if direction not in ("fwd", "rev"):
        raise ValueError(
            "direction must be 'fwd' or 'rev', got {!r}".format(direction))

    primer_length = 15  # min primer length
    if direction == "fwd":
        while mt.Tm_staluc(seq[0:primer_length]) <= PRIMER_TM and primer_length <= 65:
            primer_length += 1
        primer_seq = seq[0:primer_length]
    else:
        while mt.Tm_staluc(seq[-primer_length:]) <= PRIMER_TM and primer_length <= 65:
            primer_length += 1
        primer_seq = revcomplement(seq[-primer_length:]).lower()
    primer_tm = mt.Tm_staluc(primer_seq)

    primer_seq = str(primer_seq)
    primer_name = "{}_{}_{}".format(PRIMER_NUM, name, direction)
    # NOTE(review): the reported length is the loop counter, which can exceed
    # len(primer_seq) when `seq` is shorter than the counter -- confirm
    # whether callers rely on that.
    primer = [primer_name, primer_seq, primer_tm, primer_length]
    PRIMER_NUM += 1
    return primer
def grow_overlap(startpoint, seq):
    """
    Returns the sequence divided in three parts: 5', overlap and 3'.

    Overlap grows from the middle outwards, regardless of the oligo length
    limit, that is checked afterwards, and if it's not good enough codons get
    swapped randomly until the overlap has the proper length and Tm.

    The returned strings are the three parts of the sequence: the five-prime
    unique section, the overlap that will later be added to both, and the
    three-prime unique section. Their concatenation equals ``seq`` (asserted).

    Reads the module-level ``minmelt`` and ``args`` (``MgmM``, ``dNTPsmM``,
    ``maxoverlaplen``).
    """
    # Minimum length of the overlap must be len = Tm/4, which assumes that
    # it's 100% GC. Floor division keeps the slice indices integral; plain
    # `/` produces floats on Python 3 and makes every slice below raise.
    min_len = int(minmelt // 4)
    half = min_len // 2

    overlap = seq[startpoint - half:startpoint] + seq[startpoint:startpoint + half]
    counter = 0
    tm = MeltingTemp.Tm_NN(overlap, Na=50, K=0, Tris=0, Mg=args.MgmM,
                           dNTPs=args.dNTPsmM)
    gc = GC(overlap)

    # GC% must be between 40 and 60, Tm should be above the minimum, and the
    # overlap should not grow past the configured limit.
    # NOTE(review): the bound compares half the overlap length (plus counter)
    # with args.maxoverlaplen -- confirm that is the intended limit.
    while (half + counter < args.maxoverlaplen) and (
            (tm < minmelt) or (gc < 40) or (gc > 60)):
        counter += 1
        # Extend by one base on each side.
        overlap = (seq[startpoint - half - counter] + overlap
                   + seq[startpoint + half + counter - 1])
        tm = MeltingTemp.Tm_NN(overlap, Na=50, K=0, Tris=0, Mg=args.MgmM,
                               dNTPs=args.dNTPsmM)
        gc = GC(overlap)

    firsthalf = seq[:(startpoint - half - counter)]
    secondhalf = seq[(startpoint + half + counter):len(seq)]

    assert len(firsthalf + overlap + secondhalf) == len(seq)
    assert firsthalf + overlap + secondhalf == seq
    return firsthalf, overlap, secondhalf
def get_thermo(dict, guide_sequence, context_sequence):
    """Add nearest-neighbour melting temperatures to ``dict`` and return it.

    Uses Biopython's ``MeltingTemp.Tm_NN`` with default settings. The guide is
    cut into thirds (``len(guide_sequence) // 3`` bases each, with the last
    part taking any remainder). Keys written: ``'Tm, context'``,
    ``'Tm, start'``, ``'Tm, mid'`` and ``'Tm, end'``.

    The mapping is modified in place; the same object is returned.
    """
    nearest_neighbour_tm = MeltingTemp.Tm_NN

    dict['Tm, context'] = nearest_neighbour_tm(context_sequence)

    third = len(guide_sequence) // 3
    guide_windows = (
        ('Tm, start', slice(0, third)),
        ('Tm, mid', slice(third, 2 * third)),
        ('Tm, end', slice(2 * third, None)),
    )
    for label, window in guide_windows:
        dict[label] = nearest_neighbour_tm(guide_sequence[window])

    return dict
def __probeTm(self):
    """
    Return the formamide-corrected melting temperature of ``self.seq``.

    The nearest-neighbour Tm is computed with ``Na=self.sal``, corrected with
    ``mt.chem_correction`` using ``fmd=int(self.form)``, and returned as a
    string formatted to two decimals.
    """
    uncorrected = float(mt.Tm_NN(self.seq, Na=self.sal))
    formamide_percent = int(self.form)
    corrected = mt.chem_correction(uncorrected, fmd=formamide_percent)
    return '%0.2f' % corrected
def possible_reverse_seq(seq_d, n, seq_length, mt_min, mt_max, na, tris, mg,
                         dntps, saltcor):
    """Return the window of ``seq_d`` at ``n`` if it qualifies as a primer.

    The window ``seq_d[n:n + seq_length]`` qualifies when it has the full
    requested length, starts with ``'C'`` or ``'G'``, and its nearest-neighbour
    Tm (computed with the given salt settings) lies strictly between
    ``mt_min`` and ``mt_max``.

    :return: the window as ``str``, or ``None`` when it does not qualify
    """
    a = seq_d[n:n + seq_length]

    # Length is checked first so that an empty window at the end of the
    # sequence yields None instead of an IndexError on a[0].
    if len(a) == 0 or len(a) != seq_length:
        return None
    if a[0] not in ('C', 'G'):
        return None

    # Computed once; the original evaluated the same Tm for each bound.
    melting_temp = mt.Tm_NN(a, Na=na, Tris=tris, Mg=mg, dNTPs=dntps,
                            saltcorr=saltcor)
    if mt_min < melting_temp < mt_max:
        return str(a)
    return None
def seqProbes(mb_seq, mb_size, mb_sscount, probe):
    """Split a target into probe-sized chunks and collect per-probe data.

    Besides its parameters, the function reads ``mb_bases`` and ``mb_so`` from
    module scope.

    :param mb_seq: target sequence, chunked with ``itersplit_into_x_chunks``
    :param mb_size: size argument forwarded to ``itersplit_into_x_chunks``
    :param mb_sscount: per-base counts (values convertible with ``int``)
    :param probe: probe length
    :return: tuple ``(jl, perl, sumsl, basesl, Tml, Tmp, basesp)``:
        1-based probe numbers, per-probe base percentage, per-probe
        normalised count sums, reverse-complement probes, their Tm values
        (truncated to int), Tm values of the parallel probes, and the
        parallel probes themselves
    """

    def probe_tm(oligo):
        # One shared set of conditions for both probe orientations.
        return int(mt.Tm_NN(oligo, dnac1=50000, dnac2=50000, Na=100,
                            nn_table=mt.RNA_NN1, saltcorr=1))

    result = list(itersplit_into_x_chunks(mb_seq, mb_size, probe))
    basesl = [reverse_complement(chunk) for chunk in result]
    basesp = [parallel_probe(chunk) for chunk in result]
    Tml = [probe_tm(oligo) for oligo in basesl]
    Tmp = [probe_tm(oligo) for oligo in basesp]

    # Base content of each probe. A second, identical chunking of mb_bases
    # used to be built here and never read; it has been dropped.
    result_basesa = list(itersplit_into_x_chunks(mb_bases, mb_size, probe))
    # NOTE(review): an earlier comment described this as the percentage of
    # Gs and Cs, but the code counts 'A' and 'G' -- confirm which is intended.
    perl = [
        int((chunk.count('A') + chunk.count('G')) / probe * 100)
        for chunk in result_basesa
    ]
    jl = list(range(1, len(result_basesa) + 1))

    size2 = len(mb_sscount)
    sumsl = [
        sum(map(int, chunk)) / (probe * mb_so)
        for chunk in itersplit_into_x_chunks(mb_sscount, size2, probe)
    ]

    # Put together all data as indicated in the header.
    return (jl, perl, sumsl, basesl, Tml, Tmp, basesp)
def Temper(sequence):
    """Return ``Tm.Tm_staluc`` values for a sequence and three of its parts.

    Keys of the returned dict: ``'T20'`` (whole sequence), ``'T7'`` (first
    seven bases), ``'T8'`` (bases 7-14) and ``'T5'`` (bases 15-19).
    """
    first_seven = sequence[:7]
    middle_eight = sequence[7:15]
    last_five = sequence[15:20]
    return {
        'T20': Tm.Tm_staluc(sequence),
        'T7': Tm.Tm_staluc(first_seven),
        'T8': Tm.Tm_staluc(middle_eight),
        'T5': Tm.Tm_staluc(last_five),
    }
def find_left_primer(self, seq, optimal_tm=54):
    """Return the prefix of ``seq`` whose Tm is closest to ``optimal_tm``.

    Prefixes of 10 to 59 bases are scored by the absolute difference between
    ``optimal_tm`` and their nearest-neighbour Tm; once the requested length
    exceeds ``len(seq)`` the whole sequence is scored and the search stops.
    On ties the shortest (earliest) candidate wins.
    """

    def distance_from_target(fragment):
        return abs(optimal_tm - mt.Tm_NN(Seq(fragment)))

    scored = []
    for size in range(10, 60):
        if size > len(seq):
            # Sequence exhausted: score it whole and stop growing.
            scored.append([seq, distance_from_target(seq)])
            break
        prefix = seq[:size]
        scored.append([prefix, distance_from_target(prefix)])

    scored.sort(key=lambda entry: entry[1])
    return scored[0][0]
def featurize(data, exp_nm, seq_col):
    """Turn each row's sequence window into a numeric feature vector.

    For every row, a 31-character window is cut from ``row[seq_col]`` spanning
    offsets -9 to +21 around the index returned by
    ``_data.pos_to_idx_safe(0, exp_nm, row['Name (unique)'])``. The vector is
    the concatenation of: one-hot encoding, dinucleotide encoding, base counts
    (A, C, G, T, G+C) and four nearest-neighbour melting temperatures (whole
    window, last 5 bases, and the two 8-base stretches before them).

    :param data: DataFrame with ``seq_col`` and ``'Name (unique)'`` columns
    :param exp_nm: experiment name forwarded to ``_data.pos_to_idx_safe``
    :param seq_col: name of the column holding the sequence
    :return: ``(X, names)`` -- the stacked feature array and the matching
        ``'x_<feature>'`` names
    :raises AssertionError: if a window is not exactly 31 characters long
    """
    # Imported once per call instead of once per row.
    from Bio.SeqUtils import MeltingTemp as mt

    X_all = []
    start_pos, end_pos = -9, 21  # go up to N in NGG

    for _, row in data.iterrows():
        x_input = row[seq_col]
        zero_idx = _data.pos_to_idx_safe(0, exp_nm, row['Name (unique)'])
        seq = x_input[zero_idx + start_pos:zero_idx + end_pos + 1]
        assert len(seq) == 31

        curr_x = []

        # One hot encoding
        curr_x += one_hot_encode(seq)

        # Dinucleotides
        curr_x += dinucleotide_encode(seq)

        # Sum nucleotides
        curr_x += [
            seq.count('A'),
            seq.count('C'),
            seq.count('G'),
            seq.count('T'),
            seq.count('G') + seq.count('C'),
        ]

        # Melting temp
        curr_x += [
            mt.Tm_NN(seq),
            mt.Tm_NN(seq[-5:]),
            mt.Tm_NN(seq[-13:-5]),
            mt.Tm_NN(seq[-21:-13]),
        ]

        # Store
        X_all.append(np.array(curr_x))

    ohe_nms = get_one_hot_encoder_nms(start_pos, end_pos)
    dint_nms = get_dinucleotide_nms(start_pos, end_pos)
    sum_nms = ['Num. A', 'Num. C', 'Num. G', 'Num. T', 'Num. GC']
    mt_nms = ['Tm full', 'Tm -5', 'Tm -13 to -5', 'Tm -21 to -13']
    param_nms = ['x_%s' % (ft_nm)
                 for ft_nm in ohe_nms + dint_nms + sum_nms + mt_nms]
    return (np.array(X_all), param_nms)
def select_primers(circRNA_seq):
    """Print candidate primer pairs for split points 7 to 13 of a sequence.

    For each split point ``i`` the reverse primer is the reverse complement of
    the last ``20 - i`` bases joined to the first ``i`` bases, and the forward
    primer is the 20-base window starting 150 - ``i`` bases from the end.
    Both primers, their Wallace-rule melting temperatures, the joined
    template and the absolute Tm difference are printed. Nothing is returned.
    """
    for split_at in range(7, 14):
        print(split_at)

        head = circRNA_seq[:split_at]
        tail = circRNA_seq[split_at - 20:]
        window_start = split_at - 150

        forward_primer = Seq(circRNA_seq[window_start:window_start + 20])
        junction_template = Seq(tail + head)
        reverse_primer = junction_template.reverse_complement()

        forward_tm = mt.Tm_Wallace(forward_primer)
        print(forward_primer, forward_tm)
        reverse_tm = mt.Tm_Wallace(reverse_primer)
        print(reverse_primer, reverse_tm)
        print(junction_template)

        print(abs(forward_tm - reverse_tm))
def probeTmOpt_var(self, seq1):
    """Predict the Tm of ``seq1`` with the external VarGibbs program.

    Runs the command stored in ``self.vargibbs`` through the shell, reads the
    prediction from ``<output prefix>.ver`` (second whitespace-separated field
    of the second line) and applies ``mt.chem_correction`` with
    ``fmd=self.form``. The output prefix is ``str(self.inputFile)`` up to its
    first dot.

    :param seq1: probe sequence; ``T`` is replaced by ``U`` for the RNA strand
        and ``complement(seq1)`` is passed as the complementary strand
    :return: the corrected Tm, or ``False`` when the result file is missing
        or cannot be parsed (the program's stderr is logged as a warning)
    """
    vargibbs_res = str(self.inputFile).split('.')[0]
    vargibbs_run = [
        self.vargibbs, f'-o={vargibbs_res}', f'-par={self.par}',
        '-calc=prediction', '-v=0', '-seqsalt={}'.format(1000),
        '\"-seq=r({})\"'.format(seq1.replace('T', 'U')),
        '-cseq={}'.format(complement(seq1)), f'-ct={self.ct}',
        f'-targetsalt={self.sal}', f'-saltscheme={self.saltscheme}'
    ]
    # NOTE(review): the command is assembled as a single string and run with
    # shell=True; that is only safe while the sequence and settings come from
    # trusted input.
    run_func = ' '.join(vargibbs_run)
    proc = subprocess.Popen(run_func,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE,
                            shell=True)
    _, err = proc.communicate()

    try:
        # `with` closes the result file; it used to be left open.
        with open(vargibbs_res + '.ver') as ver_file:
            tm_val = float(ver_file.read().split('\n')[1].split(' ')[1])
    except (OSError, IndexError, ValueError):
        # Missing file, too few lines/fields, or a non-numeric field.
        LOG.warning(err)
        return False

    return mt.chem_correction(tm_val, fmd=self.form)
def repeat_finder(seq):
    '''Finds largest repeat in a sequence and returns the position of the repeat.

    The upper-cased sequence is compared with itself at every offset from 1
    upwards; the longest run of consecutive matching positions is the repeat.
    A repeat is reported when it is longer than 20 bases, or when its
    Wallace-rule Tm (truncated to int) is above 55.

    Two defects of the earlier implementation are fixed here:
      * a matching run that reached the end of one offset was never recorded
        and kept growing into the next offset (so e.g. "AAAA" found nothing);
      * a repeat of exactly 20 bases was always rejected; it is now judged by
        its Tm like the shorter repeats.

    Returns ``(location, length)`` -- the first index of the repeat in the
    upper-cased sequence and its length -- or ``False`` when nothing
    qualifies.
    '''
    string = seq.upper()
    repeat_sequence = ''

    for offset in range(1, len(string)):
        run = 0  # restart the run for every offset
        for i in range(len(string) - offset):
            if string[i] == string[i + offset]:
                run += 1
                # Strict '>' keeps the first repeat found among equals.
                if run > len(repeat_sequence):
                    repeat_sequence = string[i - run + 1:i + 1]
            else:
                run = 0

    length = len(repeat_sequence)
    if length == 0:
        # Nothing repeats; the Tm of an empty sequence could never pass.
        return False

    if length > 20 or int(mt.Tm_Wallace(Seq(repeat_sequence))) > 55:
        location = string.find(repeat_sequence)
        logger.debug("Found long {}bp repeat at location {}".format(
            length, location))
        return location, length
    return False
def main():
    """Print the nearest-neighbour Tm of every sequence in ``seqarr``.

    Each entry of the module-level ``seqarr`` is wrapped in ``Seq`` and passed
    to ``mt.Tm_NN`` together with the module-level settings, positionally and
    in this order: check, strict, c_seq, shift, nn_table, tmm_table,
    imm_table, de_table, dnac1, dnac2, selfcomp, Na, K, Tris, Mg, dNTPs,
    saltcorr. The result is printed with two decimals.
    """
    for raw_sequence in seqarr:
        melting_temp = mt.Tm_NN(
            Seq(raw_sequence),
            check, strict, c_seq, shift,
            nn_table, tmm_table, imm_table, de_table,
            dnac1, dnac2, selfcomp,
            Na, K, Tris, Mg, dNTPs, saltcorr,
        )
        print('%0.2f' % melting_temp)
def tm_func(primers):
    """
    Return the mean nearest-neighbour melting temperature of ``primers``.

    Each primer's Tm is computed with the user-specified PCR salt parameters
    held in the module-level ``tm_params``. When there are multiple sequences
    due to ambiguous bases, their average Tm is returned.
    """
    temperatures = []
    for primer in primers:
        temperatures.append(MeltingTemp.Tm_NN(primer, **tm_params))
    return np.mean(temperatures)
def get_max_domain_melt(dna_structure, staple_indices, scaffold_rotation,
                        scaffold_id, print_staples):
    """Return, for every staple, the highest melting temperature of its domains.

    The scaffold sequence is read from ``dna_structure.strands[scaffold_id]``
    with every ``'N'`` removed. Each base index of a domain is shifted by
    ``scaffold_rotation`` and wrapped around the scaffold length to find the
    scaffold base. A domain of two or more bases is scored with
    ``MeltingTemp.Tm_NN`` on the reversed base string (the domain lies on the
    scaffold while the indices follow the staple); shorter domains score 0.

    :param staple_indices: iterable of staples, each an iterable of domains,
        each an iterable of base indices
    :param print_staples: unused
    :return: list with one maximum Tm per staple
    """
    # Physical scaffold sequence and its length.
    scaffold_sequence = get_sequence(
        dna_structure.strands[scaffold_id]).replace('N', '')
    scaffold_length = len(scaffold_sequence)

    def domain_melt(domain):
        scaffold_bases = [
            scaffold_sequence[(baseindex + scaffold_rotation) % scaffold_length]
            for baseindex in domain
        ]
        if len(scaffold_bases) > 1:
            return MeltingTemp.Tm_NN(Seq(''.join(scaffold_bases[::-1])))
        return 0.

    return [
        max([domain_melt(domain) for domain in strand])
        for strand in staple_indices
    ]
def gene_feature(Y, X, learn_options):
    '''
    Things like the sequence of the gene, the DNA Tm of the gene, etc.

    For every distinct value in ``Y['Target gene']`` the gene sequence is
    fetched with ``util.get_gene_sequence`` and four values are written to all
    rows of that gene: sequence length, GC content, nearest-neighbour Tm and
    DNA molecular weight.

    ``X`` and ``learn_options`` are accepted but not used.

    Returns a DataFrame indexed like ``Y['Target gene']`` with the columns
    'gene length', 'gene GC content', 'gene temperature' and
    'gene molecular weight'.
    '''
    gene_names = Y['Target gene']
    n_rows = gene_names.shape[0]

    gene_length = np.zeros((n_rows, 1))
    gc_content = np.zeros((n_rows, 1))
    temperature = np.zeros((n_rows, 1))
    molecular_weight = np.zeros((n_rows, 1))

    for gene in gene_names.unique():
        seq = util.get_gene_sequence(gene)
        rows = gene_names.values == gene
        gene_length[rows] = len(seq)
        gc_content[rows] = SeqUtil.GC(seq)
        # Tm_NN takes no `rna` keyword (that belonged to Tm_staluc) and raised
        # a TypeError; its default table already describes DNA/DNA duplexes.
        temperature[rows] = Tm.Tm_NN(seq)
        molecular_weight[rows] = SeqUtil.molecular_weight(seq, 'DNA')

    # Renamed from `all`, which shadowed the builtin.
    features = np.concatenate(
        (gene_length, gc_content, temperature, molecular_weight), axis=1)
    df = pandas.DataFrame(data=features,
                          index=gene_names.index,
                          columns=[
                              'gene length', 'gene GC content',
                              'gene temperature', 'gene molecular weight'
                          ])
    return df
def main(argv):
    """Extract probe sequences for every region of a BED file.

    Command-line options ``--FA`` (reference fasta) and ``--BED`` (target
    regions) are parsed from the process arguments; ``argv`` itself is not
    used. Regions of at most 120 bases are taken whole, passed to ``faout``
    and written to ``./test.fa`` with their repeat, GC, N-rate and Tm
    statistics. Larger regions are tiled with windows spanning ``p1`` to
    ``p1 + 119`` that are passed to ``faout`` only.
    """
    # Opened first (as before) so the output file is created even when
    # argument handling fails; `with` guarantees it is closed.
    with open("./test.fa", 'w+') as fout:
        fafile = '/Users/yeweijian/Downloads/data/hg19.fa'
        bedfile = '/Users/yeweijian/Downloads/data/test.bed'
        parser = argparse.ArgumentParser(description='python Rundesign.py ')
        parser.add_argument('--FA',
                            type=str,
                            default=fafile,
                            help='the reference fasta file')
        parser.add_argument('--BED',
                            type=str,
                            default=bedfile,
                            help='the target region file')
        args = parser.parse_args()
        fafile = args.FA
        bedfile = args.BED
        file_exists(fafile)
        file_exists(bedfile)

        # Read the fasta file.
        fh = pysam.Fastafile(fafile)
        try:
            # Read the region file and extract the sequence of each region.
            with open(bedfile) as regions:
                for line in regions:
                    if not line.strip():
                        continue  # tolerate blank lines
                    chrom, start, end = line.rstrip().split('\t')
                    start = int(start)
                    end = int(end)
                    regionsize = end - start

                    # Region is small: take its sequence directly.
                    if regionsize <= 120:
                        seq = Seq(
                            fh.fetch(reference=chrom, start=start, end=end),
                            IUPAC.unambiguous_dna)
                        faout(seq, chrom, start, end)
                        print(
                            '>{}:{}-{} Repeat:{:.3f} GC:{:.3f} Nrate:{:.3f} Tm:{}'
                            .format(chrom, start, end, repeatstat(seq),
                                    GC(seq), nstat(seq), mt.Tm_NN(seq)),
                            file=fout)
                        print(seq, file=fout)
                        continue

                    for p1 in range(start, end):
                        # Fixed: the window end was `start + 119`, which never
                        # moved with p1, so windows shrank and the loop never
                        # stopped early.
                        p2 = p1 + 119
                        if p2 >= end:
                            break
                        seq = Seq(fh.fetch(reference=chrom, start=p1, end=p2),
                                  IUPAC.unambiguous_dna)
                        faout(seq, chrom, p1, p2)
        finally:
            fh.close()
def gene_feature(Y):
    """
    Compute per-gene features and broadcast them to every row of that gene.

    For each distinct value in ``Y["Target gene"]`` the gene sequence is
    fetched with ``util.get_gene_sequence``; its length, GC content,
    ``Tm.Tm_staluc`` melting temperature (``rna=False``) and DNA molecular
    weight are written to all rows carrying that gene.

    Returns a DataFrame indexed like ``Y["Target gene"]`` with the columns
    "gene length", "gene GC content", "gene temperature" and
    "gene molecular weight".
    """
    gene_names = Y["Target gene"]
    n_rows = gene_names.shape[0]

    gene_length = np.zeros((n_rows, 1))
    gc_content = np.zeros((n_rows, 1))
    temperature = np.zeros((n_rows, 1))
    molecular_weight = np.zeros((n_rows, 1))

    for gene in gene_names.unique():
        seq = util.get_gene_sequence(gene)
        rows_for_gene = gene_names.values == gene
        gene_length[rows_for_gene] = len(seq)
        gc_content[rows_for_gene] = SeqUtil.GC(seq)
        temperature[rows_for_gene] = Tm.Tm_staluc(seq, rna=False)
        molecular_weight[rows_for_gene] = SeqUtil.molecular_weight(seq, "DNA")

    column_names = [
        "gene length",
        "gene GC content",
        "gene temperature",
        "gene molecular weight",
    ]
    everything = np.concatenate(
        (gene_length, gc_content, temperature, molecular_weight), axis=1)
    return pd.DataFrame(data=everything,
                        index=gene_names.index,
                        columns=column_names)
def Tm_feature(data, pam_audit=True, learn_options=None):
    '''
    assuming '30mer' is a key
    get melting temperature features from:
        0-the 30-mer ("global Tm")
        1-the Tm (melting temperature) of the DNA:RNA hybrid from positions 16 - 20 of the sgRNA, i.e. the 5nts immediately proximal of the NGG PAM
        2-the Tm of the DNA:RNA hybrid from position 8 - 15 (i.e. 8 nt)
        3-the Tm of the DNA:RNA hybrid from position 3 - 7 (i.e. 5 nt)

    The three sub-sequences are the ``(start, end)`` slices given by
    ``learn_options['Tm segments']`` (defaults: (19, 24), (11, 19), (6, 11)).
    Every temperature is computed with ``Tm.Tm_staluc(..., rna=False)``.

    When ``pam_audit`` is true, raises ``Exception`` if characters 25-26 of
    any 30mer are not "GG".
    Returns a DataFrame indexed like ``data`` with four columns.
    '''
    if learn_options is None or 'Tm segments' not in learn_options:
        segments = [(19, 24), (11, 19), (6, 11)]
    else:
        segments = learn_options['Tm segments']

    sequence = data['30mer'].values
    featarray = np.ones((sequence.shape[0], 4))
    # Defined before the loop: it is also needed for the column names, and
    # used to be unbound (NameError) whenever `data` had no rows.
    rna = False

    for i, seq in enumerate(sequence):
        if pam_audit and seq[25:27] != "GG":
            raise Exception("expected GG but found %s" % seq[25:27])
        featarray[i, 0] = Tm.Tm_staluc(seq, rna=rna)  # 30mer Tm
        # Columns 1..3: PAM-proximal 5nt, 8-mer, 5-mer (with the defaults).
        for column in range(1, 4):
            segment = segments[column - 1]
            featarray[i, column] = Tm.Tm_staluc(seq[segment[0]:segment[1]],
                                                rna=rna)

    feat = pandas.DataFrame(featarray,
                            index=data.index,
                            columns=[
                                "Tm global_%s" % rna,
                                "5mer_end_%s" % rna,
                                "8mer_middle_%s" % rna,
                                "5mer_start_%s" % rna
                            ])
    return feat
def PenaltyMeltingTemperature(Primer: str):
    """Return the melting-temperature penalty for ``Primer``.

    The primer's nearest-neighbour Tm is passed to ``ValueInBounds`` together
    with the constants 55 and 3; whatever that helper returns is the penalty.
    """
    return ValueInBounds(mt.Tm_NN(Primer), 55, 3)
def melting_tmp_T7(input_df):
    """Return the ``mt.Tm_staluc`` value of bases 6-12 of every sequence.

    :param input_df: positionally indexable collection of protospacer
        sequences (20mers), read with ``.iat``
    :return: float array of shape ``(len(input_df), 1)``
    """
    row_count = len(input_df)
    mtmp = zeros((row_count, 1), dtype=float)
    for row in range(row_count):
        protospacer = input_df.iat[row]
        mtmp[row, 0] = mt.Tm_staluc(protospacer[6:13])
    return mtmp
def _create_staple_max_melt_T(self) -> Dict[Strand, float]:
    """Map each staple to the highest melting temperature among its domains.

    Domains whose sequence contains "N" are ignored. Domains with more than
    14 bases are scored with the nearest-neighbour model (Na=0, Mg=17.5);
    shorter ones with the Wallace rule. Staples without any scored domain do
    not appear in the result.
    """
    melt_by_staple: Dict[Strand, List[float]] = dict()

    for staple in self.staples:
        for domain in staple.domain_list:
            if "N" in domain.sequence:
                continue
            if len(domain.base_list) > 14:
                melt_t = MeltingTemp.Tm_NN(Seq(domain.sequence), Na=0, Mg=17.5)
            else:
                melt_t = MeltingTemp.Tm_Wallace(Seq(domain.sequence))
            melt_by_staple.setdefault(staple, []).append(melt_t)

    return {staple: max(temps) for staple, temps in melt_by_staple.items()}