def DSeqVH(fwd: str,
           rev: str = None,
           overhang: int = None,
           alphabet: int = AlphaEnum.DNA) -> VirtualHelix:
    '''Build a two-stranded :class:`VirtualHelix` from plain sequence
    strings, taking the same arguments as :class:`DSeq`.

    Args:
        fwd: sequence of the forward strand
        rev: sequence of the reverse strand; when ``None`` the reverse
            complement of ``fwd`` is used
        overhang: forwarded to :class:`DSeq`; the ``overhang`` attribute of
            the resulting :class:`DSeq` decides the index offsets
        alphabet: forwarded to :class:`DSeq`

    Returns:
        a :class:`VirtualHelix` holding one forward and one reverse strand
    '''
    # Let DSeq work out the effective overhang from the raw arguments.
    shift: int = DSeq(fwd, rev, overhang, alphabet).overhang

    # A positive overhang offsets the forward strand; any other value is
    # stored unchanged as the reverse strand offset.
    # NOTE(review): a negative overhang therefore yields a negative reverse
    # offset -- confirm that VirtualHelix expects this.
    fwd_idx_offsets, rev_idx_offsets = (
        ([shift], [0]) if shift > 0 else ([0], [shift])
    )

    fwd_oligo = Oligo(fwd)
    rev_seq = reverseComplement(fwd) if rev is None else rev
    rev_oligo = Oligo(rev_seq)
    return VirtualHelix([fwd_oligo.strand5p],
                        fwd_idx_offsets,
                        [rev_oligo.strand5p],
                        rev_idx_offsets)
def find_tms(cls, sequences, sodium=0.05, magnesium=0.0, temperature=25, concentration=0.00000025, **kwargs):
    """
    Run 'RNAplex' in probe mode and collect the reported melting temperatures.

    RNAplex in 'probe mode' only calculates the reverse-complement Tm, and
    cannot calculate the hairpin, homodimer, or heterodimer Tms.

    Args:
        sequences: iterable of sequence strings; they are joined with
            newlines and fed to RNAplex on STDIN
        sodium: value passed as '--na-concentration'
        magnesium: value passed as '--mg-concentration'
        temperature: value passed as '--temp'
        concentration: value passed as '--probe-concentration'
        **kwargs: accepted but not used by this function

    Returns:
        list of floats, one per data row found after the table header in the
        RNAplex output, taken from the last ('CURRENT') column. Empty if the
        header is never seen.
    """
    cls.load()

    # A value of None marks a bare flag that takes no '=value' part.
    options = OrderedDict([
        ('--paramFile', cls.parameters),
        ('--probe-mode', None),
        ('--probe-concentration', Oligo.float_to_str(concentration)),
        ('--na-concentration', Oligo.float_to_str(sodium)),
        ('--mg-concentration', Oligo.float_to_str(magnesium)),
        ('--tris-concentration', Oligo.float_to_str(0.0)),
        ('--k-concentration', Oligo.float_to_str(0.0)),
        ('--temp', temperature),
    ])
    flat_options = [
        name if value is None else '{}={}'.format(name, value)
        for name, value in options.items()
    ]
    command_list = ['RNAplex'] + flat_options
    command_str = ' '.join(command_list)
    cls.logger.info('command: {!r}'.format(command_str))

    cp = subprocess.run(command_list,
                        input=bytes('\n'.join(sequences), 'utf-8'),
                        shell=False,
                        stdout=subprocess.PIPE,
                        stderr=subprocess.DEVNULL)

    # The output typically looks like this:
    #   Probe mode
    #   Concentration K:0.000 TNP:0.000 Mg:0.000 Na:0.050 probe:0.000
    #
    #   sequence DDSL98 DDSL04 DRSU95 RRXI98 CURRENT
    #   AGGCTTTAGGGCTATAGGAA 51.78 50.76 50.74 62.06 52.13
    #   CGAATTTAGAGCCTATAAT 43.60 42.90 39.52 48.07 43.46
    #   GGCTATGAGATAGCTAA 43.14 42.68 41.24 52.81 43.44
    header_pattern = regex.compile(
        r'^\s+sequence\s+DDSL98\s+DDSL04\s+DRSU95\s+RRXI98\s+CURRENT')
    row_pattern = regex.compile(r'^\s*(\S+)(?:\s+(\S+)){5}$')

    tm_list = []
    data_found = False
    for line in cp.stdout.decode().splitlines():
        line = line.rstrip()
        if not data_found:
            # Everything up to and including the header line is skipped.
            data_found = header_pattern.search(line) is not None
            continue
        m = row_pattern.match(line)
        if m:
            # Group 2 repeats five times; its last capture is the
            # 'CURRENT' column.
            tm_list.append(float(m.captures(2)[-1]))
    return tm_list
def breakStrand(self, dir_idx: int, strand: Strand, idx: int) -> Tuple[Oligo, Oligo]:
    '''Break a Strand in two and create two new Oligos to assign all of
    the strands in the pre-existing Oligo to

    Args:
        dir_idx: is this on the forward [0] or reverse [1] direction of the
            :class:`VirtualHelix`. Also use :enum:`VHDirEnum` to get
            these idxs
        strand: :class:`Strand` object to break
        idx: index to break the strand at in terms of its sequence

    Returns:
        Two new :class:`Oligo` objects of form::

            Oligo_5p, Oligo_3p

    Raises:
        ValueError: ``strand`` is not in the selected StrandArray of this
            :class:`VirtualHelix`
    '''
    VHDirEnum.check(dir_idx)
    strand_array = self.strand_arrays[dir_idx]
    vh_strands: List[Strand] = strand_array.strands
    # A single scan both validates membership and yields the list position
    # needed in step 3 (the list is not mutated before then).
    try:
        list_idx: int = vh_strands.index(strand)
    except ValueError:
        dir_name: str = VHDirEnum(dir_idx).name
        err: str = "Strand {} not in the {} StrandArray of the VirtualHelix"
        raise ValueError(err.format(strand, dir_name)) from None

    idx_offsets: List[int] = strand_array.idx_offsets
    seq: str = strand.seq
    # NOTE(review): idx is not range-checked, so idx <= 0 or
    # idx >= len(seq) hands an empty sequence to Oligo -- confirm whether
    # Oligo accepts that.

    # 1. Do the 5' portion of the break
    oligo_break5p: Oligo = Oligo(seq[0:idx])
    strand_break5p: Strand = oligo_break5p.strand5p
    neighbor_5p: Strand = strand.strand5p
    if neighbor_5p is not None:  # update existing neighbor oligos
        strand_break5p.strand5p = neighbor_5p
        neighbor_5p.strand3p = strand_break5p
        for seg in neighbor_5p.gen5p():
            seg.oligo = oligo_break5p

    # 2. Do the 3' portion of the break
    oligo_break3p: Oligo = Oligo(seq[idx:])
    strand_break3p: Strand = oligo_break3p.strand5p
    neighbor_3p: Strand = strand.strand3p
    if neighbor_3p is not None:  # update existing neighbor oligos
        strand_break3p.strand3p = neighbor_3p
        neighbor_3p.strand5p = strand_break3p
        for seg in neighbor_3p.gen3p():
            seg.oligo = oligo_break3p

    # 3. Update the strands
    # Both pieces go in right after the original strand (3' piece first so
    # the 5' piece ends up ahead of it). The 5' piece then takes over the
    # original strand's offset slot, and the 3' piece gets a new offset
    # shifted by the length of the 5' piece.
    offset_5p: int = idx_offsets[list_idx]
    list_idx_plus_1: int = list_idx + 1
    vh_strands.insert(list_idx_plus_1, strand_break3p)
    vh_strands.insert(list_idx_plus_1, strand_break5p)
    idx_offsets.insert(list_idx_plus_1, offset_5p + len(strand_break5p))
    vh_strands.pop(list_idx)  # pop out the original strand
    return oligo_break5p, oligo_break3p
def str2Oligo(x: str) -> Tuple[bool, Oligo, Strand]:
    '''Normalize ``x`` into an ``(is_new, oligo, strand)`` triple.

    Args:
        x: either a :class:`Strand` instance, or a value handed to the
            :class:`Oligo` constructor

    Returns:
        ``(False, x.oligo, x)`` when ``x`` already is a :class:`Strand`,
        otherwise ``(True, oligo, oligo.strand5p)`` for a newly created
        :class:`Oligo`
    '''
    if not isinstance(x, Strand):
        fresh_oligo = Oligo(x)
        return True, fresh_oligo, fresh_oligo.strand5p
    return False, x.oligo, x
else: fwd_idx_offsets = [0] rev_idx_offsets = [overhang] oligo_fwd = Oligo(fwd) if rev is None: rev = reverseComplement(fwd) oligo_rev = Oligo(rev) return VirtualHelix([oligo_fwd.strand5p], fwd_idx_offsets, [oligo_rev.strand5p], rev_idx_offsets) # end def if __name__ == '__main__': fwd = 'GGTCTCGAATTCAAA' oligo_fwd = Oligo(fwd) rev = 'TTTGAATTCGAGACC' oligo_rev = Oligo(rev) BsaI_vh = VirtualHelix([oligo_fwd.strand5p], [0], [oligo_rev.strand5p], [0]) print("1.\n%s" % BsaI_vh) BsaI_vh = DSeqVH(fwd, rev, 0) print("2.\n%s" % BsaI_vh) print(BsaI_vh.fwd_strands) BsaI_vh = DSeqVH(fwd) print("3.\n%s" % BsaI_vh) print("Da Oligos", BsaI_vh.oligos()) strand0 = BsaI_vh.fwd_strands[0] print(strand0.oligo) broken_oligos = BsaI_vh.breakStrand(dir_idx=0, strand=strand0, idx=4)
def nnn_unafold(cls, folder, seq1, seq2=None, sodium=0.05, magnesium=0.0, temperature=25, concentration=0.00000025):
    """
    Calculate deltaG, deltaH, deltaS, Tm.
    Faster than running 'UNAFold.pl'.

    Accepts 1 or 2 input sequences. Automatically runs either:
     * Hairpin     (1 input sequence: A=seq1, UNAFold run on A)
     * Homodimer   (2 identical input sequences: A=seq1=seq2, UNAFold run on A & A)
     * Heterodimer (2 input sequences: A=seq1 B=seq2, UNAFold run on A & B)

    Writes 'A.seq' (and 'B.seq' for a heterodimer) plus a '*.det' file to
    temp 'folder'; the external programs write their own files there too.

    Returns four lists: ([deltaG, ...], [deltaH, ...], [deltaS, ...], [Tm, ...])
    All four lists are aligned, one entry per structure. Special cases:
     * If the folding program exits with an error, returns
       ([inf], [inf], [nan], [nan]) without writing the '*.det' file.
     * If the parsed '.ct' data has fewer entries than there are
       structures, every deltaS and Tm is reported as nan.
     * A structure whose Tm denominator is exactly zero gets Tm = nan.

    Raises:
        subprocess.CalledProcessError: if one of the 'ct-energy' runs fails
    """
    # Create sequence files for input
    with open(os.path.join(folder, 'A.seq'), 'w') as flo:
        print(seq1, file=flo)
    if seq2 and seq2 != seq1:
        with open(os.path.join(folder, 'B.seq'), 'w') as flo:
            print(seq2, file=flo)

    parameters_1 = [
        '--NA=DNA',
        '--tmin=' + str(temperature),
        '--tmax=' + str(temperature),
        '--tinc=1',
        '--sodium=' + Oligo.float_to_str(sodium),
        '--magnesium=' + Oligo.float_to_str(magnesium),
        '--maxloop=' + str(30),
        '--mfold=' + ','.join(map(str, [5, -1, 100])),
    ]

    if seq2 is None:
        # 1 sequence
        # Creates files: A.ann, A.ct, A.dG, A.plot, A.run
        # Command: hybrid-ss-min --NA=DNA --tmin=25 --tmax=25 --tinc=1 --sodium=0.05 --magnesium=0.0 --maxloop=30 --mfold=5,-1,100 A.seq
        prefix = 'A'
        command_list = ['hybrid-ss-min'] + parameters_1 + ['A.seq']
    elif seq1 == seq2:
        prefix = 'A-A'
        command_list = ['hybrid-min'] + parameters_1 + ['A.seq', 'A.seq']
    else:
        prefix = 'A-B'
        command_list = ['hybrid-min'] + parameters_1 + ['A.seq', 'B.seq']

    try:
        # Discard the program's output to prevent printing to STDOUT
        subprocess.run(command_list, shell=False, check=True, cwd=folder,
                       stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError:
        # Some sequences, such as 'GAGAAGGAGAAGGAGAAG' paired with itself,
        # will cause a segmentation fault
        return [math.inf], [math.inf], [math.nan], [math.nan]

    def run_ct_energy(options):
        """Run 'ct-energy' on '<prefix>.ct' and return one float per output line."""
        finished = subprocess.run(['ct-energy'] + options + [prefix + '.ct'],
                                  shell=False, check=True, cwd=folder,
                                  stdout=subprocess.PIPE, stderr=None)
        return [float(line) for line in finished.stdout.decode().splitlines()]

    # This calculates delta-H
    # '--suffix=DHD' overrides '--NA=DNA --sodium=0.05 --magnesium=0.0 --temperature=25'
    # Command: ct-energy --suffix=DHD A.ct > A.deltaH
    deltaH_list = run_ct_energy(['--suffix=DHD'])

    # This gives more precise delta-G calculations
    # Command: ct-energy --NA=DNA --sodium=0.05 --magnesium=0.0 --temperature=25 A.ct > A.deltaG
    parameters_2 = [
        '--NA=DNA',
        '--temperature=' + str(temperature),
        '--sodium=' + Oligo.float_to_str(sodium),
        '--magnesium=' + Oligo.float_to_str(magnesium),
    ]
    deltaG_list = run_ct_energy(parameters_2)

    number_structures = len(deltaH_list)

    # Check each pair to determine if it's a homo- or heterodimer
    homodimer_list = cls.parse_ct_file(os.path.join(folder, prefix + '.ct'))

    R = 0.0019872  # Constant

    deltaS_list = []
    Tm_list = []
    try:
        for i in range(number_structures):
            homodimer = homodimer_list[i]
            deltaH = deltaH_list[i]
            deltaG = deltaG_list[i]
            deltaS = 1000.0 * (deltaH - deltaG) / (273.15 + temperature)

            if homodimer is None:
                denominator = deltaS
            else:
                # Equality (not identity/truthiness) is kept on purpose:
                # the value type returned by parse_ct_file is not visible here.
                factor = 1 if homodimer == True else 4  # noqa: E712
                # Natural log (base e)
                denominator = deltaS + 1000.0 * R * math.log(concentration / factor)

            try:
                Tm = 1000.0 * deltaH / denominator - 273.15
            except ZeroDivisionError:
                # e.g. deltaH == deltaG makes deltaS zero; report an
                # unusable Tm instead of aborting the whole calculation
                Tm = math.nan

            deltaS_list.append(deltaS)
            Tm_list.append(Tm)
    except IndexError:
        # Problematic sequences, such as 'TTCTCCACTTCCATCACC' will cause an error
        # So we artificially give them unreasonable results so they will be discarded.
        # Rebuild both lists from scratch: values appended before the
        # failure must not remain, or the lists would be longer than (and
        # misaligned with) deltaG_list and deltaH_list.
        deltaS_list = [math.nan] * number_structures
        Tm_list = [math.nan] * number_structures

    # Write the det file
    with open(os.path.join(folder, prefix + '.det'), 'w') as flo:
        for i in range(number_structures):
            print(
                'Structure {}: dG = {} dH = {} dS = {} Tm = {}'.format(
                    i + 1, deltaG_list[i], deltaH_list[i], deltaS_list[i], Tm_list[i]),
                file=flo)

    # These are all of the float data type
    return deltaG_list, deltaH_list, deltaS_list, Tm_list
def calculate_simple(cls, folder_p, seq1, seq2=None, sodium=0.05, magnesium=0.0, temperature=25, concentration=0.00000025, output_basename=None):
    """
    Run 'UNAFold.pl' once and build result objects from its '.det' file.

    All files are written to the sub-folder 'oligos' of 'folder_p'.
     * 1 input sequence:  UNAFold.pl is run on 'A.seq' and 'A.det' is read.
       If UNAFold.pl exits with an error, a placeholder 'A.det' is
       written instead.
     * 2 input sequences: UNAFold.pl is run on 'A.seq' + 'B.seq' and
       'A-B.det' is read. An error exit is not caught in this case.

    The combined STDOUT/STDERR of UNAFold.pl goes to 'output_basename'
    inside that sub-folder when given, otherwise it is discarded.

    Returns whatever 'cls.make_objects' returns for the '.det' file.
    """
    # Working folder for every file read or written below
    workdir = os.path.join(folder_p, 'oligos')
    os.makedirs(workdir, exist_ok=True)

    # Sequence files consumed by UNAFold.pl
    with open(os.path.join(workdir, 'A.seq'), 'w') as handle:
        print(seq1, file=handle)
    if seq2:
        with open(os.path.join(workdir, 'B.seq'), 'w') as handle:
            print(seq2, file=handle)

    # Default concentrations: 0.25 uM = 0.00025 mM = 0.00000025 M
    unafold_command = [
        'UNAFold.pl',
        '--NA=DNA',
        '--temp=' + str(temperature),
        '--sodium=' + Oligo.float_to_str(sodium),
        '--magnesium=' + Oligo.float_to_str(magnesium),
        '--Ct=' + Oligo.float_to_str(concentration),
        '--max=100',
    ]

    log_path = os.path.join(workdir, output_basename) if output_basename else os.devnull
    run_options = dict(shell=False, check=True, cwd=workdir, stderr=subprocess.STDOUT)

    with open(log_path, 'w+') as log:
        if seq2:
            # Heterodimer
            subprocess.run(unafold_command + ['A.seq', 'B.seq'], stdout=log, **run_options)
        else:
            # Hairpins
            try:
                subprocess.run(unafold_command + ['A.seq'], stdout=log, **run_options)
            except subprocess.CalledProcessError:
                # This error happens sometimes, i.e. when A.seq contains
                # 'TTCTCCACTTCCATCACC'. UNAFold.pl crashes, so a stand-in
                # 'A.det' file is written for the parser below.
                print("UNAFold.pl CRASH")
                with open(os.path.join(workdir, 'A.det'), 'w') as stub:
                    print(
                        'Structure 1: dG = -999.0 dH = -999.0 dS = -999.0 Tm = 99.0',
                        file=stub)

    # Make the objects
    if seq2:
        return cls.make_objects(os.path.join(workdir, 'A-B.det'), sodium, magnesium, temperature, concentration, seq1, seq2)
    return cls.make_objects(os.path.join(workdir, 'A.det'), sodium, magnesium, temperature, concentration, seq1)
def calculate_full(cls, folder_p, seq1, seq2, sodium=0.05, magnesium=0.0, temperature=25, concentration=0.00000025, output_basename=None):
    """
    Run 'UNAFold.pl' on every combination of two sequences and build
    result objects from each resulting '.det' file.

    All files are written to the sub-folder 'oligos' of 'folder_p'.
    The runs are, in order: hairpins (A, B), homodimers (A+A, B+B),
    heterodimer (A+B), and each sequence against its reverse complement
    (A+rcA, B+rcB). Any run that exits with an error raises
    subprocess.CalledProcessError.

    The combined STDOUT/STDERR of all runs goes to 'output_basename' inside
    that sub-folder when given, otherwise it is discarded.

    Returns a 7-tuple of 'cls.make_objects' results:
        (a, b, aa, bb, ab, ra, rb)
    """
    # Working folder for every file read or written below
    workdir = os.path.join(folder_p, 'oligos')
    os.makedirs(workdir, exist_ok=True)

    def write_sequence(filename, sequence, reverse_complement=False):
        """Write one sequence (optionally reverse-complemented) to 'workdir'."""
        with open(os.path.join(workdir, filename), 'w') as handle:
            print(rc(sequence) if reverse_complement else sequence, file=handle)

    # Sequence files consumed by UNAFold.pl
    write_sequence('A.seq', seq1)
    write_sequence('B.seq', seq2)
    write_sequence('rcA.seq', seq1, reverse_complement=True)
    write_sequence('rcB.seq', seq2, reverse_complement=True)

    # Default concentrations: 0.25 uM = 0.00025 mM = 0.00000025 M
    unafold_command = [
        'UNAFold.pl',
        '--NA=DNA',
        '--temp=' + str(temperature),
        '--sodium=' + Oligo.float_to_str(sodium),
        '--magnesium=' + Oligo.float_to_str(magnesium),
        '--Ct=' + Oligo.float_to_str(concentration),
        '--max=100',
    ]

    log_path = os.path.join(workdir, output_basename) if output_basename else os.devnull

    # Input files of each UNAFold.pl run, in execution order
    run_inputs = (
        ['A.seq'],              # hairpins
        ['B.seq'],
        ['A.seq', 'A.seq'],     # homodimers
        ['B.seq', 'B.seq'],
        ['A.seq', 'B.seq'],     # heterodimer
        ['A.seq', 'rcA.seq'],   # reverse complements
        ['B.seq', 'rcB.seq'],
    )
    with open(log_path, 'w+') as log:
        for inputs in run_inputs:
            subprocess.run(unafold_command + inputs, shell=False, check=True,
                           cwd=workdir, stdout=log, stderr=subprocess.STDOUT)

    def load(det_name, *seqs):
        """Build the result objects for one '.det' file in 'workdir'."""
        return cls.make_objects(os.path.join(workdir, det_name), sodium, magnesium, temperature, concentration, *seqs)

    # Make the objects (tuple items are evaluated left to right)
    return (
        load('A.det', seq1),
        load('B.det', seq2),
        load('A-A.det', seq1, seq1),
        load('B-B.det', seq2, seq2),
        load('A-B.det', seq1, seq2),
        load('A-rcA.det', seq1, rc(seq1)),
        load('B-rcB.det', seq2, rc(seq2)),
    )