##### IMPORT #####
import numpy as np
import pandas as pd
import os
import sys
import argparse
import itertools
from hjh import processing

# load sequences
junction_seqs = processing.load_file('~/JunctionLibrary/seq_params/three_way_junctions.dat')

# cut out the 'extra'
junction_seq_controls = {}
for idx, row in junction_seqs.iterrows():
    for loop_context in ['L1', 'L2']:
        if loop_context == 'L1':
            two_way_seq = pd.Series({'side1': row.j1,
                                     'side2': row.j2[0] + row.j3[-1]})
        elif loop_context == 'L2':
            two_way_seq = pd.Series({'side1': row.j1[0] + row.j2[-1],
                                     'side2': row.j3})
        junction_seq_controls[(idx, loop_context)] = pd.concat(
            [two_way_seq, row.drop(['j1', 'j2', 'j3'])])

junction_seq_controls = (pd.concat(junction_seq_controls,
                                   names=['index', 'loop_context'])
                         .unstack().swaplevel(0, 1).sort_index())

for loop_context in ['L1', 'L2']:
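# Worked example of the control construction above (a standalone sketch; the
# three strand sequences are invented for illustration, not taken from
# three_way_junctions.dat). The L1 control keeps j1 intact and closes the
# junction with the first base of j2 and the last base of j3; the L2 control
# is the mirror-image surgery that keeps j3 intact.
import pandas as pd

row = pd.Series({'j1': 'AG', 'j2': 'CAU', 'j3': 'GU'})
l1_control = pd.Series({'side1': row.j1, 'side2': row.j2[0] + row.j3[-1]})
l2_control = pd.Series({'side1': row.j1[0] + row.j2[-1], 'side2': row.j3})
print(l1_control.tolist())  # ['AG', 'CU']
print(l2_control.tolist())  # ['AU', 'GU']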
parser.add_argument('--predefined_helix_seqs',
                    help='helix sequences with the junction location already defined '
                    '(h1_side1, h1_side2, h2_side1, h2_side2). If given, all other '
                    'options (i.e. length, offsets) are ignored.')
parser.add_argument('-r', '--junction_seqs',
                    help='side1 and side2 of the junction sequence', required=True)
parser.add_argument('-out', '--out_file', help='file to save output')

def find_length(seq):
    """Find length of the shortest side of a junction seq."""
    return min([len(s) for s in seq.split('_')])

if __name__ == '__main__':
    args = parser.parse_args()
    helix_seqs = processing.load_file(args.predefined_helix_seqs)
    junction_seqs = processing.load_file(args.junction_seqs)

    all_seqs = []
    for (idx1, helix_row), (idx2, junction_row) in itertools.product(
            helix_seqs.iterrows(), junction_seqs.iterrows()):
        seq = pd.Series({
            'side1': helix_row.h1_side1 + junction_row.side1 + helix_row.h2_side1,
            'side2': helix_row.h2_side2 + junction_row.side2 + helix_row.h1_side2})
        seq_data = pd.concat([
            helix_row.drop(['h1_side1', 'h1_side2', 'h2_side1', 'h2_side2']),
            junction_row.drop(['side1', 'side2']),
            pd.Series({
                'helix_seq': (helix_row.h1_side1 + '_' + helix_row.h2_side1 + '&' +
                              helix_row.h2_side2 + '_' + helix_row.h1_side2),
                'junction_seq': junction_row.side1 + '_' + junction_row.side2})])
        all_seqs.append(pd.concat([seq, seq_data]))
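# A standalone sketch of the threading step above, with invented two-base helix
# sides and a hypothetical bulge junction: side1 reads h1 -> junction -> h2 and
# side2 reads h2 -> junction -> h1, so the two strands fold back against each
# other around the junction.
import pandas as pd

helix_row = pd.Series({'h1_side1': 'GC', 'h1_side2': 'GC',
                       'h2_side1': 'AU', 'h2_side2': 'AU'})
junction_row = pd.Series({'side1': 'GGA', 'side2': 'A'})
side1 = helix_row.h1_side1 + junction_row.side1 + helix_row.h2_side1
side2 = helix_row.h2_side2 + junction_row.side2 + helix_row.h1_side2
print(side1, side2)  # GCGGAAU AUAGC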
import logging
import os
import argparse
import subprocess
from hjh import processing

# set up command line argument parser
parser = argparse.ArgumentParser(description="script for making library")
parser.add_argument('-s', '--seqs',
                    help='filename of things to mutate with the '
                    'positions in which to mutate (side1, side2, positions), i.e. '
                    '~/JunctionLibrary/seq_params/receptors_expt3_original.dat',
                    required=True)
parser.add_argument('-out', '--out_file', help='file to save output', required=True)

if __name__ == '__main__':
    args = parser.parse_args()
    receptors = processing.load_file(args.seqs)
    script = 'python ~/JunctionLibrary/mutate_seqs.py -s {in_file} -out {out_file} -p {positions}'
    working_dir = './'

    # group by mutation positions; one mutate_seqs.py call per group
    out_filenames = []
    for name, group in receptors.groupby('positions'):
        # make the name machine friendly
        filename = working_dir + os.path.basename(
            os.path.splitext(args.seqs)[0] + '_' +
            name.replace(';', '.').replace(',', ''))
        in_filename, out_filename = filename + '.dat', filename + '_muts.dat'
        group.drop('positions', axis=1).to_csv(in_filename, index=False, sep='\t')
        call = script.format(in_file=in_filename, out_file=out_filename,
                             positions='"%s"' % name)
        logging.info(call)
        subprocess.call(call, shell=True)
        out_filenames.append(out_filename)

    # join
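# Example invocation (the script and output names are hypothetical; -s points
# at the table named in the help text, which must carry side1, side2, and
# positions columns):
#
#   python make_receptor_muts.py \
#       -s ~/JunctionLibrary/seq_params/receptors_expt3_original.dat \
#       -out receptors_expt3_muts.dat
#
# Each distinct value in the positions column produces one intermediate .dat
# file and one mutate_seqs.py subprocess call.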
import argparse
import itertools
import pandas as pd
from hjh import processing

# set up command line argument parser
parser = argparse.ArgumentParser(description="script for making library")
parser.add_argument('-a', '--starting_seq',
                    help='seed sequence, i.e. a loop. Must have "seq" column.',
                    default='seq_params/loop_1.dat')
parser.add_argument('-b', '--add_seqs', nargs="+",
                    help='list of filenames of sequences to add. '
                    'All should have columns "side1" and "side2" of the adapter sequences')
parser.add_argument('-out', '--out_file', help='file to save output', required=True)

if __name__ == '__main__':
    args = parser.parse_args()

    # load seqs
    loop_seqs = processing.load_file(args.starting_seq)
    other_seqs = [processing.load_file(filename) for filename in args.add_seqs]

    # starting with the loop, thread together the sequential pieces of the tectoRNA
    new_seqs = loop_seqs
    new_seqs.loc[:, 'starting_seq'] = new_seqs.seq
    for add_seqs in other_seqs:
        all_seqs = []
        for (idx1, row1), (idx2, row2) in itertools.product(new_seqs.iterrows(),
                                                            add_seqs.iterrows()):
            seq = processing.thread_together(row1.seq, (row2.side1, row2.side2))
            seq_data = pd.concat([pd.Series({'seq': seq}), row1.drop('seq'),
                                  row2.drop(['side1', 'side2'])])
            all_seqs.append(seq_data)
        all_seqs = pd.concat(all_seqs, axis=1).transpose()
        new_seqs = all_seqs
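# processing.thread_together comes from the hjh package and isn't defined in
# this file. Judging only from the call above, a minimal sketch of the assumed
# behavior (an illustration, not the package's actual implementation) is to
# wrap the growing sequence in the adapter's two sides:
def thread_together_sketch(seq, sides):
    side1, side2 = sides
    return side1 + seq + side2

# e.g. thread_together_sketch('GAAA', ('GC', 'GC')) -> 'GCGAAAGC'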
    offsets = [0, -1, 0, -1, 0, 0]
    h1_lengths = [convert_offset_to_h1length(length - j_len, i)
                  for i, length in zip(offsets, lengths)]
    return lengths, h1_lengths

def find_lengths_h1lengths_tar(j_len):
    """Return all lengths corresponding to 'tar' mode"""
    lengths = [8]*3 + [9]*4 + [10]*5 + [11]*5
    offsets = [-1, 0, 1] + [-2, -1, 0, 1] + [-2, -1, 0, 1, 2] + [-2, -1, 0, 1, 2]
    h1_lengths = [convert_offset_to_h1length(length - j_len, i)
                  for i, length in zip(offsets, lengths)]
    return lengths, h1_lengths

if __name__ == '__main__':
    args = parser.parse_args()
    helix_seqs = processing.load_file(args.helix_seqs)
    junction_seqs = processing.load_file(args.junction_seqs)

    if args.flank_to_add:
        # flank each side with the given bases; seqfun.rc gives the reverse complement
        base_before, base_after = args.flank_to_add.split('_')
        junction_seqs.loc[:, 'no_flank'] = junction_seqs.side1 + '_' + junction_seqs.side2
        side1 = base_before + junction_seqs.side1 + base_after
        side2 = (seqfun.rc(base_after, rna=True) + junction_seqs.side2 +
                 seqfun.rc(base_before, rna=True))
        junction_seqs.loc[:, 'side1'] = side1
        junction_seqs.loc[:, 'side2'] = side2
        junction_seqs.loc[:, 'flank'] = args.flank_to_add

    if args.switch_sides:
        opposite_side = junction_seqs.copy()
        opposite_side.loc[:, 'side1'] = junction_seqs.side2
        opposite_side.loc[:, 'side2'] = junction_seqs.side1
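# Quick sanity check on the 'tar'-mode tables above: the two lists line up
# element-for-element, so each helix length is paired with its allowed offsets.
lengths = [8]*3 + [9]*4 + [10]*5 + [11]*5
offsets = [-1, 0, 1] + [-2, -1, 0, 1] + [-2, -1, 0, 1, 2] + [-2, -1, 0, 1, 2]
assert len(lengths) == len(offsets) == 17
print(list(zip(lengths, offsets))[:3])  # [(8, -1), (8, 0), (8, 1)]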
import os
import argparse
import pandas as pd
from hjh import processing

# set up command line argument parser
parser = argparse.ArgumentParser(description="script for making library")
parser.add_argument('-a', '--add_seqs', nargs="+",
                    help='list of filenames of sequence data to add.')
parser.add_argument('-i', '--libname',
                    help='[optional] the name to prepend to sublibraries, '
                    'i.e. "tertcontact". default is "library"', default='library')
parser.add_argument('-u', '--unique', action="store_true",
                    help='[optional] whether to ensure "seq" column is unique. '
                    'If set, will take first instance of each seq. default=False.')
parser.add_argument('-out', '--out_file', help='file to save output', required=True)

if __name__ == '__main__':
    args = parser.parse_args()

    # load; key each input file by "<libname>_<i>" so rows keep their sublibrary label
    new_seqs = pd.concat({'%s_%d' % (args.libname, i): processing.load_file(filename)
                          for i, filename in enumerate(args.add_seqs)},
                         names=['sublibrary', 'index'])
    if 'sublibrary' not in new_seqs.columns:
        # only add 'sublibrary' if it isn't already a column
        new_seqs.reset_index(level=0, inplace=True)

    # make unique if option given
    if args.unique:
        old_cols = new_seqs.columns.tolist()
        new_seqs = new_seqs.groupby('seq').first().reset_index().loc[:, old_cols]

    # save
    ext_out = os.path.splitext(args.out_file)[-1]
    if ext_out == '.csv':
        new_seqs.to_csv(args.out_file, index=False)
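# A standalone sketch of the concat-with-keys pattern used above: handing
# pd.concat a dict keys each frame, names= labels the MultiIndex levels, and
# reset_index(level=0) turns the sublibrary key into an ordinary column.
import pandas as pd

parts = {'library_0': pd.DataFrame({'seq': ['AA', 'CC']}),
         'library_1': pd.DataFrame({'seq': ['AA', 'GG']})}
combined = pd.concat(parts, names=['sublibrary', 'index']).reset_index(level=0)
print(combined.columns.tolist())  # ['sublibrary', 'seq']
# combined.groupby('seq').first() would then keep one row per sequence,
# which is what the --unique option does.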
import pandas as pd
import os
import sys
import argparse
import itertools
from hjh import processing

# load sequences
for filename in ['~/JunctionLibrary/seq_params/three_way_helices.dat',
                 '~/JunctionLibrary/seq_params/three_way_helices2.dat',
                 '~/JunctionLibrary/seq_params/three_way_helices_minus1.dat',
                 '~/JunctionLibrary/seq_params/three_way_helices_minus2.dat']:
    helix_seqs = processing.load_file(filename)

    # split into base helix, L1 helix, and L2 helix
    keys = ['base_side1', 'base_side2']
    base_helix = helix_seqs.loc[:, keys].rename(
        columns={key: 'h1_' + key.split('_')[-1] for key in keys}).copy()
    for loop_context in ['L1', 'L2']:
        if loop_context == 'L1':
            keys = ['h1_side1', 'h1_side2']
        elif loop_context == 'L2':
            keys = ['h2_side1', 'h2_side2']
        else:
            keys = None
        loop_helix = helix_seqs.loc[:, keys].rename(
            columns={key: 'h2_' + key.split('_')[-1] for key in keys}).copy()
        predefined_helix = pd.concat([base_helix, loop_helix], axis=1)
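# The dict comprehensions above just rewrite column prefixes; a quick check of
# the base-helix mapping:
keys = ['base_side1', 'base_side2']
mapping = {key: 'h1_' + key.split('_')[-1] for key in keys}
assert mapping == {'base_side1': 'h1_side1', 'base_side2': 'h1_side2'}
# so base_* columns become h1_*, and each loop helix's columns become h2_*,
# matching the h1_/h2_ layout the junction-threading script expects.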
#!/usr/bin/env python
# Author: Sarah Denny, Stanford University

# Provides modular tools for making a helix-junction
# helix DNA library

##### IMPORT #####
import numpy as np
import pandas as pd
import os
import sys
import argparse
import itertools
from hjh import processing

# set up command line argument parser
parser = argparse.ArgumentParser(description="script for making library")
parser.add_argument('-s', '--seqs',
                    help='filename with column "seq" to test ss structure of.')
parser.add_argument('-out', '--out_file', help='file to save output', required=True)

if __name__ == '__main__':
    args = parser.parse_args()
    seqs = processing.load_file(args.seqs)
    seqs.loc[:, 'ss'] = processing.check_ss_structure_set(seqs)

    ## save
    seqs.to_csv(args.out_file, index=False, sep='\t')
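# Example invocation (file names are hypothetical): annotate every sequence in
# a library table with its predicted secondary structure and save the result.
#
#   python check_ss.py -s library.dat -out library_ss.dat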