def runAllMappedSplit(start_idx=0, stop_idx=100000, part='[]', map_dir_ext=''):
    """Build and dispatch one split command per data directory and part.

    Args:
        start_idx: Passed through to ``runCmdCheckIdx`` together with the
            running job index (presumably the first index to run - confirm
            against ``runCmdCheckIdx``).
        stop_idx: Passed through to ``runCmdCheckIdx`` (presumably the last
            index to run - confirm against ``runCmdCheckIdx``).
        part: String form of a list of part indices (e.g. ``'[3]'``),
            ``'[]'``, or ``'all'`` to iterate over ``'[0]'`` .. ``'[49]'``.
        map_dir_ext: Suffix appended to both the ``/mapped_reads`` directory
            name and the ``out_split`` output label.
    """
    if part == 'all':
        part_specs = ["[%d]" % n for n in range(50)]
    else:
        part_specs = [part]

    map_dir = '/mapped_reads%s' % map_dir_ext
    out_label = 'out_split%s' % map_dir_ext
    all_dir, out_dir = getAllDataDirs(), getLogDir()

    for part_spec in part_specs:
        # The job index restarts for every part.
        idx = 0
        for dirname in all_dir:
            print(getShortDir(dirname), idx)
            mapped_path = '%s/%s' % (dirname, map_dir)

            # The remote clean-up command is only attached when the part is
            # '[]' or contains index 0, and only when not running locally.
            # NOTE(review): eval() on the caller-supplied part string; only
            # safe while callers are trusted.
            if (part_spec != '[]' and 0 not in eval(part_spec)) or getRunLocal():
                extra_cmd = ''
            else:
                extra_cmd = 'rm -rf %s' % mapped_path

            # Local runs delete the existing output directory directly,
            # except for directories whose path contains 'NULL'.
            if getRunLocal() and 'NULL' not in dirname and os.path.isdir(mapped_path):
                shutil.rmtree(mapped_path)

            # NOTE(review): the argument string names no script after the
            # python command - the script path appears to be missing; confirm.
            cmd = getPythonCmd() + ' %s %s %s/%s "%s"' % (
                dirname, getExpOligoFile(dirname), dirname, map_dir, part_spec)
            idx = runCmdCheckIdx(cmd, idx, start_idx, stop_idx, out_dir,
                                 out_label, extra_cmd=extra_cmd,
                                 queue='normal')
def runAllSplitNullMappings(start_idx=0, stop_idx=100000, map_dir_ext=''):
    """Dispatch one command per null directory returned by ``getAllNullDirs``.

    Args:
        start_idx: Passed through to ``runCmdCheckIdx`` with the running index.
        stop_idx: Passed through to ``runCmdCheckIdx`` with the running index.
        map_dir_ext: Appended to the command line after the directory name.
    """
    null_dirs = getAllNullDirs()
    out_dir = getLogDir()
    idx = 0
    for null_dir in null_dirs:
        print(getShortDir(null_dir), idx)
        # NOTE(review): no script name follows the python command here - the
        # script path appears to be missing from the string; confirm.
        arg_str = ' %s %s' % (null_dir, map_dir_ext)
        cmd = getPythonCmd() + arg_str
        idx = runCmdCheckIdx(cmd, idx, start_idx, stop_idx, out_dir,
                             'out_null_split')
def runAllIndelMap(start_idx=0, stop_idx=10000000, overbeek_only=False,
                   queue='normal', map_dir='/mapped_reads/', max_cut_dist=4,
                   num_parts=1, order_by_incomplete=False):
    """Dispatch an indel-map command for every sub-directory of every data dir.

    Args:
        start_idx: Passed through to ``runCmdCheckIdx`` with the running index.
        stop_idx: Passed through to ``runCmdCheckIdx`` with the running index.
        overbeek_only: When true, only the ``'Oligos_71'`` sub-directory of
            each data directory is processed.
        queue: Forwarded to ``runCmdCheckIdx`` as ``queue``.
        map_dir: Path suffix appended to each data directory; directories
            without it are skipped. Also passed on the command line.
        max_cut_dist: Integer passed on the command line.
        num_parts: Forwarded to ``runCmdCheckIdx`` as ``numj`` and used to
            derive how many files (out of 20) each part handles.
        order_by_incomplete: When true, read
            ``../quality_checks/status.log`` and process directories that are
            not marked complete before those that are.
    """
    all_dir, out_dir = getAllDataDirs(), getLogDir()
    if overbeek_only:
        print('Computing for Overbeek guides only')

    # Map directory label -> True when every status value on its row is
    # non-zero (i.e. the minimum of the row's values is not 0).
    completed_lookup = {}
    if order_by_incomplete:
        # Context manager guarantees the log is closed even if parsing fails.
        with open('../quality_checks/status.log') as f:
            # NOTE(review): eval() on file contents - only safe while the
            # status log is produced by trusted tooling.
            completed_lookup = {
                toks[0]: min(eval(x) for x in toks[1:]) != 0
                for toks in csv.reader(f, delimiter='\t')
                if len(toks) > 1  # skip blank rows / rows without values
            }

    completed = [x for x in all_dir
                 if completed_lookup.get(getDirLabel(x))]
    completed_set = set(completed)
    not_completed = [x for x in all_dir if x not in completed_set]

    max_files_per_dir = 20
    # Integer ceiling, so that num_parts * file_per_part always covers all
    # files regardless of whether '/' is integer or true division.
    file_per_part = (max_files_per_dir + num_parts - 1) // num_parts

    idx = 0
    for i, dirname in enumerate(not_completed + completed):
        if i == len(not_completed):
            # Visual separator between the incomplete and completed groups.
            print('-------------------------------------------------')
        print(getShortDir(dirname), idx)
        if not os.path.isdir(dirname + map_dir):
            continue
        for subdir in getSubdirs(dirname, withpath=False):
            if overbeek_only and subdir != 'Oligos_71':
                continue
            args = (dirname, getNullDir(dirname), subdir, file_per_part,
                    map_dir, max_cut_dist, getIndelMapExe(), getPythonCmd())
            # NOTE(review): no script name follows the python command - the
            # script path appears to be missing from the string; confirm.
            cmd = getPythonCmd() + ' %s %s %s %d - 0 %s %d %s %s' % args
            idx = runCmdCheckIdx(cmd, idx, start_idx, stop_idx, out_dir,
                                 'out_indelmap_%s' % getDirLabel(dirname),
                                 numj=num_parts, queue=queue)
def runAllPartition(start_idx=0, stop_idx=1000000, nump=50):
    """Dispatch one command per assembled fastq file in every data directory.

    A file is selected when the text after its last underscore is exactly
    ``'pear.assembled.fastq'``.

    Args:
        start_idx: Passed through to ``runCmdCheckIdx`` with the running index.
        stop_idx: Passed through to ``runCmdCheckIdx`` with the running index.
        nump: Integer passed on the command line after the file path.
    """
    all_dir = getAllDataDirs()
    out_dir = getLogDir()
    idx = 0
    for dirname in all_dir:
        print(getShortDir(dirname), idx)
        assembled = []
        for entry in os.listdir(dirname):
            if entry.split('_')[-1] == 'pear.assembled.fastq':
                assembled.append(entry)
        for fastq_name in assembled:
            # NOTE(review): no script name follows the python command - the
            # script path appears to be missing from the string; confirm.
            arg_str = ' %s/%s %d' % (dirname, fastq_name, nump)
            cmd = getPythonCmd() + arg_str
            idx = runCmdCheckIdx(cmd, idx, start_idx, stop_idx, out_dir,
                                 'out_part')
def runAllMap(start_idx=0, stop_idx=100000000, recompute=True,
              unassembled_only=False, max_cut_dist=4,
              map_dir='mapping_files'):
    """Dispatch the indel-map executable for matching files in each data dir.

    Args:
        start_idx: Passed through to ``runCmdCheckIdx`` with the running index.
        stop_idx: Passed through to ``runCmdCheckIdx`` with the running index.
        recompute: When false, files whose ``*_mappings.txt`` output already
            exists are skipped.
        unassembled_only: Selects which filename marker is matched:
            ``'_pear.unassembled_pear.assembled._'`` when true, otherwise
            ``'_pear.assembled._'``.
        max_cut_dist: Integer passed as the last command-line argument.
        map_dir: Name of the output sub-directory inside each data directory.
    """
    all_dir, out_dir = getAllDataDirs(), getLogDir()
    if unassembled_only:
        check_str = '_pear.unassembled_pear.assembled._'
    else:
        check_str = '_pear.assembled._'

    idx = 0
    for dirname in all_dir:
        exp_file = getExpOligoFile(dirname)
        print(getShortDir(dirname), idx)
        map_path = dirname + '/' + map_dir
        for filename in [x for x in os.listdir(dirname) if check_str in x]:
            # Drop the last six characters of the file name to form the
            # output stem.
            stem = filename[:-6]
            cmd = getIndelMapExe() + ' %s/%s %s %s/%s/%s_mappings.txt 1 %d' % (
                dirname, filename, exp_file, dirname, map_dir, stem,
                max_cut_dist)

            # Make sure the output directory exists: directly for local runs,
            # otherwise via an extra command handed to runCmdCheckIdx. This
            # happens before the skip check below, as in the original flow.
            extra_cmd = ''
            if not os.path.isdir(map_path):
                if getRunLocal():
                    os.mkdir(map_path)
                else:
                    extra_cmd = 'mkdir %s' % map_path

            already_done = os.path.isfile(
                map_path + '/' + stem + '_mappings.txt')
            if already_done and not recompute:
                continue
            idx = runCmdCheckIdx(cmd, idx, start_idx, stop_idx, out_dir,
                                 'out_map', extra_cmd=extra_cmd)
def runAllPear(start_idx=0, stop_idx=100000):
    """Dispatch the PEAR executable for every R1/R2 fastq pair in each data dir.

    R1 files are those ending in ``'_R1.fastq'`` or ``'_R1_001.fastq'``. The
    matching R2 name is derived by replacing the first ``'1'`` in the part of
    the name starting at the first ``'_R1'``. Pairs whose R2 file does not
    exist are reported and skipped.

    Args:
        start_idx: Passed through to ``runCmdCheckIdx`` with the running index.
        stop_idx: Passed through to ``runCmdCheckIdx`` with the running index.
    """
    all_dir = getAllDataDirs()
    out_dir = getLogDir()
    idx = 0
    for dirname in all_dir:
        print(getShortDir(dirname), idx)
        r1_fasta_files = [
            name for name in os.listdir(dirname)
            if name.endswith(('_R1.fastq', '_R1_001.fastq'))
        ]
        for r1_file in r1_fasta_files:
            split_at = r1_file.index('_R1')
            file_prefix = r1_file[:split_at]
            file_suffix = r1_file[split_at:].replace('1', '2', 1)
            r2_path = dirname + '/' + file_prefix + file_suffix
            if not os.path.isfile(r2_path):
                print('Could not find matching R2 file:', dirname,
                      file_prefix, file_suffix)
                continue
            pear_args = ' -f %s/%s -r %s/%s%s -o %s/%s_pear -n 20 -p 0.01' % (
                dirname, r1_file, dirname, file_prefix, file_suffix,
                dirname, file_prefix)
            cmd = getPearExe() + pear_args
            idx = runCmdCheckIdx(cmd, idx, start_idx, stop_idx, out_dir,
                                 'out_pear')
#e.g. /lustre/scratch117/cellgen/team227/fa9/self-target/indel_analysis/ST_June_2017/data/K562_800x_LV7B_DPI3/mapped_reads/Oligos_70/Oligos_70850-70899_mappedindelsummary.txt full_filename = line.split()[0] toks = full_filename.split('/') fasta_file = toks[-1][:-23] + '.fasta' filepath = '/'.join(toks[:-1]) dirname = '/'.join(toks[:-3]) nulldir = getNullDir(dirname) subdir = toks[-2] print idx, dirname, fasta_file cmd = '~/ %s %s %s -1 1 %s %d' % ( dirname, nulldir, subdir, fasta_file) idx = runCmdCheckIdx(cmd, idx, start_idx, stop_idx, out_dir, 'out_incomplete_indelmap_%s' % getDirLabel(dirname), queue=queue) f.close() elif source == 'mapped_counts': for dirname in all_dir: dirlabel = getDirLabel(dirname) nulldir = getNullDir(dirname) repeat_indelmaps = set() repeat_reformat = set() f ='../quality_checks/mapped_read_summaries/%s.txt' % dirlabel)
# Optional command-line overrides: argv[3] -> overbeek_only, argv[4] -> queue.
if len(sys.argv) >= 4:
    # NOTE(review): eval() on a command-line argument - only safe while the
    # script is invoked by trusted users.
    overbeek_only = eval(sys.argv[3])
if len(sys.argv) >= 5:
    queue = sys.argv[4]

all_dir, out_dir = getAllDataDirs(), getLogDir()
if overbeek_only:
    print('Computing for Overbeek guides only')

file_per_part = 1
max_files_per_dir = 20
# Integer ceiling; '//' keeps num_parts an int under true division as well.
num_parts = (max_files_per_dir + file_per_part - 1) // file_per_part

i, idx = 0, 0
for dirname in all_dir:
    i += 1
    # Single pre-formatted string prints identically whether print is a
    # statement or a function.
    print('%s %s' % (getShortDir(dirname), idx))
    if not os.path.isdir(dirname + '/mapped_reads'):
        continue
    for subdir in getSubdirs(dirname, withpath=False):
        # With overbeek_only set, only the 'Oligos_71' sub-directory is run.
        if overbeek_only and subdir != 'Oligos_71':
            continue
        # NOTE(review): './' is followed directly by the arguments - the
        # script name appears to be missing from the string; confirm.
        cmd = './ %s %s %s %d' % (
            dirname, getNullDir(dirname), subdir, file_per_part)
        idx = runCmdCheckIdx(cmd, idx, start_idx, stop_idx, out_dir,
                             'out_correct_indelmap_%s' % getDirLabel(dirname),
                             numj=num_parts, queue=queue)