def runAllIndelMap(start_idx=0, stop_idx=10000000, overbeek_only=False,
                   queue='normal', map_dir='/mapped_reads/', max_cut_dist=4,
                   num_parts=1, order_by_incomplete=False):
    """Build and dispatch one ``indelmap_subdir.py`` command per data sub-directory.

    Every directory returned by ``getAllDataDirs()`` that contains ``map_dir``
    is visited, and for each of its sub-directories a command line is built
    and handed to ``runCmdCheckIdx`` together with the running index ``idx``.

    Args:
        start_idx, stop_idx: forwarded unchanged to ``runCmdCheckIdx``
            (presumably the window of command indices to actually run --
            confirm against ``runCmdCheckIdx``).
        overbeek_only: when true, only the ``Oligos_71`` sub-directory of each
            data directory is processed.
        queue: forwarded to ``runCmdCheckIdx`` as ``queue``.
        map_dir: path suffix appended to each data directory; directories
            without it are skipped. It is also passed on the command line.
        max_cut_dist: passed on the command line to ``indelmap_subdir.py``.
        num_parts: forwarded to ``runCmdCheckIdx`` as ``numj`` and used to
            derive the files-per-part value passed on the command line.
        order_by_incomplete: when true, ``../quality_checks/status.log`` is
            read and directories whose status values are all non-zero are
            processed last, after a separator line is printed.
    """
    all_dir, out_dir = getAllDataDirs(), getLogDir()
    if overbeek_only:
        print('Computing for Overbeek guides only')

    completed_lookup = {}
    if order_by_incomplete:
        # The context manager closes the log even when a row fails to parse.
        with io.open('../quality_checks/status.log') as f:
            # NOTE(review): eval() executes whatever the log contains; kept
            # because the value format of status.log is not visible here.
            # Consider int()/ast.literal_eval once the format is confirmed.
            # Blank rows (e.g. a trailing empty line) are skipped.
            completed_lookup = {
                toks[0]: min([eval(x) for x in toks[1:]]) != 0
                for toks in csv.reader(f, delimiter='\t')
                if toks
            }

    # A directory counts as completed only when its label is in the log and
    # its smallest status value is non-zero.
    completed = [x for x in all_dir if completed_lookup.get(getDirLabel(x))]
    completed_set = set(completed)  # O(1) membership instead of a list scan
    not_completed = [x for x in all_dir if x not in completed_set]

    max_files_per_dir = 20
    file_per_part = int(max_files_per_dir / num_parts + 0.99)

    idx = 0
    for i, dirname in enumerate(not_completed + completed):
        if i == len(not_completed):
            # Visual divider between the incomplete and the completed directories.
            print('-------------------------------------------------')
        print(getShortDir(dirname), idx)
        if not os.path.isdir(dirname + map_dir):
            continue
        for subdir in getSubdirs(dirname, withpath=False):
            if overbeek_only and subdir != 'Oligos_71':
                continue
            args = (dirname, getNullDir(dirname), subdir, file_per_part,
                    map_dir, max_cut_dist, getIndelMapExe(), getPythonCmd())
            cmd = getPythonCmd() + ' indelmap_subdir.py %s %s %s %d - 0 %s %d %s %s' % args
            idx = runCmdCheckIdx(cmd, idx, start_idx, stop_idx, out_dir,
                                 'out_indelmap_%s' % getDirLabel(dirname),
                                 numj=num_parts, queue=queue)
def runPerSubdir(python_script, out_label, caller, extra_args='', include_null=False):
    """Call ``runSubdir`` once for every data directory that has mapped reads.

    Args:
        python_script: forwarded to ``runSubdir``.
        out_label: forwarded to ``runSubdir``.
        caller: forwarded to ``runSubdir``.
        extra_args: forwarded to ``runSubdir`` as the ``extra_args`` keyword.
        include_null: when false (the default), directories for which
            ``isNullDir`` is true are left out.

    Directories without a ``mapped_reads`` sub-directory are reported on
    stdout and skipped. The index returned by each ``runSubdir`` call is fed
    into the next call.
    """
    idx = 0
    data_dirs = [x for x in getAllDataDirs() if include_null or not isNullDir(x)]
    for dirname in data_dirs:
        if not os.path.isdir(dirname + '/mapped_reads'):
            # Both values must be arguments of print(); with the message
            # outside the call it was silently discarded.
            print(getShortDir(dirname), 'No mapped_reads directory')
            continue
        subdirs = getSubdirs(dirname)
        idx = runSubdir(idx, subdirs, getShortDir(dirname), python_script,
                        out_label, caller, extra_args=extra_args)
from selftarget.oligo import getOligoIdsFromFile
from selftarget.data import getDirLabel, getIndelSummaryFiles, getSubdirs


def filterLargeI(profile):
    """Return a copy of ``profile`` without keys that start with 'I' followed by '1'.

    NOTE(review): the name suggests "large insertions", but the test only
    looks at the first two characters of each key -- confirm the key format.
    """
    kept = {}
    for indel in profile:
        if indel[0] == 'I' and indel[1] == '1':
            continue
        kept[indel] = profile[indel]
    return kept


if len(sys.argv) != 4:
    print('compare_pairwise.py <dirname1> <dirname2> <subdir>')
else:
    remove_largeI = True
    dirname1, dirname2, subdir = sys.argv[1:4]

    # The requested sub-directory has to exist in both data directories.
    for _data_dir in (dirname1, dirname2):
        if subdir not in getSubdirs(_data_dir, withpath=False):
            raise Exception('No subdir %s in %s' % (subdir, _data_dir) )

    # Output goes to <summary root>/<label1>_vs_<label2>/<subdir>.txt;
    # each directory level is created on demand.
    if remove_largeI:
        out_dir = 'profile_comparison_summaries_nolargeI'
    else:
        out_dir = 'profile_comparison_summaries'
    if not os.path.isdir(out_dir):
        os.mkdir(out_dir)
    out_dir += '/%s_vs_%s' % (getDirLabel(dirname1),getDirLabel(dirname2))
    if not os.path.isdir(out_dir):
        os.mkdir(out_dir)

    out_file = out_dir + '/%s.txt' % subdir
    fout = io.open(out_file,'w')
    # Column header of the tab-separated comparison table.
    fout.write(u'ID\tNum Reads 1\tNum Reads 2\tNum States 1\tNum States 2\tNum null reads 1\tNum null reads 2\tKL with Null\tKL without null\tPerc Accepted Reads 1\tPerc Accepted Reads 2\t1st Nonmatch Indel\tNum Top 3 Common\tNum Top 5 Common\tNum Top 10 Common\tProfile 1 Entropy (before mods)\tProfile 2 Entropy (before mods)\tProfile 1 Entropy (after mods)\tProfile 2 Entropy (after mods)\tPerc Overlap\tP1 Perc in Top 3\tP2 Perc in Top 3\tP1 Perc in Top 5\tP2 Perc in Top 5\tP1 Perc in Top 10\tP2 Perc in Top 10\n')

    # Indel summary file names (without path) found for each of the two directories.
    dir1_files, dir2_files = (
        getIndelSummaryFiles(_data_dir + '/mapped_reads/' + subdir, withpath=False)
        for _data_dir in (dirname1, dirname2)
    )
from selftarget.util import getLogDir, runCmdCheckIdx, runSubdir

if __name__ == '__main__':
    # Launch compare_pairwise.py (via runSubdir) for every pair of selected
    # data directories, over the sub-directories the two have in common.
    all_dir, out_dir = getAllDataDirs(), getLogDir()

    # Directory-name fragments that exclude a directory from the comparison.
    excluded_tags = ('K562_1600x_LV7B_DPI7', '2A_TREX', 'K562_800x_7A_DPI7_may')

    idx = 0
    for old_lib in [False]:  # [True, False] would also cover the old library
        lib_dirs = [
            data_dir for data_dir in all_dir
            if isOldLib(data_dir) == old_lib
            and os.path.isdir(data_dir + '/mapped_reads')
            and 'DPI7' in data_dir
            and not any(tag in data_dir for tag in excluded_tags)
        ]

        for dirname1, dirname2 in itertools.combinations(lib_dirs, 2):
            # Only sub-directories present in both members of the pair are compared.
            common_subdirs = (set(getSubdirs(dirname1, withpath=False))
                              & set(getSubdirs(dirname2, withpath=False)))
            label = '%s\t%s' % (getShortDir(dirname1), getShortDir(dirname2))
            extra_args = '%s %s ' % (dirname1, dirname2)
            idx = runSubdir(idx, common_subdirs, label, 'compare_pairwise.py',
                            'out_compare_pairwise', __file__,
                            extra_args=extra_args)
# Optional third and fourth command-line arguments override the defaults
# that are set earlier in the script.
if len(sys.argv) >= 4:
    # NOTE(review): eval() runs arbitrary code taken from the command line;
    # kept as-is because the accepted spellings are not visible here.
    overbeek_only = eval(sys.argv[3])
if len(sys.argv) >= 5:
    queue = sys.argv[4]

all_dir, out_dir = getAllDataDirs(), getLogDir()
if overbeek_only:
    print('Computing for Overbeek guides only')

file_per_part = 1
max_files_per_dir = 20
# Ceiling division; '//' keeps the result an integer on Python 2 and 3 alike.
num_parts = (max_files_per_dir + file_per_part - 1) // file_per_part

i, idx = 0, 0
for dirname in all_dir:
    i += 1
    # Formatted explicitly so the output is the same whether print is a
    # statement (Python 2) or a function (Python 3).
    print('%s %s' % (getShortDir(dirname), idx))
    if not os.path.isdir(dirname + '/mapped_reads'):
        continue
    for subdir in getSubdirs(dirname, withpath=False):
        # With overbeek_only set, only the 'Oligos_71' sub-directory is processed.
        if overbeek_only and subdir != 'Oligos_71':
            continue
        cmd = './run_correct_indel.sh %s %s %s %d' % (
            dirname, getNullDir(dirname), subdir, file_per_part)
        idx = runCmdCheckIdx(cmd, idx, start_idx, stop_idx, out_dir,
                             'out_correct_indelmap_%s' % getDirLabel(dirname),
                             numj=num_parts, queue=queue)