示例#1
0
def runAllMappedSplit(start_idx=0, stop_idx=100000, part='[]', map_dir_ext=''):
    """Issue one split_mapped_reads_by_id.py command per part and data directory.

    Args:
        start_idx: Forwarded to runCmdCheckIdx together with the running index.
        stop_idx: Forwarded to runCmdCheckIdx together with the running index.
        part: String that is passed through to the script as its last
            argument. The special value 'all' expands to the fifty strings
            '[0]' .. '[49]', each handled in turn. Any value other than '[]'
            is also evaluated here to test whether it contains 0.
        map_dir_ext: Suffix appended to both the '/mapped_reads' directory
            name and the 'out_split' log label.

    The running index restarts at 0 for every part. The existing mapped-reads
    directory is cleared before splitting: when getRunLocal() is true it is
    removed directly (unless the directory path contains 'NULL'); otherwise,
    if the part is '[]' or contains 0, an 'rm -rf' command is handed to
    runCmdCheckIdx as extra_cmd.
    """
    map_dir = '/mapped_reads%s' % map_dir_ext
    out_label = 'out_split%s' % map_dir_ext
    all_dir = getAllDataDirs()
    out_dir = getLogDir()

    if part == 'all':
        parts = ["[%d]" % x for x in range(50)]
    else:
        parts = [part]

    for part in parts:
        idx = 0
        for dirname in all_dir:
            print(getShortDir(dirname), idx)

            # NOTE(review): eval() runs whatever string the caller supplied as
            # `part`; only pass trusted values.
            leaves_dir_alone = part != '[]' and 0 not in eval(part)
            if leaves_dir_alone or getRunLocal():
                extra_cmd = ''
            else:
                extra_cmd = 'rm -rf %s/%s' % (dirname, map_dir)

            # Local runs delete the old output right here instead of through
            # extra_cmd.
            if getRunLocal() and 'NULL' not in dirname:
                stale_dir = '%s/%s' % (dirname, map_dir)
                if os.path.isdir(stale_dir):
                    shutil.rmtree('%s/%s' % (dirname, map_dir))

            python_cmd = getPythonCmd()
            script_args = (dirname, getExpOligoFile(dirname), dirname,
                           map_dir, part)
            cmd = python_cmd + (
                ' split_mapped_reads_by_id.py %s %s %s/%s "%s"' % script_args)

            idx = runCmdCheckIdx(cmd, idx, start_idx, stop_idx, out_dir,
                                 out_label, extra_cmd=extra_cmd,
                                 queue='normal')
def runAllSplitNullMappings(start_idx=0, stop_idx=100000, map_dir_ext=''):
    """Issue one split_null_mappings.py command for every null directory.

    Args:
        start_idx: Forwarded to runCmdCheckIdx together with the running index.
        stop_idx: Forwarded to runCmdCheckIdx together with the running index.
        map_dir_ext: Passed to the script as its second argument.

    Each command is logged under the 'out_null_split' label in the directory
    returned by getLogDir().
    """
    null_dirs = getAllNullDirs()
    out_dir = getLogDir()

    idx = 0
    for null_dir in null_dirs:
        print(getShortDir(null_dir), idx)

        python_cmd = getPythonCmd()
        script_args = (null_dir, map_dir_ext)
        cmd = python_cmd + ' split_null_mappings.py %s %s' % script_args

        # runCmdCheckIdx hands back the index to use for the next command.
        idx = runCmdCheckIdx(cmd, idx, start_idx, stop_idx, out_dir,
                             'out_null_split')
示例#3
0
def runAllIndelMap(start_idx=0,
                   stop_idx=10000000,
                   overbeek_only=False,
                   queue='normal',
                   map_dir='/mapped_reads/',
                   max_cut_dist=4,
                   num_parts=1,
                   order_by_incomplete=False):
    """Issue one indelmap_subdir.py command per sub-directory of each data dir.

    Args:
        start_idx: Forwarded to runCmdCheckIdx together with the running index.
        stop_idx: Forwarded to runCmdCheckIdx together with the running index.
        overbeek_only: When true, only the 'Oligos_71' sub-directory of each
            data directory is processed.
        queue: Forwarded to runCmdCheckIdx.
        map_dir: Appended verbatim to each data directory path (hence the
            leading slash in the default); data directories without it are
            skipped. Also passed to the script.
        max_cut_dist: Passed to the script.
        num_parts: Number of parts each command is divided into; forwarded to
            runCmdCheckIdx as numj and used to derive the files-per-part
            argument as ceil(20 / num_parts).
        order_by_incomplete: When true, read ../quality_checks/status.log and
            process directories whose minimum status value is non-zero last,
            after printing a separator line.
    """
    all_dir, out_dir = getAllDataDirs(), getLogDir()
    if overbeek_only:
        print('Computing for Overbeek guides only')

    completed_lookup = {}
    if order_by_incomplete:
        # The context manager closes the log even when a row fails to parse.
        with io.open('../quality_checks/status.log') as f:
            for toks in csv.reader(f, delimiter='\t'):
                # Blank lines and rows without any status value carry no
                # completion information; such directories stay "not
                # completed" instead of crashing the whole run.
                if len(toks) < 2:
                    continue
                # NOTE(review): eval() executes the log's field contents;
                # status.log must come from a trusted source.
                completed_lookup[toks[0]] = min(
                    [eval(x) for x in toks[1:]]) != 0

    completed = [x for x in all_dir if completed_lookup.get(getDirLabel(x))]
    completed_set = set(completed)  # constant-time membership tests below
    not_completed = [x for x in all_dir if x not in completed_set]

    max_files_per_dir = 20
    # Exact ceiling division. The previous int(a / b + 0.99) form floors under
    # integer division and yields 0 once num_parts grows large enough.
    file_per_part = int(-(-max_files_per_dir // num_parts))

    idx = 0
    for i, dirname in enumerate(not_completed + completed):

        # Mark where the already-completed directories begin.
        if i == len(not_completed):
            print('-------------------------------------------------')

        print(getShortDir(dirname), idx)
        if not os.path.isdir(dirname + map_dir):
            continue

        for subdir in getSubdirs(dirname, withpath=False):
            if overbeek_only and subdir != 'Oligos_71':
                continue
            args = (dirname, getNullDir(dirname), subdir, file_per_part,
                    map_dir, max_cut_dist, getIndelMapExe(), getPythonCmd())
            cmd = getPythonCmd(
            ) + ' indelmap_subdir.py %s %s %s %d - 0 %s %d %s %s' % args
            idx = runCmdCheckIdx(cmd,
                                 idx,
                                 start_idx,
                                 stop_idx,
                                 out_dir,
                                 'out_indelmap_%s' % getDirLabel(dirname),
                                 numj=num_parts,
                                 queue=queue)
示例#4
0
def runAllPartition(start_idx=0, stop_idx=1000000, nump=50):
    """Issue one partition_pear.py command per assembled fastq file.

    Args:
        start_idx: Forwarded to runCmdCheckIdx together with the running index.
        stop_idx: Forwarded to runCmdCheckIdx together with the running index.
        nump: Integer passed to the script as its second argument.

    A file qualifies when the text after its last underscore is exactly
    'pear.assembled.fastq'. Commands are logged under the 'out_part' label.
    """
    all_dir = getAllDataDirs()
    out_dir = getLogDir()

    idx = 0
    for dirname in all_dir:
        print(getShortDir(dirname), idx)

        for entry in os.listdir(dirname):
            # rpartition yields the whole name when there is no underscore,
            # which matches taking the last element of split('_').
            if entry.rpartition('_')[2] != 'pear.assembled.fastq':
                continue

            python_cmd = getPythonCmd()
            cmd = python_cmd + ' partition_pear.py %s/%s %d' % (
                dirname, entry, nump)
            idx = runCmdCheckIdx(cmd, idx, start_idx, stop_idx, out_dir,
                                 'out_part')
示例#5
0
def runAllMap(start_idx=0,
              stop_idx=100000000,
              recompute=True,
              unassembled_only=False,
              max_cut_dist=4,
              map_dir='mapping_files'):
    """Issue one indel-map command per PEAR output file in every data dir.

    Args:
        start_idx: Forwarded to runCmdCheckIdx together with the running index.
        stop_idx: Forwarded to runCmdCheckIdx together with the running index.
        recompute: When false, files whose '<name>_mappings.txt' output
            already exists in the mapping directory are skipped.
        unassembled_only: Selects file names containing
            '_pear.unassembled_pear.assembled._' instead of
            '_pear.assembled._'.
        max_cut_dist: Passed to the executable as its last argument.
        map_dir: Name of the output directory under each data directory.

    The output name is the input file name minus its last six characters
    (presumably the '.fastq' extension - confirm against the data layout).
    A missing mapping directory is created directly when getRunLocal() is
    true, and via an extra_cmd handed to runCmdCheckIdx otherwise.
    """
    all_dir, out_dir = getAllDataDirs(), getLogDir()

    # Loop-invariant: the marker that selects which files to map.
    if unassembled_only:
        check_str = '_pear.unassembled_pear.assembled._'
    else:
        check_str = '_pear.assembled._'

    idx = 0
    for dirname in all_dir:

        exp_file = getExpOligoFile(dirname)
        print(getShortDir(dirname), idx)

        map_path = dirname + '/' + map_dir
        filenames = [x for x in os.listdir(dirname) if check_str in x]
        for filename in filenames:

            out_stem = filename[:-6]
            # Decide whether to skip before doing any other work.
            if not recompute and os.path.isfile(map_path + '/' + out_stem +
                                                '_mappings.txt'):
                continue

            extra_cmd = ''
            if not os.path.isdir(map_path):
                if getRunLocal():
                    # makedirs also copes with a nested map_dir.
                    os.makedirs(map_path)
                else:
                    # Every file of this directory carries the same extra
                    # command, so it must not fail once an earlier job has
                    # already created the directory.
                    extra_cmd = 'mkdir -p %s' % map_path

            cmd_args = (dirname, filename, exp_file, dirname, map_dir,
                        out_stem, max_cut_dist)
            cmd = getIndelMapExe(
            ) + ' %s/%s %s %s/%s/%s_mappings.txt 1 %d' % cmd_args
            idx = runCmdCheckIdx(cmd,
                                 idx,
                                 start_idx,
                                 stop_idx,
                                 out_dir,
                                 'out_map',
                                 extra_cmd=extra_cmd)
示例#6
0
def runAllPear(start_idx=0, stop_idx=100000):
    """Issue one PEAR command per R1/R2 fastq pair in every data directory.

    Args:
        start_idx: Forwarded to runCmdCheckIdx together with the running index.
        stop_idx: Forwarded to runCmdCheckIdx together with the running index.

    R1 files are those ending in '_R1.fastq' or '_R1_001.fastq'. The matching
    R2 name is the same name with that trailing '_R1' replaced by '_R2'. An R1
    file without its R2 partner is reported and skipped. Output goes to
    '<data dir>/<prefix>_pear', where <prefix> is the R1 name without the
    matched suffix.
    """
    all_dir, out_dir = getAllDataDirs(), getLogDir()
    r1_suffixes = ('_R1.fastq', '_R1_001.fastq')

    idx = 0
    for dirname in all_dir:
        print(getShortDir(dirname), idx)

        for r1_file in os.listdir(dirname):
            r1_suffix = next(
                (s for s in r1_suffixes if r1_file.endswith(s)), None)
            if r1_suffix is None:
                continue

            # Split on the suffix that actually matched rather than on the
            # first '_R1' in the name, so a sample name that itself contains
            # '_R1' (or a '1' before the read tag) cannot corrupt the R2 name.
            file_prefix = r1_file[:-len(r1_suffix)]
            file_suffix = r1_suffix.replace('_R1', '_R2', 1)

            if not os.path.isfile(dirname + '/' + file_prefix + file_suffix):
                print('Could not find matching R2 file:', dirname, file_prefix,
                      file_suffix)
                continue
            cargs = (dirname, r1_file, dirname, file_prefix, file_suffix,
                     dirname, file_prefix)
            cmd = getPearExe(
            ) + ' -f %s/%s -r %s/%s%s -o %s/%s_pear -n 20 -p 0.01' % cargs
            idx = runCmdCheckIdx(cmd, idx, start_idx, stop_idx, out_dir,
                                 'out_pear')
        #e.g. /lustre/scratch117/cellgen/team227/fa9/self-target/indel_analysis/ST_June_2017/data/K562_800x_LV7B_DPI3/mapped_reads/Oligos_70/Oligos_70850-70899_mappedindelsummary.txt
        full_filename = line.split()[0]
        toks = full_filename.split('/')
        fasta_file = toks[-1][:-23] + '.fasta'
        filepath = '/'.join(toks[:-1])
        dirname = '/'.join(toks[:-3])
        nulldir = getNullDir(dirname)
        subdir = toks[-2]

        print idx, dirname, fasta_file
        cmd = '~/run_python.sh indelmap_subdir.py %s %s %s -1 1 %s %d' % (
            dirname, nulldir, subdir, fasta_file)
        idx = runCmdCheckIdx(cmd,
                             idx,
                             start_idx,
                             stop_idx,
                             out_dir,
                             'out_incomplete_indelmap_%s' %
                             getDirLabel(dirname),
                             queue=queue)

    f.close()

elif source == 'mapped_counts':

    for dirname in all_dir:
        dirlabel = getDirLabel(dirname)
        nulldir = getNullDir(dirname)
        repeat_indelmaps = set()
        repeat_reformat = set()
        f = io.open('../quality_checks/mapped_read_summaries/%s.txt' %
                    dirlabel)
    if len(sys.argv) >= 4: overbeek_only = eval(sys.argv[3])
    if len(sys.argv) >= 5: queue = sys.argv[4]

# Script body: issue one run_correct_indel.sh command for every sub-directory
# of every data directory that has a 'mapped_reads' folder.
# NOTE(review): overbeek_only, queue, start_idx and stop_idx are read below
# but assigned earlier in the script, outside the lines visible here.
all_dir, out_dir = getAllDataDirs(), getLogDir()
if overbeek_only: print 'Computing for Overbeek guides only'

file_per_part = 1
max_files_per_dir = 20
# Ceiling division when '/' is integer division, as in Python 2 (which the
# print statements in this script imply); evaluates to 20 for these values.
num_parts = (max_files_per_dir + file_per_part - 1) / file_per_part

# i counts the directories visited; nothing in the visible code reads it.
i, idx = 0, 0
for dirname in all_dir:

    i += 1

    print getShortDir(dirname), idx
    # Skip data directories that have no mapped_reads folder.
    if not os.path.isdir(dirname + '/mapped_reads'): continue

    for subdir in getSubdirs(dirname, withpath=False):
        # With overbeek_only set, only the 'Oligos_71' sub-directory is run.
        if overbeek_only and subdir != 'Oligos_71': continue
        cmd = './run_correct_indel.sh %s %s %s %d' % (
            dirname, getNullDir(dirname), subdir, file_per_part)
        # The value returned by runCmdCheckIdx becomes the index for the next
        # command; logs are labelled per data directory.
        idx = runCmdCheckIdx(cmd,
                             idx,
                             start_idx,
                             stop_idx,
                             out_dir,
                             'out_correct_indelmap_%s' % getDirLabel(dirname),
                             numj=num_parts,
                             queue=queue)