Пример #1
0
def parallelParse(spec, indir, run_types, run_strings):
    """Sum the base and read counts over every sequencing run of one species.

    Args:
        spec: Species name; spaces are replaced with "-" to form the
            directory-style name used for lookups.
        indir: Directory that contains one sub-directory per species.
        run_types: Sequence of run type identifiers, passed through to
            ``mfiles.getFiles`` and ``countReads``.
        run_strings: Sequence of run names, index-aligned with ``run_types``.

    Returns:
        ``(s_mod, outline)`` where ``s_mod`` is the dash-separated species
        name and ``outline`` is a dict with the keys ``"Total bases"`` and
        ``"Total reads"``. When the global ``args.file_check`` is set, the
        function only validates the species directory and returns
        ``("", "")``.

    Raises:
        AssertionError: If the species directory does not exist under
            ``indir``.
        IndexError: If ``run_strings`` is shorter than ``run_types``.
    """
    s_mod = spec.replace(" ", "-")
    # Use the ``indir`` parameter (not the global ``args.indir``) so the
    # directory that is validated is the same one that is searched below.
    spec_dir = os.path.join(indir, s_mod)
    # Raised explicitly rather than with ``assert`` so the check still runs
    # under ``python -O``; the exception type is unchanged for callers.
    if not os.path.isdir(spec_dir):
        raise AssertionError("\nCANNOT FIND DIRECTORY: " + spec_dir)
    if args.file_check:
        return "", ""

    outline = {
        "Total bases": 0,
        "Total reads": 0,
    }

    def add_counts(run_type, run_string):
        """Add the counts for one run to ``outline``; return True if files were found."""
        seqfiles = mfiles.getFiles(s_mod, run_type, run_string, indir)
        if not seqfiles:
            return False
        bases, reads = countReads(seqfiles, run_type)
        outline["Total bases"] += bases
        outline["Total reads"] += reads
        return True

    # These two species are additionally looked up under a
    # "<run_string>-no-WGA" run name for every run.
    has_no_wga_runs = s_mod in ("Rattus-exulans", "Rattus-hoffmanni")

    for run_ind, r in enumerate(run_types):
        run_string = run_strings[run_ind]

        if add_counts(r, run_string):
            # Running totals are reported after each primary run with files.
            print(outline)

        if has_no_wga_runs:
            add_counts(r, run_string + "-no-WGA")

    return s_mod, outline
        # Make the output directory for this species.

        dedup_cmds = []

        for r in runtype:
            run_string = runstrs[r]
            run_dir = os.path.join(spec_dir, run_string)
            #print(run_string);

            base_logfile = os.path.join(logdir,
                                        s_mod + "-" + run_string + "-dedup")
            # Get the base logfile name for this run.

            seqfiles = mfiles.getFiles(s_mod,
                                       r,
                                       run_string,
                                       prev_step_dir,
                                       unmerged_flag=True)
            if seqfiles:
                if not os.path.isdir(run_dir):
                    os.system("mkdir " + run_dir)
                # Make the output directory for this run.

                dedup_cmds = genDedupCmd(args.path, seqfiles, r, base_logfile,
                                         step, prev_step, dedup_cmds)

            if s_mod in ["Rattus-exulans", "Rattus-hoffmanni"]:
                run_string += "-no-WGA"
                seqfiles = mfiles.getFiles(s_mod,
                                           r,
                                           run_string,
            ref = "/mnt/beegfs/gt156213e/ref-genomes/mm10/mm10.fa"
        elif args.ref == "-rat":
            ref = "/mnt/beegfs/gt156213e/ref-genomes/rnor6/rn6.fa"
        else:
            ref = os.path.join(ref_dir, s_mod, s_mod + ".fa")
        ref = ref.replace("..fa", ".fa")
        assert os.path.isfile(ref), "\nAssembly file not found: " + ref
        # Get reference

        for run_ind in range(len(runtype)):
            r = runtype[run_ind]
            run_string = runstrs[run_ind]
            #print(run_string);
            run_dir = os.path.join(spec_dir, run_string)

            seqfiles = mfiles.getFiles(s_mod, r, run_string, prev_step_dir)
            if seqfiles:
                #print(seqfiles);
                #sys.exit();
                if not os.path.isdir(run_dir):
                    os.system("mkdir " + run_dir)
                bwa_cmds += genBWACmd(args.path, seqfiles, r, run_string,
                                      s_mod, run_dir, base_logfile, ref)

            if s_mod in ["Rattus-exulans", "Rattus-hoffmanni"]:
                run_string += "-no-WGA"

                seqfiles = mfiles.getFiles(s_mod, r, run_string, prev_step_dir)
                if seqfiles:
                    if not os.path.isdir(run_dir):
                        os.system("mkdir " + run_dir)