Пример #1
0
def spades(args):
    """
    %prog spades folder

    Run automated SPADES.
    """
    # The docstring above is also handed to OptionParser as the usage text,
    # so its wording is part of the command-line help output.
    #
    # For every item yielded by `iter_project(folder, 2)` this prints (does
    # not execute) a `spades.py` command line; the k-mer list is chosen from
    # the read length of the first file of the pair.
    from jcvi.formats.fastq import readlen

    p = OptionParser(spades.__doc__)
    opts, args = p.parse_args(args)

    # Exactly one positional argument is expected; anything else shows the
    # help instead of failing on the tuple unpacking below.
    if len(args) != 1:
        sys.exit(not p.print_help())

    folder, = args
    # Use a dedicated name for the file pair so the option parser `p` is not
    # clobbered by the loop variable.
    for reads, pf in iter_project(folder, 2):
        rl = readlen([reads[0], "--silent"])

        # <http://spades.bioinf.spbau.ru/release3.1.0/manual.html#sec3.4>
        # The larger threshold must be tested first, otherwise the 250bp
        # branch can never be reached.
        kmers = None
        if rl >= 250:
            kmers = "21,33,55,77,99,127"
        elif rl >= 150:
            kmers = "21,33,55,77"

        cmd = "spades.py"
        if kmers:
            cmd += " -k {0}".format(kmers)
        cmd += " --careful"
        cmd += " --pe1-1 {0} --pe1-2 {1}".format(*reads)
        cmd += " -o {0}_spades".format(pf)
        # Single-argument call form prints the same text on Python 2 and 3.
        print(cmd)
Пример #2
0
def spades(args):
    """
    %prog spades folder

    Run automated SPADES.
    """
    # NOTE: the docstring is reused as the OptionParser usage string, so it
    # is deliberately left as-is.
    #
    # Prints one `spades.py` command per item yielded by
    # `iter_project(folder, 2)`; nothing is executed here.
    from jcvi.formats.fastq import readlen

    p = OptionParser(spades.__doc__)
    opts, args = p.parse_args(args)

    # Show the help for any argument count other than one, rather than
    # raising on the unpacking of `args` below.
    if len(args) != 1:
        sys.exit(not p.print_help())

    folder, = args
    # `pair` (not `p`) so that the parser object above is not overwritten.
    for pair, pf in iter_project(folder, 2):
        # Read length is taken from the first file of the pair only.
        rl = readlen([pair[0], "--silent"])

        # <http://spades.bioinf.spbau.ru/release3.1.0/manual.html#sec3.4>
        # Check the longest read class first; with the thresholds in
        # ascending order the second branch was unreachable.
        if rl >= 250:
            kmers = "21,33,55,77,99,127"
        elif rl >= 150:
            kmers = "21,33,55,77"
        else:
            # No explicit -k is passed for shorter reads.
            kmers = None

        cmd = "spades.py"
        if kmers:
            cmd += " -k {0}".format(kmers)
        cmd += " --careful"
        cmd += " --pe1-1 {0} --pe1-2 {1}".format(*pair)
        cmd += " -o {0}_spades".format(pf)
        # Works as a statement on Python 2 and as a function on Python 3.
        print(cmd)
Пример #3
0
def prepare(args):
    """
    %prog prepare genomesize *.fastq

    Prepare MERACULOUS configuation file. Genome size should be entered in Mb.
    """
    # The docstring is reused verbatim as the option parser's usage text.
    #
    # Writes `meraculous.config` (also echoed, via tee=True) and a one-line
    # `run.sh` that invokes run_meraculous.sh on that config.
    parser = OptionParser(prepare.__doc__ + FastqNamings)
    parser.add_option("-K", default=51, type="int", help="K-mer size")
    parser.set_cpus(cpus=32)
    opts, args = parser.parse_args(args)

    if len(args) < 2:
        sys.exit(not parser.print_help())

    # First positional is the genome size, divided by 1000 before it is
    # written out; the remaining positionals are the read files.
    genomesize = float(args[0]) / 1000
    fnames = args[1:]
    for fname in fnames:
        assert op.exists(fname), "File `{0}` not found.".format(fname)

    # The config text is collected piece by piece and joined once at the end.
    pieces = [
        comment_banner("Meraculous params file"),
        comment_banner("Basic parameters"),
        "# Describe the libraries ( one line per library )",
        "# " + " ".join(header.split()),
    ]

    # One entry per library whose size is non-zero; the rank is simply the
    # 1-based position among the libraries that were kept.
    lib_seqs = []
    for lib, fs in get_libs(fnames):
        if lib.size == 0:
            continue
        rank = len(lib_seqs) + 1
        library_name = lib.library_name
        name = library_name.replace("-", "")
        wildcard = "{0}*.1.*,{0}*.2.*".format(library_name)
        rl = max([readlen([x]) for x in fs])
        lib_seqs.append(lib.get_lib_seq(wildcard, name, rl, rank))

    # Blank separator line, then the library table.
    pieces.append("")
    pieces.append("\n".join(load_csv(None, lib_seqs, sep=" ")))

    params = [
        ("genome_size", genomesize),
        ("is_diploid", 0),
        ("mer_size", opts.K),
        ("num_prefix_blocks", 1),
        ("no_read_validation", 0),
        ("local_num_procs", opts.cpus),
    ]
    # Blank separator line, then the key/value parameters.
    pieces.append("")
    pieces.append("\n".join(load_csv(None, params, sep=" ")))

    cfgfile = "meraculous.config"
    write_file(cfgfile, "\n".join(pieces) + "\n", tee=True)

    runsh = "run.sh"
    launcher = "~/export/meraculous/bin/run_meraculous.sh -c {0}".format(cfgfile)
    write_file(runsh, launcher)
Пример #4
0
def prepare(args):
    """
    %prog prepare genomesize *.fastq

    Prepare MERACULOUS configuation file. Genome size should be entered in Mb.
    """
    # NOTE: the docstring is also the usage string shown by the parser.
    #
    # Produces two files: `meraculous.config` (echoed as well, tee=True) and
    # `run.sh`, which calls run_meraculous.sh with that config.
    optparser = OptionParser(prepare.__doc__ + FastqNamings)
    optparser.add_option("-K", default=51, type="int", help="K-mer size")
    optparser.set_cpus(cpus=32)
    opts, args = optparser.parse_args(args)

    if len(args) < 2:
        sys.exit(not optparser.print_help())

    # Genome size comes first and is scaled down by 1000; every following
    # argument is an input file that must exist.
    genomesize = float(args[0]) / 1000
    fnames = args[1:]
    for path in fnames:
        assert op.exists(path), "File `{0}` not found.".format(path)

    # Header section of the config, one list element per output line.
    chunks = []
    chunks.append(comment_banner("Meraculous params file"))
    chunks.append(comment_banner("Basic parameters"))
    chunks.append("# Describe the libraries ( one line per library )")
    chunks.append("# " + " ".join(header.split()))

    # Libraries reporting size 0 are skipped and do not consume a rank.
    lib_seqs = []
    for lib, fs in get_libs(fnames):
        if lib.size == 0:
            continue
        library_name = lib.library_name
        longest = max([readlen([x]) for x in fs])
        lib_seqs.append(
            lib.get_lib_seq(
                "{0}*.1.*,{0}*.2.*".format(library_name),
                library_name.replace("-", ""),
                longest,
                len(lib_seqs) + 1,
            )
        )

    # Each table is preceded by an empty line in the output.
    chunks.append("")
    chunks.append("\n".join(load_csv(None, lib_seqs, sep=" ")))

    params = [
        ("genome_size", genomesize),
        ("is_diploid", 0),
        ("mer_size", opts.K),
        ("num_prefix_blocks", 1),
        ("no_read_validation", 0),
        ("local_num_procs", opts.cpus),
    ]
    chunks.append("")
    chunks.append("\n".join(load_csv(None, params, sep=" ")))

    cfgfile = "meraculous.config"
    write_file(cfgfile, "\n".join(chunks) + "\n", tee=True)

    runsh = "run.sh"
    write_file(
        runsh,
        "~/export/meraculous/bin/run_meraculous.sh -c {0}".format(cfgfile),
    )
Пример #5
0
Файл: soap.py Проект: rrane/jcvi
def prepare(args):
    """
    %prog prepare *.fastq

    Scan input fastq files (see below) and write SOAP config files based
    on inputfiles. Use "--scaffold contigs.fasta" to perform scaffolding.
    """
    # The docstring is passed to OptionParser as usage text, so it is kept
    # unchanged.
    #
    # Outputs: `soap.config` (all libraries), `soap.gc.config` (only
    # libraries whose asm_flags is greater than 2) and a `run.sh` script.
    # Every config block is echoed to stderr as well.
    from jcvi.formats.base import write_file

    p = OptionParser(prepare.__doc__ + FastqNamings)
    p.add_option("-K", default=45, type="int", help="K-mer size [default: %default]")
    p.add_option(
        "--assemble_1st_rank_only",
        default=False,
        action="store_true",
        help="Assemble the first rank only, other libs asm_flags=2 [default: %default]",
    )
    p.add_option("--scaffold", help="Only perform scaffolding [default: %default]")
    p.add_option("--gapclose", help="Only perform gap closure [default: %default]")
    p.set_cpus()
    opts, args = p.parse_args(args)

    if len(args) < 1:
        sys.exit(not p.print_help())

    fnames = args
    for x in fnames:
        assert op.exists(x), "File `{0}` not found.".format(x)

    a1st = opts.assemble_1st_rank_only

    cfgfile = "soap.config"
    gc_cfgfile = "soap.gc.config"

    libs = get_libs(fnames)
    max_rd_len = max(readlen([f]) for f in fnames)

    # Collect singletons first: files of every library whose size is 0.
    singletons = []
    for lib, fs in libs:
        if lib.size == 0:
            singletons += fs

    # Both config files are managed by `with` so they are closed even when
    # writing a block fails part-way.
    with open(cfgfile, "w") as fw, open(gc_cfgfile, "w") as fw_gc:
        block = "max_rd_len={0}\n".format(max_rd_len)
        # `write(block + "\n")` emits exactly what a print of `block` would,
        # and is valid on both Python 2 and 3.
        for stream in (sys.stderr, fw, fw_gc):
            stream.write(block + "\n")

        rank = 0
        for lib, fs in libs:
            size = lib.size
            if size == 0:
                continue

            rank += 1
            block = "[LIB]\n"
            block += "avg_ins={0}\n".format(size)
            block += "reverse_seq={0}\n".format(lib.reverse_seq)
            # With --assemble_1st_rank_only, every library after the first
            # is forced to asm_flags=2.
            asm_flags = 2 if (rank > 1 and a1st) else lib.asm_flags
            block += "asm_flags={0}\n".format(asm_flags)
            block += "rank={0}\n".format(rank)
            if lib.reverse_seq:
                pair_num_cutoff = 3
                block += "pair_num_cutoff={0}\n".format(pair_num_cutoff)
            block += "map_len=35\n"

            # NOTE(review): a file name containing neither ".1." nor ".2."
            # leaves `tag` unset (or stale from the previous file) - confirm
            # that paired libraries always follow that naming.
            for f in fs:
                if ".1." in f:
                    tag = "q1"
                elif ".2." in f:
                    tag = "q2"
                block += "{0}={1}\n".format(tag, f)

            # Unpaired reads are attached to the first-ranked library only.
            if rank == 1:
                for s in singletons:
                    block += "q={0}\n".format(s)

            sys.stderr.write(block + "\n")
            fw.write(block + "\n")

            if asm_flags > 2:
                fw_gc.write(block + "\n")

    runfile = "run.sh"
    scaffold = opts.scaffold
    header = SOAPHEADER.format(opts.cpus, opts.K)
    if opts.gapclose:
        gapclose = opts.gapclose
        outfile = gapclose.rsplit(".", 1)[0] + ".closed.fasta"
        template = header + GCRUNG.format(gapclose, outfile)
    else:
        template = header + (SCFRUN % scaffold if scaffold else SOAPRUN)

    write_file(runfile, template, meta="run script")
Пример #6
0
def prepare(args):
    """
    %prog prepare *.fastq

    Scan input fastq files (see below) and write SOAP config files based
    on inputfiles. Use "--scaffold contigs.fasta" to perform scaffolding.
    """
    # The docstring doubles as the parser's usage text and is left untouched.
    #
    # Outputs: `soap.config` (all libraries), `soap.gc.config` (only
    # libraries whose asm_flags is greater than 2) and a `run.sh` script.
    # Every config block is echoed to stderr as well.
    from jcvi.formats.base import write_file

    p = OptionParser(prepare.__doc__ + FastqNamings)
    p.add_option("-K",
                 default=45,
                 type="int",
                 help="K-mer size [default: %default]")
    p.add_option(
        "--assemble_1st_rank_only",
        default=False,
        action="store_true",
        help=
        "Assemble the first rank only, other libs asm_flags=2 [default: %default]"
    )
    p.add_option("--scaffold",
                 help="Only perform scaffolding [default: %default]")
    p.add_option("--gapclose",
                 help="Only perform gap closure [default: %default]")
    p.set_cpus()
    opts, args = p.parse_args(args)

    if len(args) < 1:
        sys.exit(not p.print_help())

    fnames = args
    K = opts.K
    for x in fnames:
        assert op.exists(x), "File `{0}` not found.".format(x)

    a1st = opts.assemble_1st_rank_only

    cfgfile = "soap.config"
    gc_cfgfile = "soap.gc.config"

    libs = get_libs(fnames)
    max_rd_len = max(readlen([f]) for f in fnames)

    # Collect singletons first: files of every library whose size is 0.
    singletons = []
    for lib, fs in libs:
        if lib.size == 0:
            singletons += fs

    # Context managers guarantee both config files are closed even if
    # building or writing a block raises.
    with open(cfgfile, "w") as fw, open(gc_cfgfile, "w") as fw_gc:
        block = "max_rd_len={0}\n".format(max_rd_len)
        for stream in (sys.stderr, fw, fw_gc):
            print(block, file=stream)

        rank = 0
        for lib, fs in libs:
            size = lib.size
            if size == 0:
                continue

            rank += 1
            block = "[LIB]\n"
            block += "avg_ins={0}\n".format(size)
            block += "reverse_seq={0}\n".format(lib.reverse_seq)
            # With --assemble_1st_rank_only, every library after the first
            # is forced to asm_flags=2.
            asm_flags = 2 if (rank > 1 and a1st) else lib.asm_flags
            block += "asm_flags={0}\n".format(asm_flags)
            block += "rank={0}\n".format(rank)
            if lib.reverse_seq:
                pair_num_cutoff = 3
                block += "pair_num_cutoff={0}\n".format(pair_num_cutoff)
            block += "map_len=35\n"

            # NOTE(review): a file name containing neither ".1." nor ".2."
            # leaves `tag` unset (or stale from the previous file) - confirm
            # that paired libraries always follow that naming.
            for f in fs:
                if ".1." in f:
                    tag = "q1"
                elif ".2." in f:
                    tag = "q2"
                block += "{0}={1}\n".format(tag, f)

            # Unpaired reads go with the first-ranked library; the key is
            # "q" when is_fastq() accepts the file and "f" otherwise.
            if rank == 1:
                for s in singletons:
                    tag = "q" if is_fastq(s) else "f"
                    block += tag + "={0}\n".format(s)

            print(block, file=sys.stderr)
            print(block, file=fw)

            if asm_flags > 2:
                print(block, file=fw_gc)

    runfile = "run.sh"
    scaffold = opts.scaffold
    # Pick the SOAPdenovo binary by k-mer size: 63mer build up to K=63,
    # 127mer build above that.
    bb = 63 if K <= 63 else 127
    binary = "SOAPdenovo-{0}mer".format(bb)
    header = SOAPHEADER.format(opts.cpus, K, binary)
    if opts.gapclose:
        gapclose = opts.gapclose
        outfile = gapclose.rsplit(".", 1)[0] + ".closed.fasta"
        template = header + GCRUNG.format(gapclose, outfile)
    else:
        template = header + (SCFRUN % scaffold if scaffold else SOAPRUN)

    write_file(runfile, template)
Пример #7
0
def prepare(args):
    """
    %prog prepare "B. oleracea" *.fastq

    Scan input fastq files (see below) and create `in_groups.csv` and
    `in_libs.csv`. The species name does not really matter.
    """
    # The docstring doubles as the usage text given to the option parser.
    #
    # Writes up to three group tables (split by quality offset, plus the
    # combined one), the library table, and - unless --norun - `run.sh`.
    from jcvi.utils.table import write_csv
    from jcvi.formats.base import write_file
    from jcvi.formats.fastq import guessoffset, readlen

    parser = OptionParser(prepare.__doc__ + FastqNamings)
    parser.add_option("--corr", default=False, action="store_true",
                      help="Extra parameters for corrected data")
    parser.add_option("--norun", default=False, action="store_true",
                      help="Don't write `run.sh` script")
    parser.add_option("--ploidy", default="2", choices=("1", "2"),
                      help="Ploidy")
    parser.set_cpus()
    opts, args = parser.parse_args(args)

    if len(args) < 1:
        sys.exit(not parser.print_help())

    organism_name = args[0]
    # Project name is the upper-cased initials of the organism name.
    project_name = "".join(word[0] for word in organism_name.split()).upper()
    # Without explicit files, fall back to every *.fastq* in the current
    # directory.
    if len(args) == 1:
        fnames = sorted(glob("*.fastq*"))
    else:
        fnames = sorted(args[1:])
    for path in fnames:
        assert op.exists(path), "File `{0}` not found.".format(path)

    groupheader = ["group_name", "library_name", "file_name"]
    libheader = [
        "library_name", "project_name", "organism_name", "type", "paired",
        "frag_size", "frag_stddev", "insert_size", "insert_stddev",
        "read_orientation", "genomic_start", "genomic_end",
    ]

    groups_33, groups_64, libs = [], [], []
    for path in fnames:
        offset = guessoffset([path])
        group_name = op.basename(path).split(".")[0]
        library_name = "-".join(group_name.split("-")[:2])

        # Handle paired files and convert to wildcard: the ".1." file stands
        # for both mates, the ".2." file contributes nothing of its own.
        if ".1." in path:
            path = path.replace(".1.", ".?.")
        elif ".2." in path:
            continue

        if offset == 64:
            bucket = groups_64
        else:
            bucket = groups_33
        bucket.append((group_name, library_name, path))
        if library_name not in libs:
            libs.append(library_name)

    libcontents = []
    for library_name in libs:
        lib = Library(library_name)
        # Falsy size/stddev values are written as empty cells.
        size = lib.size or ""
        stddev = lib.stddev or ""
        lib_type = lib.type
        paired = lib.paired
        read_orientation = lib.read_orientation

        # Size goes into the frag_* columns for fragment libraries and into
        # the insert_* columns for everything else.
        if lib_type == "fragment":
            frag_size, frag_stddev = size, stddev
            insert_size, insert_stddev = "", ""
        else:
            frag_size, frag_stddev = "", ""
            insert_size, insert_stddev = size, stddev

        row = (library_name, project_name, organism_name, lib_type, paired,
               frag_size, frag_stddev, insert_size, insert_stddev,
               read_orientation, "", "")
        libcontents.append(row)

    group_tables = (
        ("in_groups_33.csv", groups_33),
        ("in_groups_64.csv", groups_64),
        ("in_groups.csv", groups_33 + groups_64),
    )
    for csvfile, groups in group_tables:
        # Empty tables are not written at all.
        if groups:
            write_csv(groupheader, groups, filename=csvfile, tee=True)
            message = "`{0}` created (# of groups = {1}).".format(
                csvfile, len(groups))
            logging.debug(message)

    write_csv(libheader, libcontents, filename="in_libs.csv", tee=True)
    message = "`in_libs.csv` created (# of libs = {0}).".format(
        len(libcontents))
    logging.debug(message)

    # ALLPATHS stalls on reads over 250bp <https://www.biostars.org/p/122091/>
    # (the switch below is applied from 200bp upwards).
    max_rd_len = max([readlen([f]) for f in fnames])
    extra = ""
    if max_rd_len > 200:
        extra = "CLOSE_UNIPATH_GAPS=False "
    if opts.corr:
        extra += ("FE_NUM_CYCLES=1 EC_K=28 FE_QUAL_CEIL_RADIUS=0"
                  " REMOVE_DODGY_READS_FRAG=False FE_MAX_KMER_FREQ_TO_MARK=1")

    runfile = "run.sh"
    if not opts.norun:
        write_file(runfile,
                   ALLPATHSRUN.format(opts.ploidy, opts.cpus, extra))
Пример #8
0
def expand(args):
    """
    %prog expand bes.fasta reads.fastq

    Expand sequences using short reads. Useful, for example for getting BAC-end
    sequences. The template to use, in `bes.fasta` may just contain the junction
    sequences, then align the reads to get the 'flanks' for such sequences.
    """
    # The docstring is passed to OptionParser as usage text; left unchanged.
    #
    # Steps: align reads to the templates, keep the first `nreads` mapped
    # reads, assemble them with runAssembly, BLAST the contigs against the
    # templates, and write the annotated contigs ordered like `bes.fasta`.
    # Returns the name of the annotated FASTA file.
    import math

    from jcvi.formats.fasta import Fasta, SeqIO
    from jcvi.formats.fastq import readlen, first, fasta
    from jcvi.formats.blast import Blast
    from jcvi.formats.base import FileShredder
    from jcvi.apps.bowtie import align, get_samfile
    from jcvi.apps.align import blast

    p = OptionParser(expand.__doc__)
    p.set_depth(depth=200)
    p.set_firstN()
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    bes, reads = args
    size = Fasta(bes).totalsize
    rl = readlen([reads])
    # Target size is the template plus one read length of flank per side;
    # the read count for the requested depth is rounded up to a multiple
    # of 1000.
    expected_size = size + 2 * rl
    nreads = expected_size * opts.depth / rl
    nreads = int(math.ceil(nreads / 1000.)) * 1000

    # Attract reads
    samfile, logfile = align([bes, reads, "--reorder", "--mapped",
           "--firstN={0}".format(opts.firstN)])

    samfile, mapped, _ = get_samfile(reads, bes, bowtie=True, mapped=True)
    logging.debug("Extract first {0} reads from `{1}`.".format(nreads, mapped))

    pf = mapped.split(".")[0]
    pf = pf.split("-")[0]
    bespf = bes.split(".")[0]
    reads = pf + ".expand.fastq"
    first([str(nreads), mapped, "-o", reads])

    # Perform mini-assembly
    fastafile = reads.rsplit(".", 1)[0] + ".fasta"
    qualfile = ""
    if need_update(reads, fastafile):
        fastafile, qualfile = fasta([reads])

    contigs = op.join(pf, "454LargeContigs.fna")
    if need_update(fastafile, contigs):
        cmd = "runAssembly -o {0} -cpu 8 {1}".format(pf, fastafile)
        sh(cmd)
    assert op.exists(contigs)

    # Annotate contigs
    blastfile = blast([bes, contigs])
    mapping = {}
    for query, b in Blast(blastfile).iter_best_hit():
        mapping[query] = b

    f = Fasta(contigs, lazy=True)
    annotatedfasta = ".".join((pf, bespf, "fasta"))
    keys = list(Fasta(bes).iterkeys_ordered())  # keep an ordered list
    recs = []
    for _, v in f.iteritems_ordered():
        vid = v.id
        if vid not in mapping:
            continue
        b = mapping[vid]
        subject = b.subject
        rec = v.reverse_complement() if b.orientation == '-' else v
        rec.id = rid = "_".join((pf, vid, subject))
        rec.description = ""
        # Sort key: position of the matched template in `bes`, then the id.
        recs.append((keys.index(subject), rid, rec))

    recs = [x[-1] for x in sorted(recs)]
    # Open the output only once the records are ready, and let `with` close
    # it even if writing fails.
    with open(annotatedfasta, "w") as fw:
        SeqIO.write(recs, fw, "fasta")

    # Clean up all intermediate files, including the assembly folder `pf`.
    FileShredder([samfile, logfile, mapped, reads, fastafile, qualfile, blastfile, pf])
    logging.debug("Annotated seqs (n={0}) written to `{1}`.".format(
        len(recs), annotatedfasta))

    return annotatedfasta
Пример #9
0
def expand(args):
    """
    %prog expand bes.fasta reads.fastq

    Expand sequences using short reads. Useful, for example for getting BAC-end
    sequences. The template to use, in `bes.fasta` may just contain the junction
    sequences, then align the reads to get the 'flanks' for such sequences.
    """
    # NOTE: the docstring is reused as the parser's usage text, so it is
    # kept as-is.
    #
    # Pipeline: align reads to the templates -> take the first `nreads`
    # mapped reads -> runAssembly -> BLAST contigs against the templates ->
    # write annotated contigs in template order. The annotated FASTA file
    # name is returned.
    import math

    from jcvi.formats.fasta import Fasta, SeqIO
    from jcvi.formats.fastq import readlen, first, fasta
    from jcvi.formats.blast import Blast
    from jcvi.formats.base import FileShredder
    from jcvi.apps.bowtie import align, get_samfile
    from jcvi.apps.align import blast

    p = OptionParser(expand.__doc__)
    p.set_depth(depth=200)
    p.set_firstN()
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    bes, reads = args
    size = Fasta(bes).totalsize
    rl = readlen([reads])
    # Expected assembly size allows one read length of flank on each side;
    # the number of reads needed for the requested depth is then rounded up
    # to the next multiple of 1000.
    expected_size = size + 2 * rl
    nreads = expected_size * opts.depth / rl
    nreads = int(math.ceil(nreads / 1000.)) * 1000

    # Attract reads
    samfile, logfile = align([bes, reads, "--reorder", "--mapped",
           "--firstN={0}".format(opts.firstN)])

    samfile, mapped, _ = get_samfile(reads, bes, bowtie=True, mapped=True)
    logging.debug("Extract first {0} reads from `{1}`.".format(nreads, mapped))

    pf = mapped.split(".")[0]
    pf = pf.split("-")[0]
    bespf = bes.split(".")[0]
    reads = pf + ".expand.fastq"
    first([str(nreads), mapped, "-o", reads])

    # Perform mini-assembly
    fastafile = reads.rsplit(".", 1)[0] + ".fasta"
    qualfile = ""
    if need_update(reads, fastafile):
        fastafile, qualfile = fasta([reads])

    contigs = op.join(pf, "454LargeContigs.fna")
    if need_update(fastafile, contigs):
        cmd = "runAssembly -o {0} -cpu 8 {1}".format(pf, fastafile)
        sh(cmd)
    assert op.exists(contigs)

    # Annotate contigs
    blastfile = blast([bes, contigs])
    mapping = {}
    for query, b in Blast(blastfile).iter_best_hit():
        mapping[query] = b

    f = Fasta(contigs, lazy=True)
    annotatedfasta = ".".join((pf, bespf, "fasta"))
    keys = list(Fasta(bes).iterkeys_ordered())  # keep an ordered list
    recs = []
    # Only the record is needed from each item; the key is ignored.
    for _, v in f.iteritems_ordered():
        vid = v.id
        if vid not in mapping:
            continue
        b = mapping[vid]
        subject = b.subject
        rec = v.reverse_complement() if b.orientation == '-' else v
        rec.id = rid = "_".join((pf, vid, subject))
        rec.description = ""
        # Records are ordered by where their template sits in `bes`.
        recs.append((keys.index(subject), rid, rec))

    recs = [x[-1] for x in sorted(recs)]
    # The handle is scoped to the write so it is always closed, and the
    # output file is not created before there is something to put in it.
    with open(annotatedfasta, "w") as fw:
        SeqIO.write(recs, fw, "fasta")

    # Remove every intermediate product, the assembly folder `pf` included.
    FileShredder([samfile, logfile, mapped, reads, fastafile, qualfile, blastfile, pf])
    logging.debug("Annotated seqs (n={0}) written to `{1}`.".format(
        len(recs), annotatedfasta))

    return annotatedfasta
Пример #10
0
def prepare(args):
    """
    %prog prepare "B. oleracea" *.fastq

    Scan input fastq files (see below) and create `in_groups.csv` and
    `in_libs.csv`. The species name does not really matter.
    """
    # NOTE: the docstring is also the usage string shown by the parser.
    #
    # Writes up to three group tables (split by quality offset, plus the
    # combined one), the library table, and - unless --norun - `run.sh`.
    from jcvi.utils.table import write_csv
    from jcvi.formats.base import write_file
    from jcvi.formats.fastq import guessoffset, readlen

    optparser = OptionParser(prepare.__doc__ + FastqNamings)
    optparser.add_option(
        "--corr",
        default=False,
        action="store_true",
        help="Extra parameters for corrected data [default: %default]",
    )
    optparser.add_option(
        "--norun",
        default=False,
        action="store_true",
        help="Don't write `run.sh` script [default: %default]",
    )
    optparser.add_option(
        "--ploidy",
        default="2",
        choices=("1", "2"),
        help="Ploidy [default: %default]",
    )
    optparser.set_cpus()
    opts, args = optparser.parse_args(args)

    if len(args) < 1:
        sys.exit(not optparser.print_help())

    organism_name = args[0]
    # Initials of the organism name, upper-cased, serve as the project name.
    initials = [word[0] for word in organism_name.split()]
    project_name = "".join(initials).upper()
    # When only the organism is given, use all *.fastq* files found in the
    # current directory.
    candidates = glob("*.fastq*") if len(args) == 1 else args[1:]
    fnames = sorted(candidates)
    for fname in fnames:
        assert op.exists(fname), "File `{0}` not found.".format(fname)

    groupheader = ["group_name", "library_name", "file_name"]
    libheader = [
        "library_name", "project_name", "organism_name", "type", "paired",
        "frag_size", "frag_stddev", "insert_size", "insert_stddev",
        "read_orientation", "genomic_start", "genomic_end",
    ]

    groups_33 = []
    groups_64 = []
    libs = []
    for fname in fnames:
        offset = guessoffset([fname])
        group_name = op.basename(fname).split(".")[0]
        library_name = "-".join(group_name.split("-")[:2])

        # Handle paired files and convert to wildcard; the ".2." mate is
        # skipped because the wildcard entry of ".1." already covers it.
        if ".1." in fname:
            fname = fname.replace(".1.", ".?.")
        elif ".2." in fname:
            continue

        entry = (group_name, library_name, fname)
        if offset == 64:
            groups_64.append(entry)
        else:
            groups_33.append(entry)
        if library_name not in libs:
            libs.append(library_name)

    libcontents = []
    for library_name in libs:
        lib = Library(library_name)
        # Falsy size/stddev values become empty cells.
        size = lib.size or ""
        stddev = lib.stddev or ""
        lib_type = lib.type
        paired = lib.paired
        read_orientation = lib.read_orientation

        # Fragment libraries fill the frag_* columns; all other types fill
        # the insert_* columns instead.
        is_fragment = lib_type == "fragment"
        frag_size = size if is_fragment else ""
        frag_stddev = stddev if is_fragment else ""
        insert_size = "" if is_fragment else size
        insert_stddev = "" if is_fragment else stddev

        libcontents.append((
            library_name, project_name, organism_name, lib_type, paired,
            frag_size, frag_stddev, insert_size, insert_stddev,
            read_orientation, "", "",
        ))

    outputs = [
        (groups_33, "in_groups_33.csv"),
        (groups_64, "in_groups_64.csv"),
        (groups_33 + groups_64, "in_groups.csv"),
    ]
    for groups, csvfile in outputs:
        # Nothing is written for an empty table.
        if not groups:
            continue
        write_csv(groupheader, groups, filename=csvfile, tee=True)
        logging.debug(
            "`{0}` created (# of groups = {1}).".format(csvfile, len(groups)))

    write_csv(libheader, libcontents, filename="in_libs.csv", tee=True)
    logging.debug(
        "`in_libs.csv` created (# of libs = {0}).".format(len(libcontents)))

    # ALLPATHS stalls on reads over 250bp <https://www.biostars.org/p/122091/>
    # (the switch below is applied from 200bp upwards).
    max_rd_len = max([readlen([f]) for f in fnames])
    extra = ""
    if max_rd_len > 200:
        extra = "CLOSE_UNIPATH_GAPS=False "
    if opts.corr:
        extra += ("FE_NUM_CYCLES=1 EC_K=28 FE_QUAL_CEIL_RADIUS=0"
                  " REMOVE_DODGY_READS_FRAG=False FE_MAX_KMER_FREQ_TO_MARK=1")

    runfile = "run.sh"
    if not opts.norun:
        contents = ALLPATHSRUN.format(opts.ploidy, opts.cpus, extra)
        write_file(runfile, contents)