Exemplo n.º 1
0
def patcher(args):
    """
    %prog patcher backbone.bed other.bed

    Given optical map alignment, prepare the patchers. Use --backbone to suggest
    which assembly is the major one, and the patchers will be extracted from
    another assembly.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text, so
    # it is runtime output; implementation notes are kept in comments instead.
    from jcvi.formats.bed import uniq

    p = OptionParser(patcher.__doc__)
    # --backbone is still accepted on the command line, although nothing in
    # this function reads its value.
    p.add_option("--backbone",
                 default="OM",
                 help="Prefix of the backbone assembly [default: %default]")
    p.add_option("--object",
                 default="object",
                 help="New object name [default: %default]")
    opts, args = p.parse_args(args)

    # Exactly two positional arguments are required; otherwise print the help
    # and exit.
    if len(args) != 2:
        sys.exit(not p.print_help())

    backbonebed, otherbed = args
    # From here on both names hold whatever uniq() returned; the results are
    # used as file names below.
    backbonebed = uniq([backbonebed])
    otherbed = uniq([otherbed])

    # Output prefix: everything before the first "." of the backbone file name.
    pf = backbonebed.split(".")[0]

    # Make a uniq bed keeping backbone at redundant intervals
    cmd = "intersectBed -v -wa"
    cmd += " -a {0} -b {1}".format(otherbed, backbonebed)
    outfile = otherbed.rsplit(".", 1)[0] + ".not." + backbonebed
    sh(cmd, outfile=outfile)

    uniqbed = Bed()
    uniqbedfile = pf + ".merged.bed"
    uniqbed.extend(Bed(backbonebed))
    uniqbed.extend(Bed(outfile))
    uniqbed.print_to_file(uniqbedfile, sorted=True)

    # Condense adjacent intervals, allow some chaining.
    # Consecutive features are grouped by the seqid parsed out of their accn.
    key = lambda x: range_parse(x.accn).seqid

    bed_fn = pf + ".patchers.bed"

    # The context manager closes the file even if merging fails midway, and
    # write() behaves the same on Python 2 and 3 (the former ``print >>``
    # statement raises TypeError on Python 3).
    with open(bed_fn, "w") as bed_fw:
        for _, sb in groupby(uniqbed, key=key):
            seqid, start, end, strand = merge_ranges(list(sb))
            fields = (seqid, start, end, opts.object, 1000, strand)
            bed_fw.write("\t".join(str(x) for x in fields) + "\n")
Exemplo n.º 2
0
def patcher(args):
    """
    %prog patcher backbone.bed other.bed

    Given optical map alignment, prepare the patchers. Use --backbone to suggest
    which assembly is the major one, and the patchers will be extracted from
    another assembly.
    """
    # NOTE: the docstring above doubles as the usage text given to
    # OptionParser, so it must stay as-is; notes go in comments.
    from jcvi.formats.bed import uniq

    p = OptionParser(patcher.__doc__)
    # --backbone is parsed but its value is not read anywhere in this function.
    p.add_option("--backbone", default="OM",
                 help="Prefix of the backbone assembly [default: %default]")
    p.add_option("--object", default="object",
                 help="New object name [default: %default]")
    opts, args = p.parse_args(args)

    # Two positional arguments are mandatory; otherwise show help and exit.
    if len(args) != 2:
        sys.exit(not p.print_help())

    backbonebed, otherbed = args
    # Both names are rebound to the uniq() results, which are used as file
    # names for the rest of the function.
    backbonebed = uniq([backbonebed])
    otherbed = uniq([otherbed])

    # Prefix for output files: text before the first "." in the backbone name.
    pf = backbonebed.split(".")[0]

    # Make a uniq bed keeping backbone at redundant intervals
    cmd = "intersectBed -v -wa"
    cmd += " -a {0} -b {1}".format(otherbed, backbonebed)
    outfile = otherbed.rsplit(".", 1)[0] + ".not." + backbonebed
    sh(cmd, outfile=outfile)

    uniqbed = Bed()
    uniqbedfile = pf + ".merged.bed"
    uniqbed.extend(Bed(backbonebed))
    uniqbed.extend(Bed(outfile))
    uniqbed.print_to_file(uniqbedfile, sorted=True)

    # Condense adjacent intervals, allow some chaining.
    # Runs of consecutive features sharing the seqid parsed from accn are
    # merged into one output line each.
    key = lambda x: range_parse(x.accn).seqid

    bed_fn = pf + ".patchers.bed"

    # ``with`` guarantees the handle is closed on error; write() replaces the
    # Python 2-only ``print >> file`` form, which fails at runtime on Python 3.
    with open(bed_fn, "w") as bed_fw:
        for _, sb in groupby(uniqbed, key=key):
            seqid, start, end, strand = merge_ranges(list(sb))
            row = (seqid, start, end, opts.object, 1000, strand)
            bed_fw.write("\t".join(str(x) for x in row) + "\n")
Exemplo n.º 3
0
def gaps(args):
    """
    %prog gaps OM.bed fastafile

    Create patches around OM gaps.
    """
    # The docstring is reused verbatim as the parser's usage text.
    from jcvi.formats.bed import uniq

    parser = OptionParser(gaps.__doc__)
    opts, args = parser.parse_args(args)

    # Require exactly two positional arguments, else print help and exit.
    if len(args) != 2:
        sys.exit(not parser.print_help())

    ombed, fastafile = args
    bed = Bed(uniq([ombed]))

    # Walk over consecutive feature pairs and report those that are separated
    # (end-to-end distance > 0) in at least one of the two coordinate systems.
    for left, right in pairwise(bed):
        om_left = (left.seqid, left.start, left.end, "+")
        om_right = (right.seqid, right.start, right.end, "+")

        # The accn field is parsed as a range carrying seqid/start/end.
        parsed_left = range_parse(left.accn)
        parsed_right = range_parse(right.accn)
        ch_left = (parsed_left.seqid, parsed_left.start, parsed_left.end, "+")
        ch_right = (parsed_right.seqid, parsed_right.start, parsed_right.end, "+")

        om_dist, _ = range_distance(om_left, om_right, distmode="ee")
        ch_dist, _ = range_distance(ch_left, ch_right, distmode="ee")

        no_gap = om_dist <= 0 and ch_dist <= 0
        if not no_gap:
            print(left)
            print(right)
            print(om_dist, ch_dist)
Exemplo n.º 4
0
def paste(args):
    """
    %prog paste flanks.bed flanks_vs_assembly.blast backbone.fasta

    Paste in good sequences in the final assembly.
    """
    # The docstring is passed to the option parser as its usage text.
    from jcvi.formats.bed import uniq

    parser = OptionParser(paste.__doc__)
    parser.add_option(
        "--maxsize",
        type="int",
        default=300000,
        help="Maximum size of patchers to be replaced",
    )
    parser.add_option("--prefix", help="Prefix of the new object")
    parser.set_rclip(rclip=1)
    opts, args = parser.parse_args(args)

    # Three positional arguments are mandatory; otherwise show help and exit.
    if len(args) != 3:
        sys.exit(not parser.print_help())

    pbed, blastfile, bbfasta = args
    flank_order = Bed(pbed).order

    # opts.maxsize caps the DNA size allowed to replace a gap.
    twobeds = blast_to_twobeds(
        blastfile,
        flank_order,
        log=True,
        rclip=opts.rclip,
        maxsize=opts.maxsize,
        flipbeds=True,
    )
    beforebed, afterbed = twobeds

    # Only the first of the two BED files is passed through uniq() before
    # loading.
    first_bed = Bed(uniq([beforebed]))
    second_bed = Bed(afterbed)

    shuffle_twobeds(first_bed, second_bed, bbfasta, prefix=opts.prefix)
Exemplo n.º 5
0
def paste(args):
    """
    %prog paste flanks.bed flanks_vs_assembly.blast backbone.fasta

    Paste in good sequences in the final assembly.
    """
    # The docstring serves as the usage text for the option parser.
    from jcvi.formats.bed import uniq

    parser = OptionParser(paste.__doc__)
    parser.add_option(
        "--maxsize", type="int", default=300000,
        help="Maximum size of patchers to be replaced [default: %default]")
    parser.add_option(
        "--prefix", help="Prefix of the new object [default: %default]")
    parser.set_rclip(rclip=1)
    opts, args = parser.parse_args(args)

    # Anything other than three positional arguments prints help and exits.
    if len(args) != 3:
        sys.exit(not parser.print_help())

    pbed, blastfile, bbfasta = args
    order = Bed(pbed).order

    # maxsize is the largest DNA size allowed to replace a gap.
    blast_options = dict(
        log=True, rclip=opts.rclip, maxsize=opts.maxsize, flipbeds=True)
    beforebed, afterbed = blast_to_twobeds(blastfile, order, **blast_options)

    # The first BED file goes through uniq() before loading; the second is
    # loaded as returned.
    deduped = uniq([beforebed])
    bed_one = Bed(deduped)
    bed_two = Bed(afterbed)

    shuffle_twobeds(bed_one, bed_two, bbfasta, prefix=opts.prefix)
Exemplo n.º 6
0
def gaps(args):
    """
    %prog gaps OM.bed fastafile

    Create patches around OM gaps.
    """
    # NOTE: the docstring above is given to OptionParser as the usage text, so
    # it is left untouched; implementation notes are kept in comments.
    from jcvi.formats.bed import uniq
    from jcvi.utils.iter import pairwise

    p = OptionParser(gaps.__doc__)
    opts, args = p.parse_args(args)

    # Exactly two positional arguments are required; otherwise print help and
    # exit.
    if len(args) != 2:
        sys.exit(not p.print_help())

    # fastafile is required on the command line but not read in this function.
    ombed, fastafile = args
    ombed = uniq([ombed])
    bed = Bed(ombed)

    # Compare each feature with its successor, both in the BED coordinates and
    # in the coordinates parsed from the accn field.
    for a, b in pairwise(bed):
        om_a = (a.seqid, a.start, a.end, "+")
        om_b = (b.seqid, b.start, b.end, "+")
        ch_a = range_parse(a.accn)
        ch_b = range_parse(b.accn)
        ch_a = (ch_a.seqid, ch_a.start, ch_a.end, "+")
        ch_b = (ch_b.seqid, ch_b.start, ch_b.end, "+")

        # range_distance yields a pair; only the distance is used here.
        om_dist, _ = range_distance(om_a, om_b, distmode="ee")
        ch_dist, _ = range_distance(ch_a, ch_b, distmode="ee")

        # Skip pairs with no positive distance in either coordinate system.
        if om_dist <= 0 and ch_dist <= 0:
            continue

        # Single-argument print() calls emit the same text on Python 2 and 3;
        # the former ``print a`` statements are a SyntaxError on Python 3.
        print(a)
        print(b)
        print(" ".join(str(d) for d in (om_dist, ch_dist)))