示例#1
0
文件: tgbs.py 项目: Nicholas-NVS/jcvi
def mstmap(args):
    """
    %prog mstmap LMD50.snps.genotype.txt

    Convert LMDs to MSTMAP input.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so implementation notes are kept in comments instead of the docstring.
    #
    # args: list of command-line tokens; exactly one positional argument (the
    # genotype table) is required, otherwise the help is printed and the
    # process exits.
    from jcvi.assembly.geneticmap import MSTMatrix

    p = OptionParser(mstmap.__doc__)
    p.add_option("--population_type", default="RIL6",
                 help="Type of population, possible values are DH and RILd")
    # type="float" makes the parser validate/convert a value supplied on the
    # command line; without it only the default is numeric and any user value
    # would be forwarded as a raw string.
    p.add_option("--missing_threshold", default=.5, type="float",
                 help="Missing threshold, .25 excludes any marker with >25% missing")
    p.set_outfile()
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    lmd, = args
    # Input genotype code -> symbol emitted in the output matrix.
    table = {"0": "-", "1": "A", "2": "B", "3": "X"}
    genotypes = []
    # Context manager guarantees the handle is closed even if a row fails to
    # parse; next(fp) works on both Python 2.6+ and Python 3 (fp.next() is
    # Python 2 only).
    with open(lmd) as fp:
        next(fp)  # Header
        # Second line supplies the individual names from the 5th column on.
        mh = ["locus_name"] + next(fp).split()[4:]
        for row in fp:
            atoms = row.split()
            if not atoms:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            # First four columns are unpacked as chromosome, position, ref,
            # alt; only the first two are used, but unpacking all four still
            # rejects rows that are too short.
            chrom, pos, _ref, _alt = atoms[:4]
            locus_name = ".".join((chrom, pos))
            # An unknown code raises KeyError, same as before.
            codes = [table[x] for x in atoms[4:]]
            genotypes.append([locus_name] + codes)

    mm = MSTMatrix(genotypes, mh, opts.population_type, opts.missing_threshold)
    mm.write(opts.outfile, header=True)
示例#2
0
文件: tgbs.py 项目: zhaotao1987/jcvi
def mstmap(args):
    """
    %prog mstmap LMD50.snps.genotype.txt

    Convert LMDs to MSTMAP input.
    """
    # NOTE: the docstring above doubles as the command-line usage string
    # (passed to OptionParser below), so it is left untouched and the
    # explanatory notes live in comments.
    #
    # args: command-line tokens; one positional argument (path to the
    # genotype table) is expected, otherwise help is printed and we exit.
    from jcvi.assembly.geneticmap import MSTMatrix

    p = OptionParser(mstmap.__doc__)
    p.add_option("--population_type", default="RIL6",
                 help="Type of population, possible values are DH and RILd")
    # Declare the option as float so user-supplied values are converted and
    # validated by the parser rather than passed downstream as strings.
    p.add_option("--missing_threshold", default=.5, type="float",
                 help="Missing threshold, .25 excludes any marker with >25% missing")
    p.set_outfile()
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    lmd, = args
    # Maps the genotype codes found in the file to the output symbols.
    table = {"0": "-", "1": "A", "2": "B", "3": "X"}
    genotypes = []
    # `with` closes the file deterministically; the builtin next() replaces
    # the Python 2-only file.next() method.
    with open(lmd) as fp:
        next(fp)  # Header
        # Individual names are taken from column 5 onward of the second line.
        mh = ["locus_name"] + next(fp).split()[4:]
        for row in fp:
            atoms = row.split()
            if not atoms:
                # Skip empty lines instead of failing on the unpack below.
                continue
            # Columns 1-4 are chromosome, position, ref and alt per the
            # unpack; ref/alt are unused but kept so short rows still raise.
            chrom, pos, _ref, _alt = atoms[:4]
            locus_name = ".".join((chrom, pos))
            # Codes outside `table` raise KeyError, as in the original.
            codes = [table[x] for x in atoms[4:]]
            genotypes.append([locus_name] + codes)

    mm = MSTMatrix(genotypes, mh, opts.population_type, opts.missing_threshold)
    mm.write(opts.outfile, header=True)
示例#3
0
文件: vcf.py 项目: Hensonmw/jcvi
def mstmap(args):
    """
    %prog mstmap bcffile/vcffile > matrixfile

    Convert bcf/vcf format to mstmap input.
    """
    # NOTE: the docstring above is passed to OptionParser as the usage text,
    # so implementation notes are written as comments instead.
    #
    # args: command-line tokens; exactly one positional argument (a .bcf or
    # VCF path) is required, otherwise help is printed and the process exits.
    from jcvi.assembly.geneticmap import MSTMatrix

    p = OptionParser(mstmap.__doc__)
    p.add_option("--dh", default=False, action="store_true",
                 help="Double haploid population, no het [default: %default]")
    p.add_option("--freq", default=.2, type="float",
                 help="Allele must be above frequency [default: %default]")
    p.add_option("--mindepth", default=3, type="int",
                 help="Only trust genotype calls with depth [default: %default]")
    p.add_option("--missing_threshold", default=.25, type="float",
                 help="Fraction missing must be below")
    p.add_option("--noheader", default=False, action="store_true",
                 help="Do not print MSTmap run parameters [default: %default]")
    p.add_option("--pv4", default=False, action="store_true",
                 help="Enable filtering strand-bias, tail distance bias, etc. "
                 "[default: %default]")
    p.add_option("--freebayes", default=False, action="store_true",
                 help="VCF output from freebayes")
    p.set_sep(sep=".", help="Use separator to simplify individual names")
    p.set_outfile()
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    vcffile, = args
    if vcffile.endswith(".bcf"):
        # A .bcf input is piped through bcftools and vcfutils.pl into a
        # sibling file with the .vcf extension, which is then parsed below.
        bcffile = vcffile
        vcffile = bcffile.rsplit(".", 1)[0] + ".vcf"
        cmd = "bcftools view {0}".format(bcffile)
        cmd += " | vcfutils.pl varFilter"
        if not opts.pv4:
            # Presumably zeroes the varFilter thresholds that --pv4 would
            # otherwise leave active -- confirm against vcfutils.pl usage.
            cmd += " -1 0 -2 0 -3 0 -4 0 -e 0"
        # need_update is an external helper; presumably it reports whether
        # vcffile is missing or stale relative to bcffile -- TODO confirm.
        if need_update(bcffile, vcffile):
            sh(cmd, outfile=vcffile)

    freq = opts.freq
    sep = opts.sep
    # Forwarded to encode_genotype; freebayes output uses index 1, else 2.
    depth_index = 1 if opts.freebayes else 2

    ptype = "DH" if opts.dh else "RIL6"
    nohet = ptype == "DH"
    genotypes = []
    # Context manager so the VCF handle is closed even when parsing fails.
    with open(vcffile) as fp:
        for row in fp:
            if row[:2] == "##":
                continue
            atoms = row.split()
            if not atoms:
                # Blank/whitespace-only line: nothing to parse. Without this
                # guard the marker formatting below fails with IndexError.
                continue
            if row[0] == '#':
                # Column-header line: names from the 10th column on are
                # trimmed to the part before the first `sep`.
                ind = [x.split(sep)[0] for x in atoms[9:]]
                nind = len(ind)
                mh = ["locus_name"] + ind
                continue

            # Marker name is built from the first two columns of the row.
            marker = "{0}.{1}".format(*atoms[:2])

            geno = atoms[9:]
            geno = [encode_genotype(x, mindepth=opts.mindepth,
                                    depth_index=depth_index,
                                    nohet=nohet) for x in geno]
            assert len(geno) == nind, \
                "{0}: expected {1} genotype columns, found {2}".format(
                    marker, nind, len(geno))
            f = 1. / nind

            # Drop markers where either "A" or "B" calls fall below the
            # requested frequency, or where too many calls are "-".
            if geno.count("A") * f < freq:
                continue
            if geno.count("B") * f < freq:
                continue
            if geno.count("-") * f > opts.missing_threshold:
                continue

            genotype = [marker] + geno
            genotypes.append(genotype)

    mm = MSTMatrix(genotypes, mh, ptype, opts.missing_threshold)
    mm.write(opts.outfile, header=(not opts.noheader))
示例#4
0
文件: vcf.py 项目: zjwang6/jcvi
def mstmap(args):
    """
    %prog mstmap bcffile/vcffile > matrixfile

    Convert bcf/vcf format to mstmap input.
    """
    # NOTE: the docstring above doubles as the usage string given to
    # OptionParser, so it is kept as-is and details are documented in
    # comments.
    #
    # args: command-line tokens; one positional argument (a .bcf or VCF
    # path) is expected, otherwise help is printed and the process exits.
    from jcvi.assembly.geneticmap import MSTMatrix

    p = OptionParser(mstmap.__doc__)
    p.add_option(
        "--dh",
        default=False,
        action="store_true",
        help="Double haploid population, no het [default: %default]",
    )
    p.add_option(
        "--freq",
        default=0.2,
        type="float",
        help="Allele must be above frequency [default: %default]",
    )
    p.add_option(
        "--mindepth",
        default=3,
        type="int",
        help="Only trust genotype calls with depth [default: %default]",
    )
    p.add_option(
        "--missing_threshold",
        default=0.25,
        type="float",
        help="Fraction missing must be below",
    )
    p.add_option(
        "--noheader",
        default=False,
        action="store_true",
        help="Do not print MSTmap run parameters [default: %default]",
    )
    p.add_option(
        "--pv4",
        default=False,
        action="store_true",
        help="Enable filtering strand-bias, tail distance bias, etc. "
        "[default: %default]",
    )
    p.add_option(
        "--freebayes",
        default=False,
        action="store_true",
        help="VCF output from freebayes",
    )
    p.set_sep(sep=".", help="Use separator to simplify individual names")
    p.set_outfile()
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (vcffile, ) = args
    if vcffile.endswith(".bcf"):
        # Convert the .bcf into a same-named .vcf via bcftools/vcfutils.pl;
        # the resulting .vcf is what gets parsed below.
        bcffile = vcffile
        vcffile = bcffile.rsplit(".", 1)[0] + ".vcf"
        cmd = "bcftools view {0}".format(bcffile)
        cmd += " | vcfutils.pl varFilter"
        if not opts.pv4:
            # Presumably disables the varFilter bias filters that --pv4
            # would enable -- confirm against vcfutils.pl usage.
            cmd += " -1 0 -2 0 -3 0 -4 0 -e 0"
        # need_update is defined elsewhere; presumably true when vcffile
        # must be (re)generated from bcffile -- TODO confirm.
        if need_update(bcffile, vcffile):
            sh(cmd, outfile=vcffile)

    freq = opts.freq
    sep = opts.sep
    # Passed through to encode_genotype: 1 for freebayes output, else 2.
    depth_index = 1 if opts.freebayes else 2

    ptype = "DH" if opts.dh else "RIL6"
    nohet = ptype == "DH"
    genotypes = []
    # `with` ensures the VCF handle is released even if a row fails to parse.
    with open(vcffile) as fp:
        for row in fp:
            if row[:2] == "##":
                continue
            atoms = row.split()
            if not atoms:
                # Skip blank/whitespace-only lines; otherwise the marker
                # formatting below raises IndexError.
                continue
            if row[0] == "#":
                # Column-header line: names from the 10th column on, each
                # truncated at the first occurrence of `sep`.
                ind = [x.split(sep)[0] for x in atoms[9:]]
                nind = len(ind)
                mh = ["locus_name"] + ind
                continue

            # Marker name joins the first two columns of the record.
            marker = "{0}.{1}".format(*atoms[:2])

            geno = atoms[9:]
            geno = [
                encode_genotype(x,
                                mindepth=opts.mindepth,
                                depth_index=depth_index,
                                nohet=nohet) for x in geno
            ]
            assert len(geno) == nind, (
                "{0}: expected {1} genotype columns, found {2}".format(
                    marker, nind, len(geno)))
            f = 1.0 / nind

            # Keep the marker only if both "A" and "B" calls reach the
            # requested frequency and "-" calls stay within the threshold.
            if geno.count("A") * f < freq:
                continue
            if geno.count("B") * f < freq:
                continue
            if geno.count("-") * f > opts.missing_threshold:
                continue

            genotype = [marker] + geno
            genotypes.append(genotype)

    mm = MSTMatrix(genotypes, mh, ptype, opts.missing_threshold)
    mm.write(opts.outfile, header=(not opts.noheader))