Example #1
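Estimate the minimum number of contributors to a DNA sample from a typing result and write the estimate, along with the supporting locus counts, to a JSON file.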
def main(args):
    ncontrib, nloci, ploci = mhapi.contrib(Profile(fromfile=args.result))
    data = {
        "min_num_contrib": ncontrib,
        "num_loci_max_alleles": nloci,
        "perc_loci_max_alleles": ploci,
    }
    with mhopen(args.out, "w") as fh:
        json.dump(data, fh, indent=4)
Example #2
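Compute the containment of one profile's alleles within another and write the statistic to a JSON file.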
def main(args):
    contained, total = mhapi.contain(Profile(fromfile=args.profile1),
                                     Profile(fromfile=args.profile2))
    data = {
        "containment": round(contained / total, 4),
        "contained_alleles": contained,
        "total_alleles": total,
    }
    with mhopen(args.out, "w") as fh:
        json.dump(data, fh, indent=4)
Example #3
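Compute a random match probability for a single profile, or a likelihood ratio when a second profile is given, using population allele frequencies and an optional per-allele error rate; the result is written to a JSON file.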
def main(args):
    prof1 = Profile(fromfile=args.profile1)
    prof2 = Profile(fromfile=args.profile2) if args.profile2 else None
    frequencies = load_marker_frequencies(args.freq)
    result = mhapi.prob(frequencies, prof1, prof2=prof2, erate=args.erate)
    key = "random_match_probability" if prof2 is None else "likelihood_ratio"
    data = {
        key: "{:.3E}".format(result),
    }
    with mhopen(args.out, "w") as fh:
        json.dump(data, fh, indent=4)
Example #4
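The Profile constructor: load profile data from a file path, pathlib.Path, or open file handle and validate it against the profile JSON schema; with no file, initialize an empty profile.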
def __init__(self, fromfile=None):
    global SCHEMA
    if fromfile:
        if isinstance(fromfile, (str, Path)):
            with mhopen(str(fromfile), "r") as fh:
                self.data = json.load(fh)
        else:
            self.data = json.load(fromfile)
        if SCHEMA is None:
            SCHEMA = load_schema()
        jsonschema.validate(instance=self.data, schema=SCHEMA)
    else:
        self.data = self.initialize()
Example #5
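Plot the distribution of read lengths in a FASTQ file and save the histogram to a graphics file.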
def read_length_dist(fastq,
                     outfile,
                     xlabel="Read Length (bp)",
                     xlim=None,
                     scale=1000,
                     title=None):
    """Plot distribution of read lengths

    :param str fastq: path of a FASTQ file containing NGS reads
    :param str outfile: path of a graphic file to create
    :param str xlabel: label for the X axis
    :param tuple xlim: a 2-tuple of numbers (x1, x2) representing the start and end points of the portion of the X axis to be displayed; by default this is determined automatically
    :param float scale: scaling factor for the Y axis
    :param str title: title for the plot
    """
    backend = matplotlib.get_backend()
    plt.switch_backend("Agg")
    lengths = list()
    with mhopen(fastq, "r") as fh:
        for record in SeqIO.parse(fh, "fastq"):
            lengths.append(len(record))
    fig = plt.figure(figsize=(6, 4), dpi=200)
    plt.hist(lengths,
             bins=25,
             weights=[1 / scale] * len(lengths),
             edgecolor="#000099")
    if xlim is None:
        xlim = (min(lengths) * 0.9, max(lengths) * 1.1)
    plt.xlim(*xlim)
    ax = plt.gca()
    ax.yaxis.grid(True, color="#DDDDDD")
    ax.set_axisbelow(True)
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    ax.spines["left"].set_visible(False)
    ax.spines["bottom"].set_color("#CCCCCC")
    ax.tick_params(left=False)
    ax.set_xlabel(xlabel, labelpad=15, fontsize=16)
    ax.set_ylabel(f"Frequency (× {scale})", labelpad=15, fontsize=16)
    if title:
        ax.set_title(title, pad=25, fontsize=18)
    plt.savefig(outfile, bbox_inches="tight")
    plt.switch_backend(backend)
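A minimal usage sketch of the function above; the FASTQ path, output file name, and title are hypothetical:

read_length_dist("sample-reads.fastq", "read-lengths.png",
                 scale=1000, title="Read length distribution")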
Example #6
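Build a SimulatedProfile from a BED-like file whose fourth column lists phased alleles separated by "|"; the number of phased alleles on the first line determines the ploidy.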
def populate_from_bed(bedfile):
    with mhopen(bedfile, "r") as fh:
        # Infer ploidy from the number of phased alleles on the first line
        line = next(fh)
        ploidy = line.count("|") + 1
        fh.seek(0)
        marker_alleles = defaultdict(lambda: [list() for _ in range(ploidy)])
        for line in fh:
            line = line.strip()
            if line == "":
                continue
            marker, start, end, allelestr = line.split("\t")
            alleles = allelestr.split("|")
            for i, a in enumerate(alleles):
                marker_alleles[marker][i].append(a)
        # Join each haplotype's per-position alleles into a comma-separated string per marker
        profile = SimulatedProfile(ploidy=ploidy)
        for marker, allele_list in marker_alleles.items():
            for i, haplotype in enumerate(allele_list):
                profile.add(i, marker, ",".join(haplotype))
        return profile
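A hypothetical tab-separated input illustrating the expected format (marker name and coordinates are made up); each line is one variant position and the fourth column holds one allele per haplotype, separated by "|":

mh01XYZ-001	100	101	C|T
mh01XYZ-001	150	151	A|A
mh01XYZ-001	200	201	G|C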
Example #7
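Merge several simulated profiles into a single combined profile and write it to a file.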
def main(args):
    profiles = [SimulatedProfile(fromfile=pfile) for pfile in args.profiles]
    combined = SimulatedProfile.merge(profiles)
    with mhopen(args.out, "w") as fh:
        combined.dump(fh)
Example #8
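Load the package's bundled profile JSON schema.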
def load_schema():
    with mhopen(package_file("data/profile-schema.json"), "r") as fh:
        return json.load(fh)
Example #9
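Serialize a profile to JSON, accepting either a file path (str or pathlib.Path) or an already-open file handle.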
def dump(self, outfile):
    if isinstance(outfile, (str, Path)):
        with mhopen(str(outfile), "w") as fh:
            json.dump(self.data, fh, indent=4, sort_keys=True)
    else:
        json.dump(self.data, outfile, indent=4, sort_keys=True)