import json
from collections import defaultdict
from pathlib import Path

import jsonschema
import matplotlib
import matplotlib.pyplot as plt
from Bio import SeqIO

# NOTE: mhopen, mhapi, Profile, SimulatedProfile, load_marker_frequencies,
# and package_file are assumed to be this package's own helpers and are
# expected to be in scope.


def main(args):
    # Estimate the minimum number of DNA contributors to the given typing result.
    ncontrib, nloci, ploci = mhapi.contrib(Profile(fromfile=args.result))
    data = {
        "min_num_contrib": ncontrib,
        "num_loci_max_alleles": nloci,
        "perc_loci_max_alleles": ploci,
    }
    with mhopen(args.out, "w") as fh:
        json.dump(data, fh, indent=4)
def main(args):
    # Compute a containment statistic (shared alleles / total alleles) for two profiles.
    contained, total = mhapi.contain(
        Profile(fromfile=args.profile1), Profile(fromfile=args.profile2)
    )
    data = {
        "containment": round(contained / total, 4),
        "contained_alleles": contained,
        "total_alleles": total,
    }
    with mhopen(args.out, "w") as fh:
        json.dump(data, fh, indent=4)
def main(args):
    # With a single profile, compute a random match probability; with two
    # profiles, compute a likelihood ratio comparing them.
    prof1 = Profile(fromfile=args.profile1)
    prof2 = Profile(fromfile=args.profile2) if args.profile2 else None
    frequencies = load_marker_frequencies(args.freq)
    result = mhapi.prob(frequencies, prof1, prof2=prof2, erate=args.erate)
    key = "random_match_probability" if prof2 is None else "likelihood_ratio"
    data = {
        key: "{:.3E}".format(result),
    }
    with mhopen(args.out, "w") as fh:
        json.dump(data, fh, indent=4)
def __init__(self, fromfile=None):
    global SCHEMA
    if fromfile:
        # Accept either a path (str or Path) or an open file-like object.
        if isinstance(fromfile, (str, Path)):
            with mhopen(str(fromfile), "r") as fh:
                self.data = json.load(fh)
        else:
            self.data = json.load(fromfile)
        # Load the JSON schema once, lazily, then validate the profile data.
        if SCHEMA is None:
            SCHEMA = load_schema()
        jsonschema.validate(instance=self.data, schema=SCHEMA)
    else:
        self.data = self.initialize()
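# Usage sketch (hypothetical file name) showing both accepted input types;
# schema validation happens automatically on load:
#
#     profile = Profile(fromfile="observed-profile.json")
#     with mhopen("observed-profile.json", "r") as fh:
#         profile = Profile(fromfile=fh)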
def read_length_dist(fastq, outfile, xlabel="Read Length (bp)", xlim=None, scale=1000, title=None):
    """Plot distribution of read lengths

    :param str fastq: path of a FASTQ file containing NGS reads
    :param str outfile: path of a graphic file to create
    :param str xlabel: label for the X axis
    :param tuple xlim: a 2-tuple of numbers (x1, x2) representing the start and
        end points of the portion of the X axis to be displayed; by default
        this is determined automatically
    :param float scale: scaling factor for the Y axis
    :param str title: title for the plot
    """
    # Render off screen with the Agg backend; restore the original backend when done.
    backend = matplotlib.get_backend()
    plt.switch_backend("Agg")
    lengths = list()
    with mhopen(fastq, "r") as fh:
        for record in SeqIO.parse(fh, "fastq"):
            lengths.append(len(record))
    fig = plt.figure(figsize=(6, 4), dpi=200)
    plt.hist(lengths, bins=25, weights=[1 / scale] * len(lengths), edgecolor="#000099")
    if xlim is None:
        xlim = (min(lengths) * 0.9, max(lengths) * 1.1)
    plt.xlim(*xlim)
    ax = plt.gca()
    ax.yaxis.grid(True, color="#DDDDDD")
    ax.set_axisbelow(True)
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    ax.spines["left"].set_visible(False)
    ax.spines["bottom"].set_color("#CCCCCC")
    ax.tick_params(left=False)
    ax.set_xlabel(xlabel, labelpad=15, fontsize=16)
    ax.set_ylabel(f"Frequency (× {scale})", labelpad=15, fontsize=16)
    if title:
        ax.set_title(title, pad=25, fontsize=18)
    plt.savefig(outfile, bbox_inches="tight")
    plt.switch_backend(backend)
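# Usage sketch with hypothetical file names: plot the read length distribution
# of a FASTQ file, restricting the view to the 0-500 bp window and scaling
# Y-axis counts by 100:
#
#     read_length_dist("reads.fastq", "read-lengths.png", xlim=(0, 500), scale=100)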
def populate_from_bed(bedfile):
    # Infer ploidy from the number of pipe-delimited alleles on the first
    # line, then rewind and parse the full file.
    with mhopen(bedfile, "r") as fh:
        line = next(fh)
        ploidy = line.count("|") + 1
        fh.seek(0)
        marker_alleles = defaultdict(lambda: [list() for _ in range(ploidy)])
        for line in fh:
            line = line.strip()
            if line == "":
                continue
            marker, start, end, allelestr = line.split("\t")
            alleles = allelestr.split("|")
            for i, a in enumerate(alleles):
                marker_alleles[marker][i].append(a)
    # Combine each marker's per-haplotype variant alleles into comma-delimited
    # haplotype strings.
    profile = SimulatedProfile(ploidy=ploidy)
    for marker, allele_list in marker_alleles.items():
        for i, haplotype in enumerate(allele_list):
            profile.add(i, marker, ",".join(haplotype))
    return profile
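# The expected input is a BED-like, tab-delimited file with columns marker,
# start, end, and pipe-delimited alleles (one per haplotype), as parsed above.
# A hypothetical two-marker diploid example:
#
#     mh01KK-001	100	101	A|G
#     mh01KK-001	150	151	C|C
#     mh02KK-002	200	201	T|T
#
#     profile = populate_from_bed("haplotypes.bed")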
def main(args):
    # Merge multiple simulated profiles into a single combined profile.
    profiles = [SimulatedProfile(pfile) for pfile in args.profiles]
    combined = SimulatedProfile.merge(profiles)
    with mhopen(args.out, "w") as fh:
        combined.dump(fh)
def load_schema():
    with mhopen(package_file("data/profile-schema.json"), "r") as fh:
        return json.load(fh)
def dump(self, outfile):
    # Write the profile to a path (str or Path) or to an open file-like object.
    if isinstance(outfile, (str, Path)):
        with mhopen(str(outfile), "w") as fh:
            json.dump(self.data, fh, indent=4, sort_keys=True)
    else:
        json.dump(self.data, outfile, indent=4, sort_keys=True)