def _parse_variant_panel(self, row):
    """Parse one group of variant probe rows into a VariantProbeCoverage.

    ``row`` is the first row of the group and is always recorded as a
    reference coverage.  The ``num_alts`` parameter encoded in its
    identifier (default 0) decides how many further rows are pulled from
    ``self.reader``: up to ``num_alts - 1`` additional ``ref-`` rows,
    followed by ``num_alts`` ``alt-`` rows.  The resulting coverage object
    is appended to ``self.variant_covgs`` under the first row's full
    identifier.

    Raises:
        AssertionError: a row in the alternate section does not start
            with ``alt``.
        StopIteration: ``self.reader`` is exhausted mid-group (``next`` is
            called without a default).
    """
    allele, median_depth, min_depth, percent_coverage, k_count = (
        self._parse_summary_covgs_row(row))
    params = get_params(allele)
    if 'var_name' in params:
        var_name = params.get('var_name')
    else:
        # Fall back to the second '-'-separated field of the probe name
        # (the part of the identifier before '?').
        var_name = allele.split('?')[0].split('-')[1]
    num_alts = int(params.get("num_alts", 0))

    reference_coverages = [
        ProbeCoverage(percent_coverage=percent_coverage,
                      median_depth=median_depth,
                      min_depth=min_depth,
                      k_count=k_count)
    ]
    alternate_coverages = []

    # The first ref row is already consumed, so at most num_alts - 1 remain.
    for _ in range(num_alts - 1):
        probe_id, median_depth, min_depth, percent_coverage, k_count = (
            self._parse_summary_covgs_row(next(self.reader)))
        if probe_id.split('-')[0] != 'ref':
            # The ref rows ran out early.  The row just read is kept as an
            # alternate coverage (its prefix is not checked here), and one
            # fewer alt row is expected afterwards.
            logger.warning("Fewer ref alleles than alt alleles for %s",
                           probe_id)
            alternate_coverages.append(
                ProbeCoverage(min_depth=min_depth,
                              k_count=k_count,
                              percent_coverage=percent_coverage,
                              median_depth=median_depth))
            num_alts -= 1
            break
        reference_coverages.append(
            ProbeCoverage(percent_coverage=percent_coverage,
                          median_depth=median_depth,
                          min_depth=min_depth,
                          k_count=k_count))

    for _ in range(num_alts):
        probe_id, median_depth, min_depth, percent_coverage, k_count = (
            self._parse_summary_covgs_row(next(self.reader)))
        # Explicit raise instead of ``assert`` so the check survives -O;
        # AssertionError is kept so existing callers see the same type.
        if probe_id.split('-')[0] != 'alt':
            raise AssertionError(
                "Expected an alt probe row but got %s" % probe_id)
        alternate_coverages.append(
            ProbeCoverage(min_depth=min_depth,
                          k_count=k_count,
                          percent_coverage=percent_coverage,
                          median_depth=median_depth))

    variant_probe_coverage = VariantProbeCoverage(
        reference_coverages=reference_coverages,
        alternate_coverages=alternate_coverages,
        var_name=var_name,
        params=params)
    self.variant_covgs.setdefault(allele, []).append(variant_probe_coverage)
def _name_to_id(self, probe_name):
    """Return the identifier string derived from ``probe_name``.

    When the probe's parameters carry a truthy ``mut``, the result is
    ``<gene>_<mut>-<var_name>`` built from the parameters.  Otherwise it
    is the second '-'-separated field of the part of ``probe_name`` that
    precedes '?'.
    """
    params = get_params(probe_name)
    if not params.get("mut"):
        # No mutation annotation: the name embedded in the probe id is
        # the whole identifier.
        return probe_name.split('?')[0].split('-')[1]
    gene_and_mut = "_".join([params.get("gene"), params.get("mut")])
    return "-".join([gene_and_mut, params.get("var_name")])
def _parse_seq_panel(self, row):
    """Record the coverage of one sequence probe row in ``self.covgs``.

    The probe identifier's parameters select the handling:

    * ``panel_type`` of ``"variant"`` or ``"presence"`` (the default when
      absent): a ``SequenceProbeCoverage`` is stored at
      ``self.covgs[panel_type][name][version]``; ``version`` defaults to
      ``'1'``.
    * any other ``panel_type``: per-name aggregates are kept instead.
      ``total_bases`` accumulates the probe ``length`` for every row
      (``-1`` is added when the identifier has no ``length``), while
      ``percent_coverage``, ``length`` and ``median`` only collect rows
      with ``percent_coverage > 75`` and ``median_depth > 0``.

    Missing intermediate dictionaries are created on demand.
    """
    allele, median_depth, min_depth, percent_coverage, k_count = (
        self._parse_summary_covgs_row(row))
    params = get_params(allele)
    panel_type = params.get("panel_type", "presence")
    name = params.get('name')
    # setdefault also covers a panel_type key that is not yet present.
    panel = self.covgs.setdefault(panel_type, {})

    if panel_type in ("variant", "presence"):
        version = params.get('version', '1')
        probe_coverage = ProbeCoverage(percent_coverage=percent_coverage,
                                       median_depth=median_depth,
                                       min_depth=min_depth,
                                       k_count=k_count)
        panel.setdefault(name, {})[version] = SequenceProbeCoverage(
            name=name,
            probe_coverage=probe_coverage,
            version=version,
            length=params.get("length"))
        return

    # Species panels are treated differently: aggregate per name rather
    # than storing one coverage object per version.
    length = int(params.get("length", -1))
    stats = panel.get(name)
    if stats is None:
        stats = panel[name] = {
            "total_bases": 0,
            "percent_coverage": [],
            "length": [],
            "median": [],
        }
    stats["total_bases"] += length
    if percent_coverage > 75 and median_depth > 0:
        stats["percent_coverage"].append(percent_coverage)
        stats["length"].append(length)
        stats["median"].append(median_depth)
def _create_variant(self, probe_name):
    """Build a ``Variant`` from ``probe_name``, or return ``None``.

    The variant name is the second '-'-separated field of the part of
    ``probe_name`` before '?'.  When the probe parameters carry a truthy
    ``mut``, ``<gene>_<mut>`` is listed ahead of it in ``names``.

    ``None`` is returned when ``split_var_name`` or ``Variant.create``
    raises ``AttributeError``.
    NOTE(review): presumably that signals a name which is not a variant
    name; confirm against ``split_var_name``.
    """
    params = get_params(probe_name)
    prefix_names = []
    if params.get("mut"):
        prefix_names = ["_".join([params.get("gene"), params.get("mut")])]
    probe_stem = probe_name.split('?')[0]
    var_name = probe_stem.split('-')[1]
    names = prefix_names + [var_name]
    try:
        # If it's a variant panel we can create a variant
        ref, start, alt = split_var_name(var_name)
        variant = Variant.create(start=start,
                                 reference_bases=ref,
                                 alternate_bases=[alt],
                                 names=names,
                                 info=params)
    except AttributeError:
        variant = None
    return variant
def run(parser, args):
    """Walk the graph from each probe in ``args.probe_set`` and print JSON.

    For every FASTA record in the probe set a seed (``PathDetails``) is
    collected per gene name, the graph is walked for each gene with
    ``get_paths_for_gene``, and the results are stored under
    ``<sample>["paths"]`` in the output dictionary, which is printed as
    JSON.  When ``args.also_genotype`` is set, the output dictionary starts
    from the result of ``run_genotype``.  Unless ``args.show_all_paths`` is
    set, only one path per gene is reported (``{"found": False}`` when
    there is none).

    The ``WebServer`` is stopped even if walking fails part-way.
    """
    genes = {}
    # Versions listed here get no PathDetails seed; their sequence is
    # still appended to the gene's "known_kmers".
    skip_list = {
        "tem": ["191", "192"],
        "oxa": ["12", "14", "33"],
        "shv": ["12", "6"]
    }
    check_args(args)
    if args.seq:
        build_binary()
    if args.also_genotype:
        _out_dict = run_genotype(parser, args)
    else:
        _out_dict = {args.sample: {}}
    _out_dict[args.sample]["paths"] = {}
    out_dict = _out_dict[args.sample]["paths"]

    wb = WebServer(port=0,
                   args=[args.ctx],
                   memory=args.memory,
                   mccortex_path=args.mccortex31_path)
    logger.debug("Loading binary")
    wb.start()
    try:
        logger.debug("Walking the graph")
        gw = GraphWalker(proc=wb.mccortex,
                         kmer_size=args.kmer,
                         print_depths=True)
        with open(args.probe_set, 'r') as infile:
            for i, record in enumerate(SeqIO.parse(infile, "fasta")):
                repeat_kmers = get_repeat_kmers(record, args.kmer)
                params = get_params(record.id)
                # The record index stands in for a missing name/version.
                gene_name = params.get("name", i)
                version = params.get("version", i)
                sequence = str(record.seq)
                if gene_name not in genes:
                    logger.debug("Loading kmer data for %s", gene_name)
                last_kmer = sequence[-args.kmer:]
                start_kmer, skipped = find_start_kmer(
                    sequence, gw.mcq, args.kmer)
                gene = genes.setdefault(
                    gene_name, {"pathdetails": [], "known_kmers": ""})
                if version not in skip_list.get(gene_name, []) and start_kmer:
                    path_details = PathDetails(start_kmer,
                                               last_kmer,
                                               len(record.seq),
                                               skipped=skipped,
                                               v=version)
                    path_details.set_repeat_kmers(repeat_kmers)
                    gene["pathdetails"].append(path_details)
                gene["known_kmers"] += "%sN" % sequence

        for gene_name, gene_dict in genes.items():
            logger.debug("Walking graph with seeds defined by %s", gene_name)
            paths = get_paths_for_gene(gene_name, gene_dict, gw)
            # Materialise the values: on Python 3 a dict view can be
            # neither indexed nor serialised by json.dumps.
            candidates = list(paths.values())
            if args.show_all_paths:
                out_dict[gene_name] = candidates
                continue
            if len(candidates) > 1:
                # choose best version
                best_path = choose_best_assembly(candidates)
            elif len(candidates) == 1:
                best_path = candidates[0]
            else:
                best_path = {"found": False}
            out_dict[gene_name] = [best_path]
        print(json.dumps(_out_dict, sort_keys=False, indent=4))
    finally:
        # Always shut the server down, including on errors above.
        logger.info("Cleaning up")
        wb.stop()