def _aggregate(self, covgs, threshold=5):
    """Collapse per-node coverage lists into one summary per phylo group.

    ``covgs`` is modified in place and nothing is returned.

    Each value of ``covgs`` is read as a dict with the keys
    ``"percent_coverage"``, ``"length"`` and ``"total_bases"`` and an
    optional ``"median"`` (defaults to ``[0]``).  ``percent_coverage``,
    ``length`` and ``median`` are indexed in parallel, one entry per node.

    For every group the covered fraction is computed via
    ``self._bases_covered``.  If that fraction is below the minimum
    required for ``self.expected_depth`` (scaled by the group's entry in
    ``self.threshold`` or ``DEFAULT_THRESHOLD``), or the median of the
    per-node medians is below 10% of ``self.expected_depth``, the nodes
    whose median does not exceed that 10% cut-off are dropped and the
    fraction is recomputed from the remaining nodes.

    Groups whose final value exceeds ``threshold`` are replaced by
    ``{"percent_coverage": ..., "median_depth": ...}``; all other groups
    are removed from ``covgs``.

    Args:
        covgs: Mapping of phylo group name to raw coverage dict.
        threshold: Value the final covered figure must exceed for the
            group to be kept.
    """
    stale_groups = []
    # Iterate over a snapshot: the rename below can insert a new key, and
    # adding a key to a dict while iterating its live view raises
    # "RuntimeError: dictionary changed size during iteration".
    for phylo_group, covg_dict in list(covgs.items()):
        percent_coverage = covg_dict["percent_coverage"]
        length = covg_dict["length"]
        bases_covered = self._bases_covered(percent_coverage, length)
        total_bases = covg_dict["total_bases"]
        total_percent_covered = round(bases_covered / total_bases, 3)
        _median = covg_dict.get("median", [0])
        minimum_percentage_coverage_required = (
            percent_coverage_from_expected_coverage(self.expected_depth)
            * self.threshold.get(phylo_group, DEFAULT_THRESHOLD)
        )
        if (total_percent_covered < minimum_percentage_coverage_required
                or median(_median) < 0.1 * self.expected_depth):
            # Remove low coverage nodes
            _index = [
                i for i, d in enumerate(_median)
                if d > 0.1 * self.expected_depth
            ]
            percent_coverage = [percent_coverage[i] for i in _index]
            length = [length[i] for i in _index]
            bases_covered = self._bases_covered(percent_coverage, length)
            _median = [_median[i] for i in _index]
            total_percent_covered = round(bases_covered / total_bases, 3)

        if total_percent_covered > threshold:
            reported_group = phylo_group
            if phylo_group == "Mycobacterium_llatzerense":
                # Mistake in panel
                reported_group = "Mycobacterium_mucogenicum"
                # The summary is stored under the corrected name, so the
                # mislabelled raw entry must not survive alongside it.
                stale_groups.append(phylo_group)
            covgs[reported_group] = {
                "percent_coverage": total_percent_covered,
                "median_depth": median(_median),
            }
        else:
            stale_groups.append(phylo_group)

    for phylo_group in stale_groups:
        del covgs[phylo_group]
def _log_post_het_or_alt(self, llk, expected_depth, sequence_coverage):
    """Return the log posterior for ``llk`` if coverage is sufficient.

    The required coverage is
    ``percent_coverage_from_expected_coverage(expected_depth)`` multiplied
    by ``sequence_coverage.percent_coverage_threshold``.  When
    ``sequence_coverage.percent_coverage`` is strictly greater than that
    value the result of ``self._log_post_hom_ref(llk)`` is returned;
    otherwise ``MIN_LLK`` is returned.
    """
    required_coverage = (
        percent_coverage_from_expected_coverage(expected_depth)
        * sequence_coverage.percent_coverage_threshold
    )
    sufficiently_covered = sequence_coverage.percent_coverage > required_coverage
    return self._log_post_hom_ref(llk) if sufficiently_covered else MIN_LLK
def hom_alt_lik(self, variant_probe_coverage):
    """Return the best log likelihood of a homozygous-alternate call.

    If ``variant_probe_coverage.alternate_percent_coverage`` is below
    ``100 * percent_coverage_from_expected_coverage(max(self.expected_depths))``
    the call is rejected outright with ``MIN_LLK``.

    Otherwise ``log_lik_R_S_coverage`` is evaluated for every expected
    depth on its own and for that depth plus each value in
    ``self.contamination_depths``; the maximum of those values is returned.
    """
    observed_coverage = variant_probe_coverage.alternate_percent_coverage
    required_coverage = 100 * percent_coverage_from_expected_coverage(
        max(self.expected_depths)
    )
    if observed_coverage < required_coverage:
        return MIN_LLK

    def likelihood_at(depth):
        # The last argument is derived from the same depth: depth * error_rate / 3.
        return log_lik_R_S_coverage(
            variant_probe_coverage.alternate_median_depth,
            variant_probe_coverage.reference_median_depth,
            depth,
            depth * self.error_rate / 3,
        )

    candidates = []
    # Either alt+cov or alt_covg + contam_covg
    for base_depth in self.expected_depths:
        candidates.append(likelihood_at(base_depth))
        candidates.extend(
            likelihood_at(base_depth + extra_depth)
            for extra_depth in self.contamination_depths
        )
    return max(candidates)
def test_percentage_coverage():
    """Check ordering and bounds of percent_coverage_from_expected_coverage."""
    at_depth_100 = percent_coverage_from_expected_coverage(100)
    at_depth_10 = percent_coverage_from_expected_coverage(10)
    # Deeper expected coverage must give a larger expected percent coverage.
    assert at_depth_100 > at_depth_10

    # At depth 100 the value is expected to equal exactly 1.
    assert percent_coverage_from_expected_coverage(100) == 1

    # At depth 1 the value must stay below 1.
    at_depth_1 = percent_coverage_from_expected_coverage(1)
    assert at_depth_1 < 1