class RegionFilter(Filter):
    """Drop variant records that overlap any of the selected named regions.

    The regions are looked up by name in ``REGIONS`` and stored in an
    interval tree using 0-based, half-open coordinates.  Calling the filter
    with a record returns the record when it lies outside every region and
    ``None`` when it overlaps one.
    """

    # Class-level fallbacks only; __init__ gives every instance its own values.
    intervaltree = None
    region_names = []

    @classmethod
    def customize_parser(cls, parser: argparse.ArgumentParser) -> None:
        """Register the ``--region_filter`` / ``-R`` option on *parser*."""
        parser.add_argument(
            "--region_filter", "-R", action=RegionArgParser, default=[]
        )

    def __init__(self, args: argparse.Namespace) -> None:
        """Build the interval tree for the regions named in *args*.

        Args:
            args: Parsed command line options.  ``args.region_filter``, when
                present, is an iterable of keys into ``REGIONS``; a missing
                or empty value leaves the filter inactive.

        Raises:
            KeyError: If a requested name is not a key of ``REGIONS``.
        """
        super().__init__(args)
        self.intervaltree = IntervalTree()
        # Copy the names so this instance never aliases the class-level list
        # or the list owned by the argparse namespace.
        self.region_names = list(getattr(args, "region_filter", None) or [])
        for name in self.region_names:
            for location in REGIONS[name].regions:
                # convert to 0-based, half open coordinates
                self.intervaltree.add(
                    Interval(location.start - 1, location.end)
                )

    def __repr__(self):
        """Return the class name plus the active region names, if any."""
        name = f"{self.__class__.__name__}"
        if self.region_names:
            name += " on " + ", ".join(self.region_names)
        else:
            name += " (inactive)"
        return name

    def __call__(self, record: Record) -> Union[Record, None]:
        """Return *record* if it overlaps no filtered region, else ``None``."""
        # `retain` is kept as a named local so that debug code is easy to add.
        if record.affected_end < record.affected_start:
            # this is an insert - 0 length feature, so test a single point
            retain = not self.intervaltree.overlaps_point(
                record.affected_start
            )
        else:
            # SNV or MNV (del) - size 1 and above
            retain = not self.intervaltree.overlaps(
                record.affected_start, record.affected_end
            )
        return record if retain else None
class CloseToIndelFilter(Filter):
    """Mask single nucleotide variants that lie too close to an indel.

    When enabled, the constructor reads every record from ``args.input_file``
    and stores a window around each non-SNV record in an interval tree.
    Calling the filter with an SNV record returns ``None`` when its start
    position falls inside one of those windows; every other record is
    returned unchanged.
    """

    # Class-level fallbacks only; __init__ sets per-instance values.
    intervaltree = None
    dist = 0
    active = False

    def __init__(self, args: argparse.Namespace) -> None:
        """Collect the indel windows from the input file.

        Args:
            args: Parsed command line options.  The filter is enabled only
                when ``args.close_to_indel_filter`` is present and truthy; in
                that case ``args.indel_window_size`` and ``args.input_file``
                are read, and ``args.input_file`` is rewound afterwards.
        """
        super().__init__(args)
        self.intervaltree = IntervalTree()
        self.active = bool(getattr(args, "close_to_indel_filter", False))
        if not self.active:
            return

        reader = Reader(args.input_file)
        dist = args.indel_window_size
        self.dist = dist
        for record in reader:
            if record.is_snv():
                continue
            begin = record.affected_start - dist
            if record.affected_end < record.affected_start:
                # this is an insertion, we only have the start site
                end = record.affected_start + dist
            else:
                end = record.affected_end + dist
            # intervaltree raises ValueError for null intervals
            # (begin >= end), which happens e.g. for an insertion with a
            # window size of 0; an empty window masks nothing, so skip it.
            if begin < end:
                self.intervaltree.addi(begin=begin, end=end)
        # Rewind so that the input can be read again from the beginning.
        args.input_file.seek(0)

    def __repr__(self) -> str:
        """Return the class name plus the window size, or an inactive marker."""
        name = f"{self.__class__.__name__}"
        # `active` also covers an enabled filter whose window size is 0; the
        # `dist` check keeps the previous behaviour for instances on which
        # only `dist` was set.
        if self.active or self.dist:
            name += f" (Window {self.dist})"
        else:
            name += " (inactive)"
        return name

    @classmethod
    def customize_parser(cls, parser: argparse.ArgumentParser) -> None:
        """Register the ``-I`` switch and ``--indel_window_size`` on *parser*."""
        parser.add_argument(
            "--close_to_indel_filter",
            "-I",
            action="store_true",
            default=False,
            help="Mask out single nucleotide variants that are too close to indels",
        )
        parser.add_argument(
            "--indel_window_size",
            type=int,
            default=5,
            help="Window around indel to mask out (mask this number of bases upstream/downstream from the indel. Requires -I option to selected)",  # noqa: E501
        )

    def __call__(self, record: Record) -> Union[Record, None]:
        """Return *record* unless it is an SNV inside an indel window."""
        retain = True
        if record.is_snv():
            # we are masking SNVs, only the affected_start is relevant since
            # this is a size 1 feature
            retain = not self.intervaltree.overlaps_point(
                record.affected_start
            )
        return record if retain else None