Exemplo n.º 1
0
class RegionFilter(Filter):
    """Filter that rejects records overlapping any selected named region.

    Regions are selected on the command line with ``--region_filter``/``-R``.
    Each selected name is looked up in ``REGIONS`` and every location of that
    entry is loaded into a single interval tree, stored in 0-based, half-open
    coordinates.

    NOTE(review): the tree stores positions only (no chromosome/contig key),
    so this presumably expects single-contig input -- confirm.
    """

    # Interval tree of masked regions; replaced by a fresh tree per instance.
    intervaltree = None
    # Names of the regions this instance masks (empty means inactive).
    region_names = []

    @classmethod
    def customize_parser(cls, parser: argparse.ArgumentParser) -> None:
        """Register the ``--region_filter``/``-R`` option on *parser*."""
        parser.add_argument("--region_filter",
                            "-R",
                            action=RegionArgParser,
                            default=[])

    def __init__(self, args: argparse.Namespace) -> None:
        """Build the interval tree from the regions named in *args*.

        Args:
            args: Parsed command-line namespace. If it has a
                ``region_filter`` attribute, each of its items is used as a
                key into ``REGIONS``.
        """
        super().__init__(args)
        self.intervaltree = IntervalTree()
        if hasattr(args, "region_filter"):
            self.region_names = args.region_filter
            for name in args.region_filter:
                for location in REGIONS[name].regions:
                    # convert to 0-based, half open coordinates
                    self.intervaltree.add(
                        Interval(location.start - 1, location.end))

    def __repr__(self) -> str:
        """Return the class name plus the active region names, if any."""
        name = f"{self.__class__.__name__}"
        if self.region_names:
            name += " on " + ", ".join(self.region_names)
        else:
            name += " (inactive)"
        return name

    def __call__(self, record: Record) -> Union[Record, None]:
        """Return *record* if it lies outside all masked regions, else None.

        Args:
            record: Variant record exposing ``affected_start`` and
                ``affected_end``.

        Returns:
            The unchanged record when it is retained, ``None`` when it
            overlaps a masked region.

        NOTE(review): assumes ``affected_start``/``affected_end`` are 0-based,
        half-open like the tree, so that an insertion is reported as a
        zero-length site with ``affected_end == affected_start`` (as vcfpy
        does) -- confirm against the Record implementation in use.
        """
        start = record.affected_start
        end = record.affected_end
        if end <= start:
            # Insert: zero-length feature. A range query over an empty range
            # never matches anything, so such sites need a point query to be
            # filtered at all. ``<=`` also covers records that encode the
            # empty site as end < start.
            overlapping = self.intervaltree.overlaps_point(start)
        else:
            # SNV or MNV (del) - size 1 and above
            overlapping = self.intervaltree.overlaps(start, end)
        return None if overlapping else record
class CloseToIndelFilter(Filter):
    """Filter that rejects SNVs lying within a window around any non-SNV.

    When enabled with ``--close_to_indel_filter``/``-I``, the constructor
    makes one pass over ``args.input_file``, records a window of
    ``--indel_window_size`` positions on either side of every non-SNV record
    in an interval tree, and then rewinds the input. Calling the filter on a
    record afterwards rejects SNVs whose start falls inside such a window.

    NOTE(review): the tree stores positions only (no chromosome/contig key),
    so this presumably expects single-contig input -- confirm.
    """

    # Interval tree of masked windows; replaced by a fresh tree per instance.
    intervaltree = None
    # Window size in use; 0 is reported as "inactive" by __repr__.
    dist = 0

    def __init__(self, args: argparse.Namespace) -> None:
        """Collect the masking windows from the input file, if enabled.

        Args:
            args: Parsed command-line namespace. ``close_to_indel_filter``
                switches the filter on; ``indel_window_size`` and
                ``input_file`` are read only when it is on. ``input_file``
                must support ``seek(0)`` because it is rewound after the
                pre-scan.
        """
        super().__init__(args)
        self.intervaltree = IntervalTree()
        if getattr(args, "close_to_indel_filter", False):
            reader = Reader(args.input_file)
            dist = args.indel_window_size
            self.dist = dist
            for record in reader:
                if record.is_snv():
                    continue
                begin = record.affected_start - dist
                # For an insertion only the start site is meaningful, so the
                # window never ends before affected_start + dist.
                end = max(record.affected_start, record.affected_end) + dist
                # An empty window (e.g. a zero-length insertion site with a
                # window size of 0) masks nothing, and the interval tree
                # rejects null intervals with ValueError, so skip it.
                if begin < end:
                    self.intervaltree.addi(begin=begin, end=end)
            # Rewind so the input can be read again after this pre-scan.
            args.input_file.seek(0)

    def __repr__(self) -> str:
        """Return the class name plus the window size, or an inactive tag."""
        name = f"{self.__class__.__name__}"
        if self.dist:
            name += f" (Window {self.dist})"
        else:
            name += " (inactive)"
        return name

    @classmethod
    def customize_parser(cls, parser: argparse.ArgumentParser) -> None:
        """Register ``--close_to_indel_filter``/``-I`` and
        ``--indel_window_size`` on *parser*."""
        parser.add_argument(
            "--close_to_indel_filter",
            "-I",
            action="store_true",
            default=False,
            help=
            "Mask out single nucleotide variants that are too close to indels",
        )
        parser.add_argument(
            "--indel_window_size",
            type=int,
            default=5,
            help=
            "Window around indel to mask out (mask this number of bases upstream/downstream from the indel. Requires -I option to selected)",  # noqa: E501
        )

    def __call__(self, record: Record) -> Union[Record, None]:
        """Return *record* unless it is an SNV inside a masked window.

        Args:
            record: Variant record exposing ``is_snv()`` and
                ``affected_start``.

        Returns:
            ``None`` for an SNV whose start lies in a masked window; the
            unchanged record otherwise (non-SNVs are always retained).
        """
        # Only SNVs are masked; they are size-1 features, so the start
        # position alone decides whether they fall inside a window.
        if record.is_snv() and self.intervaltree.overlaps_point(
                record.affected_start):
            return None
        return record