Пример #1
0
    def cluster(self, strand=None, by=None, **kwargs):
        """Add a ``Cluster`` column whose ids do not repeat between frames.

        Parameters
        ----------
        strand : bool or None
            ``None`` falls back to ``self.stranded``.
        by : optional
            When truthy it is forwarded as ``kwargs["by"]`` and
            ``_cluster_by`` is applied instead of ``_cluster``.
        **kwargs
            Extra options; passed through ``fill_kwargs``.

        Returns
        -------
        PyRanges
            A new object built from the renumbered frames.
        """
        if strand is None:
            strand = self.stranded

        kwargs = fill_kwargs(kwargs)
        kwargs["sparse"] = {"self": False}

        was_stranded = self.stranded
        ignore_strand = bool(not strand and was_stranded)

        if ignore_strand:
            # Copy Strand into Strand2 before unstranding; it is renamed back
            # to Strand once the clusters have been computed.
            self.Strand2 = self.Strand
            self = self.unstrand()

        if by:
            from pyranges.methods.cluster import _cluster_by
            kwargs["by"] = by
            method = _cluster_by
        else:
            from pyranges.methods.cluster import _cluster
            method = _cluster

        gr = PyRanges(pyrange_apply_single(method, self, strand, kwargs))

        # Every frame numbers its clusters independently, so each frame after
        # the first is shifted by the largest id of the frame before it.
        renumbered = {}
        offset = None
        for key, frame in gr.items():
            if offset is not None:
                frame.loc[:, "Cluster"] += offset
            offset = frame.Cluster.max()
            renumbered[key] = frame

        if ignore_strand:
            renumbered = {
                key: frame.rename(columns={"Strand2": "Strand"})
                for key, frame in renumbered.items()
            }

        return PyRanges(renumbered)
Пример #2
0
def _to_rle(ranges, value_col=None, strand=True, rpm=False, **kwargs):
    """Compute coverage for ``ranges`` with pyrle and return it as ``PyRles``.

    Parameters
    ----------
    ranges
        Object handed to ``pyrange_apply_single``; must support ``len()``
        when ``rpm`` is true.
    value_col : optional
        Forwarded as the ``value_col`` option.
    strand : bool
        Forwarded as the ``strand`` option.
    rpm : bool
        When true, every result is multiplied by ``1e6 / len(ranges)``.
    **kwargs
        Extra options; ``strand``, ``value_col`` and ``sparse`` are always
        overwritten by the values set here.

    Raises
    ------
    Exception
        If pyrle cannot be imported. The original ``ImportError`` is attached
        as ``__cause__``.
    """
    try:
        from pyrle.methods import coverage
        from pyrle import PyRles
    except ImportError as err:
        # Chain explicitly so the traceback shows which import failed.
        raise Exception(
            "Using the coverage method requires that pyrle is installed."
        ) from err

    kwargs.update({
        "strand": strand,
        "value_col": value_col,
        "sparse": {
            "self": False
        },  # already sparse
    })

    result = pyrange_apply_single(coverage, ranges, **kwargs)

    if rpm:
        multiplier = 1e6 / len(ranges)
        result = {k: v * multiplier for k, v in result.items()}

    return PyRles(result)
Пример #3
0
    def slack(self, slack):
        """Apply ``_slack`` with the given ``slack`` and return a new PyRanges.

        The strand flag passed to ``pyrange_apply_single`` is
        ``self.stranded``.
        """
        options = fill_kwargs({"slack": slack})
        frames = pyrange_apply_single(_slack, self, self.stranded, options)
        return PyRanges(frames)
Пример #4
0
def _coverage(ranges, value_col=None, strand=True, rpm=False, **kwargs):
    """Compute coverage for ``ranges`` with pyrle and return it as ``PyRles``.

    Parameters
    ----------
    ranges
        Object handed to ``pyrange_apply_single``; must support ``len()``
        when ``rpm`` is true.
    value_col : optional
        Forwarded as the ``value_col`` option.
    strand : bool
        Passed positionally to ``pyrange_apply_single``.
    rpm : bool
        When true, every result is multiplied by ``1e6 / len(ranges)``.
    **kwargs
        Accepted for call compatibility only.

    Raises
    ------
    Exception
        If pyrle cannot be imported. The original ``ImportError`` is attached
        as ``__cause__``.
    """
    try:
        from pyrle.methods import coverage
        from pyrle import PyRles
    except ImportError as err:
        # Chain explicitly so the traceback shows which import failed.
        raise Exception(
            "Using the coverage method requires that pyrle is installed."
        ) from err

    # NOTE(review): caller-supplied **kwargs are discarded here, exactly as
    # before -- confirm whether they were meant to be merged in instead.
    kwargs = {
        "value_col": value_col,
        "sparse": {
            "self": False
        }
    }  # already sparse

    result = pyrange_apply_single(coverage, ranges, strand, kwargs)

    if rpm:
        multiplier = 1e6 / len(ranges)
        result = {k: v * multiplier for k, v in result.items()}

    return PyRles(result)
Пример #5
0
 def sort(self, by=None, **kwargs):
     """Apply ``_sort`` and return the result as a new PyRanges.

     A truthy ``by`` is forwarded as ``kwargs["by"]``; the options are then
     passed through ``fill_kwargs``.
     """
     from pyranges.methods.sort import _sort

     options = dict(kwargs, sparse={"self": False})
     if by:
         options["by"] = by
     options = fill_kwargs(options)

     frames = pyrange_apply_single(_sort, self, self.stranded, options)
     return PyRanges(frames)
Пример #6
0
    def merge(self, strand=None, **kwargs):
        """Apply ``_merge`` and return the result as a new PyRanges.

        ``strand`` is handed to ``pyrange_apply_single`` unchanged (including
        ``None``); the ``sparse`` option is always set to ``{"self": True}``.
        """
        from pyranges.methods.merge import _merge

        kwargs.update(sparse={"self": True})
        merged = pyrange_apply_single(_merge, self, strand, kwargs)
        return PyRanges(merged)
Пример #7
0
    def merge(self, strand=None, count=False, **kwargs):
        """Merge via ``_merge`` or, when ``by`` is given, ``_merge_by``.

        Parameters
        ----------
        strand : bool or None
            ``None`` falls back to ``self.stranded``.
        count : bool
            When false, the ``Count`` column is dropped from every frame.
        **kwargs
            Extra options; the presence of a ``"by"`` key selects
            ``_merge_by``.
        """
        if strand is None:
            strand = self.stranded

        if "by" in kwargs:
            kwargs["sparse"] = {"self": False}
            from pyranges.methods.merge import _merge_by
            method = _merge_by
        else:
            kwargs["sparse"] = {"self": True}
            from pyranges.methods.merge import _merge
            method = _merge

        frames = pyrange_apply_single(method, self, strand, kwargs)

        if count:
            return PyRanges(frames)

        without_count = {
            key: frame.drop("Count", axis=1)
            for key, frame in frames.items()
        }
        return PyRanges(without_count)
Пример #8
0
    def tile(self, tile_size, strand=None, **kwargs):
        """Apply ``_tiles`` with ``tile_size`` and return a new PyRanges.

        ``strand`` is handed to ``pyrange_apply_single`` unchanged.
        """
        from pyranges.methods.windows import _tiles

        kwargs.update(sparse={"self": False}, tile_size=tile_size)
        tiled = pyrange_apply_single(_tiles, self, strand, kwargs)
        return PyRanges(tiled)
Пример #9
0
    def window(self, window_size, strand=None, **kwargs):
        """Apply ``_windows`` with ``window_size`` and return a new PyRanges.

        ``strand`` is handed to ``pyrange_apply_single`` unchanged.
        """
        from pyranges.methods.windows import _windows

        kwargs.update(sparse={"self": False}, window_size=window_size)
        windowed = pyrange_apply_single(_windows, self, strand, kwargs)
        return PyRanges(windowed)
Пример #10
0
    def apply(self, f, strand=False, as_pyranges=True, **kwargs):
        """Run ``f`` through ``pyrange_apply_single``.

        Parameters
        ----------
        f : callable
            Function to apply.
        strand : bool
            Passed positionally and also stored as ``kwargs["strand"]``.
        as_pyranges : bool
            When true the result is wrapped in ``PyRanges``; otherwise the raw
            result is returned.
        **kwargs
            Extra options; passed through ``fill_kwargs``.
        """
        kwargs["strand"] = strand
        options = fill_kwargs(kwargs)

        outcome = pyrange_apply_single(f, self, strand, options)

        return PyRanges(outcome) if as_pyranges else outcome
Пример #11
0
    def slack(self, slack):
        """Apply ``_slack`` with the given ``slack`` and return a new PyRanges.

        Parameters
        ----------
        slack
            Slack specification. A ``dict`` is only accepted when
            ``self.stranded`` is true.

        Raises
        ------
        AssertionError
            If ``slack`` is a dict and the object is not stranded.
        """
        # Raised explicitly rather than via ``assert`` so the check is still
        # enforced when Python runs with -O.
        if isinstance(slack, dict) and not self.stranded:
            raise AssertionError(
                "PyRanges must be stranded to add 5/3-end specific slack.")

        kwargs = fill_kwargs({"slack": slack})

        prg = PyRanges(
            pyrange_apply_single(_slack, self, self.stranded, kwargs))

        return prg
Пример #12
0
    def split(self, strand=None, **kwargs):
        """Apply ``_split`` and return the result as ``pr.PyRanges``.

        ``strand=None`` falls back to ``self.stranded``; the options are
        passed through ``fill_kwargs``.
        """
        strand = self.stranded if strand is None else strand
        options = fill_kwargs(kwargs)

        from pyranges.methods.split import _split

        pieces = pyrange_apply_single(_split, self, strand, options)
        return pr.PyRanges(pieces)
Пример #13
0
    def drop_duplicate_positions(self, strand=None, **kwargs):
        """Apply ``_drop_duplicate_positions`` and return a new PyRanges.

        ``strand=None`` falls back to ``self.stranded``. The ``strand`` option
        given to the worker is ``strand and self.stranded``.
        """
        from pyranges.methods.drop_duplicates import _drop_duplicate_positions

        if strand is None:
            strand = self.stranded

        kwargs.update(sparse={"self": False})
        options = fill_kwargs(kwargs)
        options["strand"] = strand and self.stranded

        deduplicated = pyrange_apply_single(
            _drop_duplicate_positions, self, strand, options)
        return PyRanges(deduplicated)
Пример #14
0
    def eval(self, eval_cmd, strand=True, as_pyranges=True, **kwargs):
        """Evaluate the expression string ``eval_cmd`` with ``df`` in scope.

        Parameters
        ----------
        eval_cmd : str
            Expression handed to the builtin ``eval``; it can refer to the
            argument as ``df``. SECURITY: this executes arbitrary Python, so
            never pass a string from an untrusted source.
        strand : bool
            Passed to ``pyrange_apply_single``.
        as_pyranges : bool
            When true the result is wrapped in ``PyRanges``; otherwise the raw
            result is returned.
        **kwargs
            Extra options; passed through ``fill_kwargs``.
        """

        def f(df):
            # The parameter must stay named ``df``: the expression refers to
            # it by that name.
            return eval(eval_cmd)

        options = fill_kwargs(kwargs)

        outcome = pyrange_apply_single(f, self, strand, options)

        if as_pyranges:
            return PyRanges(outcome)
        return outcome
Пример #15
0
    def new_position(self, new_pos, strand=None, **kwargs):
        """Apply ``_new_position`` with ``new_pos`` and return ``pr.PyRanges``.

        ``strand=None`` falls back to ``self.stranded``; the options are
        passed through ``fill_kwargs``.
        """
        from pyranges.methods.new_position import _new_position

        kwargs.update(sparse={"self": False}, new_pos=new_pos)
        options = fill_kwargs(kwargs)

        strand = self.stranded if strand is None else strand

        repositioned = pyrange_apply_single(
            _new_position, self, strand, options)
        return pr.PyRanges(repositioned)
Пример #16
0
    def apply(self, f, strand=False, as_pyranges=True, kwargs=None):
        """Wrap ``f`` with ``ray.remote`` and run it via ``pyrange_apply_single``.

        Parameters
        ----------
        f : callable
            Function to apply; it is wrapped with ``ray.remote`` first.
        strand : bool
            Passed to ``pyrange_apply_single``.
        as_pyranges : bool
            When true the result is wrapped in ``PyRanges``; otherwise the raw
            result is returned.
        kwargs : dict or None
            Options dictionary; a falsy value is replaced by an empty dict
            before ``fill_kwargs`` is called.
        """
        options = fill_kwargs(kwargs if kwargs else {})

        remote_f = ray.remote(f)

        outcome = pyrange_apply_single(remote_f, self, strand, options)

        return PyRanges(outcome) if as_pyranges else outcome
Пример #17
0
    def apply(self, f, strand=None, as_pyranges=True, **kwargs):
        """Run ``f`` through ``pyrange_apply_single``.

        Parameters
        ----------
        f : callable
            Function to apply.
        strand : bool or None
            ``None`` falls back to ``self.stranded``; the value is also stored
            as ``kwargs["strand"]``.
        as_pyranges : bool
            When true the result is wrapped in ``PyRanges``; otherwise the raw
            result is returned.
        **kwargs
            Extra options. A nested ``kwargs`` entry, if present, is merged
            into the top level before ``fill_kwargs`` is called.
        """
        if strand is None:
            strand = self.stranded

        kwargs["strand"] = strand
        nested = kwargs.get("kwargs", {})
        kwargs.update(nested)
        options = fill_kwargs(kwargs)

        outcome = pyrange_apply_single(f, self, strand, options)

        if as_pyranges:
            return PyRanges(outcome)
        return outcome
Пример #18
0
    def set_columns(self, value):
        """Rename every column to the names in ``value``.

        Parameters
        ----------
        value : sequence
            New column names; must have as many entries as ``self.columns``.

        Returns
        -------
        pr.PyRanges
            Built from the frames after their ``columns`` were reassigned.

        Raises
        ------
        AssertionError
            If ``value`` and ``self.columns`` differ in length.
        """
        # Raised explicitly rather than via ``assert`` so the check is still
        # enforced when Python runs with -O.
        if len(value) != len(self.columns):
            raise AssertionError("New and old columns must be same length")

        def _columns(df):
            # Assigns on the frame it receives, then hands that frame back.
            df.columns = value
            return df

        return pr.PyRanges(
            pyrange_apply_single(_columns,
                                 self,
                                 strand=None,
                                 kwargs={"sparse": {
                                     "self": False
                                 }}))
Пример #19
0
    def assign(self, col, function, strand=False, **kwargs):
        """Return a copy of this object with ``col`` set to ``function``'s output.

        Parameters
        ----------
        col : str
            Attribute (column) name to set on the copy.
        function : callable
            Applied through ``pyrange_apply_single``; must yield ``pd.Series``.
        strand : bool
            Passed to ``pyrange_apply_single``.
        **kwargs
            Extra options; passed through ``fill_kwargs``.

        Returns
        -------
        pr.PyRanges
            A copy of ``self`` (each frame copied) with ``col`` assigned. When
            the function produced no results the plain copy is returned.

        Raises
        ------
        AssertionError
            If the first result is not a ``pd.Series``.
        """
        kwargs = fill_kwargs(kwargs)

        result = pyrange_apply_single(function, self, strand, kwargs)

        if result:
            first_result = next(iter(result.values()))

            # Explicit raise (not ``assert``) so the check survives -O;
            # isinstance also accepts Series subclasses.
            if not isinstance(first_result, pd.Series):
                raise AssertionError(
                    "result of assign function must be Series, but is {}".format(
                        type(first_result)))

        # Copy every frame so the caller's object is left untouched.
        new_self = pr.PyRanges({k: v.copy() for k, v in self.items()})

        # With no results there is nothing to assign (this used to fail with
        # StopIteration); return the plain copy.
        if result:
            setattr(new_self, col, result)

        return new_self
Пример #20
0
def _coverage(ranges, value_col=None, strand=True, rpm=False):
    """Compute coverage for ``ranges`` with pyrle and return it as ``PyRles``.

    Parameters
    ----------
    ranges
        Object handed to ``pyrange_apply_single``; must support ``len()``
        when ``rpm`` is true.
    value_col : optional
        Forwarded as the ``value_col`` option. Only when it is ``None`` is
        the ``sparse`` option set (to ``{"self": True}``).
    strand : bool
        Passed positionally to ``pyrange_apply_single``.
    rpm : bool
        When true, every result is multiplied by ``1e6 / len(ranges)``.

    Raises
    ------
    Exception
        If pyrle cannot be imported. The original ``ImportError`` is attached
        as ``__cause__``.
    """
    try:
        from pyrle.methods import coverage
        from pyrle import PyRles
    except ImportError as err:
        # Chain explicitly so the traceback shows which import failed.
        raise Exception(
            "Using the coverage method requires that pyrle is installed."
        ) from err

    kwargs = {"value_col": value_col}
    if value_col is None:
        kwargs["sparse"] = {"self": True}

    result = pyrange_apply_single(coverage, ranges, strand, kwargs)

    if rpm:
        multiplier = 1e6 / len(ranges)
        result = {k: v * multiplier for k, v in result.items()}

    return PyRles(result)
Пример #21
0
    def subset(self, function, strand=None, **kwargs):
        """Index this object by the boolean results of ``function``.

        Parameters
        ----------
        function : callable
            Applied through ``pyrange_apply_single``; its results must have
            dtype ``bool``.
        strand : bool or None
            ``None`` falls back to ``self.stranded``. If the object is
            stranded but ``strand`` is false, it is unstranded first.
        **kwargs
            Extra options; passed through ``fill_kwargs``.

        Returns
        -------
        PyRanges
            ``self[result]``, or an empty ``pr.PyRanges()`` when there are no
            results.

        Raises
        ------
        AssertionError
            If the first result does not have dtype ``bool``.
        """
        kwargs = fill_kwargs(kwargs)

        if strand is None:
            strand = self.stranded

        if self.stranded and not strand:
            self = self.unstrand()

        result = pyrange_apply_single(function, self, strand, kwargs)

        if not result:
            return pr.PyRanges()

        first_result = next(iter(result.values()))

        # Raised explicitly rather than via ``assert`` so the check is still
        # enforced when Python runs with -O.
        if not first_result.dtype == bool:
            raise AssertionError(
                "result of subset function must be bool, but is {}".format(
                    first_result.dtype))

        return self[result]
Пример #22
0
    def tesify(self, slack=0):
        """Apply ``_tes`` with the given ``slack`` and return a new PyRanges.

        The strand flag passed to ``pyrange_apply_single`` is
        ``self.stranded``.
        """
        options = fill_kwargs({"slack": slack})
        frames = pyrange_apply_single(_tes, self, self.stranded, options)
        return PyRanges(frames)
Пример #23
0
 def sort(self, columns=("Start", "End"), **kwargs):
     """Apply ``_sort`` and return the result as a new PyRanges.

     NOTE(review): ``columns`` is accepted but never read or forwarded in
     this method -- confirm whether it should reach ``_sort``.
     """
     from pyranges.methods.sort import _sort

     kwargs.update(sparse={"self": False})
     options = fill_kwargs(kwargs)

     frames = pyrange_apply_single(_sort, self, self.stranded, options)
     return PyRanges(frames)
Пример #24
0
    def three_end(self, slack=0):
        """Apply ``_tes`` with the given ``slack`` and return a new PyRanges.

        Parameters
        ----------
        slack : optional
            Forwarded as the ``slack`` option (default ``0``).

        Raises
        ------
        AssertionError
            If the object is not stranded.
        """
        # Raised explicitly rather than via ``assert`` so the check is still
        # enforced when Python runs with -O.
        if not self.stranded:
            raise AssertionError("Need stranded pyrange to find 3'.")

        kwargs = fill_kwargs({"slack": slack})
        return PyRanges(pyrange_apply_single(_tes, self, self.stranded,
                                             kwargs))