示例#1
0
def _overlap(scdf, ocdf, **kwargs):
    """Pick the rows of ``scdf`` whose intervals hit (or miss) those in ``ocdf``.

    Required keyword arguments:
        invert: when truthy, keep the index labels of ``scdf`` that were
            *not* reported by the NCLS query instead of those that were.
        how: ``False``/``None`` queries with ``all_overlaps_self``,
            ``"containment"`` with ``all_containments_both`` (only the first
            of the two returned arrays is used), and ``"first"`` with
            ``has_overlaps``.

    Optional keyword arguments:
        return_indexes: when truthy, return the selected index labels
            rather than the reindexed frame (default ``False``).

    Returns ``None`` as soon as either frame is empty; ``how`` is neither
    read nor validated in that case.
    """
    invert = kwargs["invert"]
    return_indexes = kwargs.get("return_indexes", False)

    if scdf.empty or ocdf.empty:
        return None

    how = kwargs["how"]
    assert how in ["containment", "first", False, None]

    # (starts, ends, labels) of the querying frame, passed positionally below.
    query = (scdf.Start.values, scdf.End.values, scdf.index.values)

    tree = NCLS(ocdf.Start.values, ocdf.End.values, ocdf.index.values)

    if not how:
        hits = tree.all_overlaps_self(*query)
    elif how == "containment":
        hits, _ = tree.all_containments_both(*query)
    else:
        hits = tree.has_overlaps(*query)

    # Inversion keeps whichever labels of scdf the query did not return.
    selected = scdf.index.difference(hits) if invert else hits

    return selected if return_indexes else scdf.reindex(selected)
示例#2
0
def _both_dfs(scdf, ocdf, how=False):
    """Return the matching rows of ``scdf`` and ``ocdf`` as two reindexed frames.

    ``ocdf`` is first given a fresh default index (``reset_index(drop=True)``),
    so the labels handed to NCLS and used for the final ``reindex`` of ``ocdf``
    are the ones created by that reset, not the caller's original labels.

    ``how`` selects the NCLS query: falsy -> ``all_overlaps_both``,
    ``"containment"`` -> ``all_containments_both``, anything else accepted by
    the assert (i.e. ``"first"``) -> ``first_overlap_both``.
    """
    assert how in ["containment", "first", False, None]

    query = (scdf.Start.values, scdf.End.values, scdf.index.values)

    ocdf = ocdf.reset_index(drop=True)
    tree = NCLS(ocdf.Start.values, ocdf.End.values, ocdf.index.values)

    if not how:
        find_pairs = tree.all_overlaps_both
    elif how == "containment":
        find_pairs = tree.all_containments_both
    else:
        find_pairs = tree.first_overlap_both

    self_labels, other_labels = find_pairs(*query)

    return scdf.reindex(self_labels), ocdf.reindex(other_labels)
示例#3
0
def _intersection(scdf, ocdf, kwargs):
    """Clip the intervals of ``scdf`` to their intersection with ``ocdf``.

    Args:
        scdf: frame with ``Start`` and ``End`` columns; its rows are the
            ones returned (reindexed and with updated coordinates).
        ocdf: frame with ``Start`` and ``End`` columns that is indexed with
            NCLS and queried with the intervals of ``scdf``.
        kwargs: plain dict (not ``**kwargs``); must contain ``"how"``, one of
            ``False``/``None`` (``all_overlaps_both``), ``"containment"``
            (``all_containments_both``), ``"first"`` (``first_overlap_both``)
            or ``"last"`` (``last_overlap_both``).

    Returns:
        The reindexed ``scdf`` whose ``Start`` is the larger of the two
        starts and whose ``End`` is the smaller of the two ends for every
        matched pair, or ``None`` when either input is empty or nothing
        matched.
    """
    how = kwargs["how"]

    if ocdf.empty or scdf.empty:
        return None

    assert how in "containment first last".split() + [False, None]
    starts = scdf.Start.values
    ends = scdf.End.values
    indexes = scdf.index.values

    # Remember the coordinate dtype so the new columns are built with it.
    in_dtype = ocdf.Start.dtype

    oncls = NCLS(ocdf.Start.values, ocdf.End.values, ocdf.index.values)

    if not how:
        _self_indexes, _other_indexes = oncls.all_overlaps_both(
            starts, ends, indexes)
    elif how == "containment":
        _self_indexes, _other_indexes = oncls.all_containments_both(
            starts, ends, indexes)
    elif how == "first":
        _self_indexes, _other_indexes = oncls.first_overlap_both(
            starts, ends, indexes)
    elif how == "last":
        _self_indexes, _other_indexes = oncls.last_overlap_both(
            starts, ends, indexes)

    # After this, row i of scdf and row i of ocdf form one matched pair.
    scdf, ocdf = scdf.reindex(_self_indexes), ocdf.reindex(_other_indexes)

    self_starts, other_starts = scdf.Start.values, ocdf.Start.values
    self_ends, other_ends = scdf.End.values, ocdf.End.values

    new_starts = pd.Series(
        np.where(self_starts > other_starts, self_starts, other_starts),
        index=scdf.index,
        dtype=in_dtype)

    new_ends = pd.Series(
        np.where(self_ends < other_ends, self_ends, other_ends),
        index=scdf.index,
        dtype=in_dtype)

    # Silence the chained-assignment check only for these two writes.  The
    # context manager restores whatever value the caller had configured
    # (instead of forcing 'warn') and does so even if an assignment raises.
    with pd.option_context("mode.chained_assignment", None):
        scdf.loc[:, "Start"] = new_starts
        scdf.loc[:, "End"] = new_ends

    return None if scdf.empty else scdf
示例#4
0
def test_all_containments_both():
    """Query an NCLS with the very intervals it was built from.

    ``all_containments_both`` is expected to report ids ``[0, 1]`` on both
    the query side and the indexed side.
    """
    # Use an explicit 64-bit dtype: plain ``int`` can map to a 32-bit integer
    # on some platforms (e.g. Windows with NumPy < 2), and 3002335232 does
    # not fit in 32 bits.
    starts = np.array([1291845632, 3002335232], dtype=np.int64)
    ends = np.array([1292894207, 3002597375], dtype=np.int64)
    ids = np.array([0, 1], dtype=np.int64)

    ncls = NCLS(starts, ends, ids)
    subs, covers = ncls.all_containments_both(starts, ends, ids)

    # Shown by pytest on failure; also exercises ``intervals()``.
    print(ncls.intervals())

    assert list(subs) == [0, 1] == list(covers)
示例#5
0
    def test_all_containments_both():
        """Build an NCLS from two intervals and query it with the same two.

        Both arrays returned by ``all_containments_both`` must list ids 0 and 1.
        """
        interval_starts = np.array([5, 10], dtype=np.int64)
        interval_ends = np.array([6, 50], dtype=np.int64)
        interval_ids = np.array([0, 1], dtype=np.int64)

        tree = NCLS(interval_starts, interval_ends, interval_ids)
        contained, containing = tree.all_containments_both(
            interval_starts, interval_ends, interval_ids)

        # Printed output is only visible when the test fails or runs with -s.
        print(tree.intervals())

        expected_ids = [0, 1]
        assert list(contained) == expected_ids
        assert expected_ids == list(containing)
示例#6
0
def _both_indexes(scdf, ocdf, how=False):
    """Return paired index labels of matching rows in ``scdf`` and ``ocdf``.

    Args:
        scdf: frame with ``Start``/``End`` columns used as the query.
        ocdf: frame with ``Start``/``End`` columns indexed with NCLS.
        how: selects the query.
            * falsy (``False``, ``None``, ``0``): ``all_overlaps_both``
            * ``"containment"``: ``all_containments_both``
            * ``"first"`` / ``"last"``: ``first_overlap_both`` /
              ``last_overlap_both``
            * ``"outer"``, ``"left"``, ``"right"``: ``all_overlaps_both``
              followed by join-style padding, where rows without a partner
              are appended and paired with the placeholder label ``-1``.

    Returns:
        ``(_self_indexes, _other_indexes)``: two equally long arrays where
        position ``i`` of each forms one pair.

    Raises:
        ValueError: if ``how`` is truthy but not one of the named modes.
            The assert below lets any ``int`` through, yet no branch handles
            a non-zero one; previously that surfaced as an
            ``UnboundLocalError`` at the ``return`` statement.
    """
    assert (how in "containment first last outer right left".split() +
            [False, None]) or isinstance(how, int)

    join_modes = ("outer", "left", "right")
    if how and how not in ("containment", "first", "last") + join_modes:
        raise ValueError("unsupported value for how: {!r}".format(how))

    starts = scdf.Start.values
    ends = scdf.End.values
    indexes = scdf.index.values

    it = NCLS(ocdf.Start.values, ocdf.End.values, ocdf.index.values)

    if not how:
        _self_indexes, _other_indexes = it.all_overlaps_both(
            starts, ends, indexes)
    elif how == "containment":
        _self_indexes, _other_indexes = it.all_containments_both(
            starts, ends, indexes)
    elif how == "first":
        _self_indexes, _other_indexes = it.first_overlap_both(
            starts, ends, indexes)
    elif how == "last":
        _self_indexes, _other_indexes = it.last_overlap_both(
            starts, ends, indexes)
    else:  # "outer", "left" or "right"
        _self_indexes, _other_indexes = it.all_overlaps_both(
            starts, ends, indexes)

        # Labels present in a frame but absent from the overlap result.
        missing_in_s = scdf.index.difference(_self_indexes)
        missing_in_o = ocdf.index.difference(_other_indexes)

        self_parts = [_self_indexes]
        other_parts = [_other_indexes]

        if how in ("outer", "left"):
            # Unmatched rows of scdf, each paired with -1 on the other side.
            self_parts.append(missing_in_s)
            other_parts.append(np.full(len(missing_in_s), -1, dtype=int))

        if how in ("outer", "right"):
            # Unmatched rows of ocdf, each paired with -1 on the self side.
            self_parts.append(np.full(len(missing_in_o), -1, dtype=int))
            other_parts.append(missing_in_o)

        _self_indexes = np.concatenate(self_parts)
        _other_indexes = np.concatenate(other_parts)

    return _self_indexes, _other_indexes
示例#7
0
def _both_indexes(scdf, ocdf, how=False):
    """Return the two index arrays produced by querying ``ocdf`` with ``scdf``.

    ``how`` picks the NCLS method: a falsy value uses ``all_overlaps_both``,
    ``"containment"`` uses ``all_containments_both``, and every other value
    that passes the assert (``"first"`` or any truthy int) uses
    ``first_overlap_both``.

    The result is the ``(self, other)`` pair exactly as NCLS returns it.
    """
    allowed = ["containment", "first", False, None]
    assert (how in allowed) or isinstance(how, int)

    query = (scdf.Start.values, scdf.End.values, scdf.index.values)

    tree = NCLS(ocdf.Start.values, ocdf.End.values, ocdf.index.values)

    if not how:
        find_pairs = tree.all_overlaps_both
    elif how == "containment":
        find_pairs = tree.all_containments_both
    else:
        find_pairs = tree.first_overlap_both

    self_labels, other_labels = find_pairs(*query)

    return self_labels, other_labels
示例#8
0
def _both_dfs(scdf, ocdf, how=False, **kwargs):
    """Return the matching rows of ``scdf`` and ``ocdf``, selected with ``.loc``.

    ``how`` picks the NCLS query: falsy -> ``all_overlaps_both``,
    ``"containment"`` -> ``all_containments_both``, otherwise (``"first"``)
    -> ``first_overlap_both``.  The labels NCLS returns are looked up in each
    frame by label.

    Extra keyword arguments are accepted and ignored.
    """
    assert how in ["containment", "first", False, None]

    query = (scdf.Start.values, scdf.End.values, scdf.index.values)

    tree = NCLS(ocdf.Start.values, ocdf.End.values, ocdf.index.values)

    if not how:
        find_pairs = tree.all_overlaps_both
    elif how == "containment":
        find_pairs = tree.all_containments_both
    else:
        find_pairs = tree.first_overlap_both

    self_labels, other_labels = find_pairs(*query)

    self_rows = scdf.loc[self_labels]
    other_rows = ocdf.loc[other_labels]
    return self_rows, other_rows