Exemplo n.º 1
0
def semi_join(left, right=None, on=None):
    """Return the rows of ``left`` that have at least one match in ``right``.

    Only columns of ``left`` are returned, and every row of ``left`` appears
    at most once regardless of how many rows of ``right`` it matches.

    Parameters
    ----------
    left : pandas.DataFrame
        Table whose rows are filtered.
    right : pandas.DataFrame
        Table supplying the key values to match against.
    on : None, str, list-like or Mapping, optional
        How to match rows:

        * ``None`` -- use every column name present in both tables.
        * ``str`` -- a single column name present in both tables.
        * list-like -- several column names present in both tables.
        * ``Mapping`` -- ``{left_column: right_column}`` pairs, for keys that
          are named differently in the two tables.

    Returns
    -------
    pandas.DataFrame
        The matching rows of ``left``, in their original order and with
        their original index labels.

    Raises
    ------
    Exception
        If ``on`` is ``None`` and the tables share no column names.
    """
    if isinstance(on, Mapping):
        # coerce colnames to list, to avoid indexing with tuples
        on_cols, right_on = map(list, zip(*on.items()))
        # ``columns=`` is required: a bare mapping relabels the index instead.
        right = right[right_on].rename(columns=dict(zip(right_on, on_cols)))
    elif on is None:
        # Keep a list (in left-column order) so the keys can be indexed and
        # passed to pandas as column selectors; a set supports neither.
        right_columns = set(right.columns)
        on_cols = [col for col in left.columns if col in right_columns]
        if not on_cols:
            raise Exception(
                "No joining column specified, and no shared column names")
    elif isinstance(on, str):
        on_cols = [on]
    else:
        on_cols = list(on)

    # Single key: a plain membership test is enough.
    if len(on_cols) == 1:
        col_name = on_cols[0]
        return left.loc[left[col_name].isin(right[col_name])]

    # Several keys: left-merge the key columns against the de-duplicated
    # right keys. Because the right keys are unique, the merge yields exactly
    # one row per left row, in left order, so the indicator column lines up
    # positionally with ``left``.
    flag = "__semi_join_source__"
    right_keys = right[on_cols].drop_duplicates()
    flagged = left[on_cols].merge(
        right_keys, on=on_cols, how="left", indicator=flag)
    matched = (flagged[flag] == "both").to_numpy()
    return left.loc[matched]
Exemplo n.º 2
0
def anti_join(left, right=None, on=None):
    """Return the rows of ``left`` that have no match in ``right``.

    These are exactly the rows of ``left`` that an inner join with ``right``
    would drop. Only columns of ``left`` are returned.

    Parameters
    ----------
    left : pandas.DataFrame
        Table whose rows are filtered.
    right : pandas.DataFrame
        Table supplying the key values to exclude.
    on : None, str, list-like or Mapping, optional
        How to match rows:

        * ``None`` -- use every column name present in both tables.
        * ``str`` -- a single column name present in both tables.
        * list-like -- several column names present in both tables.
        * ``Mapping`` -- ``{left_column: right_column}`` pairs, for keys that
          are named differently in the two tables.

    Returns
    -------
    pandas.DataFrame
        The unmatched rows of ``left``, in their original order and with
        their original index labels.

    Raises
    ------
    Exception
        If ``on`` is ``None`` and the tables share no column names.
    """
    if isinstance(on, Mapping):
        # coerce colnames to list, to avoid indexing with tuples
        left_on, right_on = map(list, zip(*on.items()))
    elif on is None:
        right_columns = set(right.columns)
        left_on = [col for col in left.columns if col in right_columns]
        if not left_on:
            raise Exception(
                "No joining column specified, and no shared column names")
        right_on = left_on
    elif isinstance(on, str):
        left_on = right_on = [on]
    else:
        left_on = right_on = list(on)

    # Reduce ``right`` to its unique key combinations, named like the left
    # keys, so a left merge yields exactly one row per left row (in left
    # order) and the indicator column lines up positionally with ``left``.
    flag = "__anti_join_source__"
    right_keys = (
        right[right_on]
        .rename(columns=dict(zip(right_on, left_on)))
        .drop_duplicates()
    )
    flagged = left[left_on].merge(
        right_keys, on=left_on, how="left", indicator=flag)

    # "left_only" marks left rows whose key was not found in ``right``.
    unmatched = (flagged[flag] == "left_only").to_numpy()
    return left.loc[unmatched]