Пример #1
0
def is_incremented(compCat, bigfilt, smallfilt, df, offset=0):
    """ Enforce the equality constraint
        compCat(bigCat:bigElem) == compCat(smallCat:smallElem) + offset

        Equality counterpart of is_ordered (== instead of >).

        compCat is resolved through common.comparison_category; bigfilt and
        smallfilt are normalised through common.force_filter and afterwards
        called on the frame to obtain row masks.
        NOTE(review): the original notes say string filters are converted to
        lambdas -- that conversion lives in common and is not visible here.

        The constraint also implies bigCat:bigElem != smallCat:smallElem, so
        rows matching both filters are rejected through is_diff first.

        All status updates are made on a copy of df, which is returned.
    """
    compCat = common.comparison_category(compCat, df)
    bigfilt = common.force_filter(bigfilt)
    smallfilt = common.force_filter(smallfilt)

    # The implied != clause goes first, applied to a private copy of df.
    result = is_diff(bigfilt, smallfilt, df.copy())

    # Distinct compCat values present on each side of the equation.
    small_rows = result[smallfilt(result)]
    small_vals = small_rows[compCat].unique()
    big_rows = result[bigfilt(result)]
    big_vals = big_rows[compCat].unique()

    # A small value is impossible when (value + offset) never occurs among
    # the big values; a big value is impossible when (value - offset) never
    # occurs among the small values.
    orphan_small = set(small_vals).difference(big_vals - offset)
    orphan_big = set(big_vals).difference(small_vals + offset)

    # Mark the rows carrying impossible values as rejected. The filters are
    # deliberately re-applied to the current state of the frame.
    small_hits = smallfilt(result) & (result[compCat].isin(orphan_small))
    result.loc[small_hits, common.STATUS] = common.REJECTED

    big_hits = bigfilt(result) & (result[compCat].isin(orphan_big))
    result.loc[big_hits, common.STATUS] = common.REJECTED

    return result
Пример #2
0
def is_ordered(compCat, bigfilt, smallfilt, df, offset=0):
    """ Enforce the strict ordering constraint
        compCat(bigCat:bigElem) > compCat(smallCat:smallElem) + offset

        compCat is resolved through common.comparison_category; bigfilt and
        smallfilt are normalised through common.force_filter and afterwards
        called on the frame to obtain row masks.

        The constraint also implies bigCat:bigElem != smallCat:smallElem, so
        rows matching both filters are rejected through is_diff first.

        All status updates are made on a copy of df, which is returned.
    """
    compCat = common.comparison_category(compCat, df)
    bigfilt = common.force_filter(bigfilt)
    smallfilt = common.force_filter(smallfilt)

    # The implied != clause goes first, applied to a private copy of df.
    result = is_diff(bigfilt, smallfilt, df.copy())

    # A big row can only survive if it exceeds the lowest small value by
    # more than offset.
    small_rows = result[smallfilt(result)]
    lowest_small = small_rows[compCat].min()
    big_too_low = bigfilt(result) & (result[compCat] <= (lowest_small + offset))
    result.loc[big_too_low, common.STATUS] = common.REJECTED

    # Symmetrically, a small row can only survive if it stays more than
    # offset below the highest big value.
    big_rows = result[bigfilt(result)]
    highest_big = big_rows[compCat].max()
    small_too_high = smallfilt(result) & (result[compCat] >= (highest_big - offset))
    result.loc[small_too_high, common.STATUS] = common.REJECTED

    return result
Пример #3
0
def is_same(filt1, filt2, df):
    """ Reject every row that matches exactly one of the two filters, i.e.
        rows where cat1:elem1 != cat2:elem2.

        Both filters are normalised through common.force_filter. The work is
        done on a copy of df, which is returned.
    """
    first = common.force_filter(filt1)
    second = common.force_filter(filt2)
    out = df.copy()

    # Rows matched by the first filter only.
    only_first = first(out) & (~second(out))
    out.loc[only_first, common.STATUS] = common.REJECTED

    # Rows matched by the second filter only; the filters are re-applied to
    # the frame as it stands after the update above.
    only_second = (~first(out)) & second(out)
    out.loc[only_second, common.STATUS] = common.REJECTED

    return out
Пример #4
0
def is_either_or(isfilt, eitherfilt, orfilt, df):
    """ Enforce "is" matches exactly one of "either" / "or".

        Two rejections are applied:
          * rows matching both eitherfilt and orfilt (the two alternatives
            exclude each other) -- delegated to is_diff;
          * rows matching isfilt but neither eitherfilt nor orfilt.

        All three filters are normalised through common.force_filter.

        Returns a new frame; the status updates made here are applied to the
        copy produced by is_diff, not to df.
    """
    isfilt = common.force_filter(isfilt)
    eitherfilt = common.force_filter(eitherfilt)
    orfilt = common.force_filter(orfilt)

    # is_diff works on (and returns) its own copy of df, so no separate
    # df.copy() is needed before calling it.
    df2 = is_diff(eitherfilt, orfilt, df)

    # reject all rows which are "is" but neither "either" nor "or"
    is_rows = isfilt(df2)
    in_either_or = eitherfilt(df2) | orfilt(df2)
    df2.loc[is_rows & ~in_either_or, common.STATUS] = common.REJECTED

    return df2
Пример #5
0
def is_diff(filt1, filt2, df):
    """ Reject every row that matches both filters, i.e. enforce
        cat1:elem1 != cat2:elem2.

        Both filters are normalised through common.force_filter. The work is
        done on a copy of df, which is returned.
    """
    first = common.force_filter(filt1)
    second = common.force_filter(filt2)
    out = df.copy()
    in_both = first(out) & second(out)
    out.loc[in_both, common.STATUS] = common.REJECTED
    return out