示例#1
0
def test_date_columns_equal_with_ignore_spaces_and_case():
    """Compare padded date columns while ignoring spaces.

    The columns are compared first as raw strings (ignoring spaces and case),
    then after conversion with ``pd.to_datetime`` in both argument orders.
    """
    csv_lines = [
        "a|b|expected",
        "2017-01-01|2017-01-01   |True",
        "2017-01-02  |2017-01-02|True",
        "2017-10-01  |2017-10-10   |False",
        "2017-01-01||False",
        "|2017-01-01|False",
        "||True",
    ]
    df = pd.read_csv(six.StringIO("\n".join(csv_lines)), sep="|")

    # Pass 1: the values are still plain strings.
    string_result = datacompy.columns_equal(
        df["a"], df["b"], rel_tol=0.2, ignore_spaces=True, ignore_case=True
    )
    assert_series_equal(df["expected"], string_result, check_names=False)

    # Pass 2: both columns converted to datetime objects.
    for column in ("a", "b"):
        df[column] = pd.to_datetime(df[column])
    wanted = df["expected"]
    forward = datacompy.columns_equal(
        df["a"], df["b"], rel_tol=0.2, ignore_spaces=True
    )
    assert_series_equal(wanted, forward, check_names=False)

    # Pass 3: same comparison with the arguments swapped.
    backward = datacompy.columns_equal(
        df["b"], df["a"], rel_tol=0.2, ignore_spaces=True
    )
    assert_series_equal(wanted, backward, check_names=False)
示例#2
0
def test_mixed_column():
    """Compare object columns that mix strings, ints, infinities and Decimals.

    Identical values are expected to match; ``1`` against ``'1'`` and ``1``
    against ``'yo'`` are expected not to.
    """
    # (left value, right value, expected comparison result)
    cases = [
        ('hi', 'hi', True),
        (1, 1, True),
        (np.inf, np.inf, True),
        (Decimal('1'), Decimal('1'), True),
        (1, '1', False),
        (1, 'yo', False),
    ]
    records = [
        {'a': left, 'b': right, 'expected': outcome}
        for left, right, outcome in cases
    ]
    df = pd.DataFrame(records)
    result = datacompy.columns_equal(df['a'], df['b'])
    assert_series_equal(df['expected'], result, check_names=False)
示例#3
0
def test_rounded_date_columns():
    """Compare a datetime column with strings that carry a time component.

    Only the rows whose string time part is exactly midnight are expected to
    match the date-only datetime value.
    """
    day = "2017-01-01"
    # (string timestamp in column b, expected comparison result)
    timestamps = [
        ("2017-01-01 00:00:00.000000", True),
        ("2017-01-01 00:00:00.123456", False),
        ("2017-01-01 00:00:01.000000", False),
        ("2017-01-01 00:00:00", True),
    ]
    records = [{"a": day, "b": stamp, "exp": outcome} for stamp, outcome in timestamps]
    df = pd.DataFrame(records)
    df["a_dt"] = pd.to_datetime(df["a"])
    result = datacompy.columns_equal(df["a_dt"], df["b"])
    assert_series_equal(result, df["exp"], check_names=False)
示例#4
0
def test_infinity_and_beyond():
    """Compare columns containing positive and negative infinity.

    Infinities of the same sign are expected to match, opposite signs are not;
    two ordinary numeric rows are included alongside them.
    """
    # (left value, right value, expected comparison result)
    cases = [
        (np.inf, np.inf, True),
        (-np.inf, -np.inf, True),
        (-np.inf, np.inf, False),
        (np.inf, -np.inf, False),
        (1, 1, True),
        (1, 0, False),
    ]
    records = [
        {'a': left, 'b': right, 'expected': outcome}
        for left, right, outcome in cases
    ]
    df = pd.DataFrame(records)
    result = datacompy.columns_equal(df['a'], df['b'])
    assert_series_equal(df['expected'], result, check_names=False)
示例#5
0
def test_rounded_date_columns():
    """Compare a date-only datetime column against timestamp strings.

    A row is expected to match only when the string's time part is exactly
    midnight; any sub-second or second offset is expected to differ.
    """
    records = []
    for stamp, outcome in (
        ('2017-01-01 00:00:00.000000', True),
        ('2017-01-01 00:00:00.123456', False),
        ('2017-01-01 00:00:01.000000', False),
        ('2017-01-01 00:00:00', True),
    ):
        records.append({'a': '2017-01-01', 'b': stamp, 'exp': outcome})

    df = pd.DataFrame(records)
    df['a_dt'] = pd.to_datetime(df['a'])
    observed = datacompy.columns_equal(df['a_dt'], df['b'])
    assert_series_equal(observed, df['exp'], check_names=False)
示例#6
0
def test_bad_date_columns():
    """Expect no row to match when a string column cannot be coerced to dates.

    Column ``b`` contains the malformed value ``217-01-01``; the comparison
    against the datetime column is expected to be false for every row.
    """
    pairs = [("2017-01-01", "2017-01-01"), ("2017-01-01", "217-01-01")]
    df = pd.DataFrame([{"a": left, "b": right} for left, right in pairs])
    df["a_dt"] = pd.to_datetime(df["a"])
    matches = datacompy.columns_equal(df["a_dt"], df["b"])
    assert not matches.any()
示例#7
0
def test_mixed_column_with_ignore_spaces_and_case():
    """Compare mixed-type columns with ``ignore_spaces`` and ``ignore_case`` on.

    String pairs differing only by trailing space or letter case are expected
    to match; an int against a string is still expected to differ.
    """
    # (left value, right value, expected comparison result)
    cases = [
        ("hi", "hi ", True),
        (1, 1, True),
        (np.inf, np.inf, True),
        (Decimal("1"), Decimal("1"), True),
        (1, "1 ", False),
        (1, "yo ", False),
        ("Hi", "hI ", True),
        ("HI", "HI ", True),
        ("hi", "hi ", True),
    ]
    records = [
        {"a": left, "b": right, "expected": outcome}
        for left, right, outcome in cases
    ]
    df = pd.DataFrame(records)
    options = {"ignore_spaces": True, "ignore_case": True}
    result = datacompy.columns_equal(df["a"], df["b"], **options)
    assert_series_equal(df["expected"], result, check_names=False)
示例#8
0
def test_numeric_columns_equal_abs():
    """Compare numeric columns using an absolute tolerance of 0.2.

    Rows with ``NULL`` on one side only are expected to differ; ``NULL`` on
    both sides is expected to match.
    """
    csv_lines = [
        'a|b|expected',
        '1|1|True',
        '2|2.1|True',
        '3|4|False',
        '4|NULL|False',
        'NULL|4|False',
        'NULL|NULL|True',
    ]
    df = pd.read_csv(six.StringIO('\n'.join(csv_lines)), sep='|')
    result = datacompy.columns_equal(df['a'], df['b'], abs_tol=0.2)
    assert_series_equal(df['expected'], result, check_names=False)
示例#9
0
def test_numeric_columns_equal_rel():
    """Compare numeric columns using a relative tolerance of 0.2.

    Rows with ``NULL`` on one side only are expected to differ; ``NULL`` on
    both sides is expected to match.
    """
    header = "a|b|expected"
    body = [
        "1|1|True",
        "2|2.1|True",
        "3|4|False",
        "4|NULL|False",
        "NULL|4|False",
        "NULL|NULL|True",
    ]
    buffer = six.StringIO("\n".join([header] + body))
    df = pd.read_csv(buffer, sep="|")
    result = datacompy.columns_equal(df["a"], df["b"], rel_tol=0.2)
    assert_series_equal(df["expected"], result, check_names=False)
示例#10
0
def test_date_columns_unequal():
    """Datetime columns should match their own string form and nothing else.

    Each string column is expected to equal its ``pd.to_datetime`` conversion
    in both argument orders, while the ``a`` and ``b`` families are expected
    to differ in every row.
    """
    # The second record deliberately has no "b" key.
    records = [{"a": "2017-01-01", "b": "2017-01-02"}, {"a": "2017-01-01"}]
    df = pd.DataFrame(records)
    for source in ("a", "b"):
        df[source + "_dt"] = pd.to_datetime(df[source])

    same_values = [("a", "a_dt"), ("b", "b_dt"), ("a_dt", "a"), ("b_dt", "b")]
    for left, right in same_values:
        assert datacompy.columns_equal(df[left], df[right]).all()

    different_values = [("b_dt", "a"), ("a_dt", "b"), ("a", "b_dt"), ("b", "a_dt")]
    for left, right in different_values:
        assert not datacompy.columns_equal(df[left], df[right]).any()
示例#11
0
def test_bad_date_columns():
    """A string column that cannot be coerced to dates should match nowhere.

    The second value of ``b`` (``217-01-01``) is malformed, and the comparison
    against the datetime column is expected to be false for the whole column.
    """
    good_date = '2017-01-01'
    records = [
        {'a': good_date, 'b': other}
        for other in ('2017-01-01', '217-01-01')
    ]
    df = pd.DataFrame(records)
    df['a_dt'] = pd.to_datetime(df['a'])
    any_match = datacompy.columns_equal(df['a_dt'], df['b']).any()
    assert not any_match
示例#12
0
def test_date_columns_equal():
    """Compare date columns as strings and then as datetime objects.

    The datetime comparison is run in both argument orders and is expected to
    give the same result as the string comparison.
    """
    csv_lines = [
        'a|b|expected',
        '2017-01-01|2017-01-01|True',
        '2017-01-02|2017-01-02|True',
        '2017-10-01|2017-10-10|False',
        '2017-01-01||False',
        '|2017-01-01|False',
        '||True',
    ]
    df = pd.read_csv(six.StringIO('\n'.join(csv_lines)), sep='|')

    # Pass 1: values are still plain strings.
    string_result = datacompy.columns_equal(df['a'], df['b'], rel_tol=0.2)
    assert_series_equal(df['expected'], string_result, check_names=False)

    # Pass 2: both columns converted to datetime objects.
    for column in ('a', 'b'):
        df[column] = pd.to_datetime(df[column])
    wanted = df['expected']
    forward = datacompy.columns_equal(df['a'], df['b'], rel_tol=0.2)
    assert_series_equal(wanted, forward, check_names=False)

    # Pass 3: arguments swapped.
    backward = datacompy.columns_equal(df['b'], df['a'], rel_tol=0.2)
    assert_series_equal(wanted, backward, check_names=False)
示例#13
0
def test_mixed_column():
    """Compare columns whose rows mix strings, ints, infinities and Decimals.

    Rows holding the same value on both sides are expected to match; an int
    paired with a string is expected not to.
    """
    lefts = ["hi", 1, np.inf, Decimal("1"), 1, 1]
    rights = ["hi", 1, np.inf, Decimal("1"), "1", "yo"]
    outcomes = [True, True, True, True, False, False]
    records = [
        {"a": left, "b": right, "expected": outcome}
        for left, right, outcome in zip(lefts, rights, outcomes)
    ]
    df = pd.DataFrame(records)
    observed = datacompy.columns_equal(df["a"], df["b"])
    assert_series_equal(df["expected"], observed, check_names=False)
示例#14
0
def test_infinity_and_beyond():
    """Compare columns holding infinities of both signs and plain numbers.

    Same-signed infinities are expected to match and opposite-signed ones are
    expected to differ.
    """
    pos, neg = np.inf, -np.inf
    lefts = [pos, neg, neg, pos, 1, 1]
    rights = [pos, neg, pos, neg, 1, 0]
    outcomes = [True, True, False, False, True, False]
    records = [
        {"a": left, "b": right, "expected": outcome}
        for left, right, outcome in zip(lefts, rights, outcomes)
    ]
    df = pd.DataFrame(records)
    observed = datacompy.columns_equal(df["a"], df["b"])
    assert_series_equal(df["expected"], observed, check_names=False)
示例#15
0
def test_decimal_columns_equal_rel():
    """Compare Decimal columns using a relative tolerance of 0.001.

    Values that differ only in the ninth decimal place are expected to match,
    ``1.3`` against ``1.2`` is expected to differ, and NaN is expected to
    match only another NaN.
    """
    df = pd.DataFrame(
        [
            {"a": Decimal("1"), "b": Decimal("1"), "expected": True},
            {"a": Decimal("1.3"), "b": Decimal("1.3"), "expected": True},
            {"a": Decimal("1.000003"), "b": Decimal("1.000003"), "expected": True},
            {"a": Decimal("1.000000004"), "b": Decimal("1.000000003"), "expected": True},
            {"a": Decimal("1.3"), "b": Decimal("1.2"), "expected": False},
            {"a": np.nan, "b": np.nan, "expected": True},
            {"a": np.nan, "b": Decimal("1"), "expected": False},
            {"a": Decimal("1"), "b": np.nan, "expected": False},
        ]
    )
    # Use rel_tol (the original passed abs_tol) so that this "_rel" test
    # exercises the relative-tolerance argument its name refers to.
    actual_out = datacompy.columns_equal(df.a, df.b, rel_tol=0.001)
    expect_out = df["expected"]
    assert_series_equal(expect_out, actual_out, check_names=False)
示例#16
0
def test_date_columns_unequal():
    """Datetime fields should match the same dates stored as strings.

    Columns derived from the same source are expected to be equal in every
    row regardless of argument order; columns derived from different sources
    are expected to be unequal in every row.
    """
    # The second record deliberately has no 'b' key.
    df = pd.DataFrame([
        {'a': '2017-01-01', 'b': '2017-01-02'},
        {'a': '2017-01-01'},
    ])
    df['a_dt'] = pd.to_datetime(df['a'])
    df['b_dt'] = pd.to_datetime(df['b'])

    def compare(left, right):
        # Thin wrapper so the assertions below read as column-name pairs.
        return datacompy.columns_equal(df[left], df[right])

    for left, right in (('a', 'a_dt'), ('b', 'b_dt'), ('a_dt', 'a'), ('b_dt', 'b')):
        assert compare(left, right).all()
    for left, right in (('b_dt', 'a'), ('a_dt', 'b'), ('a', 'b_dt'), ('b', 'a_dt')):
        assert not compare(left, right).any()
示例#17
0
def test_string_columns_equal_with_ignore_spaces():
    """Compare string columns with ``ignore_spaces=True``.

    Values differing only by surrounding whitespace are expected to match,
    while differences in accents, emoji or letter case are expected to differ.
    """
    # Whitespace inside the fixture rows is significant; keep it exactly.
    csv_lines = [
        "a|b|expected",
        "Hi|Hi|True",
        "Yo|Yo|True",
        "Hey|Hey |True",
        "résumé|resume|False",
        "résumé|résumé|True",
        "💩|💩|True",
        "💩|🤔|False",
        " | |True",
        "  |       |True",
        "datacompy|DataComPy|False",
        "something||False",
        "|something|False",
        "||True",
    ]
    df = pd.read_csv(six.StringIO("\n".join(csv_lines)), sep="|")
    result = datacompy.columns_equal(
        df["a"], df["b"], rel_tol=0.2, ignore_spaces=True
    )
    assert_series_equal(df["expected"], result, check_names=False)
示例#18
0
def test_string_columns_equal():
    """Compare string columns with default options.

    Trailing or differing whitespace, accents, emoji and letter case are all
    expected to make values differ; empty cells match only each other.
    """
    # Whitespace inside the fixture rows is significant; keep it exactly.
    csv_lines = [
        'a|b|expected',
        'Hi|Hi|True',
        'Yo|Yo|True',
        'Hey|Hey |False',
        'résumé|resume|False',
        'résumé|résumé|True',
        '💩|💩|True',
        '💩|🤔|False',
        ' | |True',
        '  | |False',
        'datacompy|DataComPy|False',
        'something||False',
        '|something|False',
        '||True',
    ]
    df = pd.read_csv(six.StringIO('\n'.join(csv_lines)), sep='|')
    result = datacompy.columns_equal(df['a'], df['b'], rel_tol=0.2)
    assert_series_equal(df['expected'], result, check_names=False)
示例#19
0
def test_decimal_columns_equal_rel():
    """Compare Decimal columns using a relative tolerance of 0.001.

    Values that differ only in the ninth decimal place are expected to match,
    ``1.3`` against ``1.2`` is expected to differ, and NaN is expected to
    match only another NaN.
    """
    df = pd.DataFrame([{
        'a': Decimal('1'),
        'b': Decimal('1'),
        'expected': True
    }, {
        'a': Decimal('1.3'),
        'b': Decimal('1.3'),
        'expected': True
    }, {
        'a': Decimal('1.000003'),
        'b': Decimal('1.000003'),
        'expected': True
    }, {
        'a': Decimal('1.000000004'),
        'b': Decimal('1.000000003'),
        'expected': True
    }, {
        'a': Decimal('1.3'),
        'b': Decimal('1.2'),
        'expected': False
    }, {
        'a': np.nan,
        'b': np.nan,
        'expected': True
    }, {
        'a': np.nan,
        'b': Decimal('1'),
        'expected': False
    }, {
        'a': Decimal('1'),
        'b': np.nan,
        'expected': False
    }])
    # Use rel_tol (the original passed abs_tol) so that this "_rel" test
    # exercises the relative-tolerance argument its name refers to.
    actual_out = datacompy.columns_equal(df.a, df.b, rel_tol=0.001)
    expect_out = df['expected']
    assert_series_equal(expect_out, actual_out, check_names=False)