def test_sql_date_parsing(text_files):
    """Compare the members of a date-parameterised query with a fixed frame.

    The 'sql_use_date_prompt' query is run against the 'sqlitedb' connection
    with two date parameters, and the member difference against a hand-built
    four-row frame is dumped to tab-separated files that are then read back.

    NOTE(review): a test with this same name is defined again further down
    in the visible source; within a single module the later definition
    would replace this one.
    """
    ini_path = os.path.join(tests_folder, 'connection_file.ini')
    conn = dc.get_connection_info(ini_path, 'sqlitedb')
    assert conn == "Driver=SQLite3 ODBC Driver;Database=sqlite.db"

    queries = dc.get_sql_texts(tests_folder)
    date_params = ['2014-04-01', '2014-04-04']
    actual = dc.CompareDataFrame.from_sql(
        queries['sql_use_date_prompt'], conn, params=date_params)

    column_order = ['Nums', 'Chars', 'Extra']
    reference_data = {
        "Nums": [1, 2, 3, 4],
        "Chars": ["right", "b", "c", "d"],
        "Extra": ["one", "two", "three", "four"],
    }
    reference = dc.CompareDataFrame(reference_data, columns=column_order)

    # The two files read below are presumably written by this call because
    # of to_file=True -- confirm against datacompare.
    actual.get_member_difference(reference, to_file=True)
    only_in_right = pd.read_csv('in_right_not_in_left.txt', sep='\t')
    only_in_left = pd.read_csv('in_left_not_in_right.txt', sep='\t')

    # Due to date filtering, expect two rows although 3 in t1
    assert only_in_right.shape[0] == 2
    assert only_in_left.shape[0] == 0
def test_dump_values_same(text_files):
    """Check that the 'sql_lite_nulls' query shows no value differences.

    Test that NaN/null are equivalent as required for ETL testing.

    NOTE(review): a test with this same name is defined again further down
    in the visible source; within a single module the later definition
    would replace this one.
    """
    ini_path = os.path.join(tests_folder, 'connection_file.ini')
    conn = dc.get_connection_info(ini_path, 'sqlitedb')
    queries = dc.get_sql_texts(tests_folder)
    actual = dc.CompareDataFrame.from_sql(queries['sql_lite_nulls'], conn)

    # Side effect of the NaN/null equivalence: ints and floats may not
    # always be equal when expected.
    reference_data = {
        'Nums': [1, 2, 3],
        'NumNulls': [np.nan, 3.3, 4.7],
        'SomeInt': [1.0, 2.0, 3.0],
    }
    reference = dc.CompareDataFrame(
        reference_data, columns=['Nums', 'NumNulls', 'SomeInt'])

    # 'value_difference.txt' is presumably written by this call because of
    # to_file=True -- confirm against datacompare.
    actual.create_value_comparable_lists(
        reference, to_file=True, value_precision=1)
    value_difference = pd.read_csv('value_difference.txt', sep='\t')
    print('The different values are: {}'.format(value_difference))

    differing_rows = value_difference.shape[0]
    assert differing_rows == 0
def test_sql_dump_value_difference(text_files):
    """Dump value differences for table t1 and look for the mismatch marker.

    All rows of t1 are compared with a hand-built frame; the dumped
    'Chars_1' column must contain the entry "left | right".

    NOTE(review): a test with this same name is defined again further down
    in the visible source; within a single module the later definition
    would replace this one.
    """
    ini_path = os.path.join(tests_folder, 'connection_file.ini')
    conn = dc.get_connection_info(ini_path, 'sqlitedb')
    assert conn == "Driver=SQLite3 ODBC Driver;Database=sqlite.db"

    query = "SELECT * FROM t1"
    actual = dc.CompareDataFrame.from_sql(query, conn)

    reference_frame = pd.DataFrame(
        {
            "Nums": [1, 2, 3, 4],
            "Chars": ["right", "b", "c", "d"],
            "Extra": ["one", "two", "three", "four"],
        },
        columns=['Nums', 'Chars', 'Extra'])
    reference = dc.CompareDataFrame(reference_frame)

    # 'value_difference.txt' is presumably written by this call because of
    # to_file=True -- confirm against datacompare.
    actual.create_value_comparable_lists(reference, to_file=True)
    dumped = pd.read_csv('value_difference.txt', sep='\t')
    chars_values = dumped['Chars_1'].values
    assert "left | right" in chars_values
def test_compare_value_difference(text_files):
    """Assert that the two comparable lists for table t1 are equal.

    All rows of t1 are loaded and compared with a hand-built frame via
    create_value_comparable_lists; the two returned objects must be equal.

    NOTE(review): a test with this same name is defined again further down
    in the visible source; within a single module the later definition
    would replace this one.
    """
    ini_path = os.path.join(tests_folder, 'connection_file.ini')
    conn = dc.get_connection_info(ini_path, 'sqlitedb')
    assert conn == "Driver=SQLite3 ODBC Driver;Database=sqlite.db"

    query = "SELECT * FROM t1"
    actual = dc.CompareDataFrame.from_sql(query, conn)

    reference_frame = pd.DataFrame(
        {
            "Nums": [1, 2, 3, 4],
            "Chars": ["right", "b", "c", "d"],
            "Extra": ["one", "two", "three", "four"],
        },
        columns=['Nums', 'Chars', 'Extra'])
    reference = dc.CompareDataFrame(reference_frame)

    lhs, rhs = actual.create_value_comparable_lists(reference)
    assert lhs == rhs
def test_dump_values_same(text_files):
    """Verify that no value differences are dumped for 'sql_lite_nulls'.

    Test that NaN/null are equivalent as required for ETL testing.

    NOTE(review): a test with this same name is also defined earlier in the
    visible source; within a single module this later definition would
    replace the earlier one.
    """
    config_file = os.path.join(tests_folder, 'connection_file.ini')
    connection_string = dc.get_connection_info(config_file, 'sqlitedb')
    sql_by_name = dc.get_sql_texts(tests_folder)
    null_query = sql_by_name['sql_lite_nulls']
    from_database = dc.CompareDataFrame.from_sql(null_query, connection_string)

    # Side effect of the NaN/null equivalence: ints and floats may not
    # always be equal when expected.
    columns = ['Nums', 'NumNulls', 'SomeInt']
    hand_built = dc.CompareDataFrame(
        {
            'Nums': [1, 2, 3],
            'NumNulls': [np.nan, 3.3, 4.7],
            'SomeInt': [1.0, 2.0, 3.0],
        },
        columns=columns)

    # 'value_difference.txt' is presumably produced by this call because of
    # to_file=True -- confirm against datacompare.
    from_database.create_value_comparable_lists(
        hand_built, to_file=True, value_precision=1)
    value_difference = pd.read_csv('value_difference.txt', sep='\t')
    print('The different values are: {}'.format(value_difference))
    assert value_difference.shape[0] == 0
def test_sql_date_parsing(text_files):
    """Check member differences for a query that takes two date parameters.

    Runs 'sql_use_date_prompt' with params '2014-04-01' and '2014-04-04',
    dumps the member difference against a hand-built four-row frame, and
    checks the row counts of the two tab-separated files read back.

    NOTE(review): a test with this same name is also defined earlier in the
    visible source; within a single module this later definition would
    replace the earlier one.
    """
    config_file = os.path.join(tests_folder, 'connection_file.ini')
    connection_string = dc.get_connection_info(config_file, 'sqlitedb')
    assert connection_string == "Driver=SQLite3 ODBC Driver;Database=sqlite.db"

    sql_by_name = dc.get_sql_texts(tests_folder)
    dated_query = sql_by_name['sql_use_date_prompt']
    from_database = dc.CompareDataFrame.from_sql(
        dated_query, connection_string, params=['2014-04-01', '2014-04-04'])

    hand_built = dc.CompareDataFrame(
        {
            "Nums": [1, 2, 3, 4],
            "Chars": ["right", "b", "c", "d"],
            "Extra": ["one", "two", "three", "four"],
        },
        columns=['Nums', 'Chars', 'Extra'])

    # The two files read below are presumably produced by this call because
    # of to_file=True -- confirm against datacompare.
    from_database.get_member_difference(hand_built, to_file=True)
    in_right_not_in_left = pd.read_csv('in_right_not_in_left.txt', sep='\t')
    in_left_not_in_right = pd.read_csv('in_left_not_in_right.txt', sep='\t')

    # Due to date filtering, expect two rows although 3 in t1
    right_only_rows = in_right_not_in_left.shape[0]
    left_only_rows = in_left_not_in_right.shape[0]
    assert right_only_rows == 2
    assert left_only_rows == 0
def test_sql_dump_value_difference(text_files):
    """Expect a "left | right" entry in the dumped value differences for t1.

    NOTE(review): a test with this same name is also defined earlier in the
    visible source; within a single module this later definition would
    replace the earlier one.
    """
    config_file = os.path.join(tests_folder, 'connection_file.ini')
    connection_string = dc.get_connection_info(config_file, 'sqlitedb')
    assert connection_string == "Driver=SQLite3 ODBC Driver;Database=sqlite.db"

    from_database = dc.CompareDataFrame.from_sql(
        "SELECT * FROM t1", connection_string)

    frame_data = {
        "Nums": [1, 2, 3, 4],
        "Chars": ["right", "b", "c", "d"],
        "Extra": ["one", "two", "three", "four"],
    }
    hand_built = dc.CompareDataFrame(
        pd.DataFrame(frame_data, columns=['Nums', 'Chars', 'Extra']))

    # 'value_difference.txt' is presumably produced by this call because of
    # to_file=True -- confirm against datacompare.
    from_database.create_value_comparable_lists(hand_built, to_file=True)
    value_difference = pd.read_csv('value_difference.txt', sep='\t')
    assert "left | right" in value_difference['Chars_1'].values
def test_compare_value_difference(text_files):
    """Require the two value-comparable lists built for t1 to be equal.

    NOTE(review): a test with this same name is also defined earlier in the
    visible source; within a single module this later definition would
    replace the earlier one.
    """
    config_file = os.path.join(tests_folder, 'connection_file.ini')
    connection_string = dc.get_connection_info(config_file, 'sqlitedb')
    assert connection_string == "Driver=SQLite3 ODBC Driver;Database=sqlite.db"

    from_database = dc.CompareDataFrame.from_sql(
        "SELECT * FROM t1", connection_string)

    frame_data = {
        "Nums": [1, 2, 3, 4],
        "Chars": ["right", "b", "c", "d"],
        "Extra": ["one", "two", "three", "four"],
    }
    hand_built = dc.CompareDataFrame(
        pd.DataFrame(frame_data, columns=['Nums', 'Chars', 'Extra']))

    first, second = from_database.create_value_comparable_lists(hand_built)
    assert first == second
# Usage: from a terminal in the current directory, run
#     py.test -vv
# and that's it!
import datacompare as dc

connection_string = dc.get_connection_info('connection_file.ini', 'salesdb')
sql_texts = dc.get_sql_texts()

# Both queries below are run with the same pair of date parameters.
result = dc.CompareDataFrame.from_sql(
    sql_texts['example_sales_new'],
    connection_string,
    params=['2015-04-01', '2015-04-02'])
expected = dc.CompareDataFrame.from_sql(
    sql_texts['example_sales_old'],
    connection_string,
    params=['2015-04-01', '2015-04-02'])


def test_with_pytest_members_are_same():
    """Assert that both halves of the member difference are equal."""
    lhs, rhs = result.get_member_difference(expected)
    assert lhs == rhs


def test_with_pytest_values_are_same():
    """Assert that both value-comparable lists are equal."""
    lhs, rhs = result.create_value_comparable_lists(expected)
    assert lhs == rhs