def test_infile():
    """Check the ``infile`` property for a plain input and a zipped input.

    For each table, once ``infile`` has been assigned the test expects that:

    * assigning ``value`` (no column has been chosen) raises,
    * assigning an invalid ``column`` raises,
    * assigning an invalid ``infile`` raises,
    * assigning a different ``infile`` raises,

    and that ``extract()`` still succeeds afterwards.
    """
    test_et = et.ExtractTable()
    test_et.infile = good_inf1
    assert test_et.infile == good_inf1

    with pytest.raises(Exception):
        test_et.value = good_val1a
    with pytest.raises(Exception):
        test_et.column = bad_col
    with pytest.raises(Exception):
        test_et.infile = bad_inf
    with pytest.raises(Exception):
        test_et.infile = good_inf2
    # The rejected assignments above must leave the table usable.
    test_et.extract()

    with pytest.raises(Exception):
        test_et.infile = zip_inf

    # Same checks against a table backed by the zipped input.  These
    # previously targeted ``test_et`` again, leaving ``test_et2`` untested.
    test_et2 = et.ExtractTable()
    test_et2.infile = zip_inf

    with pytest.raises(Exception):
        test_et2.value = good_val1a
    with pytest.raises(Exception):
        test_et2.column = bad_col
    with pytest.raises(Exception):
        test_et2.infile = bad_inf
    with pytest.raises(Exception):
        test_et2.infile = good_inf2
    test_et2.extract()
def test_constructor_errors():
    """Every invalid positional-argument combination must fail to construct."""
    invalid_arguments = (
        (bad_inf,),
        (good_inf1, None, bad_col),
        (good_inf1, None, bad_col, bad_val),
        (good_inf1, None, good_col1a, bad_val),
    )
    for ctor_args in invalid_arguments:
        with pytest.raises(Exception):
            et.ExtractTable(*ctor_args)
def test_compare_column_sums():
    """Exercise ``dq.compare_column_sums`` on toy frames and on real data.

    The first part feeds argument combinations that are expected to raise.
    The second part pins the exact output for two small numeric frames.
    The last part cross-checks the result on the MGGG/MEDSL tables against
    differences built by hand from ``dq.sum_column_values``.
    """
    numeric_a = pd.DataFrame(data=[[1, 2, 3], [4, 5, 6]],
                             columns=['COL1', 'COL2', 'COL3'])
    numeric_b = pd.DataFrame(data=[[4, 5], [1, 2]],
                             columns=['col2', 'col1'])
    textual = pd.DataFrame(data=[['asdf', 'fdsa'], ['foo', 'bar']],
                           columns=['c1', 'c2'])

    # Each tuple is one call that the test expects to be rejected.
    rejected_calls = [
        (pd.DataFrame(), pd.DataFrame(), 'asdf', 'asdf'),
        (pd.DataFrame(), numeric_a, 'asdf', 'asdf'),
        (numeric_a, numeric_b, 'col2', 'COL1'),
        (numeric_a, numeric_b, ['col2'], ['COL1']),
        (numeric_a, numeric_b, numeric_a.columns, numeric_b.columns),
        (numeric_a, numeric_b, ['COL1', 'COL2'], ['col1', 'col3']),
        (numeric_a, numeric_b, 'COL1', ['col1']),
        (numeric_a, textual, 'COL1', 'c1'),
    ]
    for call_args in rejected_calls:
        with pytest.raises(Exception):
            dq.compare_column_sums(*call_args)

    single = dq.compare_column_sums(numeric_a, numeric_b, ['COL1'], ['col1'])
    assert single == [('COL1 [vs] col1', -2)]

    paired = dq.compare_column_sums(numeric_a, numeric_b,
                                    ['COL1', 'COL3'], ['col1', 'col2'])
    assert paired == [('COL1 [vs] col1', -2), ('COL3 [vs] col2', 4)]

    mggg_table = et.ExtractTable(mggg_gdf, column='PRECINCT').extract()
    medsl_table = et.ExtractTable(medsl_df, column='precinct').extract()
    mggg_cols = ['AG18D', 'AG18R', 'COMP18D']
    medsl_cols = [
        'Attorney General democrat',
        'Attorney General republican',
        'Comptroller democrat',
    ]

    results = dq.compare_column_sums(mggg_table, medsl_table,
                                     mggg_cols, medsl_cols)

    mggg_sums = dq.sum_column_values(mggg_table, mggg_cols)
    # NOTE(review): this sums the raw ``medsl_df`` rather than the extracted
    # table used in the comparison above -- confirm that is intentional.
    medsl_sums = dq.sum_column_values(medsl_df, medsl_cols)

    expected = [
        ('{} [vs] {}'.format(left[0], right[0]), (left[1] - right[1]))
        for left, right in zip(mggg_sums, medsl_sums)
    ]
    assert set(results) == set(expected)
def test_list_values():
    """``list_values`` returns the column's values as an ``np.ndarray``.

    The same expectations are checked three ways: with an explicit column
    argument and no column set, with an explicit column argument while a
    different column is set, and with no argument while the column is set.
    """

    def assert_all_values(found):
        # Full listing: element-wise equal to the reference values.
        assert type(found) is np.ndarray
        assert (found == np.array(full_vals1)).all()

    def assert_unique_values(found):
        # Unique listing: compared as sets, so ordering is not checked.
        assert type(found) is np.ndarray
        assert set(found) == set(np.unique(np.array(full_vals1, dtype=object)))

    table = et.ExtractTable()
    with pytest.raises(Exception):
        table.list_values()

    table.infile = good_inf1
    assert_all_values(table.list_values(good_col1a))
    assert_unique_values(table.list_values(good_col1a, unique=True))

    # An explicit column argument is used even when another column is set.
    table.column = good_col1b
    assert_all_values(table.list_values(good_col1a))
    assert_unique_values(table.list_values(good_col1a, unique=True))

    # With no argument, the currently set column is listed.
    table.column = good_col1a
    assert_all_values(table.list_values())
    assert_unique_values(table.list_values(unique=True))
def test_outfile():
    """Assigning ``outfile`` stores the path as a ``PosixPath``."""
    table = et.ExtractTable(good_inf1)
    for destination in (good_out, dne_out):
        table.outfile = destination
        assert table.outfile == PosixPath(destination)
def test_empty_constructor():
    """A table built with no arguments has no state and cannot extract."""
    blank = et.ExtractTable()
    for attribute in ('infile', 'outfile', 'column', 'value'):
        assert getattr(blank, attribute) is None

    with pytest.raises(Exception):
        blank.extract()
def test_value():
    """``value`` can be assigned and reassigned once a column is selected."""
    table = et.ExtractTable()
    table.infile = good_inf1
    table.column = good_col1a

    for candidate in (good_val1a, 'b'):
        table.value = candidate
        assert table.value == candidate

    # Extraction must succeed with the last assigned value.
    table.extract()
def test_column():
    """A valid ``column`` is stored; invalid value/column assignments raise."""
    table = et.ExtractTable()
    table.infile = good_inf1
    table.column = good_col1a
    assert table.column == good_col1a

    # ``setattr`` goes through the same property setters as plain assignment.
    for attribute, rejected in (('value', bad_val), ('column', bad_col)):
        with pytest.raises(Exception):
            setattr(table, attribute, rejected)

    # The rejected assignments must not prevent extraction.
    table.extract()
def test_constructor():
    """Check constructor state for positional and keyword arguments.

    Each case builds a table from ``good_inf1`` plus optional outfile,
    column and value, then checks that the attributes match what was passed
    and that omitted ones are ``None``.
    """

    def expect_state(table, outfile=None, column=None, value=None):
        # ``None`` expectations use an identity check (PEP 8) rather than
        # ``==``, which the original mixed inconsistently.
        assert table.infile == good_inf1
        observed = (table.outfile, table.column, table.value)
        for actual, wanted in zip(observed, (outfile, column, value)):
            if wanted is None:
                assert actual is None
            else:
                assert actual == wanted

    out_path = PosixPath(good_out)

    expect_state(et.ExtractTable(good_inf1))
    expect_state(et.ExtractTable(good_inf1, good_out), outfile=out_path)
    expect_state(et.ExtractTable(good_inf1, good_out, good_col1a),
                 outfile=out_path, column=good_col1a)
    expect_state(et.ExtractTable(good_inf1, good_out, good_col1a, good_val1a),
                 outfile=out_path, column=good_col1a, value=good_val1a)

    # Explicit ``None`` outfile, positional column.
    expect_state(et.ExtractTable(good_inf1, None, good_col1a),
                 column=good_col1a)

    # Keyword forms.
    expect_state(et.ExtractTable(good_inf1, column=good_col1a),
                 column=good_col1a)
    expect_state(et.ExtractTable(good_inf1, column=good_col1a,
                                 value=good_val1a),
                 column=good_col1a, value=good_val1a)

    # ``good_vals1a`` is passed as the value argument and must round-trip.
    expect_state(et.ExtractTable(good_inf1, None, good_col1a, good_vals1a),
                 column=good_col1a, value=good_vals1a)
def test_extract_to_file():
    """``extract_to_file`` writes the output file, creating missing dirs.

    Output files are removed before the test and, via ``finally``, after it
    even when an assertion fails, so a failed run leaves nothing behind.
    """
    del_outs()
    try:
        test_et = et.ExtractTable(good_inf1, good_out)
        test_et.extract_to_file()
        assert os.path.isfile(good_out)

        # Neither the target file nor its directory exists beforehand.
        test_et.outfile = dne_out
        assert not os.path.isfile(dne_out)
        assert not os.path.isdir(dne_dir)

        test_et.extract_to_file()
        assert os.path.isfile(dne_out)
    finally:
        del_outs()
def test_list_columns():
    """``list_columns`` raises with no input, else returns an ``np.ndarray``."""
    blank = et.ExtractTable()
    with pytest.raises(Exception):
        blank.list_columns()

    expectations = (
        (good_inf1, np.array(full_cols1)),
        (good_inf2, np.array(full_cols2, dtype=object)),
    )
    for source, wanted in expectations:
        found = et.read_file(source).list_columns()
        assert type(found) is np.ndarray
        assert (found == wanted).all()
def test_setters_2():
    """Setter interplay: changing ``column`` resets ``value`` to ``None``."""
    first = et.ExtractTable()
    first.infile = good_inf1
    first.column = good_col1a
    first.value = good_val1a
    assert (first.column, first.value) == (good_col1a, good_val1a)

    # Switching column must clear the previously selected value.
    first.column = good_col1b
    assert first.column == good_col1b
    assert first.value is None

    first.value = good_val1b
    assert first.value == good_val1b

    # Same setter sequence against the second input file.
    second = et.ExtractTable()
    second.infile = good_inf2
    second.column = good_col2
    second.value = good_val2
    assert second.infile == good_inf2
    assert second.column == good_col2
    assert second.value == good_val2
def test_extract():
    """``extract`` must match the equivalent geopandas operations.

    The reference frame is derived step by step (raw read, indexed by
    column, narrowed to one value) and compared with the extraction after
    the matching attribute is set on the table.
    """
    table = et.ExtractTable(good_inf1)

    def assert_extract_matches(reference):
        extracted = table.extract()
        assert type(extracted) is gpd.GeoDataFrame
        assert extracted.equals(reference)

    # Note: gpd has inconsistent naming compared with pd, so the column is
    # renamed to the pandas-style name before comparing.
    reference = gpd.read_file(good_inf1).rename(
        columns={'field_1': 'Unnamed: 0'})
    assert_extract_matches(reference)

    table.column = good_col1a
    reference = reference.set_index(good_col1a)
    assert_extract_matches(reference)

    table.value = good_val1a
    reference = gpd.GeoDataFrame(reference.loc[good_val1a])
    assert_extract_matches(reference)
def test_compare_column_values():
    """Exercise ``dq.compare_column_values`` on toy frames and real data.

    The first part feeds argument combinations that are expected to raise.
    The second part pins exact outputs for small numeric frames; in every
    case the reported difference is the signed left-minus-right value.
    The last part checks a single precinct pair from the MGGG/MEDSL tables
    against the difference computed directly from two extractions.
    """
    df1 = pd.DataFrame(data=[[1, 2, 3], [4, 5, 6]],
                       columns=['COL1', 'COL2', 'COL3'])
    df2 = pd.DataFrame(data=[[4, 5], [1, 2]], columns=['col2', 'col1'])
    df3 = pd.DataFrame(data=[['asdf', 'fdsa'], ['foo', 'bar']],
                       columns=['c1', 'c2'])

    # Each tuple is one call that the test expects to be rejected.
    rejected_calls = [
        (pd.DataFrame(), pd.DataFrame(), 'asdf', 'asdf'),
        (pd.DataFrame(), df1, 'asdf', 'asdf'),
        (df1, df2, 'col2', 'COL1'),
        (df1, df2, ['col2'], ['COL1']),
        (df1, df2, 'COL1', 'col2'),
        (df1, df2, df1.columns, df2.columns),
        (df1, df2, ['COL1', 'COL2'], ['col1', 'col2'], [1], ['adsf']),
        (df1, df2, ['COL1'], ['col1'], [1], [-1]),
        (df1, df3, ['COL1'], ['c1']),
        (df1, df2, ['COL1'], ['col2'], [0, 1], [1]),
        (df1, df2, ['COL1'], ['col2'], [0, 1], [0, 5]),
    ]
    for call_args in rejected_calls:
        with pytest.raises(Exception):
            dq.compare_column_values(*call_args)

    results = dq.compare_column_values(df1, df2, ['COL1'], ['col1'])
    assert results == {'COL1 [vs] col1': [('0 [vs] 0', -4), ('1 [vs] 1', 2)]}

    results = dq.compare_column_values(df1, df2, ['COL3'], ['col2'])
    assert results == {'COL3 [vs] col2': [('0 [vs] 0', -1), ('1 [vs] 1', 5)]}

    results = dq.compare_column_values(df1, df2,
                                       ['COL1', 'COL2'], ['col1', 'col2'])
    assert results == {
        'COL1 [vs] col1': [('0 [vs] 0', -4), ('1 [vs] 1', 2)],
        'COL2 [vs] col2': [('0 [vs] 0', -2), ('1 [vs] 1', 4)],
    }

    # Explicit row selections on each side.
    results = dq.compare_column_values(df1, df2, ['COL1'], ['col1'], [0], [1])
    assert results == {'COL1 [vs] col1': [('0 [vs] 1', -1)]}

    results = dq.compare_column_values(df1, df2, ['COL1'], ['col1'],
                                       [0, 1], [1, 0])
    assert results == {'COL1 [vs] col1': [('0 [vs] 1', -1), ('1 [vs] 0', -1)]}

    results = dq.compare_column_values(df1, df1, ['COL1'], ['COL2'], [0], [0])
    assert results == {'COL1 [vs] COL2': [('0 [vs] 0', -1)]}

    mggg_gdf1 = et.ExtractTable(mggg_gdf, column='PRECINCT').extract()
    medsl_df1 = et.ExtractTable(medsl_df, column='precinct').extract()

    results = dq.compare_column_values(
        mggg_gdf1, medsl_df1,
        ['AG18D'], ['Attorney General democrat'],
        ['Plainfield - DISTRICT 1-1-1a Town Hall'], ['1a Town Hall'])
    _, diff = results['AG18D [vs] Attorney General democrat'][0]

    ct_et = et.ExtractTable(mggg_gdf, column='PRECINCT',
                            value='Plainfield - DISTRICT 1-1-1a Town Hall')
    medsl_et = et.ExtractTable(medsl_df, column='precinct',
                               value='1a Town Hall')

    # The assertions above pin a signed (left - right) difference, so the
    # expected value is not wrapped in abs(); doing so would require a sign
    # the function never promises.
    expected = (ct_et.extract()['AG18D'][0]
                - medsl_et.extract()['Attorney General democrat'][0])
    assert diff == expected
def notest_large():
    """Extract a large input to ``tests/dumps/large.zip`` (not collected).

    The ``notest`` prefix keeps pytest from collecting this function.  The
    input path is an empty placeholder.
    """
    source_path = ''
    table = et.ExtractTable(source_path, 'tests/dumps/large.zip',
                            column='NAME10')
    table.extract_to_file()