def test_officer_allegation_dataframe_is_filled_when_load_officer_into_db(self):
    """Loading one officer row records an NA-filled officer-allegation row.

    A single-row frame is passed row by row to
    ``Loader.load_officers_into_db``; afterwards
    ``new_officer_allegation_data`` must equal a frame holding the
    allegation id and "NA" in every finding/outcome column.
    """
    cr_id = "1008899"
    loader = Loader()

    source_row = {
        "cr_id": cr_id,
        "number_of_officer_rows": 1,
        "beat_id": 433,
        "officer_race": ["White"],
        "officer_gender": ["M"],
        "officer_age": ["40-49"],
        "officer_years_on_force": ["0-4"],
    }
    for officer_tuple in pd.DataFrame([source_row]).itertuples():
        loader.load_officers_into_db(1, cr_id, officer_tuple)

    # Every finding/outcome column is expected to carry the same placeholder.
    na_columns = (
        "recc_finding",
        "recc_outcome",
        "final_finding",
        "final_outcome",
        "final_outcome_class",
    )
    expected = pd.DataFrame(
        [{"allegation_id": cr_id, **dict.fromkeys(na_columns, "NA")}]
    )

    # check_like=True: row/column ordering is not part of the contract.
    assert_frame_equal(
        loader.new_officer_allegation_data,
        expected,
        check_like=True,
    )
def test_unknown_officer_dataframe_is_filled_when_load_officer_into_db(self):
    """Loading one officer row records the officer's demographics.

    A single-row frame is passed row by row to
    ``Loader.load_officers_into_db``; afterwards
    ``new_officer_unknown_data`` must equal a one-row frame with the
    officer-allegation id and the scalar age/race/gender/years values.
    """
    cr_id = "1008899"
    loader = Loader()

    source_row = {
        "cr_id": cr_id,
        "number_of_officer_rows": 1,
        "beat_id": 433,
        "officer_race": ["White"],
        "officer_gender": ["M"],
        "officer_age": ["40-49"],
        "officer_years_on_force": ["0-4"],
    }
    for officer_tuple in pd.DataFrame([source_row]).itertuples():
        loader.load_officers_into_db(1, cr_id, officer_tuple)

    expected_row = {
        "data_officerallegation_id": 1,
        "age": "40-49",
        "race": "White",
        "gender": "M",
        "years_on_force": "0-4",
    }

    # Ordering and dtypes are deliberately excluded from the comparison.
    assert_frame_equal(
        loader.new_officer_unknown_data,
        pd.DataFrame([expected_row]),
        check_like=True,
        check_dtype=False,
    )
def test_get_new_data(self):
    """After one load, the new-allegation frame equals ``expected_load_data``."""
    loader = Loader()
    loader.load_into_db(expected_transformed_data_with_beat_id)

    # Ordering and dtypes are deliberately excluded from the comparison.
    assert_frame_equal(
        loader.get_new_allegation_data(),
        expected_load_data,
        check_dtype=False,
        check_like=True,
    )
def test_get_matches(self):
    """Loading the same frame twice reports ``expected_load_data`` as matches."""
    loader = Loader()

    # Same input on both passes, so the second pass is a repeat of the first.
    for _ in range(2):
        loader.load_into_db(expected_transformed_data_with_beat_id)

    # Ordering and dtypes are deliberately excluded from the comparison.
    assert_frame_equal(
        loader.get_allegation_matches(),
        expected_load_data,
        check_dtype=False,
        check_like=True,
    )
def test_load_data_with_matches_into_database(self):
    """Pre-existing allegations come back as matches; the rest as new data.

    Two allegations are inserted up front, the fixture frame is loaded, and
    the loader's matches / new data are compared element by element with
    the corresponding fixture rows. A second load of the same frame must
    leave the number of ``DataOfficerAllegation`` rows unchanged.
    """
    # NOTE(review): presumably these cr_ids are the ones held by fixture
    # rows 1 and 2 -- confirm against the fixture definition.
    for existing_cr_id in ("1087378", "1087387"):
        insert_allegation_into_database(DataAllegation(cr_id=existing_cr_id))

    expected_matches = [transformed_data_with_rows.iloc[i] for i in (1, 2)]
    expected_new_data = [
        pd.Series(transformed_data.iloc[i][0]) for i in (0, 3, 4)
    ]

    loader = Loader()
    loader.load_into_db(transformed_data_with_rows)

    matches = loader.get_matches()
    for position, expected_row in enumerate(expected_matches):
        assert pd.Series(expected_row[0]).equals(matches[position])

    new_data = loader.get_new_data()
    for position, expected_series in enumerate(expected_new_data):
        assert expected_series.equals(new_data[position])

    # Snapshot the officer rows, reload, and make sure nothing was added.
    officer_rows_before = DataOfficerAllegation.query.all()
    loader.load_into_db(transformed_data_with_rows)
    officer_rows_after = DataOfficerAllegation.query.all()
    assert len(officer_rows_before) == len(officer_rows_after)
def copa_scrape():
    """Scrape, transform, load, then dump the loader's results to CSV files.

    Writes four files through ``Saver.save_to_csv``: allegation matches,
    new allegations, new unknown-officer rows and new officer-allegation
    rows.

    Returns:
        A ``Response`` with status 200 and body ``'Success'``.
    """
    raw_rows = scrape_data()

    transformer = CopaScrapeTransformer()
    transformer.transform(raw_rows)
    transformed = transformer.get_transformed_data()

    loader = Loader()
    loader.load_into_db(transformed)

    saver = Saver()

    matches = strip_zeroes_from_beat_id(loader.get_allegation_matches())
    saver.save_to_csv(matches, "match_data.csv")

    new_allegations = strip_zeroes_from_beat_id(
        loader.get_new_allegation_data()
    )
    saver.save_to_csv(new_allegations, "new_allegation_data.csv")

    unknown_officers = cast_col_to_int(
        loader.new_officer_unknown_data, "data_officerallegation_id"
    )
    saver.save_to_csv(unknown_officers, "new_officer_unknown.csv")

    saver.save_to_csv(
        loader.new_officer_allegation_data, "new_officer_allegation.csv"
    )

    # do further processing on officer unknown
    return Response(status=200, response='Success')
def test_load_data_into_empty_database(self):
    """Loading the fixture frame creates the expected allegation/officer rows.

    Checks the number of ``DataAllegation`` rows and the cr_id of the rows
    at positions 0 and 4, then checks the number of
    ``DataOfficerAllegation`` rows against the summed
    ``number_of_officer_rows`` column and the allegation id of the officer
    row at position 3.
    """
    Loader().load_into_db(expected_transformed_data_with_beat_id)

    allegations = DataAllegation.query.all()
    assert len(allegations) == len(transformed_data_with_beat_id)
    for position in (0, 4):
        expected_cr_id = transformed_data_with_beat_id.cr_id[position]
        assert allegations[position].cr_id == expected_cr_id

    officer_rows = DataOfficerAllegation.query.all()
    expected_officer_count = (
        expected_transformed_data_with_beat_id['number_of_officer_rows'].sum()
    )
    assert len(officer_rows) == expected_officer_count

    # The officer row at position 3 is expected to point at the allegation
    # held in fixture row 2.
    third_row_cr_id = transformed_data_with_beat_id['cr_id'][2]
    assert officer_rows[3].allegation_id == third_row_cr_id
def copa_scrape():
    """Scrape, transform, load, then dump new data and matches to CSV files.

    Returns:
        A ``Response`` with status 200 and body ``'Success'``.
    """
    raw_rows = scrape_data()

    transformer = CopaScrapeTransformer()
    transformer.transform(raw_rows)
    transformed = transformer.get_transformed_data()

    loader = Loader()
    loader.load_into_db(transformed)

    saver = Saver()

    new_rows = loader.get_new_data()
    saver.save_to_csv(new_rows, "new_data.csv")

    matched_rows = loader.get_matches()
    saver.save_to_csv(matched_rows, "match_data.csv")

    return Response(status=200, response='Success')
def test_load_data_with_beat_id(self):
    """The first stored allegation carries beat id 111 after loading."""
    loader = Loader()
    loader.load_into_db(transformed_data_with_beat_id)

    first_allegation = DataAllegation.query.all()[0]
    assert first_allegation.beat_id == 111
def test_load_officer_data(self):
    """Loading the fixture frame stores six ``DataOfficerUnknown`` rows."""
    loader = Loader()
    loader.load_into_db(expected_transformed_data_with_beat_id)

    unknown_officers = DataOfficerUnknown.query.all()
    assert len(unknown_officers) == 6