def test_get_cherrypicked_samples_test_db_connection_close(app):
    """
    Test Scenario
    - Check that the database connection is closed when we call get_cherrypicked_samples
    """
    root_sample_ids = ["MCM001", "MCM002", "MCM003", "MCM004", "MCM005"]
    barcodes = ["123", "456"]

    with app.app_context(), patch("sqlalchemy.create_engine") as mock_sql_engine:
        # Hand out a connection mock we keep a reference to, so that the call to its
        # close() method can be verified afterwards.
        connection = Mock()
        mock_sql_engine().connect.return_value = connection

        get_cherrypicked_samples(root_sample_ids, barcodes)

        connection.close.assert_called_once()
def test_get_cherrypicked_samples_repeat_tests_sentinel_and_beckman(
    app, freezer, mlwh_sentinel_and_beckman_cherrypicked, event_wh_data
):
    """Check the frame returned for repeated root sample ids when queried in chunks of 2.

    The inputs come from MLWH_SAMPLE_STOCK_RESOURCE and MLWH_SAMPLE_LIGHTHOUSE_SAMPLE
    in fixture_data:

    - root_1 will match 2 samples, but only one of those will match a Sentinel event (on pb_1)
    - root_2 will match a single sample with a matching Sentinel event (on pb_2)
    - root_3 will match 2 samples, but not match either a Sentinel or Beckman event
    - root_4 will match 2 samples, but only one of those will match a Beckman event (on pb_4)
    - root_5 will match a single sample with a matching Beckman event (on pb_5)
    """
    sample_ids = ["root_1", "root_2", "root_3", "root_4", "root_5", "root_1"]
    barcodes = ["pb_1", "pb_2", "pb_3", "pb_4", "pb_5", "pb_6"]

    # Rows we expect back; root_3 is absent as it has no matching event.
    rows = np.array(
        [
            ["root_1", "pb_1", "positive", "A1"],
            ["root_2", "pb_2", "positive", "A1"],
            ["root_4", "pb_4", "positive", "A1"],
            ["root_5", "pb_5", "positive", "A1"],
        ]
    )
    column_names = [
        FIELD_ROOT_SAMPLE_ID,
        FIELD_PLATE_BARCODE,
        "Result_lower",
        FIELD_COORDINATE,
    ]
    expected = pd.DataFrame(rows, columns=column_names, index=[0, 1, 2, 3])

    with app.app_context():
        # We also chunk to further test different scenarios
        returned_samples = get_cherrypicked_samples(sample_ids, barcodes, 2)

        pd.testing.assert_frame_equal(expected, returned_samples)
def test_get_cherrypicked_samples_sentinel_and_beckman(app):
    """Check the root sample ids returned when the query response mixes Sentinel and Beckman rows.

    pandas.read_sql is patched to yield a single frame in which "MCM001" appears twice;
    the test then checks the ids found at positions 0 to 3 of the returned frame.
    """
    sentinel_ids = ["MCM001", "MCM006"]
    beckman_ids = ["MCM001", "MCM003", "MCM005"]

    # Cherrypicking query response
    query_responses = [
        pd.DataFrame(
            sentinel_ids + beckman_ids,
            columns=[FIELD_ROOT_SAMPLE_ID],
            index=[0, 1, 2, 3, 4],
        ),
    ]

    root_sample_ids = ["MCM001", "MCM002", "MCM003", "MCM004", "MCM005", "MCM006"]
    barcodes = ["123", "456"]

    with app.app_context(), patch(
        "sqlalchemy.create_engine", return_value=Mock()
    ), patch("pandas.read_sql", side_effect=query_responses):
        returned_samples = get_cherrypicked_samples(root_sample_ids, barcodes)

        for position, root_id in enumerate(["MCM001", "MCM006", "MCM003", "MCM005"]):
            assert returned_samples.at[position, FIELD_ROOT_SAMPLE_ID] == root_id
def test_get_cherrypicked_samples_repeat_tests_no_sentinel(
    app, mlwh_beckman_cherrypicked, event_wh_data
):
    """Check repeated root sample ids against Beckman-only data, queried in chunks of 2.

    The inputs come from MLWH_SAMPLE_LIGHTHOUSE_SAMPLE in fixture_data.

    root_4 will match 2 samples, but only one of those will match an event (on sample uuid),
    therefore we only get 1 of the samples called 'root_4' back (the one on plate 'pb_4').
    This also checks we don't get a duplicate row for root_4 / pb_4, despite it cropping up
    in 2 different 'chunks'.
    """
    sample_ids = ["root_4", "root_5", "root_4"]
    barcodes = ["pb_4", "pb_5", "pb_6"]

    rows = np.array(
        [
            ["root_4", "pb_4", "positive", "A1"],
            ["root_5", "pb_5", "positive", "A1"],
        ]
    )
    column_names = [
        FIELD_ROOT_SAMPLE_ID,
        FIELD_PLATE_BARCODE,
        "Result_lower",
        FIELD_COORDINATE,
    ]
    expected = pd.DataFrame(rows, columns=column_names, index=[0, 1])

    with app.app_context():
        returned_samples = get_cherrypicked_samples(sample_ids, barcodes, 2)

        # The view could be returning the rows in a different order, so sort and reindex
        # the returned rows before comparing them with the expected frame.
        resorted_returned_samples = returned_samples.sort_values(
            by=FIELD_ROOT_SAMPLE_ID, ignore_index=True
        )

        pd.testing.assert_frame_equal(expected, resorted_returned_samples)
def test_get_cherrypicked_samples_chunking_no_sentinel(app):
    """Check that per-chunk query results are combined into one frame indexed 0, 1, 2.

    pandas.read_sql is patched to hand back one single-row frame per call, and
    get_cherrypicked_samples is called with a chunk size of 2.
    """
    # Note: This represents the results of three different (Sentinel, Beckman) sets of
    # database queries, each Sentinel query getting indexed from 0. Do not change the
    # indices here unless you have modified the behaviour of the query.
    query_results = [
        # Cherrypicking query response for one chunk
        pd.DataFrame([root_id], columns=[FIELD_ROOT_SAMPLE_ID], index=[0])
        for root_id in ("MCM001", "MCM003", "MCM005")
    ]
    expected = pd.DataFrame(
        ["MCM001", "MCM003", "MCM005"],
        columns=[FIELD_ROOT_SAMPLE_ID],
        index=[0, 1, 2],
    )

    root_sample_ids = ["MCM001", "MCM002", "MCM003", "MCM004", "MCM005"]
    barcodes = ["123", "456"]

    with app.app_context(), patch(
        "sqlalchemy.create_engine", return_value=Mock()
    ), patch("pandas.read_sql", side_effect=query_results):
        returned_samples = get_cherrypicked_samples(root_sample_ids, barcodes, 2)

        pd.testing.assert_frame_equal(expected, returned_samples)
def test_get_cherrypicked_samples_repeat_tests_no_beckman(
    app, mlwh_sentinel_cherrypicked, event_wh_data
):
    """Check repeated root sample ids against Sentinel-only data, queried in chunks of 2.

    The inputs come from MLWH_SAMPLE_STOCK_RESOURCE in fixture_data.

    root_1 will match 2 samples, but only one of those will match an event (on Sanger
    Sample Id), therefore we only get 1 of the samples called 'root_1' back (the one on
    plate 'pb_1'). This also checks we don't get a duplicate row for root_1 / pb_1,
    despite it cropping up in 2 different 'chunks'.
    """
    root_sample_ids = ["root_1", "root_2", "root_1"]
    plate_barcodes = ["pb_1", "pb_2", "pb_3"]

    expected_rows = [
        ["root_1", "pb_1", "positive", "A1"],
        ["root_2", "pb_2", "positive", "A1"],
    ]
    expected_columns = [
        FIELD_ROOT_SAMPLE_ID,
        FIELD_PLATE_BARCODE,
        "Result_lower",
        FIELD_COORDINATE,
    ]
    expected = pd.DataFrame(
        np.array(expected_rows), columns=expected_columns, index=[0, 1]
    )

    with app.app_context():
        chunk_size = 2
        returned_samples = get_cherrypicked_samples(
            root_sample_ids, plate_barcodes, chunk_size
        )

        # No debug print needed here: assert_frame_equal reports the differing values
        # itself when the frames do not match.
        pd.testing.assert_frame_equal(expected, returned_samples)
def test_get_cherrypicked_samples(app, freezer):
    """Check the root sample ids returned when pandas.read_sql yields three cherrypicked rows."""
    cherrypicked_ids = ["MCM001", "MCM003", "MCM005"]
    query_response = pd.DataFrame(
        cherrypicked_ids, columns=[FIELD_ROOT_SAMPLE_ID], index=[0, 1, 2]
    )

    root_sample_ids = ["MCM001", "MCM002", "MCM003", "MCM004", "MCM005"]
    barcodes = ["123", "456"]

    with app.app_context(), patch(
        "sqlalchemy.create_engine", return_value=Mock()
    ), patch("pandas.read_sql", return_value=query_response):
        returned_samples = get_cherrypicked_samples(root_sample_ids, barcodes)

        for position, root_id in enumerate(["MCM001", "MCM003", "MCM005"]):
            assert returned_samples.at[position, FIELD_ROOT_SAMPLE_ID] == root_id