def test_generate_samples(self):
    """Generates fake biobank samples via the DataGen API, runs the CSV import
    job, and verifies the stored samples and participant summary roll-ups.
    """
    created = self.send_post('Participant', {})
    participant_id = created['participantId']
    self.send_consent(participant_id)
    raw_participant_id = from_client_participant_id(participant_id)
    order_json = load_biobank_order_json(raw_participant_id)
    self.send_post('Participant/%s/BiobankOrder' % participant_id, order_json)

    # Sanity check that the orders were created correctly.
    order_dao = BiobankOrderDao()
    self.assertEquals(1, order_dao.count())
    only_order = order_dao.get_all()[0]
    order_with_children = order_dao.get_with_children(only_order.biobankOrderId)
    self.assertEquals(11, len(order_with_children.samples))

    self.send_post('DataGen',
                   {'create_biobank_samples': True,
                    'samples_missing_fraction': 0.0})
    # Run the (usually offline) Biobank CSV import job.
    upsert_from_latest_csv()

    self.assertEquals(11, BiobankStoredSampleDao().count())
    summary = ParticipantSummaryDao().get(raw_participant_id)
    self.assertEquals(SampleStatus.RECEIVED, summary.samplesToIsolateDNA)
    self.assertEquals(10, summary.numBaselineSamplesArrived)
def test_end_to_end(self):
    """Writes a 3-row samples CSV to cloud storage, runs the import job, and
    checks each participant summary reflects its (randomly chosen) test code.
    """
    stored_sample_dao = BiobankStoredSampleDao()
    self.assertEquals(stored_sample_dao.count(), 0)

    # Create 3 participants and pass their (random) IDs into sample rows.
    summary_dao = ParticipantSummaryDao()
    biobank_ids = []
    participant_ids = []
    for _ in xrange(3):
        new_participant = self.participant_dao.insert(Participant())
        summary_dao.insert(self.participant_summary(new_participant))
        participant_ids.append(new_participant.participantId)
        biobank_ids.append(new_participant.biobankId)
        fresh_summary = summary_dao.get(new_participant.participantId)
        self.assertEquals(fresh_summary.numBaselineSamplesArrived, 0)

    test1, test2, test3 = random.sample(_BASELINE_TESTS, 3)
    samples_file = test_data.open_biobank_samples(
        *biobank_ids, test1=test1, test2=test2, test3=test3)

    # The import job only picks up CSVs whose timestamped name is recent.
    central_now = self._naive_utc_to_naive_central(clock.CLOCK.now())
    input_filename = 'cloud%s.csv' % central_now.strftime(
        biobank_samples_pipeline.INPUT_CSV_TIME_FORMAT)
    self._write_cloud_csv(input_filename, samples_file.read())

    biobank_samples_pipeline.upsert_from_latest_csv()

    self.assertEquals(stored_sample_dao.count(), 3)
    self._check_summary(participant_ids[0], test1, '2016-11-29T12:19:32')
    self._check_summary(participant_ids[1], test2, '2016-11-29T12:38:58')
    self._check_summary(participant_ids[2], test3, '2016-11-29T12:41:26')
def test_old_csv_not_imported(self):
    """A samples CSV older than 24 hours must be rejected with a DataError."""
    now = clock.CLOCK.now()
    stale_time = now - datetime.timedelta(hours=25)
    central_stale = self._naive_utc_to_naive_central(stale_time)
    input_filename = 'cloud%s.csv' % central_stale.strftime(
        biobank_samples_pipeline.INPUT_CSV_TIME_FORMAT)
    self._write_cloud_csv(input_filename, '')
    with self.assertRaises(biobank_samples_pipeline.DataError):
        biobank_samples_pipeline.upsert_from_latest_csv()
def test_end_to_end(self):
    """Imports the 16-row fixture CSV and verifies stored-sample counts plus
    per-participant status/timestamp, honoring In Prep / Disposed semantics.
    """
    stored_sample_dao = BiobankStoredSampleDao()
    self.assertEquals(stored_sample_dao.count(), 0)

    summary_dao = ParticipantSummaryDao()
    biobank_ids = []
    participant_ids = []
    nids = 16  # equal to the number of parent rows in 'biobank_samples_1.csv'
    cids = 1  # equal to the number of child rows in 'biobank_samples_1.csv'

    # Create one participant per fixture row and pass their (random) IDs
    # into the sample rows.
    for _ in xrange(nids):
        new_participant = self.participant_dao.insert(Participant())
        summary_dao.insert(self.participant_summary(new_participant))
        participant_ids.append(new_participant.participantId)
        biobank_ids.append(new_participant.biobankId)
        fresh_summary = summary_dao.get(new_participant.participantId)
        self.assertEquals(fresh_summary.numBaselineSamplesArrived, 0)

    test_codes = random.sample(_BASELINE_TESTS, nids)
    samples_file = test_data.open_biobank_samples(
        biobank_ids=biobank_ids, tests=test_codes)
    lines = samples_file.split('\n')[1:]  # remove field name line

    central_now = self._naive_utc_to_naive_central(clock.CLOCK.now())
    input_filename = 'cloud%s.csv' % central_now.strftime(
        biobank_samples_pipeline.INPUT_CSV_TIME_FORMAT)
    self._write_cloud_csv(input_filename, samples_file)

    biobank_samples_pipeline.upsert_from_latest_csv()

    # Child rows are folded into their parents, so they don't add to the count.
    self.assertEquals(stored_sample_dao.count(), nids - cids)

    for x in range(0, nids):
        cols = lines[x].split('\t')

        if cols[10].strip():  # skip child sample
            continue

        # If status is 'In Prep', then sample confirmed timestamp should be empty
        if cols[2] == 'In Prep':
            self.assertEquals(len(cols[11]), 0)
        else:
            status = SampleStatus.RECEIVED
            ts_str = cols[11]
            # DA-814 - Participant Summary test status should be: Unset, Received or
            # Disposed only. If sample is disposed, then check disposed timestamp,
            # otherwise check confirmed timestamp.
            # DA-871 - Only check status is disposed when reason code is a bad disposal.
            if cols[2] == 'Disposed' and get_sample_status_enum_value(
                    cols[8]) > SampleStatus.UNKNOWN:
                status = SampleStatus.DISPOSED
                ts_str = cols[9]
            ts = datetime.datetime.strptime(ts_str, '%Y/%m/%d %H:%M:%S')
            self._check_summary(participant_ids[x], test_codes[x], ts, status)
def import_biobank_samples():
    """Runs the Biobank samples CSV import, then generates the reconciliation
    report for the imported batch.

    Returns a JSON string of the form {"written": <row count>}.
    """
    # Note that crons always have a 10 minute deadline instead of the normal 60s;
    # additionally our offline service uses basic scaling with has no deadline.
    logging.info('Starting samples import.')
    num_written, batch_timestamp = biobank_samples_pipeline.upsert_from_latest_csv()
    logging.info('Import complete (%d written), generating report.', num_written)

    logging.info('Generating reconciliation report.')
    biobank_samples_pipeline.write_reconciliation_report(batch_timestamp)
    logging.info('Generated reconciliation report.')

    return json.dumps({'written': num_written})