def testDeidentifiedExport_participantIds(self):
  """Deidentified exports must obfuscate participant IDs.

  Inserts two participants, runs a deidentified table export, and verifies
  that the IDs appearing in the exported CSV are disjoint from the real
  participant IDs while still being unique per participant.
  """
  TableExporter.export_tables('rdr', ['ppi_participant_view'], 'dir', deidentify=True)
  p1 = self._participant_with_defaults(
      participantId=1, version=2, biobankId=2,
      providerLink=make_primary_provider_link_for_name('PITT'))
  ParticipantDao().insert(p1)
  p2 = self._participant_with_defaults(
      participantId=2, version=3, biobankId=3,
      providerLink=make_primary_provider_link_for_name('PITT'))
  ParticipantDao().insert(p2)
  tasks = self.taskqueue_stub.get_filtered_tasks()
  self.assertEqual(len(tasks), 1)
  csv_path = deferred.run(tasks[0].payload)
  with cloudstorage_api.open('/' + csv_path, mode='r') as output:
    reader = csv.reader(output)
    rows = list(reader)[1:]  # skip the CSV header row
    self.assertEqual(2, len(rows))
    # Set literals / comprehension instead of set([...]); assertEqual instead
    # of the deprecated assertEquals alias.
    pmi_ids = {p1.participantId, p2.participantId}
    obf_ids = {row[0] for row in rows}
    self.assertFalse(pmi_ids.intersection(obf_ids),
                     'should be no overlap between pmi_ids and obfuscated IDs')
    self.assertEqual(2, len(obf_ids))
def setUp(self, use_mysql=True, with_data=True):
  """Creates a PITT-paired participant (ID 123456789) plus summary for DV order tests."""
  super(DvOrderApiTestBase, self).setUp(use_mysql=use_mysql, with_data=with_data)
  # Template payload for DV order API requests; tests may override fields.
  self.test_data = {
      "subject": "Patient/P123456789",
      "awardee": "PITT",
      "organization": "PITT_BANNER_HEALTH",
      "patient_status": "YES",
      "user": "******",
      "site": "hpo-site-monroeville",
      "authored": "2019-04-26T12:11:41",
      "comment": "This is comment"
  }
  self.participant_dao = ParticipantDao()
  self.summary_dao = ParticipantSummaryDao()
  self.hpo_dao = HPODao()
  self.hpo = self.hpo_dao.get_by_name('PITT')
  # The participant row must exist before its summary can be inserted.
  self.participant = Participant(hpoId=self.hpo.hpoId, participantId=123456789, biobankId=7)
  self.participant_dao.insert(self.participant)
  self.summary = self.participant_summary(self.participant)
  self.summary_dao.insert(self.summary)
def test_validation_no_answer(self):
  """An unanswered first-name question is reported by the PPI validation API.

  Exercises validation lookup both by email and by login phone number.
  Fix: assertEquals is a deprecated alias of assertEqual.
  """
  self.participant = Participant(participantId=123, biobankId=555)
  ParticipantDao().insert(self.participant)
  self.participant_id = to_client_participant_id(self.participant.participantId)
  summary = ParticipantSummaryDao().insert(self.participant_summary(self.participant))
  result = check_ppi_data_api._get_validation_result(
      summary.email, {FIRST_NAME_QUESTION_CODE: 'NotAnswered'})
  self.assertEqual(1, result.tests_count)
  self.assertEqual(1, result.errors_count)
  self.assertEqual(1, len(result.messages))
  self.assertIn(FIRST_NAME_QUESTION_CODE, result.messages[0])
  # test using phone number as lookup value in API.
  summary.loginPhoneNumber = '5555555555'
  ParticipantSummaryDao().update(summary)
  result = check_ppi_data_api._get_validation_result(
      summary.loginPhoneNumber, {FIRST_NAME_QUESTION_CODE: 'NotAnswered'})
  self.assertEqual(1, result.tests_count)
  self.assertEqual(1, result.errors_count)
  self.assertEqual(1, len(result.messages))
  self.assertIn(FIRST_NAME_QUESTION_CODE, result.messages[0])
def setUp(self):
  """Builds the DAOs and canned Query objects shared by the summary DAO tests."""
  super(ParticipantSummaryDaoTest, self).setUp(use_mysql=True)
  self.dao = ParticipantSummaryDao()
  self.order_dao = BiobankOrderDao()
  self.measurement_dao = PhysicalMeasurementsDao()
  self.participant_dao = ParticipantDao()
  # All canned queries use a page size of 2 and no pagination token.
  self.no_filter_query = Query([], None, 2, None)
  self.one_filter_query = Query(
      [FieldFilter("participantId", Operator.EQUALS, 1)], None, 2, None)
  self.two_filter_query = Query([
      FieldFilter("participantId", Operator.EQUALS, 1),
      FieldFilter("hpoId", Operator.EQUALS, PITT_HPO_ID)
  ], None, 2, None)
  self.ascending_biobank_id_query = Query([], OrderBy("biobankId", True), 2, None)
  self.descending_biobank_id_query = Query([], OrderBy("biobankId", False), 2, None)
  self.enrollment_status_order_query = Query([], OrderBy(
      "enrollmentStatus", True), 2, None)
  self.hpo_id_order_query = Query([], OrderBy("hpoId", True), 2, None)
  self.first_name_order_query = Query([], OrderBy("firstName", True), 2, None)
def generate_samples(fraction_missing):
  """Creates fake sample CSV data in GCS.

  Writes one tab-delimited CSV into the configured biobank samples bucket
  containing (a) samples derived from existing ordered samples and
  (b) "orphan" samples for participants with no matching order.

  Args:
    fraction_missing: This many samples which exist as BiobankStoredSamples
        will not have rows generated in the fake CSV.
  """
  bucket_name = config.getSetting(config.BIOBANK_SAMPLES_BUCKET_NAME)
  now = clock.CLOCK.now()
  file_name = '/%s/fake_%s.csv' % (bucket_name, now.strftime(INPUT_CSV_TIME_FORMAT))
  num_rows = 0
  # Random base keeps generated sample IDs unlikely to collide across runs.
  sample_id_start = random.randint(1000000, 10000000)
  with cloudstorage_api.open(file_name, mode='w') as dest:
    writer = csv.writer(dest, delimiter="\t")
    writer.writerow(_HEADERS)
    biobank_order_dao = BiobankOrderDao()
    with biobank_order_dao.session() as session:
      # Sample (1 - fraction_missing) of ordered samples to appear in the CSV.
      rows = biobank_order_dao.get_ordered_samples_sample(
          session, 1 - fraction_missing, _BATCH_SIZE)
      for biobank_id, collected_time, test in rows:
        if collected_time is None:
          logging.warning(
              'biobank_id=%s test=%s skipped (collected=%s)',
              biobank_id, test, collected_time)
          continue
        # Confirmed time is a random offset after collection.
        minutes_delta = random.randint(
            0, _MAX_MINUTES_BETWEEN_SAMPLE_COLLECTED_AND_CONFIRMED)
        confirmed_time = collected_time + datetime.timedelta(minutes=minutes_delta)
        writer.writerow([
            sample_id_start + num_rows,
            None,  # no parent
            confirmed_time.strftime(_TIME_FORMAT),
            to_client_biobank_id(biobank_id),
            test,
            confirmed_time.strftime(_TIME_FORMAT),
            'KIT'
        ])  # reuse confirmed time as created time
        num_rows += 1
    # Second pass: samples with no corresponding order ("orphan" samples).
    participant_dao = ParticipantDao()
    with participant_dao.session() as session:
      rows = participant_dao.get_biobank_ids_sample(
          session, _PARTICIPANTS_WITH_ORPHAN_SAMPLES, _BATCH_SIZE)
      for biobank_id, sign_up_time in rows:
        minutes_delta = random.randint(
            0, _MAX_MINUTES_BETWEEN_PARTICIPANT_CREATED_AND_CONFIRMED)
        confirmed_time = sign_up_time + datetime.timedelta(minutes=minutes_delta)
        # Pick a random non-empty subset of the known biobank tests.
        tests = random.sample(BIOBANK_TESTS, random.randint(1, len(BIOBANK_TESTS)))
        for test in tests:
          writer.writerow([
              sample_id_start + num_rows,
              None,
              confirmed_time.strftime(_TIME_FORMAT),
              to_client_biobank_id(biobank_id),
              test,
              confirmed_time.strftime(_TIME_FORMAT),
              'KIT'
          ])
          num_rows += 1
  logging.info("Generated %d samples in %s.", num_rows, file_name)
def setUp(self):
  """Points the ghost-ID import at a fake bucket and builds participant DAOs."""
  super(MarkGhostParticipantsTest, self).setUp(use_mysql=True)
  NdbTestBase.doSetUp(self)
  TestBase.setup_fake(self)
  config.override_setting(config.GHOST_ID_BUCKET, [_FAKE_BUCKET])
  self.participant_dao = ParticipantDao()
  self.p_history = ParticipantHistoryDao()
def setUp(self, **kwargs):
  """Creates DAOs plus one unset test HPO and two HPO/organization pairs (FOO, BAR)."""
  super(MetricsEhrApiTestBase, self).setUp(use_mysql=True, **kwargs)
  self.dao = ParticipantDao()
  self.ps_dao = ParticipantSummaryDao()
  self.ehr_receipt_dao = EhrReceiptDao()
  self.ps = ParticipantSummary()
  self.calendar_dao = CalendarDao()
  self.site_dao = SiteDao()
  self.hpo_dao = HPODao()
  self.org_dao = OrganizationDao()
  self.hpo_test = self._make_hpo(hpoId=TEST_HPO_ID, name=TEST_HPO_NAME, displayName='Test',
                                 organizationType=OrganizationType.UNSET)
  self.hpo_foo = self._make_hpo(hpoId=10, name='FOO', displayName='Foo')
  self.hpo_bar = self._make_hpo(hpoId=11, name='BAR', displayName='Bar')
  # Each organization is paired with its same-numbered HPO.
  self.org_foo_a = self._make_org(organizationId=10, externalId='FOO_A', displayName='Foo A',
                                  hpoId=self.hpo_foo.hpoId)
  self.org_bar_a = self._make_org(organizationId=11, externalId='BAR_A', displayName='Bar A',
                                  hpoId=self.hpo_bar.hpoId)
def insert_with_session(self, session, obj):
  """Inserts a biobank order, updating the participant summary and order history.

  Raises:
    BadRequest: if the client supplied a logPosition or omitted biobankOrderId.
    Conflict: if a *different* order with the same ID already exists.

  Re-submitting a byte-identical order returns the existing row (idempotent).
  """
  obj.version = 1
  # logPosition is server-assigned; reject any client-supplied value.
  if obj.logPosition is not None:
    raise BadRequest('%s.logPosition must be auto-generated.' % self.model_type.__name__)
  obj.logPosition = LogPosition()
  if obj.biobankOrderId is None:
    raise BadRequest('Client must supply biobankOrderId.')
  existing_order = self.get_with_children_in_session(session, obj.biobankOrderId)
  if existing_order:
    existing_order_dict = self._order_as_dict(existing_order)
    new_dict = self._order_as_dict(obj)
    if existing_order_dict == new_dict:
      # If an existing matching order exists, just return it without trying to create it again.
      return existing_order
    else:
      raise Conflict('Order with ID %s already exists' % obj.biobankOrderId)
  self._update_participant_summary(session, obj)
  inserted_obj = super(BiobankOrderDao, self).insert_with_session(session, obj)
  # Presumably backfills the participant's HPO pairing from the collection
  # site when missing — confirm against ParticipantDao.add_missing_hpo_from_site.
  if inserted_obj.collectedSiteId is not None:
    ParticipantDao().add_missing_hpo_from_site(
        session, inserted_obj.participantId, inserted_obj.collectedSiteId)
  self._update_history(session, obj)
  return inserted_obj
def _update_participant_summary(self, session, obj):
  """Marks physical measurements complete on the participant's summary.

  Raises:
    BadRequest: for an unknown participant or one without consent (no summary).
    (raise_if_withdrawn raises for withdrawn participants.)
  """
  participant_id = obj.participantId
  if participant_id is None:
    raise BadRequest('participantId is required')
  participant_summary_dao = ParticipantSummaryDao()
  # Lock the participant row for the duration of this update.
  participant = ParticipantDao().get_for_update(session, participant_id)
  if not participant:
    raise BadRequest(
        "Can't submit physical measurements for unknown participant %s" % participant_id)
  participant_summary = participant.participantSummary
  if not participant_summary:
    raise BadRequest(
        "Can't submit physical measurements for participant %s without consent" % participant_id)
  raise_if_withdrawn(participant_summary)
  participant_summary.physicalMeasurementsTime = obj.created
  participant_summary.physicalMeasurementsFinalizedTime = obj.finalized
  participant_summary.physicalMeasurementsCreatedSiteId = obj.createdSiteId
  participant_summary.physicalMeasurementsFinalizedSiteId = obj.finalizedSiteId
  participant_summary.lastModified = clock.CLOCK.now()
  # Enrollment status only needs recomputing on the first transition to COMPLETED.
  if participant_summary.physicalMeasurementsStatus != PhysicalMeasurementsStatus.COMPLETED:
    participant_summary.physicalMeasurementsStatus = PhysicalMeasurementsStatus.COMPLETED
    participant_summary_dao.update_enrollment_status(participant_summary)
  session.merge(participant_summary)
  return participant_summary
def _upsert_samples_from_csv(csv_reader):
  """Inserts/updates BiobankStoredSamples from a csv.DictReader.

  Rows whose biobank ID has no matching Participant are logged and skipped
  (DA-601). Rows are upserted in batches of _BATCH_SIZE.

  Args:
    csv_reader: a csv.DictReader over the samples CSV.

  Returns:
    The number of samples written.

  Raises:
    DataError: if required columns are missing or a value fails to parse.

  Fix: `except ValueError, e` is Python-2-only syntax (removed in Python 3);
  `except ValueError as e` is valid from Python 2.6 onward.
  """
  missing_cols = set(CsvColumns.ALL) - set(csv_reader.fieldnames)
  if missing_cols:
    raise DataError(
        'CSV is missing columns %s, had columns %s.' % (missing_cols, csv_reader.fieldnames))
  samples_dao = BiobankStoredSampleDao()
  biobank_id_prefix = get_biobank_id_prefix()
  written = 0
  try:
    samples = []
    with ParticipantDao().session() as session:
      for row in csv_reader:
        sample = _create_sample_from_row(row, biobank_id_prefix)
        if sample:
          # DA-601 - Ensure biobank_id exists before accepting a sample record.
          if session.query(Participant).filter(
              Participant.biobankId == sample.biobankId).count() < 1:
            logging.error('Bio bank Id ({0}) does not exist in the Participant table.'.
                          format(sample.biobankId))
            continue
          samples.append(sample)
          if len(samples) >= _BATCH_SIZE:
            written += samples_dao.upsert_all(samples)
            samples = []
      if samples:
        written += samples_dao.upsert_all(samples)
    return written
  except ValueError as e:
    raise DataError(e)
def setUp(self, **kwargs):
  """Builds the org/site/participant DAOs used by the consent-file sync tests."""
  super(SyncConsentFilesTest, self).setUp(use_mysql=True, **kwargs)
  NdbTestBase.doSetUp(self)
  TestBase.setup_fake(self)
  self.org_dao = OrganizationDao()
  self.site_dao = SiteDao()
  self.participant_dao = ParticipantDao()
  self.summary_dao = ParticipantSummaryDao()
def testInsert_withdrawnParticipantFails(self):
  """Inserting measurements for a withdrawn participant raises Forbidden."""
  self.participant.withdrawalStatus = WithdrawalStatus.NO_USE
  ParticipantDao().update(self.participant)
  self._make_summary()
  summary = ParticipantSummaryDao().get(self.participant.participantId)
  # No measurements should have been recorded for the withdrawn participant.
  self.assertIsNone(summary.physicalMeasurementsStatus)
  with self.assertRaises(Forbidden):
    self.dao.insert(self._make_physical_measurements())
def get_with_children(self, questionnaire_response_id):
  """Fetches a QuestionnaireResponse with its answers eagerly loaded.

  Returns None when no response exists for the given ID; otherwise validates
  the participant reference before returning the response.
  """
  with self.session() as session:
    response = (session.query(QuestionnaireResponse)
                .options(subqueryload(QuestionnaireResponse.answers))
                .get(questionnaire_response_id))
    if response is None:
      return None
    ParticipantDao().validate_participant_reference(session, response)
    return response
def test_auto_pair_called(self):
  """Submitting physical measurements auto-pairs an UNSET participant with an HPO.

  Fix: assertEquals is a deprecated alias of assertEqual.
  """
  pid_numeric = from_client_participant_id(self.participant_id)
  participant_dao = ParticipantDao()
  self.send_consent(self.participant_id)
  self.send_consent(self.participant_id_2)
  # Before measurements, the participant is unpaired.
  self.assertEqual(participant_dao.get(pid_numeric).hpoId, UNSET_HPO_ID)
  self._insert_measurements(datetime.datetime.utcnow().isoformat())
  self.assertNotEqual(participant_dao.get(pid_numeric).hpoId, UNSET_HPO_ID)
def setUp(self):
  """Creates participant 123 and the REST path for posting its biobank orders."""
  super(BiobankOrderApiTest, self).setUp()
  self.participant = Participant(participantId=123, biobankId=555)
  self.participant_dao = ParticipantDao()
  self.participant_dao.insert(self.participant)
  self.summary_dao = ParticipantSummaryDao()
  self.path = ('Participant/%s/BiobankOrder'
               % to_client_participant_id(self.participant.participantId))
def setUp(self):
  """Inserts participant 1 and loads canned measurement JSON for DAO tests."""
  super(PhysicalMeasurementsDaoTest, self).setUp()
  self.participant = Participant(participantId=1, biobankId=2)
  ParticipantDao().insert(self.participant)
  self.dao = PhysicalMeasurementsDao()
  self.participant_summary_dao = ParticipantSummaryDao()
  self.measurement_json = json.dumps(load_measurement_json(self.participant.participantId,
                                                           TIME_1.isoformat()))
  self.biobank = BiobankOrderDao()
def setUp(self, with_data=True, use_mysql=False):
  """Builds genomic-set DAOs and delegates fixture creation to setup_data()."""
  super(GenomicSetValidationBaseTestCase, self).setUp(with_data=with_data, use_mysql=use_mysql)
  self.participant_dao = ParticipantDao()
  self.summary_dao = ParticipantSummaryDao()
  self.genomic_set_dao = GenomicSetDao()
  self.genomic_member_dao = GenomicSetMemberDao()
  # Presumably a counter for generating unique participants — confirm in setup_data().
  self._participant_i = 0
  self.setup_data()
def test_order_for_withdrawn_participant_fails(self):
  """Creating a biobank order for a withdrawn participant raises Forbidden."""
  self.participant.withdrawalStatus = WithdrawalStatus.NO_USE
  ParticipantDao().update(self.participant)
  ParticipantSummaryDao().insert(self.participant_summary(self.participant))
  with self.assertRaises(Forbidden):
    self.dao.insert(self._make_biobank_order(participantId=self.participant.participantId))
def setUp(self):
  """Overrides biobank sample config to point at fake test codes and bucket."""
  super(BiobankSamplesPipelineTest, self).setUp(use_mysql=True)
  NdbTestBase.doSetUp(self)
  TestBase.setup_fake(self)
  config.override_setting(config.BASELINE_SAMPLE_TEST_CODES, _BASELINE_TESTS)
  # Everything is stored as a list, so override bucket name as a 1-element list.
  config.override_setting(config.BIOBANK_SAMPLES_BUCKET_NAME, [_FAKE_BUCKET])
  self.participant_dao = ParticipantDao()
def test_get_for_withdrawn_participant_fails(self):
  """Fetching an existing order is Forbidden once the participant has withdrawn."""
  ParticipantSummaryDao().insert(self.participant_summary(self.participant))
  self.dao.insert(self._make_biobank_order(
      biobankOrderId='1', participantId=self.participant.participantId))
  # Bump the version — presumably required for the optimistic-locking
  # update below to succeed; confirm against ParticipantDao.update.
  self.participant.version += 1
  self.participant.withdrawalStatus = WithdrawalStatus.NO_USE
  ParticipantDao().update(self.participant)
  with self.assertRaises(Forbidden):
    self.dao.get(1)
def setUp(self):
  """Overrides genomic/biobank bucket config with fakes and builds DAOs."""
  super(GenomicSetFileHandlerTest, self).setUp(use_mysql=True)
  NdbTestBase.doSetUp(self)
  TestBase.setup_fake(self)
  # Everything is stored as a list, so override bucket name as a 1-element list.
  config.override_setting(config.GENOMIC_SET_BUCKET_NAME, [_FAKE_BUCKET])
  config.override_setting(config.BIOBANK_SAMPLES_BUCKET_NAME, [_FAKE_BUCKET])
  config.override_setting(config.GENOMIC_BIOBANK_MANIFEST_FOLDER_NAME, [_FAKE_BUCKET_FOLDER])
  self.participant_dao = ParticipantDao()
  self.summary_dao = ParticipantSummaryDao()
def _update_participant_summary(self, session, obj, is_amendment=False):
  """Syncs the participant summary with a new, cancelled, or restored measurement.

  Handles three cases: a cancellation with no remaining good measurement
  (summary marked CANCELLED, times cleared), any non-cancelled measurement
  (summary marked COMPLETED from this measurement), and a cancellation where
  an earlier uncancelled measurement remains (summary falls back to the
  latest remaining measurement). Distinct-visit counts are adjusted only for
  non-amendment changes.

  Raises:
    BadRequest: for an unknown participant or one without consent.
    (raise_if_withdrawn raises for withdrawn participants.)
  """
  participant_id = obj.participantId
  if participant_id is None:
    raise BadRequest('participantId is required')
  participant_summary_dao = ParticipantSummaryDao()
  # Lock the participant row for the duration of this update.
  participant = ParticipantDao().get_for_update(session, participant_id)
  if not participant:
    raise BadRequest(
        "Can't submit physical measurements for unknown participant %s" % participant_id)
  participant_summary = participant.participantSummary
  if not participant_summary:
    raise BadRequest(
        "Can't submit physical measurements for participant %s without consent" % participant_id)
  raise_if_withdrawn(participant_summary)
  participant_summary.lastModified = clock.CLOCK.now()
  is_distinct_visit = participant_summary_dao.calculate_distinct_visits(
      participant_id, obj.finalized, obj.physicalMeasurementsId)
  # Cancelling a distinct visit (outside an amendment) removes it from the count.
  if obj.status and obj.status == PhysicalMeasurementsStatus.CANCELLED and is_distinct_visit \
      and not is_amendment:
    participant_summary.numberDistinctVisits -= 1
  # These fields set on measurement that is cancelled and doesn't have a previous good measurement
  if obj.status and obj.status == PhysicalMeasurementsStatus.CANCELLED and not \
      self.has_uncancelled_pm(session, participant):
    participant_summary.physicalMeasurementsStatus = PhysicalMeasurementsStatus.CANCELLED
    participant_summary.physicalMeasurementsTime = None
    participant_summary.physicalMeasurementsFinalizedSiteId = None
  # These fields set on any measurement not cancelled
  elif obj.status != PhysicalMeasurementsStatus.CANCELLED:
    # new PM or if a PM was restored, it is complete again.
    participant_summary.physicalMeasurementsStatus = PhysicalMeasurementsStatus.COMPLETED
    participant_summary.physicalMeasurementsTime = obj.created
    participant_summary.physicalMeasurementsFinalizedTime = obj.finalized
    participant_summary.physicalMeasurementsCreatedSiteId = obj.createdSiteId
    participant_summary.physicalMeasurementsFinalizedSiteId = obj.finalizedSiteId
    if is_distinct_visit and not is_amendment:
      participant_summary.numberDistinctVisits += 1
  elif obj.status and obj.status == PhysicalMeasurementsStatus.CANCELLED and \
      self.has_uncancelled_pm(session, participant):
    # A good measurement remains: fall back to the latest uncancelled one.
    get_latest_pm = self.get_latest_pm(session, participant)
    participant_summary.physicalMeasurementsFinalizedTime = get_latest_pm.finalized
    participant_summary.physicalMeasurementsTime = get_latest_pm.created
    participant_summary.physicalMeasurementsCreatedSiteId = get_latest_pm.createdSiteId
    participant_summary.physicalMeasurementsFinalizedSiteId = get_latest_pm.finalizedSiteId
  participant_summary_dao.update_enrollment_status(participant_summary)
  session.merge(participant_summary)
  return participant_summary
def setUp(self, with_data=True, use_mysql=True):
  """Builds DAOs and delegates to _setup_initial_data() for EHR receipt fixtures."""
  super(EhrReceiptDaoTest, self).setUp(with_data=with_data, use_mysql=use_mysql)
  self.setup_fake()
  self.calendar_dao = CalendarDao()
  self.org_dao = OrganizationDao()
  self.hpo_dao = HPODao()
  self.participant_dao = ParticipantDao()
  self.summary_dao = ParticipantSummaryDao()
  self.ehr_receipt_dao = EhrReceiptDao()
  self._setup_initial_data()
def testInsert_getFailsForWithdrawnParticipant(self):
  """get() and query() are Forbidden after the participant withdraws."""
  self._make_summary()
  self.dao.insert(self._make_physical_measurements())
  # Bump the version — presumably required for the optimistic-locking
  # update below to succeed; confirm against ParticipantDao.update.
  self.participant.version += 1
  self.participant.withdrawalStatus = WithdrawalStatus.NO_USE
  ParticipantDao().update(self.participant)
  with self.assertRaises(Forbidden):
    self.dao.get(1)
  with self.assertRaises(Forbidden):
    self.dao.query(Query([FieldFilter('participantId', Operator.EQUALS,
                                      self.participant.participantId)], None, 10, None))
def _initialize_query(self, session, query_def):
  """Validates the participant filter (if any) before building the base query.

  Sync queries don't specify a participant ID, and can return measurements for
  participants who have subsequently withdrawn; for all requests that do
  specify a participant ID, make sure the participant exists and is not
  withdrawn.
  """
  participant_id = next(
      (field_filter.value for field_filter in query_def.field_filters
       if field_filter.field_name == 'participantId'),
      None)
  if participant_id:
    ParticipantDao().validate_participant_id(session, participant_id)
  return super(PhysicalMeasurementsDao, self)._initialize_query(session, query_def)
def test_metrics_redaction(self):
  """Withdrawn participants are excluded from a re-run public metrics export."""
  self._create_data()
  with FakeClock(TIME):
    PublicMetricsExport.export('123')
  # Withdraw participant.
  pdao = ParticipantDao()
  p1 = pdao.get(1)
  p1.withdrawalStatus = WithdrawalStatus.NO_USE
  pdao.update(p1)
  PublicMetricsExport.export('123')
  self.assert_total_count_per_key(2)  # now, 2 qualified participants
def setUp(self):
  """Creates participant 123456789 plus its summary for DV order DAO tests."""
  super(DvOrderDaoTestBase, self).setUp(use_mysql=True)
  self.dao = DvOrderDao()
  self.code_dao = CodeDao()
  self.participant_dao = ParticipantDao()
  self.summary_dao = ParticipantSummaryDao()
  # The participant row must exist before its summary can be inserted.
  self.participant = Participant(participantId=123456789, biobankId=7)
  self.participant_dao.insert(self.participant)
  self.summary = self.participant_summary(self.participant)
  self.summary_dao.insert(self.summary)
def insert_with_session(self, session, obj):
  """Inserts physical measurements parsed from a FHIR document bundle.

  Extracts the finalized date and supported extensions from the resource,
  applies amendments, dedupes byte-identical re-submissions (returns the
  existing row), updates the participant summary, and writes the assigned
  measurement ID back into the stored resource JSON.
  """
  is_amendment = False
  obj.logPosition = LogPosition()
  obj.final = True
  obj.created = clock.CLOCK.now()
  resource_json = json.loads(obj.resource)
  # The first bundle entry holds the Composition-level date and extensions.
  finalized_date = resource_json['entry'][0]['resource'].get('date')
  if finalized_date:
    obj.finalized = parse_date(finalized_date)
  for extension in resource_json['entry'][0]['resource'].get('extension', []):
    url = extension.get('url')
    if url not in _ALL_EXTENSIONS:
      logging.info(
          'Ignoring unsupported extension for PhysicalMeasurements: %r. Expected one of: %s',
          url, _ALL_EXTENSIONS)
      continue
    if url == _AMENDMENT_URL:
      self._update_amended(obj, extension, url, session)
      is_amendment = True
      break
  participant_summary = self._update_participant_summary(session, obj, is_amendment)
  existing_measurements = (session.query(PhysicalMeasurements).filter(
      PhysicalMeasurements.participantId == obj.participantId).all())
  if existing_measurements:
    new_dict = self._measurements_as_dict(obj)
    for measurements in existing_measurements:
      if self._measurements_as_dict(measurements) == new_dict:
        # If there are already measurements that look exactly like this, return them
        # without inserting new measurements.
        return measurements
  PhysicalMeasurementsDao.set_measurement_ids(obj)
  inserted_obj = super(PhysicalMeasurementsDao, self).insert_with_session(session, obj)
  if not is_amendment:  # Amendments aren't expected to have site ID extensions.
    if participant_summary.biospecimenCollectedSiteId is None:
      ParticipantDao().add_missing_hpo_from_site(
          session, inserted_obj.participantId, inserted_obj.finalizedSiteId)
  # Flush to assign an ID to the measurements, as the client doesn't provide one.
  session.flush()
  # Update the resource to contain the ID.
  resource_json['id'] = str(obj.physicalMeasurementsId)
  obj.resource = json.dumps(resource_json)
  return obj
def _update_participant_summary(self, session, created, participant_id):
  """Marks the summary's physical measurements complete, preserving earlier values.

  Raises:
    BadRequest: for an unknown participant or one without consent.
    (raise_if_withdrawn raises for withdrawn participants.)
  """
  if participant_id is None:
    raise BadRequest('participantId is required')
  participant_summary_dao = ParticipantSummaryDao()
  participant = ParticipantDao().get_for_update(session, participant_id)
  if not participant:
    raise BadRequest("Can't submit physical measurements for unknown participant %s"
                     % participant_id)
  participant_summary = participant.participantSummary
  if not participant_summary:
    raise BadRequest("Can't submit physical measurements for participant %s without consent"
                     % participant_id)
  raise_if_withdrawn(participant_summary)
  # Only set status/time the first time; never overwrite an earlier value.
  if (not participant_summary.physicalMeasurementsStatus or
      participant_summary.physicalMeasurementsStatus == PhysicalMeasurementsStatus.UNSET):
    participant_summary.physicalMeasurementsStatus = PhysicalMeasurementsStatus.COMPLETED
  if not participant_summary.physicalMeasurementsTime:
    participant_summary.physicalMeasurementsTime = created
  participant_summary_dao.update_enrollment_status(participant_summary)
  session.merge(participant_summary)
def setUp(self):
  """Builds a fully-populated participant (summary, measurements, biobank order).

  Uses fake clocks so the measurement (TIME_1) and the biobank order (TIME_2)
  get deterministic timestamps for the BigQuery sync tests.
  """
  super(BigQuerySyncDaoTest, self).setUp(use_mysql=True, with_consent_codes=True)
  self.dao = ParticipantDao()
  with self.dao.session() as session:
    self.site = session.query(Site).filter(
        Site.googleGroup == 'hpo-site-monroeville').first()
    self.hpo = session.query(HPO).filter(HPO.name == 'PITT').first()
  with clock.FakeClock(self.TIME_1):
    self.participant = Participant(participantId=123, biobankId=555)
    self.participant.hpoId = self.hpo.hpoId
    self.participant.siteId = self.site.siteId
    self.dao.insert(self.participant)
    ps = ParticipantSummary(
        participantId=123, biobankId=555, firstName='john', lastName='doe',
        withdrawalStatus=WithdrawalStatus.NOT_WITHDRAWN,
        suspensionStatus=SuspensionStatus.NOT_SUSPENDED)
    ps.hpoId = self.hpo.hpoId
    ps.siteId = self.site.siteId
    self.summary = ParticipantSummaryDao().insert(ps)
    self.pm_json = json.dumps(
        load_measurement_json(self.participant.participantId, self.TIME_1.isoformat()))
    self.pm = PhysicalMeasurementsDao().insert(self._make_physical_measurements())
  with clock.FakeClock(self.TIME_2):
    # NOTE(review): self.dao is rebound from ParticipantDao to BiobankOrderDao here.
    self.dao = BiobankOrderDao()
    self.bio_order = BiobankOrderDao().insert(
        self._make_biobank_order(participantId=self.participant.participantId))