def generate_samples(fraction_missing):
    """Writes a fake, tab-delimited biobank samples CSV to the configured bucket.

    Rows come from two queries: ordered samples returned by
    BiobankOrderDao.get_ordered_samples_sample (rows without a collected time
    are logged and skipped), followed by rows for participants returned by
    ParticipantDao.get_biobank_ids_sample, each of which gets a random
    non-empty subset of BIOBANK_TESTS.

    Args:
      fraction_missing: `1 - fraction_missing` is handed to
          get_ordered_samples_sample, so this is the share of ordered samples
          that should NOT get a row in the fake CSV.
    """
    bucket_name = config.getSetting(config.BIOBANK_SAMPLES_BUCKET_NAME)
    now = clock.CLOCK.now()
    file_name = '/%s/fake_%s.csv' % (bucket_name,
                                     now.strftime(INPUT_CSV_TIME_FORMAT))
    rows_written = 0
    first_sample_id = random.randint(1000000, 10000000)

    def make_row(offset, biobank_id, test, confirmed_time):
        # The confirmed time is reused as the created time; no parent sample.
        return [
            first_sample_id + offset,
            None,
            confirmed_time.strftime(_TIME_FORMAT),
            to_client_biobank_id(biobank_id),
            test,
            confirmed_time.strftime(_TIME_FORMAT),
            'KIT',
        ]

    with cloudstorage_api.open(file_name, mode='w') as dest:
        writer = csv.writer(dest, delimiter="\t")
        writer.writerow(_HEADERS)

        # Part 1: rows for samples that were actually ordered.
        order_dao = BiobankOrderDao()
        with order_dao.session() as session:
            ordered = order_dao.get_ordered_samples_sample(
                session, 1 - fraction_missing, _BATCH_SIZE)
            for biobank_id, collected_time, test in ordered:
                if collected_time is None:
                    logging.warning(
                        'biobank_id=%s test=%s skipped (collected=%s)',
                        biobank_id, test, collected_time)
                    continue
                delay = datetime.timedelta(minutes=random.randint(
                    0, _MAX_MINUTES_BETWEEN_SAMPLE_COLLECTED_AND_CONFIRMED))
                writer.writerow(
                    make_row(rows_written, biobank_id, test,
                             collected_time + delay))
                rows_written += 1

        # Part 2: rows for sampled participants, independent of any order.
        participants = ParticipantDao()
        with participants.session() as session:
            sampled = participants.get_biobank_ids_sample(
                session, _PARTICIPANTS_WITH_ORPHAN_SAMPLES, _BATCH_SIZE)
            for biobank_id, sign_up_time in sampled:
                delay = datetime.timedelta(minutes=random.randint(
                    0, _MAX_MINUTES_BETWEEN_PARTICIPANT_CREATED_AND_CONFIRMED))
                confirmed_time = sign_up_time + delay
                test_count = random.randint(1, len(BIOBANK_TESTS))
                for test in random.sample(BIOBANK_TESTS, test_count):
                    writer.writerow(
                        make_row(rows_written, biobank_id, test,
                                 confirmed_time))
                    rows_written += 1
    logging.info("Generated %d samples in %s.", rows_written, file_name)
def open_biobank_samples(
      biobank_id1, biobank_id2, biobank_id3,
      test1=None, test2=None, test3=None):
  """Returns a readable stream holding the filled-in biobank samples CSV.

  The template file biobank_samples_1.csv is %-formatted with the three
  client-formatted biobank IDs and three test codes. Any test code that is
  not supplied (or is falsy) is replaced by a random entry of BIOBANK_TESTS.
  """
  with open(data_path('biobank_samples_1.csv')) as template_file:
    template = template_file.read()
    substitutions = {
      'biobank_id1': to_client_biobank_id(biobank_id1),
      'biobank_id2': to_client_biobank_id(biobank_id2),
      'biobank_id3': to_client_biobank_id(biobank_id3),
      'test1': test1 or random.choice(BIOBANK_TESTS),
      'test2': test2 or random.choice(BIOBANK_TESTS),
      'test3': test3 or random.choice(BIOBANK_TESTS),
    }
    filled_in = template % substitutions
  return StringIO.StringIO(filled_in)
示例#3
0
 def to_client_json(self, model):
   """Builds the client-facing JSON dict for a participant model.

   Copies the participant's identifying, withdrawal and suspension fields
   into a dict, runs the format_json_* helpers over the HPO, organization,
   site, enum and date fields, and returns the result without 'siteId'.

   Args:
     model: participant model; lastModified and signUpTime must be set
       (isoformat() is called on them) and providerLink must be a JSON string.

   Returns:
     dict suitable for returning to API clients.
   """
   client_json = {
     'participantId': to_client_participant_id(model.participantId),
     'externalId': model.externalId,
     'hpoId': model.hpoId,
     'awardee': model.hpoId,
     'organization': model.organizationId,
     'siteId': model.siteId,
     'biobankId': to_client_biobank_id(model.biobankId),
     'lastModified': model.lastModified.isoformat(),
     'signUpTime': model.signUpTime.isoformat(),
     'providerLink': json.loads(model.providerLink),
     'withdrawalStatus': model.withdrawalStatus,
     'withdrawalReason': model.withdrawalReason,
     'withdrawalReasonJustification': model.withdrawalReasonJustification,
     'withdrawalTime': model.withdrawalTime,
     'withdrawalAuthored': model.withdrawalAuthored,
     'suspensionStatus': model.suspensionStatus,
     'suspensionTime': model.suspensionTime
   }
   # These helpers mutate client_json in place; the trailing commas that used
   # to follow the first three calls only wrapped each call in a throwaway
   # tuple and have been dropped.
   format_json_hpo(client_json, self.hpo_dao, 'hpoId')
   format_json_org(client_json, self.organization_dao, 'organization')
   # NOTE(review): 'site' is not a key set above; presumably format_json_site
   # derives it from 'siteId' -- confirm against the helper.
   format_json_site(client_json, self.site_dao, 'site')
   format_json_enum(client_json, 'withdrawalStatus')
   format_json_enum(client_json, 'withdrawalReason')
   format_json_enum(client_json, 'suspensionStatus')
   format_json_date(client_json, 'withdrawalTime')
   format_json_date(client_json, 'suspensionTime')
   # Re-sync 'awardee' with whatever 'hpoId' holds after formatting.
   client_json['awardee'] = client_json['hpoId']
   if 'siteId' in client_json:
     del client_json['siteId']
   return client_json
def _new_row(sample_id, biobank_id, test, confirmed_time):
    """Builds one fake biobank CSV row, with one cell per CsvColumns.ALL entry.

    The disposal date is the confirmed time plus a random number of minutes
    (at most _MAX_MINUTES_BETWEEN_SAMPLE_CONFIRMED_AND_DISPOSED); the create
    date reuses the confirmed time.

    Raises:
      ValueError: if CsvColumns.ALL contains a column this function does not
        know how to fill.
    """
    disposed_time = confirmed_time + datetime.timedelta(minutes=random.randint(
        0, _MAX_MINUTES_BETWEEN_SAMPLE_CONFIRMED_AND_DISPOSED))

    def cell_for(column):
        # Checks are kept in the same order as the column constants were
        # originally tested.
        if column == CsvColumns.SAMPLE_ID:
            return sample_id
        if column == CsvColumns.PARENT_ID:
            return None
        if column == CsvColumns.CONFIRMED_DATE:
            return confirmed_time.strftime(_TIME_FORMAT)
        if column == CsvColumns.EXTERNAL_PARTICIPANT_ID:
            return to_client_biobank_id(biobank_id)
        if column == CsvColumns.BIOBANK_ORDER_IDENTIFIER:
            return 'KIT'
        if column == CsvColumns.TEST_CODE:
            return test
        if column == CsvColumns.CREATE_DATE:
            return confirmed_time.strftime(_TIME_FORMAT)
        if column == CsvColumns.STATUS:
            # TODO: Do we want a distribution of statuses here?
            return 'consumed'
        if column == CsvColumns.DISPOSAL_DATE:
            return disposed_time.strftime(_TIME_FORMAT)
        if column == CsvColumns.SAMPLE_FAMILY:
            # TODO: Is there a need for a more realistic value here?
            return 'family_id'
        raise ValueError(
            "unsupported biobank CSV column: '{}'".format(column))

    return [cell_for(column) for column in CsvColumns.ALL]
示例#5
0
def open_biobank_samples(biobank_ids, tests):
    """Returns the biobank samples CSV as a string.

    The header line of biobank_samples_1.csv is always included, followed by
    one formatted data line per entry of biobank_ids.

    :param biobank_ids: list of biobank ids; one record is produced per id.
    :param tests: list of test codes, matched to biobank_ids by index. Where
        the list is too short, a random entry of BIOBANK_TESTS is used.
    :return: str with the CSV content (not a StringIO object).
    :raises IndexError: if the template file has fewer data lines than
        biobank_ids has entries.
    """
    nids = len(biobank_ids)
    # Get the same number of sample lines as biobank_ids, plus header line.
    # The context manager closes the file handle, which was previously leaked.
    with open(data_path('biobank_samples_1.csv')) as samples_file:
        lines = samples_file.readlines()[:nids + 1]

    parts = [lines[0]]  # include header line every time.
    for x, biobank_id in enumerate(biobank_ids):
        # If we don't have a test code for this index, use a random one.
        try:
            test_code = tests[x]
        except IndexError:
            test_code = random.choice(BIOBANK_TESTS)

        parts.append(lines[x + 1].format(
            biobank_id=to_client_biobank_id(biobank_id), test=test_code))

    return ''.join(parts)
    def to_client_json(self, model):
        """Converts a participant summary model into its client JSON dict.

        Starts from model.asdict(), restricts or blanks fields for withdrawn
        and suspended participants, converts IDs to their client form, adds
        the derived 'ageRange', 'organization', 'awardee' and (when
        applicable) 'recontactMethod' keys, runs the format_json_* helpers
        over the date/code/enum/site field sets, and drops None values.
        """
        result = model.asdict()

        if model.withdrawalStatus == WithdrawalStatus.NO_USE:
            # Participants that withdrew more than 48 hours ago should have
            # fields other than WITHDRAWN_PARTICIPANT_FIELDS cleared.
            withdrawal_time = model.withdrawalTime
            if (withdrawal_time is None or withdrawal_time <
                    clock.CLOCK.now() - WITHDRAWN_PARTICIPANT_VISIBILITY_TIME):
                result = dict((field, result.get(field))
                              for field in WITHDRAWN_PARTICIPANT_FIELDS)
        elif model.suspensionStatus == SuspensionStatus.NO_CONTACT:
            # Suspended (and not withdrawn): blank out the suspended fields.
            for suspended_field in SUSPENDED_PARTICIPANT_FIELDS:
                result[suspended_field] = UNSET

        result['participantId'] = to_client_participant_id(model.participantId)

        raw_biobank_id = result.get('biobankId')
        if raw_biobank_id:
            result['biobankId'] = to_client_biobank_id(raw_biobank_id)

        birth_date = result.get('dateOfBirth')
        result['ageRange'] = (get_bucketed_age(birth_date, clock.CLOCK.now())
                              if birth_date else UNSET)

        if result.get('primaryLanguage') is None:
            result['primaryLanguage'] = UNSET

        # Expose the organization under 'organization' rather than
        # 'organizationId'.
        if 'organizationId' in result:
            organization_id = result['organizationId']
            result['organization'] = organization_id
            del result['organizationId']
            format_json_org(result, self.organization_dao, 'organization')

        format_json_hpo(result, self.hpo_dao, 'hpoId')
        result['awardee'] = result['hpoId']

        # The field-type sets are only read after this initializer has run.
        _initialize_field_type_sets()
        for date_field in _DATE_FIELDS:
            format_json_date(result, date_field)
        for code_field in _CODE_FIELDS:
            format_json_code(result, self.code_dao, code_field)
        for enum_field in _ENUM_FIELDS:
            format_json_enum(result, enum_field)
        for site_field in _SITE_FIELDS:
            format_json_site(result, self.site_dao, site_field)

        must_not_contact = (
            model.withdrawalStatus == WithdrawalStatus.NO_USE
            or model.suspensionStatus == SuspensionStatus.NO_CONTACT)
        if must_not_contact:
            result['recontactMethod'] = 'NO_CONTACT'

        # Strip None values.
        return dict((key, value) for key, value in result.iteritems()
                    if value is not None)
示例#7
0
    def test_sample_from_row(self):
        """Checks _create_sample_from_row against the first fixture CSV row.

        The sample's ID, biobank ID, test code and its confirmed/created
        timestamps (converted to naive Central time) must match the row.
        """
        pipeline = biobank_samples_pipeline
        csv_stream = test_data.open_biobank_samples(112, 222, 333)
        first_row = next(csv.DictReader(csv_stream, delimiter='\t'))
        sample = pipeline._create_sample_from_row(first_row,
                                                  get_biobank_id_prefix())
        self.assertIsNotNone(sample)

        columns = pipeline._Columns
        self.assertEquals(sample.biobankStoredSampleId,
                          first_row[columns.SAMPLE_ID])
        self.assertEquals(to_client_biobank_id(sample.biobankId),
                          first_row[columns.EXTERNAL_PARTICIPANT_ID])
        self.assertEquals(sample.test, first_row[columns.TEST_CODE])

        # Confirmed time first, then created time, each compared in the
        # pipeline's input timestamp format.
        confirmed_central = self._naive_utc_to_naive_central(sample.confirmed)
        confirmed_text = confirmed_central.strftime(
            pipeline._INPUT_TIMESTAMP_FORMAT)
        self.assertEquals(confirmed_text, first_row[columns.CONFIRMED_DATE])

        created_central = self._naive_utc_to_naive_central(sample.created)
        created_text = created_central.strftime(
            pipeline._INPUT_TIMESTAMP_FORMAT)
        self.assertEquals(created_text, first_row[columns.CREATE_DATE])
    def _filter_order_fields(self, resource, pid):
        """Builds the MayoLink order payload for a direct-volunteer kit order.

        Args:
          resource: FHIR resource, wrapped in SimpleFhirR4Reader; its first
            basedOn identifier value is used as the order ID.
          pid: participant ID whose summary supplies the patient fields.

        Returns:
          dict with a single 'order' key holding the payload.

        Raises:
          BadRequest: if no participant summary exists for pid.
        """
        fhir_resource = SimpleFhirR4Reader(resource)
        summary = ParticipantSummaryDao().get(pid)
        if not summary:
            raise BadRequest('No summary for participant id: {}'.format(pid))

        summary_fields = summary.asdict()
        format_json_code(summary_fields, self.code_dao, 'genderIdentityId')
        format_json_code(summary_fields, self.code_dao, 'stateId')

        # Anything other than the two recognised codes (including a missing
        # or empty value) is reported as 'U'.
        gender_by_code = {
            'GenderIdentity_Woman': 'F',
            'GenderIdentity_Man': 'M',
        }
        gender_val = gender_by_code.get(summary_fields.get('genderIdentity'),
                                        'U')

        order_id = int(fhir_resource.basedOn[0].identifier.value)
        with self.session() as session:
            result = session.query(BiobankDVOrder.barcode).filter(
                BiobankDVOrder.order_id == order_id).first()
            if not result:
                barcode = None
            elif isinstance(result, str):
                barcode = result
            else:
                barcode = result.barcode

        # MayoLink api has strong opinions on what should be sent and the order of elements. Dont touch.
        collected = fhir_resource.occurrenceDateTime
        patient = {
            'medical_record_number':
            str(to_client_biobank_id(summary.biobankId)),
            'first_name': '*',
            'last_name': str(to_client_biobank_id(summary.biobankId)),
            'middle_name': '',
            'birth_date': '3/3/1933',
            'gender': gender_val,
            'address1': summary.streetAddress,
            'address2': summary.streetAddress2,
            'city': summary.city,
            'state': summary_fields['state'],
            'postal_code': str(summary.zipCode),
            'phone': str(summary.phoneNumber),
            'account_number': None,
            'race': summary.race,
            'ethnic_group': None
        }
        physician = {
            'name': 'None',  # must be a string value, not None.
            'phone': None,
            'npi': None
        }
        report_notes = fhir_resource.extension.get(
            url=DV_ORDER_URL).valueString
        tests = {
            'test': {
                'code': '1SAL2',
                'name': 'PMI Saliva, FDA Kit',
                'comments': None
            }
        }
        return {
            'order': {
                'collected': collected,
                'account': '',
                'number': barcode,
                'patient': patient,
                'physician': physician,
                'report_notes': report_notes,
                'tests': tests,
                'comments': 'Salivary Kit Order, direct from participant'
            }
        }
示例#9
0
    def test_reconciliation_query(self):
        """Checks the four reconciliation reports against a mixed fixture.

        Inserts participants with on-time, late, missing, unordered, repeated
        and withdrawn order/sample combinations, runs
        biobank_samples_pipeline._query_and_write_reports into an in-memory
        exporter, and then asserts the row counts, column names and selected
        rows of the received, late, missing and withdrawals files.
        """
        self.setup_codes([RACE_QUESTION_CODE], CodeType.QUESTION)
        self.setup_codes([RACE_AIAN_CODE, RACE_WHITE_CODE], CodeType.ANSWER)
        self._questionnaire_id = self.create_questionnaire(
            'questionnaire3.json')
        # MySQL and Python sub-second rounding differs, so trim micros from generated times.
        order_time = clock.CLOCK.now().replace(microsecond=0)
        old_order_time = order_time - datetime.timedelta(days=10)
        within_24_hours = order_time + datetime.timedelta(hours=23)
        old_within_24_hours = old_order_time + datetime.timedelta(hours=23)
        late_time = order_time + datetime.timedelta(hours=25)
        old_late_time = old_order_time + datetime.timedelta(hours=25)
        # The report is generated as of 23h59m after the "recent" order time.
        file_time = order_time + datetime.timedelta(
            hours=23) + datetime.timedelta(minutes=59)
        two_days_ago = file_time - datetime.timedelta(days=2)

        # On time, recent order and samples; shows up in rx
        p_on_time = self._insert_participant()
        # Extra samples ordered now aren't considered missing or late.
        self._insert_order(p_on_time,
                           'GoodOrder',
                           BIOBANK_TESTS[:4],
                           order_time,
                           finalized_tests=BIOBANK_TESTS[:3],
                           kit_id='kit1',
                           tracking_number='t1',
                           collected_note=u'\u2013foo',
                           processed_note='bar',
                           finalized_note='baz')
        self._insert_samples(p_on_time, BIOBANK_TESTS[:2],
                             ['GoodSample1', 'GoodSample2'], 'OGoodOrder',
                             within_24_hours,
                             within_24_hours - datetime.timedelta(hours=1))

        # On time order and samples from 10 days ago; shows up in rx
        p_old_on_time = self._insert_participant(race_codes=[RACE_AIAN_CODE])
        # Old missing samples from 10 days ago don't show up in missing or late.
        self._insert_order(p_old_on_time,
                           'OldGoodOrder',
                           BIOBANK_TESTS[:3],
                           old_order_time,
                           kit_id='kit2')
        self._insert_samples(p_old_on_time, BIOBANK_TESTS[:2],
                             ['OldGoodSample1', 'OldGoodSample2'],
                             'OOldGoodOrder', old_within_24_hours,
                             old_within_24_hours - datetime.timedelta(hours=1))

        # Late, recent order and samples; shows up in rx and late. (But not missing, as it hasn't been
        # 36 hours since the order.)
        p_late_and_missing = self._insert_participant()
        # Extra missing sample doesn't show up as missing as it hasn't been 24 hours yet.
        o_late_and_missing = self._insert_order(p_late_and_missing,
                                                'SlowOrder', BIOBANK_TESTS[:3],
                                                order_time)
        self._insert_samples(p_late_and_missing, [BIOBANK_TESTS[0]],
                             ['LateSample'], 'OSlowOrder', late_time,
                             late_time - datetime.timedelta(minutes=59))

        # ordered sample not finalized with stored sample should be in missing.
        p_not_finalized = self._insert_participant()
        self._insert_order(p_not_finalized,
                           'UnfinalizedOrder',
                           BIOBANK_TESTS[:2],
                           order_time,
                           finalized_tests=BIOBANK_TESTS[:1])
        self._insert_samples(p_not_finalized, [BIOBANK_TESTS[1]],
                             ['missing_order'], 'OUnfinalizedOrder',
                             order_time,
                             order_time - datetime.timedelta(hours=1))

        # Late order and samples from 10 days ago; shows up in rx (but not missing, as it was too
        # long ago).
        p_old_late_and_missing = self._insert_participant()
        self._insert_order(p_old_late_and_missing, 'OldSlowOrder',
                           BIOBANK_TESTS[:2], old_order_time)
        self._insert_samples(p_old_late_and_missing, [BIOBANK_TESTS[0]],
                             ['OldLateSample'], 'OOldSlowOrder', old_late_time,
                             old_late_time - datetime.timedelta(minutes=59))

        # Order with missing sample from 2 days ago; shows up in missing.
        p_two_days_missing = self._insert_participant()
        # The third test doesn't wind up in missing, as it was never finalized.
        self._insert_order(p_two_days_missing,
                           'TwoDaysMissingOrder',
                           BIOBANK_TESTS[:3],
                           two_days_ago,
                           finalized_tests=BIOBANK_TESTS[:2])

        # Recent samples with no matching order; shows up in missing.
        p_extra = self._insert_participant(race_codes=[RACE_WHITE_CODE])
        self._insert_samples(p_extra, [BIOBANK_TESTS[-1]],
                             ['NobodyOrderedThisSample'], 'OExtraOrderNotSent',
                             order_time,
                             order_time - datetime.timedelta(minutes=59))

        # Old samples with no matching order; Does not show up.
        p_old_extra = self._insert_participant(race_codes=[RACE_AIAN_CODE])
        self._insert_samples(p_old_extra, [BIOBANK_TESTS[-1]],
                             ['OldNobodyOrderedThisSample'],
                             'OOldExtrOrderNotSent', old_order_time,
                             old_order_time - datetime.timedelta(hours=1))

        # Withdrawn participants don't show up in any reports except withdrawal report.

        p_withdrawn_old_on_time = self._insert_participant(
            race_codes=[RACE_AIAN_CODE])
        # This updates the version of the participant and its HPO ID.
        self._insert_order(p_withdrawn_old_on_time, 'OldWithdrawnGoodOrder',
                           BIOBANK_TESTS[:2], old_order_time)
        # Re-fetch the participant before withdrawing it.
        p_withdrawn_old_on_time = self.participant_dao.get(
            p_withdrawn_old_on_time.participantId)
        self._insert_samples(
            p_withdrawn_old_on_time, BIOBANK_TESTS[:2],
            ['OldWithdrawnGoodSample1', 'OldWithdrawnGoodSample2'],
            'OOldWithdrawnGoodOrder', old_within_24_hours,
            old_within_24_hours - datetime.timedelta(hours=1))
        self._withdraw(p_withdrawn_old_on_time, within_24_hours)

        p_withdrawn_late_and_missing = self._insert_participant()
        self._insert_order(p_withdrawn_late_and_missing, 'WithdrawnSlowOrder',
                           BIOBANK_TESTS[:2], order_time)
        self._insert_samples(p_withdrawn_late_and_missing, [BIOBANK_TESTS[0]],
                             ['WithdrawnLateSample'], 'OWithdrawnSlowOrder',
                             late_time,
                             late_time - datetime.timedelta(minutes=59))
        p_withdrawn_late_and_missing = (self.participant_dao.get(
            p_withdrawn_late_and_missing.participantId))
        self._withdraw(p_withdrawn_late_and_missing, within_24_hours)

        # This participant is withdrawn at old_late_time; every other
        # withdrawal in this test uses within_24_hours.
        p_withdrawn_old_late_and_missing = self._insert_participant()
        self._insert_order(p_withdrawn_old_late_and_missing,
                           'WithdrawnOldSlowOrder', BIOBANK_TESTS[:2],
                           old_order_time)
        self._insert_samples(p_withdrawn_old_late_and_missing,
                             [BIOBANK_TESTS[0]], ['WithdrawnOldLateSample'],
                             'OWithdrawnOldSlowOrder', old_late_time,
                             old_late_time - datetime.timedelta(minutes=59))
        p_withdrawn_old_late_and_missing = (self.participant_dao.get(
            p_withdrawn_old_late_and_missing.participantId))
        self._withdraw(p_withdrawn_old_late_and_missing, old_late_time)

        p_withdrawn_extra = self._insert_participant(
            race_codes=[RACE_WHITE_CODE])
        self._insert_samples(p_withdrawn_extra, [BIOBANK_TESTS[-1]],
                             ['WithdrawnNobodyOrderedThisSample'],
                             'OWithdrawnOldSlowOrder', order_time,
                             order_time - datetime.timedelta(hours=1))
        self._withdraw(p_withdrawn_extra, within_24_hours)

        p_withdrawn_old_extra = self._insert_participant(
            race_codes=[RACE_AIAN_CODE])
        self._insert_samples(p_withdrawn_old_extra, [BIOBANK_TESTS[-1]],
                             ['WithdrawnOldNobodyOrderedThisSample'],
                             'OwithdrawnOldSlowOrder', old_order_time,
                             old_order_time - datetime.timedelta(hours=1))
        self._withdraw(p_withdrawn_old_extra, within_24_hours)

        # Inserted with the AIAN race code, then a questionnaire response with
        # the White race code is submitted before withdrawal; the withdrawals
        # report below expects is_native_american == 'N' for this participant.
        p_withdrawn_race_change = self._insert_participant(
            race_codes=[RACE_AIAN_CODE])
        p_withdrawn_race_change_id = to_client_participant_id(
            p_withdrawn_race_change.participantId)
        self._submit_race_questionnaire_response(p_withdrawn_race_change_id,
                                                 [RACE_WHITE_CODE])
        self._withdraw(p_withdrawn_race_change, within_24_hours)

        # for the same participant/test, 3 orders sent and only 2 samples received. Shows up in both
        # missing (we are missing one sample) and late (the two samples that were received were after
        # 24 hours.)
        p_repeated = self._insert_participant()
        for repetition in xrange(3):
            self._insert_order(
                p_repeated, 'RepeatedOrder%d' % repetition, [BIOBANK_TESTS[0]],
                two_days_ago + datetime.timedelta(hours=repetition))
            if repetition != 2:
                self._insert_samples(
                    p_repeated, [BIOBANK_TESTS[0]],
                    ['RepeatedSample%d' % repetition],
                    'ORepeatedOrder%d' % repetition,
                    within_24_hours + datetime.timedelta(hours=repetition),
                    within_24_hours + datetime.timedelta(hours=repetition - 1))

        # Run the pipeline's report generation into an in-memory exporter.
        received, late, missing, withdrawals = 'rx.csv', 'late.csv', 'missing.csv', 'withdrawals.csv'
        exporter = InMemorySqlExporter(self)
        biobank_samples_pipeline._query_and_write_reports(
            exporter, file_time, received, late, missing, withdrawals)

        exporter.assertFilesEqual((received, late, missing, withdrawals))

        # sent-and-received: 8 rows; includes orders/samples from more than 7 days ago.
        # NOTE(review): this comment used to read "4 on-time, 2 late, none of the
        # missing/extra/repeated ones", but two p_repeated rows are asserted in this
        # file below -- confirm the intended breakdown.
        exporter.assertRowCount(received, 8)
        exporter.assertColumnNamesEqual(received, _CSV_COLUMN_NAMES)
        row = exporter.assertHasRow(
            received, {
                'biobank_id': to_client_biobank_id(p_on_time.biobankId),
                'sent_test': BIOBANK_TESTS[0],
                'received_test': BIOBANK_TESTS[0]
            })

        # p_repeated has 2 received and 2 late.
        exporter.assertHasRow(
            received, {
                'biobank_id': to_client_biobank_id(p_repeated.biobankId),
                'sent_test': BIOBANK_TESTS[0],
                'received_test': BIOBANK_TESTS[0],
                'sent_order_id': 'ORepeatedOrder1'
            })
        exporter.assertHasRow(
            received, {
                'biobank_id': to_client_biobank_id(p_repeated.biobankId),
                'sent_test': BIOBANK_TESTS[0],
                'received_test': BIOBANK_TESTS[0],
                'sent_order_id': 'ORepeatedOrder0'
            })
        exporter.assertHasRow(
            late, {
                'biobank_id': to_client_biobank_id(p_repeated.biobankId),
                'sent_test': BIOBANK_TESTS[0],
                'received_test': BIOBANK_TESTS[0],
                'sent_order_id': 'ORepeatedOrder0'
            })
        exporter.assertHasRow(
            late, {
                'biobank_id': to_client_biobank_id(p_repeated.biobankId),
                'sent_test': BIOBANK_TESTS[0],
                'received_test': BIOBANK_TESTS[0],
                'sent_order_id': 'ORepeatedOrder1'
            })
        exporter.assertHasRow(
            missing, {
                'biobank_id': to_client_biobank_id(p_not_finalized.biobankId),
                'sent_order_id': 'OUnfinalizedOrder'
            })

        # Also check the values of all remaining fields on one row.
        # (`row` is the p_on_time / BIOBANK_TESTS[0] row from the received file.)
        self.assertEquals(row['source_site_name'],
                          'Monroeville Urgent Care Center')
        self.assertEquals(row['source_site_mayolink_client_number'], '7035769')
        self.assertEquals(row['source_site_hpo'], 'PITT')
        self.assertEquals(row['source_site_hpo_type'], 'HPO')
        self.assertEquals(row['finalized_site_name'],
                          'Phoenix Urgent Care Center')
        self.assertEquals(row['finalized_site_mayolink_client_number'],
                          '7035770')
        self.assertEquals(row['finalized_site_hpo'], 'PITT')
        self.assertEquals(row['finalized_site_hpo_type'], 'HPO')
        self.assertEquals(row['finalized_username'], '*****@*****.**')
        self.assertEquals(row['sent_finalized_time'],
                          database_utils.format_datetime(order_time))
        self.assertEquals(row['sent_collection_time'],
                          database_utils.format_datetime(order_time))
        self.assertEquals(row['sent_processed_time'],
                          database_utils.format_datetime(order_time))
        self.assertEquals(row['received_time'],
                          database_utils.format_datetime(within_24_hours))
        self.assertEquals(
            row['Sample Family Create Date'],
            database_utils.format_datetime(within_24_hours -
                                           datetime.timedelta(hours=1)))
        self.assertEquals(row['sent_count'], '1')
        self.assertEquals(row['received_count'], '1')
        self.assertEquals(row['sent_order_id'], 'OGoodOrder')
        self.assertEquals(row['received_sample_id'], 'GoodSample1')
        self.assertEquals(row['biospecimen_kit_id'], 'kit1')
        self.assertEquals(row['fedex_tracking_number'], 't1')
        self.assertEquals(row['is_native_american'], 'N')
        self.assertEquals(row['notes_collected'], u'\u2013foo')
        self.assertEquals(row['notes_processed'], 'bar')
        self.assertEquals(row['notes_finalized'], 'baz')
        # NOTE(review): duplicates the sent_order_id assertion a few lines above.
        self.assertEquals(row['sent_order_id'], 'OGoodOrder')
        # the other sent-and-received rows
        exporter.assertHasRow(
            received, {
                'biobank_id': to_client_biobank_id(p_on_time.biobankId),
                'sent_test': BIOBANK_TESTS[1]
            })
        exporter.assertHasRow(
            received, {
                'biobank_id': to_client_biobank_id(
                    p_late_and_missing.biobankId),
                'sent_test': BIOBANK_TESTS[0]
            })
        exporter.assertHasRow(
            received, {
                'biobank_id': to_client_biobank_id(p_old_on_time.biobankId),
                'sent_test': BIOBANK_TESTS[0],
                'is_native_american': 'Y'
            })
        exporter.assertHasRow(
            received, {
                'biobank_id': to_client_biobank_id(p_old_on_time.biobankId),
                'sent_test': BIOBANK_TESTS[1],
                'is_native_american': 'Y'
            })
        exporter.assertHasRow(
            received, {
                'biobank_id':
                to_client_biobank_id(p_old_late_and_missing.biobankId),
                'sent_test':
                BIOBANK_TESTS[0],
                'is_native_american':
                'N'
            })

        # late: 3 rows (one for p_late_and_missing plus the two p_repeated rows asserted
        # above); don't include orders/samples from more than 7 days ago.
        # NOTE(review): this comment used to say "2 late" -- confirm the intended breakdown.
        exporter.assertRowCount(late, 3)
        exporter.assertColumnNamesEqual(late, _CSV_COLUMN_NAMES)
        exporter.assertHasRow(
            late, {
                'biobank_id': to_client_biobank_id(
                    p_late_and_missing.biobankId),
                'sent_order_id': 'O%s' % o_late_and_missing.biobankOrderId,
                'elapsed_hours': '24',
                'is_native_american': 'N'
            })
        exporter.assertHasRow(
            late, {
                'biobank_id': to_client_biobank_id(p_repeated.biobankId),
                'elapsed_hours': '46'
            })

        # orders/samples where something went wrong; don't include orders/samples from more than 7
        # days ago, or where 24 hours hasn't elapsed yet.
        exporter.assertRowCount(missing, 5)
        exporter.assertColumnNamesEqual(missing, _CSV_COLUMN_NAMES)
        # sample received, nothing ordered
        exporter.assertHasRow(
            missing, {
                'biobank_id': to_client_biobank_id(p_extra.biobankId),
                'sent_order_id': 'OExtraOrderNotSent'
            })
        # order received, no sample
        exporter.assertHasRow(
            missing, {
                'biobank_id': to_client_biobank_id(
                    p_two_days_missing.biobankId),
                'sent_order_id': 'OTwoDaysMissingOrder',
                'sent_test': BIOBANK_TESTS[0],
                'is_native_american': 'N'
            })
        exporter.assertHasRow(
            missing, {
                'biobank_id': to_client_biobank_id(
                    p_two_days_missing.biobankId),
                'sent_order_id': 'OTwoDaysMissingOrder',
                'sent_test': BIOBANK_TESTS[1]
            })

        # 3 orders sent, only 2 received
        multi_sample_row = exporter.assertHasRow(
            missing, {
                'biobank_id': to_client_biobank_id(p_repeated.biobankId),
                'sent_count': '1',
                'received_count': '0'
            })
        # Also verify the comma-joined fields of the row with multiple orders/samples.
        self.assertItemsEqual(multi_sample_row['sent_order_id'].split(','),
                              ['ORepeatedOrder2'])

        # We don't include the old withdrawal.
        exporter.assertRowCount(withdrawals, 5)
        exporter.assertHasRow(
            withdrawals, {
                'biobank_id':
                to_client_biobank_id(p_withdrawn_old_on_time.biobankId),
                'withdrawal_time':
                database_utils.format_datetime(within_24_hours),
                'is_native_american':
                'Y'
            })
        exporter.assertHasRow(
            withdrawals, {
                'biobank_id':
                to_client_biobank_id(p_withdrawn_late_and_missing.biobankId),
                'withdrawal_time':
                database_utils.format_datetime(within_24_hours),
                'is_native_american':
                'N'
            })
        exporter.assertHasRow(
            withdrawals, {
                'biobank_id': to_client_biobank_id(
                    p_withdrawn_extra.biobankId),
                'withdrawal_time':
                database_utils.format_datetime(within_24_hours),
                'is_native_american': 'N'
            })
        exporter.assertHasRow(
            withdrawals, {
                'biobank_id': to_client_biobank_id(
                    p_withdrawn_old_extra.biobankId),
                'withdrawal_time':
                database_utils.format_datetime(within_24_hours),
                'is_native_american': 'Y'
            })
        exporter.assertHasRow(
            withdrawals, {
                'biobank_id':
                to_client_biobank_id(p_withdrawn_race_change.biobankId),
                'withdrawal_time':
                database_utils.format_datetime(within_24_hours),
                'is_native_american':
                'N'
            })