def test_generate_samples(self):
    """Generates fake biobank samples via the DataGen API, runs the CSV import
    job, and verifies the stored samples and participant summary roll-ups.
    """
    created = self.send_post('Participant', {})
    participant_id = created['participantId']
    self.send_consent(participant_id)
    raw_participant_id = from_client_participant_id(participant_id)
    order_json = load_biobank_order_json(raw_participant_id)
    self.send_post('Participant/%s/BiobankOrder' % participant_id, order_json)

    # Sanity check that the orders were created correctly.
    order_dao = BiobankOrderDao()
    self.assertEquals(1, order_dao.count())
    only_order = order_dao.get_all()[0]
    order_with_children = order_dao.get_with_children(only_order.biobankOrderId)
    self.assertEquals(11, len(order_with_children.samples))

    self.send_post('DataGen',
                   {'create_biobank_samples': True,
                    'samples_missing_fraction': 0.0})
    # Run the (usually offline) Biobank CSV import job.
    upsert_from_latest_csv()

    self.assertEquals(11, BiobankStoredSampleDao().count())
    summary = ParticipantSummaryDao().get(raw_participant_id)
    self.assertEquals(SampleStatus.RECEIVED, summary.samplesToIsolateDNA)
    self.assertEquals(10, summary.numBaselineSamplesArrived)
def test_end_to_end(self):
    """Writes a 3-row samples CSV to cloud storage, runs the import job, and
    checks each participant summary reflects its (randomly chosen) test code.
    """
    stored_sample_dao = BiobankStoredSampleDao()
    self.assertEquals(stored_sample_dao.count(), 0)

    # Create 3 participants and pass their (random) IDs into sample rows.
    summary_dao = ParticipantSummaryDao()
    biobank_ids = []
    participant_ids = []
    for _ in xrange(3):
        new_participant = self.participant_dao.insert(Participant())
        summary_dao.insert(self.participant_summary(new_participant))
        participant_ids.append(new_participant.participantId)
        biobank_ids.append(new_participant.biobankId)
        fresh_summary = summary_dao.get(new_participant.participantId)
        self.assertEquals(fresh_summary.numBaselineSamplesArrived, 0)

    test1, test2, test3 = random.sample(_BASELINE_TESTS, 3)
    samples_file = test_data.open_biobank_samples(
        *biobank_ids, test1=test1, test2=test2, test3=test3)

    # The import job only picks up CSVs whose timestamped name is recent.
    central_now = self._naive_utc_to_naive_central(clock.CLOCK.now())
    input_filename = 'cloud%s.csv' % central_now.strftime(
        biobank_samples_pipeline.INPUT_CSV_TIME_FORMAT)
    self._write_cloud_csv(input_filename, samples_file.read())

    biobank_samples_pipeline.upsert_from_latest_csv()

    self.assertEquals(stored_sample_dao.count(), 3)
    self._check_summary(participant_ids[0], test1, '2016-11-29T12:19:32')
    self._check_summary(participant_ids[1], test2, '2016-11-29T12:38:58')
    self._check_summary(participant_ids[2], test3, '2016-11-29T12:41:26')
def test_old_csv_not_imported(self):
    """A samples CSV older than 24 hours must be rejected with a DataError."""
    now = clock.CLOCK.now()
    stale_time = now - datetime.timedelta(hours=25)
    central_stale = self._naive_utc_to_naive_central(stale_time)
    input_filename = 'cloud%s.csv' % central_stale.strftime(
        biobank_samples_pipeline.INPUT_CSV_TIME_FORMAT)
    self._write_cloud_csv(input_filename, '')
    with self.assertRaises(biobank_samples_pipeline.DataError):
        biobank_samples_pipeline.upsert_from_latest_csv()
def test_end_to_end(self):
    """Imports the 16-row fixture CSV and verifies stored-sample counts plus
    per-participant status/timestamp, honoring In Prep / Disposed semantics.
    """
    stored_sample_dao = BiobankStoredSampleDao()
    self.assertEquals(stored_sample_dao.count(), 0)

    summary_dao = ParticipantSummaryDao()
    biobank_ids = []
    participant_ids = []
    nids = 16  # equal to the number of parent rows in 'biobank_samples_1.csv'
    cids = 1  # equal to the number of child rows in 'biobank_samples_1.csv'

    # Create one participant per fixture row and pass their (random) IDs
    # into the sample rows.
    for _ in xrange(nids):
        new_participant = self.participant_dao.insert(Participant())
        summary_dao.insert(self.participant_summary(new_participant))
        participant_ids.append(new_participant.participantId)
        biobank_ids.append(new_participant.biobankId)
        fresh_summary = summary_dao.get(new_participant.participantId)
        self.assertEquals(fresh_summary.numBaselineSamplesArrived, 0)

    test_codes = random.sample(_BASELINE_TESTS, nids)
    samples_file = test_data.open_biobank_samples(
        biobank_ids=biobank_ids, tests=test_codes)
    lines = samples_file.split('\n')[1:]  # remove field name line

    central_now = self._naive_utc_to_naive_central(clock.CLOCK.now())
    input_filename = 'cloud%s.csv' % central_now.strftime(
        biobank_samples_pipeline.INPUT_CSV_TIME_FORMAT)
    self._write_cloud_csv(input_filename, samples_file)

    biobank_samples_pipeline.upsert_from_latest_csv()

    # Child rows are folded into their parents, so they don't add to the count.
    self.assertEquals(stored_sample_dao.count(), nids - cids)

    for x in range(0, nids):
        cols = lines[x].split('\t')

        if cols[10].strip():  # skip child sample
            continue

        # If status is 'In Prep', then sample confirmed timestamp should be empty
        if cols[2] == 'In Prep':
            self.assertEquals(len(cols[11]), 0)
        else:
            status = SampleStatus.RECEIVED
            ts_str = cols[11]
            # DA-814 - Participant Summary test status should be: Unset, Received or
            # Disposed only. If sample is disposed, then check disposed timestamp,
            # otherwise check confirmed timestamp.
            # DA-871 - Only check status is disposed when reason code is a bad disposal.
            if cols[2] == 'Disposed' and get_sample_status_enum_value(
                    cols[8]) > SampleStatus.UNKNOWN:
                status = SampleStatus.DISPOSED
                ts_str = cols[9]
            ts = datetime.datetime.strptime(ts_str, '%Y/%m/%d %H:%M:%S')
            self._check_summary(participant_ids[x], test_codes[x], ts, status)
def import_biobank_samples():
    """Runs the Biobank samples CSV import, then generates the reconciliation
    report for the imported batch.

    Returns a JSON string of the form {"written": <row count>}.
    """
    # Note that crons always have a 10 minute deadline instead of the normal 60s;
    # additionally our offline service uses basic scaling with has no deadline.
    logging.info('Starting samples import.')
    num_written, batch_timestamp = biobank_samples_pipeline.upsert_from_latest_csv()
    logging.info('Import complete (%d written), generating report.', num_written)

    logging.info('Generating reconciliation report.')
    biobank_samples_pipeline.write_reconciliation_report(batch_timestamp)
    logging.info('Generated reconciliation report.')

    return json.dumps({'written': num_written})