Python BQRecord示例，model.bq_base.BQRecord Python示例

示例#1

0

显示文件

文件： bigquery_schema_test.py 项目： sujaypatil96/raw-data-repository

    def test_schema_nested_data(self):
        """Round-trip the full nested fixture through a BQRecord.

        The dict returned by ``to_dict()`` must equal ``self.full_data`` and
        must no longer compare equal after one nested value in it is changed.
        """
        round_tripped = BQRecord(
            schema=BQTestSchema,
            data=self.full_data,
            convert_to_enum=False,
        ).to_dict()
        self.assertEqual(self.full_data, round_tripped)

        # Change a single nested value; the two dicts must now differ.
        first_nested = round_tripped['nested'][0]
        first_nested['int_field'] = 55
        self.assertNotEqual(self.full_data, round_tripped)

示例#2

0

显示文件

文件： bq_pdr_participant_summary_dao.py 项目： sujaypatil96/raw-data-repository

    def make_bqrecord(self, p_id, convert_to_enum=False, ps_bqr=None):
        """
        Build a PDR Participant Summary BQRecord object for the given participant id.

        The record is derived from the full Participant Summary record: its data is loaded
        into BQPDRParticipantSummarySchema, 'addr_zip' is cut to its first three characters,
        one 'biospec' entry is added per biobank order, and the result of
        self._calculate_ubr() is merged in.

        :param p_id: participant id
        :param convert_to_enum: If schema field description includes Enum class info, convert value to Enum.
        :param ps_bqr: A BQParticipantSummary BQRecord object. When missing or falsy, one is built
                       with BQParticipantSummaryGenerator.
        :return: BQRecord object
        """
        # Since we are primarily a subset of the Participant Summary, call the full Participant Summary generator
        # and take what we need from it.
        if not ps_bqr:
            ps_bqr = BQParticipantSummaryGenerator().make_bqrecord(
                p_id, convert_to_enum=convert_to_enum)
        bqr = BQRecord(schema=BQPDRParticipantSummarySchema,
                       data=ps_bqr.to_dict(),
                       convert_to_enum=convert_to_enum)

        # Keep only the first three characters of the zip code.
        addr_zip = getattr(bqr, 'addr_zip', None)
        if addr_zip:
            bqr.addr_zip = addr_zip[:3]

        summary = bqr.to_dict()
        # Populate BQAnalyticsBiospecimenSchema if there are biobank orders.
        if hasattr(ps_bqr, 'biobank_orders'):
            biospec = []
            # A None order list or None sample list is treated as empty instead of crashing.
            for order in ps_bqr.biobank_orders or []:
                samples = order.get('bbo_samples') or []
                # Count the number of DNA tests in this order; a sample without the
                # 'bbs_dna_test' key is simply not counted.
                dna_tests = sum(
                    1 for sample in samples if sample.get('bbs_dna_test') == 1)

                biospec.append({
                    'biosp_status': order.get('bbo_status', None),
                    'biosp_status_id': order.get('bbo_status_id', None),
                    'biosp_order_time': order.get('bbo_created', None),
                    'biosp_isolate_dna': dna_tests,
                })

            summary = self._merge_schema_dicts(summary, {'biospec': biospec})

        # Calculate UBR
        summary = self._merge_schema_dicts(summary,
                                           self._calculate_ubr(ps_bqr))

        # Rebuild the record so it reflects the merged summary data.
        return BQRecord(schema=BQPDRParticipantSummarySchema,
                        data=summary,
                        convert_to_enum=convert_to_enum)

示例#3

0

显示文件

    def _prep_the_basics(self, p_id, ro_session):
        """
        Collect race, gender, education, income, sex and sexual orientation answers
        from the participant's 'TheBasics' questionnaire.
        :param p_id: participant id
        :param ro_session: Readonly DAO session object
        :return: dict, empty when no 'TheBasics' answers were returned.
        """
        answers = self.ro_dao.call_proc('sp_get_questionnaire_answers',
                                        args=['TheBasics', p_id])
        if not answers:
            return {}

        def coded(label, code):
            # Pair an answer code with the code id looked up for it.
            return {
                label: code,
                label + '_id': self._lookup_code_id(code, ro_session)
            }

        # use only most recent questionnaire.
        latest = BQRecord(schema=None, data=answers[0])
        data = {}

        # Race answers arrive as one comma separated string.
        race_answer = latest.Race_WhatRaceEthnicity
        if race_answer:
            data['races'] = [
                coded('race', code) for code in race_answer.split(',')
            ]

        # Gender answers; the 'additional options' marker itself is not a gender.
        genders = []
        gender_answer = latest.Gender_GenderIdentity
        if gender_answer:
            genders.extend(
                coded('gender', code) for code in gender_answer.split(',')
                if code != 'GenderIdentity_AdditionalOptions')
        # Additional gender answers, if any, are appended to the same list.
        additional_answer = latest.GenderIdentity_SexualityCloserDescription
        if additional_answer:
            genders.extend(
                coded('gender', code)
                for code in additional_answer.split(','))
        if genders:
            data['genders'] = genders

        # Single-valued answers, each stored next to its code id.
        single_answers = (
            ('education', latest.EducationLevel_HighestGrade),
            ('income', latest.Income_AnnualIncome),
            ('sex', latest.BiologicalSexAtBirth_SexAtBirth),
            ('sexual_orientation', latest.TheBasics_SexualOrientation),
        )
        for label, code in single_answers:
            data.update(coded(label, code))

        return data

示例#4

0

显示文件

 def make_bqrecord(self, hpo_id, convert_to_enum=False, backup=True):
   """
   Load one row of the hpo table and wrap it in a BQRecord using BQHPOSchema.
   :param hpo_id: Primary key value from hpo table.
   :param convert_to_enum: If schema field description includes Enum class info, convert value to Enum.
   :param backup: if True, get from backup database
   :return: BQRecord object
   """
   dao = BigQuerySyncDao(backup=backup)
   hpo_query = text('select * from hpo where hpo_id = :id')
   with dao.session() as session:
     # Only the first matching row is used.
     hpo_row = session.execute(hpo_query, {'id': hpo_id}).first()
     return BQRecord(schema=BQHPOSchema, data=dao.to_dict(hpo_row), convert_to_enum=convert_to_enum)

示例#5

0

显示文件

    def make_bqrecord(self, p_id, convert_to_enum=False):
        """
        Assemble a Participant Summary BQRecord object for the given participant id.

        Starting from the participant record data, the dict returned by each preparation
        step is merged into the running summary with self._merge_schema_dicts(), in the
        order the steps are listed below.
        :param p_id: participant id
        :param convert_to_enum: If schema field description includes Enum class info, convert value to Enum.
        :return: BQRecord object
        """
        if not self.ro_dao:
            self.ro_dao = BigQuerySyncDao(backup=True)

        # Steps that are given the participant id and the session.
        participant_steps = (
            self._prep_consentpii_answers,  # ConsentPII questionnaire information
            self._prep_modules,  # questionnaire modules, includes gathering extra consents
            self._prep_physical_measurements,  # physical measurements
            self._prep_the_basics,  # race and gender
            self._prep_biobank_info,  # biobank orders and samples
        )
        # Steps that are calculated from the summary gathered so far.
        summary_steps = (
            self._calculate_enrollment_status,  # enrollment status for participant
            self._calculate_distinct_visits,  # distinct visits
        )

        with self.ro_dao.session() as session:
            # participant info from Participant record
            summary = self._prep_participant(p_id, session)
            for step in participant_steps:
                summary = self._merge_schema_dicts(summary,
                                                   step(p_id, session))
            for step in summary_steps:
                summary = self._merge_schema_dicts(summary, step(summary))

            return BQRecord(schema=BQParticipantSummarySchema,
                            data=summary,
                            convert_to_enum=convert_to_enum)

示例#6

0

显示文件

文件： bigquery_schema_test.py 项目： sujaypatil96/raw-data-repository

 def test_record_from_bq_data(self):
     """Load self.bq_data with a schema built from schemaFromBQ and expect self.full_data back."""
     bq_record = BQRecord(schema=BQSchema(schemaFromBQ), data=self.bq_data)
     actual = bq_record.to_dict()
     self.assertEqual(self.full_data, actual)

示例#7

0

显示文件

文件： bigquery_schema_test.py 项目： sujaypatil96/raw-data-repository

 def test_schema_with_data(self):
     """A BQRecord built from a schema plus partial data must return that data from to_dict()."""
     as_dict = BQRecord(schema=BQTestSchema, data=self.partial_data).to_dict()
     self.assertEqual(self.partial_data, as_dict)

示例#8

0

显示文件

文件： bigquery_schema_test.py 项目： sujaypatil96/raw-data-repository

 def test_schema_no_data(self):
     """A BQRecord created with a schema but no data must accept values through update_values()."""
     empty_record = BQRecord(schema=BQTestSchema, data=None)
     # Fill the record afterwards with the partial fixture.
     empty_record.update_values(self.partial_data)
     filled = empty_record.to_dict()
     self.assertEqual(self.partial_data, filled)

示例#9

0

显示文件

文件： bq_questionaire_dao.py 项目： sujaypatil96/raw-data-repository

    def make_bqrecord(self,
                      p_id,
                      module_id,
                      latest=False,
                      convert_to_enum=False):
        """
        Generate a list of questionnaire module BQRecords for the given participant id.

        Every answer field whose value is not None is coerced to a string (date and datetime
        values through isoformat()). The record bookkeeping fields listed in `skip_fields`
        are left untouched. Zip code answers are cut to their first three characters.

        :param p_id: participant id
        :param module_id: A questionnaire module id, IE: 'TheBasics'.
        :param latest: only process the most recent response if True
        :param convert_to_enum: If schema field description includes Enum class info, convert value to Enum.
        :return: tuple of (BQTable class, list of BQRecord objects); (None, empty list) when the
                 module id is not handled or no answers were returned.
        """
        if not self.ro_dao:
            self.ro_dao = BigQuerySyncDao(backup=True)

        # Questionnaire module id -> table class holding its answers.
        module_tables = {
            'TheBasics': BQPDRTheBasics,
            'ConsentPII': BQPDRConsentPII,
            'Lifestyle': BQPDRLifestyle,
            'OverallHealth': BQPDROverallHealth,
            'DVEHRSharing': BQPDRDVEHRSharing,
            'EHRConsentPII': BQPDREHRConsentPII,
            'FamilyHistory': BQPDRFamilyHistory,
            'HealthcareAccess': BQPDRHealthcareAccess,
            'PersonalMedicalHistory': BQPDRPersonalMedicalHistory,
        }
        table = module_tables.get(module_id)
        if table is None:
            logging.info('Generator: ignoring questionnaire module id %s.',
                         module_id)
            return None, list()

        qnans = self.ro_dao.call_proc('sp_get_questionnaire_answers',
                                      args=[module_id, p_id])
        if not qnans:
            return None, list()

        # Fields that are not question answers; their values keep their type.
        skip_fields = ('id', 'created', 'modified', 'authored', 'language',
                       'participant_id', 'questionnaire_response_id')
        zip_fields = ('StreetAddress_PIIZIP', 'EmploymentWorkAddress_ZipCode')

        bqrs = list()
        for qnan in qnans:
            bqr = BQRecord(schema=table().get_schema(),
                           data=qnan,
                           convert_to_enum=convert_to_enum)
            bqr.participant_id = p_id  # reset participant_id.

            for field in bqr.get_fields():
                fld_name = field['name']
                if fld_name in skip_fields:
                    continue

                fld_value = getattr(bqr, fld_name, None)
                if fld_value is None:  # Let empty strings pass.
                    continue

                # question responses values need to be coerced to a String type.
                if isinstance(fld_value, (datetime.date, datetime.datetime)):
                    text_value = fld_value.isoformat()
                else:
                    try:
                        text_value = str(fld_value)
                    except UnicodeEncodeError:
                        # NOTE(review): `unicode` only exists on Python 2; on Python 3 this
                        # fallback would raise NameError if it were ever reached - confirm
                        # the interpreter version this module targets.
                        text_value = unicode(fld_value)

                # Truncate zip codes to 3 digits. The coerced string is sliced, so a zip
                # code stored as a number no longer fails on len()/slicing.
                if fld_name in zip_fields:
                    text_value = text_value[:3]

                setattr(bqr, fld_name, text_value)

            bqrs.append(bqr)
            if latest:
                # Stop after the first row of the answers.
                break

        return table, bqrs

示例#10

0

显示文件

    def _prep_modules(self, p_id, ro_session):
        """
        Build the 'modules' list, and the 'consents' list when there are any, from the
        questionnaire responses the participant has submitted.
        :param p_id: participant id
        :param ro_session: Readonly DAO session object
        :return: dict
        """
        # Sub-select labelled 'codeId': the max concept code id of the questionnaire
        # a response belongs to.
        code_id_query = (
            ro_session.query(func.max(QuestionnaireConcept.codeId))
            .filter(QuestionnaireResponse.questionnaireId ==
                    QuestionnaireConcept.questionnaireId)
            .label('codeId'))
        # All responses of this participant, ordered by response id.
        responses = (
            ro_session.query(
                QuestionnaireResponse.questionnaireResponseId,
                QuestionnaireResponse.authored,
                QuestionnaireResponse.created,
                QuestionnaireResponse.language,
                code_id_query)
            .filter(QuestionnaireResponse.participantId == p_id)
            .order_by(QuestionnaireResponse.questionnaireResponseId)
            .all())

        baseline_modules = ['TheBasics', 'OverallHealth', 'Lifestyle']
        try:
            baseline_modules = config.getSettingList(
                'baseline_ppi_questionnaire_fields')
        except (ValueError, AssertionError):
            # Keep the defaults above. AssertionError: unittest errors because of GCP SDK.
            pass

        consent_modules = {
            # module: question code string
            'DVEHRSharing': 'DVEHRSharing_AreYouInterested',
            'EHRConsentPII': 'EHRConsentPII_ConsentPermission',
        }

        modules = list()
        consents = list()
        for row in responses or ():
            module_name = self._lookup_code_value(row.codeId, ro_session)
            is_baseline = 1 if module_name in baseline_modules else 0  # Boolean field
            modules.append({
                'mod_module': module_name,
                'mod_baseline_module': is_baseline,
                'mod_authored': row.authored,
                'mod_created': row.created,
                'mod_language': row.language,
                'mod_status': BQModuleStatusEnum.SUBMITTED.name,
                'mod_status_id': BQModuleStatusEnum.SUBMITTED.value,
            })

            # Only modules that carry a consent question need their answers fetched.
            consent_question = consent_modules.get(module_name)
            if consent_question is None:
                continue
            qnans = self.ro_dao.call_proc('sp_get_questionnaire_answers',
                                          args=[module_name, p_id])
            if not qnans:
                continue

            # use only most recent questionnaire.
            qnan = BQRecord(schema=None, data=qnans[0])
            consent_id = self._lookup_code_id(consent_question, ro_session)
            consent_date = None
            if qnan.authored:
                consent_date = parser.parse(qnan.authored).date()
            consent_value = qnan[consent_question]
            consent_value_id = self._lookup_code_id(consent_value, ro_session)
            consents.append({
                'consent': consent_question,
                'consent_id': consent_id,
                'consent_date': consent_date,
                'consent_value': consent_value,
                'consent_value_id': consent_value_id,
            })

        data = dict()
        if modules:
            data['modules'] = modules
            if consents:
                data['consents'] = consents

        return data

示例#11

0

显示文件

    def _prep_consentpii_answers(self, p_id, ro_session):
        """
        Collect name, contact, address and consent data from the participant's
        ConsentPII questionnaire answers.
        :param p_id: participant id
        :param ro_session: Readonly DAO session object
        :return: dict
        """
        qnans = self.ro_dao.call_proc('sp_get_questionnaire_answers',
                                      args=['ConsentPII', p_id])
        if not qnans:
            # return the minimum data required when we don't have the questionnaire data.
            return {'email': None, 'is_ghost_id': 0}
        # use only most recent response.
        answers = BQRecord(schema=None, data=qnans[0])

        # TODO: We may need to use the first response to set consent dates,
        #  unless the consent value changed across response records.

        data = {
            'first_name': answers.PIIName_First,
            'middle_name': answers.PIIName_Middle,
            'last_name': answers.PIIName_Last,
            'date_of_birth': answers.PIIBirthInformation_BirthDate,
            'primary_language': answers.language,
            'email': answers.ConsentPII_EmailAddress,
            'phone_number': answers.PIIContactInformation_Phone,
            'login_phone_number': answers.ConsentPII_VerifiedPrimaryPhoneNumber,
        }

        # The state answer is a code prefixed with 'PIIState_'; strip the prefix and upper-case it.
        state = None
        if answers.StreetAddress_PIIState:
            state = answers.StreetAddress_PIIState.replace('PIIState_',
                                                           '').upper()
        residence = {
            'addr_type': BQStreetAddressTypeEnum.RESIDENCE.name,
            'addr_type_id': BQStreetAddressTypeEnum.RESIDENCE.value,
            'addr_street_address_1': answers.PIIAddress_StreetAddress,
            'addr_street_address_2': answers.PIIAddress_StreetAddress2,
            'addr_city': answers.StreetAddress_PIICity,
            'addr_state': state,
            'addr_zip': answers.StreetAddress_PIIZIP,
            'addr_country': 'US'
        }
        data['addresses'] = [residence]

        # The primary consent is always recorded with the 'ConsentPermission_Yes' value.
        consent_id = self._lookup_code_id('ConsentPII', ro_session)
        consent_date = None
        if answers.authored:
            consent_date = parser.parse(answers.authored).date()
        consent_value_id = self._lookup_code_id('ConsentPermission_Yes',
                                                ro_session)
        data['consents'] = [
            {
                'consent': 'ConsentPII',
                'consent_id': consent_id,
                'consent_date': consent_date,
                'consent_value': 'ConsentPermission_Yes',
                'consent_value_id': consent_value_id,
            },
        ]

        return data