def main(args): skip_count = 0 new_or_updated_count = 0 matched_count = 0 with open(args.file, 'r') as csv_file: sites_reader = csv.DictReader(csv_file) hpo_dao = HPODao() site_dao = SiteDao() existing_site_map = { site.googleGroup: site for site in site_dao.get_all() } with site_dao.session() as session: for row in sites_reader: site = _site_from_row(row, hpo_dao) if site is None: skip_count += 1 continue changed = _upsert_site(site, existing_site_map.get(site.googleGroup), site_dao, session, args.dry_run) if changed: new_or_updated_count += 1 else: matched_count += 1 logging.info( 'Done%s. %d skipped, %d sites new/updated, %d sites not changed.', ' (dry run)' if args.dry_run else '', skip_count, new_or_updated_count, matched_count)
class OrganizationHierarchySyncDao(BaseDao): def __init__(self): super(OrganizationHierarchySyncDao, self).__init__(HPO) self.hpo_dao = HPODao() self.organization_dao = OrganizationDao() self.site_dao = SiteDao() def from_client_json(self, resource_json, id_=None, expected_version=None, client_id=None): # pylint: disable=unused-argument try: fhir_org = lib_fhir.fhirclient_3_0_0.models.organization.Organization( resource_json) except FHIRValidationError: raise BadRequest('Invalid FHIR format in payload data.') if not fhir_org.meta or not fhir_org.meta.versionId: raise BadRequest('No versionId info found in payload data.') try: fhir_org.version = int(fhir_org.meta.versionId) except ValueError: raise BadRequest('Invalid versionId in payload data.') return fhir_org def to_client_json(self, hierarchy_org_obj): return hierarchy_org_obj.as_json() def get_etag(self, id_, pid): # pylint: disable=unused-argument return None def update(self, hierarchy_org_obj): obj_type = self._get_type(hierarchy_org_obj) operation_funcs = { 'AWARDEE': self._update_awardee, 'ORGANIZATION': self._update_organization, 'SITE': self._update_site } if obj_type not in operation_funcs: raise BadRequest('No awardee-type info found in payload data.') operation_funcs[obj_type](hierarchy_org_obj) def _update_awardee(self, hierarchy_org_obj): if hierarchy_org_obj.id is None: raise BadRequest('No id found in payload data.') awardee_id = self._get_value_from_identifier( hierarchy_org_obj, HIERARCHY_CONTENT_SYSTEM_PREFIX + 'awardee-id') if awardee_id is None: raise BadRequest( 'No organization-identifier info found in payload data.') is_obsolete = ObsoleteStatus( 'OBSOLETE') if not hierarchy_org_obj.active else None awardee_type = self._get_value_from_extention( hierarchy_org_obj, HIERARCHY_CONTENT_SYSTEM_PREFIX + 'awardee-type') try: organization_type = OrganizationType(awardee_type) if organization_type == OrganizationType.UNSET: organization_type = None except TypeError: raise BadRequest( 'Invalid organization type {} for awardee {}'.format( awardee_type, awardee_id)) entity = HPO(name=awardee_id.upper(), displayName=hierarchy_org_obj.name, organizationType=organization_type, isObsolete=is_obsolete, resourceId=hierarchy_org_obj.id) existing_map = { entity.name: entity for entity in self.hpo_dao.get_all() } existing_entity = existing_map.get(entity.name) with self.hpo_dao.session() as session: if existing_entity: hpo_id = existing_entity.hpoId new_dict = entity.asdict() new_dict['hpoId'] = None existing_dict = existing_entity.asdict() existing_dict['hpoId'] = None if existing_dict == new_dict: logging.info('Not updating {}.'.format(new_dict['name'])) else: existing_entity.displayName = entity.displayName existing_entity.organizationType = entity.organizationType existing_entity.isObsolete = entity.isObsolete existing_entity.resourceId = entity.resourceId self.hpo_dao.update_with_session(session, existing_entity) else: entity.hpoId = len(existing_map) hpo_id = entity.hpoId self.hpo_dao.insert_with_session(session, entity) bq_hpo_update_by_id(hpo_id) def _update_organization(self, hierarchy_org_obj): if hierarchy_org_obj.id is None: raise BadRequest('No id found in payload data.') organization_id = self._get_value_from_identifier( hierarchy_org_obj, HIERARCHY_CONTENT_SYSTEM_PREFIX + 'organization-id') if organization_id is None: raise BadRequest( 'No organization-identifier info found in payload data.') is_obsolete = ObsoleteStatus( 'OBSOLETE') if not hierarchy_org_obj.active else None resource_id = self._get_reference(hierarchy_org_obj) hpo = self.hpo_dao.get_by_resource_id(resource_id) if hpo is None: raise BadRequest( 'Invalid partOf reference {} importing organization {}'.format( resource_id, organization_id)) entity = Organization(externalId=organization_id.upper(), displayName=hierarchy_org_obj.name, hpoId=hpo.hpoId, isObsolete=is_obsolete, resourceId=hierarchy_org_obj.id) existing_map = { entity.externalId: entity for entity in self.organization_dao.get_all() } existing_entity = existing_map.get(entity.externalId) with self.organization_dao.session() as session: if existing_entity: new_dict = entity.asdict() new_dict['organizationId'] = None existing_dict = existing_entity.asdict() existing_dict['organizationId'] = None if existing_dict == new_dict: logging.info('Not updating {}.'.format( new_dict['externalId'])) else: existing_entity.displayName = entity.displayName existing_entity.hpoId = entity.hpoId existing_entity.isObsolete = entity.isObsolete existing_entity.resourceId = entity.resourceId self.organization_dao.update_with_session( session, existing_entity) else: self.organization_dao.insert_with_session(session, entity) org_id = self.organization_dao.get_by_external_id( organization_id.upper()).organizationId bq_organization_update_by_id(org_id) def _update_site(self, hierarchy_org_obj): if hierarchy_org_obj.id is None: raise BadRequest('No id found in payload data.') google_group = self._get_value_from_identifier( hierarchy_org_obj, HIERARCHY_CONTENT_SYSTEM_PREFIX + 'site-id') if google_group is None: raise BadRequest( 'No organization-identifier info found in payload data.') google_group = google_group.lower() is_obsolete = ObsoleteStatus( 'OBSOLETE') if not hierarchy_org_obj.active else None resource_id = self._get_reference(hierarchy_org_obj) organization = self.organization_dao.get_by_resource_id(resource_id) if organization is None: raise BadRequest( 'Invalid partOf reference {} importing site {}'.format( resource_id, google_group)) launch_date = None launch_date_str = self._get_value_from_extention( hierarchy_org_obj, HIERARCHY_CONTENT_SYSTEM_PREFIX + 'anticipatedLaunchDate', 'valueDate') if launch_date_str: try: launch_date = parse(launch_date_str).date() except ValueError: raise BadRequest('Invalid launch date {} for site {}'.format( launch_date_str, google_group)) name = hierarchy_org_obj.name mayolink_client_number = None mayolink_client_number_str = self._get_value_from_identifier( hierarchy_org_obj, HIERARCHY_CONTENT_SYSTEM_PREFIX + 'mayo-link-identifier') if mayolink_client_number_str: try: mayolink_client_number = int(mayolink_client_number_str) except ValueError: raise BadRequest( 'Invalid Mayolink Client # {} for site {}'.format( mayolink_client_number_str, google_group)) notes = self._get_value_from_extention( hierarchy_org_obj, HIERARCHY_CONTENT_SYSTEM_PREFIX + 'notes') site_status_bool = self._get_value_from_extention( hierarchy_org_obj, HIERARCHY_CONTENT_SYSTEM_PREFIX + 'schedulingStatusActive', 'valueBoolean') try: site_status = SiteStatus( 'ACTIVE' if site_status_bool else 'INACTIVE') except TypeError: raise BadRequest('Invalid site status {} for site {}'.format( site_status, google_group)) enrolling_status_bool = self._get_value_from_extention( hierarchy_org_obj, HIERARCHY_CONTENT_SYSTEM_PREFIX + 'enrollmentStatusActive', 'valueBoolean') try: enrolling_status = EnrollingStatus( 'ACTIVE' if enrolling_status_bool else 'INACTIVE') except TypeError: raise BadRequest( 'Invalid enrollment site status {} for site {}'.format( enrolling_status_bool, google_group)) digital_scheduling_bool = self._get_value_from_extention( hierarchy_org_obj, HIERARCHY_CONTENT_SYSTEM_PREFIX + 'digitalSchedulingStatusActive', 'valueBoolean') try: digital_scheduling_status = DigitalSchedulingStatus( 'ACTIVE' if digital_scheduling_bool else 'INACTIVE') except TypeError: raise BadRequest( 'Invalid digital scheduling status {} for site {}'.format( digital_scheduling_bool, google_group)) directions = self._get_value_from_extention( hierarchy_org_obj, HIERARCHY_CONTENT_SYSTEM_PREFIX + 'directions') physical_location_name = self._get_value_from_extention( hierarchy_org_obj, HIERARCHY_CONTENT_SYSTEM_PREFIX + 'locationName') address_1, address_2, city, state, zip_code = self._get_address( hierarchy_org_obj) phone = self._get_contact_point(hierarchy_org_obj, 'phone') admin_email_addresses = self._get_contact_point( hierarchy_org_obj, 'email') link = self._get_contact_point(hierarchy_org_obj, 'url') schedule_instructions = self._get_value_from_extention( hierarchy_org_obj, HIERARCHY_CONTENT_SYSTEM_PREFIX + 'schedulingInstructions') entity = Site(siteName=name, googleGroup=google_group, mayolinkClientNumber=mayolink_client_number, organizationId=organization.organizationId, hpoId=organization.hpoId, siteStatus=site_status, enrollingStatus=enrolling_status, digitalSchedulingStatus=digital_scheduling_status, scheduleInstructions=schedule_instructions, scheduleInstructions_ES='', launchDate=launch_date, notes=notes, notes_ES='', directions=directions, physicalLocationName=physical_location_name, address1=address_1, address2=address_2, city=city, state=state, zipCode=zip_code, phoneNumber=phone, adminEmails=admin_email_addresses, link=link, isObsolete=is_obsolete, resourceId=hierarchy_org_obj.id) existing_map = { entity.googleGroup: entity for entity in self.site_dao.get_all() } existing_entity = existing_map.get(entity.googleGroup) with self.site_dao.session() as session: if existing_entity: self._populate_lat_lng_and_time_zone(entity, existing_entity) if entity.siteStatus == SiteStatus.ACTIVE and \ (entity.latitude is None or entity.longitude is None): raise BadRequest( 'Active site without geocoding: {}'.format( entity.googleGroup)) new_dict = entity.asdict() new_dict['siteId'] = None existing_dict = existing_entity.asdict() existing_dict['siteId'] = None if existing_dict == new_dict: logging.info('Not updating {}.'.format( new_dict['googleGroup'])) else: for k, v in entity.asdict().iteritems(): if k != 'siteId' and k != 'googleGroup': setattr(existing_entity, k, v) self.site_dao.update_with_session(session, existing_entity) else: self._populate_lat_lng_and_time_zone(entity, None) if entity.siteStatus == SiteStatus.ACTIVE and \ (entity.latitude is None or entity.longitude is None): raise BadRequest( 'Active site without geocoding: {}'.format( entity.googleGroup)) self.site_dao.insert_with_session(session, entity) site_id = self.site_dao.get_by_google_group(google_group).siteId bq_site_update_by_id(site_id) def _get_type(self, hierarchy_org_obj): obj_type = None type_arr = hierarchy_org_obj.type for type_item in type_arr: code_arr = type_item.coding for code_item in code_arr: if code_item.system == HIERARCHY_CONTENT_SYSTEM_PREFIX + 'type': obj_type = code_item.code break return obj_type def _get_value_from_identifier(self, hierarchy_org_obj, system): identifier_arr = hierarchy_org_obj.identifier for identifier in identifier_arr: if identifier.system == system: return identifier.value else: return None def _get_value_from_extention(self, hierarchy_org_obj, url, value_key='valueString'): extension_arr = hierarchy_org_obj.extension for extension in extension_arr: if extension.url == url: ext_json = extension.as_json() return ext_json[value_key] else: return None def _get_contact_point(self, hierarchy_org_obj, code): contact_arr = hierarchy_org_obj.contact for contact in contact_arr: telecom_arr = contact.telecom for telecom in telecom_arr: if telecom.system == code: return telecom.value else: return None def _get_address(self, hierarchy_org_obj): address = hierarchy_org_obj.address[0] address_1 = address.line[0] if len(address.line) > 0 else '' address_2 = address.line[1] if len(address.line) > 1 else '' city = address.city state = address.state postal_code = address.postalCode return address_1, address_2, city, state, postal_code def _get_reference(self, hierarchy_org_obj): try: return hierarchy_org_obj.partOf.reference.split('/')[1] except IndexError: return None def _populate_lat_lng_and_time_zone(self, site, existing_site): if site.address1 and site.city and site.state: if existing_site: if (existing_site.address1 == site.address1 and existing_site.city == site.city and existing_site.state == site.state and existing_site.latitude is not None and existing_site.longitude is not None and existing_site.timeZoneId is not None): # Address didn't change, use the existing lat/lng and time zone. site.latitude = existing_site.latitude site.longitude = existing_site.longitude site.timeZoneId = existing_site.timeZoneId return latitude, longitude = self._get_lat_long_for_site( site.address1, site.city, site.state) site.latitude = latitude site.longitude = longitude if latitude and longitude: site.timeZoneId = self._get_time_zone(latitude, longitude) else: if site.googleGroup not in self.status_exception_list: if site.siteStatus == self.ACTIVE: logging.warn( 'Active site must have valid address. Site: {}, Group: {}' .format(site.siteName, site.googleGroup)) def _get_lat_long_for_site(self, address_1, city, state): self.full_address = address_1 + ' ' + city + ' ' + state try: self.api_key = os.environ.get('API_KEY') self.gmaps = googlemaps.Client(key=self.api_key) try: geocode_result = self.gmaps.geocode(address_1 + '' + city + ' ' + state)[0] except IndexError: logging.warn('Bad address for {}, could not geocode.'.format( self.full_address)) return None, None if geocode_result: geometry = geocode_result.get('geometry') if geometry: location = geometry.get('location') if location: latitude = location.get('lat') longitude = location.get('lng') return latitude, longitude else: logging.warn('Can not find lat/long for %s', self.full_address) return None, None else: logging.warn('Geocode results failed for %s.', self.full_address) return None, None except ValueError as e: logging.exception('Invalid geocode key: %s. ERROR: %s', self.api_key, e) return None, None except IndexError as e: logging.exception( 'Geocoding failure Check that address is correct. ERROR: %s', e) return None, None def _get_time_zone(self, latitude, longitude): time_zone = self.gmaps.timezone(location=(latitude, longitude)) if time_zone['status'] == 'OK': time_zone_id = time_zone['timeZoneId'] return time_zone_id else: logging.info('can not retrieve time zone from %s', self.full_address) return None