Пример #1
0
def populate(
    dal: DalClinicalTrials,
    num_days: Optional[int] = None,
    chunk_size: Optional[int] = 1000,
    skip_populated: Optional[bool] = False,
    dry_run: Optional[bool] = False,
):
    """Write a `StudyFacility` record for each location of recent studies.

    Studies are obtained in chunks from `find_recent_studies`. For every
    location of every study, `dal.iodu_study_facility` is called with the
    study ID, the location's facility ID, and the canonical-facility ID of
    that location's facility.

    Args:
        dal: Data-access layer providing the session scope and the
            `iodu_study_facility` operation.
        num_days: Forwarded unchanged to `find_recent_studies`.
        chunk_size: Forwarded unchanged to `find_recent_studies`.
        skip_populated: Forwarded unchanged to `find_recent_studies`.
        dry_run: When truthy, only log what would be written (messages are
            prefixed with "[DRY RUN] ") and skip the write itself.
    """
    # The prefix depends only on `dry_run`, so it is computed once up front.
    log_prefix = "[DRY RUN] " if dry_run else ""

    with dal.session_scope() as session:  # type: sqlalchemy.orm.Session
        chunk_iter = find_recent_studies(
            session=session,
            num_days=num_days,
            chunk_size=chunk_size,
            skip_populated=skip_populated,
        )

        for chunk in chunk_iter:
            # Materialize the chunk so that its size can be logged before
            # the studies are processed.
            batch = list(chunk)  # type: List[Study]
            logger.warning(len(batch))

            for study in batch:
                study_id = study.study_id
                logger.info(f"Processing study with ID {study_id}")

                for site in study.locations:
                    logger.info(
                        f"Processing location with ID {site.location_id}")

                    fac_id = site.facility_id
                    canonical_id = site.facility.facility_canonical_id

                    message = (
                        f"{log_prefix}IODUing `StudyFacility` with a "
                        f"`study_id` of '{study_id}', `facility_id` of "
                        f"{fac_id}, and `facility_canonical_id` of "
                        f"'{canonical_id}'."
                    )
                    logger.info(message)

                    # In a dry run the intent is logged but nothing is
                    # written.
                    if dry_run:
                        continue

                    dal.iodu_study_facility(
                        study_id=study_id,
                        facility_id=fac_id,
                        facility_canonical_id=canonical_id,
                    )
Пример #2
0
def find_recent_unmatched_facilities(
    dal: DalClinicalTrials,
    num_days: int = 2,
):
    """Fetch facilities lacking a canonical match on recently updated studies.

    Queries `Facility` records joined to their studies and those studies'
    dates, keeping only facilities whose `facility_canonical_id` is NULL and
    whose studies have a `last_update_posted` date later than `num_days`
    days before today. Results are grouped by `Facility.facility_id` so each
    facility appears once even when several of its studies qualify.

    Args:
        dal: Data-access layer providing the session scope used for the
            query.
        num_days: Size of the look-back window in days. Defaults to 2, the
            window that was previously hard-coded.

    Returns:
        The list of matching `Facility` objects.

    Note:
        The objects are loaded inside the session scope, which has already
        exited when they are returned.
        NOTE(review): whether their attributes remain accessible afterwards
        depends on how `session_scope` configures the session; confirm.
    """
    # Only studies updated strictly after this date are considered.
    cutoff = datetime.date.today() - datetime.timedelta(days=num_days)

    with dal.session_scope() as session:  # type: sqlalchemy.orm.Session
        query = (
            session.query(Facility)
            .join(Facility.studies)
            .join(Study.study_dates)
            # Facilities not yet matched with a canonical facility.
            .filter(Facility.facility_canonical_id.is_(None))
            # Studies updated within the look-back window.
            .filter(StudyDates.last_update_posted > cutoff)
            # Collapse the per-study join rows to one row per facility.
            .group_by(Facility.facility_id)
        )

        facilities_unmatched = query.all()

    return facilities_unmatched
Пример #3
0
def match(dal: DalClinicalTrials, retriever: RetrieverGoogleMaps):
    """Link recent unmatched facilities to canonical facilities via Google.

    For each facility returned by `find_recent_unmatched_facilities`:

    1. `find_facility_google_place` is queried and the `place_id` of the
       first candidate is taken as the facility's Google Place ID.
    2. A `FacilityCanonical` with that `google_place_id` is looked up. If
       none exists, the place details are fetched with `get_place_details`
       and passed to `iodu_canonical_facility_from_google`, whose return
       value is used as the canonical facility ID.
    3. The facility's `facility_canonical_id` is updated to that ID.

    A facility is skipped with a warning when the place search yields no
    candidates or when no place details could be retrieved.

    Args:
        dal: Data-access layer used for the session scope, the lookup and
            the update.
        retriever: Google Maps retriever handed to the place-search and
            place-details helpers.
    """
    logger.info(
        "Retrieving unmatched facilities linked to studies updated in the "
        "past 2 days.")

    facilities_unmatched = find_recent_unmatched_facilities(dal=dal)

    logger.info(
        f"Retrieved {len(facilities_unmatched)} unmatched facilities linked "
        "to studies updated in the past 2 days.")

    for facility in facilities_unmatched:

        logger.info(f"Processing facility {facility}.")

        logger.info(f"Matching facility {facility} against a Google Place.")

        place_response = find_facility_google_place(retriever=retriever,
                                                    facility=facility)

        # Skip empty responses.
        if not place_response or not place_response.get("candidates"):
            logger.warning(
                f"No Google Place match found for facility {facility}."
                " Skipping.")
            continue

        # Retrieving Google Place ID from the first candidate.
        google_place_id = place_response["candidates"][0]["place_id"]

        logger.info(f"Google Place with ID '{google_place_id}' found matching "
                    f" facility {facility}.")

        with dal.session_scope() as session:  # type: sqlalchemy.orm.Session
            # noinspection PyTypeChecker
            facility_canonical = dal.get_by_attr(
                orm_class=FacilityCanonical,
                attr_name="google_place_id",
                attr_value=google_place_id,
                session=session,
            )  # type: FacilityCanonical

            if facility_canonical:
                facility_canonical_id = facility_canonical.facility_canonical_id

                logger.info(
                    f"Google Place with ID '{google_place_id}' previously "
                    f"stored as canonical facility {facility_canonical}.")
            else:
                logger.info(
                    f"Google Place with ID '{google_place_id}' not previously "
                    "stored as a canonical facility.")

                logger.info(
                    "Retrieving Google Place details for Google Place with "
                    f"ID '{google_place_id}'.")

                details_response = get_place_details(
                    google_place_id=google_place_id, retriever=retriever)

                if not details_response:
                    logger.warning(
                        "No Google Place details retrieved for Google Place "
                        f"with ID '{google_place_id}'. Skipping.")
                    # Leaving the `with` block via `continue` still exits
                    # the session scope.
                    continue

                facility_canonical_id = iodu_canonical_facility_from_google(
                    dal=dal,
                    google_place_id=google_place_id,
                    google_response=details_response,
                )

            # Log the ID rather than the `facility_canonical` object: the
            # object is `None` whenever the canonical facility was created
            # just above, which made the message read "canonical facility
            # None".
            logger.info(f"Linking facility {facility} with canonical facility "
                        f"with ID '{facility_canonical_id}'.")

            dal.update_attr_value(
                orm_class=Facility,
                pk=facility.facility_id,
                attr_name="facility_canonical_id",
                attr_value=facility_canonical_id,
                session=session,
            )