Example No. 1
    def test_get_region_same_name_different_ingest_type_reverse(
            self, mock_environment):
        mock_environment.return_value = 'staging'

        us_pa_direct_ingest_region = regions.get_region('us_pa',
                                                        is_direct_ingest=True)
        self.assertEqual(us_pa_direct_ingest_region.get_ingestor_class(),
                         UsPaController)

        us_pa_scraper_region = regions.get_region('us_pa',
                                                  is_direct_ingest=False)
        self.assertEqual(us_pa_scraper_region.get_ingestor_class(),
                         UsPaScraper)
Example No. 2
    def setUp(self) -> None:
        super().setUp()
        view_builders = DirectIngestPreProcessedIngestViewCollector(
            get_region(STATE_CODE, is_direct_ingest=True), []
        ).collect_view_builders()
        self.view_builder = one(
            view
            for view in view_builders
            if view.file_tag == "sci_incarceration_period"
        )

        self.expected_result_columns = [
            "control_number",
            "inmate_number",
            "sequence_number",
            "start_movement_date",
            "end_movement_date",
            "location",
            "start_sentence_status_code",
            "end_sentence_status_code",
            "start_parole_status_code",
            "end_parole_status_code",
            "start_movement_code",
            "end_movement_code",
            "start_is_new_revocation",
            "start_is_admin_edge",
            "end_is_admin_edge",
            "sentence_type",
        ]
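The `one(...)` call in the setup above is doing real work: assuming it is `more_itertools.one`, it raises if the collector yields zero or more than one builder for the requested file tag, so a typo in the tag fails setup immediately instead of silently picking the wrong view. A minimal, self-contained sketch of that behavior (the tag strings below are placeholders, not from the codebase):

from more_itertools import one

builders = ["sci_incarceration_period", "person_external_ids"]

# Exactly one match: returns the element itself.
match = one(b for b in builders if b == "sci_incarceration_period")

# Zero matches (or more than one) raise ValueError, surfacing the bad tag early.
try:
    one(b for b in builders if b == "no_such_tag")
except ValueError as err:
    print(f"one() rejected the selection: {err}")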
Example No. 3
def controller_for_region_code(
        region_code: str,
        allow_unlaunched: bool = False) -> BaseDirectIngestController:
    """Returns an instance of the region's controller, if one exists."""
    if region_code not in get_supported_direct_ingest_region_codes():
        raise DirectIngestError(
            msg=f"Unsupported direct ingest region [{region_code}] in project "
            f"[{metadata.project_id()}]",
            error_type=DirectIngestErrorType.INPUT_ERROR,
        )

    try:
        region = regions.get_region(region_code, is_direct_ingest=True)
    except FileNotFoundError:
        raise DirectIngestError(
            msg=f"Region [{region_code}] has no registered manifest",
            error_type=DirectIngestErrorType.INPUT_ERROR,
        )

    if not allow_unlaunched and not region.is_ingest_launched_in_env():
        check_is_region_launched_in_env(region)

    controller = region.get_ingestor()

    if not isinstance(controller, BaseDirectIngestController):
        raise DirectIngestError(
            msg=f"Controller for direct ingest region [{region_code}] has "
            f"unexpected type [{type(controller)}]",
            error_type=DirectIngestErrorType.INPUT_ERROR,
        )

    return controller
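A hedged usage sketch for the factory above; the region code is a placeholder, and the handling simply mirrors the `DirectIngestError` contract the function documents.

import logging

try:
    # "us_xx" stands in for a supported direct ingest region code.
    controller = controller_for_region_code("us_xx", allow_unlaunched=True)
except DirectIngestError as e:
    # Raised for unsupported regions, missing manifests, or an unexpected
    # controller type, per the checks in the function body above.
    logging.error("Could not build controller: %s", e)
else:
    logging.info("Built controller of type [%s]", type(controller).__name__)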
Example No. 4
    def test_raw_files_yaml_parses_all_regions(self) -> None:
        for region_code in self.region_dir_names:
            region = get_region(
                region_code,
                is_direct_ingest=True,
                region_module_override=self.region_module_override,
            )

            controller_class = region.get_ingestor_class()
            if not issubclass(controller_class, GcsfsDirectIngestController):
                continue

            builders = DirectIngestPreProcessedIngestViewCollector(
                region, controller_class.get_file_tag_rank_list()
            ).collect_view_builders()

            raw_file_manager = DirectIngestRegionRawFileConfig(
                region_code=region.region_code,
                region_module=self.region_module_override,
            )

            if builders or raw_file_manager.raw_file_configs:
                if region.raw_data_bq_imports_enabled_env is not None:
                    self.test.assertTrue(raw_file_manager.raw_file_configs)
                config_file_tags = set()
                for config in raw_file_manager.raw_file_configs.values():
                    self.test.assertTrue(
                        config.file_tag not in config_file_tags,
                        f"Multiple raw file configs defined with the same "
                        f"file_tag [{config.file_tag}]",
                    )
                    config_file_tags.add(config.file_tag)
Example No. 5
def infer_release():
    """Runs infer release for the given regions."""
    region_codes = validate_regions(
        get_str_param_values("region", request.args))
    regions = [get_region(region_code) for region_code in region_codes]

    for region in regions:
        with monitoring.push_tags(
            {monitoring.TagKey.REGION: region.region_code}):
            if region.agency_type != "jail":
                continue

            session = sessions.get_most_recent_completed_session(
                region.region_code)
            if session:
                logging.info(
                    "Got most recent completed session for [%s] with "
                    "start time [%s]",
                    region.region_code,
                    session.start,
                )
                persistence.infer_release_on_open_bookings(
                    region.region_code, session.start,
                    _get_custody_status(region))
                sessions.update_phase(session, scrape_phase.ScrapePhase.DONE)

    return "", HTTPStatus.OK
Example No. 6
def get_ingest_view_configs(
    region_code: str,
) -> List[DataDiscoveryStandardizedFileConfig]:
    """Collect ingest views for region; reads columns from their corresponding fixture csv"""
    if not StateCode.is_state_code(region_code):
        raise ValueError(
            f"Unknown region_code [{region_code}] received, must be a valid state code."
        )

    region_code = region_code.lower()

    views = DirectIngestPreProcessedIngestViewCollector(
        get_region(region_code, True), []).collect_view_builders()

    configs = []
    for view in views:
        try:
            # TODO(#6925) Infer columns from the mapping file rather than the fixture csv
            fixture_path = os.path.join(
                os.path.dirname(recidiviz.__file__),
                f"tests/ingest/direct/direct_ingest_fixtures/{region_code}/{view.ingest_view_name}.csv",
            )

            with open(fixture_path, "r") as f:
                columns = f.readline().split(",")
        except FileNotFoundError:
            continue

        standardized_config = DataDiscoveryStandardizedFileConfig(
            file_tag=view.ingest_view_name,
            columns=columns,
        )

        configs.append(standardized_config)

    return configs
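A brief usage sketch for the collector above; the state code is a placeholder, and the printed fields are just the two set on each `DataDiscoveryStandardizedFileConfig` in the loop.

# Hypothetical call; any valid state code (e.g. "US_PA", used elsewhere on
# this page) would pass the StateCode check.
configs = get_ingest_view_configs("US_PA")
for config in configs:
    # Each config pairs an ingest view name with the column headers read from
    # the first line of its fixture CSV.
    print(config.file_tag, len(config.columns))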
Example No. 7
    def _stop_scraper(region: str):
        logging.info("Trying to stop scraper for region [%s].", region)
        for scrape_type in scrape_types:
            key = ScrapeKey(region_code=region, scrape_type=scrape_type)
            session = sessions.get_current_session(key)
            if not session:
                logging.info(
                    "No [%s] scrape to stop for region: [%s]", scrape_type,
                    region)
                continue

            region_scraper = regions.get_region(region).get_ingestor()
            was_stopped = region_scraper.stop_scrape(scrape_type,
                                                     respect_is_stoppable)
            if was_stopped:
                closed_sessions = sessions.close_session(key)
                for closed_session in closed_sessions:
                    sessions.update_phase(closed_session,
                                          scrape_phase.ScrapePhase.PERSIST)
                if next_phase:
                    logging.info("Enqueueing %s for region [%s].",
                                 next_phase, region)
                    ScraperCloudTaskManager().create_scraper_phase_task(
                        region_code=region,
                        url=next_phase_url)
Example No. 8
    def test_region_controller_builds(self):
        for dir_path in self._get_existing_region_dir_paths():
            region_code = os.path.basename(dir_path)

            region = get_region(region_code, is_direct_ingest=True)
            with local_project_id_override('project'):
                self.assertIsNotNone(region.get_ingestor())
Example No. 9
def run_scraper(args: argparse.Namespace) -> None:
    use_in_memory_sqlite_database(JailsBase)

    region_codes = validate_regions(args.region.split(","))
    if not region_codes:
        sys.exit(1)
    failed_regions = []
    valid_region_codes = cast(Set[str], region_codes)
    for region_code in valid_region_codes:
        logging.info("***")
        logging.info("***")
        logging.info("Starting scraper for region: [%s]", region_code)
        logging.info("***")
        logging.info("***")
        try:
            run_scraper_for_region(regions.get_region(region_code), args)
        except Exception:
            print(traceback.format_exc())
            failed_regions.append(region_code)

    if failed_regions:
        logging.info("***")
        logging.info(
            "The following regions raised errors during scraping: " "[%s]",
            failed_regions,
        )
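A minimal sketch of driving the entry point above; the argparse flag name is an assumption, but the comma-separated `args.region` string is exactly what the function reads. Any other attributes that `run_scraper_for_region` consumes are omitted here.

import argparse

# Assumed flag name; only the resulting `region` attribute is used above.
parser = argparse.ArgumentParser()
parser.add_argument("--region", required=True,
                    help="Comma-separated region codes")
args = parser.parse_args(["--region", "us_xx,us_yy"])  # placeholder codes

run_scraper(args)  # splits args.region on "," and scrapes each region in turn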
Example No. 10
    def test_collect_and_build_ingest_view_builders(
            self, _name: str, project_id: str,
            environment: GCPEnvironment) -> None:
        with patch("recidiviz.utils.environment.get_gcp_environment",
                   return_value=environment):
            with patch("recidiviz.utils.metadata.project_id",
                       return_value=project_id):
                for region_code in self.region_dir_names:
                    region = get_region(
                        region_code,
                        is_direct_ingest=True,
                        region_module_override=self.region_module_override,
                    )

                    with patch(
                            "recidiviz.utils.metadata.project_id",
                            return_value="recidiviz-456",
                    ):
                        controller = DirectIngestControllerFactory.build(
                            ingest_bucket_path=self.primary_ingest_bucket_for_region(
                                region
                            ),
                            allow_unlaunched=True,
                        )

                    builders = DirectIngestPreProcessedIngestViewCollector(
                        region, controller.get_file_tag_rank_list()
                    ).collect_view_builders()
                    for builder in builders:
                        builder.build()
Example No. 11
def load_target_list(scrape_key: ScrapeKey,
                     given_names: str = "",
                     surname: str = ""):
    """Starts docket loading based on scrape type and region.

    Determines correct scrape type and kicks off target list generation,
    loading of docket items based on the target list.

    Args:
        scrape_key: (ScrapeKey) The scraper to load docket for
        given_names: Given names of where to begin
        surname: Surname of where to begin

    Returns:
        N/A
    """
    logging.info("Getting target list for scraper: [%s]", scrape_key)

    if scrape_key.scrape_type is constants.ScrapeType.BACKGROUND:
        region = regions.get_region(scrape_key.region_code)
        if region.names_file is not None:
            name_list_file = region.names_file
            filename = FILENAME_PREFIX + name_list_file

            query_name = (surname, given_names) if surname or given_names else None

            load_background_target_list(scrape_key, filename, query_name)
        else:
            load_empty_message(scrape_key)
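A short usage sketch for the docket loader above; the region code is a placeholder, and the background scrape type mirrors the branch the function actually takes.

# Hypothetical background-scrape key.
scrape_key = ScrapeKey(region_code="us_xx_yyy",
                       scrape_type=constants.ScrapeType.BACKGROUND)

# With no name filter, loading starts from the top of the region's names file
# (or an empty docket message if the region defines no names file).
load_target_list(scrape_key)

# Resuming partway through a names list instead:
load_target_list(scrape_key, given_names="John", surname="Doe")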
Example No. 12
    def generate_raw_file_docs_for_region(self, region_code: str) -> Dict[str, str]:
        """Generates documentation for all raw file configs for the given region and
        returns all of it as a combined string.

        Returns one Markdown-formatted string per raw file, mapped to its filename, as
        well as a header file with a table of contents.
        """
        region_config = DirectIngestRegionRawFileConfig(region_code=region_code)

        sorted_file_tags = sorted(region_config.raw_file_tags)

        if StateCode.is_state_code(region_code):
            state_code = StateCode(region_code.upper())
            state_name = state_code.get_state().name

            file_header = STATE_RAW_DATA_FILE_HEADER_TEMPLATE.format(
                state_name=state_name, state_code_lower=state_code.value.lower()
            )
        else:
            file_header = ""

        raw_file_configs = [
            region_config.raw_file_configs[file_tag] for file_tag in sorted_file_tags
        ]

        config_paths_by_file_tag = {
            file_tag: file_config.file_path
            for file_tag, file_config in region_config.raw_file_configs.items()
        }

        file_tags_with_raw_file_configs = [
            raw_file_config.file_tag for raw_file_config in raw_file_configs
        ]

        region = regions.get_region(region_code=region_code, is_direct_ingest=True)

        view_collector = DirectIngestPreProcessedIngestViewCollector(region, [])
        views_by_raw_file = self.get_referencing_views(view_collector)
        touched_configs = self._get_touched_raw_data_configs(
            region_config.yaml_config_file_dir
        )

        raw_file_table = self._generate_raw_file_table(
            config_paths_by_file_tag,
            file_tags_with_raw_file_configs,
            views_by_raw_file,
            touched_configs,
        )

        docs_per_file: Dict[str, str] = {
            f"{config.file_tag}.md": self._generate_docs_for_raw_config(config)
            for config in raw_file_configs
        }

        docs_per_file[STATE_RAW_DATA_FILE_HEADER_PATH] = (
            file_header + "\n" + raw_file_table
        )

        return docs_per_file
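Given the mapping returned above (one Markdown string per raw file plus the header/table-of-contents entry), a caller might persist it roughly as follows; the receiver object and output directory are assumptions for illustration.

import os

# Hypothetical caller instance and placeholder region/output directory.
docs_per_file = documentation_generator.generate_raw_file_docs_for_region("US_XX")

output_dir = "docs/us_xx"
os.makedirs(output_dir, exist_ok=True)
for file_name, markdown in docs_per_file.items():
    with open(os.path.join(output_dir, file_name), "w") as f:
        f.write(markdown)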
Example No. 13
def _region_for_region_code(region_code: str) -> Region:
    try:
        return regions.get_region(region_code.lower(), is_direct_ingest=True)
    except FileNotFoundError as e:
        raise DirectIngestError(
            msg=f"Region [{region_code}] has no registered manifest",
            error_type=DirectIngestErrorType.INPUT_ERROR,
        ) from e
Example No. 14
    def testParse(self):
        region = regions.get_region('us_ma_middlesex', is_direct_ingest=True)
        controller = region.get_ingestor()

        metadata = IngestMetadata(region.region_code, region.jurisdiction_id,
                                  _FAKE_START_TIME,
                                  controller.get_enum_overrides())

        ingest_info = UsMaMiddlesexParser().parse(_ROSTER_JSON)

        expected_info = IngestInfo()
        p1 = expected_info.create_person(
            person_id='12345       ',
            birthdate='1111-01-01 00:00:00.000',
            gender='M',
            ethnicity='HISPANIC',
            place_of_residence='123 ST DORCHESTER MA 01234     ')

        b1 = p1.create_booking(booking_id='1.0',
                               admission_date='2017-01-01 00:00:00.000',
                               admission_reason='BAIL MITTIMUS',
                               facility='MAIN      ')
        b1.create_charge(charge_id='1245.0',
                         statute='90/24/K',
                         name='OUI-LIQUOR, 2ND OFFENSE c90 ss24',
                         case_number='111.0',
                         court_type='Middlesex SC (81)',
                         charge_notes='Other')
        b1.create_charge(charge_id='1502.0',
                         offense_date='2017-01-28 00:00:00',
                         statute='90/23/J',
                         name='OUI while license suspended for OUI',
                         case_number='222.0',
                         court_type='Middlesex SC (81)',
                         charge_notes='Drug or Alcohol',
                         status='DISMISSED').create_bond(bond_id='12345.0')
        b1.create_hold(hold_id='00000.0',
                       jurisdiction_name='Middlesex SC (81)')

        p2 = expected_info.create_person(
            person_id='10472       ',
            birthdate='1111-02-02 00:00:00.000',
            gender='M',
            race='BLACK or AFRICAN AMERICAN',
            place_of_residence='456 ST MALDEN MA 98765      ')
        b2 = p2.create_booking(booking_id='333.0',
                               admission_date='2018-02-02 00:00:00.000',
                               admission_reason='SENTENCE MITTIMUS',
                               facility='MAIN      ')
        b2.create_arrest(agency='Cambridge PD')
        b2.create_charge(charge_id='12341234.0',
                         statute='269/10/J',
                         name='FIREARM, CARRY WITHOUT LICENSE c269 ss10',
                         case_number='555.0',
                         charge_notes='Other',
                         court_type='Cambridge DC (52)')

        self.validate_ingest(ingest_info, expected_info, metadata)
Example No. 15
    def _start_scraper(region, scrape_type):
        scrape_key = ScrapeKey(region, scrape_type)

        most_recent_session = next(
            sessions.get_sessions(
                region_code=scrape_key.region_code,
                include_closed=True,
                most_recent_only=True,
                scrape_type=scrape_key.scrape_type,
            ),
            None,
        )
        if most_recent_session and not most_recent_session.phase.has_persisted():
            raise Exception("Session already running for region [%s]. Could "
                            "not start a new session" % region)

        logging.info(
            "Purging pubsub queue for scrape_key: [%s] and pubsub_type: [%s]",
            scrape_key,
            BATCH_PUBSUB_TYPE,
        )
        pubsub_helper.purge(scrape_key, BATCH_PUBSUB_TYPE)

        logging.info("Starting new scraper for: [%s]", scrape_key)
        scraper = regions.get_region(region).get_scraper()

        current_session = sessions.create_session(scrape_key)

        # Help avoid race condition with new session info
        # vs updating that w/first task.
        time.sleep(1)

        # Clear prior query docket for this scrape type and start adding new
        # items in a background thread. In the case that there is a large
        # names list, loading it can take some time. Loading it in the
        # background allows us to start the scraper before it is fully
        # loaded.
        tracker.purge_docket_and_session(scrape_key)
        # Note, the request context isn't copied when launching this thread, so
        # any logs from within `load_target_list` will not be associated with
        # the start scraper request.
        load_docket_thread = threading.Thread(
            target=structured_logging.with_context(docket.load_target_list),
            args=(scrape_key, given_names, surname),
        )
        load_docket_thread.start()

        # Start scraper, if the docket is empty this will wait for a bounded
        # period of time for an item to be published (~90 seconds).
        logging.info("Starting [%s]/[%s] scrape...", region, scrape_type)
        scraper.start_scrape(scrape_type)

        sessions.update_phase(current_session, scrape_phase.ScrapePhase.SCRAPE)

        # Wait for the docket to be loaded
        load_docket_thread.join()
Example No. 16
    def test_region_controller_exists_and_builds(self):
        for dir_path in self._get_existing_region_dir_paths():
            region_code = os.path.basename(dir_path)
            controller_path = os.path.join(dir_path, f'{region_code}_controller.py')
            self.assertTrue(os.path.exists(controller_path), f'Path [{controller_path}] does not exist.')

            region = get_region(region_code, is_direct_ingest=True)
            with local_project_id_override('project'):
                self.assertIsNotNone(region.get_ingestor_class())
Example No. 17
    def __init__(self, region_name, system_level: SystemLevel):
        """Initialize the controller.

        Args:
            region_name: (str) the name of the region to be collected.
        """

        self.region = regions.get_region(region_name, is_direct_ingest=True)
        self.system_level = system_level
        self.cloud_task_manager = DirectIngestCloudTaskManagerImpl()
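The constructor above is shown without its enclosing class; a hedged instantiation sketch in which the subclass name is hypothetical and only the argument shapes come from the snippet (SystemLevel.COUNTY is one of the enum values used elsewhere on this page).

# Hypothetical subclass of the controller whose __init__ is shown above.
controller = UsXxIngestController("us_xx", SystemLevel.COUNTY)
print(controller.region.region_code)
print(controller.system_level)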
Example No. 18
    def test_collect_ingest_views(self):
        with local_project_id_override('project'):
            for region_code in self._get_existing_region_dir_names():
                region = get_region(region_code, is_direct_ingest=True)

                controller_class = region.get_ingestor_class()
                if not issubclass(controller_class, GcsfsDirectIngestController):
                    continue

                _ = DirectIngestPreProcessedIngestViewCollector(
                    region, controller_class.get_file_tag_rank_list()).collect_views()
Example No. 19
    def test_raw_files_yaml_parses_all_regions(self):
        for region_code in self._get_existing_region_dir_names():
            region = get_region(region_code, is_direct_ingest=True)

            raw_file_manager = DirectIngestRegionRawFileConfig(region_code=region.region_code)

            if region.raw_data_bq_imports_enabled_env is not None:
                self.assertTrue(raw_file_manager.raw_file_configs)

            for config in raw_file_manager.raw_file_configs.values():
                self.assertTrue(config.primary_key_cols)
Example No. 20
    def test_region_controller_builds(self) -> None:
        for dir_path in self.region_dir_paths:
            region_code = os.path.basename(dir_path)

            region = get_region(
                region_code,
                is_direct_ingest=True,
                region_module_override=self.region_module_override,
            )
            with local_project_id_override("project"):
                self.test.assertIsNotNone(region.get_ingestor())
Example No. 21
def _region_for_bucket(ingest_bucket_path: GcsfsBucketPath) -> Region:
    region_code = get_region_code_from_direct_ingest_bucket(
        ingest_bucket_path.bucket_name
    )
    if not region_code:
        raise ValueError(
            f"Found no region code for bucket [{ingest_bucket_path.uri()}]"
        )
    return regions.get_region(
        region_code=region_code.lower(),
        is_direct_ingest=True,
    )
Example No. 22
def __init__(
    self,
    region_code: str,
    allowed_root_entity_classes_override: Optional[List[
        Type[DatabaseEntity]]] = None,
) -> None:
    self.region_code = region_code.upper()
    self.region = get_region(region_code=self.region_code,
                             is_direct_ingest=True)
    self.allowed_root_entity_classes: List[Type[DatabaseEntity]] = (
        [schema.StatePerson] if not allowed_root_entity_classes_override
        else allowed_root_entity_classes_override)
Example No. 23
def _regions_matching_environment(region_codes: Set[str]) -> Set[str]:
    """Filter to regions with the matching environment.

    If we are running locally, include all supported regions.
    """
    if not environment.in_gae():
        return region_codes
    gae_env = environment.get_gae_environment()
    return {
        region_code
        for region_code in region_codes
        if regions.get_region(region_code).environment == gae_env
    }
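An illustrative sketch of the filter above; the codes are placeholders standing in for real supported regions, and the outcome depends on each region manifest's environment versus the running GAE environment.

# Placeholder region codes; locally (not in GAE) the set is returned unchanged,
# in GAE only regions whose manifest environment matches survive the filter.
candidate_codes = {"us_xx_yyy", "us_zz_www"}
launched_here = _regions_matching_environment(candidate_codes)
print(sorted(launched_here))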
Example No. 24
    def generate_raw_file_docs_for_region(self, region_code: str) -> str:
        """Generates documentation for all raw file configs for the given region and returns all of it
        as a combined string."""
        region_config = DirectIngestRegionRawFileConfig(
            region_code=region_code)

        sorted_file_tags = sorted(region_config.raw_file_tags)

        if StateCode.is_state_code(region_code):
            state_code = StateCode(region_code.upper())
            state_name = state_code.get_state()

            file_header = STATE_RAW_DATA_FILE_HEADER_TEMPLATE.format(
                state_name=state_name,
                state_code_lower=state_code.value.lower())
        else:
            file_header = ""

        raw_file_configs = [
            region_config.raw_file_configs[file_tag]
            for file_tag in sorted_file_tags
        ]

        config_paths_by_file_tag = {
            file_tag: file_config.file_path
            for file_tag, file_config in
            region_config.raw_file_configs.items()
        }

        file_tags_with_raw_file_configs = [
            raw_file_config.file_tag for raw_file_config in raw_file_configs
        ]

        region = regions.get_region(region_code=region_code,
                                    is_direct_ingest=True)

        view_collector = DirectIngestPreProcessedIngestViewCollector(
            region, [])
        views_by_raw_file = self.get_referencing_views(view_collector)

        raw_file_table = self._generate_raw_file_table(
            config_paths_by_file_tag, file_tags_with_raw_file_configs,
            views_by_raw_file)

        docs_per_file = [
            self._generate_docs_for_raw_config(config)
            for config in raw_file_configs
        ]

        return file_header + "\n" + raw_file_table + "\n" + "\n\n".join(
            docs_per_file)
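Unlike the per-file variant earlier on this page, this older version returns one combined string, so a caller would write a single document; the path and receiver below are assumptions.

# Hypothetical caller instance and placeholder output path.
markdown = documentation_generator.generate_raw_file_docs_for_region("US_XX")
with open("docs/us_xx_raw_data.md", "w") as f:
    f.write(markdown)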
Example No. 25
def persist_to_database(
    region_code: str, session_start_time: datetime.datetime
) -> bool:
    """Reads all of the ingest infos from Datastore for a region and persists
    them to the database.
    """
    region = regions.get_region(region_code)
    overrides = region.get_scraper_enum_overrides()

    ingest_info_data_list = _get_batch_ingest_info_list(region_code, session_start_time)

    logging.info("Received %s total ingest infos", len(ingest_info_data_list))
    if ingest_info_data_list:
        proto, failed_tasks = _get_proto_from_batch_ingest_info_data_list(
            ingest_info_data_list
        )

        if not proto.people:
            logging.error("Scrape session returned 0 people.")
            return False

        for batch_ingest_info_datum in failed_tasks.values():
            logging.error(
                "Task with trace_id %s failed with error %s",
                batch_ingest_info_datum.trace_id,
                batch_ingest_info_datum.error,
            )
        if _should_abort(len(failed_tasks), len(proto.people)):
            logging.error(
                "Too many scraper tasks failed(%s), aborting write", len(failed_tasks)
            )
            return False

        metadata = IngestMetadata(
            region=region_code,
            jurisdiction_id=region.jurisdiction_id,
            ingest_time=session_start_time,
            facility_id=region.facility_id,
            enum_overrides=overrides,
            system_level=SystemLevel.COUNTY,
            database_key=SQLAlchemyDatabaseKey.for_schema(SchemaType.JAILS),
        )

        did_write = persistence.write(proto, metadata)
        if did_write:
            datastore_ingest_info.batch_delete_ingest_infos_for_region(region_code)

        return did_write

    logging.error("No ingest infos received from Datastore")
    return False
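A hedged usage sketch for the persistence step above; the region code is a placeholder, and in practice the session start time would come from the scrape session being finalized rather than a hard-coded value.

import datetime
import logging

session_start = datetime.datetime(2021, 1, 1)  # placeholder start time
if persist_to_database("us_xx_yyy", session_start):
    logging.info("Batch persisted; Datastore ingest infos deleted for region")
else:
    logging.error("Persistence aborted; ingest infos left in Datastore")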
Example No. 26
    def __init__(self, region_name):
        """Initialize the parent scraper object.

        Args:
            region_name: (string) name of the region of the child scraper.

        """

        # Passing verify=False in the requests produces a warning,
        # disable it here.
        urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

        self.region = regions.get_region(region_name)
        self.scraper_work_url = '/scraper/work/{}'.format(region_name)
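As with the controller constructor earlier, the owning class is not shown; a hedged sketch with a hypothetical subclass name and a placeholder region.

# Hypothetical scraper subclass; "us_xx_yyy" is a placeholder region name.
scraper = UsXxYyyScraper("us_xx_yyy")
print(scraper.region.region_code)
print(scraper.scraper_work_url)  # "/scraper/work/us_xx_yyy"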
Example No. 27
    def setUp(self) -> None:
        super().setUp()
        view_builders = DirectIngestPreProcessedIngestViewCollector(
            get_region(STATE_CODE, is_direct_ingest=True),
            []).collect_view_builders()
        self.view_builder = one(view for view in view_builders
                                if view.file_tag == "person_external_ids")

        self.expected_result_columns = [
            "recidiviz_master_person_id",
            "control_numbers",
            "inmate_numbers",
            "parole_numbers",
        ]
Example No. 28
def get_validation_region_configs() -> Dict[str, ValidationRegionConfig]:
    """Reads all region configs for regions with configs defined in the recidiviz.validation.config.regions module. This
    is the set of regions we will run validations for, subject to the constraints defined in their validation config
    files.
    """

    validation_region_configs = {}
    for region_code, region_module_path in _get_validation_region_module_paths():
        region = regions.get_region(region_code.lower(), is_direct_ingest=True)
        if region.is_ingest_launched_in_env():
            config_path = os.path.join(region_module_path, f'{region_code.lower()}_validation_config.yaml')
            validation_region_configs[region_code.upper()] = ValidationRegionConfig.from_yaml(config_path)

    return validation_region_configs
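A short sketch of consuming the mapping returned above; attribute access on the config objects is left out because their fields are not shown in this snippet.

configs_by_region = get_validation_region_configs()
for region_code in sorted(configs_by_region):
    # Keys are upper-cased region codes for regions whose ingest is launched
    # in the current environment.
    print(f"Validations enabled for {region_code}")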
Example No. 29
def update_phase(session: ScrapeSession, phase: scrape_phase.ScrapePhase):
    """Updates the phase of the session to the given phase."""
    #  TODO(#1665): remove once dangling PERSIST session investigation
    #   is complete.
    logging.info("Updating phase from %s to %s", session.phase, phase)

    previous_phase = session.phase

    session.phase = phase
    retry_grpc(NUM_GRPC_RETRIES, ds().put, session.to_entity())

    if (previous_phase == scrape_phase.ScrapePhase.RELEASE
            and phase == scrape_phase.ScrapePhase.DONE):
        jid = regions.get_region(session.region).jurisdiction_id
        store_scraper_success(ScraperSuccess(), jid)
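A hedged usage sketch for the phase update above; the session comes from the sessions module as in the other examples on this page, and the region code is a placeholder.

session = sessions.get_most_recent_completed_session("us_xx_yyy")
if session:
    # The RELEASE -> DONE transition is the one that also records a
    # ScraperSuccess against the region's jurisdiction id.
    update_phase(session, scrape_phase.ScrapePhase.DONE)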
Example No. 30
    def test_region_controller_exists_and_builds(self) -> None:
        for dir_path in self.region_dir_paths:
            region_code = os.path.basename(dir_path)
            controller_path = os.path.join(dir_path, f"{region_code}_controller.py")
            self.test.assertTrue(
                os.path.exists(controller_path),
                f"Path [{controller_path}] does not exist.",
            )

            region = get_region(
                region_code,
                is_direct_ingest=True,
                region_module_override=self.region_module_override,
            )
            with local_project_id_override("project"):
                self.test.assertIsNotNone(region.get_ingestor_class())