def _instantiate_extractor(
        yaml_filename: str,
        primary_key_override: Callable = None,
) -> CsvDataExtractor:
    """Create a CsvDataExtractor for a YAML mapping file from the test data.

    Args:
        yaml_filename: Name of the mapping file, resolved against the
            ``../testdata/data_extractor/yaml`` directory relative to the
            directory containing this module.
        primary_key_override: Optional callable forwarded to the extractor as
            ``primary_key_override_callback``; defaults to ``None``.

    Returns:
        The CsvDataExtractor constructed for the resolved mapping path.
    """
    module_dir = os.path.dirname(__file__)
    mapping_path = os.path.join(
        module_dir, '../testdata/data_extractor/yaml', yaml_filename)
    extractor = CsvDataExtractor(
        mapping_path,
        primary_key_override_callback=primary_key_override,
    )
    return extractor
def _instantiate_extractor(
    yaml_filename: str,
    primary_key_override: Optional[PrimaryKeyOverrideCallable] = None,
) -> CsvDataExtractor:
    """Create a CsvDataExtractor for a YAML mapping file from the test data.

    Args:
        yaml_filename: Name of the mapping file, resolved against the
            ``../testdata/data_extractor/yaml`` directory relative to the
            directory containing this module.
        primary_key_override: Optional callback forwarded to the extractor as
            ``primary_key_override_callback``; defaults to ``None``.

    Returns:
        The CsvDataExtractor constructed for the resolved mapping path, built
        with ``hook_context=None``.
    """
    module_dir = os.path.dirname(__file__)
    mapping_path = os.path.join(
        module_dir, "../testdata/data_extractor/yaml", yaml_filename
    )
    extractor = CsvDataExtractor(
        mapping_path,
        hook_context=None,
        primary_key_override_callback=primary_key_override,
    )
    return extractor
def _parse(self,
           args: GcsfsIngestArgs,
           contents_handle: GcsfsFileContentsHandle) -> IngestInfo:
    """Run the CSV data extractor over the contents of one ingest file.

    The file tag is derived from ``args.file_path``. The mapping file path
    and the per-file hooks (row pre/post processors, file post processors,
    primary key override, ancestor chain overrides) are looked up for that
    tag and handed to a CsvDataExtractor, which is then fed the iterator
    obtained from ``contents_handle``.

    Args:
        args: Ingest arguments; only ``file_path`` is read here.
        contents_handle: Handle supplying the file contents iterator.

    Returns:
        Whatever ``CsvDataExtractor.extract_and_populate_data`` returns for
        the file contents.

    Raises:
        DirectIngestError: With ``DirectIngestErrorType.INPUT_ERROR`` when the
            file tag is not in ``self.get_file_tag_rank_list()``.
    """
    file_tag = self.file_tag(args.file_path)
    gating_context = IngestGatingContext(
        file_tag=file_tag,
        ingest_instance=self.ingest_instance,
    )

    # Refuse tags that are not in the rank list before doing any lookups.
    ranked_tags = self.get_file_tag_rank_list()
    if file_tag not in ranked_tags:
        raise DirectIngestError(
            msg=f"No mapping found for tag [{file_tag}]",
            error_type=DirectIngestErrorType.INPUT_ERROR,
        )

    mapping_path = self._yaml_filepath(file_tag)
    pre_row_hooks = self._get_row_pre_processors_for_file(file_tag)
    post_row_hooks = self._get_row_post_processors_for_file(file_tag)
    post_file_hooks = self._get_file_post_processors_for_file(file_tag)
    # pylint: disable=assignment-from-none
    pk_override = self._get_primary_key_override_for_file(file_tag)
    # pylint: disable=assignment-from-none
    ancestor_overrides = self._get_ancestor_chain_overrides_callback_for_file(
        file_tag)

    empty_value_tags = self._get_files_to_set_with_empty_values()
    set_with_empty_values = gating_context.file_tag in empty_value_tags

    # Arguments are passed positionally, in the order the constructor expects.
    extractor = CsvDataExtractor(
        mapping_path,
        gating_context,
        pre_row_hooks,
        post_row_hooks,
        post_file_hooks,
        ancestor_overrides,
        pk_override,
        self.system_level,
        set_with_empty_values,
    )

    contents_iterator = contents_handle.get_contents_iterator()
    return extractor.extract_and_populate_data(contents_iterator)