示例#1
0
def _instantiate_extractor(yaml_filename: str,
                           primary_key_override: Callable = None) \
        -> CsvDataExtractor:
    """Build a CsvDataExtractor from a YAML mapping in the test data tree.

    Args:
        yaml_filename: Name of the YAML file, joined onto the
            '../testdata/data_extractor/yaml' directory relative to the
            directory containing this module.
        primary_key_override: Forwarded to the extractor as its
            `primary_key_override_callback` keyword argument; defaults to
            None.

    Returns:
        The newly constructed CsvDataExtractor.
    """
    module_dir = os.path.dirname(__file__)
    mapping_path = os.path.join(
        module_dir, '../testdata/data_extractor/yaml', yaml_filename)
    extractor = CsvDataExtractor(
        mapping_path, primary_key_override_callback=primary_key_override)
    return extractor
def _instantiate_extractor(
    yaml_filename: str,
    primary_key_override: Optional[PrimaryKeyOverrideCallable] = None,
) -> CsvDataExtractor:
    """Build a CsvDataExtractor from a YAML mapping in the test data tree.

    Args:
        yaml_filename: Name of the YAML file, joined onto the
            "../testdata/data_extractor/yaml" directory relative to the
            directory containing this module.
        primary_key_override: Forwarded to the extractor as its
            `primary_key_override_callback` keyword argument; defaults to
            None.

    Returns:
        The newly constructed CsvDataExtractor, created with
        `hook_context=None`.
    """
    module_dir = os.path.dirname(__file__)
    mapping_path = os.path.join(
        module_dir, "../testdata/data_extractor/yaml", yaml_filename)
    extractor = CsvDataExtractor(
        mapping_path,
        hook_context=None,
        primary_key_override_callback=primary_key_override,
    )
    return extractor
示例#3
0
    def _parse(self, args: GcsfsIngestArgs,
               contents_handle: GcsfsFileContentsHandle) -> IngestInfo:
        """Run the CSV data extractor over one ingest file's contents.

        The file tag is derived from `args.file_path`; the per-tag YAML
        mapping, processors and override callbacks are then looked up and
        handed to a CsvDataExtractor.

        Args:
            args: Ingest arguments; only `args.file_path` is read here.
            contents_handle: Handle whose `get_contents_iterator()` result
                is fed to the extractor.

        Returns:
            Whatever `CsvDataExtractor.extract_and_populate_data` returns
            for the file's contents.

        Raises:
            DirectIngestError: With error type INPUT_ERROR when the file
                tag is not in `self.get_file_tag_rank_list()`.
        """
        file_tag = self.file_tag(args.file_path)
        context = IngestGatingContext(
            file_tag=file_tag, ingest_instance=self.ingest_instance)

        # Bail out early for tags that are not in the rank list.
        known_tags = self.get_file_tag_rank_list()
        if file_tag not in known_tags:
            raise DirectIngestError(
                msg=f"No mapping found for tag [{file_tag}]",
                error_type=DirectIngestErrorType.INPUT_ERROR,
            )

        mapping_path = self._yaml_filepath(file_tag)

        # Per-file hooks, looked up in the same order as they are passed on.
        pre_row_hooks = self._get_row_pre_processors_for_file(file_tag)
        post_row_hooks = self._get_row_post_processors_for_file(file_tag)
        post_file_hooks = self._get_file_post_processors_for_file(file_tag)
        # pylint: disable=assignment-from-none
        pk_override = self._get_primary_key_override_for_file(file_tag)
        # pylint: disable=assignment-from-none
        ancestor_overrides = (
            self._get_ancestor_chain_overrides_callback_for_file(file_tag))

        empty_value_files = self._get_files_to_set_with_empty_values()
        set_with_empty_values = context.file_tag in empty_value_files

        # Arguments are positional; their order must match the constructor.
        extractor = CsvDataExtractor(
            mapping_path,
            context,
            pre_row_hooks,
            post_row_hooks,
            post_file_hooks,
            ancestor_overrides,
            pk_override,
            self.system_level,
            set_with_empty_values,
        )

        contents = contents_handle.get_contents_iterator()
        return extractor.extract_and_populate_data(contents)