Пример #1
0
    def run_discovery(self, plugin_invoker, exec_args=[]):
        """Run the extractor with ``--discover`` and store the output as the catalog.

        The command's standard output is written directly into
        ``plugin_invoker.files["catalog"]``. The file is removed again whenever
        discovery fails, so an empty or partial catalog is never left behind.

        Args:
            plugin_invoker: Invoker for the extractor. This method reads its
                ``capabilities`` and ``files`` and calls its ``invoke`` and
                ``exec_args`` methods.
            exec_args: Not used by this method.

        Raises:
            PluginLacksCapabilityError: If ``discover`` is not listed in the
                invoker's capabilities.
            PluginExecutionError: If the command returns a non-zero exit code,
                or if the written file cannot be loaded as JSON and passed
                through ``Draft4Validator.check_schema``.
        """
        if "discover" not in plugin_invoker.capabilities:
            raise PluginLacksCapabilityError(
                f"Extractor '{self.name}' does not support schema discovery"
            )

        properties_file = plugin_invoker.files["catalog"]

        try:
            with properties_file.open("w") as catalog:
                result = plugin_invoker.invoke(
                    "--discover",
                    stdout=catalog,
                    stderr=subprocess.PIPE,
                    universal_newlines=True,
                )
                # stdout is redirected into the catalog file, so only the
                # captured stderr is of interest here.
                _, stderr = result.communicate()
                exit_code = result.returncode
        except Exception:
            # The file was already created (and truncated) by `open("w")`;
            # remove it so a failed invocation leaves no bogus catalog behind.
            try:
                properties_file.unlink()
            except FileNotFoundError:
                pass
            raise

        if exit_code != 0:
            properties_file.unlink()
            raise PluginExecutionError(
                f"Schema discovery failed: command {plugin_invoker.exec_args('--discover')} returned {exit_code}: {stderr.rstrip()}"
            )

        # test for the schema to be a valid catalog
        try:
            with properties_file.open("r") as catalog:
                Draft4Validator.check_schema(json.load(catalog))
        except Exception as err:
            properties_file.unlink()
            raise PluginExecutionError(
                "Schema discovery failed: invalid catalog output by --discover."
            ) from err
Пример #2
0
    def apply_select(self, plugin_invoker, exec_args=[]):
        """Apply the invoker's select patterns to the catalog file in place.

        The catalog is loaded from ``plugin_invoker.files["catalog"]``, visited
        first by a ``SelectExecutor`` built from the ``!*.*`` pattern and then
        by one built from ``plugin_invoker.select``, and finally written back
        to the same file.

        Args:
            plugin_invoker: Invoker for the extractor; its ``capabilities``,
                ``files`` and ``select`` attributes are read.
            exec_args: Not used by this method.

        Raises:
            PluginLacksCapabilityError: If neither ``catalog`` nor
                ``properties`` is listed in the invoker's capabilities.
            PluginExecutionError: If the catalog file is missing, or if any
                other error occurs while processing it (in which case the
                file is deleted).
        """
        capabilities = plugin_invoker.capabilities
        if not ("catalog" in capabilities or "properties" in capabilities):
            raise PluginLacksCapabilityError(
                f"Extractor '{self.name}' does not support selection")

        catalog_path = plugin_invoker.files["catalog"]

        try:
            with catalog_path.open() as source:
                catalog = json.load(source)

            # Both executors are built up front and then run in this order:
            # the `!*.*` pass first, the configured select patterns second.
            executors = (
                SelectExecutor(["!*.*"]),
                SelectExecutor(plugin_invoker.select),
            )
            for executor in executors:
                executor.visit(catalog)

            with catalog_path.open("w") as target:
                json.dump(catalog, target)
        except FileNotFoundError as err:
            raise PluginExecutionError(
                f"Selection failed: catalog file is missing.") from err
        except Exception as err:
            # Any other failure is treated as an unusable catalog: drop it.
            catalog_path.unlink()
            raise PluginExecutionError(
                f"Selection failed: catalog file is invalid: {catalog_path}"
            ) from err
Пример #3
0
    def apply_metadata_rules(self, plugin_invoker, exec_args=[]):
        """Apply select- and config-derived metadata rules to the catalog file.

        The rule list is assembled, in order, from
        ``select_metadata_rules(["!*.*"])``,
        ``select_metadata_rules(plugin_invoker.select)`` and
        ``config_metadata_rules(plugin_invoker.plugin_config)``. A
        ``MetadataExecutor`` built from those rules visits the catalog loaded
        from ``plugin_invoker.files["catalog"]``, and the result is written
        back to the same file.

        Args:
            plugin_invoker: Invoker for the extractor; its ``capabilities``,
                ``files``, ``select`` and ``plugin_config`` are read.
            exec_args: Not used by this method.

        Raises:
            PluginLacksCapabilityError: If neither ``catalog`` nor
                ``properties`` is listed in the invoker's capabilities.
            PluginExecutionError: If the catalog file is missing, or if any
                other error occurs while processing it (in which case the
                file is deleted).
        """
        capabilities = plugin_invoker.capabilities
        if not ("catalog" in capabilities or "properties" in capabilities):
            raise PluginLacksCapabilityError(
                f"Extractor '{self.name}' does not support entity selection or metadata rules"
            )

        catalog_path = plugin_invoker.files["catalog"]

        try:
            with catalog_path.open() as source:
                catalog = json.load(source)

            # Rule order is significant to the executor, so build it step by
            # step: `!*.*` first, then select patterns, then config rules.
            rules = []
            rules.extend(select_metadata_rules(["!*.*"]))
            rules.extend(select_metadata_rules(plugin_invoker.select))
            rules.extend(config_metadata_rules(plugin_invoker.plugin_config))

            MetadataExecutor(rules).visit(catalog)

            with catalog_path.open("w") as target:
                json.dump(catalog, target)
        except FileNotFoundError as err:
            raise PluginExecutionError(
                f"Applying metadata rules failed: catalog file is missing."
            ) from err
        except Exception as err:
            # Any other failure is treated as an unusable catalog: drop it.
            catalog_path.unlink()
            raise PluginExecutionError(
                f"Applying metadata rules failed: catalog file is invalid: {catalog_path}"
            ) from err
Пример #4
0
    def apply_catalog_rules(self, plugin_invoker, exec_args=[]):
        """Apply schema and metadata rules from the plugin extras to the catalog.

        Rules are derived from ``plugin_invoker.plugin_config_extras``:

        * when ``_catalog`` is falsy, schema rules come from ``_schema`` and
          metadata rules from the ``!*.*`` pattern, ``_select`` and
          ``_metadata``;
        * metadata rules from ``_select_filter`` are appended in every case.

        If no rules result, the method returns without touching any file.
        Otherwise the catalog at ``plugin_invoker.files["catalog"]`` is
        rewritten, and the file at ``plugin_invoker.files["catalog_cache_key"]``
        is set to the value of ``self.catalog_cache_key(plugin_invoker)`` or
        removed when that value is falsy.

        Args:
            plugin_invoker: Invoker for the extractor; its ``capabilities``,
                ``plugin_config_extras`` and ``files`` are read.
            exec_args: Not used by this method.

        Raises:
            PluginLacksCapabilityError: If neither ``catalog`` nor
                ``properties`` is listed in the invoker's capabilities.
            PluginExecutionError: If a required file is missing, or if any
                other error occurs while processing the catalog (in which
                case the catalog file is deleted).
        """
        capabilities = plugin_invoker.capabilities
        if not ("catalog" in capabilities or "properties" in capabilities):
            raise PluginLacksCapabilityError(
                f"Extractor '{self.name}' does not support entity selection or catalog metadata and schema rules"
            )

        extras = plugin_invoker.plugin_config_extras

        if extras["_catalog"]:
            # If a custom catalog is provided, don't apply catalog rules
            schema_rules = []
            metadata_rules = []
        else:
            schema_rules = list(config_schema_rules(extras["_schema"]))
            metadata_rules = [
                *select_metadata_rules(["!*.*"]),
                *select_metadata_rules(extras["_select"]),
                *config_metadata_rules(extras["_metadata"]),
            ]

        # Always apply select filters (`meltano elt` `--select` and `--exclude` options)
        metadata_rules += select_filter_metadata_rules(extras["_select_filter"])

        if not (schema_rules or metadata_rules):
            return

        catalog_path = plugin_invoker.files["catalog"]
        cache_key_path = plugin_invoker.files["catalog_cache_key"]

        try:
            with catalog_path.open() as source:
                catalog = json.load(source)

            # Schema rules run before metadata rules.
            if schema_rules:
                SchemaExecutor(schema_rules).visit(catalog)

            if metadata_rules:
                MetadataExecutor(metadata_rules).visit(catalog)

            with catalog_path.open("w") as target:
                json.dump(catalog, target, indent=2)

            cache_key = self.catalog_cache_key(plugin_invoker)
            if not cache_key:
                # No key available: make sure no stale key file remains.
                try:
                    cache_key_path.unlink()
                except FileNotFoundError:
                    pass
            else:
                cache_key_path.write_text(cache_key)
        except FileNotFoundError as err:
            raise PluginExecutionError(
                f"Applying catalog rules failed: catalog file is missing."
            ) from err
        except Exception as err:
            # Any other failure is treated as an unusable catalog: drop it.
            catalog_path.unlink()
            raise PluginExecutionError(
                f"Applying catalog rules failed: catalog file is invalid: {err}"
            ) from err
Пример #5
0
    def discover_catalog(self, plugin_invoker, exec_args=[]):
        """Ensure a valid catalog file exists, reusing the cached one when possible.

        Steps, in order:

        1. If the catalog file exists and the stored cache key equals
           ``self.catalog_cache_key(plugin_invoker)``, return immediately.
        2. Otherwise delete the cache key file, then either copy the custom
           catalog named by the ``_catalog`` extra (resolved against the
           project root) or call ``self.run_discovery``.
        3. Load the resulting file as JSON and pass it through
           ``Draft4Validator.check_schema``; delete it if that fails.

        Args:
            plugin_invoker: Invoker for the extractor; its ``files``,
                ``plugin_config_extras`` and ``project`` are read.
            exec_args: Not used by this method.

        Raises:
            PluginExecutionError: If the custom catalog file cannot be found or
                the resulting catalog fails to load or validate. Errors raised
                by ``self.run_discovery`` propagate unchanged.
        """
        catalog_path = plugin_invoker.files["catalog"]
        catalog_cache_key_path = plugin_invoker.files["catalog_cache_key"]

        if catalog_path.exists():
            try:
                cached_key = catalog_cache_key_path.read_text()
                new_cache_key = self.catalog_cache_key(plugin_invoker)

                if cached_key == new_cache_key:
                    logger.debug("Using cached catalog file")
                    return
            except FileNotFoundError:
                # No cache key stored: treat the existing catalog as stale.
                pass

            # Use the module logger like every other message in this method,
            # rather than the root logger via `logging.debug`.
            logger.debug("Cached catalog is outdated, running discovery...")

        # We're gonna generate a new catalog, so delete the cache key.
        try:
            catalog_cache_key_path.unlink()
        except FileNotFoundError:
            pass

        custom_catalog_filename = plugin_invoker.plugin_config_extras[
            "_catalog"]
        if custom_catalog_filename:
            custom_catalog_path = plugin_invoker.project.root.joinpath(
                custom_catalog_filename)

            try:
                shutil.copy(custom_catalog_path, catalog_path)
                logger.info(f"Found catalog in {custom_catalog_path}")
            except FileNotFoundError as err:
                raise PluginExecutionError(
                    f"Could not find catalog file {custom_catalog_path}"
                ) from err
        else:
            self.run_discovery(plugin_invoker, catalog_path)

        # test for the result to be a valid catalog
        try:
            with catalog_path.open("r") as catalog_file:
                catalog = json.load(catalog_file)
                Draft4Validator.check_schema(catalog)
        except Exception as err:
            # Never keep a catalog that failed to load or validate.
            catalog_path.unlink()
            raise PluginExecutionError(
                f"Catalog discovery failed: invalid catalog: {err}") from err
Пример #6
0
    def run_discovery(self, plugin_invoker, catalog_path):
        """Run the extractor with ``--discover`` and write its output to a file.

        The command's standard output is redirected into ``catalog_path``. If
        the invocation raises or the command returns a non-zero exit code, the
        file is removed so no empty or partial catalog is left behind.

        Args:
            plugin_invoker: Invoker for the extractor. This method reads its
                ``capabilities`` and calls its ``invoke`` and ``exec_args``
                methods.
            catalog_path: Path object (supporting ``open`` and ``unlink``)
                that receives the discovery output.

        Raises:
            PluginLacksCapabilityError: If ``discover`` is not listed in the
                invoker's capabilities.
            PluginExecutionError: If the command returns a non-zero exit code.
            Exception: Any error raised while opening the file or invoking the
                plugin is re-raised unchanged after cleanup.
        """
        if "discover" not in plugin_invoker.capabilities:
            raise PluginLacksCapabilityError(
                f"Extractor '{self.name}' does not support catalog discovery (the `discover` capability is not advertised)"
            )

        try:
            with catalog_path.open("w") as catalog:
                result = plugin_invoker.invoke(
                    "--discover",
                    stdout=catalog,
                    stderr=subprocess.PIPE,
                    universal_newlines=True,
                )
                # stdout is redirected into the catalog file, so only the
                # captured stderr is of interest here.
                _, stderr = result.communicate()
                exit_code = result.returncode
        except Exception:
            # Clean up, but tolerate the file not existing (e.g. `open` itself
            # failed) so the cleanup never masks the original error.
            try:
                catalog_path.unlink()
            except FileNotFoundError:
                pass
            raise

        if exit_code != 0:
            catalog_path.unlink()
            raise PluginExecutionError(
                f"Catalog discovery failed: command {plugin_invoker.exec_args('--discover')} returned {exit_code}: {stderr.rstrip()}"
            )
Пример #7
0
    def list_all(self, session) -> ListSelectedExecutor:
        """Return a ``ListSelectedExecutor`` that has visited the loaded catalog.

        Args:
            session: Passed through to ``self.load_catalog``.

        Returns:
            The ``ListSelectedExecutor`` instance after visiting the catalog.

        Raises:
            PluginExecutionError: If ``self.load_catalog`` raises
                ``FileNotFoundError``.
        """
        try:
            loaded_catalog = self.load_catalog(session)
        except FileNotFoundError as err:
            raise PluginExecutionError(
                f"Could not find catalog. Verify that the tap supports discovery mode and advertises the `discover` capability as well as either `catalog` or `properties`"
            ) from err
        else:
            executor = ListSelectedExecutor()
            executor.visit(loaded_catalog)
            return executor
Пример #8
0
    def look_up_state(self, plugin_invoker, exec_args=[]):
        """Prepare the extractor's state file for the current run.

        Any existing file at ``plugin_invoker.files["state"]`` is deleted
        first. After that, the method returns without writing anything when
        there is no context or job, or when the context requests a full
        refresh. Otherwise the state file is populated from one of two sources:

        * the custom state file named by the ``_state`` extra (resolved
          against the project root), copied as-is; or
        * the ``singer_state`` payloads of the jobs found through
          ``JobFinder``, merged into one dict and dumped as JSON.

        Args:
            plugin_invoker: Invoker for the extractor; its ``capabilities``,
                ``files``, ``context``, ``plugin_config_extras`` and
                ``project`` are read.
            exec_args: Not used by this method.

        Raises:
            PluginLacksCapabilityError: If ``state`` is not listed in the
                invoker's capabilities.
            PluginExecutionError: If the custom state file cannot be found.
        """
        if "state" not in plugin_invoker.capabilities:
            raise PluginLacksCapabilityError(
                f"Extractor '{self.name}' does not support incremental state")

        state_path = plugin_invoker.files["state"]

        # Delete state left over from different pipeline run for same extractor
        try:
            state_path.unlink()
        except FileNotFoundError:
            pass

        elt_context = plugin_invoker.context
        if not (elt_context and elt_context.job):
            # Running outside pipeline context: incremental state could not be loaded
            return

        if elt_context.full_refresh:
            logger.info(
                "Performing full refresh, ignoring state left behind by any previous runs."
            )
            return

        custom_state_filename = plugin_invoker.plugin_config_extras["_state"]
        if custom_state_filename:
            source_path = plugin_invoker.project.root.joinpath(
                custom_state_filename)

            try:
                shutil.copy(source_path, state_path)
                logger.info(f"Found state in {custom_state_filename}")
            except FileNotFoundError as err:
                raise PluginExecutionError(
                    f"Could not find state file {source_path}") from err

            return

        # the `state.json` is stored in the database
        finder = JobFinder(elt_context.job.job_id)
        merged_state = {}
        incomplete_since = None

        latest_job = finder.latest_with_payload(elt_context.session,
                                                flags=Payload.STATE)
        if latest_job:
            logger.info(f"Found state from {latest_job.started_at}.")
            # Only incomplete-state jobs after this point are queried below.
            incomplete_since = latest_job.ended_at
            if "singer_state" in latest_job.payload:
                merge(latest_job.payload["singer_state"], merged_state)

        for incomplete_job in finder.with_payload(
                elt_context.session,
                flags=Payload.INCOMPLETE_STATE,
                since=incomplete_since):
            logger.info(
                f"Found and merged incomplete state from {incomplete_job.started_at}."
            )
            if "singer_state" in incomplete_job.payload:
                merge(incomplete_job.payload["singer_state"], merged_state)

        if not merged_state:
            logger.warning("No state was found, complete import.")
            return

        with state_path.open("w") as state_file:
            json.dump(merged_state, state_file, indent=2)