Пример #1
0
def _get_local_image_for_import(hash_or_tag: str,
                                repository: Repository) -> Tuple[Image, bool]:
    """
    Resolve a repository and a hash/tag to an Image that is available locally.

    The repository is looked up (local repositories included). If it resolves to
    an engine other than ``LOCAL``, it is cloned (without downloading all objects)
    into a ``<name>_tmp_clone`` repository in the same namespace and the image is
    taken from that clone. If it resolves locally and has an upstream, it is
    pulled first and the image is taken from the repository itself.

    :param hash_or_tag: Hash or tag identifying the image
    :param repository: Repository to resolve (doesn't need to exist locally)
    :return: Tuple of (image, flag). The flag is True when the image lives in the
        temporary clone, i.e. the caller should delete that repository once the
        image is no longer needed.
    """
    scratch_repo = Repository(repository.namespace,
                              repository.repository + "_tmp_clone")

    logging.info("Resolving repository %s", repository)
    resolved = lookup_repository(repository.to_schema(), include_local=True)

    if resolved.engine.name == "LOCAL":
        # A local repository might be a clone of a remote one: refresh it first.
        if resolved.upstream:
            resolved.pull()
        return resolved.images[hash_or_tag], False

    # Remote repository: make a lightweight temporary clone and use the image from it.
    clone(resolved, local_repository=scratch_repo, download_all=False)
    return scratch_repo.images[hash_or_tag], True
Пример #2
0
def test_mount_elasticsearch(local_engine_empty):
    """
    Mount an Elasticsearch-backed repository and check the schema of the mounted table.

    The repository is deleted at the end regardless of the outcome.
    """
    # No ES running in this stack: this is just a test that we can instantiate the FDW.
    repo = Repository("test", "es_mount")

    mount_options = {
        "username": None,
        "password": None,
        "server": "elasticsearch",
        "port": 9200,
        "table_spec": {
            "table_1": {
                "schema": {
                    "id": "text",
                    "@timestamp": "timestamp",
                    "query": "text",
                    "col_1": "text",
                    "col_2": "boolean",
                },
                "index": "index-pattern*",
                "rowid_column": "id",
                "query_column": "query",
            }
        },
    }

    # (column name, type as reported by the engine), in ordinal order.
    expected_columns = [
        ("id", "text"),
        ("@timestamp", "timestamp without time zone"),
        ("query", "text"),
        ("col_1", "text"),
        ("col_2", "boolean"),
    ]

    try:
        mount(repo.to_schema(), "elasticsearch", mount_options)

        actual_schema = get_engine().get_full_table_schema(repo.to_schema(), "table_1")
        assert actual_schema == [
            TableColumn(ordinal=position, name=col_name, pg_type=col_type, is_pk=False, comment=None)
            for position, (col_name, col_type) in enumerate(expected_columns, start=1)
        ]
    finally:
        repo.delete()
Пример #3
0
def _execute_custom(node: Node, output: Repository) -> ProvenanceLine:
    """
    Execute a custom Splitfile command against the output repository.

    The command name is parsed out of the node and looked up in the ``commands``
    section of the config, where it must map to a fully qualified class path
    (``path.to.command.Class``). The class is imported and instantiated, and
    the resulting image hash is derived from the current head's hash combined
    with the command's hash. If an image with that hash already exists, it is
    checked out instead of committing a new one.

    :param node: Parse node of the custom command; its text is used as the commit comment.
    :param output: Repository to run the command against. Must have a checked-out head.
    :return: Provenance entry ``{"type": "CUSTOM"}``.
    :raises SplitfileError: if the command is missing from the config, if its configured
        value is not of the form ``module.Class``, or if the class cannot be imported.
    """
    assert output.head is not None
    command, args = parse_custom_command(node)

    # Locate the command in the config file and instantiate it.
    cmd_fq_class: str = cast(
        str,
        get_all_in_section(CONFIG, "commands").get(command))
    if not cmd_fq_class:
        raise SplitfileError(
            "Custom command {0} not found in the config! Make sure you add an entry to your"
            " config like so:\n  [commands]  \n{0}=path.to.command.Class".
            format(command))

    assert isinstance(cmd_fq_class, str)
    # Split "path.to.module.Class" on the last dot. A value with no dot (or with
    # nothing on one side of it) can't be imported: report it as a SplitfileError
    # rather than letting a bare ValueError escape.
    module_name, _, class_name = cmd_fq_class.rpartition(".")
    if not module_name or not class_name:
        raise SplitfileError(
            "Error loading custom command {0}: {1!r} is not a fully qualified class path"
            " (expected something like path.to.command.Class)".format(command, cmd_fq_class))
    try:
        cmd_class = getattr(import_module(module_name), class_name)
    except (AttributeError, ImportError) as e:
        raise SplitfileError(
            "Error loading custom command {0}".format(command)) from e

    get_engine().run_sql("SET search_path TO %s", (output.to_schema(), ))
    command_instance = cmd_class()

    # Pre-flight check: get the new command hash and see if we can short-circuit and just check the image out.
    command_hash = command_instance.calc_hash(repository=output, args=args)
    output_head = output.head.image_hash

    if command_hash is not None:
        image_hash = _combine_hashes([output_head, command_hash])
        try:
            output.images.by_hash(image_hash).checkout()
            logging.info(" ---> Using cache")
            return {"type": "CUSTOM"}
        except ImageNotFoundError:
            # No cached image for this hash: fall through and run the command.
            pass

    logging.info(" Executing custom command...")
    exec_hash = command_instance.execute(repository=output, args=args)
    # Prefer the pre-computed hash, then the one returned by execute(); if the
    # command provides neither, use a random 256-bit hex hash.
    command_hash = command_hash or exec_hash or "{:064x}".format(
        getrandbits(256))

    image_hash = _combine_hashes([output_head, command_hash])
    logging.info(" ---> %s", image_hash[:12])

    # Check just in case if the new hash produced by the command already exists.
    try:
        output.images.by_hash(image_hash).checkout()
    except ImageNotFoundError:
        # Full command as a commit comment
        output.commit(image_hash, comment=node.text)
    return {"type": "CUSTOM"}
Пример #4
0
def run_patched_sync(
    repository: Repository,
    base_image: Optional[Image],
    new_image_hash: str,
    delete_old: bool,
    failure: str,
    input_stream: Optional[BinaryIO] = None,
    output_stream: Optional[TextIO] = None,
):
    """
    Run ``target_postgres.persist_lines`` with ``target_postgres.DbSync`` temporarily
    replaced by a wrapper bound to the new image and a staging schema.

    While the sync runs, ``sys.stdout`` is optionally redirected to ``output_stream``.
    Both ``sys.stdout`` and ``target_postgres.DbSync`` are restored and the staging
    schema is dropped afterwards, whether or not the sync succeeded.

    :param repository: Repository whose engines and images are used.
    :param base_image: Previous image, if any. Deleted after a successful sync when
        ``delete_old`` is set, or after a failed one when ``failure`` is ``"delete-old"``.
    :param new_image_hash: Hash of the image (looked up in ``repository.images``) that
        the wrapped DbSync is built around. Deleted on failure when ``failure`` is
        ``"delete-new"``.
    :param delete_old: Whether to delete ``base_image`` once the sync succeeds.
    :param failure: What to delete if the sync raises: ``"delete-new"`` or
        ``"delete-old"``. Any other value deletes nothing.
    :param input_stream: Binary stream of messages, decoded as UTF-8. Defaults to
        ``sys.stdin.buffer``.
    :param output_stream: If given, ``sys.stdout`` points at it for the duration of the sync.
    """
    if not input_stream:
        input_stream = sys.stdin.buffer

    # Build a staging schema
    scratch_schema = "sg_tmp_" + repository.to_schema()
    repository.object_engine.delete_schema(scratch_schema)
    repository.object_engine.create_schema(scratch_schema)
    repository.commit_engines()

    sync_config = _prepare_config_params(repository)

    # Remember what we are about to monkeypatch so that it can be put back.
    original_db_sync = target_postgres.DbSync
    original_stdout = sys.stdout

    target_postgres.DbSync = db_sync_wrapper(repository.images[new_image_hash],
                                             scratch_schema)
    if output_stream:
        sys.stdout = output_stream

    try:
        # NOTE(review): a TextIOWrapper closes the stream it wraps when it is closed or
        # garbage-collected -- confirm callers don't reuse input_stream afterwards.
        message_lines = io.TextIOWrapper(input_stream, encoding="utf-8")
        target_postgres.persist_lines(sync_config, message_lines)
        if delete_old and base_image:
            repository.images.delete([base_image.image_hash])
    except Exception:
        repository.rollback_engines()

        # Work out which image (if any) the failure policy tells us to drop.
        doomed_hashes = []
        if failure == "delete-new":
            doomed_hashes = [new_image_hash]
        elif failure == "delete-old" and base_image:
            doomed_hashes = [base_image.image_hash]
        if doomed_hashes:
            repository.images.delete(doomed_hashes)

        repository.commit_engines()
        logging.error(traceback.format_exc())
        raise
    finally:
        # Undo the monkeypatching and drop the staging schema.
        sys.stdout = original_stdout
        target_postgres.DbSync = original_db_sync
        repository.object_engine.delete_schema(scratch_schema)
        repository.commit_engines()
Пример #5
0
 def image_mapper(repository: Repository, image_hash: str):
     """
     Build a ``schema:hash`` reference for an image, substituting its hash if needed.

     The hash is looked up in ``source_replacement`` keyed by the repository; when
     there is no entry, the original ``image_hash`` is used.

     :param repository: Repository the image belongs to.
     :param image_hash: Hash of the image being mapped.
     :return: The ``schema:hash`` string, twice, as a 2-tuple.
     """
     schema = repository.to_schema()
     mapped_hash = source_replacement.get(repository, image_hash)
     reference = schema + ":" + mapped_hash
     return reference, reference