Пример #1
0
def test_repo_lookup_override(remote_engine):
    """Check that looking up "overridden/repo" returns the repository created on ``remote_engine``.

    The repository is initialized on the given engine, looked up by its
    "namespace/repository" name and compared against the original object.
    It is deleted afterwards whether or not the comparison succeeded.
    """
    overridden = Repository("overridden", "repo", engine=remote_engine)
    try:
        overridden.init()
        resolved = lookup_repository("overridden/repo")
        assert resolved == overridden
    finally:
        # Always remove the repository, even when the check above failed.
        overridden.delete(unregister=True, uncheckout=True)
Пример #2
0
def test_engine_autocommit(local_engine_empty):
    """Check that a repository initialized through an autocommit engine survives a rollback.

    A separate engine is built with ``autocommit=True``, a repository is
    initialized through it and the engine is then rolled back. The repository
    must still be reported as existing when queried through
    ``local_engine_empty``.
    """
    autocommit_engine = PostgresEngine(
        conn_params=_prepare_engine_config(CONFIG),
        name="test_engine",
        autocommit=True,
    )
    repo = Repository("test", "repo", engine=autocommit_engine)
    repo.init()

    # With autocommit on, this rollback is expected not to undo the init above.
    repo.engine.rollback()

    # Same namespace/repository, but checked through the fixture's engine.
    repo_via_local = Repository.from_template(repo, engine=local_engine_empty)
    assert repository_exists(repo_via_local)
Пример #3
0
def build_repo():
    """Rebuild the "abc/1234" repository with a single committed "unit_test" table.

    The repository is deleted and re-initialized, filled with the output of
    ``fake_data(8)``, committed, and the new image is checked out.

    Returns:
        The freshly built ``Repository``.
    """
    repo = Repository(namespace="abc", repository="1234")

    # Start from scratch: drop whatever is there, then create it again.
    repo.delete()
    repo.init()

    df_to_table(
        fake_data(8),
        repository=repo,
        table="unit_test",
        if_exists='replace',
    )

    # Commit the table and check out the resulting image.
    repo.commit().checkout()

    return repo
Пример #4
0
def _execute_from(
        node: Node,
        output: Repository) -> Tuple[Repository, Optional[ProvenanceLine]]:
    """Execute a Splitfile FROM command.

    Args:
        node: parsed command node; its "repo_source" and "repository"
            children (if any) are the source image spec and the AS target.
        output: the current output repository.

    Returns:
        A tuple of the (possibly replaced) output repository and a provenance
        record describing the source image, or ``None`` for the provenance
        when no source was given (FROM EMPTY AS ...).

    Raises:
        SplitfileError: if neither a source nor an AS repository is given.
    """
    matched = extract_nodes(node, ["repo_source", "repository"])
    repo_source = get_first_or_none(matched, "repo_source")
    output_node = get_first_or_none(matched, "repository")

    if output_node:
        # AS (output) was given: it becomes the output repository from here on.
        output = Repository.from_schema(output_node.match.group(0))
        logging.info("Changed output repository to %s" % str(output))

        # An already existing AS target is wiped before being reused.
        if repository_exists(output):
            logging.info("Clearing all output from %s" % str(output))
            output.delete()

    if not repository_exists(output):
        output.init()

    if not repo_source:
        # FROM EMPTY AS repository: the AS target has already been initialized
        # above, so nothing is left to do. Without AS the command would be a
        # no-op, which is reported as an error (after the init above has run).
        if not output_node:
            raise SplitfileError(
                "FROM EMPTY without AS (repository) does nothing!")
        return output, None

    repository, tag_or_hash = parse_image_spec(repo_source)
    source_repo = lookup_repository(repository.to_schema(),
                                    include_local=True)

    # Local source repositories that have an upstream are refreshed first.
    if source_repo.engine.name == "LOCAL" and source_repo.upstream:
        source_repo.pull()

    # Resolve the tag/hash against the source repo rather than the output:
    # the output may have just had its base commit (000...) created, and that
    # commit would otherwise be what a tag such as 'latest' resolves to.
    clone(source_repo, local_repository=output, download_all=False)
    source_hash = source_repo.images[tag_or_hash].image_hash
    output.images.by_hash(source_hash).checkout()

    provenance: Optional[ProvenanceLine] = {
        "type": "FROM",
        "source_namespace": source_repo.namespace,
        "source": source_repo.repository,
        "source_hash": source_hash,
    }
    return output, provenance
Пример #5
0
    def init_repo(self, repo_info: RepoInfo) -> Repository:
        """
        Make sure the repository described by `repo_info` exists and, when a remote
        is named, clone from that remote into it.

        Args:
            - repo_info (RepoInfo): supplies `namespace`, `repository` and `remote_name`

        Returns:
            - Repository: the repository built from `repo_info.namespace` and
                `repo_info.repository`
        """
        repo = Repository(namespace=repo_info.namespace,
                          repository=repo_info.repository)

        if not repository_exists(repo):
            self.logger.info("Creating repo {}/{}...".format(
                repo.namespace, repo.repository))
            repo.init()

        if repo_info.remote_name:
            # Same namespace/repository, but on the engine of the named remote.
            remote = Repository.from_template(
                repo, engine=get_engine(repo_info.remote_name))
            # clone() is called for its effect on `repo`; its return value is
            # not needed, so it is no longer bound to an unused local.
            clone(
                remote,
                local_repository=repo,
                download_all=False,
                overwrite_objects=True,
                overwrite_tags=True,
            )

        return repo
    def write(self, value_: Any, **kwargs: Any) -> Result:
        """
        Store a DataFrame as a table in a Splitgraph repository, commit and tag it,
        and push the repository when `auto_push` is set.

        Args:
            - value_ (Any): the value to write; must be a pandas `DataFrame`. It is stored
                as the `value` attribute of the returned `Result` instance
            - **kwargs (optional): passed to `self.format` to build the returned `Result`

        Returns:
            - Result: the formatted copy of this result with `value` set to `value_`

        Raises:
            - SchemaValidationError: if `self.schema` is set and reports validation errors
            - AssertionError: if `value_` is not a pandas `DataFrame`
        """
        if self.schema is not None:
            errors = self.schema.validate(value_)
            if errors:
                raise SchemaValidationError(errors)

        result = self.format(**kwargs)
        result.value = value_

        repo_info = parse_repo(result.location)

        repo = Repository(
            namespace=repo_info.namespace,
            repository=repo_info.repository,
        )
        # Same namespace/repository, on the autocommit engine of the named remote.
        remote_engine = get_engine(repo_info.remote_name, autocommit=True)
        remote = Repository.from_template(repo, engine=remote_engine)

        assert isinstance(value_, pd.DataFrame)

        if not repository_exists(repo):
            if self.auto_init_repo:
                self.logger.info("Creating repo {}/{}...".format(
                    repo.namespace, repo.repository))
                repo.init()

        # TODO: Retrieve the repo from bedrock first

        self.logger.info(
            "Starting to upload result to {}...".format(result.location))

        with self.atomic(repo.engine):
            self.logger.info("checkout")
            head_image = repo.head
            head_image.checkout(force=True)

            self.logger.info("df to table")
            df_to_table(
                result.value,
                repository=repo,
                table=repo_info.table,
                if_exists='replace',
            )

            self.logger.info("commit")
            committed = repo.commit(comment=result.comment, chunk_size=10000)
            committed.tag(repo_info.tag)

        if self.auto_push:
            self.logger.info("push")
            repo.push(
                remote,
                handler="S3",
                overwrite_objects=True,
                overwrite_tags=True,
                reupload_objects=True,
            )

        self.logger.info(
            "Finished uploading result to {}...".format(result.location))

        return result
Пример #7
0
    def write(self, value_: Any, **kwargs: Any) -> Result:
        """
        Store a DataFrame as a table in a Splitgraph repository, commit and tag it,
        and push the repository when `auto_push` is set.

        A dedicated engine is created for the duration of the call and is always
        closed again, including when any step raises.

        Args:
            - value_ (Any): the value to write; must be a pandas `DataFrame`. It is stored
                as the `value` attribute of the returned `Result` instance
            - **kwargs (optional): if provided, will be used to format the `table`, `comment`, and `tag`

        Returns:
            - Result: the formatted copy of this result with `value` set to `value_`

        Raises:
            - AssertionError: if `value_` is not a pandas `DataFrame` or the engine
                is not connected after initialization
        """

        cfg = patch_config(create_config_dict(), self.env or dict())
        engine = PostgresEngine(name='SplitgraphResult', conn_params=cfg)

        # Everything that uses the engine runs inside try/finally so that the
        # engine is released even when init, checkout, commit or push fails.
        try:
            engine.initialize()
            repo = Repository(namespace=self.namespace, repository=self.repo_name, engine=engine)

            assert isinstance(value_, pd.DataFrame)
            assert engine.connected

            if not repository_exists(repo) and self.auto_init_repo:
                self.logger.info("Creating repo {}/{}...".format(repo.namespace, repo.repository))
                repo.init()

            # TODO: Retrieve the repo from bedrock first

            new = self.format(**kwargs)
            new.value = value_

            self.logger.info("Starting to upload result to {}...".format(new.table))

            with self.atomic(engine):
                self.logger.info("checkout")
                img = repo.head
                img.checkout(force=True)

                self.logger.info("df to table")
                df_to_table(new.value, repository=repo, table=new.table, if_exists='replace')

                self.logger.info("commit")
                new_img = repo.commit(comment=new.comment, chunk_size=10000)
                new_img.tag(new.tag)

            if self.auto_push:
                self.logger.info("push")
                repo.push(
                    self.get_upstream(repo),
                    handler="S3",
                    overwrite_objects=True,
                    overwrite_tags=True,
                    reupload_objects=True,
                )
        finally:
            engine.close()

        # Only reached on success; logged after the engine is closed, as before.
        self.logger.info("Finished uploading result to {}...".format(new.table))

        return new