示例#1
0
def pg_repo_remote_registry(local_engine_empty, remote_engine_registry, clean_minio):
    """Push a staging repository to the remote registry and yield the push result.

    Presumably a pytest fixture (the decorator is not visible in this excerpt).
    ``local_engine_empty`` and ``clean_minio`` are not referenced in the body;
    presumably they are requested only for their setup side effects -- confirm
    against the fixture definitions.

    Steps:
        1. Build ``test/pg_mount_staging`` and hand it to ``make_pg_repo``
           together with the default engine.
        2. Push it to ``REMOTE_NAMESPACE/pg_mount`` on ``remote_engine_registry``
           using the ``S3`` handler with empty handler options.
        3. Delete the staging repository and run its object cleanup.
        4. Yield whatever ``push`` returned.
    """
    # Build the staging repository first, then let make_pg_repo populate it.
    staging_spec = Repository("test", "pg_mount_staging")
    local_staging = make_pg_repo(get_engine(), staging_spec)

    registry_target = Repository(
        REMOTE_NAMESPACE, "pg_mount", engine=remote_engine_registry
    )
    pushed = local_staging.push(registry_target, handler="S3", handler_options={})

    # The staging copy is only a vehicle for the push; drop it and its objects
    # before handing the result to the consumer.
    local_staging.delete()
    local_staging.objects.cleanup()

    yield pushed
    def write(self, value_: Any, **kwargs: Any) -> Result:
        """
        Stores a DataFrame in a Splitgraph repository and returns the formatted result

        The destination is read from the formatted result's `location`, which `parse_repo`
        breaks into a namespace, repository, remote name, table and tag.

        Args:
            - value_ (Any): the value to write; asserted to be a `pandas.DataFrame` and then
                stored as the `value` attribute of the returned `Result` instance
            - **kwargs (optional): forwarded to `self.format` to build the returned `Result`

        Returns:
            - Result: the `Result` produced by `self.format(**kwargs)` with `value` set to `value_`

        Raises:
            - SchemaValidationError: if `self.schema` is set and reports validation errors
            - AssertionError: if `value_` is not a `pandas.DataFrame`
        """

        # Optional schema gate: any reported problem aborts the write.
        if self.schema is not None:
            problems = self.schema.validate(value_)
            if problems:
                raise SchemaValidationError(problems)

        result = self.format(**kwargs)
        result.value = value_

        target = parse_repo(result.location)

        local_repo = Repository(
            namespace=target.namespace, repository=target.repository
        )
        # Same repository identity as the local one, bound to the engine of the
        # remote named in the location.
        remote_engine = get_engine(target.remote_name, autocommit=True)
        upstream = Repository.from_template(local_repo, engine=remote_engine)

        assert isinstance(value_, pd.DataFrame)

        if not repository_exists(local_repo) and self.auto_init_repo:
            creating_msg = "Creating repo {}/{}...".format(
                local_repo.namespace, local_repo.repository
            )
            self.logger.info(creating_msg)
            local_repo.init()

        # TODO: Retrieve the repo from bedrock first

        start_msg = "Starting to upload result to {}...".format(result.location)
        self.logger.info(start_msg)

        # Checkout, table write, commit and tag all happen inside self.atomic
        # on the local repository's engine.
        with self.atomic(local_repo.engine):
            self.logger.info("checkout")
            local_repo.head.checkout(force=True)

            self.logger.info("df to table")
            df_to_table(
                result.value,
                repository=local_repo,
                table=target.table,
                if_exists='replace',
            )

            self.logger.info("commit")
            committed = local_repo.commit(comment=result.comment, chunk_size=10000)
            committed.tag(target.tag)

        # NOTE: the push is unconditional once auto_push is set; no diff against
        # the previous image is consulted.
        if self.auto_push:
            self.logger.info("push")
            push_options = {
                "handler": "S3",
                "overwrite_objects": True,
                "overwrite_tags": True,
                "reupload_objects": True,
            }
            local_repo.push(upstream, **push_options)

        done_msg = "Finished uploading result to {}...".format(result.location)
        self.logger.info(done_msg)

        return result
示例#3
0
    def write(self, value_: Any, **kwargs: Any) -> Result:
        """
        Writes the result to a repository on Splitgraph

        A dedicated `PostgresEngine` is created for the duration of the call. It is closed
        before the method returns and also when any step after initialization raises.

        Args:
            - value_ (Any): the value to write; asserted to be a `pandas.DataFrame` and then
                stored as the `value` attribute of the returned `Result` instance
            - **kwargs (optional): if provided, will be used to format the `table`, `comment`, and `tag`

        Returns:
            - Result: returns a new `Result` with both `value`, `comment`, `table`, and `tag` attributes

        Raises:
            - AssertionError: if `value_` is not a `pandas.DataFrame` or the engine reports
                that it is not connected
        """

        cfg = patch_config(create_config_dict(), self.env or dict())
        engine = PostgresEngine(name='SplitgraphResult', conn_params=cfg)
        engine.initialize()

        # Everything that uses the initialized engine runs under try/finally so the
        # engine is closed even when validation, the table write, the commit or the
        # push fails. Previously close() was only reached on the success path.
        try:
            repo = Repository(namespace=self.namespace, repository=self.repo_name, engine=engine)

            assert isinstance(value_, pd.DataFrame)
            assert engine.connected

            if not repository_exists(repo) and self.auto_init_repo:
                self.logger.info("Creating repo {}/{}...".format(repo.namespace, repo.repository))
                repo.init()

            # TODO: Retrieve the repo from bedrock first

            new = self.format(**kwargs)
            new.value = value_

            self.logger.info("Starting to upload result to {}...".format(new.table))

            # Checkout, table write, commit and tag all happen inside self.atomic
            # on the dedicated engine.
            with self.atomic(engine):
                self.logger.info("checkout")
                repo.head.checkout(force=True)

                self.logger.info("df to table")
                df_to_table(new.value, repository=repo, table=new.table, if_exists='replace')

                self.logger.info("commit")
                new_img = repo.commit(comment=new.comment, chunk_size=10000)
                new_img.tag(new.tag)

            # NOTE: the push is unconditional once auto_push is set; no diff against
            # the previous image is consulted.
            if self.auto_push:
                self.logger.info("push")
                repo.push(
                    self.get_upstream(repo),
                    handler="S3",
                    overwrite_objects=True,
                    overwrite_tags=True,
                    reupload_objects=True,
                )
        finally:
            engine.close()

        self.logger.info("Finished uploading result to {}...".format(new.table))

        return new