def test_repo_lookup_override(remote_engine):
    """A repository created on the remote engine is found by schema-name lookup."""
    overridden = Repository("overridden", "repo", engine=remote_engine)
    try:
        overridden.init()
        assert lookup_repository("overridden/repo") == overridden
    finally:
        # Always clean up the repo so later tests see a pristine engine.
        overridden.delete(unregister=True, uncheckout=True)
def test_engine_autocommit(local_engine_empty):
    """An autocommit engine persists changes even after an explicit rollback.

    Creates a repository through an autocommit engine, rolls back, and then
    verifies the repository still exists when inspected via a second engine.
    """
    conn_params = _prepare_engine_config(CONFIG)
    engine = PostgresEngine(conn_params=conn_params, name="test_engine", autocommit=True)
    try:
        repo = Repository("test", "repo", engine=engine)
        repo.init()
        repo.engine.rollback()
        # init() must survive the rollback because the engine autocommits.
        assert repository_exists(Repository.from_template(repo, engine=local_engine_empty))
    finally:
        # Fix: the engine connection was previously leaked; close it even if
        # the assertion fails.
        engine.close()
def build_repo(num_rows: int = 8) -> "Repository":
    """Create (re-creating if necessary) the ``abc/1234`` test repository.

    Deletes any existing repository, initializes a fresh one, writes a table
    of fake data, commits, and checks out the new image.

    Args:
        num_rows: number of fake-data rows to write into ``unit_test``
            (default 8, matching the previously hard-coded behavior).

    Returns:
        The repository, checked out at the freshly committed image.
    """
    repo = Repository(namespace="abc", repository="1234")
    repo.delete()
    repo.init()
    df_to_table(fake_data(num_rows), repository=repo, table="unit_test", if_exists='replace')
    new_img = repo.commit()
    new_img.checkout()
    return repo
def _execute_from(
    node: Node, output: Repository
) -> Tuple[Repository, Optional[ProvenanceLine]]:
    """Execute a Splitfile FROM command.

    Prepares the output repository (optionally redirected via AS repository)
    and, when a source image is given, clones it and checks out the resolved
    image hash.

    Returns:
        The (possibly redirected) output repository and a provenance line for
        the FROM, or None for FROM EMPTY.
    """
    nodes = extract_nodes(node, ["repo_source", "repository"])
    source_node = get_first_or_none(nodes, "repo_source")
    target_node = get_first_or_none(nodes, "repository")

    provenance: Optional[ProvenanceLine] = None

    if target_node:
        # AS (output) detected, change the current output repository to it.
        output = Repository.from_schema(target_node.match.group(0))
        logging.info("Changed output repository to %s" % str(output))
        # NB this destroys all data in the case where we ran some commands in
        # the Splitfile and then did FROM (...) without AS repository
        if repository_exists(output):
            logging.info("Clearing all output from %s" % str(output))
            output.delete()

    if not repository_exists(output):
        output.init()

    if not source_node:
        # FROM EMPTY AS repository -- initializes an empty repository (say to
        # create a table or import the results of a previous stage in a
        # multistage build). In this case, if AS repository has been specified,
        # it's already been initialized. If not, this command literally does
        # nothing.
        if not target_node:
            raise SplitfileError("FROM EMPTY without AS (repository) does nothing!")
        return output, provenance

    repository, tag_or_hash = parse_image_spec(source_node)
    source_repo = lookup_repository(repository.to_schema(), include_local=True)

    # For local repositories, make sure to update them if they've an upstream
    if source_repo.engine.name == "LOCAL" and source_repo.upstream:
        source_repo.pull()

    # Get the target image hash from the source repo: otherwise, if the tag is,
    # say, 'latest' and the output has just had the base commit (000...)
    # created in it, that commit will be the latest.
    clone(source_repo, local_repository=output, download_all=False)
    source_hash = source_repo.images[tag_or_hash].image_hash
    output.images.by_hash(source_hash).checkout()
    provenance = {
        "type": "FROM",
        "source_namespace": source_repo.namespace,
        "source": source_repo.repository,
        "source_hash": source_hash,
    }
    return output, provenance
def init_repo(self, repo_info: RepoInfo) -> Repository:
    """Ensure the local repository described by ``repo_info`` exists.

    Initializes the repository if missing and, when a remote name is
    configured, clones metadata from the remote (without downloading
    objects).

    Args:
        repo_info: namespace/repository names plus an optional remote name.

    Returns:
        The local Repository object.
    """
    repo = Repository(namespace=repo_info.namespace, repository=repo_info.repository)

    if not repository_exists(repo):
        self.logger.info("Creating repo {}/{}...".format(repo.namespace, repo.repository))
        repo.init()

    if repo_info.remote_name:
        remote = Repository.from_template(repo, engine=get_engine(repo_info.remote_name))
        # Fix: dropped the unused `cloned_repo` binding -- clone() is invoked
        # purely for its side effect of syncing the local repo with the remote.
        clone(
            remote,
            local_repository=repo,
            download_all=False,
            overwrite_objects=True,
            overwrite_tags=True,
        )

    return repo
def write(self, value_: Any, **kwargs: Any) -> Result:
    """
    Writes the result to a repository on Splitgraph

    Args:
        - value_ (Any): the value to write; will then be stored as the `value` attribute
            of the returned `Result` instance
        - **kwargs (optional): if provided, will be used to format the `table`, `comment`,
            and `tag`

    Returns:
        - Result: returns a new `Result` with both `value`, `comment`, `table`, and `tag` attributes

    Raises:
        - SchemaValidationError: if a schema is configured and `value_` fails validation
    """
    # Validate the payload against the configured schema, if any, before
    # doing any repository work.
    if self.schema is not None:
        errors = self.schema.validate(value_)
        if errors:
            raise SchemaValidationError(errors)

    # Format location/table/comment/tag templates and attach the value.
    new = self.format(**kwargs)
    new.value = value_

    # Resolve the target repo and its remote from the formatted location.
    repo_info = parse_repo(new.location)
    repo = Repository(namespace=repo_info.namespace, repository=repo_info.repository)
    remote = Repository.from_template(repo, engine=get_engine(
        repo_info.remote_name, autocommit=True))

    # Only DataFrame payloads are supported by df_to_table below.
    assert isinstance(value_, pd.DataFrame)

    if not repository_exists(repo) and self.auto_init_repo:
        self.logger.info("Creating repo {}/{}...".format(
            repo.namespace, repo.repository))
        repo.init()

    # TODO: Retrieve the repo from bedrock first

    self.logger.info("Starting to upload result to {}...".format(
        new.location))

    # checkout -> write -> commit -> tag, all inside one atomic scope so a
    # failure mid-sequence does not leave a partially written image.
    with self.atomic(repo.engine):
        self.logger.info("checkout")
        # NOTE(review): repo.head appears assumed non-None here (i.e. the repo
        # already has at least one image) -- confirm for freshly init'd repos.
        img = repo.head
        img.checkout(force=True)

        self.logger.info("df to table")
        df_to_table(new.value, repository=repo, table=repo_info.table,
                    if_exists='replace')

        self.logger.info("commit")
        new_img = repo.commit(comment=new.comment, chunk_size=10000)
        new_img.tag(repo_info.tag)

    # if (repo.diff(new.table, img, new_img)):
    if self.auto_push:
        self.logger.info("push")
        # Push objects via the S3 handler, overwriting remote objects/tags.
        repo.push(
            remote,
            handler="S3",
            overwrite_objects=True,
            overwrite_tags=True,
            reupload_objects=True,
        )

    self.logger.info("Finished uploading result to {}...".format(
        new.location))

    return new
def write(self, value_: Any, **kwargs: Any) -> Result:
    """
    Writes the result to a repository on Splitgraph

    Args:
        - value_ (Any): the value to write; will then be stored as the `value` attribute
            of the returned `Result` instance
        - **kwargs (optional): if provided, will be used to format the `table`, `comment`,
            and `tag`

    Returns:
        - Result: returns a new `Result` with both `value`, `comment`, `table`, and `tag` attributes
    """
    # Build an engine from the merged config (env overrides layered onto the
    # defaults) and connect it before touching the repository.
    cfg = patch_config(create_config_dict(), self.env or dict())
    engine = PostgresEngine(name='SplitgraphResult', conn_params=cfg)
    engine.initialize()

    repo = Repository(namespace=self.namespace, repository=self.repo_name, engine=engine)

    # Only DataFrame payloads are supported by df_to_table below.
    assert isinstance(value_, pd.DataFrame)
    assert engine.connected

    if not repository_exists(repo) and self.auto_init_repo:
        self.logger.info("Creating repo {}/{}...".format(repo.namespace, repo.repository))
        repo.init()

    # TODO: Retrieve the repo from bedrock first

    # Format table/comment/tag templates and attach the value.
    new = self.format(**kwargs)
    new.value = value_

    self.logger.info("Starting to upload result to {}...".format(new.table))

    # checkout -> write -> commit -> tag, all inside one atomic scope so a
    # failure mid-sequence does not leave a partially written image.
    with self.atomic(engine):
        self.logger.info("checkout")
        # NOTE(review): repo.head appears assumed non-None here (i.e. the repo
        # already has at least one image) -- confirm for freshly init'd repos.
        img = repo.head
        img.checkout(force=True)

        self.logger.info("df to table")
        df_to_table(new.value, repository=repo, table=new.table, if_exists='replace')

        self.logger.info("commit")
        new_img = repo.commit(comment=new.comment, chunk_size=10000)
        new_img.tag(new.tag)

    # if (repo.diff(new.table, img, new_img)):
    if self.auto_push:
        self.logger.info("push")
        # Push objects via the S3 handler, overwriting remote objects/tags.
        repo.push(
            self.get_upstream(repo),
            handler="S3",
            overwrite_objects=True,
            overwrite_tags=True,
            reupload_objects=True,
        )

    # NOTE(review): engine.close() is skipped if anything above raises --
    # consider try/finally; left unchanged here.
    engine.close()

    self.logger.info("Finished uploading result to {}...".format(new.table))

    return new