def test_metadata_constraints_table_objects(local_engine_empty):
    """Registering a table whose object_ids array references unregistered objects must fail."""
    R = Repository("some", "repo")
    R.images.add(parent_id="0" * 64, image="cafecafe" * 8)

    # Register exactly one valid object so the second case can mix a known
    # object with an unknown one.
    R.objects.register_objects(
        [
            Object(
                object_id="o" + "a" * 62,
                format="FRAG",
                namespace="",
                size=42,
                created=datetime.utcnow(),
                insertion_hash="0" * 64,
                deletion_hash="0" * 64,
                object_index={},
                rows_inserted=10,
                rows_deleted=2,
            )
        ]
    )
    R.commit_engines()

    table_schema = [(1, "key", "integer", True)]
    bad_object_lists = [
        # No such object at all.
        ["object_doesnt_exist"],
        # A registered object followed by an unregistered one.
        ["o" + "a" * 62, "previous_object_existed_but_this_one_doesnt"],
    ]

    for object_ids in bad_object_lists:
        with pytest.raises(CheckViolation) as e:
            R.objects.register_tables(
                R, [("cafecafe" * 8, "table", table_schema, object_ids)]
            )
        # NOTE: the doubled quote matches the constraint's error text verbatim.
        assert "Some objects in the object_ids array aren''t registered!" in str(e)
def run_patched_sync( repository: Repository, base_image: Optional[Image], new_image_hash: str, delete_old: bool, failure: str, input_stream: Optional[BinaryIO] = None, output_stream: Optional[TextIO] = None, ): input_stream = input_stream or sys.stdin.buffer # Build a staging schema staging_schema = "sg_tmp_" + repository.to_schema() repository.object_engine.delete_schema(staging_schema) repository.object_engine.create_schema(staging_schema) repository.commit_engines() config = _prepare_config_params(repository) old_sync = target_postgres.DbSync stdout = sys.stdout target_postgres.DbSync = db_sync_wrapper(repository.images[new_image_hash], staging_schema) if output_stream: sys.stdout = output_stream try: singer_messages = io.TextIOWrapper(input_stream, encoding="utf-8") target_postgres.persist_lines(config, singer_messages) if delete_old and base_image: repository.images.delete([base_image.image_hash]) except Exception: repository.rollback_engines() if failure == "delete-new": repository.images.delete([new_image_hash]) elif failure == "delete-old" and base_image: repository.images.delete([base_image.image_hash]) repository.commit_engines() logging.error(traceback.format_exc()) raise finally: sys.stdout = stdout target_postgres.DbSync = old_sync repository.object_engine.delete_schema(staging_schema) repository.commit_engines()
def test_metadata_constraints_object_ids_hashes(local_engine_empty):
    """Object IDs and content hashes must satisfy the metadata schema's format checks.

    Each case below takes an otherwise-valid Object and breaks exactly one
    field; registration must raise CheckViolation every time.
    """
    R = Repository("some", "repo")
    R.images.add(parent_id="0" * 64, image="cafecafe" * 8)
    R.commit_engines()

    def _object(**overrides):
        # Build a valid Object, then override selected fields to invalidate it.
        fields = dict(
            object_id="o" + "a" * 62,
            format="FRAG",
            namespace="",
            size=42,
            created=datetime.utcnow(),
            insertion_hash="0" * 64,
            deletion_hash="0" * 64,
            object_index={},
            rows_inserted=10,
            rows_deleted=2,
        )
        fields.update(overrides)
        return Object(**fields)

    bad_field_cases = [
        {"object_id": "broken"},              # malformed ID
        {"object_id": "o12345"},              # too short
        {"object_id": "o" + "a" * 61 + "Z"},  # invalid character in ID
        {"insertion_hash": "broken"},         # malformed insertion hash
        {"deletion_hash": "broken"},          # malformed deletion hash
    ]
    for overrides in bad_field_cases:
        with pytest.raises(CheckViolation):
            R.objects.register_objects([_object(**overrides)])
def sync(
    self,
    repository: Repository,
    image_hash: Optional[str] = None,
    tables: Optional[TableInfo] = None,
    use_state: bool = True,
) -> str:
    """Run a full Singer tap-to-repository sync, producing a new image.

    Discovers the tap's catalog, replays its output into a new image via
    ``run_patched_sync`` and stores the emitted ingestion state as a table
    in the new image.

    :param repository: Target repository.
    :param image_hash: Base image to sync on top of (None presumably means
        start from scratch -- delegated to ``prepare_new_image``).
    :param tables: Optional table selection passed to the catalog builder.
    :param use_state: If True, resume from the previously stored ingestion state.
    :return: Hash of the new image.
    """
    config = self.get_singer_config()
    catalog = self._run_singer_discovery(config)
    catalog = self.build_singer_catalog(catalog, tables)
    base_image, new_image_hash = prepare_new_image(repository, image_hash)
    state = get_ingestion_state(repository, image_hash) if use_state else None
    logging.info("Current ingestion state: %s", state)

    # Run the sink + target and capture the stdout (new state)
    output_stream = StringIO()

    with self._run_singer(config, state, catalog=catalog) as proc:
        run_patched_sync(
            repository,
            base_image,
            new_image_hash,
            delete_old=True,
            failure="keep_both",
            input_stream=proc.stdout,
            output_stream=output_stream,
        )

    new_state = output_stream.getvalue()
    logging.info("New state: %s", new_state)

    # Add a table to the new image with the new state.
    # Stage the state row in a session-local temporary table first.
    repository.object_engine.create_table(
        schema=None,
        table=INGESTION_STATE_TABLE,
        schema_spec=INGESTION_STATE_SCHEMA,
        temporary=True,
    )
    # NB: new_state here is a JSON-serialized string, so we don't wrap it into psycopg2.Json()
    repository.object_engine.run_sql(
        SQL("INSERT INTO pg_temp.{} (timestamp, state) VALUES(now(), %s)").format(
            Identifier(INGESTION_STATE_TABLE)
        ),
        (new_state,),
    )

    # Snapshot the temp table into a content-addressed object.
    object_id = repository.objects.create_base_fragment(
        "pg_temp",
        INGESTION_STATE_TABLE,
        repository.namespace,
        table_schema=INGESTION_STATE_SCHEMA,
    )

    # If the state exists already, overwrite it; otherwise, add new state table.
    if state:
        repository.objects.overwrite_table(
            repository,
            new_image_hash,
            INGESTION_STATE_TABLE,
            INGESTION_STATE_SCHEMA,
            [object_id],
        )
    else:
        repository.objects.register_tables(
            repository,
            [(new_image_hash, INGESTION_STATE_TABLE, INGESTION_STATE_SCHEMA, [object_id])],
        )
    repository.commit_engines()

    return new_image_hash