示例#1
0
def test_metadata_constraints_table_objects(local_engine_empty):
    """Check that tables can't be registered against unregistered objects.

    One object is registered up front; ``register_tables`` is then expected
    to raise ``CheckViolation`` both when the only referenced object is
    unknown and when a known object is listed together with an unknown one.
    """
    R = Repository("some", "repo")
    R.images.add(parent_id="0" * 64, image="cafecafe" * 8)
    R.objects.register_objects([
        Object(
            object_id="o" + "a" * 62,
            format="FRAG",
            namespace="",
            size=42,
            created=datetime.utcnow(),
            insertion_hash="0" * 64,
            deletion_hash="0" * 64,
            object_index={},
            rows_inserted=10,
            rows_deleted=2,
        )
    ])
    R.commit_engines()

    # Only the apostrophe-free prefix of the message is matched. The previous
    # literal spelled the rest as "aren''t registered!" (doubled apostrophe,
    # which looks like SQL-style escaping) and was never actually evaluated.
    # NOTE(review): confirm the exact message text emitted by the constraint
    # before tightening this to the full sentence.
    expected_message = "Some objects in the object_ids array"

    with pytest.raises(CheckViolation) as e:
        R.objects.register_tables(R, [("cafecafe" * 8, "table", [
            (1, "key", "integer", True)
        ], ["object_doesnt_exist"])])
    # The assertion has to live outside the ``with`` block: anything placed
    # after the raising call inside it is never executed. ``e.value`` is the
    # exception itself, whereas ``e`` is only pytest's ExceptionInfo wrapper.
    assert expected_message in str(e.value)

    with pytest.raises(CheckViolation) as e:
        R.objects.register_tables(
            R,
            [(
                "cafecafe" * 8,
                "table",
                [(1, "key", "integer", True)],
                [
                    "o" + "a" * 62,
                    "previous_object_existed_but_this_one_doesnt"
                ],
            )],
        )
    assert expected_message in str(e.value)
示例#2
0
def run_patched_sync(
    repository: Repository,
    base_image: Optional[Image],
    new_image_hash: str,
    delete_old: bool,
    failure: str,
    input_stream: Optional[BinaryIO] = None,
    output_stream: Optional[TextIO] = None,
):
    """Feed Singer messages to ``target_postgres`` with ``DbSync`` patched.

    While the target runs, ``target_postgres.DbSync`` is swapped for the
    object returned by ``db_sync_wrapper`` and ``sys.stdout`` is optionally
    redirected; both are restored afterwards and the staging schema is
    dropped, whether or not the run succeeded.

    :param repository: Repository whose engines and images are used.
    :param base_image: Image that is deleted on success when ``delete_old``
        is set, or on failure when ``failure`` is ``"delete-old"``.
    :param new_image_hash: Hash of the image handed to ``db_sync_wrapper``.
    :param delete_old: Delete ``base_image`` (if given) after a successful run.
    :param failure: What to delete when the run raises: ``"delete-new"``
        removes the new image, ``"delete-old"`` removes ``base_image`` (if
        given); any other value deletes nothing.
    :param input_stream: Binary stream of UTF-8 messages; falls back to
        ``sys.stdin.buffer`` when falsy.
    :param output_stream: If truthy, ``sys.stdout`` points at it for the
        duration of the run.
    :raises Exception: Whatever the run raised, re-raised after the rollback
        handling and after the traceback has been logged.
    """
    if not input_stream:
        input_stream = sys.stdin.buffer

    # Start from a fresh staging schema: drop any existing one, then recreate it.
    staging_schema = "sg_tmp_" + repository.to_schema()
    object_engine = repository.object_engine
    object_engine.delete_schema(staging_schema)
    object_engine.create_schema(staging_schema)
    repository.commit_engines()

    config = _prepare_config_params(repository)

    # Remember the globals that get replaced so ``finally`` can put them back.
    original_db_sync = target_postgres.DbSync
    original_stdout = sys.stdout

    new_image = repository.images[new_image_hash]
    target_postgres.DbSync = db_sync_wrapper(new_image, staging_schema)
    if output_stream:
        sys.stdout = output_stream

    try:
        messages = io.TextIOWrapper(input_stream, encoding="utf-8")
        target_postgres.persist_lines(config, messages)
        if delete_old and base_image:
            repository.images.delete([base_image.image_hash])
    except Exception:
        repository.rollback_engines()

        # Work out which image (if any) the caller wants removed on failure.
        doomed_images = []
        if failure == "delete-new":
            doomed_images = [new_image_hash]
        elif failure == "delete-old" and base_image:
            doomed_images = [base_image.image_hash]
        if doomed_images:
            repository.images.delete(doomed_images)

        repository.commit_engines()
        logging.error(traceback.format_exc())
        raise
    finally:
        sys.stdout = original_stdout
        target_postgres.DbSync = original_db_sync
        repository.object_engine.delete_schema(staging_schema)
        repository.commit_engines()
示例#3
0
def test_metadata_constraints_object_ids_hashes(local_engine_empty):
    """Check that malformed object IDs and hashes are rejected.

    Each case below differs from an otherwise identical object in exactly
    one of ``object_id``, ``insertion_hash`` or ``deletion_hash``, and
    registering it is expected to raise ``CheckViolation``.
    """
    repo = Repository("some", "repo")
    repo.images.add(parent_id="0" * 64, image="cafecafe" * 8)
    repo.commit_engines()

    # Values used for the fields that are not under test in a given case.
    good_object_id = "o" + "a" * 62
    good_hash = "0" * 64

    # (object_id, insertion_hash, deletion_hash), in the order they are tried.
    rejected_cases = (
        ("broken", good_hash, good_hash),
        ("o12345", good_hash, good_hash),
        ("o" + "a" * 61 + "Z", good_hash, good_hash),
        (good_object_id, "broken", good_hash),
        (good_object_id, good_hash, "broken"),
    )

    for object_id, insertion_hash, deletion_hash in rejected_cases:
        candidate = Object(
            object_id=object_id,
            format="FRAG",
            namespace="",
            size=42,
            created=datetime.utcnow(),
            insertion_hash=insertion_hash,
            deletion_hash=deletion_hash,
            object_index={},
            rows_inserted=10,
            rows_deleted=2,
        )
        with pytest.raises(CheckViolation):
            repo.objects.register_objects([candidate])
示例#4
0
    def sync(
        self,
        repository: Repository,
        image_hash: Optional[str] = None,
        tables: Optional[TableInfo] = None,
        use_state: bool = True,
    ) -> str:
        """Run a Singer sync into a new image and store the emitted state.

        :param repository: Repository that receives the new image.
        :param image_hash: Passed to ``prepare_new_image`` and, when
            ``use_state`` is set, to ``get_ingestion_state``.
        :param tables: Passed to ``build_singer_catalog`` together with the
            discovered catalog.
        :param use_state: When false, no previous ingestion state is loaded
            and ``None`` is handed to the Singer run instead.
        :return: Hash of the new image, as returned by ``prepare_new_image``.
        """
        singer_config = self.get_singer_config()
        discovered = self._run_singer_discovery(singer_config)
        singer_catalog = self.build_singer_catalog(discovered, tables)

        base_image, new_image_hash = prepare_new_image(repository, image_hash)

        previous_state = None
        if use_state:
            previous_state = get_ingestion_state(repository, image_hash)
        logging.info("Current ingestion state: %s", previous_state)

        # Whatever the patched sync writes to stdout is collected here and
        # treated as the new state.
        captured_output = StringIO()
        singer_run = self._run_singer(
            singer_config, previous_state, catalog=singer_catalog
        )
        with singer_run as proc:
            run_patched_sync(
                repository,
                base_image,
                new_image_hash,
                delete_old=True,
                failure="keep_both",
                input_stream=proc.stdout,
                output_stream=captured_output,
            )

        new_state = captured_output.getvalue()
        logging.info("New state: %s", new_state)

        # Stage the new state in a temporary table so that it can be turned
        # into a fragment and attached to the new image.
        object_engine = repository.object_engine
        object_engine.create_table(
            schema=None,
            table=INGESTION_STATE_TABLE,
            schema_spec=INGESTION_STATE_SCHEMA,
            temporary=True,
        )
        # NB: new_state is already a JSON-serialized string, so it is passed
        # as-is rather than wrapped into psycopg2.Json().
        insert_state = SQL(
            "INSERT INTO pg_temp.{} (timestamp, state) VALUES(now(), %s)"
        ).format(Identifier(INGESTION_STATE_TABLE))
        object_engine.run_sql(insert_state, (new_state, ))

        object_id = repository.objects.create_base_fragment(
            "pg_temp",
            INGESTION_STATE_TABLE,
            repository.namespace,
            table_schema=INGESTION_STATE_SCHEMA,
        )

        # With no previous state the table is registered as new; otherwise
        # the existing state table is overwritten.
        if not previous_state:
            repository.objects.register_tables(
                repository,
                [(
                    new_image_hash,
                    INGESTION_STATE_TABLE,
                    INGESTION_STATE_SCHEMA,
                    [object_id],
                )],
            )
        else:
            repository.objects.overwrite_table(
                repository,
                new_image_hash,
                INGESTION_STATE_TABLE,
                INGESTION_STATE_SCHEMA,
                [object_id],
            )

        repository.commit_engines()
        return new_image_hash