def test_data_block_methods():
    """Check naming, schema lookup and record counting for a data block.

    Builds a ``DataBlockMetadata`` with three distinct schema keys and a
    ``StoredDataBlockMetadata`` pointing at the environment's default local
    python storage, then verifies that:

    * ``get_name()`` assigns the stored block's ``name`` on first call,
    * each schema accessor resolves its key to the matching test schema,
    * ``compute_record_count()`` sets ``record_count`` from the stored records.
    """
    env = make_test_env()
    block = DataBlockMetadata(
        id=get_datablock_id(),
        inferred_schema_key="_test.TestSchema1",
        nominal_schema_key="_test.TestSchema2",
        realized_schema_key="_test.TestSchema3",
    )
    storage = env.get_default_local_python_storage()
    records_object = as_records([{"a": 1}])
    stored_block = StoredDataBlockMetadata(
        id=get_datablock_id(),
        data_block_id=block.id,
        data_block=block,
        storage_url=storage.url,
        data_format=RecordsFormat,
    )
    with env.session_scope() as sess:
        sess.add(block)
        sess.add(stored_block)

        # The name is unset until get_name() is called for the first time.
        assert stored_block.name is None
        generated_name = stored_block.get_name()
        assert len(generated_name) > 10
        assert stored_block.name == generated_name

        # Put the records in storage under that name so they can be counted.
        storage.get_api().put(stored_block.name, records_object)

        schema_expectations = (
            (block.inferred_schema, TestSchema1),
            (block.nominal_schema, TestSchema2),
            (block.realized_schema, TestSchema3),
        )
        for resolve_schema, expected_schema in schema_expectations:
            assert resolve_schema(env, sess) == expected_schema

        block.compute_record_count()
        assert block.record_count == 1
def convert_sdb(
    env: Environment,
    sess: Session,
    sdb: StoredDataBlockMetadata,
    conversion_path: ConversionPath,
    target_storage: Storage,
    storages: Optional[List[Storage]] = None,
) -> StoredDataBlockMetadata:
    """Walk a conversion path, producing a new stored data block per step.

    For every edge in ``conversion_path.conversions`` a storage is selected
    for the edge's target storage format, a new ``StoredDataBlockMetadata``
    is created for the same data block and added to ``sess``, and the edge's
    copier copies the data from the previous stored block to the new one.

    Args:
        env: Environment; supplies the default ``storages`` and is passed to
            the realized-schema lookup.
        sess: Session the new stored blocks are added to (and from which
            consumed intermediate blocks are expunged or deleted).
        sdb: The stored data block to start converting from.
        conversion_path: Path whose ``conversions`` are applied in order.
        target_storage: Passed to ``select_storage`` for every step.
        storages: Candidate storages handed to ``select_storage``; defaults
            to ``env.storages`` when ``None``.

    Returns:
        ``sdb`` itself when the path has no conversions, otherwise the stored
        data block created by the last conversion step.
    """
    if not conversion_path.conversions:
        # Empty path: the input block is already in the desired form.
        return sdb

    candidate_storages = env.storages if storages is None else storages

    source_sdb = sdb
    source_storage = sdb.storage
    # The realized schema is resolved once and reused for every copy step.
    schema = sdb.realized_schema(env, sess)
    result_sdb: Optional[StoredDataBlockMetadata] = None

    for edge in conversion_path.conversions:
        conversion = edge.conversion
        to_format = conversion.to_storage_format
        dest_storage = select_storage(target_storage, candidate_storages, to_format)
        logger.debug(
            f"CONVERSION: {conversion.from_storage_format} -> {conversion.to_storage_format}"
        )

        # New stored block for the same underlying data block, in the
        # format/storage this step converts to.
        result_sdb = StoredDataBlockMetadata(  # type: ignore
            id=get_datablock_id(),
            data_block_id=source_sdb.data_block_id,
            data_block=source_sdb.data_block,
            data_format=to_format.data_format,
            storage_url=dest_storage.url,
        )
        sess.add(result_sdb)

        edge.copier.copy(
            from_name=source_sdb.get_name(),
            to_name=result_sdb.get_name(),
            conversion=conversion,
            from_storage_api=source_storage.get_api(),
            to_storage_api=dest_storage.get_api(),
            schema=schema,
        )

        source_format = source_sdb.data_format
        source_is_transient = (
            source_format.is_python_format() and not source_format.is_storable()
        )
        if source_is_transient:
            # The source was an in-python, non-storable records object that
            # this step just consumed, so drop it from its storage, from its
            # data block, and from the session.
            # TODO: Bit of a hack. Is there a central place we can do this?
            #       also is reusable a better name than storable?
            source_storage.get_api().remove(source_sdb.get_name())
            source_sdb.data_block.stored_data_blocks.remove(source_sdb)
            if source_sdb in sess.new:
                # Pending in the session (sess.new): just detach it.
                sess.expunge(source_sdb)
            else:
                sess.delete(source_sdb)

        # The block just written becomes the source of the next step.
        source_sdb, source_storage = result_sdb, dest_storage

    return result_sdb
# Example no. 3
# 0
 def create_stored_datablock(self) -> StoredDataBlockMetadata:
     """Build a new, empty stored data block for this object's node.

     A fresh ``DataBlockMetadata`` is created with no inferred or nominal
     schema, a realized schema key of ``"Any"``, no record count, and
     ``created_by_node_key`` set to ``self.node.key``. It is wrapped in a
     ``StoredDataBlockMetadata`` whose storage URL is that of the execution
     context's target storage and whose data format is left unset.

     Nothing is added to a session here; the caller receives the
     unpersisted stored block (the data block is reachable through its
     ``data_block`` attribute).
     """
     new_block_id = get_datablock_id()
     node_key = self.node.key
     data_block = DataBlockMetadata(
         id=new_block_id,
         inferred_schema_key=None,
         nominal_schema_key=None,
         realized_schema_key="Any",
         record_count=None,
         created_by_node_key=node_key,
     )

     stored_block_id = get_stored_datablock_id()
     target_url = self.execution_context.target_storage.url
     return StoredDataBlockMetadata(  # type: ignore
         id=stored_block_id,
         data_block_id=data_block.id,
         data_block=data_block,
         storage_url=target_url,
         data_format=None,
     )