def test_records_to_db(url):
    """Round-trip the module-level `records` fixture into a temp database table."""
    s: Storage = Storage.from_url(url)
    api_cls: Type[DatabaseApi] = s.storage_engine.get_api_cls()
    if not s.get_api().dialect_is_supported():
        warnings.warn(
            f"Skipping tests for database engine {s.storage_engine.__name__} (client library not installed)"
        )
        return
    mem_api: PythonStorageApi = new_local_python_storage().get_api()
    with api_cls.temp_local_database() as db_url:
        name = "_test"
        db_api: DatabaseStorageApi = Storage.from_url(db_url).get_api()
        # Stage the records fixture in local python storage, then copy to the db
        mem_api.put(name, as_records(records))
        conversion = Conversion(
            StorageFormat(LocalPythonStorageEngine, RecordsFormat),
            StorageFormat(s.storage_engine, DatabaseTableFormat),
        )
        copy_records_to_db.copy(
            name, name, conversion, mem_api, db_api, schema=TestSchema4
        )
        with db_api.execute_sql_result(f"select * from {name}") as res:
            assert [dict(row) for row in res] == records
Пример #2
0
def test_storage():
    """Each URL scheme resolves to its corresponding storage engine."""
    cases = [
        ("sqlite://", SqliteStorageEngine),
        ("postgres://localhost", PostgresStorageEngine),
        ("mysql://localhost", MysqlStorageEngine),
        ("file:///", LocalFileSystemStorageEngine),
        ("python://", LocalPythonStorageEngine),
    ]
    for url, engine in cases:
        assert Storage.from_url(url).storage_engine is engine
Пример #3
0
def test_storage_api():
    """get_api() returns the API class matching each storage engine."""
    cases = [
        ("sqlite://", DatabaseStorageApi),
        ("postgres://localhost", PostgresDatabaseStorageApi),
        ("mysql://localhost", MysqlDatabaseStorageApi),
        ("file:///", FileSystemStorageApi),
        ("python://", PythonStorageApi),
    ]
    for url, api_cls in cases:
        assert isinstance(Storage.from_url(url).get_api(), api_cls)
Пример #4
0
def create_data_block_from_records(
    env: Environment,
    sess: Session,
    local_storage: Storage,
    records: Any,
    nominal_schema: Schema = None,
    inferred_schema: Schema = None,
    created_by_node_key: str = None,
) -> Tuple[DataBlockMetadata, StoredDataBlockMetadata]:
    """Create DataBlock and StoredDataBlock metadata rows from raw records.

    Wraps `records` in a MemoryDataRecords (unless it already is one), resolves
    the nominal / inferred / realized schemas, conforms the records to the
    realized schema, adds the metadata rows to `sess` (without flushing), and
    writes the records object into `local_storage`.

    Returns:
        Tuple of (DataBlockMetadata, StoredDataBlockMetadata).
    """
    from snapflow.storage.storage import LocalPythonStorageEngine

    logger.debug("CREATING DATA BLOCK")
    if isinstance(records, MemoryDataRecords):
        dro = records
        # Important: override nominal schema with DRO entry if it exists
        if dro.nominal_schema is not None:
            nominal_schema = env.get_schema(dro.nominal_schema, sess)
    else:
        dro = as_records(records, schema=nominal_schema)
    if not nominal_schema:
        # Fall back to the permissive "Any" schema when none was provided
        nominal_schema = env.get_schema("Any", sess)
    if not inferred_schema:
        # Infer a schema from the records themselves and register it with env
        inferred_schema = dro.data_format.infer_schema_from_records(
            dro.records_object)
        env.add_new_generated_schema(inferred_schema, sess)
    # The realized schema reconciles the inferred schema with the nominal one
    realized_schema = cast_to_realized_schema(env, sess, inferred_schema,
                                              nominal_schema)
    dro = dro.conform_to_schema(realized_schema)
    block = DataBlockMetadata(
        id=get_datablock_id(),
        inferred_schema_key=inferred_schema.key if inferred_schema else None,
        nominal_schema_key=nominal_schema.key,
        realized_schema_key=realized_schema.key,
        record_count=dro.record_count,
        created_by_node_key=created_by_node_key,
    )
    sdb = StoredDataBlockMetadata(  # type: ignore
        id=get_datablock_id(),
        data_block_id=block.id,
        data_block=block,
        storage_url=local_storage.url,
        data_format=dro.data_format,
    )
    sess.add(block)
    sess.add(sdb)
    # sess.flush([block, sdb])
    local_storage.get_api().put(sdb.get_name(), dro)
    return block, sdb
Пример #5
0
def test_database_api_core_operations(url):
    """Exercise exists/record_count/create_alias/copy against a temp database.

    Skips (with a warning) when the dialect's client library is unavailable.
    """
    s: Storage = Storage.from_url(url)
    api_cls: Type[DatabaseApi] = s.storage_engine.get_api_cls()
    if not s.get_api().dialect_is_supported():
        # Warn instead of returning silently, consistent with test_records_to_db
        warnings.warn(
            f"Skipping tests for database engine {s.storage_engine.__name__} (client library not installed)"
        )
        return
    with api_cls.temp_local_database() as db_url:
        api: DatabaseApi = Storage.from_url(db_url).get_api()
        name = "_test"
        api.execute_sql(f"create table {name} as select 1 a, 2 b")
        assert api.exists(name)
        assert not api.exists(name + "doesntexist")
        assert api.record_count(name) == 1
        api.create_alias(name, name + "alias")
        assert api.record_count(name + "alias") == 1
        api.copy(name, name + "copy")
        assert api.record_count(name + "copy") == 1
Пример #6
0
def test_filesystem_api_core_operations(url):
    """Core put/exists/record_count/alias/copy operations on python storage."""
    api: PythonStorageApi = Storage.from_url(url).get_api()
    name = "_test"
    alias_name = name + "alias"
    copy_name = name + "copy"
    api.put(name, as_records([{"a": 1}, {"b": 2}]))
    assert api.exists(name)
    assert not api.exists(name + "doesntexist")
    assert api.record_count(name) == 2
    api.create_alias(name, alias_name)
    assert api.record_count(alias_name) == 2
    api.copy(name, copy_name)
    assert api.record_count(copy_name) == 2
Пример #7
0
def make_test_env(**kwargs) -> Environment:
    """Build an Environment with sqlite metadata storage and the test module."""
    if "metadata_storage" not in kwargs:
        # Only construct the default storage when the caller didn't supply one
        kwargs["metadata_storage"] = Storage.from_url("sqlite://")
    env = Environment(**kwargs)
    env.add_module(
        SnapflowModule(
            "_test",
            schemas=[TestSchema1, TestSchema2, TestSchema3, TestSchema4],
        )
    )
    return env
Пример #8
0
def test_filesystem_api_core_operations(url):
    """Core exists/record_count/alias/copy operations on file-system storage."""
    api: FileSystemStorageApi = Storage.from_url(url).get_api()
    name = "_test"
    # Strip the "file://" scheme prefix to get the local directory path
    path = os.path.join(url[7:], name)
    with open(path, "w") as f:
        f.write("".join(["f1,f2\n", "1,2\n"]))
    assert api.exists(name)
    assert not api.exists(name + "doesntexist")
    assert api.record_count(name) == 2
    api.create_alias(name, name + "alias")
    assert api.record_count(name + "alias") == 2
    api.copy(name, name + "copy")
    assert api.record_count(name + "copy") == 2
Пример #9
0
def make_test_run_context(**kwargs) -> RunContext:
    """Build a RunContext wired to a throwaway local python storage."""
    s = Storage.from_url(url=f"python://_test_default_{rand_str(6)}")
    env = make_test_env()
    graph = Graph(env)
    ctx_args = dict(
        graph=graph,
        env=env,
        runtimes=[Runtime.from_storage(s)],
        storages=[s],
        local_python_storage=s,
        target_storage=s,
    )
    # Caller-provided overrides win over the defaults above
    ctx_args.update(**kwargs)
    return RunContext(**ctx_args)
Пример #10
0
def load_environment_from_project(project: Any) -> Environment:
    """Construct an Environment from a project object's optional attributes."""
    from snapflow.storage.storage import Storage

    env = Environment(
        metadata_storage=getattr(project, "metadata_storage", None),
        add_default_python_runtime=getattr(
            project, "add_default_python_runtime", True
        ),
    )
    for storage_url in getattr(project, "storages", []):
        env.add_storage(Storage.from_url(storage_url))
    for module_name in getattr(project, "modules", []):
        # We hijack the imported module as a SnapflowModule
        env.add_module(import_module(module_name))  # type: ignore
    return env
Пример #11
0
def test_db_to_mem(url):
    """Copy a database table into local python memory as Records and a cursor.

    Skips (with a warning) when the dialect's client library is unavailable.
    """
    s: Storage = Storage.from_url(url)
    api_cls: Type[DatabaseApi] = s.storage_engine.get_api_cls()
    mem_api: PythonStorageApi = new_local_python_storage().get_api()
    if not s.get_api().dialect_is_supported():
        # Warn instead of returning silently, consistent with test_records_to_db
        warnings.warn(
            f"Skipping tests for database engine {s.storage_engine.__name__} (client library not installed)"
        )
        return
    with api_cls.temp_local_database() as db_url:
        api: DatabaseStorageApi = Storage.from_url(db_url).get_api()
        name = "_test"
        api.execute_sql(f"create table {name} as select 1 a, 2 b")
        # Records
        conversion = Conversion(
            StorageFormat(s.storage_engine, DatabaseTableFormat),
            StorageFormat(LocalPythonStorageEngine, RecordsFormat),
        )
        copy_db_to_records.copy(name, name, conversion, api, mem_api)
        assert mem_api.get(name).records_object == [{"a": 1, "b": 2}]
        # DatabaseCursor
        conversion = Conversion(
            StorageFormat(s.storage_engine, DatabaseTableFormat),
            StorageFormat(LocalPythonStorageEngine, DatabaseCursorFormat),
        )
        copy_db_to_records.copy(name, name, conversion, api, mem_api)
        assert list(mem_api.get(name).records_object) == [{"a": 1, "b": 2}]
Пример #12
0
    def __init__(
            self,
            name: str = None,
            metadata_storage: Union["Storage", str] = None,
            add_default_python_runtime: bool = True,
            initial_modules: List[
                SnapflowModule] = None,  # Defaults to `core` module
    ):
        """Initialize the environment: metadata db, module library, storages.

        Args:
            name: Optional environment name.
            metadata_storage: Storage instance or URL for metadata; defaults
                to DEFAULT_METADATA_STORAGE_URL (a sqlite db) with a warning.
            add_default_python_runtime: Currently unused here (the block that
                consumed it is commented out below); a local python runtime is
                always appended at the end regardless.
            initial_modules: Modules to register; defaults to [core].
        """
        from snapflow.core.runtime import Runtime, LocalPythonRuntimeEngine
        from snapflow.storage.storage import Storage, new_local_python_storage
        from snapflow.modules import core

        self.name = name
        if metadata_storage is None:
            metadata_storage = DEFAULT_METADATA_STORAGE_URL
            logger.warning(
                f"No metadata storage specified, using default sqlite db `{DEFAULT_METADATA_STORAGE_URL}`"
            )
        if isinstance(metadata_storage, str):
            metadata_storage = Storage.from_url(metadata_storage)
        # NOTE(review): only reachable if DEFAULT_METADATA_STORAGE_URL is None
        if metadata_storage is None:
            raise Exception("Must specify metadata_storage or allow default")
        self.metadata_storage = metadata_storage
        self.initialize_metadata_database()
        self._local_module = DEFAULT_LOCAL_MODULE
        self.library = ComponentLibrary()
        self.storages = []
        self.runtimes = []
        self._metadata_sessions: List[Session] = []
        # if add_default_python_runtime:
        #     self.runtimes.append(
        #         Runtime(
        #             url="python://local",
        #             runtime_engine=LocalPythonRuntimeEngine,
        #         )
        #     )
        if initial_modules is None:
            initial_modules = [core]
        for m in initial_modules:
            self.add_module(m)

        # Always register a local python storage and its runtime last
        self._local_python_storage = new_local_python_storage()
        self.add_storage(self._local_python_storage)
        self.runtimes.append(Runtime.from_storage(self._local_python_storage))
Пример #13
0
def test_file_to_mem():
    """Copy a delimited file on filesystem storage into memory as Records."""
    dr = tempfile.gettempdir()
    s: Storage = Storage.from_url(f"file://{dr}")
    fs_api: FileSystemStorageApi = s.get_api()
    mem_api: PythonStorageApi = new_local_python_storage().get_api()
    name = "_test"
    fs_api.write_lines_to_file(name, ["f1,f2", "hi,2"])
    expected = [{"f1": "hi", "f2": 2}]
    conversion = Conversion(
        StorageFormat(s.storage_engine, DelimitedFileFormat),
        StorageFormat(LocalPythonStorageEngine, RecordsFormat),
    )
    copy_delim_file_to_records.copy(
        name, name, conversion, fs_api, mem_api, schema=TestSchema4
    )
    assert mem_api.get(name).records_object == expected
def test_obj_to_file():
    """Copy a delimited-file object from memory to a file on the filesystem."""
    dr = tempfile.gettempdir()
    s: Storage = Storage.from_url(f"file://{dr}")
    fs_api: FileSystemStorageApi = s.get_api()
    mem_api: PythonStorageApi = new_local_python_storage().get_api()
    name = "_test"
    fmt = DelimitedFileObjectFormat
    # Factory so a fresh, unread file object is produced for each use
    obj = lambda: StringIO("f1,f2\nhi,2")
    mem_api.put(name, as_records(obj(), data_format=fmt))
    conversion = Conversion(
        StorageFormat(LocalPythonStorageEngine, fmt),
        StorageFormat(s.storage_engine, DelimitedFileFormat),
    )
    copy_file_object_to_delim_file.copy(
        name, name, conversion, mem_api, fs_api, schema=TestSchema4
    )
    with fs_api.open(name) as f:
        assert f.read() == obj().read()
def test_records_to_file():
    """Copy Records from memory to a delimited file, then read back and compare."""
    dr = tempfile.gettempdir()
    s: Storage = Storage.from_url(f"file://{dr}")
    fs_api: FileSystemStorageApi = s.get_api()
    mem_api: PythonStorageApi = new_local_python_storage().get_api()
    name = "_test"
    fmt = RecordsFormat
    obj = [{"f1": "hi", "f2": 2}]
    mem_api.put(name, as_records(obj, data_format=fmt))
    conversion = Conversion(
        StorageFormat(LocalPythonStorageEngine, fmt),
        StorageFormat(s.storage_engine, DelimitedFileFormat),
    )
    copy_records_to_delim_file.copy(
        name, name, conversion, mem_api, fs_api, schema=TestSchema4
    )
    with fs_api.open(name) as f:
        # CSV loses types; conform back to the schema before comparing
        round_tripped = RecordsFormat.conform_records_to_schema(
            list(read_csv(f)), TestSchema4
        )
        assert round_tripped == obj
Пример #16
0
    def add_storage(self,
                    storage_like: Union[Storage, str],
                    add_runtime: bool = True) -> Storage:
        """Register a storage (by URL or instance), deduplicating by URL.

        When `add_runtime` is true, also registers a matching Runtime if the
        storage supports one (ValueError from Runtime.from_storage is ignored).
        """
        from snapflow.storage.storage import Storage

        if isinstance(storage_like, str):
            new_storage = Storage.from_url(storage_like)
        elif isinstance(storage_like, Storage):
            new_storage = storage_like
        else:
            raise TypeError
        # If a storage with the same URL is already registered, return it
        for existing in self.storages:
            if existing.url == new_storage.url:
                return existing
        self.storages.append(new_storage)
        if add_runtime:
            from snapflow.core.runtime import Runtime

            try:
                self.runtimes.append(Runtime.from_storage(new_storage))
            except ValueError:
                # Storage has no corresponding runtime; skip silently
                pass
        return new_storage
Пример #17
0
    def storage(self) -> Storage:
        """Return a Storage reconstructed from this object's `storage_url`."""
        # Function-scope import — presumably to avoid a circular import,
        # matching the pattern used elsewhere in this codebase
        from snapflow.storage.storage import Storage

        return Storage.from_url(self.storage_url)
Пример #18
0
 def as_storage(self) -> Storage:
     """Represent this runtime as a Storage on its natural storage engine."""
     engine = self.runtime_engine.natural_storage_engine
     return Storage(url=self.url, storage_engine=engine)