Пример #1
0
def _mount_mysql(repository):
    """Mount the ``mysqlorigin`` database via the ``mysql_fdw`` handler.

    The mount target is ``repository.to_schema()``. A single table,
    ``mushrooms``, is declared explicitly together with its column list.

    :param repository: object exposing ``to_schema()``; the returned value is
        handed to ``mount`` as the mountpoint.
    """
    # We don't use this one in tests beyond basic mounting, so no point importing it.
    connection_options = {
        "host": "mysqlorigin",
        "port": 3306,
        "username": "******",
        "password": "******",
        "dbname": "mysqlschema",
    }
    mushroom_columns = [
        TableColumn(1, "mushroom_id", "integer", False),
        TableColumn(2, "name", "character varying (20)", False),
        TableColumn(3, "discovery", "timestamp", False),
        TableColumn(4, "friendly", "boolean", False),
        TableColumn(5, "binary_data", "bytea", False),
        TableColumn(6, "varbinary_data", "bytea", False),
    ]
    # The second tuple element is an empty dict -- presumably per-table
    # options; confirm against the signature of mount().
    table_definitions = {"mushrooms": (mushroom_columns, {})}

    mount(
        repository.to_schema(),
        "mysql_fdw",
        connection_options,
        tables=table_definitions,
    )
Пример #2
0
def _mount_mongo(repository):
    """Import the Mongo origin's ``stuff`` collection into ``repository``.

    The collection is first mounted into the staging schema ``tmp`` through
    the ``mongo_fdw`` handler, then imported into ``repository`` as foreign
    tables with a checkout, and finally the staging schema is deleted.

    The staging schema is deleted even when the import raises, so a failed
    import does not leave a stale ``tmp`` mount behind.

    :param repository: object exposing ``import_tables``; receives the
        tables from the staging mount.
    """
    staging_schema = "tmp"
    mount(
        staging_schema,
        "mongo_fdw",
        dict(
            server="mongoorigin",
            port=27017,
            username="******",
            password="******",
            stuff={
                "db": "origindb",
                "coll": "stuff",
                "schema": {
                    "name": "text",
                    "duration": "numeric",
                    "happy": "boolean"
                },
            },
        ),
    )
    # The try block starts only after mount() has succeeded, so a failing
    # mount propagates exactly as before and we never delete a schema that
    # this function did not create.
    try:
        repository.import_tables(
            [],
            R(staging_schema),
            [],
            foreign_tables=True,
            do_checkout=True,
        )
    finally:
        R(staging_schema).delete()
Пример #3
0
def _mount_mysql(repository):
    """Mount the ``mysqlorigin`` database via the ``mysql_fdw`` handler.

    The mount target is ``repository.to_schema()``. No explicit table list
    is passed; only the connection options and ``remote_schema`` are.

    :param repository: object exposing ``to_schema()``; the returned value is
        handed to ``mount`` as the mountpoint.
    """
    # We don't use this one in tests beyond basic mounting, so no point importing it.
    mount(
        repository.to_schema(),
        "mysql_fdw",
        # A single dict() is enough: the previous dict(dict(...)) wrapper only
        # produced a throwaway copy with identical contents.
        dict(
            server="mysqlorigin",
            port=3306,
            username="******",
            password="******",
            remote_schema="mysqlschema",
        ),
    )
Пример #4
0
def _mount_postgres(repository, tables=None):
    """Import tables from the Postgres origin into ``repository``.

    The origin is first mounted into the staging schema ``tmp`` through the
    ``postgres_fdw`` handler, then imported into ``repository`` as foreign
    tables with a checkout, and finally the staging schema is deleted.

    The staging schema is deleted even when the import raises, so a failed
    import does not leave a stale ``tmp`` mount behind.

    :param repository: object exposing ``import_tables``; receives the
        tables from the staging mount.
    :param tables: forwarded unchanged as the ``tables`` entry of the handler
        options (``None`` by default).
    """
    staging_schema = "tmp"
    mount(
        staging_schema,
        "postgres_fdw",
        dict(
            host="pgorigin",
            port=5432,
            username="******",
            password="******",
            dbname="origindb",
            remote_schema="public",
            tables=tables,
        ),
    )
    # The try block starts only after mount() has succeeded, so a failing
    # mount propagates exactly as before and we never delete a schema that
    # this function did not create.
    try:
        repository.import_tables(
            [], R(staging_schema), [], foreign_tables=True, do_checkout=True
        )
    finally:
        R(staging_schema).delete()
Пример #5
0
def test_socrata_mounting_error():
    """Mounting raises ``RepositoryNotFoundError`` when listing datasets fails.

    ``sodapy.Socrata`` is patched with a mock whose ``datasets`` call raises
    a generic exception carrying an "Unknown response format" message.
    """
    failing_client = MagicMock(spec=Socrata)
    failing_client.datasets.side_effect = Exception(
        "Unknown response format: text/html; charset=utf-8"
    )
    mount_options = {
        "domain": "example.com",
        "tables": {"some_table": "xzkq-xp2w"},
        "app_token": "some_token",
    }

    client_patch = mock.patch("sodapy.Socrata", return_value=failing_client)
    with client_patch, pytest.raises(RepositoryNotFoundError):
        mount("test/pg_mount", "socrata", mount_options)
Пример #6
0
def test_mount_elasticsearch(local_engine_empty):
    """Mount one Elasticsearch table and check the schema the engine reports.

    The ``test/es_mount`` repository is deleted afterwards regardless of the
    outcome.
    """
    # No ES running in this stack: this is just a test that we can instantiate the FDW.
    es_repo = Repository("test", "es_mount")
    column_types = {
        "id": "text",
        "@timestamp": "timestamp",
        "query": "text",
        "col_1": "text",
        "col_2": "boolean",
    }
    table_spec = {
        "table_1": {
            "schema": column_types,
            "index": "index-pattern*",
            "rowid_column": "id",
            "query_column": "query",
        }
    }
    handler_options = {
        "username": None,
        "password": None,
        "server": "elasticsearch",
        "port": 9200,
        "table_spec": table_spec,
    }

    try:
        mount(es_repo.to_schema(), "elasticsearch", handler_options)

        actual_schema = get_engine().get_full_table_schema(es_repo.to_schema(), "table_1")
        # "timestamp" was requested above; the expectation uses the spelled-out
        # "timestamp without time zone" type name.
        expected_types = [
            ("id", "text"),
            ("@timestamp", "timestamp without time zone"),
            ("query", "text"),
            ("col_1", "text"),
            ("col_2", "boolean"),
        ]
        expected_schema = [
            TableColumn(
                ordinal=position,
                name=column_name,
                pg_type=column_type,
                is_pk=False,
                comment=None,
            )
            for position, (column_name, column_type) in enumerate(expected_types, start=1)
        ]
        assert actual_schema == expected_schema
    finally:
        es_repo.delete()
Пример #7
0
def test_socrata_smoke(domain, dataset_id, local_engine_empty):
    """Mount a Socrata dataset as ``socrata_mount.data`` and read ten rows.

    The ``socrata_mount`` schema is dropped afterwards regardless of the
    outcome.

    :param domain: value passed as the ``domain`` handler option.
    :param dataset_id: dataset identifier mapped to the ``data`` table.
    :param local_engine_empty: engine used to run the query and drop the schema.
    """
    # This relies on the Socrata API being available, but good to smoke test some popular datasets
    # to make sure the mounting works end-to-end.
    mount_schema = "socrata_mount"
    handler_options = {"domain": domain, "tables": {"data": dataset_id}}
    try:
        mount(mount_schema, "socrata", handler_options)
        rows = local_engine_empty.run_sql("SELECT * FROM socrata_mount.data LIMIT 10")
        assert len(rows) == 10
    finally:
        local_engine_empty.delete_schema(mount_schema)
Пример #8
0
def test_socrata_mounting_slug(local_engine_empty):
    """Mounting without a ``tables`` option yields one slug-named table.

    ``sodapy.Socrata`` is patched with a mock whose ``datasets`` call returns
    the metadata loaded from ``socrata/find_datasets.json``.
    """
    fixture_path = os.path.join(INGESTION_RESOURCES, "socrata/find_datasets.json")
    with open(fixture_path, "r") as fixture:
        dataset_listing = json.load(fixture)

    fake_client = MagicMock(spec=Socrata)
    fake_client.datasets.return_value = dataset_listing

    handler_options = {"domain": "example.com", "app_token": "some_token"}
    with mock.patch("sodapy.Socrata", return_value=fake_client):
        mount("test/pg_mount", "socrata", handler_options)

    mounted_tables = local_engine_empty.get_all_tables("test/pg_mount")
    assert mounted_tables == ["current_employee_names_salaries_and_position_xzkq_xp2w"]
Пример #9
0
def test_socrata_mounting_missing_tables():
    """Mounting with the dataset ID ``wrong_id`` raises ``ValueError``.

    ``sodapy.Socrata`` is patched with a mock whose ``datasets`` call returns
    the metadata loaded from ``socrata/find_datasets.json``. The error text
    must contain the "Missing tables" message asserted below.
    """
    fixture_path = os.path.join(INGESTION_RESOURCES, "socrata/find_datasets.json")
    with open(fixture_path, "r") as fixture:
        dataset_listing = json.load(fixture)

    fake_client = MagicMock(spec=Socrata)
    fake_client.datasets.return_value = dataset_listing

    handler_options = {
        "domain": "example.com",
        "tables": {"some_table": "wrong_id"},
        "app_token": "some_token",
    }
    client_patch = mock.patch("sodapy.Socrata", return_value=fake_client)
    with client_patch, pytest.raises(ValueError) as excinfo:
        mount("test/pg_mount", "socrata", handler_options)

    error_text = str(excinfo.value)
    assert "Some Socrata tables couldn't be found! Missing tables: xzkq-xp2w" in error_text
Пример #10
0
def _execute_db_import(
    conn_string,
    fdw_name,
    fdw_params,
    table_names,
    target_mountpoint,
    table_aliases,
    table_queries,
) -> ProvenanceLine:
    """Mount a foreign database into a staging schema and import tables from it.

    The staging repository is named ``<fdw_name>_tmp_staging`` and is deleted
    when the function exits, whether or not the import succeeded.

    :param conn_string: object exposing ``group()`` (its result is parsed by
        ``conn_string_to_dict``), or a falsy value, in which case ``None`` is
        parsed instead.
    :param fdw_name: handler name passed to ``mount``; also the prefix of the
        staging schema name.
    :param fdw_params: JSON text holding the handler options.
    :param table_names: forwarded to ``import_tables`` as the source tables.
    :param target_mountpoint: object whose ``import_tables`` receives the data.
    :param table_aliases: forwarded to ``import_tables`` as the target names.
    :param table_queries: forwarded to ``import_tables`` as ``table_queries``.
    :return: the provenance record ``{"type": "MOUNT"}``.
    """
    staging_repo = Repository.from_schema(fdw_name + "_tmp_staging")
    try:
        mount_options = json.loads(fdw_params)
        raw_connection = conn_string.group() if conn_string else None
        # Connection details are merged over the decoded JSON parameters.
        mount_options.update(conn_string_to_dict(raw_connection))
        mount(staging_repo.to_schema(), fdw_name, mount_options)

        # The foreign database is a moving target, so the new image hash is random.
        # Maybe in the future, when the object hash is a function of its contents, we can be smarter here...
        random_image_hash = "{:064x}".format(getrandbits(256))
        target_mountpoint.import_tables(
            table_aliases,
            staging_repo,
            table_names,
            target_hash=random_image_hash,
            foreign_tables=True,
            table_queries=table_queries,
        )
        provenance = {"type": "MOUNT"}
        return provenance
    finally:
        staging_repo.delete()
Пример #11
0
def test_socrata_mounting(local_engine_empty):
    """Mount one Socrata dataset as ``some_table`` and verify what was created.

    ``sodapy.Socrata`` is patched with a mock whose ``datasets`` call returns
    the metadata loaded from ``socrata/find_datasets.json``. The test then
    checks the full table schema and the ``column_map`` foreign table option.
    """
    fixture_path = os.path.join(INGESTION_RESOURCES, "socrata/find_datasets.json")
    with open(fixture_path, "r") as fixture:
        dataset_listing = json.load(fixture)

    fake_client = MagicMock(spec=Socrata)
    fake_client.datasets.return_value = dataset_listing

    handler_options = {
        "domain": "example.com",
        "tables": {"some_table": "xzkq-xp2w"},
        "app_token": "some_token",
    }
    with mock.patch("sodapy.Socrata", return_value=fake_client):
        mount("test/pg_mount", "socrata", handler_options)

    # (name, pg_type, comment) per column, in ordinal order. mock.ANY marks
    # comments whose exact text this test does not pin down.
    expected_columns = [
        (":id", "text", "Socrata column ID"),
        (
            "full_or_part_time",
            "text",
            "Whether the employee was employed full- (F) or part-time (P).",
        ),
        ("hourly_rate", "numeric", mock.ANY),
        ("salary_or_hourly", "text", mock.ANY),
        (
            "job_titles",
            "text",
            "Title of employee at the time when the data was updated.",
        ),
        ("typical_hours", "numeric", mock.ANY),
        ("annual_salary", "numeric", mock.ANY),
        (_long_name_col_sg, "text", mock.ANY),
        ("department", "text", "Department where employee worked."),
    ]
    expected_schema = [
        TableColumn(
            ordinal=position,
            name=column_name,
            pg_type=column_type,
            is_pk=False,
            comment=column_comment,
        )
        for position, (column_name, column_type, column_comment) in enumerate(
            expected_columns, start=1
        )
    ]
    actual_schema = local_engine_empty.get_full_table_schema("test/pg_mount", "some_table")
    assert actual_schema == expected_schema

    column_map_rows = local_engine_empty.run_sql(
        "SELECT option_value FROM information_schema.foreign_table_options "
        "WHERE foreign_table_name = 'some_table' "
        "AND foreign_table_schema = 'test/pg_mount' "
        "AND option_name = 'column_map'"
    )
    expected_column_map = f'{{"{_long_name_col_sg}": "{_long_name_col}"}}'
    assert column_map_rows == [(expected_column_map,)]
Пример #12
0
 def _callback(schema, connection, handler_options):
     """Merge the parsed connection into ``handler_options`` and mount ``schema``.

     ``handler_options`` is updated in place with the result of
     ``conn_string_to_dict(connection)``. The handler name comes from
     ``handler_name`` in the enclosing scope, which is not visible here.
     """
     connection_params = conn_string_to_dict(connection)
     handler_options.update(connection_params)
     mount(schema, mount_handler=handler_name, handler_kwargs=handler_options)