Пример #1
0
def dataset_db_with_swap_table(metadata_db):
    """Seed my_database with ``dataset_test`` and its ``*_swap`` counterpart.

    The ``dataset_test_20220101t000000_swap`` table mimics the intermediate
    table created before a table swap; both tables are reset to known rows.
    Returns the Database model instance.
    """
    db = factories.DatabaseFactory(memorable_name="my_database")
    dsn = database_dsn(settings.DATABASES_DATA["my_database"])
    # psycopg2's connection context manager commits the transaction on exit.
    with psycopg2.connect(dsn) as conn:
        cursor = conn.cursor()
        cursor.execute(
            """
            CREATE TABLE IF NOT EXISTS dataset_test (
                id INT,
                name VARCHAR(255),
                date DATE
            );

            DELETE FROM dataset_test;
            INSERT INTO dataset_test values(1,'test','2022-01-01');

            CREATE TABLE IF NOT EXISTS dataset_test_20220101t000000_swap (
                id INT,
                name VARCHAR(255),
                date DATE
            );

            DELETE FROM dataset_test_20220101t000000_swap;
            INSERT INTO dataset_test_20220101t000000_swap values(1,'test','2022-01-01');
            INSERT INTO dataset_test_20220101t000000_swap values(2,'test_2','2022-01-02');

            """
        )
    return db
Пример #2
0
    def test_view_download(self):
        """End-to-end download of a SourceView backed by a real database view.

        Seeds ``download_test_table``/``download_test_view`` in my_database,
        then asserts the streamed CSV bytes, the new EventLog entry and the
        incremented dataset download counter.
        """
        dsn = database_dsn(settings.DATABASES_DATA['my_database'])
        with connect(dsn) as conn, conn.cursor() as cursor:
            cursor.execute('''
                CREATE TABLE if not exists download_test_table (field2 int,field1 varchar(255));
                TRUNCATE TABLE download_test_table;
                INSERT INTO download_test_table VALUES(1, 'record1');
                INSERT INTO download_test_table VALUES(2, 'record2');
                CREATE OR REPLACE VIEW download_test_view AS SELECT * FROM download_test_table;
                ''')

        dataset = factories.DataSetFactory(
            user_access_type='REQUIRES_AUTHENTICATION')
        source_view = factories.SourceViewFactory(
            dataset=dataset,
            database=factories.DatabaseFactory(memorable_name='my_database'),
            schema='public',
            view='download_test_view',
        )
        # Snapshot counts so the assertions are relative to pre-request state.
        log_count = EventLog.objects.count()
        download_count = dataset.number_of_downloads
        response = self._authenticated_get(source_view.get_absolute_url())
        self.assertEqual(response.status_code, 200)
        # Expected CSV: header row, both records, then a trailing row count.
        self.assertEqual(
            b''.join(response.streaming_content),
            b'"field2","field1"\r\n1,"record1"\r\n2,"record2"\r\n"Number of rows: 2"\r\n',
        )
        self.assertEqual(EventLog.objects.count(), log_count + 1)
        self.assertEqual(
            EventLog.objects.latest().event_type,
            EventLog.TYPE_DATASET_SOURCE_VIEW_DOWNLOAD,
        )
        self.assertEqual(
            DataSet.objects.get(pk=dataset.id).number_of_downloads,
            download_count + 1)
Пример #3
0
    def test_view_download(self, access_type, request_client, published):
        """Parametrised download test for a SourceView over a real view.

        ``access_type``, ``request_client`` and ``published`` come from test
        parametrisation. Seeds ``download_test_view`` in my_database, then
        asserts the streamed CSV bytes, EventLog entry and download counter.
        """
        dsn = database_dsn(settings.DATABASES_DATA["my_database"])
        with psycopg2.connect(dsn) as conn, conn.cursor() as cursor:
            cursor.execute("""
                CREATE TABLE if not exists download_test_table (field2 int,field1 varchar(255));
                TRUNCATE TABLE download_test_table;
                INSERT INTO download_test_table VALUES(1, 'record1');
                INSERT INTO download_test_table VALUES(2, 'record2');
                CREATE OR REPLACE VIEW download_test_view AS SELECT * FROM download_test_table;
                """)

        dataset = factories.DataSetFactory(user_access_type=access_type,
                                           published=published)
        source_view = factories.SourceViewFactory(
            dataset=dataset,
            database=factories.DatabaseFactory(memorable_name="my_database"),
            schema="public",
            view="download_test_view",
        )
        # Snapshot counts so the assertions are relative to pre-request state.
        log_count = EventLog.objects.count()
        download_count = dataset.number_of_downloads
        response = request_client.get(source_view.get_absolute_url())
        assert response.status_code == 200
        # Expected CSV: header row, both records, then a trailing row count.
        assert (
            b"".join(response.streaming_content) ==
            b'"field2","field1"\r\n1,"record1"\r\n2,"record2"\r\n"Number of rows: 2"\r\n'
        )
        assert EventLog.objects.count() == log_count + 1
        assert EventLog.objects.latest(
        ).event_type == EventLog.TYPE_DATASET_SOURCE_VIEW_DOWNLOAD
        assert DataSet.objects.get(
            pk=dataset.id).number_of_downloads == download_count + 1
 def test_materialized_view_download(self, request_client, published):
     """Download a SourceView backed by a MATERIALIZED VIEW.

     Same flow as the plain-view test, but the source is a materialized
     view, so it is dropped and recreated rather than CREATE OR REPLACEd.
     """
     dsn = database_dsn(settings.DATABASES_DATA['my_database'])
     with psycopg2.connect(dsn) as conn, conn.cursor() as cursor:
         cursor.execute('''
             CREATE TABLE if not exists materialized_test_table (field2 int,field1 varchar(255));
             TRUNCATE TABLE materialized_test_table;
             INSERT INTO materialized_test_table VALUES(1, 'record1');
             INSERT INTO materialized_test_table VALUES(2, 'record2');
             DROP MATERIALIZED VIEW IF EXISTS materialized_test_view;
             CREATE MATERIALIZED VIEW materialized_test_view AS
             SELECT * FROM materialized_test_table;
             ''')
     dataset = factories.DataSetFactory(
         user_access_type='REQUIRES_AUTHENTICATION', published=published)
     source_view = factories.SourceViewFactory(
         dataset=dataset,
         database=factories.DatabaseFactory(memorable_name='my_database'),
         schema='public',
         view='materialized_test_view',
     )
     # Snapshot counts so the assertions are relative to pre-request state.
     log_count = EventLog.objects.count()
     download_count = dataset.number_of_downloads
     response = request_client.get(source_view.get_absolute_url())
     assert response.status_code == 200
     assert (
         b''.join(response.streaming_content) ==
         b'"field2","field1"\r\n1,"record1"\r\n2,"record2"\r\n"Number of rows: 2"\r\n'
     )
     assert EventLog.objects.count() == log_count + 1
     assert (EventLog.objects.latest().event_type ==
             EventLog.TYPE_DATASET_SOURCE_VIEW_DOWNLOAD)
     assert (DataSet.objects.get(
         pk=dataset.id).number_of_downloads == download_count + 1)
Пример #5
0
def metadata_db(db):
    """Create ``dataflow.metadata`` in my_database and seed four sample rows.

    Returns the Database model instance for use by dependent fixtures.
    """
    database = factories.DatabaseFactory(memorable_name="my_database")
    dsn = database_dsn(settings.DATABASES_DATA["my_database"])
    with psycopg2.connect(dsn) as conn:
        with conn.cursor() as cursor:
            cursor.execute(
                """
            CREATE SCHEMA IF NOT EXISTS dataflow;
            CREATE TABLE IF NOT EXISTS dataflow.metadata (
                id SERIAL,
                table_schema TEXT,
                table_name TEXT,
                source_data_modified_utc TIMESTAMP WITHOUT TIME ZONE,
                dataflow_swapped_tables_utc TIMESTAMP WITHOUT TIME ZONE,
                table_structure JSONB,
                data_ids TEXT[],
                data_type INTEGER NOT NULL,
                data_hash_v1 TEXT,
                primary_keys TEXT[]
            );
            TRUNCATE TABLE dataflow.metadata;
            INSERT INTO dataflow.metadata (
                table_schema, table_name, source_data_modified_utc, dataflow_swapped_tables_utc, table_structure, data_type
            )
            VALUES
                ('public','table1','2020-09-02 00:01:00.0','2020-09-02 00:01:00.0','{"field1":"int","field2":"varchar"}',1),
                ('public','table2','2020-09-01 00:01:00.0','2020-09-02 00:01:00.0',NULL,1),
                ('public','table1','2020-01-01 00:01:00.0','2020-09-02 00:01:00.0',NULL,1),
                ('public','table4', NULL,'2021-12-01 00:00:00.0',NULL,1);
            """
            )
        conn.commit()
    return database
Пример #6
0
 def test_rds_sync(self, mock_client, dataset_db):
     """Sync tool query logs from mocked RDS log-file API responses.

     Feeds three paginated ``download_db_log_file_portion`` payloads through
     ``_do_sync_tool_query_logs`` and asserts that exactly two new audit log
     rows (and one new audit-log table row) are recorded — presumably only
     the entries with a recognised user/database and in-range timestamp.
     """
     # NOTE(review): "q" "uery_tool_logs_last_run" is implicit string
     # concatenation — i.e. the cache key "query_tool_logs_last_run".
     cache.delete("q" "uery_tool_logs_last_run")
     log_count = ToolQueryAuditLog.objects.count()
     table_count = ToolQueryAuditLogTable.objects.count()
     factories.DatabaseFactory(memorable_name="my_database")
     factories.DatabaseUserFactory.create(username="******")
     factories.SourceTableFactory.create(schema="public", table="test_dataset")
     mock_client.return_value.describe_db_log_files.return_value = {
         "DescribeDBLogFiles": [
             {"LogFileName": "/file/1.csv"},
             {"LogFileName": "/file/2.csv"},
         ]
     }
     # Three pages of log data; only entries with valid user+db and an
     # in-range, non-duplicate timestamp should be ingested.
     mock_client.return_value.download_db_log_file_portion.side_effect = [
         {
             "Marker": "1",
             "AdditionalDataPending": True,
             "LogFileData": (
                 # Valid user and db select statement
                 self.log_data[0]
                 # Non-pgaudit log
                 + self.log_data[1]
             ),
         },
         {
             "Marker": None,
             "AdditionalDataPending": False,
             "LogFileData": (
                 # Unrecognised user
                 self.log_data[2]
                 # Unrecognised database
                 + self.log_data[3]
             ),
         },
         {
             "Marker": None,
             "AdditionalDataPending": False,
             "LogFileData": (
                 # Valid username and db insert statement
                 self.log_data[4]
                 # Timestamp out of range
                 + self.log_data[5]
                 # No timestamp
                 + self.log_data[6]
                 # Duplicate log entry
                 + self.log_data[7]
             ),
         },
     ]
     _do_sync_tool_query_logs()
     queries = ToolQueryAuditLog.objects.all()
     tables = ToolQueryAuditLogTable.objects.all()
     assert queries.count() == log_count + 2
     assert tables.count() == table_count + 1
     assert list(queries)[-2].query_sql == "SELECT * FROM dataset_test"
     assert list(queries)[-2].connection_from == "172.19.0.4"
     assert list(queries)[-1].query_sql == "INSERT INTO dataset_test VALUES(1);"
     assert list(queries)[-1].connection_from == "172.19.0.5"
Пример #7
0
 def test_missing_view(self, client):
     """A SourceView whose database view does not exist returns 404.

     The failed request must not bump the dataset's download counter.
     """
     ds = factories.DataSetFactory(user_access_type='REQUIRES_AUTHENTICATION')
     missing_view = factories.SourceViewFactory(
         database=factories.DatabaseFactory(memorable_name='my_database'),
         dataset=ds,
     )
     downloads_before = ds.number_of_downloads
     response = client.get(missing_view.get_absolute_url())
     assert response.status_code == 404
     assert DataSet.objects.get(pk=ds.id).number_of_downloads == downloads_before
 def test_db(self, db):
     """Yield a Database whose backing DB really contains ``test_table``.

     The table is dropped again once the test using this fixture finishes.
     """
     database = factories.DatabaseFactory(memorable_name='my_database')
     dsn = database_dsn(settings.DATABASES_DATA['my_database'])
     with psycopg2.connect(dsn) as conn:
         cursor = conn.cursor()
         cursor.execute('''
              CREATE TABLE IF NOT EXISTS test_table AS (
                  SELECT 1 as a, 2 as b
              );
          ''')
         conn.commit()
         yield database
         cursor.execute('DROP TABLE test_table')
Пример #9
0
 def test_missing_view(self):
     """Downloading a missing view 404s and leaves the counter unchanged."""
     ds = factories.DataSetFactory(
         user_access_type='REQUIRES_AUTHENTICATION')
     missing_view = factories.SourceViewFactory(
         database=factories.DatabaseFactory(memorable_name='my_database'),
         dataset=ds,
     )
     downloads_before = ds.number_of_downloads
     response = self._authenticated_get(missing_view.get_absolute_url())
     self.assertEqual(response.status_code, 404)
     refreshed = DataSet.objects.get(pk=ds.id)
     self.assertEqual(refreshed.number_of_downloads, downloads_before)
Пример #10
0
 def setUp(self):
     """Create and repopulate ``custom_query_test`` before every test."""
     super().setUp()
     self.database = factories.DatabaseFactory(memorable_name='my_database')
     self.dsn = database_dsn(settings.DATABASES_DATA['my_database'])
     with connect(self.dsn) as conn:
         with conn.cursor() as cursor:
             cursor.execute('''
              CREATE TABLE IF NOT EXISTS custom_query_test (
                  id INT,
                  name VARCHAR(255),
                  date DATE
              );
              TRUNCATE TABLE custom_query_test;
              INSERT INTO custom_query_test VALUES(1, 'the first record', NULL);
              INSERT INTO custom_query_test VALUES(2, 'the second record', '2019-01-01');
              INSERT INTO custom_query_test VALUES(3, 'the last record', NULL);
              ''')
Пример #11
0
def metadata_db(db):
    """Create a minimal ``dataflow.metadata`` table and seed it with rows.

    Returns the Database model instance for "my_database".
    """
    database = factories.DatabaseFactory(memorable_name='my_database')
    dsn = database_dsn(settings.DATABASES_DATA['my_database'])
    with psycopg2.connect(dsn) as conn:
        with conn.cursor() as cursor:
            cursor.execute('''
            CREATE SCHEMA IF NOT EXISTS dataflow;
            CREATE TABLE IF NOT EXISTS dataflow.metadata (
                id int, table_schema text, table_name text, source_data_modified_utc timestamp
            );
            TRUNCATE TABLE dataflow.metadata;
            INSERT INTO dataflow.metadata VALUES(1, 'public', 'table1', '2020-09-02 00:01:00.0');
            INSERT INTO dataflow.metadata VALUES(1, 'public', 'table2', '2020-09-01 00:01:00.0');
            INSERT INTO dataflow.metadata VALUES(1, 'public', 'table1', '2020-01-01 00:01:00.0');
            INSERT INTO dataflow.metadata VALUES(1, 'public', 'table4', NULL);
            ''')
        conn.commit()
    return database
Пример #12
0
def dataset_finder_db(metadata_db):
    """Create the tables that the dataset-finder tests expect in my_database.

    Sets up the ``dataworkspace__*`` lookup tables with one sample catalogue
    item, plus sample ``data`` and ``country_stats`` tables, and returns the
    Database model instance.
    """
    db = factories.DatabaseFactory(memorable_name="my_database")
    dsn = database_dsn(settings.DATABASES_DATA["my_database"])
    # The connection context manager commits the transaction on exit.
    with psycopg2.connect(dsn) as conn:
        cursor = conn.cursor()
        cursor.execute(
            """
            CREATE TABLE IF NOT EXISTS dataworkspace__source_tables (
                id INT,
                name VARCHAR(255),
                dataset_id UUID,
                schema VARCHAR(255),
                "table" VARCHAR(255)
            );

            CREATE TABLE IF NOT EXISTS dataworkspace__catalogue_items (
                id UUID,
                name VARCHAR(255),
                slug VARCHAR(255)
            );

            INSERT INTO dataworkspace__source_tables VALUES(
                1, 'public.data', '0dea6147-d355-4b6d-a140-0304ef9cfeca', 'public', 'data'
            );

            INSERT INTO dataworkspace__catalogue_items VALUES(
                '0dea6147-d355-4b6d-a140-0304ef9cfeca', 'public.data', '1'
            );

            CREATE SCHEMA IF NOT EXISTS public;
            CREATE TABLE IF NOT EXISTS data (
                id int,
                name VARCHAR(255),
                database VARCHAR(255),
                schema VARCHAR(255),
                frequency VARCHAR(255),
                "table" VARCHAR(255)
            );

            CREATE TABLE IF NOT EXISTS country_stats (
                date DATE,
                driving NUMERIC,
                country VARCHAR(255)
            );
            """
        )
    return db
Пример #13
0
 def test_db(self, db):
     """Yield the memorable name of a database seeded with ``test_sample``.

     ``test_sample`` holds rows with varying NULL patterns and is dropped
     again on fixture teardown.
     """
     database = factories.DatabaseFactory(memorable_name="my_database")
     dsn = database_dsn(settings.DATABASES_DATA["my_database"])
     with psycopg2.connect(dsn) as conn:
         cursor = conn.cursor()
         cursor.execute("""
          CREATE TABLE IF NOT EXISTS test_sample AS (
              with data (x, y, z) as (values
                  (NULL, NULL, NULL),
                  ('a', 'b', 'c'),
                  ('a', 'b', NULL),
                  ('a', NULL, NULL)
              )
              SELECT * from data
          );
          """)
         conn.commit()
         yield database.memorable_name
         cursor.execute("DROP TABLE test_sample")
Пример #14
0
 def test_docker_sync(self, mock_os, dataset_db):
     """Sync tool query logs from local (docker) pgaudit log files.

     ``os`` is mocked so the sync sees two files with a current mtime;
     after ``_do_sync_tool_query_logs`` runs, exactly two new audit log
     rows and one new audit-log table row must exist.
     """
     cache.delete("query_tool_logs_last_run")
     # Snapshot counts so the assertions are relative to pre-sync state.
     table_count = ToolQueryAuditLogTable.objects.count()
     log_count = ToolQueryAuditLog.objects.count()
     factories.DatabaseFactory(memorable_name="my_database")
     factories.DatabaseUserFactory.create(username="******")
     factories.SourceTableFactory.create(schema="public", table="test_dataset")
     mock_os.listdir.return_value = [
         "file1.csv",
         "file2.log",
     ]
     # Current timestamp keeps the mocked files within the sync window.
     mock_os.path.getmtime.return_value = datetime.datetime.now().timestamp()
     _do_sync_tool_query_logs()
     queries = ToolQueryAuditLog.objects.all()
     tables = ToolQueryAuditLogTable.objects.all()
     assert queries.count() == log_count + 2
     assert tables.count() == table_count + 1
     assert list(queries)[-2].query_sql == "SELECT * FROM dataset_test"
     assert list(queries)[-1].query_sql == "INSERT INTO dataset_test VALUES(1);"
def dataset_db(metadata_db):
    """Seed my_database with ``dataset_test``/``dataset_test2`` and a view.

    Returns the Database model instance.
    """
    db = factories.DatabaseFactory(memorable_name='my_database')
    dsn = database_dsn(settings.DATABASES_DATA['my_database'])
    # The connection context manager commits the transaction on exit.
    with psycopg2.connect(dsn) as conn:
        cursor = conn.cursor()
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS dataset_test (
                id INT,
                name VARCHAR(255),
                date DATE
            );

            CREATE TABLE IF NOT EXISTS dataset_test2 (
                id INT,
                name VARCHAR(255)
            );

            CREATE OR REPLACE VIEW dataset_view AS (SELECT * FROM dataset_test);
            ''')
    return db
Пример #16
0
def test_download_filtered_columns(client, mocker, dataset_finder_db):
    """Dataset-finder grid download honours the requested column subset.

    Elasticsearch is fully mocked: the first ``search`` response supplies
    the aggregation buckets naming the backing index, the second returns
    two country rows. The POST requests only the ``country`` column, so
    the streamed CSV must contain just that column.
    """
    source_table = factories.SourceTableFactory.create(
        dataset=factories.MasterDataSetFactory.create(
            user_access_type=UserAccessType.OPEN),
        schema="public",
        table="country_stats",
        database=factories.DatabaseFactory(memorable_name="my_database"),
    )
    get_fields = mocker.patch(
        "dataworkspace.apps.finder.elasticsearch.ElasticsearchClient.get_fields"
    )
    get_fields.return_value = ["date", "country", "driving"]
    dataset_search = mocker.patch("elasticsearch.Elasticsearch.search")
    # Two canned ES responses, consumed in order by successive searches.
    dataset_search.side_effect = [
        {
            "took": 11,
            "timed_out": False,
            "_shards": {
                "total": 45,
                "successful": 45,
                "skipped": 0,
                "failed": 0
            },
            "hits": {
                "total": {
                    "value": 1260,
                    "relation": "eq"
                },
                "max_score": None,
                "hits": [],
            },
            "aggregations": {
                "indexes": {
                    "doc_count_error_upper_bound":
                    0,
                    "sum_other_doc_count":
                    0,
                    "buckets": [
                        {
                            "key": "20210316t070000--public--country_stats--1",
                            "doc_count": 3,
                        },
                    ],
                }
            },
        },
        {
            "took": 11,
            "timed_out": False,
            "_shards": {
                "total": 1,
                "successful": 1,
                "skipped": 0,
                "failed": 0
            },
            "hits": {
                "total": {
                    "value": 3,
                    "relation": "eq"
                },
                "max_score":
                None,
                "hits": [
                    {
                        "_index": "20210316t070000--public--country_stats--1",
                        "_type": "_doc",
                        "_id": "1",
                        "_score": 1.0,
                        "_source": {
                            "country": "Albania",
                            "date": "2020-01-13",
                            "driving": 0.0,
                        },
                    },
                    {
                        "_index": "20210316t070000--public--country_stats--1",
                        "_type": "_doc",
                        "_id": "1",
                        "_score": 1.0,
                        "_source": {
                            "country": "Algeria",
                            "date": "2020-01-14",
                            "driving": 1.5,
                        },
                    },
                ],
            },
        },
    ]

    params = {
        "q": "albania",
        "name": "test",
        "uuid": source_table.dataset.id,
        "slug": "slug",
        "download": "1",
    }

    # POST the grid request asking for only the "country" column.
    response = client.post(
        reverse(
            "finder:data_grid_results",
            kwargs={
                "schema": "public",
                "table": "country_stats"
            },
        ) + "?" + urlencode(params),
        {
            "columns": ["country"],
            "filters": {}
        },
    )

    assert response.status_code == 200
    # Only the requested "country" column appears in the CSV output.
    assert b"".join(response.streaming_content) == (
        b'"country"\r\n"Albania"\r\n"Algeria"\r\n')
Пример #17
0
 def _get_database(self):
     """Return the Database model instance for my_database."""
     database = factories.DatabaseFactory(memorable_name='my_database')
     return database
 def test_db(self, db):
     """Yield a my_database Database instance for the duration of a test."""
     database = factories.DatabaseFactory(memorable_name='my_database')
     yield database