Example #1
def dataset_db_with_swap_table(metadata_db):
    database = factories.DatabaseFactory(memorable_name="my_database")
    with psycopg2.connect(database_dsn(settings.DATABASES_DATA["my_database"])) as conn:
        conn.cursor().execute(
            """
            CREATE TABLE IF NOT EXISTS dataset_test (
                id INT,
                name VARCHAR(255),
                date DATE
            );

            DELETE FROM dataset_test;
            INSERT INTO dataset_test values(1,'test','2022-01-01');

            CREATE TABLE IF NOT EXISTS dataset_test_20220101t000000_swap (
                id INT,
                name VARCHAR(255),
                date DATE
            );

            DELETE FROM dataset_test_20220101t000000_swap;
            INSERT INTO dataset_test_20220101t000000_swap values(1,'test','2022-01-01');
            INSERT INTO dataset_test_20220101t000000_swap values(2,'test_2','2022-01-02');

            """
        )

    return database
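This fixture stages both a live table and a timestamped "_swap" copy of it. Below is a minimal sketch of the table-swap step such a fixture prepares for, in which the freshly loaded swap table atomically replaces the live one inside a single transaction; the swap_tables helper is an assumption for illustration, not taken from the application under test, and only the table names match the fixture.

import psycopg2

def swap_tables(dsn):
    # Both renames run in one transaction, so readers see either the old
    # table or the new one, never neither.
    with psycopg2.connect(dsn) as conn, conn.cursor() as cur:
        cur.execute("ALTER TABLE dataset_test RENAME TO dataset_test_old")
        cur.execute(
            "ALTER TABLE dataset_test_20220101t000000_swap RENAME TO dataset_test"
        )
        cur.execute("DROP TABLE dataset_test_old")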
Example #2
def test_dataset(db):
    with psycopg2.connect(
        database_dsn(settings.DATABASES_DATA["my_database"])
    ) as conn, conn.cursor() as cursor:
        cursor.execute(
            "CREATE TABLE IF NOT EXISTS foo AS SELECT a,b FROM (VALUES ('test',30)) AS temp_table(a,b);"
        )
        cursor.execute(
            """
            CREATE SCHEMA IF NOT EXISTS dataflow;
            CREATE TABLE IF NOT EXISTS dataflow.metadata (
                id SERIAL,
                table_schema TEXT,
                table_name TEXT,
                source_data_modified_utc TIMESTAMP WITHOUT TIME ZONE,
                dataflow_swapped_tables_utc TIMESTAMP WITHOUT TIME ZONE,
                table_structure JSONB,
                data_ids TEXT[],
                data_type INTEGER NOT NULL,
                data_hash_v1 TEXT
            );
            TRUNCATE TABLE dataflow.metadata;
            INSERT INTO dataflow.metadata (table_schema, table_name, source_data_modified_utc, table_structure, data_type)
            VALUES
            ('public', 'foo', '2021-01-01 00:00:00.0', '{"a":"text","b":"int"}', 1);
            """
        )
        conn.commit()
    return ("public", "foo")
Example #3
def dataset_api_view_GET(request, dataset_id, source_table_id):

    source_table = get_object_or_404(SourceTable,
                                     id=source_table_id,
                                     dataset__id=dataset_id,
                                     dataset__deleted=False)

    search_after = request.GET.getlist("$searchAfter")

    with psycopg2.connect(
            database_dsn(settings.DATABASES_DATA[
                source_table.database.memorable_name])) as connection:
        primary_key = _get_dataset_primary_key(connection, source_table.schema,
                                               source_table.table)

        if not primary_key:
            raise ValueError(
                f"Cannot order response without a primary key on the table: "
                f"`{source_table.schema}`.`{source_table.table}`")

        if search_after == []:
            sql = psycopg2.sql.SQL("""
                    select
                        *
                    from {}.{}
                    order by {}
                """).format(
                psycopg2.sql.Identifier(source_table.schema),
                psycopg2.sql.Identifier(source_table.table),
                psycopg2.sql.SQL(",").join(
                    map(psycopg2.sql.Identifier, primary_key)),
            )
        else:
            sql = psycopg2.sql.SQL("""
                    select
                        *
                    from {}.{}
                    where ({}) > ({})
                    order by {}
                """).format(
                psycopg2.sql.Identifier(source_table.schema),
                psycopg2.sql.Identifier(source_table.table),
                psycopg2.sql.SQL(",").join(
                    map(psycopg2.sql.Identifier, primary_key)),
                psycopg2.sql.SQL(",").join(psycopg2.sql.Placeholder() *
                                           len(search_after)),
                psycopg2.sql.SQL(",").join(
                    map(psycopg2.sql.Identifier, primary_key)),
            )

        columns = _get_dataset_columns(connection, source_table)
        rows = _get_dataset_rows(connection, sql, query_args=search_after)

    return _get_streaming_http_response(
        StreamingHttpResponseWithoutDjangoDbConnection,
        request,
        primary_key,
        columns,
        rows,
    )
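This view implements keyset ("search after") pagination: rows are ordered by the full primary key, and a follow-up request passes the last-seen key values so the query resumes from WHERE (pk1, ...) > (%s, ...), a row-value comparison that Postgres evaluates lexicographically. A standalone sketch of the same query construction; build_page_query and its parameters are illustrative names, not part of the original view.

from psycopg2 import sql

def build_page_query(schema, table, primary_key, search_after):
    # Order by every primary key column so the row order is deterministic.
    order_by = sql.SQL(",").join(map(sql.Identifier, primary_key))
    if not search_after:
        return sql.SQL("SELECT * FROM {}.{} ORDER BY {}").format(
            sql.Identifier(schema), sql.Identifier(table), order_by
        )
    # One placeholder per key column; search_after supplies the values
    # when the query is executed.
    return sql.SQL("SELECT * FROM {}.{} WHERE ({}) > ({}) ORDER BY {}").format(
        sql.Identifier(schema),
        sql.Identifier(table),
        order_by,
        sql.SQL(",").join(sql.Placeholder() * len(search_after)),
        order_by,
    )

Executing the composed query with cursor.execute(query, search_after) then returns only rows strictly after the supplied key, so each page picks up exactly where the previous one ended.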
Example #4
    def test_search_after(self):
        # create django objects
        memorable_name = self.memorable_name
        table = self.table
        database = Database.objects.get_or_create(memorable_name=memorable_name)[0]
        data_grouping = DataGrouping.objects.get_or_create()[0]
        dataset = DataSet.objects.get_or_create(grouping=data_grouping)[0]
        source_table = SourceTable.objects.get_or_create(
            dataset=dataset, database=database, table=table
        )[0]

        # create external source table
        with psycopg2.connect(
            database_dsn(settings.DATABASES_DATA[memorable_name])
        ) as conn, conn.cursor() as cur:
            sql = '''
            create table {table} (id int primary key, name varchar(100))
            '''.format(
                table=table
            )
            cur.execute(sql)
            sql = '''insert into {table} values (%s, %s)'''.format(table=self.table)
            values = [(0, 'abigail'), (1, 'romeo')]
            cur.executemany(sql, values)

        url = '/api/v1/dataset/{}/{}?$searchAfter=0'.format(dataset.id, source_table.id)
        response = self.client.get(url)
        expected = {'headers': ['id', 'name'], 'values': [[1, 'romeo']], 'next': None}

        output = b''
        for streaming_output in response.streaming_content:
            output = output + streaming_output
        output_dict = json.loads(output.decode('utf-8'))
        self.assertEqual(output_dict, expected)
Example #5
def refresh_chart_data(chart_id):
    chart = models.ChartBuilderChart.objects.get(id=chart_id)
    original_table_name = chart.get_temp_table_name()
    query_log_params = {
        "run_by_user": chart.created_by,
        "page": 1,
        "page_size": None,
    }
    if chart.related_source is not None:
        # If the chart is created from a dataset source, get the latest copy
        # of the query from the source before rerunning
        query_log_params.update({
            "sql": chart.related_source.get_chart_builder_query(),
            "connection": chart.related_source.database.memorable_name,
        })
    else:
        # If there is no related source, rerun the original query
        query_log_params.update({
            "sql": chart.query_log.sql,
            "query_id": chart.query_log.query_id,
            "connection": chart.query_log.connection,
        })

    chart.query_log = QueryLog.objects.create(**query_log_params)
    chart.save()
    run_chart_builder_query(chart.id)
    chart.refresh_thumbnail()
    with psycopg2.connect(
            database_dsn(
                DATABASES_DATA[query_log_params["connection"]])) as conn:
        cursor = conn.cursor()
        cursor.execute(f"DROP TABLE IF EXISTS {original_table_name}")
Example #6
    def test_materialized_view_download(self, request_client, published):
        dsn = database_dsn(settings.DATABASES_DATA['my_database'])
        with psycopg2.connect(dsn) as conn, conn.cursor() as cursor:
            cursor.execute('''
                CREATE TABLE if not exists materialized_test_table (field2 int,field1 varchar(255));
                TRUNCATE TABLE materialized_test_table;
                INSERT INTO materialized_test_table VALUES(1, 'record1');
                INSERT INTO materialized_test_table VALUES(2, 'record2');
                DROP MATERIALIZED VIEW IF EXISTS materialized_test_view;
                CREATE MATERIALIZED VIEW materialized_test_view AS
                SELECT * FROM materialized_test_table;
                ''')
        dataset = factories.DataSetFactory(
            user_access_type='REQUIRES_AUTHENTICATION', published=published)
        source_view = factories.SourceViewFactory(
            dataset=dataset,
            database=factories.DatabaseFactory(memorable_name='my_database'),
            schema='public',
            view='materialized_test_view',
        )
        log_count = EventLog.objects.count()
        download_count = dataset.number_of_downloads
        response = request_client.get(source_view.get_absolute_url())
        assert response.status_code == 200
        assert (
            b''.join(response.streaming_content) ==
            b'"field2","field1"\r\n1,"record1"\r\n2,"record2"\r\n"Number of rows: 2"\r\n'
        )
        assert EventLog.objects.count() == log_count + 1
        assert (EventLog.objects.latest().event_type ==
                EventLog.TYPE_DATASET_SOURCE_VIEW_DOWNLOAD)
        assert (DataSet.objects.get(
            pk=dataset.id).number_of_downloads == download_count + 1)
Example #7
    def test_view_download(self, access_type, request_client, published):
        dsn = database_dsn(settings.DATABASES_DATA["my_database"])
        with psycopg2.connect(dsn) as conn, conn.cursor() as cursor:
            cursor.execute("""
                CREATE TABLE if not exists download_test_table (field2 int,field1 varchar(255));
                TRUNCATE TABLE download_test_table;
                INSERT INTO download_test_table VALUES(1, 'record1');
                INSERT INTO download_test_table VALUES(2, 'record2');
                CREATE OR REPLACE VIEW download_test_view AS SELECT * FROM download_test_table;
                """)

        dataset = factories.DataSetFactory(user_access_type=access_type,
                                           published=published)
        source_view = factories.SourceViewFactory(
            dataset=dataset,
            database=factories.DatabaseFactory(memorable_name="my_database"),
            schema="public",
            view="download_test_view",
        )
        log_count = EventLog.objects.count()
        download_count = dataset.number_of_downloads
        response = request_client.get(source_view.get_absolute_url())
        assert response.status_code == 200
        assert (
            b"".join(response.streaming_content) ==
            b'"field2","field1"\r\n1,"record1"\r\n2,"record2"\r\n"Number of rows: 2"\r\n'
        )
        assert EventLog.objects.count() == log_count + 1
        assert EventLog.objects.latest(
        ).event_type == EventLog.TYPE_DATASET_SOURCE_VIEW_DOWNLOAD
        assert DataSet.objects.get(
            pk=dataset.id).number_of_downloads == download_count + 1
Example #8
def metadata_db(db):
    database = factories.DatabaseFactory(memorable_name="my_database")
    with psycopg2.connect(
        database_dsn(settings.DATABASES_DATA["my_database"])
    ) as conn, conn.cursor() as cursor:
        cursor.execute(
            """
            CREATE SCHEMA IF NOT EXISTS dataflow;
            CREATE TABLE IF NOT EXISTS dataflow.metadata (
                id SERIAL,
                table_schema TEXT,
                table_name TEXT,
                source_data_modified_utc TIMESTAMP WITHOUT TIME ZONE,
                dataflow_swapped_tables_utc TIMESTAMP WITHOUT TIME ZONE,
                table_structure JSONB,
                data_ids TEXT[],
                data_type INTEGER NOT NULL,
                data_hash_v1 TEXT,
                primary_keys TEXT[]
            );
            TRUNCATE TABLE dataflow.metadata;
            INSERT INTO dataflow.metadata (
                table_schema, table_name, source_data_modified_utc, dataflow_swapped_tables_utc, table_structure, data_type
            )
            VALUES
                ('public','table1','2020-09-02 00:01:00.0','2020-09-02 00:01:00.0','{"field1":"int","field2":"varchar"}',1),
                ('public','table2','2020-09-01 00:01:00.0','2020-09-02 00:01:00.0',NULL,1),
                ('public','table1','2020-01-01 00:01:00.0','2020-09-02 00:01:00.0',NULL,1),
                ('public','table4', NULL,'2021-12-01 00:00:00.0',NULL,1);
            """
        )
        conn.commit()
    return database
Example #9
    def test_friendly_exception_from_table_without_primary_key(self):
        """This test is in place to assert existing behaviour. It may well be reasonable to make this view work
        without primary keys on the table."""
        # create django objects
        memorable_name = self.memorable_name
        table = self.table
        database = Database.objects.get_or_create(
            memorable_name=memorable_name)[0]
        data_grouping = DataGrouping.objects.get_or_create()[0]
        dataset = DataSet.objects.get_or_create(grouping=data_grouping)[0]
        source_table = SourceTable.objects.get_or_create(dataset=dataset,
                                                         database=database,
                                                         table=table)[0]

        # create external source table
        with psycopg2.connect(
                database_dsn(settings.DATABASES_DATA[memorable_name])
        ) as conn, conn.cursor() as cur:
            sql = '''
            create table {table} (id int, name varchar(100), timestamp timestamp)
            '''.format(table=table)
            cur.execute(sql)

        url = '/api/v1/dataset/{}/{}'.format(dataset.id, source_table.id)
        with pytest.raises(ValueError) as e:
            self.client.get(url)

        assert str(e.value) == (
            f"Cannot order response without a primary key on the table: "
            f"`{source_table.schema}`.`{source_table.table}`")
Example #10
    def test_view_download(self):
        dsn = database_dsn(settings.DATABASES_DATA['my_database'])
        with connect(dsn) as conn, conn.cursor() as cursor:
            cursor.execute('''
                CREATE TABLE if not exists download_test_table (field2 int,field1 varchar(255));
                TRUNCATE TABLE download_test_table;
                INSERT INTO download_test_table VALUES(1, 'record1');
                INSERT INTO download_test_table VALUES(2, 'record2');
                CREATE OR REPLACE VIEW download_test_view AS SELECT * FROM download_test_table;
                ''')

        dataset = factories.DataSetFactory(
            user_access_type='REQUIRES_AUTHENTICATION')
        source_view = factories.SourceViewFactory(
            dataset=dataset,
            database=factories.DatabaseFactory(memorable_name='my_database'),
            schema='public',
            view='download_test_view',
        )
        log_count = EventLog.objects.count()
        download_count = dataset.number_of_downloads
        response = self._authenticated_get(source_view.get_absolute_url())
        self.assertEqual(response.status_code, 200)
        self.assertEqual(
            b''.join(response.streaming_content),
            b'"field2","field1"\r\n1,"record1"\r\n2,"record2"\r\n"Number of rows: 2"\r\n',
        )
        self.assertEqual(EventLog.objects.count(), log_count + 1)
        self.assertEqual(
            EventLog.objects.latest().event_type,
            EventLog.TYPE_DATASET_SOURCE_VIEW_DOWNLOAD,
        )
        self.assertEqual(
            DataSet.objects.get(pk=dataset.id).number_of_downloads,
            download_count + 1)
Example #11
    def test_friendly_exception_from_non_standard_table(self):
        # create django objects
        memorable_name = self.memorable_name
        table = self.table
        database = Database.objects.get_or_create(
            memorable_name=memorable_name)[0]
        data_grouping = DataGrouping.objects.get_or_create()[0]
        dataset = DataSet.objects.get_or_create(grouping=data_grouping)[0]
        _ = SourceTable.objects.get_or_create(dataset=dataset,
                                              database=database,
                                              table=table)[0]
        view_source_table = SourceTable.objects.get_or_create(
            dataset=dataset, database=database, table=f"view_{table}")[0]

        # create external source table
        with psycopg2.connect(
                database_dsn(settings.DATABASES_DATA[memorable_name])
        ) as conn, conn.cursor() as cur:
            sql = '''
            create table {table} (id int primary key, name varchar(100), timestamp timestamp)
            '''.format(table=table)
            cur.execute(sql)
            sql = '''
            create view view_{table} as (select * from {table})
            '''.format(table=table)
            cur.execute(sql)

        url = '/api/v1/dataset/{}/{}'.format(dataset.id, view_source_table.id)
        with pytest.raises(ValueError) as e:
            self.client.get(url)

        assert str(e.value) == (
            "Cannot get primary keys from something other than an ordinary table. "
            "`public`.`view_test_source_table` is a: view")
Example #12
def get_postgres_column_names_data_types(sourcetable):
    with connect(
            database_dsn(
                settings.DATABASES_DATA[sourcetable.database.memorable_name])
    ) as conn, conn.cursor() as cur:
        cur.execute(
            """
            SELECT
                pg_attribute.attname AS column_name,
                pg_catalog.format_type(pg_attribute.atttypid, pg_attribute.atttypmod) AS data_type
            FROM
                pg_catalog.pg_attribute
            INNER JOIN
                pg_catalog.pg_class ON pg_class.oid = pg_attribute.attrelid
            INNER JOIN
                pg_catalog.pg_namespace ON pg_namespace.oid = pg_class.relnamespace
            WHERE
                pg_attribute.attnum > 0
                AND NOT pg_attribute.attisdropped
                AND pg_namespace.nspname = %s
                AND pg_class.relname = %s
            ORDER BY
                attnum ASC;
        """,
            (sourcetable.schema, sourcetable.table),
        )
        return cur.fetchall()
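The catalog query above reads column names and fully formatted types straight from pg_attribute. For comparison, a sketch of the same lookup via information_schema.columns, which is simpler but omits type modifiers (it reports "character varying" rather than the "character varying(255)" that format_type produces); the dsn parameter is an assumption for illustration.

import psycopg2

def column_names_and_types(dsn, schema, table):
    with psycopg2.connect(dsn) as conn, conn.cursor() as cur:
        cur.execute(
            """
            SELECT column_name, data_type
            FROM information_schema.columns
            WHERE table_schema = %s AND table_name = %s
            ORDER BY ordinal_position
            """,
            (schema, table),
        )
        return cur.fetchall()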
Example #13
    def test_empty_data(self):
        # create django objects
        memorable_name = self.memorable_name
        table = self.table
        database = Database.objects.get_or_create(
            memorable_name=memorable_name)[0]
        data_grouping = DataGrouping.objects.get_or_create()[0]
        dataset = DataSet.objects.get_or_create(grouping=data_grouping)[0]
        source_table = SourceTable.objects.get_or_create(dataset=dataset,
                                                         database=database,
                                                         table=table)[0]

        # create external source table
        with psycopg2.connect(
                database_dsn(settings.DATABASES_DATA[memorable_name])
        ) as conn, conn.cursor() as cur:
            sql = '''
            create table {table} (id int primary key, name varchar(100), timestamp timestamp)
            '''.format(table=table)
            cur.execute(sql)

        url = '/api/v1/dataset/{}/{}'.format(dataset.id, source_table.id)
        response = self.client.get(url)
        expected = {
            'headers': ['id', 'name', 'timestamp'],
            'next': None,
            'values': []
        }

        output = b''
        for streaming_output in response.streaming_content:
            output = output + streaming_output
        output_dict = json.loads(output.decode('utf-8'))
        self.assertEqual(output_dict, expected)
Example #14
    def test_db(self, db):
        database = factories.DatabaseFactory(memorable_name='my_database')
        with psycopg2.connect(
                database_dsn(settings.DATABASES_DATA['my_database'])) as conn:
            conn.cursor().execute('''
                CREATE TABLE IF NOT EXISTS test_table AS (
                    SELECT 1 as a, 2 as b
                );
            ''')
            conn.commit()
            yield database
            conn.cursor().execute('DROP TABLE test_table')
Example #15
def run_chart_builder_query(chart_id):
    chart = models.ChartBuilderChart.objects.get(id=chart_id)
    query_log = chart.query_log
    with psycopg2.connect(database_dsn(
            DATABASES_DATA[query_log.connection])) as conn:
        _run_query(
            conn,
            query_log,
            query_log.page,
            query_log.page_size,
            settings.EXPLORER_QUERY_TIMEOUT_MS,
            chart.get_temp_table_name(),
        )
Example #16
def dataset_api_view_GET(request, dataset_id, source_table_id):

    source_table = get_object_or_404(
        SourceTable, id=source_table_id, dataset__id=dataset_id, dataset__deleted=False
    )

    search_after = request.GET.getlist('$searchAfter')

    with psycopg2.connect(
        database_dsn(settings.DATABASES_DATA[source_table.database.memorable_name])
    ) as connection:
        primary_key = _get_dataset_primary_key(
            connection, source_table.schema, source_table.table
        )

        if search_after == []:
            sql = psycopg2.sql.SQL(
                '''
                    select
                        *
                    from {}.{}
                    order by {}
                '''
            ).format(
                psycopg2.sql.Identifier(source_table.schema),
                psycopg2.sql.Identifier(source_table.table),
                psycopg2.sql.SQL(',').join(map(psycopg2.sql.Identifier, primary_key)),
            )
        else:
            sql = psycopg2.sql.SQL(
                '''
                    select
                        *
                    from {}.{}
                    where ({}) > ({})
                    order by {}
                '''
            ).format(
                psycopg2.sql.Identifier(source_table.schema),
                psycopg2.sql.Identifier(source_table.table),
                psycopg2.sql.SQL(',').join(map(psycopg2.sql.Identifier, primary_key)),
                psycopg2.sql.SQL(',').join(
                    psycopg2.sql.Placeholder() * len(search_after)
                ),
                psycopg2.sql.SQL(',').join(map(psycopg2.sql.Identifier, primary_key)),
            )

        columns = _get_dataset_columns(connection, source_table)
        rows = _get_dataset_rows(connection, sql, query_args=search_after)

    return _get_streaming_http_response(request, primary_key, columns, rows)
Example #17
def get_columns(database_name, schema=None, table=None, query=None):
    if table is not None and schema is not None:
        source = sql.SQL("{}.{}").format(sql.Identifier(schema), sql.Identifier(table))
    elif query is not None:
        source = sql.SQL("({}) AS custom_query".format(query.rstrip(";")))
    else:
        raise ValueError("Either table or query are required")

    with psycopg2.connect(
        database_dsn(settings.DATABASES_DATA[database_name])
    ) as connection:
        try:
            return query_columns(connection, source)
        except Exception:
            logger.error("Failed to get dataset fields", exc_info=True)
            return []
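A hedged usage sketch; the database name, table, and query below are illustrative. Note that when query is supplied it is interpolated into the SQL verbatim, so it must come from a trusted source, unlike the schema and table arguments, which are escaped via sql.Identifier.

# Assuming Django settings are configured and "my_database" is reachable:
columns_from_table = get_columns("my_database", schema="public", table="dataset_test")
columns_from_query = get_columns("my_database", query="SELECT 1 AS a, 'x' AS b")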
Example #18
    def setUp(self):
        super().setUp()
        self.database = factories.DatabaseFactory(memorable_name='my_database')
        self.dsn = database_dsn(settings.DATABASES_DATA['my_database'])
        with connect(self.dsn) as conn, conn.cursor() as cursor:
            cursor.execute('''
                CREATE TABLE IF NOT EXISTS custom_query_test (
                    id INT,
                    name VARCHAR(255),
                    date DATE
                );
                TRUNCATE TABLE custom_query_test;
                INSERT INTO custom_query_test VALUES(1, 'the first record', NULL);
                INSERT INTO custom_query_test VALUES(2, 'the second record', '2019-01-01');
                INSERT INTO custom_query_test VALUES(3, 'the last record', NULL);
                ''')
Example #19
def metadata_db(db):
    database = factories.DatabaseFactory(memorable_name='my_database')
    with psycopg2.connect(database_dsn(settings.DATABASES_DATA['my_database'])
                          ) as conn, conn.cursor() as cursor:
        cursor.execute('''
            CREATE SCHEMA IF NOT EXISTS dataflow;
            CREATE TABLE IF NOT EXISTS dataflow.metadata (
                id int, table_schema text, table_name text, source_data_modified_utc timestamp
            );
            TRUNCATE TABLE dataflow.metadata;
            INSERT INTO dataflow.metadata VALUES(1, 'public', 'table1', '2020-09-02 00:01:00.0');
            INSERT INTO dataflow.metadata VALUES(1, 'public', 'table2', '2020-09-01 00:01:00.0');
            INSERT INTO dataflow.metadata VALUES(1, 'public', 'table1', '2020-01-01 00:01:00.0');
            INSERT INTO dataflow.metadata VALUES(1, 'public', 'table4', NULL);
            ''')
        conn.commit()
    return database
Example #20
def dataset_finder_db(metadata_db):
    database = factories.DatabaseFactory(memorable_name="my_database")
    with psycopg2.connect(database_dsn(settings.DATABASES_DATA["my_database"])) as conn:
        conn.cursor().execute(
            """
            CREATE TABLE IF NOT EXISTS dataworkspace__source_tables (
                id INT,
                name VARCHAR(255),
                dataset_id UUID,
                schema VARCHAR(255),
                "table" VARCHAR(255)
            );

            CREATE TABLE IF NOT EXISTS dataworkspace__catalogue_items (
                id UUID,
                name VARCHAR(255),
                slug VARCHAR(255)
            );

            INSERT INTO dataworkspace__source_tables VALUES(
                1, 'public.data', '0dea6147-d355-4b6d-a140-0304ef9cfeca', 'public', 'data'
            );

            INSERT INTO dataworkspace__catalogue_items VALUES(
                '0dea6147-d355-4b6d-a140-0304ef9cfeca', 'public.data', '1'
            );

            CREATE SCHEMA IF NOT EXISTS public;
            CREATE TABLE IF NOT EXISTS data (
                id int,
                name VARCHAR(255),
                database VARCHAR(255),
                schema VARCHAR(255),
                frequency VARCHAR(255),
                "table" VARCHAR(255)
            );

            CREATE TABLE IF NOT EXISTS country_stats (
                date DATE,
                driving NUMERIC,
                country VARCHAR(255)
            );
            """
        )

    return database
Example #21
    def test_data(self):
        # create django objects
        memorable_name = self.memorable_name
        table = self.table
        database = Database.objects.get_or_create(
            memorable_name=memorable_name)[0]
        data_grouping = DataGrouping.objects.get_or_create()[0]
        dataset = DataSet.objects.get_or_create(grouping=data_grouping)[0]
        source_table = SourceTable.objects.get_or_create(dataset=dataset,
                                                         database=database,
                                                         table=table)[0]

        # create external source table
        with psycopg2.connect(
                database_dsn(settings.DATABASES_DATA[memorable_name])
        ) as conn, conn.cursor() as cur:
            sql = """
            create table {table} (id int primary key, name varchar(100), timestamp timestamp)
            """.format(table=table)
            cur.execute(sql)
            sql = """insert into {table} values (%s, %s, %s)""".format(
                table=self.table)
            values = [
                (0, "abigail", "2019-01-01 01:00"),
                (1, "romeo", "2019-01-01 02:00"),
            ]
            cur.executemany(sql, values)

        url = "/api/v1/dataset/{}/{}".format(dataset.id, source_table.id)
        response = self.client.get(url)
        expected = {
            "headers": ["id", "name", "timestamp"],
            "next": None,
            "values": [
                [0, "abigail", "2019-01-01 01:00:00"],
                [1, "romeo", "2019-01-01 02:00:00"],
            ],
        }

        output = b""
        for streaming_output in response.streaming_content:
            output = output + streaming_output
        output_dict = json.loads(output.decode("utf-8"))
        self.assertEqual(output_dict, expected)
Example #22
    def test_db(self, db):
        database = factories.DatabaseFactory(memorable_name="my_database")
        with psycopg2.connect(
                database_dsn(settings.DATABASES_DATA["my_database"])) as conn:
            conn.cursor().execute("""
            CREATE TABLE IF NOT EXISTS test_sample AS (
                with data (x, y, z) as (values
                    (NULL, NULL, NULL),
                    ('a', 'b', 'c'),
                    ('a', 'b', NULL),
                    ('a', NULL, NULL)
                )
                SELECT * from data
            );
            """)
            conn.commit()
            yield database.memorable_name
            conn.cursor().execute("DROP TABLE test_sample")
Example #23
def dataset_db(metadata_db):
    database = factories.DatabaseFactory(memorable_name='my_database')
    with psycopg2.connect(database_dsn(
            settings.DATABASES_DATA['my_database'])) as conn:
        conn.cursor().execute('''
            CREATE TABLE IF NOT EXISTS dataset_test (
                id INT,
                name VARCHAR(255),
                date DATE
            );

            CREATE TABLE IF NOT EXISTS dataset_test2 (
                id INT,
                name VARCHAR(255)
            );

            CREATE OR REPLACE VIEW dataset_view AS (SELECT * FROM dataset_test);
            ''')

    return database
Example #24
    def handle(self, *args, **options):
        self.stdout.write("ensure_database_configured...")

        for database_name, database_data in settings.DATABASES_DATA.items():
            self.stdout.write(
                f"ensure_database_configured ensuring {database_name} is in db"
            )
            Database.objects.get_or_create(memorable_name=database_name)

            self.stdout.write(
                f"ensure_database_configured {database_name} revoking public access..."
            )
            with connect(
                    database_dsn(database_data)) as conn, conn.cursor() as cur:
                cur.execute("REVOKE ALL ON schema public FROM public;")
            self.stdout.write(
                f"ensure_database_configured {database_name} revoking public access... (done)"
            )

        self.stdout.write(
            self.style.SUCCESS("ensure_database_configured... (done)"))
Example #25
    def handle(self, *args, **options):
        self.stdout.write("deleting temporary Data Explorer result tables...")

        for _database_name, database_data in settings.DATABASES_DATA.items():
            with connect(
                    database_dsn(database_data)) as conn, conn.cursor() as cur:
                cur.execute("""
                    SELECT table_schema, table_name
                    FROM information_schema.tables
                    WHERE table_name
                    LIKE '%_tmp_query_%'
                """)
                for table_schema, table_name in cur.fetchall():
                    cur.execute(
                        psycopg2.sql.SQL("DROP TABLE {}.{}").format(
                            psycopg2.sql.Identifier(table_schema),
                            psycopg2.sql.Identifier(table_name),
                        ))

        self.stdout.write(
            self.style.SUCCESS(
                "deleting temporary Data Explorer result tables (done)"))
Example #26
def get_rows(sourcetable, schema_value_funcs, query_var):
    cursor_itersize = 1000

    # Order the rows by primary key so that
    # - multiple requests are consistent with each other;
    # - and, specifically and more importantly, so paginated results are
    #   consistent with each other.
    # We make no assumption about the name or number of columns in the
    # primary key, other than that it exists.
    # We _could_ use `oid` to order rows if there is no primary key, but we
    # would like all tables to have a primary key, so we deliberately don't
    # implement this.
    with connect(
            database_dsn(
                settings.DATABASES_DATA[sourcetable.database.memorable_name])
    ) as conn, conn.cursor() as cur:

        cur.execute(
            """
            SELECT
                pg_attribute.attname AS column_name
            FROM
                pg_catalog.pg_class pg_class_table
            INNER JOIN
                pg_catalog.pg_index ON pg_index.indrelid = pg_class_table.oid
            INNER JOIN
                pg_catalog.pg_class pg_class_index ON pg_class_index.oid = pg_index.indexrelid
            INNER JOIN
                pg_catalog.pg_namespace ON pg_namespace.oid = pg_class_table.relnamespace
            INNER JOIN
                pg_catalog.pg_attribute ON pg_attribute.attrelid = pg_class_index.oid
            WHERE
                pg_namespace.nspname = %s
                AND pg_class_table.relname = %s
                AND pg_index.indisprimary
            ORDER BY
                pg_attribute.attnum
        """,
            (sourcetable.schema, sourcetable.table),
        )
        primary_key_column_names = [row[0] for row in cur.fetchall()]

    with connect(
            database_dsn(settings.DATABASES_DATA[
                sourcetable.database.memorable_name])) as conn, conn.cursor(
                    name="google_data_studio_all_table_data"
                ) as cur:  # Named cursor => server-side cursor

        cur.itersize = cursor_itersize
        cur.arraysize = cursor_itersize

        fields_sql = sql.SQL(",").join([
            sql.Identifier(schema["name"]) for schema, _ in schema_value_funcs
        ])
        primary_key_sql = sql.SQL(",").join([
            sql.Identifier(column_name)
            for column_name in primary_key_column_names
        ])
        schema_sql = sql.Identifier(sourcetable.schema)
        table_sql = sql.Identifier(sourcetable.table)

        query_sql, vars_sql = query_var(fields_sql, schema_sql, table_sql,
                                        primary_key_sql)
        cur.execute(query_sql, vars_sql)

        while True:
            rows = cur.fetchmany(cursor_itersize)
            for row in rows:
                primary_key_values = row[:len(primary_key_column_names)]
                requested_field_values = row[len(primary_key_column_names):]
                values = [
                    schema_value_funcs[i][1](value)
                    for i, value in enumerate(requested_field_values)
                ]
                yield {"values": values}, primary_key_values
            if not rows:
                break
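The second connection above opens a named cursor, which psycopg2 turns into a server-side Postgres cursor: rows stream to the client in itersize-sized batches instead of being fetched into memory all at once. A minimal self-contained sketch of that pattern; the DSN and query are assumptions for illustration.

import psycopg2

def stream_rows(dsn, batch_size=1000):
    with psycopg2.connect(dsn) as conn:
        # Passing a name makes this a server-side cursor.
        with conn.cursor(name="streaming_example") as cur:
            cur.itersize = batch_size  # rows fetched per network round trip
            cur.execute("SELECT generate_series(1, 1000000)")
            for row in cur:
                yield row[0]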
Example #27
    def flush_database(self):
        with psycopg2.connect(
                database_dsn(
                    settings.DATABASES_DATA[self.memorable_name])) as conn:
            flush_database(conn)
Example #28
def delete_unused_datasets_users():
    logger.info('delete_unused_datasets_users: Start')

    for memorable_name, database_data in settings.DATABASES_DATA.items():
        database_obj = Database.objects.get(memorable_name=memorable_name)
        database_name = database_data['NAME']

        with connect(
                database_dsn(database_data)) as conn, conn.cursor() as cur:
            logger.info('delete_unused_datasets_users: finding database users')
            cur.execute("""
                SELECT usename FROM pg_catalog.pg_user
                WHERE valuntil != 'infinity' AND usename LIKE 'user_%'
                ORDER BY usename;
            """)
            usenames = [result[0] for result in cur.fetchall()]

            logger.info('delete_unused_datasets_users: finding schemas')
            cur.execute("""
                SELECT nspname FROM pg_catalog.pg_namespace WHERE
                nspname != 'pg_catalog' AND nspname != 'information_schema'
                ORDER BY nspname
            """)
            schemas = [result[0] for result in cur.fetchall()]

        logger.info(
            'delete_unused_datasets_users: waiting in case they were just created'
        )
        gevent.sleep(15)

        # We want to be able to delete db users that were created but are _not_
        # associated with a running application: those from a STOPPED application,
        # and also those created just before the server went down, before the
        # application itself was created.
        in_use_usenames = set(
            ApplicationInstanceDbUsers.objects.filter(
                db=database_obj,
                db_username__in=usenames,
                application_instance__state__in=['RUNNING', 'SPAWNING'],
            ).values_list('db_username', flat=True))
        not_in_use_usernames = [
            usename for usename in usenames if usename not in in_use_usenames
        ]

        schema_revokes = [
            'REVOKE USAGE ON SCHEMA {} FROM {};',
            'REVOKE ALL PRIVILEGES ON ALL TABLES IN SCHEMA {} FROM {};',
            'REVOKE ALL PRIVILEGES ON ALL SEQUENCES IN SCHEMA {} FROM {};',
            'REVOKE ALL PRIVILEGES ON ALL FUNCTIONS IN SCHEMA {} FROM {};',
            'ALTER DEFAULT PRIVILEGES IN SCHEMA {} REVOKE ALL PRIVILEGES ON TABLES FROM {}',
            'ALTER DEFAULT PRIVILEGES IN SCHEMA {} REVOKE ALL PRIVILEGES ON SEQUENCES FROM {}',
            'ALTER DEFAULT PRIVILEGES IN SCHEMA {} REVOKE ALL PRIVILEGES ON FUNCTIONS FROM {}',
        ]

        with connect(database_dsn(database_data)) as conn:
            conn.autocommit = True
            with conn.cursor() as cur:
                for usename in not_in_use_usernames:
                    try:
                        logger.info(
                            'delete_unused_datasets_users: revoking credentials for %s',
                            usename,
                        )

                        # Multiple concurrent GRANT CONNECT on the same database can cause
                        # "tuple concurrently updated" errors
                        with cache.lock(
                                f'database-grant-connect-{database_name}--v2',
                                blocking_timeout=3,
                        ):
                            cur.execute(
                                sql.SQL(
                                    'REVOKE CONNECT ON DATABASE {} FROM {};').
                                format(
                                    sql.Identifier(database_name),
                                    sql.Identifier(usename),
                                ))

                            cur.execute(
                                sql.SQL(
                                    'REVOKE ALL PRIVILEGES ON DATABASE {} FROM {};'
                                ).format(
                                    sql.Identifier(database_name),
                                    sql.Identifier(usename),
                                ))

                        for schema in schemas:
                            with cache.lock(
                                    f'database-grant--{database_name}--{schema}--v2',
                                    blocking_timeout=3,
                            ):
                                for schema_revoke in schema_revokes:
                                    try:
                                        cur.execute(
                                            sql.SQL(schema_revoke).format(
                                                sql.Identifier(schema),
                                                sql.Identifier(usename),
                                            ))
                                    except Exception:
                                        # This is likely to happen for private schemas where the current user
                                        # does not have revoke privileges. We carry on in a best effort
                                        # to remove the user
                                        logger.info(
                                            'delete_unused_datasets_users: Unable to %s %s %s',
                                            schema_revoke,
                                            schema,
                                            usename,
                                        )

                        logger.info(
                            'delete_unused_datasets_users: dropping user %s',
                            usename)
                        cur.execute(
                            sql.SQL('DROP USER {};').format(
                                sql.Identifier(usename)))
                    except Exception:
                        logger.exception(
                            'delete_unused_datasets_users: Failed deleting %s',
                            usename)
                    else:
                        logger.info(
                            'delete_unused_datasets_users: revoked credentials for and dropped %s',
                            usename,
                        )

    logger.info('delete_unused_datasets_users: End')
Example #29
    def _get_dsn(self):
        return database_dsn(settings.DATABASES_DATA['my_database'])