Пример #1
0
    def test_database_opens_new_connection(self, mock_connect, mock_fetch):
        # Two fetches issued without an explicit connection are expected to
        # trigger two separate connect calls and yield different results.
        # NOTE(review): mock_connect / mock_fetch are presumably injected by
        # patch decorators outside this view; test_connect / test_fetch are
        # helpers defined elsewhere in the module.
        database = Database()

        mock_fetch.side_effect = test_fetch
        mock_connect.side_effect = test_connect

        first_result = database.fetch(database, 'SELECT a from abc')
        second_result = database.fetch(database, 'SELECT b from def')

        self.assertEqual(2, mock_connect.call_count)
        self.assertNotEqual(first_result, second_result)
Пример #2
0
def fetch_data(
        database: Database,
        queries: List[Type[QueryBuilder]],
        dimensions: Iterable[Field],
        share_dimensions: Iterable[Field] = (),
        reference_groups=(),
) -> Tuple[int, pd.DataFrame]:
    """
    Execute the queries against the database and reduce their results into a single result set.

    :param database:
        Database middleware used to fetch one data frame per query.
    :param queries:
        Queries to execute; each one is converted to its string form first.
    :param dimensions:
        Dimensions of the result set. Those whose unmodified field has the date data type are parsed as dates.
    :param share_dimensions:
        Passed through unchanged to `reduce_result_set`.
    :param reference_groups:
        Passed through unchanged to `reduce_result_set`.

    :return:
        A tuple of the largest row count among the fetched results (0 when there are none) and the value returned
        by `reduce_result_set`.
    """
    sql_statements = list(map(str, queries))

    # Map the alias of every date dimension to PANDAS_TO_DATETIME_FORMAT so the fetch knows which columns to parse
    # as dates. The map is lazy, so each dimension is resolved right before it is tested.
    unmodified_fields = map(find_field_in_modified_field, dimensions)
    date_columns = {
        alias_selector(field.alias): PANDAS_TO_DATETIME_FORMAT
        for field in unmodified_fields
        if field.data_type == DataType.date
    }

    frames = database.fetch_dataframes(*sql_statements, parse_dates=date_columns)

    largest_row_count = max(map(len, frames), default=0)
    log_context = {
        'row_count': largest_row_count,
        'database': str(database),
    }
    logger.info('max_rows_returned', extra=log_context)

    reduced = reduce_result_set(frames, reference_groups, dimensions, share_dimensions)
    return largest_row_count, reduced
Пример #3
0
    def test_database_reuse_passed_connection(self, mock_connect, mock_fetch):
        # When the caller supplies a connection, both fetches are expected to
        # use it, so connect is called exactly once (by this test itself) and
        # both fetches yield equal results.
        # NOTE(review): mock_connect / mock_fetch are presumably injected by
        # patch decorators outside this view; test_connect / test_fetch are
        # helpers defined elsewhere in the module.
        database = Database()

        mock_fetch.side_effect = test_fetch
        mock_connect.side_effect = test_connect

        with database.connect() as shared_connection:
            first_result = database.fetch(
                database, 'SELECT a from abc', connection=shared_connection)
            second_result = database.fetch(
                database, 'SELECT b from def', connection=shared_connection)

        self.assertEqual(1, mock_connect.call_count)
        self.assertEqual(first_result, second_result)
Пример #4
0
    def test_fetch_dataframe(self, mock_connect, mock_read_sql):
        # fetch_dataframe is expected to hand the query and the entered
        # connection to the (mocked) SQL reader and return its result as is.
        sql = 'SELECT 1'
        mock_read_sql.return_value = 'OK'

        frame = Database().fetch_dataframe(sql)

        self.assertEqual(mock_read_sql.return_value, frame)

        # Calling the mock again returns the same return_value object, which
        # gives access to the connection yielded by the context manager.
        entered_connection = mock_connect().__enter__()
        mock_read_sql.assert_called_once_with(sql, entered_connection)
Пример #5
0
    def test_fetch(self, mock_connect):
        # fetch is expected to open one cursor on the entered connection,
        # execute the query on it and return everything fetchall yields.
        entered_connection = mock_connect.return_value.__enter__.return_value
        cursor_factory = entered_connection.cursor
        cursor = MagicMock(name='mock_cursor')
        cursor_factory.return_value = cursor
        cursor.fetchall.return_value = 'OK'

        rows = Database().fetch('SELECT 1')

        self.assertEqual(cursor.fetchall.return_value, rows)
        cursor_factory.assert_called_once_with()
        cursor.execute.assert_called_once_with('SELECT 1')
        cursor.fetchall.assert_called_once_with()
Пример #6
0
def _do_fetch_data(query: str, database: Database):
    """
    Runs a query over a connection obtained from the database middleware and returns the result as a data frame.

    NOTE(review): nothing in this function body logs the execution or its duration; if that is expected, it has to
    come from a wrapper that is not visible here.

    :param query: Query string
    :param database:
        instance of `fireant.Database`, database middleware

    :return: `pd.DataFrame` constructed from the result of the query
    """
    read_options = {'coerce_float': True, 'parse_dates': True}
    with database.connect() as connection:
        # Return from inside the context so the connection's exit handling runs after the frame is built.
        frame = pd.read_sql(query, connection, **read_options)
        return frame
Пример #7
0
    def test_database_api(self):
        # The base Database leaves connect and trunc_date unimplemented; both
        # calls are expected to raise NotImplementedError.
        database = Database()

        with self.assertRaises(NotImplementedError):
            database.connect()

        with self.assertRaises(NotImplementedError):
            database.trunc_date(Field('abc'), 'day')
Пример #8
0
def fetch_data(
        database: Database,
        queries: Union[Sized, Iterable],
        dimensions: Iterable[Field],
        share_dimensions: Iterable[Field] = (),
        reference_groups=(),
):
    """
    Execute the queries with a capped row limit and reduce their results into a single result set.

    :param database:
        Database middleware; its `max_result_set_size` is the upper bound applied to every query limit.
    :param queries:
        Queries to execute. Each must expose `_limit` and `limit(...)`.
    :param dimensions:
        Passed through unchanged to `reduce_result_set`.
    :param share_dimensions:
        Passed through unchanged to `reduce_result_set`.
    :param reference_groups:
        Passed through unchanged to `reduce_result_set`.

    :return:
        The value returned by `reduce_result_set` for the fetched data frames.
    """
    limited_statements = []
    for query in queries:
        # A query without a limit of its own counts as unbounded, so the database maximum always wins.
        own_limit = query._limit or float("inf")
        capped_limit = min(own_limit, database.max_result_set_size)
        limited_statements.append(str(query.limit(capped_limit)))

    frames = database.fetch_dataframes(*limited_statements)
    return reduce_result_set(frames, reference_groups, dimensions, share_dimensions)
Пример #9
0
def _do_fetch_data(query: str, database: Database):
    """
    Runs a query over a connection obtained from the database middleware and returns the result as a data frame.

    NOTE(review): nothing in this function body logs the execution or its duration; if that is expected, it has to
    come from a wrapper that is not visible here.

    :param query: Query string
    :param database:
        instance of `fireant.Database`, database middleware

    :return: `pd.DataFrame` constructed from the result of the query
    """
    read_options = {'coerce_float': True, 'parse_dates': True}
    with database.connect() as connection:
        # Return from inside the context so the connection's exit handling runs after the frame is built.
        frame = pd.read_sql(query, connection, **read_options)
        return frame
Пример #10
0
def fetch_data(
        database: Database,
        queries: List[Type[QueryBuilder]],
        dimensions: Iterable[Field],
        share_dimensions: Iterable[Field] = (),
        reference_groups=(),
) -> Tuple[int, pd.DataFrame]:
    """
    Execute the queries, truncate oversized results and reduce everything into a single result set.

    :param database:
        Database middleware used to fetch one data frame per query; its `max_result_set_size` is the maximum number
        of rows kept per result.
    :param queries:
        Queries to execute; each one is converted to its string form first.
    :param dimensions:
        Dimensions of the result set. Those whose unmodified field has the date data type are parsed as dates.
    :param share_dimensions:
        Passed through unchanged to `reduce_result_set`.
    :param reference_groups:
        Passed through unchanged to `reduce_result_set`.

    :return:
        A tuple of the largest row count seen before truncation (0 when there are no results) and the value
        returned by `reduce_result_set`.
    """
    sql_statements = list(map(str, queries))

    # Map the alias of every date dimension to PANDAS_TO_DATETIME_FORMAT so the fetch knows which columns to parse
    # as dates. The map is lazy, so each dimension is resolved right before it is tested.
    unmodified_fields = map(find_field_in_modified_field, dimensions)
    date_columns = {
        alias_selector(field.alias): PANDAS_TO_DATETIME_FORMAT
        for field in unmodified_fields
        if field.data_type == DataType.date
    }

    frames = database.fetch_dataframes(*sql_statements, parse_dates=date_columns)

    largest_row_count = 0
    for frame in frames:
        frame_rows = len(frame)
        largest_row_count = max(largest_row_count, frame_rows)

        if frame_rows <= database.max_result_set_size:
            continue

        # The warning reports the size before truncation.
        logger.warning('row_count_over_max',
                       extra={
                           'row_count': frame_rows,
                           'database': str(database),
                       })
        # Trim the frame in place so that only the first max_result_set_size rows remain.
        surplus_labels = frame.index[database.max_result_set_size:]
        frame.drop(surplus_labels, inplace=True)

    log_context = {
        'row_count': largest_row_count,
        'database': str(database),
    }
    logger.info('max_rows_returned', extra=log_context)

    reduced = reduce_result_set(frames, reference_groups, dimensions, share_dimensions)
    return largest_row_count, reduced
Пример #11
0
    def test_to_char(self):
        # The base Database is expected to render to_char as a CAST of the
        # field to VARCHAR.
        database = Database()

        rendered = str(database.to_char(Field('field')))

        self.assertEqual(rendered, 'CAST("field" AS VARCHAR)')
Пример #12
0
    def test_no_custom_middlewares_specified_still_gives_connection_middleware(
            self):
        # A Database built without arguments is expected to carry exactly one
        # middleware, and that one must be connection_middleware itself.
        middlewares = Database().middlewares

        self.assertEqual(1, len(middlewares))
        self.assertIs(middlewares[0], connection_middleware)