Example #1
0
def fetch_data(
        database: Database,
        queries: List[Type[QueryBuilder]],
        dimensions: Iterable[Field],
        share_dimensions: Iterable[Field] = (),
        reference_groups=(),
) -> Tuple[int, pd.DataFrame]:
    """
    Render the queries to SQL, execute them against the database and reduce
    the resulting frames into a single result set.

    :param database: Database connector used to execute the queries.
    :param queries: Query builder objects; rendered to SQL via ``str()``.
    :param dimensions: Dimension fields; date-typed ones are parsed as dates.
    :param share_dimensions: Extra dimensions used for share calculations.
    :param reference_groups: Reference groups forwarded to the reducer.
    :return: Tuple of (largest row count across result sets, reduced frame).
    """
    rendered_queries = [str(query) for query in queries]

    # Map dimension alias -> datetime format for every date-typed dimension
    # so pandas parses those columns as dates when loading the result sets.
    parse_dates = {}
    for dimension in dimensions:
        base_field = find_field_in_modified_field(dimension)
        if base_field.data_type == DataType.date:
            column_key = alias_selector(base_field.alias)
            parse_dates[column_key] = PANDAS_TO_DATETIME_FORMAT

    result_frames = database.fetch_dataframes(*rendered_queries,
                                              parse_dates=parse_dates)

    # default=0 keeps this safe when no queries were given.
    max_rows_returned = max((len(frame) for frame in result_frames), default=0)
    logger.info('max_rows_returned',
                extra={
                    'row_count': max_rows_returned,
                    'database': str(database)
                })

    reduced = reduce_result_set(result_frames, reference_groups, dimensions,
                                share_dimensions)
    return max_rows_returned, reduced
Example #2
0
def fetch_data(
        database: Database,
        queries: Union[Sized, Iterable],
        dimensions: Iterable[Field],
        share_dimensions: Iterable[Field] = (),
        reference_groups=(),
):
    """
    Execute the queries (each capped at the database's maximum result set
    size) and reduce the fetched frames into a single result set.

    :param database: Database connector used to execute the queries.
    :param queries: Query builder objects; a limit is applied before render.
    :param dimensions: Dimension fields forwarded to the reducer.
    :param share_dimensions: Extra dimensions used for share calculations.
    :param reference_groups: Reference groups forwarded to the reducer.
    :return: The reduced result set.
    """
    capped_queries = []
    for query in queries:
        # A falsy (unset) limit means "unlimited"; every query is capped at
        # the database's maximum result set size.
        effective_limit = min(query._limit or float("inf"),
                              database.max_result_set_size)
        capped_queries.append(str(query.limit(effective_limit)))

    results = database.fetch_dataframes(*capped_queries)
    return reduce_result_set(results, reference_groups, dimensions,
                             share_dimensions)
Example #3
0
def fetch_data(
        database: Database,
        queries: List[Type[QueryBuilder]],
        dimensions: Iterable[Field],
        share_dimensions: Iterable[Field] = (),
        reference_groups=(),
) -> Tuple[int, pd.DataFrame]:
    """
    Render the queries to SQL, execute them, truncate any result set that
    exceeds the database's maximum size and reduce the frames into a single
    result set.

    :param database: Database connector used to execute the queries.
    :param queries: Query builder objects; rendered to SQL via ``str()``.
    :param dimensions: Dimension fields; date-typed ones are parsed as dates.
    :param share_dimensions: Extra dimensions used for share calculations.
    :param reference_groups: Reference groups forwarded to the reducer.
    :return: Tuple of (largest pre-truncation row count, reduced frame).
    """
    queries = [str(query) for query in queries]

    # Indicate which dimensions need to be parsed as date types.
    # Keys are dimension alias selectors, values the pandas datetime format.
    pandas_parse_dates = {}
    for dimension in dimensions:
        unmodified_dimension = find_field_in_modified_field(dimension)
        if unmodified_dimension.data_type == DataType.date:
            pandas_parse_dates[alias_selector(
                unmodified_dimension.alias)] = PANDAS_TO_DATETIME_FORMAT

    results = database.fetch_dataframes(*queries,
                                        parse_dates=pandas_parse_dates)

    max_rows_returned = 0
    # NOTE(review): `results` is assumed to be a mutable list of DataFrames —
    # it is indexed and reassigned below. TODO confirm fetch_dataframes
    # returns a list, not a generator.
    for i, result_df in enumerate(results):
        row_count = len(result_df)
        if row_count > max_rows_returned:
            max_rows_returned = row_count
        if row_count > database.max_result_set_size:
            logger.warning('row_count_over_max',
                           extra={
                               'row_count': row_count,
                               'database': str(database)
                           })
            # BUGFIX: the previous label-based
            # `result_df.drop(result_df.index[n:], inplace=True)` removes
            # every row whose index LABEL matches the tail labels, so with
            # duplicate index labels it could also drop rows before the
            # cutoff. Truncate positionally instead and store the trimmed
            # frame back into the results list.
            results[i] = result_df.iloc[:database.max_result_set_size]

    logger.info('max_rows_returned',
                extra={
                    'row_count': max_rows_returned,
                    'database': str(database)
                })
    return max_rows_returned, reduce_result_set(results, reference_groups,
                                                dimensions, share_dimensions)