# Example #1
# 0
def get_types_from_rows(column_names, rows):
    """Infer a type descriptor for each column by scanning row values.

    Args:
        column_names: list of column names; an empty list short-circuits
            to an empty result.
        rows: non-empty list of rows, each a sequence aligned with
            ``column_names``, whose non-None values drive the inference.

    Returns:
        A list of type descriptors (as produced by ``get_type``), one per
        column, in column order.

    Raises:
        exceptions.InternalError: if ``rows`` is empty.
        exceptions.DatabaseError: if a row length mismatches the column
            count, two values disagree on a column's type, or a column is
            entirely None so its type cannot be inferred.
    """
    if not column_names:
        return []
    if not rows:
        # No placeholders in this message, so no f-string prefix is needed.
        raise exceptions.InternalError(
            'Cannot infer the column types from empty rows')
    types = [None] * len(column_names)
    # Count of columns still lacking a type; lets us stop scanning early.
    remaining = len(column_names)
    for row in rows:
        if remaining <= 0:
            break
        if len(row) != len(column_names):
            raise exceptions.DatabaseError(
                f'Column names {column_names} does not match row {row}')
        for column_index, value in enumerate(row):
            if value is not None:
                current_type = types[column_index]
                new_tc = get_type(value)
                if current_type is None:
                    types[column_index] = new_tc
                    remaining -= 1
                # NOTE(review): this compares the freshly inferred value
                # against ``current_type.code`` by identity -- assumes
                # ``get_type`` returns the ``.code`` member of the stored
                # descriptor; verify against get_type's contract.
                elif new_tc is not current_type.code:
                    raise exceptions.DatabaseError(
                        f'Differing column type found for column @{column_index} {column_names[column_index]}:'
                        f'{current_type} vs {new_tc}')
    # Generator form avoids materializing a throwaway list.
    if any(t is None for t in types):
        raise exceptions.DatabaseError(
            f'Couldn\'t infer all the types {types}')
    return types
# Example #2
# 0
def get_group_by_column_names(aggregation_results):
    """Return the group-by column list shared by every aggregation result.

    Adopts the columns of the first metric seen and insists that every
    subsequent metric reports the same list, raising
    ``exceptions.DatabaseError`` on any mismatch.
    """
    group_by_cols = []
    for metric in aggregation_results:
        metric_name = metric.get('function', 'noname')
        gby_cols_for_metric = metric.get('groupByColumns', [])
        if not group_by_cols:
            # First metric encountered: adopt its columns (defensive copy).
            group_by_cols = list(gby_cols_for_metric)
            continue
        if group_by_cols != gby_cols_for_metric:
            raise exceptions.DatabaseError(
                f"Cols for metric {metric_name}: {gby_cols_for_metric} differ from other columns {group_by_cols}"
            )
    return group_by_cols
# Example #3
# 0
 def get_metadata_from_controller(self, path):
     """Fetch and return JSON metadata from the controller at *path*.

     Joins *path* onto the configured server URL, issues a GET asking
     for JSON, and returns the decoded payload. Raises
     ``exceptions.DatabaseError`` when the body is not valid JSON.
     """
     url = parse.urljoin(self._server, path)
     headers = {'Accept': 'application/json'}
     r = requests.get(url, headers=headers)
     try:
         result = r.json()
     except ValueError as e:
         # The controller answered with something other than JSON.
         raise exceptions.DatabaseError(
             f'Got invalid json response from {self._server}:{path}: {r.text}'
         ) from e
     if self._debug:
         logger.info(
             f"metadata get on {self._server}:{path} returned {result}")
     return result
# Example #4
# 0
 def check_sufficient_responded(self, query, queried, responded):
     """Raise exceptions.DatabaseError unless enough servers responded.

     The threshold comes from ``self.acceptable_respond_fraction``:
     0 disables the check entirely, any value <= -1 requires every
     queried server, a value strictly between 0 and 1 is a fraction of
     ``queried``, and anything else is an absolute count. A negative
     ``queried`` or ``responded`` always fails the check.
     """
     fraction = self.acceptable_respond_fraction
     if fraction == 0:
         # Check disabled by configuration.
         return
     if queried < 0 or responded < 0:
         # Sentinel counters: guarantee the failure branch below fires.
         responded, needed = -1, -1
     elif fraction <= -1:
         needed = queried
     elif 0 < fraction < 1:
         needed = int(fraction * queried)
     else:
         needed = fraction
     if responded < 0 or responded < needed:
         raise exceptions.DatabaseError(
             f"Query\n\n{query} timed out: Out of {queried}, only"
             f" {responded} responded, while needed was {needed}")
# Example #5
# 0
    def execute(self, operation, parameters=None):
        """Submit a PQL query to the broker and load the results into the cursor.

        Args:
            operation: PQL query template.
            parameters: optional mapping substituted into the template via
                ``apply_parameters``; ``None`` is treated as an empty mapping.

        Returns:
            ``self``, with ``self._results`` holding the result rows and
            ``self.description`` the column metadata inferred from them
            (both reset to empty/None when there are no rows).

        Raises:
            exceptions.DatabaseError: on a non-JSON response, a broker
                timeout (partial server responses), in-payload exceptions,
                or malformed aggregation/selection result sections.
            exceptions.ProgrammingError: on a non-200 HTTP status.
        """
        query = apply_parameters(operation, parameters or {})

        headers = {'Content-Type': 'application/json'}
        headers.update(self._extra_request_headers)
        payload = {'pql': query}
        if self._debug:
            logger.info(
                f'Submitting the pinot query to {self.url}:\n{query}\n{pformat(payload)}, with {headers}'
            )
        r = requests.post(self.url, headers=headers, json=payload)
        if r.encoding is None:
            # Force a decodable encoding so r.text below cannot fail.
            r.encoding = 'utf-8'

        try:
            # Note: `payload` is reused -- from here on it holds the
            # decoded response body, not the request body.
            payload = r.json()
        except Exception as e:
            raise exceptions.DatabaseError(
                f"Error when querying {query} from {self.url}, raw response is:\n{r.text}"
            ) from e

        if self._debug:
            # NOTE(review): requests.Response is falsy for 4xx/5xx status
            # codes, so `0 if not r else r.status_code` logs 0 instead of
            # the real status on HTTP errors -- confirm this is intended.
            logger.info(
                f'Got the payload of type {type(payload)} with the status code {0 if not r else r.status_code}:\n{payload}'
            )

        # Missing counters default to -1 and are treated as a timeout.
        num_servers_responded = payload.get('numServersResponded', -1)
        num_servers_queried = payload.get('numServersQueried', -1)

        if num_servers_queried > num_servers_responded or num_servers_responded == -1 or num_servers_queried == -1:
            raise exceptions.DatabaseError(
                f"Query\n\n{query} timed out: Out of {num_servers_queried}, only"
                f" {num_servers_responded} responded")

        # raise any error messages
        if r.status_code != 200:
            msg = f"Query\n\n{query}\n\nreturned an error: {r.status_code}\nFull response is {pformat(payload)}"
            raise exceptions.ProgrammingError(msg)

        # The broker can answer HTTP 200 yet carry in-band exceptions;
        # surface them all in one error.
        if payload.get('exceptions', []):
            msg = '\n'.join(
                pformat(exception) for exception in payload['exceptions'])
            raise exceptions.DatabaseError(msg)

        rows = [
        ]  # array of array, where inner array is array of column values
        column_names = [
        ]  # column names, such that len(column_names) == len(rows[0])

        if 'aggregationResults' in payload:
            aggregation_results = payload['aggregationResults']
            gby_cols = get_group_by_column_names(aggregation_results)
            metric_names = [
                agg_result['function'] for agg_result in aggregation_results
            ]
            # OrderedDict preserves the broker's first-seen group ordering.
            gby_rows = OrderedDict(
            )  # Dict of group-by-vals to array of metrics
            # The empty tuple keys the single global (non-grouped) row.
            total_group_vals_key = ()
            num_metrics = len(metric_names)
            for i, agg_result in enumerate(aggregation_results):
                if 'groupByResult' in agg_result:
                    # A global total must not coexist with group-by rows.
                    if total_group_vals_key in gby_rows:
                        raise exceptions.DatabaseError(
                            f"Invalid response {pformat(aggregation_results)} since we have both total and group by results"
                        )
                    for gb_result in agg_result['groupByResult']:
                        group_values = gb_result['group']
                        if len(group_values) < len(gby_cols):
                            raise exceptions.DatabaseError(
                                f"Expected {pformat(agg_result)} to contain {len(gby_cols)}, but got {len(group_values)}"
                            )
                        elif len(group_values) > len(gby_cols):
                            # This can happen because of poor escaping in the results
                            # Re-join the spilled leading fragments into the
                            # first group value so lengths line up again.
                            extra = len(group_values) - len(gby_cols)
                            new_group_values = group_values[extra:]
                            new_group_values[0] = ''.join(
                                group_values[0:extra]) + new_group_values[0]
                            group_values = new_group_values

                        # One row per distinct group; metric i fills slot i.
                        group_values_key = tuple(group_values)
                        if group_values_key not in gby_rows:
                            gby_rows[group_values_key] = [None] * num_metrics
                        gby_rows[group_values_key][i] = gb_result['value']
                else:  # Global aggregation result
                    if total_group_vals_key not in gby_rows:
                        gby_rows[total_group_vals_key] = [None] * num_metrics
                    # Any other key alongside the total means mixed results.
                    if len(gby_rows) != 1:
                        raise exceptions.DatabaseError(
                            f"Invalid response {pformat(aggregation_results)} since we have both total and group by results"
                        )
                    if len(gby_cols) > 0:
                        raise exceptions.DatabaseError(
                            f"Invalid response since total aggregation results are present even when non zero gby_cols:{gby_cols}, {pformat(aggregation_results)}"
                        )
                    gby_rows[total_group_vals_key][i] = agg_result['value']

            # Flatten {group values -> metric values} into result rows.
            rows = []
            column_names = gby_cols + metric_names
            for group_vals, metric_vals in gby_rows.items():
                if len(group_vals) != len(gby_cols):
                    raise exceptions.DatabaseError(
                        f"Expected {len(gby_cols)} but got {len(group_vals)} for a row"
                    )
                if len(metric_vals) != len(metric_names):
                    raise exceptions.DatabaseError(
                        f"Expected {len(metric_names)} but got {len(metric_vals)} for a row"
                    )
                rows.append(list(group_vals) + metric_vals)
        elif 'selectionResults' in payload:
            results = payload['selectionResults']
            column_names = results.get('columns')
            values = results.get('results')
            if column_names and values:
                rows = values
            else:
                raise exceptions.DatabaseError(
                    f'Expected columns and results in selectionResults, but got {pformat(results)} instead'
                )

        logger.debug(
            f'Got the rows as a type {type(rows)} of size {len(rows)}')
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug(pformat(rows))
        self.description = None
        self._results = []
        if rows:
            # Column types are inferred from the data itself; description
            # follows the DB-API cursor.description convention.
            types = get_types_from_rows(column_names, rows)
            if self._debug:
                logger.info(
                    f'There are {len(rows)} rows and types is {pformat(types)}, column_names are {pformat(column_names)}, first row is like {pformat(rows[0])}, and last row is like {pformat(rows[-1])}'
                )
            self._results = rows
            self.description = get_description_from_types(column_names, types)

        return self