import logging
import math

# Project-local helpers, assumed to live alongside this module.
import influx_date_manipulation
import influx_query_modification
import influx_query_parsing
import influx_rp_conf_access


def update_query_to_limit_nb_points_for_query(backend_host, backend_port,
                                              user, password, from_parts,
                                              query, parsed_query,
                                              aggregation_properties,
                                              max_nb_points_per_query):
    """Widen the GROUP BY time() interval so the query stays under the
    allowed number of returned points. Returns the reworked query string,
    or None if no rework is needed or possible."""
    schema = from_parts['schema']
    # counter_aggregation_mode = get_counter_aggregation_mode(from_parts, aggregation_properties)
    # if counter_aggregation_mode is None:
    #     counter_aggregation_mode = 'mean'
    expected_nb_points_per_query = get_expected_nb_points_for_query(
        backend_host, backend_port, user, password, schema, query,
        parsed_query)
    if expected_nb_points_per_query is None:
        return None
    if expected_nb_points_per_query['nb_points'] > max_nb_points_per_query:
        logging.info('Expected nb of points per query '
                     + str(expected_nb_points_per_query['nb_points'])
                     + ' is bigger than max allowed one ('
                     + str(max_nb_points_per_query) + ')')
        # Ratio by which the query exceeds the cap; the GROUP BY time()
        # interval is widened proportionally.
        my_factor = expected_nb_points_per_query[
            'nb_points'] / max_nb_points_per_query
        split_group_by_time_interval = influx_date_manipulation.split_influx_time(
            expected_nb_points_per_query['group_by_time_interval'])
        adjusted_group_by_time_value = int(
            math.ceil(my_factor * split_group_by_time_interval['number']))
        new_group_by_time_interval = str(
            adjusted_group_by_time_value) + split_group_by_time_interval['unit']
        parsed_query = influx_query_modification.change_group_by_time_interval(
            parsed_query, new_group_by_time_interval)
        # sum() over wider buckets over-counts, so rescale by the inverse factor.
        # if counter_aggregation_mode == 'sum' and influx_query_parsing.is_sum_group_by_time(parsed_query):
        if influx_query_parsing.is_sum_group_by_time(parsed_query):
            parsed_query = influx_query_modification.change_sum_group_by_time_factor(
                parsed_query, '1/' + str(my_factor))
        query = influx_query_parsing.stringify_sqlparsed(parsed_query)
        logging.info('Reworked query (limit nb points per query): ' + query)
        return query
    return None
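# Illustrative sketch, not part of the original module: the widening
# arithmetic above, isolated so it can be checked without a live backend.
# The helper name _sketch_widen_interval and the sample numbers are
# hypothetical; split_influx_time() is assumed to split '30s' into
# {'number': 30, 'unit': 's'}.
def _sketch_widen_interval(nb_points, max_nb_points, number, unit):
    # ratio by which the query exceeds the cap, e.g. 120000 / 10000 = 12
    factor = float(nb_points) / max_nb_points
    # widen the interval proportionally, rounding up to a whole unit
    return str(int(math.ceil(factor * number))) + unit

# GROUP BY time(30s) would become GROUP BY time(360s):
assert _sketch_widen_interval(120000, 10000, 30, 's') == '360s'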
def get_right_rp_for_query(schema, query, parsed_query,
                           known_retention_policies,
                           override_explicit_rp=False):
    """Pick a retention policy whose duration covers the query's lower time
    bound. Returns a dict with the chosen RP (and, when relevant, the
    adjusted GROUP BY time() interval and sum() rescaling factor), or None
    if nothing needs to change."""
    output = {}
    chosen_rp = None
    chosen_group_by_time_interval = None
    is_query_sum_group_by_time = False
    from_parts = influx_query_parsing.extract_measurement_from_query(
        schema, parsed_query)
    if from_parts is None:
        logging.error('Could not extract measurement from query')
        return None
    if schema is None and from_parts['schema'] is None:
        # pass-through towards InfluxDB
        logging.warning('Schema not specified in query nor as a URL param')
        return None
    if from_parts['schema'] is not None and schema != from_parts['schema']:
        # schema in query overrides URL param (an absent schema in the
        # query must not erase the URL-param one)
        schema = from_parts['schema']
    if from_parts['rp'] is not None and not override_explicit_rp:
        logging.info('RP ' + from_parts['rp'] + ' set in query, skipping')
        return None
    if schema not in known_retention_policies:
        logging.info('no known RP for schema ' + schema)
        return None
    time_bounds = influx_query_parsing.extract_time_window_bounds(query)
    group_by_time_interval = influx_query_parsing.extract_time_interval_group_by(
        parsed_query)
    if group_by_time_interval is not None:
        is_query_sum_group_by_time = influx_query_parsing.is_sum_group_by_time(
            parsed_query)
    if time_bounds['from'] is None:
        logging.info(
            'no lower time boundary in query, cannot select automatically RP')
        return None
    logging.debug('lower time bound is '
                  + time_bounds['from'].strftime("%Y-%m-%d %H:%M:%S"))
    rp = None
    if from_parts['rp'] is not None:
        logging.debug('RP defined explicitly in query: ' + from_parts['rp'])
        rp = next((candidate for candidate in known_retention_policies[schema]
                   if candidate['name'] == from_parts['rp']), None)
        if rp is None:
            logging.warning('explicit RP ' + from_parts['rp'] + ' is unknown')
            return None
    else:
        logging.debug('no explicit RP in query')
        rp = influx_rp_conf_access.get_default_rp_for_schema_from_conf(
            schema, known_retention_policies)
        if rp is None:
            logging.error('missing default RP in known RP list')
            return None
    max_datetime = influx_date_manipulation.datetime_max_for_influx_rp(
        rp['duration'])
    logging.debug('Selected RP (' + rp['name'] + ') max datetime is '
                  + max_datetime.strftime("%Y-%m-%d %H:%M:%S"))
    if is_rp_good_for_our_interval(max_datetime, time_bounds['from']):
        logging.info('Selected RP (' + rp['name'] + ') is already pretty good')
    else:
        # Walk the known RPs and keep the first one that still covers the
        # query's lower time bound.
        for rp in known_retention_policies[schema]:
            max_datetime = influx_date_manipulation.datetime_max_for_influx_rp(
                rp['duration'])
            logging.debug('RP ' + rp['name'] + ' max datetime is '
                          + max_datetime.strftime("%Y-%m-%d %H:%M:%S"))
            if is_rp_good_for_our_interval(max_datetime, time_bounds['from']):
                logging.info('RP ' + rp['name'] + ' is selected')
                chosen_rp = rp['name']
                if group_by_time_interval is not None and 'interval' in rp:
                    chosen_group_by_time_interval = get_new_group_by_time_interval_according_to_rp(
                        group_by_time_interval, from_parts['measurement'], rp)
                break
    if chosen_rp is not None:
        output['rp'] = chosen_rp
        if chosen_group_by_time_interval is not None:
            output['group_by_time_interval'] = chosen_group_by_time_interval
            if is_query_sum_group_by_time:
                output['sum_group_by_time_interval_factor'] = get_sum_group_by_time_interval_factor(
                    group_by_time_interval, chosen_group_by_time_interval)
    if not output:
        return None
    return output
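# Illustrative sketch, not part of the original module: the RP selection
# loop above keeps the first retention policy whose window still covers the
# query's lower time bound. The helper below is a hypothetical,
# self-contained approximation in which datetime_max_for_influx_rp() is
# modeled as "now minus the RP duration".
import datetime

def _sketch_pick_rp(rps, lower_bound, now):
    # rps: [{'name': ..., 'duration_days': ...}], ordered shortest-lived first
    for rp in rps:
        oldest_kept = now - datetime.timedelta(days=rp['duration_days'])
        if oldest_kept <= lower_bound:  # RP still holds the oldest point we need
            return rp['name']
    return None

# A 30-day-old lower bound skips a one-week RP and lands on a one-year RP:
assert _sketch_pick_rp(
    [{'name': 'one_week', 'duration_days': 7},
     {'name': 'one_year', 'duration_days': 365}],
    lower_bound=datetime.datetime(2024, 1, 1),
    now=datetime.datetime(2024, 1, 31)) == 'one_year'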
def check(self, query, parsed_query):
    """Return True when the query both aggregates with sum() per
    GROUP BY time() bucket and has a parsable lower time bound."""
    is_sum_group_by_time = influx_query_parsing.is_sum_group_by_time(
        parsed_query)
    is_lower_time_bound_parsable = influx_query_parsing.is_lower_time_bound_parsable(
        query)
    return is_sum_group_by_time and is_lower_time_bound_parsable
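# Illustrative note (an assumption about the parsers' behavior, not taken
# from the module): this check would accept a query such as
#
#     SELECT sum(value) FROM "cpu" WHERE time > now() - 7d GROUP BY time(1h)
#
# since it both aggregates with sum() per GROUP BY time() bucket and has a
# parsable lower time bound, while e.g. a SELECT mean(value) ... query, or
# one without a WHERE time clause, would be rejected.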