def get_df_payload(  # pylint: disable=too-many-locals,too-many-statements
    self, query_obj: QueryObject, **kwargs: Any
) -> Dict[str, Any]:
    """Handles caching around the df payload retrieval"""
    cache_key = self.cache_key(query_obj, **kwargs)
    logger.info("Cache key: %s", cache_key)
    is_loaded = False
    stacktrace = None
    df = pd.DataFrame()
    cached_dttm = datetime.utcnow().isoformat().split(".")[0]
    cache_value = None
    status = None
    query = ""
    error_message = None
    if cache_key and cache and not self.force:
        cache_value = cache.get(cache_key)
        if cache_value:
            stats_logger.incr("loading_from_cache")
            try:
                cache_value = pkl.loads(cache_value)
                df = cache_value["df"]
                query = cache_value["query"]
                status = utils.QueryStatus.SUCCESS
                is_loaded = True
                stats_logger.incr("loaded_from_cache")
            except Exception as ex:  # pylint: disable=broad-except
                logger.exception(ex)
                logger.error(
                    "Error reading cache: %s", utils.error_msg_from_exception(ex)
                )
            logger.info("Serving from cache")

    if query_obj and not is_loaded:
        try:
            query_result = self.get_query_result(query_obj)
            status = query_result["status"]
            query = query_result["query"]
            error_message = query_result["error_message"]
            df = query_result["df"]
            if status != utils.QueryStatus.FAILED:
                stats_logger.incr("loaded_from_source")
                if not self.force:
                    stats_logger.incr("loaded_from_source_without_force")
                is_loaded = True
        except Exception as ex:  # pylint: disable=broad-except
            logger.exception(ex)
            if not error_message:
                error_message = "{}".format(ex)
            status = utils.QueryStatus.FAILED
            stacktrace = utils.get_stacktrace()

    if is_loaded and cache_key and cache and status != utils.QueryStatus.FAILED:
        try:
            cache_value = dict(dttm=cached_dttm, df=df, query=query)
            cache_binary = pkl.dumps(cache_value, protocol=pkl.HIGHEST_PROTOCOL)
            logger.info("Caching %d chars at key %s", len(cache_binary), cache_key)
            stats_logger.incr("set_cache_key")
            cache.set(cache_key, cache_binary, timeout=self.cache_timeout)
        except Exception as ex:  # pylint: disable=broad-except
            # cache.set call can fail if the backend is down or if
            # the key is too large or whatever other reasons
            logger.warning("Could not cache key %s", cache_key)
            logger.exception(ex)
            cache.delete(cache_key)

    return {
        "cache_key": cache_key,
        "cached_dttm": cache_value["dttm"] if cache_value is not None else None,
        "cache_timeout": self.cache_timeout,
        "df": df,
        "error": error_message,
        "is_cached": cache_key is not None,
        "query": query,
        "status": status,
        "stacktrace": stacktrace,
        "rowcount": len(df.index),
    }
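# A minimal usage sketch for the payload contract above. `query_context` and
# `query_object` are hypothetical stand-ins for an instance of the class that
# defines get_df_payload and a QueryObject built by the caller; `utils` is the
# same module the method itself uses.
payload = query_context.get_df_payload(query_object)
if payload["status"] == utils.QueryStatus.FAILED:
    print(payload["error"], payload["stacktrace"])
else:
    df = payload["df"]  # pandas DataFrame, possibly served from cache
    print(payload["is_cached"], payload["rowcount"])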
def get_df_payload(self, query_obj, **kwargs):
    """Handles caching around the df payload retrieval"""
    cache_key = query_obj.cache_key(
        datasource=self.datasource.uid, **kwargs) if query_obj else None
    logging.info('Cache key: {}'.format(cache_key))
    is_loaded = False
    stacktrace = None
    df = None
    cached_dttm = datetime.utcnow().isoformat().split('.')[0]
    cache_value = None
    status = None
    query = ''
    error_message = None
    if cache_key and cache and not self.force:
        cache_value = cache.get(cache_key)
        if cache_value:
            stats_logger.incr('loaded_from_cache')
            try:
                cache_value = pkl.loads(cache_value)
                df = cache_value['df']
                query = cache_value['query']
                status = utils.QueryStatus.SUCCESS
                is_loaded = True
            except Exception as e:
                logging.exception(e)
                logging.error('Error reading cache: ' +
                              utils.error_msg_from_exception(e))
            logging.info('Serving from cache')

    if query_obj and not is_loaded:
        try:
            query_result = self.get_query_result(query_obj)
            status = query_result['status']
            query = query_result['query']
            error_message = query_result['error_message']
            df = query_result['df']
            if status != utils.QueryStatus.FAILED:
                stats_logger.incr('loaded_from_source')
                is_loaded = True
        except Exception as e:
            logging.exception(e)
            if not error_message:
                error_message = '{}'.format(e)
            status = utils.QueryStatus.FAILED
            stacktrace = traceback.format_exc()

    if (
            is_loaded and
            cache_key and
            cache and
            status != utils.QueryStatus.FAILED):
        try:
            cache_value = dict(
                dttm=cached_dttm,
                df=df if df is not None else None,
                query=query,
            )
            cache_value = pkl.dumps(
                cache_value, protocol=pkl.HIGHEST_PROTOCOL)
            logging.info('Caching {} chars at key {}'.format(
                len(cache_value), cache_key))
            stats_logger.incr('set_cache_key')
            cache.set(
                cache_key,
                cache_value,
                timeout=self.cache_timeout)
        except Exception as e:
            # cache.set call can fail if the backend is down or if
            # the key is too large or whatever other reasons
            logging.warning('Could not cache key {}'.format(cache_key))
            logging.exception(e)
            cache.delete(cache_key)

    return {
        'cache_key': cache_key,
        'cached_dttm': cache_value['dttm'] if cache_value is not None else None,
        'cache_timeout': self.cache_timeout,
        'df': df,
        'error': error_message,
        'is_cached': cache_key is not None,
        'query': query,
        'status': status,
        'stacktrace': stacktrace,
        'rowcount': len(df.index) if df is not None else 0,
    }
def get_df_payload(self, query_obj: QueryObject, **kwargs):
    """Handles caching around the df payload retrieval"""
    # query_obj may be None, so only derive the extra cache keys when present
    extra_cache_keys = (
        self.datasource.get_extra_cache_keys(query_obj.to_dict()) if query_obj else []
    )
    cache_key = (
        query_obj.cache_key(
            datasource=self.datasource.uid,
            extra_cache_keys=extra_cache_keys,
            **kwargs
        )
        if query_obj
        else None
    )
    logging.info("Cache key: {}".format(cache_key))
    is_loaded = False
    stacktrace = None
    df = None
    cached_dttm = datetime.utcnow().isoformat().split(".")[0]
    cache_value = None
    status = None
    query = ""
    error_message = None
    if cache_key and cache and not self.force:
        cache_value = cache.get(cache_key)
        if cache_value:
            stats_logger.incr("loaded_from_cache")
            try:
                cache_value = pkl.loads(cache_value)
                df = cache_value["df"]
                query = cache_value["query"]
                status = utils.QueryStatus.SUCCESS
                is_loaded = True
            except Exception as e:
                logging.exception(e)
                logging.error(
                    "Error reading cache: " + utils.error_msg_from_exception(e)
                )
            logging.info("Serving from cache")

    if query_obj and not is_loaded:
        try:
            query_result = self.get_query_result(query_obj)
            status = query_result["status"]
            query = query_result["query"]
            error_message = query_result["error_message"]
            df = query_result["df"]
            if status != utils.QueryStatus.FAILED:
                stats_logger.incr("loaded_from_source")
                is_loaded = True
        except Exception as e:
            logging.exception(e)
            if not error_message:
                error_message = "{}".format(e)
            status = utils.QueryStatus.FAILED
            stacktrace = utils.get_stacktrace()

    if is_loaded and cache_key and cache and status != utils.QueryStatus.FAILED:
        try:
            cache_value = dict(
                dttm=cached_dttm, df=df if df is not None else None, query=query
            )
            cache_binary = pkl.dumps(cache_value, protocol=pkl.HIGHEST_PROTOCOL)
            logging.info(
                "Caching {} chars at key {}".format(len(cache_binary), cache_key)
            )
            stats_logger.incr("set_cache_key")
            cache.set(cache_key, cache_binary, timeout=self.cache_timeout)
        except Exception as e:
            # cache.set call can fail if the backend is down or if
            # the key is too large or whatever other reasons
            logging.warning("Could not cache key {}".format(cache_key))
            logging.exception(e)
            cache.delete(cache_key)

    return {
        "cache_key": cache_key,
        "cached_dttm": cache_value["dttm"] if cache_value is not None else None,
        "cache_timeout": self.cache_timeout,
        "df": df,
        "error": error_message,
        "is_cached": cache_key is not None,
        "query": query,
        "status": status,
        "stacktrace": stacktrace,
        "rowcount": len(df.index) if df is not None else 0,
    }
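# Standalone sketch of the pickle round-trip the versions above rely on for
# the cache entry (pkl is the codebase's alias for the standard pickle
# module); runnable outside Superset:
import pickle as pkl
import pandas as pd

cache_value = dict(
    dttm="2019-01-01T00:00:00", df=pd.DataFrame({"a": [1]}), query="SELECT 1"
)
cache_binary = pkl.dumps(cache_value, protocol=pkl.HIGHEST_PROTOCOL)  # what cache.set stores
restored = pkl.loads(cache_binary)  # what the cache-read branch unpickles
assert restored["query"] == "SELECT 1"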
def get_payload(self, force=False):
    """Handles caching around the json payload retrieval"""
    cache_key = self.cache_key
    payload = None
    force = force if force else self.form_data.get('force') == 'true'
    if not force and cache:
        payload = cache.get(cache_key)

    if payload:
        is_cached = True
        try:
            cached_data = zlib.decompress(payload)
            if PY3:
                cached_data = cached_data.decode('utf-8')
            payload = json.loads(cached_data)
        except Exception as e:
            logging.error("Error reading cache: " +
                          utils.error_msg_from_exception(e))
            payload = None
        logging.info("Serving from cache")

    if not payload:
        data = None
        is_cached = False
        cache_timeout = self.cache_timeout
        stacktrace = None
        try:
            df = self.get_df()
            if not self.error_message:
                data = self.get_data(df)
        except Exception as e:
            logging.exception(e)
            if not self.error_message:
                self.error_message = str(e)
            self.status = utils.QueryStatus.FAILED
            data = None
            stacktrace = traceback.format_exc()
        payload = {
            'cache_key': cache_key,
            'cache_timeout': cache_timeout,
            'data': data,
            'error': self.error_message,
            'form_data': self.form_data,
            'query': self.query,
            'status': self.status,
            'stacktrace': stacktrace,
        }
        payload['cached_dttm'] = datetime.now().isoformat().split('.')[0]
        logging.info(
            "Caching for the next {} seconds".format(cache_timeout))
        data = self.json_dumps(payload)
        if PY3:
            data = bytes(data, 'utf-8')
        if cache and self.status != utils.QueryStatus.FAILED:
            try:
                cache.set(cache_key, zlib.compress(data), timeout=cache_timeout)
            except Exception as e:
                # cache.set call can fail if the backend is down or if
                # the key is too large or whatever other reasons
                logging.warning("Could not cache key {}".format(cache_key))
                logging.exception(e)
                cache.delete(cache_key)
    payload['is_cached'] = is_cached
    return payload
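# Standalone sketch of the zlib+json round-trip used by get_payload above
# (the PY3 branch shown as the only path); runnable outside Superset:
import json
import zlib

payload = {"data": [1, 2, 3], "status": "success"}
blob = zlib.compress(bytes(json.dumps(payload), "utf-8"))  # what cache.set stores
restored = json.loads(zlib.decompress(blob).decode("utf-8"))  # what the cache-read branch does
assert restored == payload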
def get_df_payload(  # pylint: disable=too-many-locals,too-many-statements
    self, query_obj: QueryObject, **kwargs: Any
) -> Dict[str, Any]:
    """Handles caching around the df payload retrieval"""
    cache_key = self.cache_key(query_obj, **kwargs)
    logger.info("Cache key: %s", cache_key)
    is_loaded = False
    stacktrace = None
    df = pd.DataFrame()
    cached_dttm = datetime.utcnow().isoformat().split(".")[0]
    cache_value = None
    status = None
    query = ""
    error_message = None
    if cache_key and cache and not self.force:
        cache_value = cache.get(cache_key)
        if cache_value:
            stats_logger.incr("loading_from_cache")
            try:
                df = cache_value["df"]
                query = cache_value["query"]
                status = utils.QueryStatus.SUCCESS
                is_loaded = True
                stats_logger.incr("loaded_from_cache")
            except Exception as ex:  # pylint: disable=broad-except
                logger.exception(ex)
                logger.error(
                    "Error reading cache: %s", utils.error_msg_from_exception(ex)
                )
            logger.info("Serving from cache")

    if query_obj and not is_loaded:
        try:
            invalid_columns = [
                col
                for col in query_obj.columns
                + query_obj.groupby
                + [flt["col"] for flt in query_obj.filter]
                + utils.get_column_names_from_metrics(query_obj.metrics)
                if col not in self.datasource.column_names
            ]
            if invalid_columns:
                raise QueryObjectValidationError(
                    _(
                        "Columns missing in datasource: %(invalid_columns)s",
                        invalid_columns=invalid_columns,
                    )
                )
            query_result = self.get_query_result(query_obj)
            status = query_result["status"]
            query = query_result["query"]
            error_message = query_result["error_message"]
            df = query_result["df"]
            if status != utils.QueryStatus.FAILED:
                stats_logger.incr("loaded_from_source")
                if not self.force:
                    stats_logger.incr("loaded_from_source_without_force")
                is_loaded = True
        except QueryObjectValidationError as ex:
            error_message = str(ex)
            status = utils.QueryStatus.FAILED
        except Exception as ex:  # pylint: disable=broad-except
            logger.exception(ex)
            if not error_message:
                error_message = str(ex)
            status = utils.QueryStatus.FAILED
            stacktrace = utils.get_stacktrace()

    if is_loaded and cache_key and cache and status != utils.QueryStatus.FAILED:
        try:
            cache_value = dict(dttm=cached_dttm, df=df, query=query)
            stats_logger.incr("set_cache_key")
            cache.set(cache_key, cache_value, timeout=self.cache_timeout)
        except Exception as ex:  # pylint: disable=broad-except
            # cache.set call can fail if the backend is down or if
            # the key is too large or whatever other reasons
            logger.warning("Could not cache key %s", cache_key)
            logger.exception(ex)
            cache.delete(cache_key)

    return {
        "cache_key": cache_key,
        "cached_dttm": cache_value["dttm"] if cache_value is not None else None,
        "cache_timeout": self.cache_timeout,
        "df": df,
        "error": error_message,
        "is_cached": cache_key is not None,
        "query": query,
        "status": status,
        "stacktrace": stacktrace,
        "rowcount": len(df.index),
    }
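# Standalone sketch of the invalid-column validation step above, with
# hypothetical inputs (the real code collects names from QueryObject.columns,
# groupby, filters, and metrics, and checks them against the datasource):
requested = ["country", "region", "sales"]
available = {"country", "sales"}  # plays the role of datasource.column_names
invalid_columns = [col for col in requested if col not in available]
if invalid_columns:
    raise ValueError("Columns missing in datasource: {}".format(invalid_columns))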