def __init__(self, redis_connection, query, priority, job_id=None,
             wait_time=None, query_time=None, updated_at=None, status=None,
             error=None, query_result_id=None, process_id=0):
    self.redis_connection = redis_connection
    self.query = query
    self.priority = priority
    self.query_hash = gen_query_hash(self.query)
    self.query_result_id = query_result_id
    if process_id == 'None':
        self.process_id = None
    else:
        self.process_id = int(process_id)

    if job_id is None:
        self.id = str(uuid.uuid1())
        self.new_job = True
        self.wait_time = 0
        self.query_time = 0
        self.error = None
        self.updated_at = time.time()  # job_dict.get('updated_at', time.time())
        self.status = self.WAITING  # int(job_dict.get('status', self.WAITING))
    else:
        self.id = job_id
        self.new_job = False
        self.error = error
        self.wait_time = wait_time
        self.query_time = query_time
        self.updated_at = updated_at
        self.status = status

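# Every snippet in this section calls gen_query_hash. Its implementation is not
# shown here; the following is a minimal sketch of what it plausibly looks like,
# assuming the hash is an MD5 over the query text normalized by stripping
# comments, removing whitespace, and lowercasing. The exact normalization is an
# assumption, not confirmed by the snippets themselves.
import hashlib
import re

COMMENTS_REGEX = re.compile(r"/\*.*?\*/")


def gen_query_hash(sql):
    """Hash a query so that queries differing only in comments, whitespace,
    or case map to the same value."""
    sql = COMMENTS_REGEX.sub("", sql)
    sql = "".join(sql.split()).lower()
    return hashlib.md5(sql.encode("utf-8")).hexdigest()
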
def pre_save(self, created):
    super(Query, self).pre_save(created)
    self.query_hash = utils.gen_query_hash(self.query)
    self._set_api_key()

    if self.last_modified_by is None:
        self.last_modified_by = self.user

def add_job(self, query, priority, data_source):
    query_hash = gen_query_hash(query)
    logging.info("[Manager][%s] Inserting job with priority=%s", query_hash, priority)
    try_count = 0
    job = None

    while try_count < self.max_retries:
        try_count += 1

        pipe = self.redis_connection.pipeline()
        try:
            pipe.watch('query_hash_job:%s' % query_hash)
            job_id = pipe.get('query_hash_job:%s' % query_hash)
            if job_id:
                logging.info("[Manager][%s] Found existing job: %s", query_hash, job_id)
                job = worker.Job.load(self.redis_connection, job_id)
            else:
                job = worker.Job(self.redis_connection, query=query, priority=priority,
                                 data_source_id=data_source.id,
                                 data_source_name=data_source.name,
                                 data_source_type=data_source.type,
                                 data_source_options=data_source.options)
                pipe.multi()
                job.save(pipe)
                logging.info("[Manager][%s] Created new job: %s", query_hash, job.id)
                self.queue.push(job.id, job.priority)
            break

        except redis.WatchError:
            continue

    if not job:
        logging.error("[Manager][%s] Failed adding job for query.", query_hash)

    return job

def __init__(self, task, query, data_source_id, user_id, metadata, scheduled_query):
    self.task = task
    self.query = query
    self.data_source_id = data_source_id
    self.metadata = metadata
    self.data_source = self._load_data_source()
    if user_id is not None:
        self.user = models.User.query.get(user_id)
    else:
        self.user = None
    # Close DB connection to prevent holding a connection for a long time while the query is executing.
    models.db.session.close()
    self.query_hash = gen_query_hash(self.query)
    self.scheduled_query = scheduled_query
    # Load existing tracker or create a new one if the job was created before code update:
    self.tracker = (
        QueryTaskTracker.get_by_task_id(task.request.id) or
        QueryTaskTracker.create(
            task.request.id, 'created', self.query_hash,
            self.data_source_id, False, metadata
        )
    )
    if self.tracker.scheduled:
        models.scheduled_queries_executions.update(self.tracker.query_id)

def setUp(self):
    super(TestQueryResultStoreResult, self).setUp()
    self.data_source = data_source_factory.create()
    self.query = "SELECT 1"
    self.query_hash = gen_query_hash(self.query)
    self.runtime = 123
    self.utcnow = datetime.datetime.utcnow()
    self.data = "data"

def enqueue_query(query, data_source, user_id, scheduled_query=None, metadata={}):
    query_hash = gen_query_hash(query)
    logging.info("Inserting job for %s with metadata=%s", query_hash, metadata)
    try_count = 0
    job = None

    while try_count < 5:
        try_count += 1

        pipe = redis_connection.pipeline()
        try:
            pipe.watch(_job_lock_id(query_hash, data_source.id))
            job_id = pipe.get(_job_lock_id(query_hash, data_source.id))
            if job_id:
                logging.info("[%s] Found existing job: %s", query_hash, job_id)

                job = QueryTask(job_id=job_id)

                if job.ready():
                    logging.info("[%s] job found is ready (%s), removing lock",
                                 query_hash, job.celery_status)
                    redis_connection.delete(_job_lock_id(query_hash, data_source.id))
                    job = None

            if not job:
                pipe.multi()

                time_limit = None

                if scheduled_query:
                    queue_name = data_source.scheduled_queue_name
                    scheduled_query_id = scheduled_query.id
                else:
                    queue_name = data_source.queue_name
                    scheduled_query_id = None
                    time_limit = settings.ADHOC_QUERY_TIME_LIMIT

                result = execute_query.apply_async(
                    args=(query, data_source.id, metadata, user_id, scheduled_query_id),
                    queue=queue_name,
                    time_limit=time_limit)
                job = QueryTask(async_result=result)
                tracker = QueryTaskTracker.create(
                    result.id, 'created', query_hash, data_source.id,
                    scheduled_query is not None, metadata)
                tracker.save(connection=pipe)

                logging.info("[%s] Created new job: %s", query_hash, job.id)
                pipe.set(_job_lock_id(query_hash, data_source.id), job.id,
                         settings.JOB_EXPIRY_TIME)
                pipe.execute()
            break

        except redis.WatchError:
            continue

    if not job:
        logging.error("[Manager][%s] Failed adding job for query.", query_hash)

    return job

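# All of the enqueue_query/add_task/add_job variants in this section rely on the
# same optimistic-locking pattern: WATCH a per-query lock key, read it, and only
# write inside MULTI/EXEC so that a concurrent writer triggers a WatchError and a
# retry. A minimal, self-contained sketch of that pattern with redis-py follows;
# the key name, payload, and function name are illustrative, not taken from the
# snippets above.
import redis


def acquire_once(conn, lock_key, value, ttl, max_retries=5):
    """Set lock_key to value only if nobody else wrote it first; retry on races."""
    for _ in range(max_retries):
        pipe = conn.pipeline()
        try:
            pipe.watch(lock_key)          # start optimistic lock on the key
            existing = pipe.get(lock_key)  # pipeline is in immediate mode after watch()
            if existing:
                pipe.unwatch()
                return existing           # someone already holds the lock
            pipe.multi()                  # queue commands transactionally
            pipe.set(lock_key, value, ex=ttl)
            pipe.execute()                # raises WatchError if the key changed
            return value
        except redis.WatchError:
            continue                      # lost the race; re-read and retry
    return None
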
def enqueue_query(query, data_source, scheduled=False, metadata={}):
    query_hash = gen_query_hash(query)
    logging.info("Inserting job for %s with metadata=%s", query_hash, metadata)
    try_count = 0
    job = None

    while try_count < 5:
        try_count += 1

        pipe = redis_connection.pipeline()
        try:
            pipe.watch(_job_lock_id(query_hash, data_source.id))
            job_id = pipe.get(_job_lock_id(query_hash, data_source.id))
            if job_id:
                logging.info("[%s] Found existing job: %s", query_hash, job_id)

                job = QueryTask(job_id=job_id)
                tracker = QueryTaskTracker.get_by_task_id(job_id, connection=pipe)
                # tracker might not exist, if it's an old job
                if scheduled and tracker:
                    tracker.update(retries=tracker.retries + 1)
                elif tracker:
                    tracker.update(scheduled_retries=tracker.scheduled_retries + 1)

                if job.ready():
                    logging.info("[%s] job found is ready (%s), removing lock",
                                 query_hash, job.celery_status)
                    redis_connection.delete(_job_lock_id(query_hash, data_source.id))
                    job = None

            if not job:
                pipe.multi()

                if scheduled:
                    queue_name = data_source.scheduled_queue_name
                else:
                    queue_name = data_source.queue_name

                result = execute_query.apply_async(args=(query, data_source.id, metadata),
                                                   queue=queue_name)
                job = QueryTask(async_result=result)
                tracker = QueryTaskTracker.create(result.id, 'created', query_hash,
                                                  data_source.id, scheduled, metadata)
                tracker.save(connection=pipe)

                logging.info("[%s] Created new job: %s", query_hash, job.id)
                pipe.set(_job_lock_id(query_hash, data_source.id), job.id,
                         settings.JOB_EXPIRY_TIME)
                pipe.execute()
            break

        except redis.WatchError:
            continue

    if not job:
        logging.error("[Manager][%s] Failed adding job for query.", query_hash)

    return job

def get_latest(cls, data_source, query, max_age=0):
    query_hash = utils.gen_query_hash(query)

    if max_age == -1:
        query = cls.select().where(
            cls.query_hash == query_hash,
            cls.data_source == data_source
        ).order_by(cls.retrieved_at.desc())
    else:
        query = cls.select().where(
            cls.query_hash == query_hash,
            cls.data_source == data_source,
            peewee.SQL("retrieved_at + interval '%s second' >= now() at time zone 'utc'",
                       max_age)
        ).order_by(cls.retrieved_at.desc())

    return query.first()

def __init__(self, task, query, data_source_id, metadata):
    self.task = task
    self.query = query
    self.data_source_id = data_source_id
    self.metadata = metadata
    self.data_source = self._load_data_source()
    self.query_hash = gen_query_hash(self.query)
    # Load existing tracker or create a new one if the job was created before code update:
    self.tracker = QueryTaskTracker.get_by_task_id(task.request.id) or \
        QueryTaskTracker.create(task.request.id, 'created', self.query_hash,
                                self.data_source_id, False, metadata)

def execute_query(self, query, data_source_id, metadata):
    signal.signal(signal.SIGINT, signal_handler)
    start_time = time.time()

    logger.info("Loading data source (%d)...", data_source_id)

    # TODO: we should probably cache data sources in Redis
    data_source = models.DataSource.get_by_id(data_source_id)

    self.update_state(state="STARTED", meta={"start_time": start_time, "custom_message": ""})

    logger.info("Executing query:\n%s", query)

    query_hash = gen_query_hash(query)
    query_runner = get_query_runner(data_source.type, data_source.options)

    if query_runner.annotate_query():
        metadata["Task ID"] = self.request.id
        metadata["Query Hash"] = query_hash
        metadata["Queue"] = self.request.delivery_info["routing_key"]

        annotation = u", ".join([u"{}: {}".format(k, v) for k, v in metadata.iteritems()])

        logging.debug(u"Annotation: %s", annotation)

        annotated_query = u"/* {} */ {}".format(annotation, query)
    else:
        annotated_query = query

    with statsd_client.timer("query_runner.{}.{}.run_time".format(data_source.type, data_source.name)):
        data, error = query_runner.run_query(annotated_query)

    run_time = time.time() - start_time
    logger.info("Query finished... data length=%s, error=%s", data and len(data), error)

    self.update_state(state="STARTED", meta={"start_time": start_time, "error": error, "custom_message": ""})

    # Delete query_hash
    redis_connection.delete(QueryTask._job_lock_id(query_hash, data_source.id))

    if not error:
        query_result, updated_query_ids = models.QueryResult.store_result(
            data_source.id, query_hash, query, data, run_time, utils.utcnow()
        )
        for query_id in updated_query_ids:
            check_alerts_for_query.delay(query_id)
    else:
        raise Exception(error)

    return query_result.id

def __init__(self, task, query, data_source_id, user_id, is_api_key, metadata, scheduled_query):
    self.task = task
    self.query = query
    self.data_source_id = data_source_id
    self.metadata = metadata
    self.data_source = self._load_data_source()
    self.user = _resolve_user(user_id, is_api_key)

    # Close DB connection to prevent holding a connection for a long time while the query is executing.
    models.db.session.close()
    self.query_hash = gen_query_hash(self.query)
    self.scheduled_query = scheduled_query
    # Load existing tracker or create a new one if the job was created before code update:
    if scheduled_query:
        models.scheduled_queries_executions.update(scheduled_query.id)

def add_task(cls, query, data_source, scheduled=False, metadata={}):
    query_hash = gen_query_hash(query)
    logging.info("[Manager][%s] Inserting job", query_hash)
    logging.info("[Manager] Metadata: [%s]", metadata)
    try_count = 0
    job = None

    while try_count < cls.MAX_RETRIES:
        try_count += 1

        pipe = redis_connection.pipeline()
        try:
            pipe.watch(cls._job_lock_id(query_hash, data_source.id))
            job_id = pipe.get(cls._job_lock_id(query_hash, data_source.id))
            if job_id:
                logging.info("[Manager][%s] Found existing job: %s", query_hash, job_id)

                job = cls(job_id=job_id)

                if job.ready():
                    logging.info("[%s] job found is ready (%s), removing lock",
                                 query_hash, job.celery_status)
                    redis_connection.delete(QueryTask._job_lock_id(query_hash, data_source.id))
                    job = None

            if not job:
                pipe.multi()

                if scheduled:
                    queue_name = data_source.scheduled_queue_name
                else:
                    queue_name = data_source.queue_name

                result = execute_query.apply_async(args=(query, data_source.id, metadata),
                                                   queue=queue_name)
                job = cls(async_result=result)
                logging.info("[Manager][%s] Created new job: %s", query_hash, job.id)
                pipe.set(cls._job_lock_id(query_hash, data_source.id), job.id,
                         settings.JOB_EXPIRY_TIME)
                pipe.execute()
            break

        except redis.WatchError:
            continue

    if not job:
        logging.error("[Manager][%s] Failed adding job for query.", query_hash)

    return job

def get_query_result(self, query, ttl=0):
    query_hash = gen_query_hash(query)

    with self.db_transaction() as cursor:
        # Note: AsIs interpolates ttl without escaping, so it must be a trusted
        # numeric value, never user input.
        sql = ("SELECT id, query, data, runtime, retrieved_at, query_hash FROM query_results "
               "WHERE query_hash=%s "
               "AND retrieved_at < now() at time zone 'utc' - interval '%s second' "
               "ORDER BY retrieved_at DESC LIMIT 1")
        cursor.execute(sql, (query_hash, psycopg2.extensions.AsIs(ttl)))
        query_result = cursor.fetchone()

    if query_result:
        query_result = QueryResult(*query_result)

    return query_result

def execute_query(self, query, data_source_id, metadata):
    start_time = time.time()

    logger.info("Loading data source (%d)...", data_source_id)

    # TODO: we should probably cache data sources in Redis
    data_source = models.DataSource.get_by_id(data_source_id)

    self.update_state(state='STARTED', meta={'start_time': start_time, 'custom_message': ''})

    logger.info("Executing query:\n%s", query)

    query_hash = gen_query_hash(query)
    query_runner = get_query_runner(data_source.type, data_source.options)

    if query_runner.annotate_query():
        metadata['Task ID'] = self.request.id
        metadata['Query Hash'] = query_hash
        metadata['Queue'] = self.request.delivery_info['routing_key']

        annotation = u", ".join([u"{}: {}".format(k, v) for k, v in metadata.iteritems()])

        logging.debug(u"Annotation: %s", annotation)

        annotated_query = u"/* {} */ {}".format(annotation, query)
    else:
        annotated_query = query

    with statsd_client.timer('query_runner.{}.{}.run_time'.format(data_source.type, data_source.name)):
        data, error = query_runner.run_query(annotated_query)

    run_time = time.time() - start_time
    logger.info("Query finished... data length=%s, error=%s", data and len(data), error)

    self.update_state(state='STARTED', meta={'start_time': start_time, 'error': error, 'custom_message': ''})

    # Delete query_hash
    redis_connection.delete(QueryTask._job_lock_id(query_hash, data_source.id))

    if not error:
        query_result = models.QueryResult.store_result(data_source.id, query_hash, query,
                                                       data, run_time, utils.utcnow())
    else:
        raise Exception(error)

    return query_result.id

def __init__(self, task, query, data_source_id, user_id, metadata, scheduled_query):
    self.task = task
    self.query = query
    self.data_source_id = data_source_id
    self.metadata = metadata
    self.data_source = self._load_data_source()
    if user_id is not None:
        self.user = models.User.query.get(user_id)
    else:
        self.user = None
    self.query_hash = gen_query_hash(self.query)
    self.scheduled_query = scheduled_query
    # Load existing tracker or create a new one if the job was created before code update:
    self.tracker = QueryTaskTracker.get_by_task_id(task.request.id) or \
        QueryTaskTracker.create(task.request.id, 'created', self.query_hash,
                                self.data_source_id, False, metadata)

def execute_query(self, query, data_source_id):
    # TODO: maybe this should be a class?
    start_time = time.time()

    logger.info("Loading data source (%d)...", data_source_id)

    # TODO: we should probably cache data sources in Redis
    data_source = models.DataSource.get_by_id(data_source_id)

    self.update_state(state='STARTED', meta={'start_time': start_time, 'custom_message': ''})

    logger.info("Executing query:\n%s", query)

    query_hash = gen_query_hash(query)

    query_runner = get_query_runner(data_source.type, data_source.options)

    if getattr(query_runner, 'annotate_query', True):
        # TODO: annotate with queue name
        annotated_query = "/* Task Id: %s, Query hash: %s */ %s" % \
                          (self.request.id, query_hash, query)
    else:
        annotated_query = query

    with statsd_client.timer('query_runner.{}.{}.run_time'.format(data_source.type, data_source.name)):
        data, error = query_runner(annotated_query)

    run_time = time.time() - start_time
    logger.info("Query finished... data length=%s, error=%s", data and len(data), error)

    self.update_state(state='STARTED', meta={'start_time': start_time, 'error': error, 'custom_message': ''})

    # Delete query_hash
    redis_connection.delete(QueryTask._job_lock_id(query_hash, data_source.id))

    # TODO: it is possible that storing the data will fail, and we will need to retry
    # while we already marked the job as done
    if not error:
        query_result = models.QueryResult.store_result(data_source.id, query_hash, query,
                                                       data, run_time,
                                                       datetime.datetime.utcnow())
    else:
        raise Exception(error)

    return query_result.id

def run_query_sync(data_source, parameter_values, query_text, max_age=0):
    query_parameters = set(collect_query_parameters(query_text))
    missing_params = set(query_parameters) - set(parameter_values.keys())
    if missing_params:
        raise Exception('Missing parameter value for: {}'.format(", ".join(missing_params)))

    if query_parameters:
        query_text = mustache_render(query_text, parameter_values)

    if max_age <= 0:
        query_result = None
    else:
        query_result = models.QueryResult.get_latest(data_source, query_text, max_age)

    query_hash = gen_query_hash(query_text)

    if query_result:
        logging.info("Returning cached result for query %s", query_hash)
        return query_result

    try:
        started_at = time.time()
        data, error = data_source.query_runner.run_query(query_text, current_user)

        if error:
            logging.info("Got an error while executing the query: %s", error)
            return None

        run_time = time.time() - started_at
        query_result, updated_query_ids = models.QueryResult.store_result(
            data_source.org_id, data_source, query_hash, query_text,
            data, run_time, utcnow())

        models.db.session.commit()
        return query_result
    except Exception:
        if max_age > 0:
            abort(404, message="Unable to get result from the database, and no cached query result found.")
        else:
            abort(503, message="Unable to get result from the database.")
        return None

def store_query_result(self, query, data, run_time, retrieved_at):
    query_result_id = None
    query_hash = gen_query_hash(query)
    sql = ("INSERT INTO query_results (query_hash, query, data, runtime, retrieved_at) "
           "VALUES (%s, %s, %s, %s, %s) RETURNING id")
    with self.db_transaction() as cursor:
        cursor.execute(sql, (query_hash, query, data, run_time, retrieved_at))
        if cursor.rowcount == 1:
            query_result_id = cursor.fetchone()[0]
            logging.info("[Manager][%s] Inserted query data; id=%s", query_hash, query_result_id)

            sql = "UPDATE queries SET latest_query_data_id=%s WHERE query_hash=%s"
            cursor.execute(sql, (query_result_id, query_hash))

            logging.info("[Manager][%s] Updated %s queries.", query_hash, cursor.rowcount)
        else:
            logging.error("[Manager][%s] Failed inserting query data.", query_hash)

    return query_result_id

def get_latest(cls, data_source, query, max_age=0):
    query_hash = utils.gen_query_hash(query)

    if max_age == -1:
        query = cls.query.filter(
            cls.query_hash == query_hash,
            cls.data_source == data_source
        )
    else:
        query = cls.query.filter(
            cls.query_hash == query_hash,
            cls.data_source == data_source,
            (
                db.func.timezone('utc', cls.retrieved_at) +
                datetime.timedelta(seconds=max_age) >=
                db.func.timezone('utc', db.func.now())
            )
        )

    return query.order_by(cls.retrieved_at.desc()).first()

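# Usage sketch for get_latest (QueryResult and data_source stand in for the
# model class and object from the snippet above; values are illustrative):
# max_age=N returns the newest stored result only if it was retrieved within
# the last N seconds, while max_age=-1 skips the staleness check entirely.
fresh = QueryResult.get_latest(data_source, "SELECT 1", max_age=60)    # None when stale
any_age = QueryResult.get_latest(data_source, "SELECT 1", max_age=-1)  # newest, regardless of age
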
def store_query_result(self, data_source_id, query, data, run_time, retrieved_at):
    query_hash = gen_query_hash(query)
    query_result = models.QueryResult.create(query_hash=query_hash,
                                             query=query,
                                             runtime=run_time,
                                             data_source=data_source_id,
                                             retrieved_at=retrieved_at,
                                             data=data)
    logging.info("[Manager][%s] Inserted query data; id=%s", query_hash, query_result.id)

    # TODO: move this logic to the model?
    updated_count = models.Query.update(latest_query_data=query_result).\
        where(models.Query.query_hash == query_hash,
              models.Query.data_source == data_source_id).\
        execute()
    logging.info("[Manager][%s] Updated %s queries.", query_hash, updated_count)

    return query_result.id

def apply_parameters(template, parameters, data_source):
    query = SQLQuery(template).apply(parameters)

    # for now we only log `SQLInjectionError` to detect false positives
    try:
        text = query.text
    except SQLInjectionError:
        record_event({
            'action': 'sql_injection',
            'object_type': 'query',
            'query': template,
            'parameters': parameters,
            'timestamp': time.time(),
            'org_id': data_source.org_id
        })
    except Exception as e:
        logging.info(u"Failed applying parameters for query %s: %s",
                     gen_query_hash(query.query), e.message)
    finally:
        text = query.query

    return text

def add_task(cls, query, data_source, scheduled=False):
    query_hash = gen_query_hash(query)
    logging.info("[Manager][%s] Inserting job", query_hash)
    try_count = 0
    job = None

    while try_count < cls.MAX_RETRIES:
        try_count += 1

        pipe = redis_connection.pipeline()
        try:
            pipe.watch(cls._job_lock_id(query_hash, data_source.id))
            job_id = pipe.get(cls._job_lock_id(query_hash, data_source.id))
            if job_id:
                logging.info("[Manager][%s] Found existing job: %s", query_hash, job_id)
                job = cls(job_id=job_id)
            else:
                pipe.multi()

                if scheduled:
                    queue_name = data_source.scheduled_queue_name
                else:
                    queue_name = data_source.queue_name

                result = execute_query.apply_async(args=(query, data_source.id),
                                                   queue=queue_name)
                job = cls(async_result=result)
                logging.info("[Manager][%s] Created new job: %s", query_hash, job.id)
                pipe.set(cls._job_lock_id(query_hash, data_source.id), job.id)
                pipe.execute()
            break

        except redis.WatchError:
            continue

    if not job:
        logging.error("[Manager][%s] Failed adding job for query.", query_hash)

    return job

def __init__(self, query, data_source_id, user_id, is_api_key, metadata, is_scheduled_query):
    self.job = get_current_job()
    self.query = query
    self.data_source_id = data_source_id
    self.metadata = metadata
    self.data_source = self._load_data_source()
    self.query_id = metadata.get("query_id")
    self.user = _resolve_user(user_id, is_api_key, metadata.get("Query ID"))
    self.query_model = (
        models.Query.query.get(self.query_id)
        if self.query_id and self.query_id != "adhoc"
        else None
    )

    # Close DB connection to prevent holding a connection for a long time while the query is executing.
    models.db.session.close()
    self.query_hash = gen_query_hash(self.query)
    self.is_scheduled_query = is_scheduled_query
    if self.is_scheduled_query:
        # Load existing tracker or create a new one if the job was created before code update:
        models.scheduled_queries_executions.update(self.query_model.id)

def gen_query_hash(target, val, oldval, initiator):
    target.query_hash = utils.gen_query_hash(val)
    target.schedule_failures = 0

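# The (target, val, oldval, initiator) signature above matches SQLAlchemy's
# attribute "set" event. A sketch of how such a listener is presumably wired up;
# the Query.query_text attribute name is an assumption drawn from the other
# snippets, not confirmed here:
from sqlalchemy import event

event.listen(Query.query_text, 'set', gen_query_hash)
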
                             schedule=None,
                             data_source=data_source_factory.create,
                             org=1)

alert_factory = ModelFactory(redash.models.Alert,
                             name=Sequence('Alert {}'),
                             query=query_factory.create,
                             user=user_factory.create,
                             options={})

query_result_factory = ModelFactory(redash.models.QueryResult,
                                    data='{"columns":{}, "rows":[]}',
                                    runtime=1,
                                    retrieved_at=utcnow,
                                    query="SELECT 1",
                                    query_hash=gen_query_hash('SELECT 1'),
                                    data_source=data_source_factory.create,
                                    org=1)

visualization_factory = ModelFactory(redash.models.Visualization,
                                     type='CHART',
                                     query=query_factory.create,
                                     name='Chart',
                                     description='',
                                     options='{}')

widget_factory = ModelFactory(redash.models.Widget,
                              type='chart',
                              width=1,
                              options='{}',
                              dashboard=dashboard_factory.create,

def enqueue_query(query, data_source, user_id, scheduled_query=None, metadata={}):
    query_hash = gen_query_hash(query)
    logging.info("Inserting job for %s with metadata=%s", query_hash, metadata)
    try_count = 0
    job = None

    while try_count < 5:
        try_count += 1

        pipe = redis_connection.pipeline()  # connect to Redis
        try:
            pipe.watch(_job_lock_id(query_hash, data_source.id))
            job_id = pipe.get(_job_lock_id(query_hash, data_source.id))
            if job_id:
                logging.info("[%s] Found existing job: %s", query_hash, job_id)

                job = QueryTask(job_id=job_id)

                if job.ready():  # check whether the job has already finished in Celery
                    logging.info("[%s] job found is ready (%s), removing lock",
                                 query_hash, job.celery_status)
                    redis_connection.delete(_job_lock_id(query_hash, data_source.id))
                    job = None

            if not job:
                pipe.multi()

                time_limit = None

                if scheduled_query:
                    queue_name = data_source.scheduled_queue_name
                    scheduled_query_id = scheduled_query.id
                else:
                    queue_name = data_source.queue_name
                    scheduled_query_id = None
                    time_limit = settings.ADHOC_QUERY_TIME_LIMIT

                result = execute_query.apply_async(
                    args=(query, data_source.id, metadata, user_id, scheduled_query_id),
                    queue=queue_name,
                    time_limit=time_limit)
                job = QueryTask(async_result=result)
                tracker = QueryTaskTracker.create(
                    result.id, 'created', query_hash, data_source.id,
                    scheduled_query is not None, metadata)
                tracker.save(connection=pipe)

                logging.info("[%s] Created new job: %s", query_hash, job.id)
                pipe.set(_job_lock_id(query_hash, data_source.id), job.id,
                         settings.JOB_EXPIRY_TIME)
                pipe.execute()
            break

        except redis.WatchError:
            continue

    if not job:
        logging.error("[Manager][%s] Failed adding job for query.", query_hash)

    return job

def execute_query(self, query, data_source_id, metadata):
    signal.signal(signal.SIGINT, signal_handler)
    start_time = time.time()

    logger.info("task=execute_query state=load_ds ds_id=%d", data_source_id)

    data_source = models.DataSource.get_by_id(data_source_id)

    self.update_state(state='STARTED', meta={'start_time': start_time, 'custom_message': ''})

    logger.debug("Executing query:\n%s", query)

    query_hash = gen_query_hash(query)
    query_runner = data_source.query_runner

    logger.info("task=execute_query state=before query_hash=%s type=%s ds_id=%d "
                "task_id=%s queue=%s query_id=%s username=%s",
                query_hash, data_source.type, data_source.id, self.request.id,
                self.request.delivery_info['routing_key'],
                metadata.get('Query ID', 'unknown'),
                metadata.get('Username', 'unknown'))

    if query_runner.annotate_query():
        metadata['Task ID'] = self.request.id
        metadata['Query Hash'] = query_hash
        metadata['Queue'] = self.request.delivery_info['routing_key']

        annotation = u", ".join([u"{}: {}".format(k, v) for k, v in metadata.iteritems()])

        logging.debug(u"Annotation: %s", annotation)

        annotated_query = u"/* {} */ {}".format(annotation, query)
    else:
        annotated_query = query

    with statsd_client.timer('query_runner.{}.{}.run_time'.format(data_source.type, data_source.name)):
        data, error = query_runner.run_query(annotated_query)

    logger.info("task=execute_query state=after query_hash=%s type=%s ds_id=%d "
                "task_id=%s queue=%s query_id=%s username=%s",
                query_hash, data_source.type, data_source.id, self.request.id,
                self.request.delivery_info['routing_key'],
                metadata.get('Query ID', 'unknown'),
                metadata.get('Username', 'unknown'))

    run_time = time.time() - start_time
    logger.info("Query finished... data length=%s, error=%s", data and len(data), error)

    self.update_state(state='STARTED', meta={'start_time': start_time, 'error': error, 'custom_message': ''})

    # Delete query_hash
    redis_connection.delete(QueryTask._job_lock_id(query_hash, data_source.id))

    if not error:
        query_result, updated_query_ids = models.QueryResult.store_result(
            data_source.org_id, data_source.id, query_hash, query, data,
            run_time, utils.utcnow())
        logger.info("task=execute_query state=after_store query_hash=%s type=%s ds_id=%d "
                    "task_id=%s queue=%s query_id=%s username=%s",
                    query_hash, data_source.type, data_source.id, self.request.id,
                    self.request.delivery_info['routing_key'],
                    metadata.get('Query ID', 'unknown'),
                    metadata.get('Username', 'unknown'))
        for query_id in updated_query_ids:
            check_alerts_for_query.delay(query_id)
        logger.info("task=execute_query state=after_alerts query_hash=%s type=%s ds_id=%d "
                    "task_id=%s queue=%s query_id=%s username=%s",
                    query_hash, data_source.type, data_source.id, self.request.id,
                    self.request.delivery_info['routing_key'],
                    metadata.get('Query ID', 'unknown'),
                    metadata.get('Username', 'unknown'))
    else:
        raise QueryExecutionError(error)

    return query_result.id

def save(self, *args, **kwargs):
    self.query_hash = utils.gen_query_hash(self.query)
    self._set_api_key()
    super(Query, self).save(*args, **kwargs)

alert_factory = ModelFactory(
    redash.models.Alert,
    name=Sequence("Alert {}"),
    query_rel=query_factory.create,
    user=user_factory.create,
    options={},
)

query_result_factory = ModelFactory(
    redash.models.QueryResult,
    data='{"columns":{}, "rows":[]}',
    runtime=1,
    retrieved_at=utcnow,
    query_text="SELECT 1",
    query_hash=gen_query_hash("SELECT 1"),
    data_source=data_source_factory.create,
    org_id=1,
)

visualization_factory = ModelFactory(
    redash.models.Visualization,
    type="CHART",
    query_rel=query_factory.create,
    name="Chart",
    description="",
    options="{}",
)

widget_factory = ModelFactory(
    redash.models.Widget,

def gen_query_hash(self, query_text, set_auto_limit=False):
    query_text = self.apply_auto_limit(query_text, set_auto_limit)
    return utils.gen_query_hash(query_text)

def enqueue_query(query, data_source, user_id, is_api_key=False, scheduled_query=None, metadata={}):
    query_hash = gen_query_hash(query)
    logging.info("Inserting job for %s with metadata=%s", query_hash, metadata)
    try_count = 0
    job = None

    while try_count < 5:
        try_count += 1

        pipe = redis_connection.pipeline()
        try:
            pipe.watch(_job_lock_id(query_hash, data_source.id))
            job_id = pipe.get(_job_lock_id(query_hash, data_source.id))
            if job_id:
                logging.info("[%s] Found existing job: %s", query_hash, job_id)

                job = QueryTask(job_id=job_id)

                if job.ready():
                    logging.info("[%s] job found is ready (%s), removing lock",
                                 query_hash, job.celery_status)
                    redis_connection.delete(_job_lock_id(query_hash, data_source.id))
                    job = None

            if not job:
                pipe.multi()

                time_limit = None

                if scheduled_query:
                    queue_name = data_source.scheduled_queue_name
                    scheduled_query_id = scheduled_query.id
                else:
                    queue_name = data_source.queue_name
                    scheduled_query_id = None
                    time_limit = settings.ADHOC_QUERY_TIME_LIMIT

                args = (query, data_source.id, metadata, user_id, scheduled_query_id, is_api_key)
                argsrepr = json_dumps({
                    'org_id': data_source.org_id,
                    'data_source_id': data_source.id,
                    'enqueue_time': time.time(),
                    'scheduled': scheduled_query_id is not None,
                    'query_id': metadata.get('Query ID'),
                    'user_id': user_id
                })

                result = execute_query.apply_async(args=args,
                                                   argsrepr=argsrepr,
                                                   queue=queue_name,
                                                   time_limit=time_limit)
                job = QueryTask(async_result=result)

                logging.info("[%s] Created new job: %s", query_hash, job.id)
                pipe.set(_job_lock_id(query_hash, data_source.id), job.id,
                         settings.JOB_EXPIRY_TIME)
                pipe.execute()
            break

        except redis.WatchError:
            continue

    if not job:
        logging.error("[Manager][%s] Failed adding job for query.", query_hash)

    return job

def test_gen_query_hash_NoneSQL(self):
    origin_query_text = "select *"
    base_runner = BaseQueryRunner({})
    self.assertEqual(gen_query_hash(origin_query_text),
                     base_runner.gen_query_hash(origin_query_text, True))

def update_instance(cls, query_id, **kwargs):
    if 'query' in kwargs:
        kwargs['query_hash'] = utils.gen_query_hash(kwargs['query'])

    update = cls.update(**kwargs).where(cls.id == query_id)
    return update.execute()

def __init__(self, redis_connection, query, priority, **kwargs):
    kwargs['query'] = fix_unicode(query)
    kwargs['priority'] = priority
    kwargs['query_hash'] = gen_query_hash(kwargs['query'])
    self.new_job = 'id' not in kwargs
    super(Job, self).__init__(redis_connection, **kwargs)

def run_query_sync(data_source, parameter_values, query_text, max_age=0):
    query_parameters = set(collect_query_parameters(query_text))
    missing_params = set(query_parameters) - set(parameter_values.keys())
    if missing_params:
        raise Exception('Missing parameter value for: {}'.format(", ".join(missing_params)))

    if query_parameters:
        query_text = pystache.render(query_text, parameter_values)

    if max_age <= 0:
        query_result = None
    else:
        query_result = models.QueryResult.get_latest(data_source, query_text, max_age)

    query_hash = gen_query_hash(query_text)

    if query_result:
        logging.info("Returning cached result for query %s", query_hash)
        return query_result.data

    try:
        started_at = time.time()
        data, error = data_source.query_runner.run_query(query_text, current_user)

        if error:
            data_consumed_mb = 0.0
            return None
        else:
            dict_data = json.loads(data)
            data_consumed_mb = dict_data['data_consumed_mb']

        # update cache
        if max_age > 0:
            run_time = time.time() - started_at
            query_result, updated_query_ids = models.QueryResult.store_result(
                data_source.org_id, data_source.id, query_hash, query_text,
                data, run_time, utils.utcnow())
            query_result_metadata = models.QueryResultMetaData.store_result_metadata(
                updated_query_ids=updated_query_ids,
                query_result_id=query_result.id,
                data_consumed_mb=data_consumed_mb,
                data_source_id=data_source.id,
                query_hash=query_hash,
                run_by_user_id=current_user.id,
                run_at=utils.utcnow())
            models.db.session.commit()

        return data
    except Exception:
        if max_age > 0:
            abort(404, message="Unable to get result from the database, and no cached query result found.")
        else:
            abort(503, message="Unable to get result from the database.")
        return None

def enqueue_query(query, data_source, user_id, is_api_key=False, scheduled_query=None, metadata={}):
    query_hash = gen_query_hash(query)
    logging.info("Inserting job for %s with metadata=%s", query_hash, metadata)
    try_count = 0
    job = None

    while try_count < 5:
        try_count += 1

        pipe = redis_connection.pipeline()
        try:
            pipe.watch(_job_lock_id(query_hash, data_source.id))
            job_id = pipe.get(_job_lock_id(query_hash, data_source.id))
            if job_id:
                logging.info("[%s] Found existing job: %s", query_hash, job_id)
                job = Job.fetch(job_id)

                status = job.get_status()
                if status in [JobStatus.FINISHED, JobStatus.FAILED]:
                    logging.info("[%s] job found is ready (%s), removing lock",
                                 query_hash, status)
                    redis_connection.delete(_job_lock_id(query_hash, data_source.id))
                    job = None

            if not job:
                pipe.multi()

                if scheduled_query:
                    queue_name = data_source.scheduled_queue_name
                    scheduled_query_id = scheduled_query.id
                else:
                    queue_name = data_source.queue_name
                    scheduled_query_id = None

                time_limit = settings.dynamic_settings.query_time_limit(
                    scheduled_query, user_id, data_source.org_id)
                metadata["Queue"] = queue_name

                queue = Queue(queue_name)
                job = queue.enqueue(
                    execute_query,
                    query,
                    data_source.id,
                    metadata,
                    user_id=user_id,
                    scheduled_query_id=scheduled_query_id,
                    is_api_key=is_api_key,
                    job_timeout=time_limit,
                )

                logging.info("[%s] Created new job: %s", query_hash, job.id)
                pipe.set(
                    _job_lock_id(query_hash, data_source.id),
                    job.id,
                    settings.JOB_EXPIRY_TIME,
                )
                pipe.execute()
            break

        except redis.WatchError:
            continue

    if not job:
        logging.error("[Manager][%s] Failed adding job for query.", query_hash)

    return job

def enqueue_query(query, data_source, user_id, is_api_key=False, scheduled_query=None, metadata={}):
    query_hash = gen_query_hash(query)
    logging.info("Inserting job for %s with metadata=%s", query_hash, metadata)
    try_count = 0
    job = None

    while try_count < 5:
        try_count += 1

        pipe = redis_connection.pipeline()
        try:
            pipe.watch(_job_lock_id(query_hash, data_source.id))
            job_id = pipe.get(_job_lock_id(query_hash, data_source.id))
            if job_id:
                logging.info("[%s] Found existing job: %s", query_hash, job_id)

                job = QueryTask(job_id=job_id)

                if job.ready():
                    logging.info("[%s] job found is ready (%s), removing lock",
                                 query_hash, job.celery_status)
                    redis_connection.delete(_job_lock_id(query_hash, data_source.id))
                    job = None

            if not job:
                pipe.multi()

                if scheduled_query:
                    queue_name = data_source.scheduled_queue_name
                    scheduled_query_id = scheduled_query.id
                else:
                    queue_name = data_source.queue_name
                    scheduled_query_id = None

                args = (query, data_source.id, metadata, user_id, scheduled_query_id, is_api_key)
                argsrepr = json_dumps({
                    "org_id": data_source.org_id,
                    "data_source_id": data_source.id,
                    "enqueue_time": time.time(),
                    "scheduled": scheduled_query_id is not None,
                    "query_id": metadata.get("Query ID"),
                    "user_id": user_id,
                })

                time_limit = settings.dynamic_settings.query_time_limit(
                    scheduled_query, user_id, data_source.org_id)

                result = execute_query.apply_async(
                    args=args,
                    argsrepr=argsrepr,
                    queue=queue_name,
                    soft_time_limit=time_limit,
                )
                job = QueryTask(async_result=result)

                logging.info("[%s] Created new job: %s", query_hash, job.id)
                pipe.set(
                    _job_lock_id(query_hash, data_source.id),
                    job.id,
                    settings.JOB_EXPIRY_TIME,
                )
                pipe.execute()
            break

        except redis.WatchError:
            continue

    if not job:
        logging.error("[Manager][%s] Failed adding job for query.", query_hash)

    return job

def enqueue_query(query, data_source, user_id, is_api_key=False, scheduled_query=None, metadata={}):
    query_id = metadata.get("Query ID", "unknown")
    query_hash = gen_query_hash(query)
    get_logger().info("[query_id=%s] [query_hash=%s] Inserting job", query_id, query_hash)
    try_count = 0
    job = None

    while try_count < 5:
        try_count += 1

        pipe = redis_connection.pipeline()
        try:
            pipe.watch(_job_lock_id(query_hash, data_source.id))
            job_id = pipe.get(_job_lock_id(query_hash, data_source.id))
            if job_id:
                job_status = "UNKNOWN"
                job_complete = False
                job_cancelled = "False"

                try:
                    job = Job.fetch(job_id)
                    job_exists = True
                    job_status = job.get_status()
                    job_complete = job_status in [JobStatus.FINISHED, JobStatus.FAILED]
                    if job.is_cancelled:
                        job_cancelled = "True"
                except NoSuchJobError:
                    job_exists = False
                    job_status = "EXPIRED"

                get_logger().info(
                    "[query_id=%s] [query_hash=%s] Found existing job [job.id=%s] "
                    "[job_status=%s] [job_cancelled=%s]",
                    query_id, query_hash, job_id, job_status, job_cancelled)

                if job_complete or (not job_exists):
                    # get_logger().info("[query_id=%s] [query_hash=%s] [job.id=%s], removing redis lock",
                    #                   query_id, query_hash, job_id)
                    redis_connection.delete(_job_lock_id(query_hash, data_source.id))
                    job = None

            if not job:
                pipe.multi()

                if scheduled_query:
                    queue_name = data_source.scheduled_queue_name  # by default this is "scheduled_queries"
                    scheduled_query_id = scheduled_query.id
                else:
                    queue_name = data_source.queue_name  # by default this is "queries"
                    scheduled_query_id = None

                time_limit = settings.dynamic_settings.query_time_limit(
                    scheduled_query, user_id, data_source.org_id)
                metadata["Queue"] = queue_name
                metadata["Enqueue Time"] = time.time()

                queue = Queue(queue_name)
                enqueue_kwargs = {
                    "user_id": user_id,
                    "scheduled_query_id": scheduled_query_id,
                    "is_api_key": is_api_key,
                    "job_timeout": time_limit,
                    "meta": {
                        "data_source_id": data_source.id,
                        "org_id": data_source.org_id,
                        "scheduled": scheduled_query_id is not None,
                        "query_id": query_id,
                        "user_id": user_id,
                    },
                }
                if not scheduled_query:
                    enqueue_kwargs["result_ttl"] = settings.JOB_EXPIRY_TIME

                job = queue.enqueue(execute_query, query, data_source.id, metadata,
                                    **enqueue_kwargs)

                get_logger().info("[query_id=%s] [query_hash=%s] Created new job [job.id=%s]",
                                  query_id, query_hash, job.id)
                pipe.set(
                    _job_lock_id(query_hash, data_source.id),
                    job.id,
                    settings.JOB_EXPIRY_TIME,
                )
                pipe.execute()
            break

        except redis.WatchError:
            get_logger().error("[query_id=%s] [query_hash=%s] redis.WatchError, try_count = %d",
                               query_id, query_hash, try_count)
            continue

    if not job:
        get_logger().error("[Manager] [query_id=%s] [query_hash=%s] Failed adding job for query.",
                           query_id, query_hash)

    return job

def enqueue_query(query, data_source, user_id, is_api_key=False, scheduled_query=None, metadata={}):
    query_hash = gen_query_hash(query)
    logger.info("Inserting job for %s with metadata=%s", query_hash, metadata)
    try_count = 0
    job = None

    while try_count < 5:
        try_count += 1

        pipe = redis_connection.pipeline()
        try:
            pipe.watch(_job_lock_id(query_hash, data_source.id))
            job_id = pipe.get(_job_lock_id(query_hash, data_source.id))
            if job_id:
                logger.info("[%s] Found existing job: %s", query_hash, job_id)

                job_complete = None

                try:
                    job = Job.fetch(job_id)
                    job_exists = True
                    status = job.get_status()
                    job_complete = status in [JobStatus.FINISHED, JobStatus.FAILED]

                    if job_complete:
                        message = "job found is complete (%s)" % status
                except NoSuchJobError:
                    message = "job found has expired"
                    job_exists = False

                if job_complete or not job_exists:
                    logger.info("[%s] %s, removing lock", query_hash, message)
                    redis_connection.delete(_job_lock_id(query_hash, data_source.id))
                    job = None

            if not job:
                pipe.multi()

                if scheduled_query:
                    queue_name = data_source.scheduled_queue_name
                    scheduled_query_id = scheduled_query.id
                else:
                    queue_name = data_source.queue_name
                    scheduled_query_id = None

                time_limit = settings.dynamic_settings.query_time_limit(
                    scheduled_query, user_id, data_source.org_id)
                metadata["Queue"] = queue_name

                queue = Queue(queue_name)
                enqueue_kwargs = {
                    "user_id": user_id,
                    "scheduled_query_id": scheduled_query_id,
                    "is_api_key": is_api_key,
                    "job_timeout": time_limit,
                    "meta": {
                        "data_source_id": data_source.id,
                        "org_id": data_source.org_id,
                        "scheduled": scheduled_query_id is not None,
                        "query_id": metadata.get("Query ID"),
                        "user_id": user_id,
                    },
                }
                if not scheduled_query:
                    enqueue_kwargs["result_ttl"] = settings.JOB_EXPIRY_TIME

                job = queue.enqueue(execute_query, query, data_source.id, metadata,
                                    **enqueue_kwargs)

                logger.info("[%s] Created new job: %s", query_hash, job.id)
                pipe.set(
                    _job_lock_id(query_hash, data_source.id),
                    job.id,
                    settings.JOB_EXPIRY_TIME,
                )
                pipe.execute()
            break

        except redis.WatchError:
            continue

    if not job:
        logger.error("[Manager][%s] Failed adding job for query.", query_hash)

    return job

def hashify_query():
    arguments = request.get_json(force=True)
    sql = arguments.get("query_text", "")
    return gen_query_hash(sql)

def setUp(self):
    self.priority = 1
    self.query = "SELECT 1"
    self.query_hash = gen_query_hash(self.query)

def enqueue_query(query, data_source, user_id, scheduled_query=None, metadata={}):
    query_hash = gen_query_hash(query)
    logging.info("Inserting job for %s with metadata=%s", query_hash, metadata)
    try_count = 0
    job = None

    while try_count < 5:
        try_count += 1

        pipe = redis_connection.pipeline()
        try:
            pipe.watch(_job_lock_id(query_hash, data_source.id))
            job_id = pipe.get(_job_lock_id(query_hash, data_source.id))
            if job_id:
                logging.info("[%s] Found existing job: %s", query_hash, job_id)

                job = QueryTask(job_id=job_id)

                if job.ready():
                    logging.info("[%s] job found is ready (%s), removing lock",
                                 query_hash, job.celery_status)
                    redis_connection.delete(_job_lock_id(query_hash, data_source.id))
                    job = None

            if not job:
                pipe.multi()

                time_limit = None

                if scheduled_query:
                    queue_name = data_source.scheduled_queue_name
                    scheduled_query_id = scheduled_query.id
                else:
                    queue_name = data_source.queue_name
                    scheduled_query_id = None
                    time_limit = settings.ADHOC_QUERY_TIME_LIMIT

                args = (query, data_source.id, metadata, user_id, scheduled_query_id)
                argsrepr = json_dumps({
                    'org_id': data_source.org_id,
                    'data_source_id': data_source.id,
                    'enqueue_time': time.time(),
                    'scheduled': scheduled_query_id is not None,
                    'query_id': metadata.get('Query ID'),
                    'user_id': user_id
                })

                result = execute_query.apply_async(args=args,
                                                   argsrepr=argsrepr,
                                                   queue=queue_name,
                                                   time_limit=time_limit)
                job = QueryTask(async_result=result)

                logging.info("[%s] Created new job: %s", query_hash, job.id)
                pipe.set(_job_lock_id(query_hash, data_source.id), job.id,
                         settings.JOB_EXPIRY_TIME)
                pipe.execute()
            break

        except redis.WatchError:
            continue

    if not job:
        logging.error("[Manager][%s] Failed adding job for query.", query_hash)

    return job