def test_get_latest_returns_the_last_cached_result_for_negative_ttl(self):
    """A negative max_age returns the newest cached result, however old it is."""
    hundred_days_ago = utcnow() - datetime.timedelta(days=100)
    self.factory.create_query_result(retrieved_at=hundred_days_ago)

    one_day_ago = utcnow() - datetime.timedelta(days=1)
    qr = self.factory.create_query_result(retrieved_at=one_day_ago)

    found_query_result = models.QueryResult.get_latest(qr.data_source, qr.query_text, -1)

    self.assertEqual(found_query_result.id, qr.id)
def test_schedule_until_before(self):
    """
    Queries with non-null ``schedule['until']`` are reported by
    Query.outdated_queries() before the given time is past.
    """
    tomorrow = (utcnow() + datetime.timedelta(days=1)).strftime("%Y-%m-%d")
    schedule = {
        'interval': '3600',
        'until': tomorrow,
        'time': None,
        'day_of_week': None,
    }
    query = self.factory.create_query(schedule=schedule)
    query.latest_query_data = self.factory.create_query_result(
        query=query.query_text,
        retrieved_at=utcnow() - datetime.timedelta(hours=2))

    self.assertIn(query, models.Query.outdated_queries())
def check_alerts_for_query(self, query_id):
    """Re-evaluate all alerts attached to the query *query_id* and send
    notifications (mail, plus optional HipChat / webhook) on state changes."""
    from redash.wsgi import app

    logger.debug("Checking query %d for alerts", query_id)
    query = models.Query.get_by_id(query_id)
    for alert in query.alerts:
        alert.query = query
        new_state = alert.evaluate()
        # An alert already in TRIGGERED state may fire again once its rearm
        # window (seconds) has elapsed since it last triggered.
        passed_rearm_threshold = False
        if alert.rearm and alert.last_triggered_at:
            passed_rearm_threshold = alert.last_triggered_at + datetime.timedelta(seconds=alert.rearm) < utils.utcnow()
        if new_state != alert.state or (alert.state == models.Alert.TRIGGERED_STATE and passed_rearm_threshold ):
            logger.info("Alert %d new state: %s", alert.id, new_state)
            old_state = alert.state
            alert.update_instance(state=new_state, last_triggered_at=utils.utcnow())

            # UNKNOWN -> OK is the initial settling of a new alert; no need to notify.
            if old_state == models.Alert.UNKNOWN_STATE and new_state == models.Alert.OK_STATE:
                logger.debug("Skipping notification (previous state was unknown and now it's ok).")
                continue

            # message = Message
            html = """ Check <a href="{host}/alerts/{alert_id}">alert</a> / check <a href="{host}/queries/{query_id}">query</a>. """.format(host=base_url(alert.query.org), alert_id=alert.id, query_id=query.id)

            notify_mail(alert, html, new_state, app)

            # Optional destinations, enabled via settings.
            if settings.HIPCHAT_API_TOKEN:
                notify_hipchat(alert, html, new_state)

            if settings.WEBHOOK_ENDPOINT:
                notify_webhook(alert, query, html, new_state)
def test_failure_extends_schedule(self):
    """
    Execution failures recorded for a query result in exponential backoff
    for scheduling future execution.
    """
    query = self.factory.create_query(schedule="60", schedule_failures=4)
    query_result = self.factory.create_query_result(
        retrieved_at=utcnow() - datetime.timedelta(minutes=16),
        query_text=query.query_text,
        query_hash=query.query_hash)
    query.latest_query_data = query_result

    # With 4 recorded failures the backoff keeps a 16-minute-old result fresh...
    self.assertEqual(list(models.Query.outdated_queries()), [])

    # ...but a 17-minute-old result is due again.
    query_result.retrieved_at = utcnow() - datetime.timedelta(minutes=17)
    self.assertEqual(list(models.Query.outdated_queries()), [query])
def check_alerts_for_query(self, query_id):
    """Re-evaluate every alert attached to *query_id* and, on a state change,
    notify each of the alert's subscriptions."""
    from redash.wsgi import app

    logger.debug("Checking query %d for alerts", query_id)
    query = models.Query.get_by_id(query_id)
    for alert in query.alerts:
        alert.query = query
        new_state = alert.evaluate()
        # A TRIGGERED alert may re-fire once its rearm window (seconds)
        # has elapsed since the last trigger.
        passed_rearm_threshold = False
        if alert.rearm and alert.last_triggered_at:
            passed_rearm_threshold = alert.last_triggered_at + datetime.timedelta(seconds=alert.rearm) < utils.utcnow()
        if new_state != alert.state or (alert.state == models.Alert.TRIGGERED_STATE and passed_rearm_threshold):
            logger.info("Alert %d new state: %s", alert.id, new_state)
            old_state = alert.state
            alert.update_instance(state=new_state, last_triggered_at=utils.utcnow())

            # UNKNOWN -> OK is just a new alert settling; no notification needed.
            if old_state == models.Alert.UNKNOWN_STATE and new_state == models.Alert.OK_STATE:
                logger.debug("Skipping notification (previous state was unknown and now it's ok).")
                continue

            host = base_url(alert.query.org)
            for subscription in alert.subscriptions:
                try:
                    subscription.notify(alert, query, subscription.user, new_state, app, host)
                except Exception:
                    # Best effort: one failing destination must not block the rest.
                    # Use logger.exception (not the deprecated logger.warn) so the
                    # full traceback is preserved in the logs.
                    logger.exception("Exception while notifying subscription of alert %d", alert.id)
def test_get_latest_doesnt_return_if_ttl_expired(self):
    """No result is returned when the cached one is older than max_age."""
    stale = utcnow() - datetime.timedelta(days=1)
    qr = self.factory.create_query_result(retrieved_at=stale)

    self.assertIsNone(
        models.QueryResult.get_latest(qr.data_source, qr.query_text, max_age=60))
def test_get_latest_returns_if_ttl_not_expired(self):
    """A result newer than max_age is served from the cache."""
    recent = utcnow() - datetime.timedelta(seconds=30)
    qr = self.factory.create_query_result(retrieved_at=recent)

    found = models.QueryResult.get_latest(qr.data_source, qr.query_text, max_age=120)

    self.assertEqual(found, qr)
def test_backoff(self):
    """More recorded failures push the next scheduled run further out."""
    now = utcnow()
    last_run = now - datetime.timedelta(hours=2)

    self.assertTrue(models.should_schedule_next(last_run, now, "3600", 5))
    self.assertFalse(models.should_schedule_next(last_run, now, "3600", 10))
def test_exact_time_with_day_change(self):
    """A run scheduled at 23:59, last executed two days ago at 23:59, is
    overdue shortly after midnight."""
    now = utcnow().replace(hour=0, minute=1)
    previous = (now - datetime.timedelta(days=2)).replace(hour=23, minute=59)
    # The original wrote "23:59".format(now.hour + 3): the format call had no
    # placeholder, so it was a no-op and the hour arithmetic was dead code.
    schedule = "23:59"
    self.assertTrue(models.should_schedule_next(previous, now, schedule, 0))
def test_exact_time_that_needs_reschedule(self):
    """A daily HH:00 query last run yesterday is due once its hour has passed."""
    now = utcnow()
    previous_run = now - datetime.timedelta(days=1)
    schedule_hour = (now - datetime.timedelta(hours=3)).hour
    scheduled_time = "{:02d}:00".format(schedule_hour)
    self.assertTrue(models.should_schedule_next(previous_run, now, scheduled_time, 0))
def outdated_queries(cls):
    """Return the scheduled queries whose results are stale and due to run.

    De-duplicates by (query_hash, data_source_id) so identical query text
    against the same data source is only executed once per cycle.
    """
    queries = (
        Query.query
        .options(joinedload(Query.latest_query_data).load_only('retrieved_at'))
        .filter(Query.schedule.isnot(None))
        .order_by(Query.id)
    )

    now = utils.utcnow()
    outdated_queries = {}
    scheduled_queries_executions.refresh()

    for query in queries:
        # Schedules without an interval are not time-based; skip them.
        if query.schedule['interval'] is None:
            continue

        # 'until' (YYYY-MM-DD) is an optional end date; past it the query
        # is no longer rescheduled.
        if query.schedule['until'] is not None:
            schedule_until = pytz.utc.localize(datetime.datetime.strptime(query.schedule['until'], '%Y-%m-%d'))

            if schedule_until <= now:
                continue

        if query.latest_query_data:
            retrieved_at = query.latest_query_data.retrieved_at
        else:
            # Never executed yet: measure staleness from "now".
            retrieved_at = now

        # Prefer the time an execution was already queued, so a query that
        # is in flight is not scheduled again.
        retrieved_at = scheduled_queries_executions.get(query.id) or retrieved_at

        if should_schedule_next(retrieved_at, now, query.schedule['interval'], query.schedule['time'],
                                query.schedule['day_of_week'], query.schedule_failures):
            key = "{}:{}".format(query.query_hash, query.data_source_id)
            outdated_queries[key] = query

    return outdated_queries.values()
def test_store_result_does_not_modify_query_update_at(self):
    """Storing a new result must leave the query's updated_at untouched."""
    original_updated_at = utcnow() - datetime.timedelta(hours=1)
    query = self.factory.create_query(updated_at=original_updated_at)

    models.QueryResult.store_result(
        query.org_id, query.data_source, query.query_hash,
        query.query_text, "", 0, utcnow())

    self.assertEqual(original_updated_at, query.updated_at)
def test_save_updates_updated_at_field(self):
    # This should be a test of ModelTimestampsMixin, but it's easier to test
    # in the context of an existing model... :-\
    stale_date = utcnow().date() - datetime.timedelta(days=1)
    q = self.factory.create_query(created_at=stale_date, updated_at=stale_date)
    db.session.flush()

    q.name = 'x'
    db.session.flush()

    self.assertNotEqual(q.updated_at, stale_date)
def test_exact_time_every_x_days_that_doesnt_need_reschedule(self):
    """A 3-day-interval query that last ran 2 days ago is not yet due."""
    now = utcnow()
    # Renamed from the original "four_days_ago": the value is only two days
    # in the past, and the misleading name contradicted the test's intent.
    two_days_ago = now - datetime.timedelta(days=2)
    three_day_interval = "259200"
    scheduled_datetime = now - datetime.timedelta(hours=3)
    scheduled_time = "{:02d}:00".format(scheduled_datetime.hour)
    self.assertFalse(models.should_schedule_next(two_days_ago, now, three_day_interval, scheduled_time))
def test_skips_fresh_queries(self):
    """A query refreshed within its interval is not reported as outdated."""
    fresh_retrieved_at = utcnow() - datetime.timedelta(minutes=30)
    schedule = {'interval': '3600', 'time': None, 'until': None, 'day_of_week': None}
    query = self.factory.create_query(schedule=schedule)
    query.latest_query_data = self.factory.create_query_result(
        query=query.query_text, retrieved_at=fresh_retrieved_at)

    self.assertNotIn(query, models.Query.outdated_queries())
def test_outdated_queries_works_with_ttl_based_schedule(self):
    """A query not refreshed within its interval shows up in outdated_queries()."""
    schedule = {'interval': '3600', 'time': None, 'until': None, 'day_of_week': None}
    query = self.factory.create_query(schedule=schedule)
    query.latest_query_data = self.factory.create_query_result(
        query=query.query_text,
        retrieved_at=utcnow() - datetime.timedelta(hours=2))

    self.assertIn(query, models.Query.outdated_queries())
def test_outdated_queries_works_with_specific_time_schedule(self):
    """A daily-at-HH:MM query whose slot passed today, last run yesterday, is outdated."""
    slot = utcnow() - datetime.timedelta(minutes=30)
    query = self.factory.create_query(schedule=slot.strftime('%H:%M'))
    query.latest_query_data = self.factory.create_query_result(
        query=query.query_text,
        retrieved_at=slot - datetime.timedelta(days=1))

    self.assertIn(query, models.Query.outdated_queries())
def test_returns_only_over_a_week_old_results(self):
    """unused() reports only results older than the retention window."""
    old_retrieved_at = utcnow() - datetime.timedelta(days=14)
    old_unused_qr = self.factory.create_query_result(retrieved_at=old_retrieved_at)
    db.session.flush()
    fresh_unused_qr = self.factory.create_query_result()

    self.assertIn((old_unused_qr.id,), models.QueryResult.unused())
    self.assertNotIn((fresh_unused_qr.id,), models.QueryResult.unused())
def test_outdated_queries_works_with_ttl_based_schedule(self):
    """An interval-scheduled query whose result is older than the interval is outdated."""
    query = self.factory.create_query(schedule="3600")
    query.latest_query_data = self.factory.create_query_result(
        query=query.query_text,
        retrieved_at=utcnow() - datetime.timedelta(hours=2))

    self.assertIn(query, models.Query.outdated_queries())
def test_skips_fresh_queries(self):
    """A query refreshed half an hour ago on a one-hour interval is not outdated."""
    query = self.factory.create_query(schedule="3600")
    query.latest_query_data = self.factory.create_query_result(
        query=query.query_text,
        retrieved_at=utcnow() - datetime.timedelta(minutes=30))

    self.assertNotIn(query, models.Query.outdated_queries())
def test_get_latest_returns_the_most_recent_result(self):
    """get_latest prefers the newest of several cached results."""
    earlier = utcnow() - datetime.timedelta(seconds=30)
    self.factory.create_query_result(retrieved_at=earlier)
    latest_qr = self.factory.create_query_result()

    found = models.QueryResult.get_latest(latest_qr.data_source, latest_qr.query_text, 60)

    self.assertEqual(found.id, latest_qr.id)
def test_returns_only_unused_query_results(self):
    """Results referenced as a query's latest_query_data are never reported unused."""
    two_weeks_ago = utcnow() - datetime.timedelta(days=14)
    used_qr = self.factory.create_query_result()
    self.factory.create_query(latest_query_data=used_qr)
    db.session.flush()
    unused_qr = self.factory.create_query_result(retrieved_at=two_weeks_ago)

    self.assertIn((unused_qr.id,), models.QueryResult.unused())
    self.assertNotIn((used_qr.id,), list(models.QueryResult.unused()))
def setUp(self):
    """Provide the shared fixtures used by the store_result tests:
    a data source, a canonical query string with its hash, and fixed
    runtime / timestamp / payload values."""
    super(TestQueryResultStoreResult, self).setUp()
    self.data_source = self.factory.data_source
    self.query = "SELECT 1"
    self.query_hash = gen_query_hash(self.query)
    self.runtime = 123
    self.utcnow = utcnow()
    self.data = "data"
def test_skips_fresh_queries(self):
    """refresh_queries() must not enqueue a query refreshed within its TTL."""
    query = self.factory.create_query(schedule="1200")
    self.factory.create_query_result(
        retrieved_at=utcnow() - datetime.timedelta(minutes=10),
        query=query.query,
        query_hash=query.query_hash)

    with patch('redash.tasks.queries.enqueue_query') as add_job_mock:
        refresh_queries()
        self.assertFalse(add_job_mock.called)
def test_skips_queries_with_no_ttl(self):
    """Queries without a schedule are never refreshed automatically."""
    query = query_factory.create(schedule=None)
    query_result_factory.create(
        retrieved_at=utcnow() - datetime.timedelta(minutes=10),
        query=query.query,
        query_hash=query.query_hash)

    with patch('redash.tasks.QueryTask.add_task') as add_job_mock:
        refresh_queries()
        self.assertFalse(add_job_mock.called)
def test_skips_queries_with_no_ttl(self):
    """Unscheduled queries are never picked up by refresh_queries()."""
    query = self.factory.create_query(schedule=None)
    self.factory.create_query_result(
        retrieved_at=utcnow() - datetime.timedelta(minutes=10),
        query=query.query,
        query_hash=query.query_hash)

    with patch('redash.tasks.queries.enqueue_query') as add_job_mock:
        refresh_queries()
        self.assertFalse(add_job_mock.called)
def test_enqueues_outdated_queries(self):
    """refresh_queries() schedules a task for queries past their TTL."""
    query = self.factory.create_query(schedule="60")
    query_result = self.factory.create_query_result(
        retrieved_at=utcnow() - datetime.timedelta(minutes=10),
        query=query.query,
        query_hash=query.query_hash)
    query.latest_query_data = query_result
    query.save()

    with patch('redash.tasks.QueryTask.add_task') as add_job_mock:
        refresh_queries()
        add_job_mock.assert_called_with(query.query, query.data_source,
                                        scheduled=True, metadata=ANY)
def test_outdated_queries_works_with_specific_time_schedule(self):
    """A daily HH:MM query whose slot already passed today is reported as outdated."""
    slot_time = utcnow() - datetime.timedelta(minutes=30)
    query = self.factory.create_query(schedule=slot_time.strftime('%H:%M'))
    query.latest_query_data = self.factory.create_query_result(
        query=query.query_text,
        retrieved_at=slot_time - datetime.timedelta(days=1))

    self.assertIn(query, models.Query.outdated_queries())
def test_skips_fresh_queries(self):
    """No task is added for a query refreshed within its 20-minute TTL."""
    query = self.factory.create_query(schedule="1200")
    self.factory.create_query_result(
        retrieved_at=utcnow() - datetime.timedelta(minutes=10),
        query=query.query,
        query_hash=query.query_hash)

    with patch('redash.tasks.QueryTask.add_task') as add_job_mock:
        refresh_queries()
        self.assertFalse(add_job_mock.called)
def test_enqueues_outdated_queries(self):
    """A query past its 60-second TTL gets a refresh task queued."""
    query = query_factory.create(schedule="60")
    query_result = query_result_factory.create(
        retrieved_at=utcnow() - datetime.timedelta(minutes=10),
        query=query.query,
        query_hash=query.query_hash)
    query.latest_query_data = query_result
    query.save()

    with patch('redash.tasks.QueryTask.add_task') as add_job_mock:
        refresh_queries()
        add_job_mock.assert_called_with(query.query, query.data_source,
                                        scheduled=True, metadata=ANY)
def run_query_sync(data_source, parameter_values, query_text, max_age=0):
    """Run *query_text* synchronously against *data_source*.

    Renders mustache-style parameters from *parameter_values*. With
    ``max_age > 0`` a sufficiently fresh cached result is returned instead of
    executing; ``max_age <= 0`` always executes. Stores the result and its
    metadata, and returns the QueryResult (or None on a runner error).

    Raises via flask's abort() when execution fails: 404 if a cached result
    was expected, 503 otherwise.
    """
    query_parameters = set(collect_query_parameters(query_text))
    missing_params = set(query_parameters) - set(parameter_values.keys())
    if missing_params:
        raise Exception('Missing parameter value for: {}'.format(", ".join(missing_params)))

    if query_parameters:
        query_text = pystache.render(query_text, parameter_values)

    if max_age <= 0:
        query_result = None
    else:
        query_result = models.QueryResult.get_latest(data_source, query_text, max_age)

    query_hash = gen_query_hash(query_text)

    if query_result:
        logging.info("Returning cached result for query %s" % query_hash)
        return query_result

    try:
        started_at = time.time()
        data, error = data_source.query_runner.run_query(query_text, current_user)

        if error:
            # Fixed the misspelled log message ('got bak error') and dropped a
            # dead assignment to data_consumed_mb that preceded the return.
            logging.info('got back error')
            logging.info(error)
            return None
        else:
            dict_data = json.loads(data)
            data_consumed_mb = dict_data['data_consumed_mb']

        run_time = time.time() - started_at
        query_result, updated_query_ids = models.QueryResult.store_result(
            data_source.org, data_source, query_hash, query_text,
            data, run_time, utils.utcnow())

        # Record per-execution accounting alongside the result itself.
        query_result_metadata = models.QueryResultMetaData.store_result_metadata(
            updated_query_ids=updated_query_ids,
            query_result_id=query_result.id,
            data_consumed_mb=data_consumed_mb,
            data_source_id=data_source.id,
            query_hash=query_hash,
            run_by_user_id=current_user.id,
            run_at=utils.utcnow())

        models.db.session.commit()
        return query_result
    except Exception as e:
        if max_age > 0:
            abort(404, message="Unable to get result from the database, and no cached query result found.")
        else:
            abort(503, message="Unable to get result from the database.")
        return None
def test_empty_schedules(self):
    """empty_schedules() clears the schedule of queries whose 'until' passed."""
    expired = (utcnow() - datetime.timedelta(days=1)).strftime("%Y-%m-%d")
    query = self.factory.create_query(schedule={'interval': '3600', 'until': expired})

    fake_lookup = staticmethod(lambda: [query])
    with patch.object(Query, 'past_scheduled_queries', fake_lookup):
        empty_schedules()

    self.assertEqual(query.schedule, None)
def run(self):
    """Execute the query, persist the result, reset or bump the failure
    counter on the scheduled query, and fan out alert checks.

    Raises QueryExecutionError when the runner reports an error.
    """
    signal.signal(signal.SIGINT, signal_handler)
    started_at = time.time()

    logger.debug("Executing query:\n%s", self.query)
    self._log_progress('executing_query')

    query_runner = self.data_source.query_runner
    annotated_query = self._annotate_query(query_runner)

    try:
        data, error = query_runner.run_query(annotated_query, self.user)
    except Exception as e:
        # Celery's soft time limit surfaces as an exception; map it to the
        # canonical timeout message, everything else to its text.
        if isinstance(e, SoftTimeLimitExceeded):
            error = TIMEOUT_MESSAGE
        else:
            error = text_type(e)
        data = None
        logging.warning('Unexpected error while running query:', exc_info=1)

    run_time = time.time() - started_at

    logger.info(u"task=execute_query query_hash=%s data_length=%s error=[%s]",
                self.query_hash, data and len(data), error)

    _unlock(self.query_hash, self.data_source.id)

    if error is not None and data is None:
        result = QueryExecutionError(error)
        if self.scheduled_query is not None:
            # Re-attach the detached instance to the session before mutating.
            self.scheduled_query = models.db.session.merge(self.scheduled_query, load=False)
            self.scheduled_query.schedule_failures += 1
            models.db.session.add(self.scheduled_query)
            notify_of_failure(error, self.scheduled_query)
        models.db.session.commit()
        raise result
    else:
        if (self.scheduled_query and self.scheduled_query.schedule_failures > 0):
            # A successful run resets the exponential-backoff failure counter.
            self.scheduled_query = models.db.session.merge(self.scheduled_query, load=False)
            self.scheduled_query.schedule_failures = 0
            models.db.session.add(self.scheduled_query)

        query_result, updated_query_ids = models.QueryResult.store_result(
            self.data_source.org_id, self.data_source,
            self.query_hash, self.query, data,
            run_time, utcnow())

        models.db.session.commit()  # make sure that alert sees the latest query result
        self._log_progress('checking_alerts')
        for query_id in updated_query_ids:
            check_alerts_for_query.delay(query_id)
        self._log_progress('finished')

        result = query_result.id
    models.db.session.commit()
    return result
def test_backoff(self):
    """The failures count delays rescheduling exponentially."""
    now = utcnow()
    last_run = now - datetime.timedelta(hours=2)

    self.assertTrue(
        models.should_schedule_next(last_run, now, "3600", failures=5))
    self.assertFalse(
        models.should_schedule_next(last_run, now, "3600", failures=10))
def test_returns_only_unused_query_results(self):
    """A result still referenced by a query's latest_query_data is not unused."""
    two_weeks_ago = utcnow() - datetime.timedelta(days=14)
    query_text = "SELECT 1"
    used_qr = self.factory.create_query_result(
        query_text=query_text, retrieved_at=two_weeks_ago)
    self.factory.create_query(query_text=query_text, latest_query_data=used_qr)
    unused_qr = self.factory.create_query_result(
        query_text=query_text, retrieved_at=two_weeks_ago)
    db.session.flush()

    self.assertIn((unused_qr.id, ), models.QueryResult.unused())
    self.assertNotIn((used_qr.id, ), list(models.QueryResult.unused()))
def test_exact_time_every_x_days_that_doesnt_need_reschedule(self):
    """A 3-day-interval query that last ran 2 days ago is not yet due."""
    now = utcnow()
    # Renamed from the original "four_days_ago": the value is only two days
    # in the past, and the old name contradicted the scenario under test.
    two_days_ago = now - datetime.timedelta(days=2)
    three_day_interval = "259200"
    scheduled_datetime = now - datetime.timedelta(hours=3)
    scheduled_time = "{:02d}:00".format(scheduled_datetime.hour)
    self.assertFalse(
        models.should_schedule_next(
            two_days_ago, now, three_day_interval, scheduled_time
        )
    )
def past_scheduled_queries(cls):
    """Return the list of scheduled queries whose 'until' date has passed."""
    now = utils.utcnow()
    scheduled = Query.query.filter(Query.schedule.isnot(None)).order_by(Query.id)

    past = []
    for query in scheduled:
        until = query.schedule["until"]
        if until is None:
            continue
        # 'until' is stored as a YYYY-MM-DD string; compare in UTC.
        until_dt = pytz.utc.localize(datetime.datetime.strptime(until, "%Y-%m-%d"))
        if until_dt <= now:
            past.append(query)
    return past
def test_schedule_until_before(self):
    """
    Queries with non-null ``schedule['until']`` are reported by
    Query.outdated_queries() before the given time is past.
    """
    until_tomorrow = (utcnow() + datetime.timedelta(days=1)).strftime("%Y-%m-%d")
    last_run = utcnow() - datetime.timedelta(hours=2)
    query = self.factory.create_query(schedule={
        "interval": "3600",
        "until": until_tomorrow,
        "time": None,
        "day_of_week": None,
    })
    query.latest_query_data = self.factory.create_query_result(
        query=query.query_text, retrieved_at=last_run)

    self.assertIn(query, models.Query.outdated_queries())
def update_sample(data_source_id, table_name, table_id, sample_updated_at):
    """
    For a given table, look up a sample row for it and update the "example"
    fields for it in the column_metadata table.

    Skips the update when a recent sample already exists (within
    SCHEMA_SAMPLE_UPDATE_FREQUENCY_DAYS) or when the data source's query
    runner does not support sampling. Returns the sample dict, or None.
    """
    logger.info(u"task=update_sample state=start table_name=%s", table_name)
    start_time = time.time()
    ds = models.DataSource.get_by_id(data_source_id)

    # Only columns still present in the table; we only need id/name/example.
    persisted_columns = models.ColumnMetadata.query.filter(
        models.ColumnMetadata.exists.is_(True),
        models.ColumnMetadata.table_id == table_id,
    ).options(load_only("id", "name", "example"))

    update_threshold = utils.utcnow() - datetime.timedelta(
        days=settings.SCHEMA_SAMPLE_UPDATE_FREQUENCY_DAYS)

    first_column = persisted_columns.first()

    if (first_column and
            sample_updated_at and
            first_column.example and
            sample_updated_at > update_threshold):
        # Look at the first example in the persisted columns.
        # If this is *not* empty AND sample_updated_at is recent, don't update sample
        logger.info(
            u"task=update_sample state=abort - recent sample exists table_name=%s",
            table_name,
        )
        return

    sample = None
    try:
        sample = ds.query_runner.get_table_sample(table_name)
    except NotSupported:
        # Not every query runner implements sampling; skip quietly.
        logger.info(u"Unable to fetch samples for {}".format(table_name))

    if not sample:
        return

    # If a column exists, add a sample to it.
    for persisted_column in persisted_columns.all():
        column_example = sample.get(persisted_column.name, None)
        # Coerce non-string examples (numbers, dates, None) to text before
        # truncating for storage.
        column_example = (column_example if isinstance(column_example, str) else
                          str(column_example))  # noqa: F821
        persisted_column.example = truncate_long_string(column_example, 4000)
        models.db.session.add(persisted_column)

    models.db.session.commit()
    logger.info(
        u"task=update_sample state=finished table_name=%s runtime=%.2f",
        table_name,
        time.time() - start_time,
    )
    return sample
def test_exact_time_every_x_weeks_that_doesnt_need_reschedule(self):
    # Setup:
    #
    # 1) The query should run every 3 weeks on Thursday
    # 2) The last time it ran was 3 weeks ago from this week's Tuesday
    # 3) It is now Wednesday of this week
    #
    # Expectation: Even though more than 3 weeks have passed since the
    #              last run 3 weeks ago on Tuesday, it's not overdue since
    #              it should be running on Thursdays.
    tuesday_offset = list(calendar.day_name).index("Tuesday") - utcnow().weekday()
    this_tuesday = utcnow() + datetime.timedelta(days=tuesday_offset)
    previous_run = this_tuesday - datetime.timedelta(weeks=3)
    now = this_tuesday + datetime.timedelta(days=1)

    three_week_interval = "1814400"
    scheduled_time = "{:02d}:00".format((now - datetime.timedelta(hours=3)).hour)

    self.assertFalse(
        models.should_schedule_next(previous_run, now, three_week_interval,
                                    scheduled_time, "Thursday"))
def test_schedule_until_before(self):
    """
    Queries with non-null ``schedule['until']`` are reported by
    Query.outdated_queries() before the given time is past.
    """
    until = (utcnow() + datetime.timedelta(days=1)).strftime("%Y-%m-%d")
    query = self.create_scheduled_query(interval="3600", until=until)
    self.fake_previous_execution(query, hours=2)

    self.assertIn(query, models.Query.outdated_queries())
def test_schedule_until_after(self):
    """
    Queries with non-null ``schedule['until']`` are not reported by
    Query.outdated_queries() after the given time is past.
    """
    expired_yesterday = (utcnow() - datetime.timedelta(days=1)).strftime("%Y-%m-%d")
    query = self.factory.create_query(schedule={
        'interval': '3600',
        'until': expired_yesterday,
        'time': None,
        'day_of_week': None,
    })
    query.latest_query_data = self.factory.create_query_result(
        query=query.query_text,
        retrieved_at=utcnow() - datetime.timedelta(hours=2))

    self.assertNotIn(query, models.Query.outdated_queries())
def check_alerts_for_query(self, query_id):
    """Evaluate all alerts attached to query *query_id*; on a state change,
    send mail and any configured HipChat / webhook notifications."""
    from redash.wsgi import app

    logger.debug("Checking query %d for alerts", query_id)
    query = models.Query.get_by_id(query_id)
    for alert in query.alerts:
        alert.query = query
        new_state = alert.evaluate()
        # A TRIGGERED alert may fire again once its rearm window (seconds)
        # has elapsed since the last trigger.
        passed_rearm_threshold = False
        if alert.rearm and alert.last_triggered_at:
            passed_rearm_threshold = alert.last_triggered_at + datetime.timedelta(
                seconds=alert.rearm) < utils.utcnow()
        if new_state != alert.state or (alert.state == models.Alert.TRIGGERED_STATE
                                        and passed_rearm_threshold):
            logger.info("Alert %d new state: %s", alert.id, new_state)
            old_state = alert.state
            alert.update_instance(state=new_state, last_triggered_at=utils.utcnow())

            # UNKNOWN -> OK is a new alert settling; no notification needed.
            if old_state == models.Alert.UNKNOWN_STATE and new_state == models.Alert.OK_STATE:
                logger.debug(
                    "Skipping notification (previous state was unknown and now it's ok)."
                )
                continue

            # message = Message
            html = """ Check <a href="{host}/alerts/{alert_id}">alert</a> / check <a href="{host}/queries/{query_id}">query</a>. """.format(host=base_url(alert.query.org), alert_id=alert.id, query_id=query.id)

            notify_mail(alert, html, new_state, app)

            # Optional destinations, enabled via settings.
            if settings.HIPCHAT_API_TOKEN:
                notify_hipchat(alert, html, new_state)

            if settings.WEBHOOK_ENDPOINT:
                notify_webhook(alert, query, html, new_state)
def cleanup_data_in_table(table_model):
    """Purge rows of *table_model* flagged as no longer existing, but only
    after they have been gone for SCHEMA_METADATA_TTL_DAYS days."""
    removed_metadata = table_model.query.filter(
        table_model.exists == False,
    ).options(load_only('updated_at'))

    for removed_metadata_row in removed_metadata:
        is_old_data = (
            utils.utcnow() - removed_metadata_row.updated_at
        ) > datetime.timedelta(days=settings.SCHEMA_METADATA_TTL_DAYS)

        # Bug fix: is_old_data was computed but never consulted, so every
        # non-existing row was deleted immediately instead of after the TTL.
        if not is_old_data:
            continue

        table_model.query.filter(
            table_model.id == removed_metadata_row.id,
        ).delete()

    db.session.commit()
def run(self):
    """Execute the query while reporting progress through the tracker,
    persist the result, and fan out alert checks.

    Raises QueryExecutionError when the runner reports an error.
    """
    signal.signal(signal.SIGINT, signal_handler)
    self.tracker.update(started_at=time.time(), state='started')

    logger.debug("Executing query:\n%s", self.query)
    self._log_progress('executing_query')

    query_runner = self.data_source.query_runner
    annotated_query = self._annotate_query(query_runner)

    try:
        data, error = query_runner.run_query(annotated_query, self.user)
    except Exception as e:
        error = text_type(e)
        data = None
        logging.warning('Unexpected error while running query:', exc_info=1)

    run_time = time.time() - self.tracker.started_at
    self.tracker.update(error=error, run_time=run_time, state='saving_results')

    logger.info(u"task=execute_query query_hash=%s data_length=%s error=[%s]",
                self.query_hash, data and len(data), error)

    _unlock(self.query_hash, self.data_source.id)

    if error:
        self.tracker.update(state='failed')
        result = QueryExecutionError(error)
        if self.scheduled_query is not None:
            # Re-attach the detached instance to the session before mutating.
            self.scheduled_query = models.db.session.merge(self.scheduled_query, load=False)
            self.scheduled_query.schedule_failures += 1
            models.db.session.add(self.scheduled_query)
        models.db.session.commit()
        raise result
    else:
        if (self.scheduled_query and self.scheduled_query.schedule_failures > 0):
            # A successful run resets the exponential-backoff failure counter.
            self.scheduled_query = models.db.session.merge(self.scheduled_query, load=False)
            self.scheduled_query.schedule_failures = 0
            models.db.session.add(self.scheduled_query)

        query_result, updated_query_ids = models.QueryResult.store_result(
            self.data_source.org_id, self.data_source,
            self.query_hash, self.query, data,
            run_time, utcnow())

        models.db.session.commit()  # make sure that alert sees the latest query result
        self._log_progress('checking_alerts')
        for query_id in updated_query_ids:
            check_alerts_for_query.delay(query_id)
        self._log_progress('finished')

        result = query_result.id
    models.db.session.commit()
    return result
def run(self):
    """Execute the query and persist its result (tracker-based variant).

    On error, only the failure counter is bumped and the QueryExecutionError
    object is returned as the task result — it is not raised here.
    """
    signal.signal(signal.SIGINT, signal_handler)
    self.tracker.update(started_at=time.time(), state='started')

    logger.debug("Executing query:\n%s", self.query)
    self._log_progress('executing_query')

    query_runner = self.data_source.query_runner
    annotated_query = self._annotate_query(query_runner)

    try:
        data, error = query_runner.run_query(annotated_query, self.user)
    except Exception as e:
        # NOTE(review): `unicode` marks this as Python 2 code.
        error = unicode(e)
        data = None
        logging.warning('Unexpected error while running query:', exc_info=1)

    run_time = time.time() - self.tracker.started_at
    self.tracker.update(error=error, run_time=run_time, state='saving_results')

    logger.info(
        u"task=execute_query query_hash=%s data_length=%s error=[%s]",
        self.query_hash, data and len(data), error)

    _unlock(self.query_hash, self.data_source.id)

    if error:
        self.tracker.update(state='failed')
        result = QueryExecutionError(error)
        if self.scheduled_query:
            self.scheduled_query.schedule_failures += 1
            models.db.session.add(self.scheduled_query)
    else:
        if (self.scheduled_query and self.scheduled_query.schedule_failures > 0):
            # A successful run resets the exponential-backoff failure counter.
            self.scheduled_query.schedule_failures = 0
            models.db.session.add(self.scheduled_query)

        query_result, updated_query_ids = models.QueryResult.store_result(
            self.data_source.org, self.data_source,
            self.query_hash, self.query, data,
            run_time, utils.utcnow())

        self._log_progress('checking_alerts')
        for query_id in updated_query_ids:
            check_alerts_for_query.delay(query_id)
        self._log_progress('finished')

        result = query_result.id
    models.db.session.commit()
    return result
def past_scheduled_queries(cls):
    """Return scheduled queries whose 'until' date is already in the past.

    NOTE(review): returns whatever ``filter`` yields — a list on Python 2,
    a lazy iterator on Python 3 — so callers should only iterate it once.
    """
    now = utils.utcnow()
    queries = (
        Query.query
        .filter(Query.schedule.isnot(None))
        .order_by(Query.id)
    )
    # 'until' is stored as a YYYY-MM-DD string; compare in UTC.
    return filter(
        lambda x: x.schedule["until"] is not None and pytz.utc.localize(
            datetime.datetime.strptime(x.schedule['until'], '%Y-%m-%d')
        ) <= now,
        queries
    )
def outdated_queries(cls):
    """Return scheduled queries that are due for re-execution (peewee version).

    De-duplicates by (query_hash, data_source id) so identical query text
    against the same data source runs only once.
    """
    queries = cls.select(cls, QueryResult.retrieved_at, DataSource)\
        .join(QueryResult)\
        .switch(Query).join(DataSource)\
        .where(cls.schedule != None)  # noqa: E711 — peewee expression syntax requires != None

    now = utils.utcnow()
    outdated_queries = {}
    for query in queries:
        if should_schedule_next(query.latest_query_data.retrieved_at, now, query.schedule):
            key = "{}:{}".format(query.query_hash, query.data_source.id)
            outdated_queries[key] = query

    return outdated_queries.values()
def test_failure_extends_schedule(self):
    """
    Execution failures recorded for a query result in exponential backoff
    for scheduling future execution.
    """
    schedule = {'interval': '60', 'until': None, 'time': None, 'day_of_week': None}
    query = self.factory.create_query(schedule=schedule, schedule_failures=4)
    query_result = self.factory.create_query_result(
        retrieved_at=utcnow() - datetime.timedelta(minutes=16),
        query_text=query.query_text,
        query_hash=query.query_hash)
    query.latest_query_data = query_result

    # With 4 failures the backoff keeps a 16-minute-old result off the queue...
    self.assertEqual(list(models.Query.outdated_queries()), [])

    # ...but at 17 minutes the query is due again.
    query_result.retrieved_at = utcnow() - datetime.timedelta(minutes=17)
    self.assertEqual(list(models.Query.outdated_queries()), [query])
def test_enqueues_query_only_once(self):
    """Two queries sharing text and hash produce a single enqueue call."""
    query = self.factory.create_query(schedule="60")
    duplicate = self.factory.create_query(
        schedule="60", query=query.query, query_hash=query.query_hash)
    query_result = self.factory.create_query_result(
        retrieved_at=utcnow() - datetime.timedelta(minutes=10),
        query=query.query,
        query_hash=query.query_hash)
    query.latest_query_data = query_result
    duplicate.latest_query_data = query_result
    query.save()
    duplicate.save()

    with patch('redash.tasks.queries.enqueue_query') as add_job_mock:
        refresh_queries()
        add_job_mock.assert_called_once_with(query.query, query.data_source,
                                             query.user_id, scheduled=True,
                                             metadata=ANY)
def outdated_queries(cls):
    """Return the list of scheduled queries that are due for execution.

    Any query whose schedule cannot be evaluated is disabled (and the error
    reported to Sentry) instead of breaking the whole scheduler run.
    """
    queries = (Query.query.options(
        joinedload(
            Query.latest_query_data).load_only("retrieved_at")).filter(
                Query.schedule.isnot(None)).order_by(Query.id))
    now = utils.utcnow()
    outdated_queries = {}
    scheduled_queries_executions.refresh()

    for query in queries:
        try:
            if query.schedule.get("disabled"):
                continue

            # 'until' (YYYY-MM-DD) is an optional end date for the schedule.
            if query.schedule["until"]:
                schedule_until = pytz.utc.localize(
                    datetime.datetime.strptime(query.schedule["until"],
                                               "%Y-%m-%d"))

                if schedule_until <= now:
                    continue

            # Prefer the time an execution was already queued; otherwise the
            # last result's retrieval time (may be None if never executed).
            retrieved_at = scheduled_queries_executions.get(
                query.id) or (query.latest_query_data
                              and query.latest_query_data.retrieved_at)

            if should_schedule_next(
                    retrieved_at or now,
                    now,
                    query.schedule["interval"],
                    query.schedule["time"],
                    query.schedule["day_of_week"],
                    query.schedule_failures,
            ):
                # De-duplicate identical query text per data source.
                key = "{}:{}".format(query.query_hash, query.data_source_id)
                outdated_queries[key] = query
        except Exception as e:
            # Disable the broken schedule so a single bad record cannot keep
            # the scheduler failing on every cycle.
            query.schedule["disabled"] = True
            db.session.commit()

            message = (
                "Could not determine if query %d is outdated due to %s. The schedule for this query has been disabled."
                % (query.id, repr(e)))
            logging.info(message)
            sentry.capture_exception(
                type(e)(message).with_traceback(e.__traceback__))

    return list(outdated_queries.values())
def execute_query(self, query, data_source_id, metadata):
    """
    Task body: run ``query`` against the data source identified by
    ``data_source_id`` and store the result.

    :param query: raw query text to execute.
    :param data_source_id: primary key of the data source to run against.
    :param metadata: dict of execution metadata; annotated into the query
        text as a comment when the runner supports annotation.
    :returns: the id of the stored query result.
    :raises Exception: re-raises any error reported by the query runner.
    """
    signal.signal(signal.SIGINT, signal_handler)
    start_time = time.time()

    logger.info("Loading data source (%d)...", data_source_id)

    # TODO: we should probably cache data sources in Redis
    data_source = models.DataSource.get_by_id(data_source_id)

    self.update_state(state="STARTED", meta={"start_time": start_time, "custom_message": ""})

    logger.info("Executing query:\n%s", query)

    query_hash = gen_query_hash(query)
    query_runner = get_query_runner(data_source.type, data_source.options)

    if query_runner.annotate_query():
        metadata["Task ID"] = self.request.id
        metadata["Query Hash"] = query_hash
        metadata["Queue"] = self.request.delivery_info["routing_key"]

        # BUG FIX: dict.iteritems() does not exist in Python 3 (the file
        # already relies on Python-3-only APIs elsewhere); use items().
        annotation = u", ".join([u"{}: {}".format(k, v) for k, v in metadata.items()])

        logging.debug(u"Annotation: %s", annotation)

        annotated_query = u"/* {} */ {}".format(annotation, query)
    else:
        annotated_query = query

    with statsd_client.timer("query_runner.{}.{}.run_time".format(data_source.type, data_source.name)):
        data, error = query_runner.run_query(annotated_query)

    run_time = time.time() - start_time
    logger.info("Query finished... data length=%s, error=%s", data and len(data), error)

    self.update_state(state="STARTED", meta={"start_time": start_time, "error": error, "custom_message": ""})

    # Release the per-query lock so the same query can be enqueued again.
    redis_connection.delete(QueryTask._job_lock_id(query_hash, data_source.id))

    if not error:
        query_result, updated_query_ids = models.QueryResult.store_result(
            data_source.id, query_hash, query, data, run_time, utils.utcnow()
        )
        # Re-evaluate alerts on every query whose result was refreshed.
        for query_id in updated_query_ids:
            check_alerts_for_query.delay(query_id)
    else:
        raise Exception(error)

    return query_result.id
def update(cls, user_id):
    """
    Update the user details hash using the given redis pipeline, user id,
    optional redis id and optional user details. The fields uid, rid and
    updated (timestamp) are enforced and can't be overwritten.
    """
    # EAFP: most of the time the record already exists, so try the lookup
    # first and fall back to creating it (e.g. when the key was purged).
    try:
        detail = cls.get(cls.user_id == user_id)
        # Touch the timestamp and persist to Redis.
        detail.updated_at = utcnow()
        detail.save()
    except ValueError:
        detail = cls.create(user_id=user_id, updated_at=utcnow())
    return detail
def test_recent_empty_sample_refreshs(self):
    """A stale last-update timestamp triggers collecting a column sample."""
    # Enable sampling on the data source and build its schema.
    self.factory.data_source.query_runner.configuration['samples'] = True
    refresh_schema(self.factory.data_source.id)

    # No sample exists right after the schema refresh.
    first_column = ColumnMetadata.query.first()
    self.assertEqual(first_column.example, None)

    stale_timestamp = utils.utcnow() - datetime.timedelta(days=5)
    update_sample(
        self.factory.data_source.id, 'table', 1, stale_timestamp.isoformat())

    first_column = ColumnMetadata.query.first()
    self.assertEqual(first_column.example, self.COLUMN_EXAMPLE)
def test_outdated_queries_works_with_ttl_based_schedule(self):
    """A result older than the schedule interval marks the query outdated."""
    stale_since = utcnow() - datetime.timedelta(hours=2)
    schedule = {"interval": "3600", "time": None, "until": None, "day_of_week": None}
    query = self.factory.create_query(schedule=schedule)
    query.latest_query_data = self.factory.create_query_result(
        query=query.query_text, retrieved_at=stale_since)

    self.assertIn(query, models.Query.outdated_queries())
def test_skips_fresh_queries(self):
    """A result newer than the schedule interval keeps the query fresh."""
    fresh_since = utcnow() - datetime.timedelta(minutes=30)
    schedule = {"interval": "3600", "time": None, "until": None, "day_of_week": None}
    query = self.factory.create_query(schedule=schedule)
    query.latest_query_data = self.factory.create_query_result(
        query=query.query_text, retrieved_at=fresh_since)

    self.assertNotIn(query, models.Query.outdated_queries())
def test_enqueues_only_for_relevant_data_source(self):
    """Only the stale 60s-interval query is enqueued, not its 3600s twin."""
    query = self.factory.create_query(schedule="60")
    query2 = self.factory.create_query(
        schedule="3600", query=query.query, query_hash=query.query_hash)

    import psycopg2
    utc = psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)
    stale_time = utcnow().replace(tzinfo=utc) - datetime.timedelta(minutes=10)
    query_result = self.factory.create_query_result(
        retrieved_at=stale_time, query=query.query, query_hash=query.query_hash)
    for outdated in (query, query2):
        outdated.latest_query_data = query_result
        outdated.save()

    with patch('redash.tasks.QueryTask.add_task') as add_job_mock:
        refresh_queries()
        add_job_mock.assert_called_once_with(
            query.query, query.data_source, scheduled=True, metadata=ANY)
def test_doesnt_return_live_incremental_results(self):
    """Results belonging to a query's incremental result set are not unused."""
    fortnight_ago = utcnow() - datetime.timedelta(days=14)
    query_text = "SELECT 1"
    results = [
        self.factory.create_query_result(
            query_text=query_text, retrieved_at=fortnight_ago)
        for _ in range(5)
    ]
    query = self.factory.create_query(
        query_text=query_text,
        latest_query_data=results[0],
        schedule_resultset_size=3)
    for result in results:
        self.factory.create_query_resultset(query_rel=query, result=result)
    db.session.flush()

    # Every old result is attached to the query's result set, so cleanup
    # must not consider any of them unused.
    self.assertEqual([], list(models.QueryResult.unused()))
def run_query_sync(data_source, parameter_values, query_text, max_age=0):
    """
    Execute ``query_text`` synchronously against ``data_source`` and return
    the stored result, or a cached result if one newer than ``max_age``
    seconds exists.

    :param data_source: data source model to run the query against.
    :param parameter_values: dict of parameter name -> value; all parameters
        referenced by the query must be present.
    :param query_text: query template text, parameters not yet applied.
    :param max_age: cache TTL in seconds; <= 0 forces execution.
    :returns: a QueryResult model, or None when the runner reports an error.
    :raises Exception: when a referenced parameter has no value.
    """
    query_parameters = set(collect_query_parameters(query_text))
    missing_params = set(query_parameters) - set(parameter_values.keys())
    if missing_params:
        raise Exception('Missing parameter value for: {}'.format(
            ", ".join(missing_params)))

    query_text = apply_parameters(query_text, parameter_values, data_source)

    if max_age <= 0:
        query_result = None
    else:
        query_result = models.QueryResult.get_latest(data_source, query_text,
                                                     max_age)

    query_hash = gen_query_hash(query_text)

    if query_result:
        logging.info("Returning cached result for query %s", query_hash)
        return query_result

    try:
        started_at = time.time()
        data, error = data_source.query_runner.run_query(
            query_text, current_user)

        if error:
            # BUG FIX: log message typo ("got bak error") and the error was
            # logged on a separate line; use one lazily-formatted record.
            logging.info("got back error running query: %s", error)
            return None

        run_time = time.time() - started_at
        query_result, updated_query_ids = models.QueryResult.store_result(
            data_source.org_id, data_source, query_hash, query_text, data,
            run_time, utcnow())

        models.db.session.commit()
        return query_result
    except Exception:
        # BUG FIX: the exception was silently discarded before aborting,
        # making failures undiagnosable — record the traceback first.
        logging.exception("Unexpected error running query %s", query_hash)
        if max_age > 0:
            abort(
                404,
                message=
                "Unable to get result from the database, and no cached query result found."
            )
        else:
            abort(503, message="Unable to get result from the database.")
        return None