def decorator(*args, **kwargs):
    """Validate the trigger JSON carried by the request, then invoke ``f``.

    The request object is taken from ``args[1]`` and its ``body_json`` is
    checked in four steps: required fields, ``trigger_reformat``,
    ``resolve_patterns`` and a trial ``getExpression`` evaluation.  When a
    step fails, a ``bad_request`` response is handed to
    ``defer.returnValue``; otherwise ``f(*args, **kwargs)`` is yielded.

    NOTE(review): this relies on ``defer.returnValue`` terminating the
    generator (Twisted ``inlineCallbacks`` convention) -- the wrapping
    decorator is not visible here, confirm.
    """
    request = args[1]
    payload = request.body_json
    request.graphite_patterns = []

    # (field, alternative): a field may be absent only if its alternative is
    # present.  "targets" has no alternative.
    requirements = (
        ("targets", None),
        ("warn_value", "expression"),
        ("error_value", "expression"),
    )
    for name, fallback in requirements:
        name_missing = payload.get(name) is None
        if name_missing and payload.get(fallback) is None:
            defer.returnValue(
                bad_request(request, "%s is required" % name))

    try:
        trigger_id = payload.get("id")
        trigger_tags = payload.get("tags", [])
        request.body_json = trigger_reformat(payload, trigger_id, trigger_tags)
    except Exception:
        log.err()
        defer.returnValue(bad_request(request, "Invalid trigger format"))

    # Thresholds are read from the payload object captured before the
    # reformat step, and the same dict is shared by both checks below.
    thresholds = dict(
        (key, payload.get(key)) for key in ('warn_value', 'error_value'))

    try:
        yield resolve_patterns(request, thresholds)
    except Exception:
        log.err()
        defer.returnValue(bad_request(request, "Invalid graphite target"))

    try:
        # Trial evaluation only; the result is discarded.
        getExpression(payload.get("expression"), **thresholds)
    except Exception:
        log.err()
        defer.returnValue(bad_request(request, "Invalid expression"))

    yield f(*args, **kwargs)
def decorator(*args, **kwargs):
    """Validate the trigger JSON on the request before delegating to ``f``.

    The request object is ``args[1]``.  Its ``body_json`` must contain
    ``targets``; ``warn_value`` and ``error_value`` may each be omitted only
    when ``expression`` is present.  The payload is then passed through
    ``trigger_reformat``, its patterns are resolved with
    ``resolve_patterns`` and the expression is trial-evaluated with
    ``getExpression``.  A failing step hands a ``bad_request`` response to
    ``defer.returnValue``; on success ``f(*args, **kwargs)`` is yielded.

    NOTE(review): relies on ``defer.returnValue`` ending the generator
    (Twisted ``inlineCallbacks`` convention) -- the wrapping decorator is
    not visible here, confirm.
    """
    request = args[1]
    json = request.body_json
    request.graphite_patterns = []

    for field, alt in [("targets", None),
                       ("warn_value", "expression"),
                       ("error_value", "expression")]:
        if json.get(field) is None and json.get(alt) is None:
            defer.returnValue(bad_request(request, "%s is required" % field))

    # The handlers below catch ``Exception`` rather than using a bare
    # ``except:`` so that GeneratorExit, KeyboardInterrupt and SystemExit
    # are not swallowed and turned into a "bad request" response.
    try:
        request.body_json = trigger_reformat(json, json.get("id"), json.get("tags", []))
    except Exception:
        log.err()
        defer.returnValue(bad_request(request, "Invalid trigger format"))

    expression_values = {'warn_value': json.get('warn_value'),
                         'error_value': json.get('error_value')}

    try:
        yield resolve_patterns(request, expression_values)
    except Exception:
        log.err()
        defer.returnValue(bad_request(request, "Invalid graphite target"))

    try:
        # Trial evaluation only; the result is discarded.
        getExpression(json.get("expression"), **expression_values)
    except Exception:
        log.err()
        defer.returnValue(bad_request(request, "Invalid expression"))

    yield f(*args, **kwargs)
def testCustom(self):
    # A user-supplied expression over t1/t2 evaluates to a state, while
    # expressions using attribute access on an unknown name or a lambda
    # must be rejected with ExpressionError.
    values = {"t1": 11, "t2": 4}

    outcome = expression.getExpression(
        "ERROR if t1 > 10 and t2 > 3 else OK", **values)
    self.assertEqual(outcome, state.ERROR)

    rejected_sources = (
        "ERROR if f.min(t1,t2) else OK",
        "(lambda f: ())",
    )
    for source in rejected_sources:
        with self.assertRaises(expression.ExpressionError):
            expression.getExpression(source, **values)
def testDefault(self):
    # Without a custom expression the state is derived from t1 and the two
    # thresholds.  Rows are (t1, warn_value, error_value, expected state);
    # the first three use warn < error, the last three warn > error.
    cases = [
        (10, 60, 90, state.OK),
        (60, 60, 90, state.WARN),
        (90, 60, 90, state.ERROR),
        (40, 30, 10, state.OK),
        (20, 30, 10, state.WARN),
        (10, 30, 10, state.ERROR),
    ]
    for t1, warn_value, error_value, expected in cases:
        actual = expression.getExpression(
            t1=t1, warn_value=warn_value, error_value=error_value)
        self.assertEqual(actual, expected)

    # Same call, with the arguments supplied through dict expansion.
    packed = {'t1': 10, 'warn_value': 30, 'error_value': 10}
    self.assertEqual(expression.getExpression(**packed), state.ERROR)
def testDefault(self):
    # Exercises getExpression with no custom expression: only t1 and the
    # warn/error thresholds are supplied.

    def evaluate(t1, warn_value, error_value):
        return expression.getExpression(
            t1=t1, warn_value=warn_value, error_value=error_value)

    # Thresholds with warn_value below error_value.
    self.assertEqual(evaluate(10, 60, 90), state.OK)
    self.assertEqual(evaluate(60, 60, 90), state.WARN)
    self.assertEqual(evaluate(90, 60, 90), state.ERROR)

    # Thresholds with warn_value above error_value.
    self.assertEqual(evaluate(40, 30, 10), state.OK)
    self.assertEqual(evaluate(20, 30, 10), state.WARN)
    self.assertEqual(evaluate(10, 30, 10), state.ERROR)

    # Arguments passed via dict expansion instead of explicit keywords.
    as_mapping = {'t1': 10, 'warn_value': 30, 'error_value': 10}
    self.assertEqual(expression.getExpression(**as_mapping), state.ERROR)
def decorator(*args, **kwargs):
    """Validate the trigger JSON on the request before delegating to ``f``.

    The request object is ``args[1]``.  Checks performed on its
    ``body_json``, in order:

    1. ``targets`` is present; ``warn_value`` / ``error_value`` are present
       unless ``expression`` is.
    2. ``targets`` is a list.
    3. ``trigger_reformat`` accepts the payload.
    4. ``resolve_patterns`` succeeds for the request.
    5. ``getExpression`` can evaluate the expression with the collected
       values (``PREV_STATE`` is seeded with ``state.NODATA``).

    A failing step logs the error and hands a ``bad_request`` response to
    ``defer.returnValue``; on success ``f(*args, **kwargs)`` is yielded.

    NOTE(review): relies on ``defer.returnValue`` ending the generator
    (Twisted ``inlineCallbacks`` convention) -- the wrapping decorator is
    not visible here, confirm.
    """
    request = args[1]
    json = request.body_json
    request.graphite_patterns = []

    for field, alt in [("targets", None),
                       ("warn_value", "expression"),
                       ("error_value", "expression")]:
        if json.get(field) is None and json.get(alt) is None:
            defer.returnValue(
                bad_request(request, "%s is required" % field))

    # isinstance() rather than an exact type comparison, so list subclasses
    # are accepted as well.
    if not isinstance(json["targets"], list):
        defer.returnValue(bad_request(request, "Invalid trigger targets"))

    try:
        request.body_json = trigger_reformat(json, json.get("id"), json.get("tags", []))
    except Exception as e:
        log.error("Invalid trigger format [{json}]: {e}", json=json, e=e)
        defer.returnValue(bad_request(request, "Invalid trigger format"))

    expression_values = {
        'warn_value': json.get('warn_value'),
        'error_value': json.get('error_value'),
        'PREV_STATE': state.NODATA
    }

    try:
        yield resolve_patterns(request, expression_values)
    except Exception as e:
        log.error("Invalid graphite targets [{targets}]: {e}",
                  targets=request.body_json["targets"], e=e)
        defer.returnValue(bad_request(request, "Invalid graphite targets"))

    try:
        # Trial evaluation only; the result is discarded.
        getExpression(json.get("expression"), **expression_values)
    except Exception as e:
        log.error("Invalid expression [{expression}]: {e}",
                  expression=json.get("expression"), e=e)
        defer.returnValue(bad_request(request, "Invalid expression"))

    yield f(*args, **kwargs)
def trigger(trigger, fromTime, now, cache_ttl):
    """Run one check pass for ``trigger`` and store the resulting record.

    Written as a generator: results of ``trigger.init``,
    ``trigger.get_timeseries``, ``event.compare_states`` and the ``db``
    calls are obtained through ``yield`` (presumably driven by Twisted's
    ``inlineCallbacks``; the decorator is not visible here -- confirm).

    :param trigger: object providing ``id``, ``ttl``, ``ttl_state``,
        ``last_check``, ``struct``, ``db``, ``init`` and ``get_timeseries``.
    :param fromTime: start of the interval to check; when ``None`` the
        ``timestamp`` of ``trigger.last_check`` is used (``now`` if absent).
    :param now: time of the check; a falsy value means ``int(time())``.
    :param cache_ttl: forwarded to ``trigger.db.cleanupMetricValues``.

    NOTE(review): block nesting was reconstructed from whitespace-flattened
    source; confirm against version control.
    """
    now = now or int(time())
    log.info("Checking trigger {id}", id=trigger.id)
    initialized = yield trigger.init(now, fromTime=fromTime)
    if not initialized:
        # End the generator early; no check record is written in this case.
        raise StopIteration
    if fromTime is None:
        fromTime = trigger.last_check.get("timestamp", now)
    # The request window starts one TTL (600 if the trigger has none)
    # before fromTime.
    requestContext = datalib.createRequestContext(
        str(fromTime - (trigger.ttl or 600)), str(now))
    # New check record, seeded from the previous one.
    check = {
        "metrics": trigger.last_check["metrics"].copy(),
        "state": state.OK,
        "timestamp": now,
        "score": trigger.last_check.get("score")
    }
    try:
        time_series = yield trigger.get_timeseries(requestContext)
        for metric in requestContext['metrics']:
            yield trigger.db.cleanupMetricValues(metric,
                                                 now - config.METRICS_TTL,
                                                 cache_key=metric,
                                                 cache_ttl=cache_ttl)
        if not time_series:
            # No data at all: only triggers with a TTL change state here.
            if trigger.ttl:
                check["state"] = trigger.ttl_state
                check["msg"] = "Trigger has no metrics"
                yield event.compare_states(trigger, check,
                                           trigger.last_check, now)
        else:
            # Start every non-stub series from a copy of its last state.
            for t_series in time_series.values():
                for tN in t_series:
                    if not tN.stub:
                        check["metrics"][tN.name] = tN.last_state.copy()
            # presumably key 1 holds the series of the first target -- verify
            for t1 in time_series[1]:
                log.debug("Checking timeserie {name}: {values}",
                          name=t1.name,
                          values=list(t1))
                log.debug(
                    "Checking interval: {start} - {end} ({duration}s), step: {step}",
                    start=t1.start,
                    end=t1.end,
                    step=t1.step,
                    duration=t1.end - t1.start)
                metric_state = check["metrics"].get(t1.name)
                if not metric_state:
                    log.debug("No metric state for {name}.", name=t1.name)
                    continue
                # Timestamps at or before the checkpoint are skipped below.
                checkpoint = max(
                    t1.last_state["timestamp"] - config.CHECKPOINT_GAP,
                    metric_state.get("event_timestamp", 0))
                log.debug("Checkpoint for {name}: {checkpoint}",
                          name=t1.name,
                          checkpoint=checkpoint)
                for value_timestamp in xrange(t1.start, now + t1.step,
                                              t1.step):
                    if value_timestamp <= checkpoint:
                        continue
                    expression_values = time_series.get_expression_values(
                        t1, value_timestamp)
                    t1_value = expression_values["t1"]
                    log.debug("values for ts {timestamp}: {values}",
                              timestamp=value_timestamp,
                              values=expression_values)
                    # Skip points where any expression value is missing.
                    if None in expression_values.values():
                        continue
                    expression_values.update({
                        'warn_value': trigger.struct.get('warn_value'),
                        'error_value': trigger.struct.get('error_value'),
                        'PREV_STATE': metric_state['state']
                    })
                    expression_state = expression.getExpression(
                        trigger.struct.get('expression'), **expression_values)
                    time_series.update_state(t1, check, expression_state,
                                             expression_values,
                                             value_timestamp)
                    yield event.compare_states(trigger,
                                               metric_state,
                                               t1.last_state,
                                               value_timestamp,
                                               value=t1_value,
                                               metric=t1.name)
                # Compare against the last_check timestamp in case the
                # checker has not been run for a long time.
                if trigger.ttl and metric_state[
                        "timestamp"] + trigger.ttl < trigger.last_check[
                            "timestamp"]:
                    log.info("Metric {name} TTL expired for state {state}",
                             name=t1.name,
                             state=metric_state)
                    if trigger.ttl_state == state.DEL and metric_state.get(
                            "event_timestamp") is not None:
                        # The metric and its counterparts from the other
                        # targets are dropped from the check record.
                        log.info("Remove metric {name}", name=t1.name)
                        del check["metrics"][t1.name]
                        for tN, tName in time_series.other_targets_names.iteritems(
                        ):
                            log.info("Remove metric {name}", name=tName)
                            del check["metrics"][tName]
                        for pattern in trigger.struct.get("patterns"):
                            yield trigger.db.delPatternMetrics(pattern)
                        # Removed metric: skip the TTL state update below.
                        continue
                    time_series.update_state(
                        t1, check, state.to_metric_state(trigger.ttl_state),
                        None, trigger.last_check["timestamp"] - trigger.ttl)
                    yield event.compare_states(trigger,
                                               metric_state,
                                               t1.last_state,
                                               metric_state["timestamp"],
                                               metric=t1.name)
    except StopIteration:
        # Propagate unchanged so the handler below does not record it as an
        # evaluation failure.
        raise
    except Exception as e:
        log.error("Trigger check failed: {e}", e=e)
        check["state"] = state.EXCEPTION
        check["msg"] = "Trigger evaluation exception"
        yield event.compare_states(trigger, check, trigger.last_check, now)
    # Score = sum of per-metric state scores plus the trigger state's score.
    scores = sum(
        map(lambda m: state.SCORES[m["state"]],
            check["metrics"].itervalues()))
    check["score"] = scores + state.SCORES[check["state"]]
    yield trigger.db.setTriggerLastCheck(trigger.id, check)
def trigger(trigger, fromTime, now, cache_ttl):
    """Run one check pass for ``trigger`` and store the resulting record.

    Written as a generator: results of ``trigger.init``,
    ``trigger.get_timeseries``, ``event.compare_states`` and the ``db``
    calls are obtained through ``yield`` (presumably driven by Twisted's
    ``inlineCallbacks``; the decorator is not visible here -- confirm).

    :param trigger: object providing ``id``, ``ttl``, ``ttl_state``,
        ``last_check``, ``struct``, ``db``, ``update_score``, ``init`` and
        ``get_timeseries``.
    :param fromTime: start of the interval to check; when ``None`` the
        ``timestamp`` of ``trigger.last_check`` is used (``now`` if absent).
    :param now: time of the check; a falsy value means ``int(time())``.
    :param cache_ttl: forwarded to ``trigger.db.cleanupMetricValues``.

    NOTE(review): block nesting was reconstructed from whitespace-flattened
    source; confirm against version control.
    """
    now = now or int(time())
    log.msg("Checking trigger %s" % trigger.id)
    initialized = yield trigger.init(now, fromTime=fromTime)
    if not initialized:
        # End the generator early; no check record is written in this case.
        raise StopIteration
    if fromTime is None:
        fromTime = trigger.last_check.get("timestamp", now)
    # The request window starts one TTL (600 if the trigger has none)
    # before fromTime.
    requestContext = datalib.createRequestContext(
        str(fromTime - (trigger.ttl or 600)), str(now))
    # New check record, seeded from the previous one.
    check = {
        "metrics": trigger.last_check["metrics"].copy(),
        "state": state.OK,
        "timestamp": now,
        "score": trigger.last_check.get("score")
    }
    try:
        time_series = yield trigger.get_timeseries(requestContext)
        for metric in requestContext['metrics']:
            yield trigger.db.cleanupMetricValues(metric,
                                                 now - config.METRICS_TTL,
                                                 cache_key=metric,
                                                 cache_ttl=cache_ttl)
        if not time_series:
            # No data at all: only triggers with a TTL change state here.
            if trigger.ttl:
                check["state"] = trigger.ttl_state
                check["msg"] = "Trigger has no metrics"
                yield event.compare_states(trigger, check,
                                           trigger.last_check, now)
        else:
            # Start every non-stub series from a copy of its last state.
            for t_series in time_series.values():
                for tN in t_series:
                    if not tN.stub:
                        check["metrics"][tN.name] = tN.last_state.copy()
            # presumably key 1 holds the series of the first target -- verify
            for t1 in time_series[1]:
                metric_state = check["metrics"].get(t1.name)
                if not metric_state:
                    continue
                # Timestamps at or before the checkpoint are skipped below.
                checkpoint = max(
                    t1.last_state["timestamp"] - config.CHECKPOINT_GAP,
                    metric_state.get("event_timestamp", 0))
                for value_timestamp in xrange(t1.start, now + t1.step,
                                              t1.step):
                    if value_timestamp <= checkpoint:
                        continue
                    expression_values = time_series.get_expression_values(
                        t1, value_timestamp)
                    t1_value = expression_values["t1"]
                    # Skip points where any expression value is missing.
                    if None in expression_values.values():
                        continue
                    expression_values.update({
                        'warn_value': trigger.struct.get('warn_value'),
                        'error_value': trigger.struct.get('error_value'),
                        'PREV_STATE': metric_state['state']
                    })
                    expression_state = expression.getExpression(
                        trigger.struct.get('expression'), **expression_values)
                    time_series.update_state(t1, check, expression_state,
                                             expression_values,
                                             value_timestamp)
                    yield event.compare_states(trigger,
                                               metric_state,
                                               t1.last_state,
                                               value_timestamp,
                                               value=t1_value,
                                               metric=t1.name)
                # Compare against the last_check timestamp in case the
                # checker has not been run for a long time.
                if trigger.ttl and metric_state[
                        "timestamp"] + trigger.ttl < trigger.last_check[
                            "timestamp"]:
                    log.msg("Metric %s TTL expired for state %s" %
                            (t1.name, metric_state))
                    if trigger.ttl_state == state.DEL and metric_state.get(
                            "event_timestamp") is not None:
                        # The metric and its counterparts from the other
                        # targets are dropped from the check record.
                        log.msg("Remove metric %s" % t1.name)
                        del check["metrics"][t1.name]
                        for tN, tName in time_series.other_targets_names.iteritems(
                        ):
                            log.msg("Remove metric %s" % tName)
                            del check["metrics"][tName]
                        for pattern in trigger.struct.get("patterns"):
                            yield trigger.db.delPatternMetrics(pattern)
                        # Removed metric: skip the TTL state update below.
                        continue
                    time_series.update_state(
                        t1, check, state.to_metric_state(trigger.ttl_state),
                        None, trigger.last_check["timestamp"] - trigger.ttl)
                    yield event.compare_states(trigger,
                                               metric_state,
                                               t1.last_state,
                                               metric_state["timestamp"],
                                               metric=t1.name)
    except StopIteration:
        # Propagate unchanged so the handler below does not record it as an
        # evaluation failure.
        raise
    except Exception:
        log.err()
        check["state"] = state.EXCEPTION
        check["msg"] = "Trigger evaluation exception"
        yield event.compare_states(trigger, check, trigger.last_check, now)
    # The score is refreshed only for triggers that ask for it.
    if trigger.update_score:
        update_score(check)
    yield trigger.db.setTriggerLastCheck(trigger.id, check)
def check(self, fromTime=None, now=None, cache_ttl=60):
    """Run one check pass for this trigger and store the resulting record.

    Written as a generator: results of ``self.init``,
    ``self.get_timeseries``, ``self.compare_state`` and the ``db`` calls
    are obtained through ``yield`` (presumably driven by Twisted's
    ``inlineCallbacks``; the decorator is not visible here -- confirm).

    :param fromTime: start of the interval to check; when ``None`` the
        ``timestamp`` of ``self.last_check`` is used (``now`` if absent).
    :param now: time of the check; a falsy value means ``int(time())``.
    :param cache_ttl: forwarded to ``self.db.cleanupMetricValues``.

    NOTE(review): block nesting was reconstructed from whitespace-flattened
    source; confirm against version control.
    """
    now = now or int(time())
    log.msg("Checking trigger %s" % self.id)
    initialized = yield self.init(now, fromTime=fromTime)
    if not initialized:
        # End the generator early; no check record is written in this case.
        raise StopIteration
    if fromTime is None:
        fromTime = self.last_check.get("timestamp", now)
    # The request window starts one TTL (600 if the trigger has none)
    # before fromTime.
    requestContext = datalib.createRequestContext(str(fromTime - (self.ttl or 600)), str(now))
    # New check record, seeded from the previous one.
    check = {"metrics": self.last_check["metrics"].copy(), "state": state.OK, "timestamp": now}
    try:
        time_series = yield self.get_timeseries(requestContext)
        for metric in requestContext['metrics']:
            yield self.db.cleanupMetricValues(metric, now - config.METRICS_TTL,
                                              cache_key=metric, cache_ttl=cache_ttl)
        if len(time_series) == 0:
            # No data at all: only triggers with a TTL change state here.
            if self.ttl:
                check["state"] = self.ttl_state
                check["msg"] = "Trigger has no metrics"
                yield self.compare_state(check, self.last_check, now)
        else:
            # Start every series from a copy of its last known state.
            for t_series in time_series.values():
                for tN in t_series:
                    check["metrics"][tN.name] = tN.last_state.copy()
            # presumably key 1 holds the series of the first target -- verify
            for t1 in time_series[1]:
                metric_state = check["metrics"][t1.name]
                for value_timestamp in xrange(t1.start, now + t1.step, t1.step):
                    # Only points newer than the last recorded state.
                    if value_timestamp <= t1.last_state["timestamp"]:
                        continue
                    expression_values = time_series.get_expression_values(t1, value_timestamp)
                    t1_value = expression_values["t1"]
                    # Skip points where any expression value is missing.
                    if None in expression_values.values():
                        continue
                    expression_values.update({'warn_value': self.struct.get('warn_value'),
                                              'error_value': self.struct.get('error_value'),
                                              'PREV_STATE': metric_state['state']})
                    expression_state = expression.getExpression(self.struct.get('expression'),
                                                                **expression_values)
                    time_series.update_state(t1, check, expression_state,
                                             expression_values, value_timestamp)
                    yield self.compare_state(metric_state, t1.last_state, value_timestamp,
                                             value=t1_value, metric=t1.name)
                # Compare against the last_check timestamp in case the
                # checker has not been run for a long time.
                if self.ttl and metric_state["timestamp"] + self.ttl < self.last_check["timestamp"]:
                    log.msg("Metric %s TTL expired for state %s" % (t1.name, metric_state))
                    if self.ttl_state == state.DEL and metric_state.get("event_timestamp") is not None:
                        # The metric and its counterparts from the other
                        # targets are dropped from the check record.
                        log.msg("Remove metric %s" % t1.name)
                        del check["metrics"][t1.name]
                        for tN, tName in time_series.other_targets_names.iteritems():
                            log.msg("Remove metric %s" % tName)
                            del check["metrics"][tName]
                        for pattern in self.struct.get("patterns"):
                            yield self.db.delPatternMetrics(pattern)
                        # Removed metric: skip the TTL state update below.
                        continue
                    time_series.update_state(t1, check, state.toMetricState(self.ttl_state), None,
                                             self.last_check["timestamp"] - self.ttl)
                    yield self.compare_state(metric_state, t1.last_state,
                                             metric_state["timestamp"], metric=t1.name)
    except StopIteration:
        # Propagate unchanged so the handler below does not record it as an
        # evaluation failure.
        raise
    except Exception:
        # Deliberately not a bare ``except:``: this handler yields, so it
        # must not intercept GeneratorExit (or KeyboardInterrupt/SystemExit).
        log.err()
        check["state"] = state.EXCEPTION
        check["msg"] = "Trigger evaluation exception"
        yield self.compare_state(check, self.last_check, now)
    yield self.db.setTriggerLastCheck(self.id, check)
def testCustom(self):
    # One accepted custom expression, then two that getExpression must
    # refuse with ExpressionError (attribute access on ``f`` and a lambda).
    evaluate = expression.getExpression
    samples = dict(t1=11, t2=4)

    accepted = "ERROR if t1 > 10 and t2 > 3 else OK"
    self.assertEqual(evaluate(accepted, **samples), state.ERROR)

    with self.assertRaises(expression.ExpressionError):
        evaluate("ERROR if f.min(t1,t2) else OK", **samples)

    with self.assertRaises(expression.ExpressionError):
        evaluate("(lambda f: ())", **samples)