def test_get_metric_statistics(self):
    """Smoke-test get_metric_statistics against the first listed metric.

    Queries daily-period Average and Sum datapoints over the last
    14 days; passes if the API round-trip raises nothing.
    """
    connection = CloudWatchConnection()
    metric = connection.list_metrics()[0]
    window_end = datetime.datetime.utcnow()
    window_start = window_end - datetime.timedelta(hours=24 * 14)
    one_day = 3600 * 24  # statistics period, seconds
    connection.get_metric_statistics(one_day, window_start, window_end,
                                     metric.name, metric.namespace,
                                     ['Average', 'Sum'])
def test_get_metric_statistics(self):
    """Smoke-test get_metric_statistics on the first available metric.

    Requests Average and Sum datapoints at a one-day period over the
    last 14 days.  Uses utcnow() rather than now(): CloudWatch
    timestamps are UTC, so local time would shift the query window on
    any machine not running in UTC (the sibling test above already
    does this correctly).
    """
    c = CloudWatchConnection()
    m = c.list_metrics()[0]
    end = datetime.datetime.utcnow()
    start = end - datetime.timedelta(hours=24 * 14)
    c.get_metric_statistics(3600 * 24, start, end, m.name, m.namespace,
                            ['Average', 'Sum'])
class BotoWatchInterface(WatchInterface):
    """CloudWatch-backed implementation of WatchInterface.

    Wraps a boto CloudWatchConnection against either real AWS or a
    Eucalyptus cloud controller; when ``saveclcdata`` is True every
    response is also mirrored to a mockdata/*.json file for offline
    testing.
    """

    conn = None          # built in __init__
    saveclcdata = False  # mirror each response into mockdata/*.json when True

    def __init__(self, clc_host, access_id, secret_key, token):
        # boto.set_stream_logger('foo')
        # Defaults target a Eucalyptus cloud controller endpoint.
        path = '/services/CloudWatch'
        port = 8773
        if clc_host.endswith('amazonaws.com'):
            # Real AWS host: switch to the monitoring endpoint on the
            # standard HTTPS port.  (The original compared a 13-char
            # slice and also contained a dead `reg = None` assignment.)
            clc_host = clc_host.replace('ec2', 'monitoring', 1)
            path = '/'
            port = 443
        reg = RegionInfo(name='eucalyptus', endpoint=clc_host)
        self.conn = CloudWatchConnection(
            access_id, secret_key, region=reg, port=port, path=path,
            is_secure=True, security_token=token, debug=0)
        # Eucalyptus front ends commonly use self-signed certificates.
        self.conn.https_validate_certificates = False
        self.conn.http_connection_kwargs['timeout'] = 30

    def __save_json__(self, obj, name):
        # 'with' guarantees the handle is closed even if json.dump raises
        # (the original leaked the descriptor on encoder errors).
        with open(name, 'w') as f:
            json.dump(obj, f, cls=BotoJsonWatchEncoder, indent=2)

    def get_metric_statistics(self, period, start_name, end_time, metric_name,
                              namespace, statistics, dimensions, unit):
        obj = self.conn.get_metric_statistics(period, start_name, end_time,
                                              metric_name, namespace,
                                              statistics, dimensions, unit)
        if self.saveclcdata:
            self.__save_json__(obj, "mockdata/CW_Statistics.json")
        return obj

    def list_metrics(self, next_token=None, dimensions=None,
                     metric_name=None, namespace=None):
        obj = self.conn.list_metrics(next_token, dimensions, metric_name,
                                     namespace)
        if self.saveclcdata:
            self.__save_json__(obj, "mockdata/CW_Metrics.json")
        return obj

    def put_metric_data(self, namespace, name, value, timestamp, unit,
                        dimensions, statistics):
        return self.conn.put_metric_data(namespace, name, value, timestamp,
                                         unit, dimensions, statistics)

    def describe_alarms(self, action_prefix=None, alarm_name_prefix=None,
                        alarm_names=None, max_records=None, state_value=None,
                        next_token=None):
        obj = self.conn.describe_alarms(action_prefix, alarm_name_prefix,
                                        alarm_names, max_records, state_value,
                                        next_token)
        if self.saveclcdata:
            self.__save_json__(obj, "mockdata/CW_Alarms.json")
        return obj

    def delete_alarms(self, alarm_names):
        return self.conn.delete_alarms(alarm_names)

    def enable_alarm_actions(self, alarm_names):
        return self.conn.enable_alarm_actions(alarm_names)

    def disable_alarm_actions(self, alarm_names):
        return self.conn.disable_alarm_actions(alarm_names)

    def put_metric_alarm(self, alarm):
        return self.conn.put_metric_alarm(alarm)
def get_stats(key, secret, db_id, metric):
    """Fetch the last five minutes of per-minute averages for an RDS metric.

    On any CloudWatch failure, prints the exception and exits the
    process with status 1.
    """
    window_end = datetime.now()
    window_start = window_end - timedelta(minutes=5)
    conn = CloudWatchConnection(key, secret)
    dims = {"DBInstanceIdentifier": db_id}
    try:
        res = conn.get_metric_statistics(60, window_start, window_end,
                                         metric, "AWS/RDS", "Average", dims)
    except Exception as e:
        print(e)
        sys.exit(1)
def getCloudWatchMetric(): end_time = datetime.datetime.now() # adding 65 seconds due amazon caracteristic end_time = end_time - datetime.timedelta(seconds=65) start_time = end_time - datetime.timedelta(seconds=args.interval) if args.verbose: debug = args.verbose else: debug = 0 regions = boto.ec2.cloudwatch.regions() for reg in regions: if reg.name == args.region: cloudwatch = CloudWatchConnection(is_secure=True, debug=debug, region=reg) cloudwatch_result = None # Check if the metric has collected statistics. If it does not, say so metricsList = cloudwatch.list_metrics(dimensions=dimension, namespace=args.namespace) metricTest = 'Metric:' + args.metric strMetricsList = [] for item in metricsList: strMetricsList.append(str(item)) if metricTest in strMetricsList: # Specify the application load balancer as follows: app/load-balancer-name/1234567890123456 (the final portion of the load balancer ARN) #tested metrics for ALB: TargetResponseTime(Average),RequestCount(Sum),ActiveConnectionCount(Sum),NewConnectionCount(Sum),HTTPCode_Target_4XX_Count(Sum),HTTPCode_Target_5XX_Count(Sum),HealthyHostCount(Average) cloudwatch_result = cloudwatch.get_metric_statistics( args.interval, start_time, end_time, args.metric, args.namespace, statistics=args.statistic, dimensions=dimension) if len(cloudwatch_result) > 0: cloudwatch_result = cloudwatch_result[0] if len(cloudwatch_result) > 0: if len(repr(cloudwatch_result[args.statistic])) > 6: cloudwatch_result = long(cloudwatch_result[args.statistic]) else: cloudwatch_result = float( cloudwatch_result[args.statistic]) else: # Assuming value is 0 if AWS returned empty list cloudwatch_result = 0 print cloudwatch_result else: print 'Unsupported Metric' return
def get_cloudwatch_top_metrics():
    """Return a text report of response counts above TOP_THRESHOLD_COUNT.

    Pages through every metric name under the configured namespace and
    dimensions, sums each over the START_DELTA_AGO window, and renders
    the counts in descending order.  The total-sent metric is
    relabelled 'TOTAL'; the processing-time metric is skipped (it is
    not a count).
    """
    conn = CloudWatchConnection()

    # Page through list_metrics until the token runs out.
    metrics_names = []
    next_token = None
    while True:
        res = conn.list_metrics(next_token=next_token,
                                dimensions=settings.CLOUDWATCH_DIMENSIONS,
                                namespace=settings.CLOUDWATCH_NAMESPACE)
        metrics_names.extend([m.name for m in res])
        next_token = res.next_token
        if next_token is None:
            break

    # Pin one window for every query.  The original called now() twice
    # per iteration (window drifted between metrics) and used local
    # time; CloudWatch timestamps are UTC.
    end = datetime.datetime.utcnow()
    start = end - START_DELTA_AGO
    period = int(START_DELTA_AGO.total_seconds())

    # List of tuples like [(metric_name, count), ...]
    metrics = []
    for metric_name in metrics_names:
        res = conn.get_metric_statistics(period, start, end, metric_name,
                                         settings.CLOUDWATCH_NAMESPACE, 'Sum',
                                         settings.CLOUDWATCH_DIMENSIONS,
                                         'Count')
        if not res:
            # Some metrics will not have (or no longer have) results
            continue
        count = int(res[0]['Sum'])
        if count >= TOP_THRESHOLD_COUNT:
            metrics.append((metric_name, count))
    metrics.sort(key=lambda x: x[1], reverse=True)

    text = 'Responses sent\n----------------------\n'
    for name, count in metrics:
        label = 'TOTAL' if name == settings.CLOUDWATCH_TOTAL_SENT_METRIC_NAME else name
        if label == settings.CLOUDWATCH_PROCESSING_TIME_METRIC_NAME:
            continue
        text += '%s %s\n' % (str(count).rjust(5), label)
    return text
class BotoWatchInterface(WatchInterface):
    """CloudWatch-backed WatchInterface for AWS or a Eucalyptus cloud.

    Delegates every call to a boto CloudWatchConnection; when
    ``saveclcdata`` is True the raw response is also dumped to a
    mockdata/*.json file for offline testing.
    """

    conn = None          # built in __init__
    saveclcdata = False  # mirror responses into mockdata/*.json when True

    @staticmethod
    def _boto_version_tuple(version_string):
        # '2.10b1' -> (2, 10): keep the leading numeric run of the first
        # two dot-separated components so the comparison is numeric.
        parts = []
        for piece in version_string.split('.')[:2]:
            digits = ''
            for ch in piece:
                if not ch.isdigit():
                    break
                digits += ch
            parts.append(int(digits) if digits else 0)
        return tuple(parts)

    def __init__(self, clc_host, access_id, secret_key, token):
        # boto.set_stream_logger('foo')
        # Defaults target a Eucalyptus cloud controller endpoint.
        path = '/services/CloudWatch'
        port = 8773
        if clc_host.endswith('amazonaws.com'):
            # Real AWS host: switch to the monitoring endpoint on the
            # standard HTTPS port.
            clc_host = clc_host.replace('ec2', 'monitoring', 1)
            path = '/'
            port = 443
        reg = RegionInfo(name='eucalyptus', endpoint=clc_host)
        # validate_certs appeared in boto 2.6.  The previous check
        # compared version *strings*, so '2.10' sorted before '2.6' and
        # boto >= 2.10 took the legacy branch; compare numerically.
        if self._boto_version_tuple(boto.__version__) < (2, 6):
            self.conn = CloudWatchConnection(
                access_id, secret_key, region=reg, port=port, path=path,
                is_secure=True, security_token=token, debug=0)
        else:
            self.conn = CloudWatchConnection(
                access_id, secret_key, region=reg, port=port, path=path,
                validate_certs=False, is_secure=True, security_token=token,
                debug=0)
        self.conn.http_connection_kwargs['timeout'] = 30

    def __save_json__(self, obj, name):
        # 'with' closes the handle even if json.dump raises.
        with open(name, 'w') as f:
            json.dump(obj, f, cls=BotoJsonWatchEncoder, indent=2)

    def get_metric_statistics(self, period, start_name, end_time, metric_name,
                              namespace, statistics, dimensions, unit):
        obj = self.conn.get_metric_statistics(period, start_name, end_time,
                                              metric_name, namespace,
                                              statistics, dimensions, unit)
        if self.saveclcdata:
            self.__save_json__(obj, "mockdata/CW_Statistics.json")
        return obj

    def list_metrics(self, next_token=None, dimensions=None,
                     metric_name=None, namespace=None):
        # Defaults added for parity with the other interface variants;
        # positional callers are unaffected.
        obj = self.conn.list_metrics(next_token, dimensions, metric_name,
                                     namespace)
        if self.saveclcdata:
            self.__save_json__(obj, "mockdata/CW_Metrics.json")
        return obj

    def put_metric_data(self, namespace, name, value, timestamp, unit,
                        dimensions, statistics):
        return self.conn.put_metric_data(namespace, name, value, timestamp,
                                         unit, dimensions, statistics)
class BotoWatchInterface(WatchInterface):
    """CloudWatch-backed WatchInterface for AWS or a Eucalyptus cloud.

    Every call is delegated to a boto CloudWatchConnection; when
    ``saveclcdata`` is True the raw response is also dumped as JSON
    mock data for offline testing.
    """

    # Connection built in __init__; class-level default for inspection.
    conn = None
    # When True, mirror each response into mockdata/*.json.
    saveclcdata = False

    def __init__(self, clc_host, access_id, secret_key, token):
        # boto.set_stream_logger('foo')
        # Defaults target a Eucalyptus cloud controller endpoint.
        path = "/services/CloudWatch"
        port = 8773
        # Last 13 characters equal to "amazonaws.com" means a real AWS
        # host: switch to the monitoring endpoint on the HTTPS port.
        if clc_host[len(clc_host) - 13 :] == "amazonaws.com":
            clc_host = clc_host.replace("ec2", "monitoring", 1)
            path = "/"
            reg = None  # NOTE(review): dead assignment, overwritten below
            port = 443
        reg = RegionInfo(name="eucalyptus", endpoint=clc_host)
        self.conn = CloudWatchConnection(
            access_id, secret_key, region=reg, port=port, path=path, is_secure=True, security_token=token, debug=0
        )
        # Eucalyptus front ends often use self-signed certificates.
        self.conn.https_validate_certificates = False
        self.conn.http_connection_kwargs["timeout"] = 30

    def __save_json__(self, obj, name):
        # Serialize one boto response as indented JSON mock data.
        f = open(name, "w")
        json.dump(obj, f, cls=BotoJsonWatchEncoder, indent=2)
        f.close()

    def get_metric_statistics(self, period, start_name, end_time, metric_name, namespace, statistics, dimensions, unit):
        # Pass-through to CloudWatch GetMetricStatistics.
        obj = self.conn.get_metric_statistics(
            period, start_name, end_time, metric_name, namespace, statistics, dimensions, unit
        )
        if self.saveclcdata:
            self.__save_json__(obj, "mockdata/CW_Statistics.json")
        return obj

    def list_metrics(self, next_token=None, dimensions=None, metric_name=None, namespace=None):
        # Pass-through to CloudWatch ListMetrics (one page per call).
        obj = self.conn.list_metrics(next_token, dimensions, metric_name, namespace)
        if self.saveclcdata:
            self.__save_json__(obj, "mockdata/CW_Metrics.json")
        return obj

    def put_metric_data(self, namespace, name, value, timestamp, unit, dimensions, statistics):
        return self.conn.put_metric_data(namespace, name, value, timestamp, unit, dimensions, statistics)

    def describe_alarms(
        self,
        action_prefix=None,
        alarm_name_prefix=None,
        alarm_names=None,
        max_records=None,
        state_value=None,
        next_token=None,
    ):
        obj = self.conn.describe_alarms(
            action_prefix, alarm_name_prefix, alarm_names, max_records, state_value, next_token
        )
        if self.saveclcdata:
            self.__save_json__(obj, "mockdata/CW_Alarms.json")
        return obj

    def delete_alarms(self, alarm_names):
        return self.conn.delete_alarms(alarm_names)

    def enable_alarm_actions(self, alarm_names):
        return self.conn.enable_alarm_actions(alarm_names)

    def disable_alarm_actions(self, alarm_names):
        return self.conn.disable_alarm_actions(alarm_names)

    def put_metric_alarm(self, alarm):
        return self.conn.put_metric_alarm(alarm)
class WatchData:
    """Autoscaling watchdog state for one EC2 autoscale group.

    Reads per-instance CPUUtilization from CloudWatch, aggregates
    load, and raises/lowers the group's desired capacity or kills a
    misbehaving instance.  State is pickled to ``datafile`` between
    runs.

    NOTE(review): reflowed from collapsed source lines; statement
    nesting (e.g. whether pickle.dump sits inside the history guard)
    was inferred -- confirm against the original file.
    """

    # Tunables (class-level defaults).
    datafile = "/var/tmp/watchdata.p"  # pickle location for persisted state
    dry = False          # when True, log decisions but change nothing
    low_limit = 72       # avg CPU % below which capacity may shrink
    high_limit = 90      # avg CPU % above which capacity grows
    high_urgent = 95     # per-instance CPU % treated as an emergency
    stats_period = 60    # CloudWatch statistics period, seconds
    history_size = 0     # >0 enables a bounded history ring in the pickle

    def __init__(self):
        self.name = ''
        self.instances = 0
        self.new_desired = 0
        self.desired = 0
        self.instances_info = None
        self.previous_instances = 0
        self.action = ""        # human-readable description of last decision
        self.action_ts = 0      # timestamp of the last capacity action
        self.changed_ts = 0
        self.total_load = 0     # sum of per-instance CPU loads
        self.avg_load = 0
        self.max_load = 0
        self.up_ts = 0
        self.down_ts = 0
        self.max_loaded = None  # instance id carrying the highest load
        self.loads = {}         # instance id -> last CPU load
        self.measures = {}      # instance id -> CloudWatch datapoint count
        self.emergency = False
        self.history = None
        self.trend = 0
        self.exponential_average = 0
        self.ts = 0

    def __getstate__(self):
        """Don't store these objets (live AWS handles) when pickling."""
        d = self.__dict__.copy()
        del d['ec2']
        del d['cw']
        del d['autoscale']
        del d['group']
        del d['instances_info']
        return d

    def connect(self, groupname):
        """Open EC2/CloudWatch/AutoScale connections and load the group."""
        self.ec2 = boto.connect_ec2()
        self.cw = CloudWatchConnection()
        self.autoscale = AutoScaleConnection()
        self.group = self.autoscale.get_all_groups(names=[groupname])[0]
        self.instances = len(self.group.instances)
        self.desired = self.group.desired_capacity
        self.name = groupname
        self.ts = int(time.time())

    def get_instances_info(self):
        """Cache EC2 instance objects for the group's current members."""
        ids = [i.instance_id for i in self.group.instances]
        self.instances_info = self.ec2.get_only_instances(instance_ids=ids)

    def get_CPU_loads(self):
        """ Read instances load and store in data """
        measures = 0
        for instance in self.group.instances:
            load = self.get_instance_CPU_load(instance.instance_id)
            if load is None:
                continue
            measures += 1
            self.total_load += load
            self.loads[instance.instance_id] = load
            if load > self.max_load:
                self.max_load = load
                self.max_loaded = instance.instance_id
        # Average only over instances that actually returned datapoints.
        if measures > 0:
            self.avg_load = self.total_load / measures

    def get_instance_CPU_load(self, instance):
        """Return a smoothed recent CPUUtilization for one instance id.

        Weights the newest datapoint 0.6 and the previous one 0.4;
        returns None when CloudWatch had no datapoints in the window.
        """
        end = datetime.datetime.now()  # NOTE(review): CloudWatch expects UTC -- confirm host tz
        start = end - datetime.timedelta(seconds=int(self.stats_period * 3))
        m = self.cw.get_metric_statistics(self.stats_period, start, end, "CPUUtilization", "AWS/EC2", ["Average"], {"InstanceId": instance})
        if len(m) > 0:
            # Chained assignment: records the datapoint count per instance
            # (the local 'measures' itself is unused afterwards).
            measures = self.measures[instance] = len(m)
            ordered = sorted(m, key=lambda x: x['Timestamp'])
            averages = [x['Average'] for x in ordered]
            average = reduce(lambda x, y: 0.4 * x + 0.6 * y, averages[-2:])
            return average
        return None

    @classmethod
    def from_file(cls):
        """Load pickled state; fall back to fresh WatchData on any error."""
        try:
            data = pickle.load(open(cls.datafile, "rb"))
        except:
            data = WatchData()
        return data

    def store(self, annotation=False):
        """Pickle current state and optionally publish an annotation."""
        if self.history_size > 0:
            if not self.history:
                self.history = []
            self.history.append([
                int(time.time()),
                len(self.group.instances),
                int(round(self.total_load)),
                int(round(self.avg_load))
            ])
            # Keep only the newest history_size entries.
            self.history = self.history[-self.history_size:]
        pickle.dump(self, open(self.datafile, "wb"))
        if annotation:
            import utils
            text = json.dumps(self.__getstate__(), skipkeys=True)
            utils.store_annotation("ec2_watch", text)

    def check_too_low(self):
        """Kill an instance whose load is anomalously low vs. the average."""
        for instance, load in self.loads.iteritems():
            if load is not None and self.measures[instance] > 1 and self.instances > 1 and load < self.avg_load * 0.2 and load < 4:
                self.emergency = True
                self.check_avg_low()  # Check if the desired instanes can be decreased
                self.action = "EMERGENCY LOW (%s %5.2f%%) " % (instance, load)
                self.kill_instance(instance)
                return True
        return self.emergency

    def check_too_high(self):
        """Handle an instance above high_urgent: kill it or scale up."""
        for instance, load in self.loads.iteritems():
            if load is not None and self.measures[instance] > 1 and load > self.high_urgent:
                self.emergency = True
                self.action = "EMERGENCY HIGH (%s %5.2f%%) " % (instance, load)
                if self.instances > 1 and load > self.avg_load * 1.5:
                    # One instance far above the average: replace it.
                    self.action += " killing bad instance"
                    self.kill_instance(instance)
                else:
                    self.action += " increasing instances to %d" % (self.instances + 1,)
                    self.set_desired(self.instances + 1)
                return True
        return self.emergency

    def check_avg_high(self):
        """Scale up when average load crosses high_limit."""
        threshold = self.high_limit
        if self.instances == 1:
            threshold = threshold * 0.9  # Increase faster if there is just one instance
        if self.avg_load > threshold:
            self.action = "WARN, high load: %d -> %d " % (self.instances, self.instances + 1)
            self.set_desired(self.instances + 1)
            return True

    def check_avg_low(self):
        """Scale down when the remaining instances could absorb the load."""
        if self.instances <= self.group.min_size:
            return False
        if self.total_load / (self.instances - 1) < self.low_limit:
            self.action = "low load: %d -> %d " % (self.instances, self.instances - 1)
            self.set_desired(self.instances - 1)

    def kill_instance(self, id):
        """Terminate one instance (no-op when ``dry``)."""
        if self.action:
            print(self.action)
        print("Kill instance", id)
        syslog.syslog(syslog.LOG_INFO, "ec2_watch kill_instance: %s instances: %d (%s)" % (id, self.instances, self.action))
        if self.dry:
            return
        self.ec2.terminate_instances(instance_ids=[id])
        self.action_ts = time.time()

    def set_desired(self, desired):
        """Request a new desired capacity (no-op when ``dry``)."""
        if self.action:
            print(self.action)
        print("Setting instances from %d to %d" % (self.instances, desired))
        syslog.syslog(syslog.LOG_INFO, "ec2_watch set_desired: %d -> %d (%s)" % (self.instances, desired, self.action))
        if self.dry:
            return
        # Never go below the group's configured minimum.
        if desired >= self.group.min_size:
            self.group.set_capacity(desired)
        self.action_ts = time.time()
        self.new_desired = desired
class WatchData:
    """Earlier autoscaling watchdog variant (no trend/exponential data).

    Same shape as the newer WatchData: reads per-instance
    CPUUtilization from CloudWatch, aggregates load, and adjusts the
    autoscale group's desired capacity.  Pickles state to ``datafile``.

    NOTE(review): reflowed from collapsed source lines; statement
    nesting was inferred -- confirm against the original file.
    """

    # Tunables (class-level defaults).
    datafile = "/tmp/watchdata.p"  # pickle location for persisted state
    dry = False          # when True, log decisions but change nothing
    low_limit = 70       # avg CPU % below which capacity may shrink
    high_limit = 90      # avg CPU % above which capacity grows
    high_urgent = 95     # per-instance CPU % treated as an emergency
    stats_period = 120   # CloudWatch statistics period, seconds
    history_size = 0     # >0 enables a bounded history ring in the pickle

    def __init__(self):
        self.name = ''
        self.instances = 0
        self.new_desired = 0
        self.desired = 0
        self.instances_info = None
        self.previous_instances = 0
        self.action = ""        # human-readable description of last decision
        self.action_ts = 0      # timestamp of the last capacity action
        self.changed_ts = 0
        self.total_load = 0     # sum of per-instance CPU loads
        self.avg_load = 0
        self.max_load = 0
        self.up_ts = 0
        self.down_ts = 0
        self.max_loaded = None  # instance id carrying the highest load
        self.loads = {}         # instance id -> last CPU load
        self.measures = {}      # instance id -> CloudWatch datapoint count
        self.emergency = False
        self.history = None

    def __getstate__(self):
        """Don't store these objets (live AWS handles) when pickling."""
        d = self.__dict__.copy()
        del d['ec2']
        del d['cw']
        del d['autoscale']
        del d['group']
        del d['instances_info']
        return d

    def connect(self, groupname):
        """Open EC2/CloudWatch/AutoScale connections and load the group."""
        self.ec2 = boto.connect_ec2()
        self.cw = CloudWatchConnection()
        self.autoscale = AutoScaleConnection()
        self.group = self.autoscale.get_all_groups(names=[groupname])[0]
        self.instances = len(self.group.instances)
        self.desired = self.group.desired_capacity
        self.name = groupname

    def get_instances_info(self):
        """Cache EC2 instance objects for the group's current members."""
        ids = [i.instance_id for i in self.group.instances]
        self.instances_info = self.ec2.get_only_instances(instance_ids=ids)

    def get_CPU_loads(self):
        """ Read instances load and store in data """
        for instance in self.group.instances:
            load = self.get_instance_CPU_load(instance.instance_id)
            if load is None:
                continue
            self.total_load += load
            self.loads[instance.instance_id] = load
            if load > self.max_load:
                self.max_load = load
                self.max_loaded = instance.instance_id
        # NOTE(review): divides by the member count, not the number of
        # instances that returned datapoints, and raises
        # ZeroDivisionError when the group is empty -- the newer
        # WatchData variant guards this; confirm intended behavior.
        self.avg_load = self.total_load / self.instances

    def get_instance_CPU_load(self, instance):
        """Return the newest CPUUtilization datapoint for one instance id.

        Returns None when CloudWatch had no datapoints in the last
        300 seconds.
        """
        end = datetime.datetime.now()  # NOTE(review): CloudWatch expects UTC -- confirm host tz
        start = end - datetime.timedelta(seconds=300)
        m = self.cw.get_metric_statistics(self.stats_period, start, end, "CPUUtilization", "AWS/EC2", ["Average"], {"InstanceId": instance})
        if len(m) > 0:
            self.measures[instance] = len(m)
            # Newest first; take the most recent average.
            ordered = sorted(m, key=lambda x: x['Timestamp'], reverse=True)
            return ordered[0]['Average']
        return None

    @classmethod
    def from_file(cls):
        """Load pickled state; fall back to fresh WatchData on any error."""
        try:
            data = pickle.load(open(cls.datafile, "rb"))
        except:
            data = WatchData()
        return data

    def store(self, annotation=False):
        """Pickle current state and optionally publish an annotation."""
        if self.history_size > 0:
            if not self.history:
                self.history = []
            self.history.append([int(time.time()), len(self.group.instances), int(round(self.total_load))])
            # Keep only the newest history_size entries.
            self.history = self.history[-self.history_size:]
        pickle.dump(self, open(self.datafile, "wb"))
        if annotation:
            import utils
            text = json.dumps(self.__getstate__(), skipkeys=True)
            utils.store_annotation("ec2_watch", text)

    def check_too_low(self):
        """Kill an instance whose load is anomalously low vs. the average."""
        for instance, load in self.loads.iteritems():
            if load is not None and self.measures[instance] > 1 and self.instances > 1 and load < self.avg_load * 0.2 and load < 4:
                self.emergency = True
                self.check_avg_low()  # Check if the desired instanes can be decreased
                self.action = "EMERGENCY LOW (%s %5.2f%%) " % (instance, load)
                self.kill_instance(instance)
                return True
        return self.emergency

    def check_too_high(self):
        """Handle an instance above high_urgent: kill it or scale up."""
        for instance, load in self.loads.iteritems():
            if load is not None and self.measures[instance] > 1 and load > self.high_urgent:
                self.emergency = True
                self.action = "EMERGENCY HIGH (%s %5.2f%%) " % (instance, load)
                if self.instances > 1 and load > self.avg_load * 1.5:
                    # One instance far above the average: replace it.
                    self.action += " killing bad instance"
                    self.kill_instance(instance)
                else:
                    self.action += " increasing instances to %d" % (self.instances + 1,)
                    self.set_desired(self.instances + 1)
                return True
        return self.emergency

    def check_avg_high(self):
        """Scale up when average load crosses high_limit."""
        threshold = self.high_limit
        if self.instances == 1:
            threshold = threshold * 0.9  # Increase faster if there is just one instance
        if self.avg_load > threshold:
            self.action = "WARN, high load: %d -> %d " % (self.instances, self.instances + 1)
            self.set_desired(self.instances + 1)
            return True

    def check_avg_low(self):
        """Scale down when the remaining instances could absorb the load."""
        if self.instances <= self.group.min_size:
            return False
        if self.total_load / (self.instances - 1) < self.low_limit:
            self.action = "low load: %d -> %d " % (self.instances, self.instances - 1)
            self.set_desired(self.instances - 1)

    def kill_instance(self, id):
        """Terminate one instance (no-op when ``dry``)."""
        if self.action:
            print self.action
        print "Kill instance", id
        syslog.syslog(syslog.LOG_INFO, "ec2_watch kill_instance: %s instances: %d (%s)" % (id, self.instances, self.action))
        if self.dry:
            return
        self.ec2.terminate_instances(instance_ids=[id])
        self.action_ts = time.time()

    def set_desired(self, desired):
        """Request a new desired capacity (no-op when ``dry``)."""
        if self.action:
            print self.action
        print "Setting instances from %d to %d" % (self.instances, desired)
        syslog.syslog(syslog.LOG_INFO, "ec2_watch set_desired: %d -> %d (%s)" % (self.instances, desired, self.action))
        if self.dry:
            return
        # Never go below the group's configured minimum.
        if desired >= self.group.min_size:
            self.group.set_capacity(desired)
        self.action_ts = time.time()
        self.new_desired = desired
# NOTE(review): this chunk begins mid-way through the 'metrics' dict
# literal -- its opening lines live outside the visible source.
        "value": None
    },
    "FreeStorageSpace": {
        "type": "float",
        "value": None
    }
}
# Query the last 5 minutes of per-minute RDS averages for each metric.
end = datetime.datetime.now()
start = end - datetime.timedelta(minutes=5)
conn = CloudWatchConnection(options.access_key, options.secret_key)
for k, vh in metrics.items():
    try:
        res = conn.get_metric_statistics(60, start, end, k, "AWS/RDS", "Average", {"DBInstanceIdentifier": options.instance_id})
    except Exception, e:
        # Emit a monitoring-agent style error line and bail out.
        print "status err Error running rds_stats: %s" % e.error_message
        sys.exit(1)
    average = res[-1]["Average"]  # last item in result set
    # Convert byte-denominated metrics to GiB.
    if (k == "FreeStorageSpace" or k == "FreeableMemory"):
        average = average / 1024.0**3.0
    if vh["type"] == "float":
        metrics[k]["value"] = "%.4f" % average
    if vh["type"] == "int":
        metrics[k]["value"] = "%i" % average
# Iterating through the Array twice seems inelegant, but I don't know Python
# well enough to do it the right way.
print "status ok rds success"
# Script fragment: fetch one CloudWatch statistic and print it.
# Relies on globals defined outside this chunk: args, aws_key,
# aws_secret, start_time, end_time -- TODO confirm at the call site.
if args.verbose:
    debug = args.verbose
else:
    debug = 0
cloudwatch = CloudWatchConnection(aws_access_key_id=aws_key, aws_secret_access_key=aws_secret, is_secure=True, debug=debug)
cloudwatch_result = None
if args.dimension:
    # Single 'Name=Value' dimension parsed from the command line.
    dimension = {}
    dimensions = args.dimension.split('=')
    dimension[dimensions[0]] = dimensions[1]
    cloudwatch_result = cloudwatch.get_metric_statistics(args.interval, start_time, end_time, args.metric, args.namespace, args.statistic, dimensions=dimension)
    if args.verbose:
        print "DEBUG:", cloudwatch_result
else:
    cloudwatch_result = cloudwatch.get_metric_statistics(args.interval, start_time, end_time, args.metric, args.namespace, args.statistic)
if len(cloudwatch_result) > 0:
    # Use the first datapoint's requested statistic.
    cloudwatch_result = cloudwatch_result[0]
    cloudwatch_result = float(cloudwatch_result[args.statistic])
else:
    # Assuming value is 0 if AWS returned empty list
    cloudwatch_result = 0.0
print cloudwatch_result
# NOTE(review): this chunk begins inside the 'metrics' dict literal and
# ends on a dangling for-statement; both edges continue outside the
# visible source.
    "FreeableMemory": {"type": "float", "value": None},
    "ReadIOPS": {"type": "int", "value": None},
    "WriteLatency": {"type": "float", "value": None},
    "WriteThroughput": {"type": "float", "value": None},
    "WriteIOPS": {"type": "int", "value": None},
    "SwapUsage": {"type": "float", "value": None},
    "ReadThroughput": {"type": "float", "value": None},
    "FreeStorageSpace": {"type": "float", "value": None}}
# Query the last 5 minutes of per-minute RDS averages for each metric.
end = datetime.datetime.now()
start = end - datetime.timedelta(minutes=5)
conn = CloudWatchConnection(options.access_key, options.secret_key)
for k, vh in metrics.items():
    try:
        res = conn.get_metric_statistics(60, start, end, k, "AWS/RDS", "Average", {"DBInstanceIdentifier": options.instance_id})
    except Exception, e:
        # Emit a monitoring-agent style error line and bail out.
        print "status err Error running rds_stats: %s" % e.error_message
        sys.exit(1)
    average = res[-1]["Average"]  # last item in result set
    # Convert byte-denominated metrics to GiB.
    if (k == "FreeStorageSpace" or k == "FreeableMemory"):
        average = average / 1024.0**3.0
    if vh["type"] == "float":
        metrics[k]["value"] = "%.4f" % average
    if vh["type"] == "int":
        metrics[k]["value"] = "%i" % average
# Iterating through the Array twice seems inelegant, but I don't know Python
# well enough to do it the right way.
print "status ok rds success"
# The body of this loop continues beyond the visible chunk.
for k, vh in metrics.items():