def collect_performance_metrics(account, start_date, end_date):
    """Collect CloudWatch datapoints for every metric of every service on
    *account* and bulk-insert the enriched account into Elasticsearch.

    Args:
        account: object exposing ``services``; each service exposes
            ``list_metrics(start, end)`` and a ``metrics`` list to append to.
        start_date: start of the statistics window (forwarded to AWS).
        end_date: end of the statistics window (forwarded to AWS).
    """
    aws_service = AwsService()
    db_service = DbService()
    # Query parameters are identical for every metric — hoisted out of
    # the loops instead of being re-assigned per iteration.
    statistics = 'Average'
    period = 3600  # one datapoint per hour
    for service in account.services:
        print(service.name)
        for metric in service.list_metrics(start_date, end_date):
            datapoints = aws_service.get_aws_metric_statistics(
                metric.dimension_name,   # instance id dimension
                metric.dimension_value,
                metric.name,
                period,
                start_date,
                end_date,
                metric.namespace,
                statistics)
            # Keep only metrics for which AWS actually returned data
            # (truthiness instead of the non-idiomatic `!= []`).
            if datapoints:
                metric.datapoints = datapoints
                service.metrics.append(metric)
    db_service.account_bulk_insert_elastic(account)
class DataParser:
    """Scrapes a pastebin-style page and persists the parsed paste.

    ``parse`` extracts the title, author, post date and content from the
    page's HTML and stores them via :class:`DbService`.
    """

    def __init__(self):
        # Database service used by parse() to persist results.
        self.db = DbService()

    def parse(self, url):
        """Fetch *url*, extract paste metadata/content, and insert it.

        Swallows IndexError (unexpected page layout) and UnicodeDecodeError,
        printing a diagnostic instead of raising.
        """
        try:
            html = requests.get(url)
            doc = lxml.html.fromstring(html.content)
            # Root element of the paste page; IndexError here means the
            # page layout changed (handled below).
            main_data = doc.xpath('//div[@id="content_left"]')[0]
            # parse the title and the username fields
            title = main_data.xpath(
                '//div[@class="paste_box_line1"]')[0].attrib['title']
            user_div = main_data.xpath(
                '//div[@class="paste_box_line2"]/text()')
            # Drop empty text nodes, strip trailing whitespace, then drop
            # entries that became empty after stripping.
            str_list = list(filter(None, user_div))
            str_list = [s.rstrip() for s in str_list]
            str_list = list(filter(None, str_list))
            user_name = str_list[0].lower()
            # Normalize anonymous/placeholder author names to 'unknown'.
            if 'Guest'.lower() in user_name or 'Unknown'.lower() in user_name\
                    or 'Anonymous'.lower() in user_name\
                    or len(user_name) == 0:
                user_name = 'unknown'
            if 'Unknown'.lower() in title or len(title) == 0:
                title = 'unknown'
            # parse the date
            date = main_data.xpath('//div[@class="paste_box_line2"]/span'
                                   )[0].attrib['title'].split(' ')
            date[1] = date[
                1][:-2]  # remove the `th` ending from the day of the month
            if len(date[1]) == 1:
                date[
                    1] = f'0{date[1]}'  # padding '0' if the day of the month is 1-9
            del date[2]  # remove the 'of' version from the dates list
            date = ' '.join(date)  # cast the dates to string instead of list
            # NOTE(review): format assumes the site always reports CDT —
            # confirm the timezone is fixed.
            datetime_object = datetime.datetime.strptime(
                date, '%A %d %B %Y %I:%M:%S %p CDT')
            # parse the content
            content_div = main_data.xpath('//div[@id="selectable"]/ol')[0]
            total_content = []
            for content in content_div:
                total_content.append(content.text_content())
            total_content = ' '.join(total_content).strip(
            )  # cast the content to string instead of list
            # create object with all the relevant data
            data = PasteBinData(url, user_name, title, datetime_object,
                                total_content)
            self.db.insert_data(data)
        except IndexError as e:
            print(f'error parsing {url}: {str(e)}\n')
        except UnicodeDecodeError as e:
            print(
                f'error parsing {url} - unicode does not match utf-8: {str(e)}\n'
            )
def collect_account_services_cost(start_date, end_date):
    """Build an Account for the given window, compute its per-service cost
    and forecast, persist it to Elasticsearch, and return it.

    Args:
        start_date: start of the cost window (forwarded to the Account).
        end_date: end of the cost window.

    Returns:
        The populated Account object.
    """
    # The original also constructed an AwsService() that was never used;
    # the dead local has been removed.
    db_service = DbService()
    account = Account(start_date, end_date)
    account.get_cost_and_usage(start_date, end_date)
    account.calc_services_forecast()
    db_service.account_bulk_insert_elastic(account)
    return account
def collect_ec2_utilization(ec2, metric_list, account_number, start_date,
                            end_date):
    """Collect CloudWatch utilization metrics plus hourly amortized cost for
    one EC2 instance, merge them on start_time, and bulk-insert to Elastic.

    Args:
        ec2: instance object forwarded to the AWS/db service helpers; its
            ``performance_counters_list`` attribute is populated here.
        metric_list: CloudWatch metric names to fetch.
        account_number: AWS account number stamped onto the merged frame.
        start_date: start of the statistics window.
        end_date: end of the statistics window.
    """
    aws_service = AwsService()
    db_service = DbService()
    # Query parameters shared by every metric request (hoisted out of the
    # loop; the original also bound an unused `instance_id` local).
    statistics = 'Average'
    namespace = 'AWS/EC2'
    period = 3600  # one datapoint per hour
    frames = []
    for metric_name in metric_list:
        df = aws_service.get_aws_metric_statistics(ec2, metric_name, period,
                                                   start_date, end_date,
                                                   namespace, statistics)
        if not df.empty:
            frames.append(df)
    df_cost = aws_service.get_aws_cost_and_usage_with_resources(
        ec2=ec2,
        start_time=start_date,
        end_time=end_date,
        granularity="HOURLY",
        metrics="AmortizedCost")
    if not df_cost.empty:
        frames.append(df_cost)
    # merge the different dataframes (cpu_utilization, network_in...) into
    # one dataframe based on start_time
    try:
        if frames:  # truthiness instead of `not frames == []`
            df_merged = db_service.merge_ec2_metrics_on_start_time(frames)
            df_merged['account_number'] = account_number
            # convert the merged dataframe to class members to ease insert
            # to Elasticsearch
            ec2.performance_counters_list = \
                db_service.create_performance_counters_list(
                    df_merged, metric_list)
            # insert the data into the proper elastic index
            db_service.ec2_bulk_insert_elastic(ec2)
    except ValueError:
        # Happens when AWS returns empty values; collection is deliberately
        # best-effort, so the error is intentionally swallowed.
        pass
def save_subscription(email, nickname):
    """Persist a new subscription and return its database id.

    Args:
        email: subscriber email address.
        nickname: subscriber display name.

    Returns:
        The new subscription id, or None when the insert fails (errors are
        logged, never raised to the caller).
    """
    try:
        db = DbService()
        db.db_connect()
        try:
            sub_service = SubscriptionDbService()
            # Renamed from `id` — don't shadow the builtin.
            subscription_id = sub_service.save_subscription(
                db.get_cursor(), email, nickname, 1)
            db.commit()
            return subscription_id
        finally:
            # Original leaked the connection on error; always close it.
            db.close()
    except psycopg2.Error as e:
        logging.error(e)
    except Exception as e:
        logging.error(e)
    return None
def collect_accounts_cost(account_number, start_date, end_date):
    """Fetch daily amortized cost per service for an account, attach a
    forecast, and bulk-insert the account objects into Elasticsearch.

    Args:
        account_number: AWS account to query.
        start_date: window start, expected in '%Y-%m-%d' format.
        end_date: window end.
    """
    aws_service = AwsService()
    db_service = DbService()
    # Validate the expected '%Y-%m-%d' format up-front (raises ValueError on
    # malformed input). The original bound this to an unused local.
    datetime.strptime(start_date, '%Y-%m-%d')
    granularity = 'DAILY'
    metrics = 'AMORTIZED_COST'
    groupby = 'SERVICE'
    # get cost per account for the requested window
    response = aws_service.get_aws_cost_and_usage(account_number, start_date,
                                                  end_date, granularity,
                                                  metrics, groupby)
    # create objects to hold the accounts cost data
    account_list = db_service.create_account(account_number, response)
    account_list_with_forecast = add_forcase_to_account_list(account_list)
    # NOTE(review): `account_list_with_forecast` is never used below — if
    # add_forcase_to_account_list returns a new list rather than mutating
    # `account_list` in place, the forecast is silently dropped. Confirm.
    db_service.print_account_list(account_list)
    # insert accounts to elastic
    db_service.account_bulk_insert_elastic(account_list)
from uuid import UUID
from flask import Flask, jsonify, request, make_response, g
from db_service import DbService
from messaging_service import MessagingService

# Module-level singletons shared by all request handlers.
app = Flask(__name__)
db_service = DbService()
messaging_service = MessagingService()


def create_error_response(msg, code = 400):
    """Build a JSON error response with the given message and HTTP code."""
    message = jsonify({
        'message': msg
    })
    return make_response(message, code)


def empty():
    """Return an empty JSON object response."""
    return jsonify({})


def index():
    """Simple liveness response: {"success": true}."""
    return jsonify(success=True)


def is_uuid(val):
    """Return True when *val* parses as a version-4 UUID string."""
    try:
        UUID(val, version=4)
        return True
    except ValueError:
        return False


@app.route('/photos/pending')
def get_photos_pending():
    """List photos whose status is 'pending'."""
    try:
        results = db_service.get_by_status('pending')
        return jsonify(results)
    # NOTE(review): this chunk appears truncated — the `try` above has no
    # matching `except`/`finally` in the visible source; the handler's error
    # path continues outside this view.
def __init__(self):
    """Initialize the instance with its own database service."""
    self.db = DbService()
def using_boto3():
    """Scratch/experimental routine for exploring the boto3 CloudWatch and
    EC2 APIs.

    NOTE(review): the unconditional ``return`` after the chunking loop makes
    everything below it unreachable; the remainder is kept as a reference
    experiment only and should not be assumed to run.
    """
    '''
    session = boto3.Session(region_name="eu-west-1")
    ec2 = session.resource('ec2')
    instances = ec2.instances.filter()
    for instance in instances:
        print(instance.id, instance.instance_type, instance.launch_time,
              instance.ebs_optimized, instance.state['Name'],
              instance.tags[0]['Value'])
    return
    '''
    # Splits a placeholder work list into fixed-size chunks — presumably a
    # prototype for parallel collection; TODO confirm intended use.
    parallel_chunk_size = 2
    ec2_list = [1, 2, 3, 4]
    for i in range(0, len(ec2_list), parallel_chunk_size):
        chunk = ec2_list[i:i + parallel_chunk_size]
        for ec2 in chunk:
            print(chunk)
            print("working on " + str(ec2))
            #collect_ec2_utilization(ec2, ec2_metric_list, region)
    return
    # ------------------------------------------------------------------
    # UNREACHABLE from here on (early return above).
    # ------------------------------------------------------------------
    db_service = DbService()
    cloudwatch = boto3.client('cloudwatch', region_name="eu-west-1")
    metric_name = 'NetworkOut'
    # Fetch hourly average datapoints for one hard-coded instance id.
    response = cloudwatch.get_metric_statistics(
        Namespace="AWS/EC2",
        Dimensions=[{
            'Name': 'InstanceId',
            'Value': 'i-0d4dc0ddfe07c9259'
        }],
        MetricName=metric_name,
        StartTime='2020-12-06T00:00:00',
        EndTime='2020-12-07T00:00:00',
        Period=3600,
        Statistics=['Average']  #,
        #Unit='Bytes'
    )
    datapoints = response["Datapoints"]
    df = pandas.DataFrame(columns=[metric_name, "start_time"])
    #df = pandas.DataFrame({metric_name: pandas.Series([], dtype='float64'), "start_time": pandas.Series([], dtype='object')})
    # NOTE(review): DataFrame.append is deprecated and removed in pandas 2.0;
    # would need pandas.concat if this code path were ever revived.
    for datapoint in datapoints:
        new_row = {
            metric_name: datapoint["Average"],
            "start_time": datapoint["Timestamp"]
        }
        df = df.append(new_row, ignore_index=True)
    # Flag each datapoint as idle (1) when below the per-metric threshold.
    if metric_name == 'CPUUtilization':
        df['is_cpu_utilization_idle'] = np.where(
            df[metric_name] < Thresholds.cpu_utilization_threshold, 1, 0)
    elif metric_name == 'NetworkIn':
        df['is_network_in_idle'] = np.where(
            df[metric_name] < Thresholds.network_in_threshold, 1, 0)
    elif metric_name == 'NetworkOut':
        df['is_network_out_idle'] = np.where(
            df[metric_name] < Thresholds.network_out_threshold, 1, 0)
    elif metric_name == 'NetworkPacketsIn':
        df['is_network_packets_in_idle'] = np.where(
            df[metric_name] < Thresholds.network_packets_in_threshold, 1, 0)
    elif metric_name == 'NetworkPacketsOut':
        df['is_network_packets_out_idle'] = np.where(
            df[metric_name] < Thresholds.network_packets_out_threshold, 1, 0)
    elif metric_name == 'DiskWriteOps':
        df['is_disk_write_ops_idle'] = np.where(
            df[metric_name] < Thresholds.disk_write_ops_threshold, 1, 0)
    elif metric_name == 'DiskReadOps':
        df['is_disk_read_ops_idle'] = np.where(
            df[metric_name] < Thresholds.disk_read_ops_threshold, 1, 0)
    elif metric_name == 'DiskWriteBytes':
        df['is_disk_write_bytes_idle'] = np.where(
            df[metric_name] < Thresholds.disk_write_bytes_threshold, 1, 0)
    elif metric_name == 'DiskReadBytes':
        df['is_disk_read_bytes_idle'] = np.where(
            df[metric_name] < Thresholds.disk_read_bytes_threshold, 1, 0)
    df2 = df
    print(df2)
    #print(df.info())
    # Repeat the same fetch + idle-flag sequence for CPUUtilization so the
    # two frames can be merged below.
    metric_name = "CPUUtilization"
    response = cloudwatch.get_metric_statistics(
        Namespace="AWS/EC2",
        Dimensions=[{
            'Name': 'InstanceId',
            'Value': 'i-0d4dc0ddfe07c9259'
        }],
        MetricName=metric_name,
        StartTime='2020-12-06T00:00:00',
        EndTime='2020-12-07T00:00:00',
        Period=3600,
        Statistics=['Average'])
    datapoints = response["Datapoints"]
    df = pandas.DataFrame(columns=[metric_name, "start_time"])
    for datapoint in datapoints:
        new_row = {
            metric_name: datapoint["Average"],
            "start_time": datapoint["Timestamp"]
        }
        df = df.append(new_row, ignore_index=True)
    # Same threshold chain as above (duplicated in the original experiment).
    if metric_name == 'CPUUtilization':
        df['is_cpu_utilization_idle'] = np.where(
            df[metric_name] < Thresholds.cpu_utilization_threshold, 1, 0)
    elif metric_name == 'NetworkIn':
        df['is_network_in_idle'] = np.where(
            df[metric_name] < Thresholds.network_in_threshold, 1, 0)
    elif metric_name == 'NetworkOut':
        df['is_network_out_idle'] = np.where(
            df[metric_name] < Thresholds.network_out_threshold, 1, 0)
    elif metric_name == 'NetworkPacketsIn':
        df['is_network_packets_in_idle'] = np.where(
            df[metric_name] < Thresholds.network_packets_in_threshold, 1, 0)
    elif metric_name == 'NetworkPacketsOut':
        df['is_network_packets_out_idle'] = np.where(
            df[metric_name] < Thresholds.network_packets_out_threshold, 1, 0)
    elif metric_name == 'DiskWriteOps':
        df['is_disk_write_ops_idle'] = np.where(
            df[metric_name] < Thresholds.disk_write_ops_threshold, 1, 0)
    elif metric_name == 'DiskReadOps':
        df['is_disk_read_ops_idle'] = np.where(
            df[metric_name] < Thresholds.disk_read_ops_threshold, 1, 0)
    elif metric_name == 'DiskWriteBytes':
        df['is_disk_write_bytes_idle'] = np.where(
            df[metric_name] < Thresholds.disk_write_bytes_threshold, 1, 0)
    elif metric_name == 'DiskReadBytes':
        df['is_disk_read_bytes_idle'] = np.where(
            df[metric_name] < Thresholds.disk_read_bytes_threshold, 1, 0)
    print(df)
    # Merge the NetworkOut and CPUUtilization frames on start_time.
    frames = []
    frames.append(df)
    frames.append(df2)
    df3 = db_service.merge_metrics_on_start_time(frames)
    print(df3)
    #df2 = db_service.convert_csv_to_dataframe('metric_files/NetworkOut_i-0d4dc0ddfe07c9259.csv')
    #print(df2.info())
    #print.pprint(response)
    #print(response)
    #print(type(response))
    #df = pandas.DataFrame.from_dict(response)
    #print(df)
    return
    # UNREACHABLE: second early return above; EC2 inventory dump experiment.
    client = boto3.client('ec2')
    response = client.describe_instances()
    response = response['Reservations']
    #response = response['Instances'][0]
    #print(response)
    #InstanceType,LaunchTime,State.Name,EbsOptimized,Tags[0].Value]
    for i in response:
        for s in i['Instances']:
            print(s['InstanceId'])
            print(s['InstanceType'])
            print(s['LaunchTime'])
            print(s['EbsOptimized'])
            print(s['State']['Name'])
            print(s['Tags'][0]['Value'])