示例#1
0
def collect_performance_metrics(account, start_date, end_date):
    """Collect CloudWatch datapoints for every metric of every service on
    *account* and bulk-insert the enriched account into Elasticsearch.

    Args:
        account: project Account object exposing ``services``; each service
            exposes ``list_metrics(start, end)`` and a ``metrics`` list.
        start_date, end_date: time range forwarded unchanged to AWS.
    """
    aws_service = AwsService()
    db_service = DbService()

    for service in account.services:
        print(service.name)  # trace progress per service

        for metric in service.list_metrics(start_date, end_date):
            datapoints = aws_service.get_aws_metric_statistics(
                metric.dimension_name, metric.dimension_value, metric.name,
                3600, start_date, end_date, metric.namespace, 'Average')

            # keep only metrics that actually returned data
            if datapoints:
                metric.datapoints = datapoints
                service.metrics.append(metric)

        # NOTE(review): the insert runs once per service iteration (matching
        # the original indentation); if a single insert per account is
        # intended, this call should be dedented — confirm with the author.
        db_service.account_bulk_insert_elastic(account)
示例#2
0
class DataParser:
    """Scrape a Pastebin-style paste page and persist it through DbService."""

    def __init__(self):
        self.db = DbService()

    def parse(self, url):
        """Fetch *url*, extract title, author, date and content, then insert
        a PasteBinData record into the database.

        IndexError (missing DOM nodes) and UnicodeDecodeError are caught and
        reported to stdout; any other exception propagates.
        """
        try:
            response = requests.get(url)
            doc = lxml.html.fromstring(response.content)
            main_data = doc.xpath('//div[@id="content_left"]')[0]

            title, user_name = self._parse_title_and_user(main_data)
            datetime_object = self._parse_date(main_data)
            total_content = self._parse_content(main_data)

            # create object with all the relevant data
            data = PasteBinData(url, user_name, title, datetime_object,
                                total_content)
            self.db.insert_data(data)

        except IndexError as e:
            print(f'error parsing {url}: {str(e)}\n')
        except UnicodeDecodeError as e:
            print(
                f'error parsing {url} - unicode does not match utf-8: {str(e)}\n'
            )

    @staticmethod
    def _parse_title_and_user(main_data):
        """Return (title, user_name), mapping anonymous/unknown authors and
        empty titles to the literal 'unknown'."""
        title = main_data.xpath(
            '//div[@class="paste_box_line1"]')[0].attrib['title']
        user_div = main_data.xpath(
            '//div[@class="paste_box_line2"]/text()')
        str_list = [s.rstrip() for s in user_div if s]
        str_list = [s for s in str_list if s]
        user_name = str_list[0].lower()
        # user_name is already lower-cased, so plain lowercase literals
        # suffice (the original computed 'Guest'.lower() etc. at runtime)
        if (not user_name or 'guest' in user_name or 'unknown' in user_name
                or 'anonymous' in user_name):
            user_name = 'unknown'
        # NOTE(review): title is deliberately NOT lower-cased before this
        # check (matches original behaviour) — only a literal lowercase
        # 'unknown' substring matches.
        if 'unknown' in title or not title:
            title = 'unknown'
        return title, user_name

    @staticmethod
    def _parse_date(main_data):
        """Parse the paste's timestamp span into a datetime object."""
        date = main_data.xpath('//div[@class="paste_box_line2"]/span'
                               )[0].attrib['title'].split(' ')
        # remove the `th`/`st`/`nd` ordinal ending from the day of the month
        date[1] = date[1][:-2]
        if len(date[1]) == 1:
            date[1] = f'0{date[1]}'  # zero-pad days 1-9
        del date[2]  # remove the 'of' word from the dates list
        date = ' '.join(date)  # cast the dates to string instead of list
        return datetime.datetime.strptime(
            date, '%A %d %B %Y %I:%M:%S %p CDT')

    @staticmethod
    def _parse_content(main_data):
        """Concatenate the text content of every child of the paste's <ol>."""
        content_div = main_data.xpath('//div[@id="selectable"]/ol')[0]
        total_content = [content.text_content() for content in content_div]
        return ' '.join(total_content).strip()
示例#3
0
def collect_account_services_cost(start_date, end_date):
    """Build an Account for [start_date, end_date], pull its cost and usage
    from AWS, derive per-service forecasts, persist it to Elasticsearch and
    return it.
    """
    # the original also instantiated AwsService() here, but never used it;
    # Account handles its own AWS access via get_cost_and_usage below
    db_service = DbService()

    account = Account(start_date, end_date)

    # pull raw cost/usage data first, then compute the forecast from it
    account.get_cost_and_usage(start_date, end_date)
    account.calc_services_forecast()

    db_service.account_bulk_insert_elastic(account)

    return account
def collect_ec2_utilization(ec2, metric_list, account_number, start_date, end_date):
    """Collect CloudWatch utilization metrics plus hourly amortized cost for
    one EC2 instance, merge them on start_time and bulk-insert the result
    into Elasticsearch.

    Args:
        ec2: project EC2 object; gains a ``performance_counters_list``.
        metric_list: CloudWatch metric names to query.
        account_number: stamped onto every merged row.
        start_date, end_date: time range forwarded unchanged to AWS.
    """
    aws_service = AwsService()
    db_service = DbService()

    # CloudWatch query parameters shared by every metric (loop-invariant)
    statistics = 'Average'
    namespace = 'AWS/EC2'
    period = 3600

    frames = []
    for metric_name in metric_list:
        df = aws_service.get_aws_metric_statistics(
            ec2, metric_name, period, start_date, end_date, namespace,
            statistics)
        if not df.empty:
            frames.append(df)

    df_cost = aws_service.get_aws_cost_and_usage_with_resources(
        ec2=ec2, start_time=start_date, end_time=end_date,
        granularity="HOURLY", metrics="AmortizedCost")
    if not df_cost.empty:
        frames.append(df_cost)

    # merge the different dataframes (cpu_utilization, network_in...) into
    # one dataframe based on start_time
    try:
        if frames:
            df_merged = db_service.merge_ec2_metrics_on_start_time(frames)
            df_merged['account_number'] = account_number

            # convert the merged dataframe to class members to ease the
            # insert into Elasticsearch
            ec2.performance_counters_list = \
                db_service.create_performance_counters_list(df_merged,
                                                            metric_list)

            # insert the data into the proper elastic index
            db_service.ec2_bulk_insert_elastic(ec2)

    except ValueError:
        # happens when AWS returns empty values; deliberately best-effort
        pass
示例#5
0
def save_subscription(email, nickname):
    """Persist a subscription for (email, nickname).

    Returns the new subscription id, or None if any database error occurred
    (errors are logged, never raised). The connection is now closed on every
    path — the original leaked it when an exception fired before close().
    """
    db = None
    try:
        db = DbService()
        db.db_connect()
        sub_service = SubscriptionDbService()
        # avoid shadowing the builtin `id`
        sub_id = sub_service.save_subscription(db.get_cursor(), email,
                                               nickname, 1)
        db.commit()
        return sub_id
    except psycopg2.Error as e:
        logging.error(e)
    except Exception as e:
        logging.error(e)
    finally:
        if db is not None:
            try:
                db.close()
            except Exception as e:
                # closing failures must not mask the function's result
                logging.error(e)

    return None
def collect_accounts_cost(account_number, start_date, end_date):
    """Collect daily amortized cost grouped by service for one account and
    bulk-insert the resulting account objects into Elasticsearch.

    start_date / end_date are 'YYYY-MM-DD' strings, as required both by the
    strptime validation below and by the AWS Cost Explorer API.
    """
    aws_service = AwsService()
    db_service = DbService()

    # The original bound this to an unused variable; the call is kept because
    # it validates start_date's 'YYYY-MM-DD' format (raises ValueError on bad
    # input) as a side effect.
    datetime.strptime(start_date, '%Y-%m-%d')

    granularity = 'DAILY'
    metrics = 'AMORTIZED_COST'
    groupby = 'SERVICE'

    # get cost per account for the requested period
    response = aws_service.get_aws_cost_and_usage(
        account_number, start_date, end_date, granularity, metrics, groupby)

    # create objects to hold the accounts cost data
    account_list = db_service.create_account(account_number, response)

    # NOTE(review): the forecast-enriched return value was computed but never
    # used in the original — account_list (presumably mutated in place by
    # this call) is what gets printed and inserted below. Confirm whether the
    # returned list should be used instead.
    add_forcase_to_account_list(account_list)

    db_service.print_account_list(account_list)

    # insert accounts to elastic
    db_service.account_bulk_insert_elastic(account_list)
示例#7
0
from uuid import UUID
from flask import Flask, jsonify, request, make_response, g
from db_service import DbService
from messaging_service import MessagingService

app = Flask(__name__)
db_service = DbService()
messaging_service = MessagingService()

def create_error_response(msg, code = 400):
    """Build a JSON error response carrying *msg* with HTTP status *code*."""
    payload = jsonify({'message': msg})
    return make_response(payload, code)

def empty():
    """Return a JSON response containing an empty object."""
    return jsonify(dict())

def index():
    """Health-check style endpoint: always reports success."""
    body = jsonify(success=True)
    return body

def is_uuid(val):
    """Return True if *val* parses as a UUID, False otherwise.

    The original caught only ValueError, so non-string input (None, int)
    crashed with TypeError/AttributeError; those now return False too.
    Note: ``version=4`` normalises the version bits rather than rejecting
    non-v4 UUIDs, so any well-formed UUID string is accepted.
    """
    try:
        UUID(val, version=4)
        return True
    except (ValueError, TypeError, AttributeError):
        return False

@app.route('/photos/pending')
def get_photos_pending():
    try:
        results = db_service.get_by_status('pending')
        return jsonify(results)
示例#8
0
 def __init__(self):
     # wire up the database service this object's methods rely on
     self.db = DbService()
示例#9
0
def using_boto3():
    # NOTE(review): development scratchpad. The triple-quoted string below is
    # disabled code parked as a docstring, and everything after the first
    # bare `return` further down is unreachable — apparently kept for manual
    # toggling while debugging.
    '''
    session = boto3.Session(region_name="eu-west-1")
    ec2 = session.resource('ec2')

    instances = ec2.instances.filter()

    for instance in instances:
        print(instance.id, instance.instance_type, instance.launch_time, instance.ebs_optimized, instance.state['Name'], instance.tags[0]['Value'])

    return
    '''
    # walk a stub instance list in fixed-size chunks, mimicking the parallel
    # fan-out used by the real collector
    parallel_chunk_size = 2
    ec2_list = [1, 2, 3, 4]
    for i in range(0, len(ec2_list), parallel_chunk_size):
        chunk = ec2_list[i:i + parallel_chunk_size]
        for ec2 in chunk:
            print(chunk)
            print("working on " + str(ec2))
            #collect_ec2_utilization(ec2, ec2_metric_list, region)
    return
    # ------------------------------------------------------------------
    # UNREACHABLE from here on: dead debug code after the return above.
    # ------------------------------------------------------------------
    db_service = DbService()

    # fetch hourly NetworkOut averages for a hard-coded instance/date range
    cloudwatch = boto3.client('cloudwatch', region_name="eu-west-1")
    metric_name = 'NetworkOut'
    response = cloudwatch.get_metric_statistics(
        Namespace="AWS/EC2",
        Dimensions=[{
            'Name': 'InstanceId',
            'Value': 'i-0d4dc0ddfe07c9259'
        }],
        MetricName=metric_name,
        StartTime='2020-12-06T00:00:00',
        EndTime='2020-12-07T00:00:00',
        Period=3600,
        Statistics=['Average']  #,
        #Unit='Bytes'
    )

    datapoints = response["Datapoints"]
    df = pandas.DataFrame(columns=[metric_name, "start_time"])

    #df = pandas.DataFrame({metric_name: pandas.Series([], dtype='float64'), "start_time": pandas.Series([], dtype='object')})

    # NOTE(review): DataFrame.append was deprecated in pandas 1.4 and removed
    # in 2.0 — pandas.concat would be required on newer versions.
    for datapoint in datapoints:
        new_row = {
            metric_name: datapoint["Average"],
            "start_time": datapoint["Timestamp"]
        }
        df = df.append(new_row, ignore_index=True)
    # flag each datapoint as idle (1) / busy (0) against the per-metric
    # threshold; only the branch matching metric_name runs
    if metric_name == 'CPUUtilization':
        df['is_cpu_utilization_idle'] = np.where(
            df[metric_name] < Thresholds.cpu_utilization_threshold, 1, 0)
    elif metric_name == 'NetworkIn':
        df['is_network_in_idle'] = np.where(
            df[metric_name] < Thresholds.network_in_threshold, 1, 0)
    elif metric_name == 'NetworkOut':
        df['is_network_out_idle'] = np.where(
            df[metric_name] < Thresholds.network_out_threshold, 1, 0)
    elif metric_name == 'NetworkPacketsIn':
        df['is_network_packets_in_idle'] = np.where(
            df[metric_name] < Thresholds.network_packets_in_threshold, 1, 0)
    elif metric_name == 'NetworkPacketsOut':
        df['is_network_packets_out_idle'] = np.where(
            df[metric_name] < Thresholds.network_packets_out_threshold, 1, 0)
    elif metric_name == 'DiskWriteOps':
        df['is_disk_write_ops_idle'] = np.where(
            df[metric_name] < Thresholds.disk_write_ops_threshold, 1, 0)
    elif metric_name == 'DiskReadOps':
        df['is_disk_read_ops_idle'] = np.where(
            df[metric_name] < Thresholds.disk_read_ops_threshold, 1, 0)
    elif metric_name == 'DiskWriteBytes':
        df['is_disk_write_bytes_idle'] = np.where(
            df[metric_name] < Thresholds.disk_write_bytes_threshold, 1, 0)
    elif metric_name == 'DiskReadBytes':
        df['is_disk_read_bytes_idle'] = np.where(
            df[metric_name] < Thresholds.disk_read_bytes_threshold, 1, 0)

    df2 = df
    print(df2)
    #print(df.info())

    # repeat the whole fetch + idle-flag pass for a second metric
    metric_name = "CPUUtilization"

    response = cloudwatch.get_metric_statistics(
        Namespace="AWS/EC2",
        Dimensions=[{
            'Name': 'InstanceId',
            'Value': 'i-0d4dc0ddfe07c9259'
        }],
        MetricName=metric_name,
        StartTime='2020-12-06T00:00:00',
        EndTime='2020-12-07T00:00:00',
        Period=3600,
        Statistics=['Average'])

    datapoints = response["Datapoints"]
    df = pandas.DataFrame(columns=[metric_name, "start_time"])
    for datapoint in datapoints:
        new_row = {
            metric_name: datapoint["Average"],
            "start_time": datapoint["Timestamp"]
        }
        df = df.append(new_row, ignore_index=True)
    # same threshold chain as above, duplicated verbatim for the second pass
    if metric_name == 'CPUUtilization':
        df['is_cpu_utilization_idle'] = np.where(
            df[metric_name] < Thresholds.cpu_utilization_threshold, 1, 0)
    elif metric_name == 'NetworkIn':
        df['is_network_in_idle'] = np.where(
            df[metric_name] < Thresholds.network_in_threshold, 1, 0)
    elif metric_name == 'NetworkOut':
        df['is_network_out_idle'] = np.where(
            df[metric_name] < Thresholds.network_out_threshold, 1, 0)
    elif metric_name == 'NetworkPacketsIn':
        df['is_network_packets_in_idle'] = np.where(
            df[metric_name] < Thresholds.network_packets_in_threshold, 1, 0)
    elif metric_name == 'NetworkPacketsOut':
        df['is_network_packets_out_idle'] = np.where(
            df[metric_name] < Thresholds.network_packets_out_threshold, 1, 0)
    elif metric_name == 'DiskWriteOps':
        df['is_disk_write_ops_idle'] = np.where(
            df[metric_name] < Thresholds.disk_write_ops_threshold, 1, 0)
    elif metric_name == 'DiskReadOps':
        df['is_disk_read_ops_idle'] = np.where(
            df[metric_name] < Thresholds.disk_read_ops_threshold, 1, 0)
    elif metric_name == 'DiskWriteBytes':
        df['is_disk_write_bytes_idle'] = np.where(
            df[metric_name] < Thresholds.disk_write_bytes_threshold, 1, 0)
    elif metric_name == 'DiskReadBytes':
        df['is_disk_read_bytes_idle'] = np.where(
            df[metric_name] < Thresholds.disk_read_bytes_threshold, 1, 0)
    print(df)

    # merge both metric frames on start_time via the db service helper
    frames = []
    frames.append(df)
    frames.append(df2)

    df3 = db_service.merge_metrics_on_start_time(frames)
    print(df3)

    #df2 = db_service.convert_csv_to_dataframe('metric_files/NetworkOut_i-0d4dc0ddfe07c9259.csv')
    #print(df2.info())
    #print.pprint(response)

    #print(response)
    #print(type(response))

    #df = pandas.DataFrame.from_dict(response)
    #print(df)
    return
    # UNREACHABLE: second dead region, a raw describe_instances dump
    client = boto3.client('ec2')

    response = client.describe_instances()

    response = response['Reservations']

    #response = response['Instances'][0]

    #print(response)

    #InstanceType,LaunchTime,State.Name,EbsOptimized,Tags[0].Value]

    for i in response:
        for s in i['Instances']:
            print(s['InstanceId'])
            print(s['InstanceType'])
            print(s['LaunchTime'])
            print(s['EbsOptimized'])
            print(s['State']['Name'])
            print(s['Tags'][0]['Value'])