Example #1
    def get(self):
        # get cloudtrail ec2 type changes for all instances

        # if not configured, just return
        if self.cache_man is None:
            df_fresh = super().get()
            return df_fresh

        # check cache first
        if self.cache_man.isReady():
            df_cache = self.cache_man.get(self.cache_key)
            if df_cache is not None:
                logger.debug("Found cloudtrail data in redis cache")
                return df_cache

        # if no cache, then download
        df_fresh = super().get()

        # if caching enabled, store it for later fetching
        # https://stackoverflow.com/a/57986261/4126114
        if self.cache_man.isReady():
            self.cache_man.set(self.cache_key, df_fresh)

        # done
        return df_fresh
Example #2
    def connect(self):
        logger.info("Connecting to redis cache")
        logger.debug(self.redis_args)
        import pyarrow as pa

        self.redis_client = redis.Redis(**self.redis_args)
        self.pyarrow_context = pa.default_serialization_context()
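For context, a minimal sketch of how the set/get counterparts to connect() above could round-trip a DataFrame through redis with the pyarrow serialization context. The function and parameter names below are assumptions, not the project's actual cache-manager API; also note that default_serialization_context() is deprecated in newer pyarrow releases, where a parquet or pickle round-trip is the usual replacement:

    import pyarrow as pa
    import redis

    def cache_set(redis_client, pyarrow_context, key, df, expire_seconds=86400):
        # serialize the DataFrame to bytes with pyarrow and store them in redis with an expiry
        buf = pyarrow_context.serialize(df).to_buffer().to_pybytes()
        redis_client.set(key, buf, ex=expire_seconds)

    def cache_get(redis_client, pyarrow_context, key):
        # return None on a cache miss, otherwise deserialize the bytes back into a DataFrame
        raw = redis_client.get(key)
        if raw is None:
            return None
        return pyarrow_context.deserialize(raw)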
Example #3
    def diffLatest(self):
        if self.latest_df is None:
            raise IsitfitCliError(
                "Internal dev error: Call TagsPush::pullLatest before TagsPush::diffLatest",
                self.ctx)

        if self.csv_df is None:
            raise IsitfitCliError(
                "Internal dev error: Call TagsPush::read_csv before TagsPush::diffLatest",
                self.ctx)

        # diff columns
        from .tagsCsvDiff import TagsCsvDiff
        td = TagsCsvDiff(self.latest_df, self.csv_df)
        td.noChanges()
        td.noNewInstances()
        td.getDiffCols()
        td.renamedTags()
        td.newTags()
        td.droppedTags()
        # print(td.migrations, td.old_minus_new, td.new_minus_old)
        td.anyRemaining()

        # get migrations
        import pandas as pd
        self.mig_df = pd.DataFrame(td.migrations,
                                   columns=['action', 'old', 'new'])
        logger.debug("")
        logger.debug("Tag migrations")
        if self.mig_df.shape[0] == 0:
            logger.debug("None")
        else:
            logger.debug(self.mig_df)

        logger.debug("")
Example #4
    def fetch(self):
        logger.debug("TagsSuggestBasic::fetch")
        logger.info("Counting EC2 instances")
        n_ec2_total = len(list(self.ec2_resource.instances.all()))
        msg_total = "Found a total of %i EC2 instances" % n_ec2_total
        if n_ec2_total == 0:
            from isitfit.cli.click_descendents import IsitfitCliError
            raise IsitfitCliError(msg_total, self.ctx)

        logger.warning(msg_total)

        self.tags_list = []
        from tqdm import tqdm
        desc = "Scanning EC2 instances"
        ec2_all = self.ec2_resource.instances.all()
        for ec2_obj in tqdm(ec2_all, total=n_ec2_total, desc=desc, initial=1):
            if ec2_obj.tags is None:
                tags_dict = {}
            else:
                tags_dict = self.tags_to_dict(ec2_obj)

            tags_dict['instance_id'] = ec2_obj.instance_id
            self.tags_list.append(tags_dict)

        # convert to pandas dataframe when done
        self.tags_df = self._list_to_df()
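tags_to_dict is referenced above but not shown in these snippets; a minimal sketch of what it presumably does, given that boto3 returns EC2 tags as a list of {'Key': ..., 'Value': ...} dicts (the body below is an assumption, not the project's code):

    def tags_to_dict(self, ec2_obj):
        # convert boto3's [{'Key': 'Name', 'Value': 'web-1'}, ...] into {'Name': 'web-1', ...}
        return {t['Key']: t['Value'] for t in ec2_obj.tags}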
Example #5
    def _handleEvent(self, event):
        if 'CloudTrailEvent' not in event:
            logger.debug("No CloudTrailEvent key in event. Skipping")
            return None  # ignore this situation

        ce_dict = json.loads(event['CloudTrailEvent'])

        if 'requestParameters' not in ce_dict:
            logger.debug(
                "No requestParameters key in event['CloudTrailEvent']. Skipping"
            )
            return None  # ignore this situation

        rp_dict = ce_dict['requestParameters']

        import jmespath
        nodeType = jmespath.search('instanceType', rp_dict)
        numberOfNodes = jmespath.search('numberOfNodes', rp_dict)

        ts_obj = event['EventTime']
        # ts_obj = dt.datetime.utcfromtimestamp(ts_int)
        # ts_str = ts_obj.strftime('%Y-%m-%d %H:%M:%S')

        result = {
            'ServiceName': 'Redshift',
            'ResourceName': rp_dict['clusterIdentifier'],
            'EventTime': ts_obj,  # ts_str,
            'EventName': self.eventName,
            'ResourceSize1': nodeType,
            'ResourceSize2': numberOfNodes,
        }

        return result
Example #6
    def get(self):
        # get cloudtrail ec2 type changes for all instances
        logger.debug("Downloading cloudtrail data (from %i regions)" %
                     len(self.region_include))
        df_2 = []
        import boto3

        # add some spaces for aligning the progress bars
        desc = "Cloudtrail events in all regions"
        desc = "%-50s" % desc

        iter_wrap = self.region_include
        iter_wrap = self.tqdmman(iter_wrap,
                                 desc=desc,
                                 total=len(self.region_include))
        for region_name in iter_wrap:
            boto3.setup_default_session(region_name=region_name)
            df_1 = super().get()
            df_1['Region'] = region_name  # bugfix, field name was "region" (lower-case)
            df_2.append(df_1.reset_index())

        # concatenate
        df_3 = pd.concat(df_2, axis=0, sort=False)

        # check if empty
        if df_3.shape[0] == 0:
            return df_3

        # sort again
        df_3 = df_3.set_index(
            ["Region", "ServiceName", "ResourceName",
             "EventTime"]).sort_index()

        return df_3
Example #7
    def _try_cloudwatch(self, host_id, host_region, host_created):
        try:
            df_cw = self.cloudwatch.handle_main({'Region': host_region},
                                                host_id, host_created)
            return df_cw, "ok"
        except NoCloudwatchException:
            logger.debug("Cloudwatch: data not found for %s" % host_id)
            return None, "no data"
Example #8
    def __init__(self, ctx):
        logger.debug("TagsSuggestAdvanced::constructor")

        # api manager
        self.api_man = ApiMan(tryAgainIn=2, ctx=ctx)

        # proceed with parent constructor
        return super().__init__(ctx)
Example #9
    def __init__(self, ctx):
        logger.debug("TagsSuggestBasic::constructor")
        # boto3 ec2 and cloudwatch data
        import boto3
        self.ec2_resource = boto3.resource('ec2')
        self.tags_list = []
        self.tags_df = None
        self.ctx = ctx
Example #10
    def set_ndays(self, ndays):
        self.ndays = ndays

        # set start/end dates
        dt_now_d = dt.datetime.now(pytz.utc)  # timezone-aware UTC now; a naive now() with replace(tzinfo=...) would mislabel local time as UTC
        self.StartTime = dt_now_d - dt.timedelta(days=self.ndays)
        self.EndTime = dt_now_d
        logger.debug("Metrics start..end: %s .. %s" %
                     (self.StartTime, self.EndTime))
Example #11
    def suggest(self):
        logger.debug("TagsSuggestBasic::suggest")
        logger.info("Generating suggested tags")
        from .tagsImplier import TagsImplierMain
        tags_implier = TagsImplierMain(self.tags_df)
        self.suggested_df = tags_implier.imply()
        self.csv_fn = dump_df_to_csv(self.suggested_df,
                                     'isitfit-tags-suggestBasic-')
        self.suggested_shape = self.suggested_df.shape
Example #12
    def get_metrics_all(self, aws_id):
        # convert aws ID to datadog hostname
        if self.map_aws_dd is None:
            self.build_map_aws_dd()
            if self.map_aws_dd is None:
                raise Exception("Failed to build aws-datadog ID map")

        # fail if not found
        if aws_id not in self.map_aws_dd:
            raise HostNotFoundInDdg(
                "Did not find host aws ID %s in datadog reverse map" % aws_id)

        dd_hostname = self.map_aws_dd[aws_id]

        # FIXME: we already have cpu from cloudwatch, so maybe just focus on ram from datadog
        logger.debug(
            "Fetching datadog data for aws ID %s, datadog hostname %s" %
            (aws_id, dd_hostname))
        ddgL2 = DatadogAssistant(self.start, self.end, dd_hostname)
        df_cpu_max = ddgL2.get_metrics_cpu_max()
        df_cpu_min = ddgL2.get_metrics_cpu_min()
        df_cpu_avg = ddgL2.get_metrics_cpu_avg()
        df_ram_max = ddgL2.get_metrics_ram_max()
        df_ram_min = ddgL2.get_metrics_ram_min()
        df_ram_avg = ddgL2.get_metrics_ram_avg()
        df_count = ddgL2.get_metrics_count()
        df_all = (
            df_cpu_max
            .merge(df_cpu_min, how='outer', on=['ts_dt'])
            .merge(df_cpu_avg, how='outer', on=['ts_dt'])
            .merge(df_ram_max, how='outer', on=['ts_dt'])
            .merge(df_ram_min, how='outer', on=['ts_dt'])
            .merge(df_ram_avg, how='outer', on=['ts_dt'])
            .merge(df_count, how='outer', on=['ts_dt'])
        )
        df_all = df_all[[
            'ts_dt', 'cpu_used_max', 'cpu_used_min', 'cpu_used_avg',
            'ram_used_max', 'ram_used_min', 'ram_used_avg', 'nhours'
        ]]

        # convert from datetime to date to be able to merge with cloudtrail
        df_all['ts_dt'] = df_all.ts_dt.dt.date

        # rename like cloudwatch
        df_all.rename(columns={'ts_dt': 'Timestamp'}, inplace=True)

        return df_all
Example #13
    def _try_datadog(self, aws_id):
        if not self.datadog.is_configured():
            return None, "not configured"

        try:
            df_ddg = self.datadog.get_metrics_all(aws_id)
            return df_ddg, "ok"
        except HostNotFoundInDdg as e:
            logger.debug("Datadog: host not found for aws ID %s: %s" %
                         (aws_id, str(e)))
            return None, "host not found"
        except DataNotFoundForHostInDdg as e:
            logger.debug("Datadog: data not found for aws ID %s: %s" %
                         (aws_id, str(e)))
            return None, "no data"
Example #14
    def handle_host(self, host_id, host_region, host_created):
        logger.debug("host id, region, created: %s, %s, %s" %
                     (host_id, host_region, host_created))

        self.status[host_id] = {
            'ID': host_id,
            'datadog': 'Did not try',
            'cloudwatch': 'Did not try'
        }
        df, status = self._try_datadog(host_id)
        self.status[host_id]['datadog'] = status
        if status != 'ok':
            # "df is None" and status!=ok are equivalent
            df, status = self._try_cloudwatch(host_id, host_region,
                                              host_created)
            self.status[host_id]['cloudwatch'] = status

        return df
Example #15
    def iterator2metric(self, metrics_iterator, rc_id):

        #logger.debug("redshift cluster details")
        #logger.debug(rc_describe_entry)

        for m_i in metrics_iterator:

            # skip node stats for now, and focus on cluster stats
            # i.e. dimensions only ClusterIdentifier, without the NodeID key
            if len(m_i.dimensions) > 1:
                continue

            # exit the for loop and return this particular metric (cluster)
            return m_i

        # in case no cluster metrics found
        logger.debug("No cloudwatch metrics found for %s" % rc_id)
        raise_noCwExc(rc_id)
Example #16
    def handle_main(self, rc_describe_entry, rc_id, rc_created):
        logger.debug("Fetching cloudwatch data for resource %s" % rc_id)

        # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/cloudwatch.html#metric
        self.assistant.set_resource(region_name=rc_describe_entry['Region'])

        metrics_iterator = self.assistant.id2iterator(
            rc_id, self.cloudwatch_namespace, self.entry_keyId)

        # filter for 1 metric
        metric_single = self.assistant.iterator2metric(metrics_iterator, rc_id)
        response = self.assistant.metric2stats(metric_single)

        # dataframe of CPU Utilization, max and min, over 90 days
        df = self.assistant.stats2df(response, rc_id, rc_created,
                                     self.cloudwatch_namespace)

        return df
Example #17
    def handle_pre(self, context_pre):
        import requests
        from cachecontrol import CacheControl
        from cachecontrol.caches.file_cache import FileCache

        from isitfit.utils import logger

        logger.debug("Downloading ec2 catalog (cached to local file)")

        # based on URL = 'http://www.ec2instances.info/instances.json'
        # URL = 's3://...csv'
        # Edit 2019-09-10 use CDN link instead of direct gitlab link
        if self.allow_ec2_different_family:
            URL = 'https://cdn.jsdelivr.net/gh/autofitcloud/[email protected]/www.ec2instances.info/t3c_smaller_familyNone.json'
        else:
            # URL = 'https://gitlab.com/autofitcloud/www.ec2instances.info-ec2op/raw/master/www.ec2instances.info/t3b_smaller_familyL2.json'
            URL = 'https://cdn.jsdelivr.net/gh/autofitcloud/[email protected]/www.ec2instances.info/t3b_smaller_familyL2.json'

        # Update 2019-12-03: move into /tmp/isitfit/
        # fc_dir = '/tmp/isitfit_ec2info.cache'
        from isitfit.dotMan import DotMan
        import os
        fc_dir = os.path.join(DotMan().tempdir(), 'ec2info.cache')

        # cached https://cachecontrol.readthedocs.io/en/latest/
        sess = requests.session()
        cached_sess = CacheControl(sess, cache=FileCache(fc_dir))
        r = cached_sess.request('get', URL)

        # read catalog, copy from ec2op-cli/ec2op/optimizer/cwDailyMaxMaxCpu
        import json
        j = json.dumps(r.json(), indent=4, sort_keys=True)
        from pandas import read_json
        df = read_json(j, orient='split')

        # Edit 2019-09-13 no need to subsample the columns at this stage
        # df = df[['API Name', 'Linux On Demand cost']]

        df = df.rename(columns={'Linux On Demand cost': 'cost_hourly'})
        # df = df.set_index('API Name') # need to use merge, not index
        context_pre['df_cat'] = df

        return context_pre
Example #18
    def read(self):
        # in case of first run
        self._create()

        # insert "new" migrations
        df_mer = self._insertNew()

        # append docstrings
        df_mer['description'] = df_mer.func.apply(
            lambda x: x.__doc__.strip() if x.__doc__ is not None else None)

        logger.debug("Migrations")
        logger.debug(df_mer[['migname', 'executed', 'description']])

        # subset for those that don't have an executed date yet
        df_mer = df_mer[df_mer.executed.isna()]

        # save
        self.df_mig = df_mer
Example #19
    def metric2stats(self, metric):
        """
        For newly created instances, the Timestamp field is not reliable from here.
        It needs postprocessing by stats2df.
        For example, if today is 2019-12-17, an instance created today could return
        Timestamp=datetime.datetime(2019, 12, 13, 9, 0, tzinfo=tzutc())
        """
        logger.debug("fetch cw")
        logger.debug(metric.dimensions)

        # util func
        # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/cloudwatch.html#CloudWatch.Metric.get_statistics
        # https://docs.aws.amazon.com/redshift/latest/mgmt/metrics-listing.html
        #
        # Note for redshift cluster
        # remember that max for redshift cluster = max of stats of all nodes
        response = metric.get_statistics(
            Dimensions=metric.dimensions,
            StartTime=self.StartTime,
            EndTime=self.EndTime,
            Period=SECONDS_IN_ONE_DAY,
            Statistics=['Minimum', 'Average', 'Maximum', 'SampleCount'],
            Unit='Percent')
        logger.debug(response)
        return response
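For reference, the response consumed downstream by stats2df (Example #24) is the standard CloudWatch get_statistics dict; a representative shape, with made-up values:

    import datetime as dt
    import pytz

    # illustrative structure only; the numbers are invented
    response = {
        'Label': 'CPUUtilization',
        'Datapoints': [
            {'Timestamp': dt.datetime(2019, 12, 13, 9, 0, tzinfo=pytz.utc),
             'Minimum': 1.2, 'Average': 7.5, 'Maximum': 55.0,
             'SampleCount': 1440.0, 'Unit': 'Percent'},
            # one dict per Period; with Period=SECONDS_IN_ONE_DAY this means one per day
        ],
    }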
Example #20
    def _initCache(self):
        """
        Try to load region_include from cache.
        """
        if self.filter_region is not None:
            self.region_include = [self.filter_region]
            self.regionInclude_ready = True
            return

        # need to use the profile name
        # because a profile could have ec2 in us-east-1
        # whereas another could have ec2 in us-west-1
        import boto3
        profile_name = boto3.session.Session().profile_name

        # cache filename and key to use
        # Update 2019-12-03: move from ~/.isitfit to /tmp/isitfit/
        from isitfit.dotMan import DotMan
        import os
        cache_filename = 'iterator_cache-%s-%s.pkl' % (profile_name,
                                                       self.service_name)
        cache_filename = os.path.join(DotMan().tempdir(), cache_filename)

        # set of keys to save in local cache file with simple_cache
        self.simpleCacheMan = SimpleCacheMan(filename=cache_filename,
                                             namespace="iterator")

        # load cached keys
        ri_cached = self.simpleCacheMan.load_key(key='region_include')
        if ri_cached is not None:
            logger.debug("Loading regions containing EC2 from cache file")
            self.region_include = ri_cached
            self.regionInclude_ready = True

        ri_cached = self.simpleCacheMan.load_key(key='region_accessdenied')
        if ri_cached is not None:
            self.region_accessdenied = ri_cached
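SimpleCacheMan itself is not shown in these snippets; a minimal stand-in with the interface used above (load_key/save_key and a .filename attribute) could be a pickled dict on disk with keys scoped by namespace. This is an assumption about its behaviour, not the project's implementation:

    import os
    import pickle

    class SimpleCacheMan:
        def __init__(self, filename, namespace):
            self.filename = filename
            self.namespace = namespace

        def _load_all(self):
            # read the whole cache file, or start empty if it does not exist yet
            if not os.path.exists(self.filename):
                return {}
            with open(self.filename, 'rb') as fh:
                return pickle.load(fh)

        def load_key(self, key):
            # return None when the key was never saved
            return self._load_all().get('%s/%s' % (self.namespace, key))

        def save_key(self, key, value):
            data = self._load_all()
            data['%s/%s' % (self.namespace, key)] = value
            with open(self.filename, 'wb') as fh:
                pickle.dump(data, fh)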
Example #21
    def _handleEvent(self, event):
        if 'CloudTrailEvent' not in event:
            logger.debug("No CloudTrailEvent key in event. Skipping")
            return None  # ignore this situation

        ce_dict = json.loads(event['CloudTrailEvent'])

        if 'requestParameters' not in ce_dict:
            logger.debug(
                "No requestParameters key in event['CloudTrailEvent']. Skipping"
            )
            return None  # ignore this situation

        rp_dict = ce_dict['requestParameters']
        newType = None

        #newType = jmespath.search('instanceType', rp_dict)
        #if newType is None:
        #  newType = jmespath.search('attributeName==`instanceType`', rp_dict)

        if 'instanceType' in rp_dict:
            # logging.error(json.dumps(rp_dict))
            newType = rp_dict['instanceType']['value']

        if 'attribute' in rp_dict:
            if rp_dict['attribute'] == 'instanceType':
                newType = rp_dict['value']

        if newType is None:
            return None

        ts_obj = event['EventTime']
        # ts_obj = dt.datetime.utcfromtimestamp(ts_int)
        # ts_str = ts_obj.strftime('%Y-%m-%d %H:%M:%S')

        if 'instanceId' not in rp_dict:
            logger.debug("No instanceId key in requestParameters. Skipping")
            return None  # ignore this situation

        result = {
            'ServiceName': 'EC2',
            'EventName': self.eventName,
            'EventTime': ts_obj,  # ts_str,
            'ResourceName': rp_dict['instanceId'],
            'ResourceSize1': newType,
            'ResourceSize2': None
        }

        return result
Example #22
    def after_all(self, context_all):
        # for debugging
        df_all = pd.DataFrame(self.df_all)
        logger.debug("\ncapacity/used per instance")
        logger.debug(df_all)
        logger.debug("\n")

        # set n analysed
        context_all['n_ec2_analysed'] = len(self.df_all)

        # dump to csv for details
        if self.save_details:
            import click

            # display message for first file
            csvi_desc = 'Per ec2 and day'
            msg_info = "💾 Detail file 1/2: %s: %s" % (
                csvi_desc, self.csv_fn_intermediate.name)
            msg_info = colored(msg_info, "cyan")
            click.echo(msg_info)

            # save 2nd file and display message
            import tempfile
            from isitfit.dotMan import DotMan
            csvi_prefix = 'isitfit-cost-analyze-ec2-details-2-'
            csv_fh_final = tempfile.NamedTemporaryFile(prefix=csvi_prefix,
                                                       suffix='.csv',
                                                       delete=False,
                                                       dir=DotMan().tempdir())

            df_all.to_csv(csv_fh_final.name, index=False)

            # display message about 2nd file
            csvi_desc = 'Per ec2 only   '  # 3 spaces just to align with "Per ec2 and day"
            msg_info = "💾 Detail file 2/2: %s: %s" % (csvi_desc, csv_fh_final.name)
            msg_info = colored(msg_info, "cyan")
            click.echo(msg_info)

            click.echo(
                colored(
                    "Consider viewing the CSVs in the terminal with visidata: `vd file.csv` (http://visidata.org/).",
                    "cyan"))

            click.echo("")  # empty breather line
        return context_all
Example #23
    def tag_list(self):
        logger.info("Step 4: convert the set of tags to a list of tags")

        df_ori = self.df_ori

        # initialize
        # just doing [[None]*3]*len_ori doesn't work
        df_ori['tag_list'] = None
        for i1 in range(self.len_ori):
            df_ori.at[i1, 'tag_list'] = [None] * 3

        # distributing the tag_set to tag_1, tag_2, tag_3 in such a way that for example "app" is at tag_1 for all the instances
        tag_processed = set()
        for i1 in range(self.len_ori):
            for tag_value in df_ori.iloc[i1].tag_set:
                if tag_value in tag_processed:
                    continue

                tag_processed.add(tag_value)
                logger.debug("<<<<<<<<>>>>>>>>>>>>")
                logger.debug("%i: %s" % (i1, tag_value))
                logger.debug(df_ori)

                if tag_value in df_ori.at[i1, 'tag_list']:
                    continue  # already inserted this tag

                # find free indices in current list
                if None not in df_ori.at[i1, 'tag_list']:
                    raise Exception("No more space in list for %s" % tag_value)

                # https://stackoverflow.com/a/6294205/4126114
                free_indices = [
                    i for i, x in enumerate(df_ori.at[i1, 'tag_list'])
                    if x is None
                ]

                # find the first free index which is ok for all entries having this tag
                free_chosen = None
                logger.debug("Searching for free index for %s" % tag_value)
                for free_i1 in free_indices:
                    found_conflict = False
                    for i2 in range(self.len_ori):
                        if found_conflict: break
                        if i2 <= i1: continue
                        logger.debug("Checking row %i" % i2)
                        # if tag in set of tags for this 2nd row
                        if tag_value in df_ori.loc[i2].tag_set:
                            # and if the value for this tag is not *already* set
                            if tag_value not in df_ori.loc[i2].tag_list:
                                if df_ori.loc[i2,
                                              'tag_list'][free_i1] is not None:
                                    logger.debug("Found conflict")
                                    found_conflict = True

                    if not found_conflict:
                        logger.debug("Found chosen free index at %i" % free_i1)
                        free_chosen = free_i1
                        break

                # if no free index chosen, raise Exception
                if free_chosen is None:
                    raise Exception(
                        "Conflict found: %s didn't find a free index to use" %
                        (tag_value))

                # otherwise use the chosen index
                # Old way of getting first None only # free_chosen = df_ori.at[i1, 'tag_list'].index(None)
                free_chosen = free_i1
                df_ori.at[i1, 'tag_list'][free_chosen] = tag_value

                # set this tag for all other rows at "free_chosen"
                for i2 in range(self.len_ori):
                    if i2 <= i1: continue
                    if tag_value in df_ori.loc[i2].tag_set:
                        if tag_value not in df_ori.loc[i2].tag_list:
                            if df_ori.loc[i2,
                                          'tag_list'][free_chosen] is not None:
                                raise Exception(
                                    "Conflict found despite pre-check? %s wants to be at %i but found %s already"
                                    %
                                    (tag_value, free_chosen,
                                     df_ori.loc[i2, 'tag_list'][free_chosen]))

                        df_ori.at[i2, 'tag_list'][free_chosen] = tag_value

        # mesh out the tag_list to tag_1 tag_2 tag_3
        df_ori['tag_1'] = df_ori.tag_list.apply(lambda x: x[1 - 1])
        df_ori['tag_2'] = df_ori.tag_list.apply(lambda x: x[2 - 1])
        df_ori['tag_3'] = df_ori.tag_list.apply(lambda x: x[3 - 1])

        # re-order columns
        df_ori = df_ori.rename(columns={'original': 'instance_name'})
        df_ori = df_ori[[
            'instance_id', 'instance_name', 'tag_1', 'tag_2', 'tag_3'
        ]]

        # done
        #print("")
        #print("tagged")
        #print(df_ori)

        self.df_ori = df_ori
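To make the column-assignment goal concrete, a toy walk-through with hypothetical instance names and tags (independent of the class above; set iteration order may vary, so this is one possible processing order):

    # web-1  tag_set = {"app", "env"}
    # web-2  tag_set = {"env", "team"}
    # web-3  tag_set = {"app", "team"}
    #
    # Processing row by row: "app" takes the first index that is free for every
    # instance carrying it (index 0), "env" takes index 1, and "team" cannot use
    # index 0 because web-3 already holds "app" there, so it falls through to index 2:
    #
    # web-1 -> tag_1=app,  tag_2=env,  tag_3=None
    # web-2 -> tag_1=None, tag_2=env,  tag_3=team
    # web-3 -> tag_1=app,  tag_2=None, tag_3=team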
Example #24
    def stats2df(self, response_metric, rc_id, ClusterCreateTime,
                 cloudwatch_namespace):
        if len(response_metric['Datapoints']) == 0:
            raise_noCwExc(rc_id)

        # convert to dataframe
        df = pd.DataFrame(response_metric['Datapoints'])

        # edit 2019-09-13: no need to subsample columns
        # The initial goal was to drop the "Unit" column (which just said "Percent"),
        # but it's not such a big deal, and avoiding this subsampling simplifies the code a bit
        # df = df[['Timestamp', 'SampleCount', 'Average']]

        # sort and append in case of multiple metrics
        df = df.sort_values(['Timestamp'], ascending=True)

        # before returning, convert dateutil timezone to pytz
        # for https://github.com/pandas-dev/pandas/issues/25423
        # via https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.dt.tz_convert.html
        # Edit 2019-09-25 Instead of keeping the full timestamp, just truncate to date, especially that this is just daily data
        # df['Timestamp'] = df.Timestamp.dt.tz_convert(pytz.utc)
        df['Timestamp'] = df.Timestamp.dt.date

        # cloudwatch bug: a newly created instance today will return a Timestamp before today
        # In this case, correcting the timestamp
        # Update 2019-12-17 This was fixed by setting the EndTime.hour,minute,second to the next midnight
        #if df.shape[0]==1:
        #  dt_now = dt.datetime.now().date()
        #  if ClusterCreateTime.date() == dt_now:
        #    if df.Timestamp.iloc[0] != dt_now:
        #      raise Exception("This cloudwatch bug was fixed by setting the hours/minutes/seconds of start/end time (location 1)")
        #      df.iloc[0, df.columns=='Timestamp'] = dt_now
        #
        ## drop points "before create time" (bug in cloudwatch?)
        ## Edit 2019-11-18 since this is daily data, and we don't really care about hours/minutes, just compare the y-m-d parts
        ## Update 2019-12-16 This is a weird bug
        #idx_cwbug = df['Timestamp'] >= ClusterCreateTime.date()
        #if not idx_cwbug.all():
        #  raise Exception("This cloudwatch bug was fixed by setting the hours/minutes/seconds of start/end time (location 2)")
        #  logger.debug("Cloudwatch bug of metric data after resource creation time: %s"%rc_id)

        #df = df[ idx_cwbug ]
        #if df.shape[0]==0: raise_noCwExc(rc_id)

        # calculate number of running hours
        # In the latest 90 days, sampling is per minute in cloudwatch
        # https://aws.amazon.com/cloudwatch/faqs/
        # Q: What is the minimum resolution for the data that Amazon CloudWatch receives and aggregates?
        # A: ... For example, if you request for 1-minute data for a day from 10 days ago, you will receive the 1440 data points ...
        if cloudwatch_namespace == 'AWS/EC2':
            df['nhours'] = np.ceil(df.SampleCount / 60)
        elif cloudwatch_namespace == 'AWS/Redshift':
            # Redshift cloudwatch metrics are every 30 seconds (this seems to be the case by trial and error)
            # X points * 0.5 mins/point / 60 minutes/hr = Y hours
            df['nhours'] = np.ceil(df.SampleCount / 60 / 2)

        # rename columns
        df.rename(columns={
            'Maximum': 'cpu_used_max',
            'Average': 'cpu_used_avg',
            'Minimum': 'cpu_used_min',
        }, inplace=True)

        # append nan for memory
        df['ram_used_max'] = np.nan
        df['ram_used_avg'] = np.nan
        df['ram_used_min'] = np.nan

        logger.debug("returning dataframe.head")
        logger.debug(df.head())

        # print
        return df
Example #25
    def display(self):
        logger.debug("TagsSuggestBasic::display")
        from ..utils import display_df
        display_df("Suggested tags:", self.suggested_df, self.csv_fn,
                   self.suggested_shape, logger)
Example #26
    def iterate_core(self, display_tqdm=False):
        fx_l = [
            'service_name', 'service_description', 'paginator_name',
            'paginator_entryJmespath', 'paginator_exception', 'entry_keyId',
            'entry_keyCreated'
        ]
        for fx_i in fx_l:
            # https://stackoverflow.com/a/9058315/4126114
            if fx_i not in self.__class__.__dict__.keys():
                raise Exception("Derived class should set %s" % fx_i)

        # iterate on regions
        import botocore
        import boto3
        import jmespath
        redshift_regions_full = boto3.Session().get_available_regions(
            self.service_name)
        import copy
        redshift_regions_sub = copy.deepcopy(redshift_regions_full)
        # redshift_regions_sub = ['us-west-2'] # FIXME

        if self.filter_region is not None:
            if self.filter_region not in redshift_regions_sub:
                msg_err = "Invalid region specified: %s. Supported values: %s"
                msg_err = msg_err % (self.filter_region,
                                     ", ".join(redshift_regions_sub))
                raise IsitfitCliError(msg_err,
                                      None)  # passing None for click context

            # over-ride
            redshift_regions_sub = [self.filter_region]

        # Before iterating, display a message that skipping some regions due to load from cache
        # The following conditions = region_include was loaded from cache
        if (self.regionInclude_ready
                and len(redshift_regions_sub) != len(self.region_include)
                and not self.displayed_willskip):
            msg1 = "%s: Will skip %i out of %i regions which were either empty or inaccessible. To re-check, delete the local cache file %s"
            msg1 = msg1 % (self.service_description,
                           len(redshift_regions_sub) -
                           len(self.region_include), len(redshift_regions_sub),
                           self.simpleCacheMan.filename)
            import click
            click.echo(colored(msg1, "yellow"))
            self.displayed_willskip = True

        # iterate
        region_iterator = redshift_regions_sub
        if display_tqdm:
            # add some spaces for aligning the progress bars
            desc = "%s, counting in all regions     " % self.service_description
            desc = "%-50s" % desc
            region_iterator = self.tqdmman(region_iterator,
                                           total=len(redshift_regions_sub),
                                           desc=desc)

        for region_name in region_iterator:
            if self.regionInclude_ready and self.filter_region is None:
                if region_name not in self.region_include:
                    # skip since already failed to use it
                    continue

            logger.debug("Region %s" % region_name)
            boto3.setup_default_session(region_name=region_name)

            # boto3 clients
            # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/redshift.html#Redshift.Client.describe_logging_status
            # Update 2019-12-09
            #   Unfolding the iterator can cause a rate limiting error for accounts with more than 200 EC2
            #   as reported by u/moofishies on 2019-11-12
            #   Similar to: https://github.com/boto/botocore/pull/891#issuecomment-303526763
            #   The max_attempts config here is increased from the default 4 to decrease the rate limiting chances
            #   https://github.com/boto/botocore/pull/1260
            #   Note that with each extra retry, an exponential backoff is already implemented inside botocore
            #   More: https://botocore.amazonaws.com/v1/documentation/api/latest/reference/config.html
            from botocore.config import Config
            service_client = boto3.client(
                self.service_name, config=Config(retries={'max_attempts': 10}))

            # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/cloudwatch.html#metric
            self.cloudwatch_resource = boto3.resource('cloudwatch')

            # iterate on service resources, eg ec2 instances, redshift clusters
            paginator = service_client.get_paginator(self.paginator_name)
            rc_iterator = paginator.paginate()
            try:
                region_anyClusterFound = False
                for rc_describe_page in rc_iterator:
                    rc_describe_entries = jmespath.search(
                        self.paginator_entryJmespath, rc_describe_page)
                    for rc_describe_entry in rc_describe_entries:
                        region_anyClusterFound = True
                        # add field for region
                        rc_describe_entry['Region'] = region_name
                        # yield
                        yield rc_describe_entry

                if not self.regionInclude_ready and self.filter_region is None:
                    if region_anyClusterFound:
                        # only include if found clusters in this region
                        self.region_include.append(region_name)

            except botocore.exceptions.ClientError as e:
                # Exception that means "no access to region"
                if e.response['Error']['Code'] == self.paginator_exception:
                    continue

                # eg if user doesn't have access to arn:aws:redshift:ap-northeast-1:974668457921:cluster:*
                # it could be because of specific access to region, or general access to the full redshift service
                # Note: capturing this exception means that the region is no longer included in the iterator, but it will still iterate over other regions
                if e.response['Error']['Code'] == 'AccessDenied':
                    self.region_accessdenied.append(e)
                    continue

                # Handle error:
                # botocore.exceptions.ClientError: An error occurred (InvalidClientTokenId) when calling the AssumeRole operation: The security token included in the request is invalid.
                # Not sure what this means, but maybe that a role is not allowed to assume into a region?
                # This error can be raised for example with using my local AWS profile "afc_external_readCur".
                # Here is an excerpt from my ~/.aws/credentials file
                # # Role created in Autofitcloud giving access to shadiakiki1986 to read CUR S3
                # [afc_external_readCur]
                # role_arn = arn:aws:iam::123456789:role/external-read-athena-role-ExternalReadCURRole-abcdef
                # source_profile = a_user_profile_not_a_role
                # region = us-east-1
                if e.response['Error']['Code'] == 'InvalidClientTokenId':
                    continue

                # after setting up the InvalidClientTokenId filter above on the profile afc_external_readCur,
                # faced error: botocore.exceptions.ClientError: An error occurred (UnauthorizedOperation) when calling the DescribeInstances operation: You are not authorized to perform this operation.
                if e.response['Error']['Code'] == 'UnauthorizedOperation':
                    continue

                # all other exceptions raised
                raise e

        # before exiting, check if a count just completed, and mark region_include as usable
        if not self.regionInclude_ready and self.filter_region is None:
            self.regionInclude_ready = True

            # save to cache
            self.simpleCacheMan.save_key(key='region_include',
                                         value=self.region_include)
            self.simpleCacheMan.save_key(key='region_accessdenied',
                                         value=self.region_accessdenied)

        # before exiting, if got some AccessDenied errors, display to user
        # Note 1: originally, I wanted to break the iterator on the 1st AccessDenied error,
        # thinking that it's because the user doesn't have permission to the service as a whole.
        # Later, I figured out that maybe the user has permission to a subset of regions,
        # in which case getting an error on region R1 is normal,
        # and the iterator should still proceed to the next region R2.
        if not self.displayed_accessdenied and len(self.region_accessdenied) > 0:
            # 1st part goes to stdout
            msgx = "AWS returned AccessDenied errors on %i out of %i regions. Use `isitfit --verbose ...` and re-run the command for more details"
            msgx = msgx % (len(
                self.region_accessdenied), len(redshift_regions_sub))
            import click
            click.echo(colored(msgx, "yellow"))

            # 2nd part is too long, send it to --verbose
            msg2 = "\n".join(
                ["- %s" % str(e) for e in self.region_accessdenied])
            msgx = "Here are the full error messages:\n%s"
            msgx = msgx % (msg2)
            logger.info(colored(msgx, "yellow"))

            self.displayed_accessdenied = True
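For orientation, a sketch of what a derived iterator class might define, given the class attributes that iterate_core checks for at the top. The class names and concrete values below are illustrative assumptions, not taken from the project:

    class RedshiftClusterIterator(BaseIterator):  # hypothetical class names
        service_name = 'redshift'                  # boto3 service name
        service_description = 'Redshift clusters'  # used in progress bars and messages
        paginator_name = 'describe_clusters'       # boto3 paginator to call per region
        paginator_entryJmespath = 'Clusters[]'     # jmespath extracting entries from each page
        paginator_exception = 'SomeAccessErrorCode'  # placeholder: error code treated as "no access to this region"
        entry_keyId = 'ClusterIdentifier'          # field holding the resource identifier
        entry_keyCreated = 'ClusterCreateTime'     # field holding the creation timestamp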
Example #27
    def _handleEvent(self, event):
        # logger.debug("Cloudtrail event: %s"%json.dumps(event, default=json_serial))

        if 'Resources' not in event:
            logger.debug("No 'Resources' key in event. Skipping")
            return None  # ignore this situation

        instanceId = [
            x for x in event['Resources']
            if x['ResourceType'] == 'AWS::EC2::Instance'
        ]
        if len(instanceId) == 0:
            logger.debug("No AWS EC2 instances in event. Skipping")
            return None  # ignore this situation

        # proceed
        instanceId = instanceId[0]

        if 'ResourceName' not in instanceId:
            logger.debug("No ResourceName key in event. Skipping")
            return None  # ignore this situation

        # proceed
        instanceId = instanceId['ResourceName']

        if 'CloudTrailEvent' not in event:
            logger.debug("No CloudTrailEvent key in event. Skipping")
            return None  # ignore this situation

        ce_dict = json.loads(event['CloudTrailEvent'])

        if 'requestParameters' not in ce_dict:
            logger.debug(
                "No requestParameters key in event['CloudTrailEvent']. Skipping"
            )
            return None  # ignore this situation

        if 'instanceType' not in ce_dict['requestParameters']:
            logger.debug(
                "No instanceType key in event['CloudTrailEvent']['requestParameters']. Skipping"
            )
            return None  # ignore this situation

        newType = ce_dict['requestParameters']['instanceType']

        if 'EventTime' not in event:
            logger.debug("No EventTime key in event. Skipping")
            return None  # ignore this situation

        ts_obj = event['EventTime']
        # ts_obj = dt.datetime.utcfromtimestamp(ts_int)
        # ts_str = ts_obj.strftime('%Y-%m-%d %H:%M:%S')

        result = {
            'ServiceName': 'EC2',
            'EventName': self.eventName,
            'EventTime': ts_obj,  # ts_str,
            'ResourceName': instanceId,
            'ResourceSize1': newType,
            'ResourceSize2': None
        }

        return result
Example #28
    def _handleEvent(self, event):
        # logger.debug("Cloudtrail event: %s"%json.dumps(event, default=json_serial))

        if 'Resources' not in event:
            logger.debug("No 'Resources' key in event. Skipping")
            return None  # ignore this situation

        instanceId = [
            x for x in event['Resources']
            if x['ResourceType'] == 'AWS::Redshift::Cluster'
        ]
        if len(instanceId) == 0:
            logger.debug("No AWS redshift clusters in event. Skipping")
            return None  # ignore this situation

        # proceed
        instanceId = instanceId[0]

        if 'ResourceName' not in instanceId:
            logger.debug("No ResourceName key in event. Skipping")
            return None  # ignore this situation

        # proceed
        instanceId = instanceId['ResourceName']

        if 'CloudTrailEvent' not in event:
            logger.debug("No CloudTrailEvent key in event. Skipping")
            return None  # ignore this situation

        ce_dict = json.loads(event['CloudTrailEvent'])

        import jmespath
        nodeType = jmespath.search('requestParameters.nodeType', ce_dict)
        numberOfNodes = jmespath.search('requestParameters.numberOfNodes',
                                        ce_dict)
        if numberOfNodes is None:
            numberOfNodes = jmespath.search('responseElements.numberOfNodes',
                                            ce_dict)

        if nodeType is None:
            logger.debug(
                "No nodeType key in event['CloudTrailEvent']['requestParameters']. Skipping"
            )
            return None  # ignore this situation

        if numberOfNodes is None:
            logger.debug(
                "No numberOfNodes key in event['CloudTrailEvent']['requestParameters']. Skipping"
            )
            return None  # ignore this situation

        if 'EventTime' not in event:
            logger.debug("No EventTime key in event. Skipping")
            return None  # ignore this situation

        ts_obj = event['EventTime']
        # ts_obj = dt.datetime.utcfromtimestamp(ts_int)
        # ts_str = ts_obj.strftime('%Y-%m-%d %H:%M:%S')

        result = {
            'ServiceName': 'Redshift',  # bugfix: was using Ec2 instead of Redshift
            'EventName': self.eventName,
            'EventTime': ts_obj,  # ts_str,
            'ResourceName': instanceId,
            'ResourceSize1': nodeType,
            'ResourceSize2': numberOfNodes,
        }

        return result
Example #29
    def get_ifi(self, tqdml2_obj):
        # display name of runner
        logger.info(self.description)

        # 0th pass to count
        n_ec2_total = self.ec2_it.count()

        if n_ec2_total == 0:
            import click
            click.secho("No resources found in %s" %
                        self.ec2_it.service_description,
                        fg="red")
            return

        # context for pre listeners
        context_pre = {}
        context_pre['ec2_instances'] = self.ec2_it
        context_pre['region_include'] = self.ec2_it.get_regionInclude()
        context_pre['n_ec2_total'] = n_ec2_total
        context_pre['click_ctx'] = self.ctx
        context_pre['mainManager'] = self

        # call listeners
        for l in self.listeners['pre']:
            context_pre = l(context_pre)
            if context_pre is None:
                raise Exception(
                    "Breaking the chain is not allowed in listener/pre")

        # iterate over all ec2 instances
        sum_capacity = 0
        sum_used = 0
        df_all = []
        ec2_noCloudwatch = []  # FIXME DEPRECATED
        ec2_noCloudtrail = []

        # add some spaces for aligning the progress bars
        desc = "Pass 2/2 through %s" % self.ec2_it.service_description
        desc = "%-50s" % desc

        # Edit 2019-11-12 use "initial=0" instead of "=1". Check more details in a similar note in "cloudtrail_ec2type.py"
        iter_wrap = tqdml2_obj(self.ec2_it,
                               total=n_ec2_total,
                               desc=desc,
                               initial=0)
        for ec2_dict, ec2_id, ec2_launchtime, ec2_obj in iter_wrap:

            # context dict to be passed between listeners
            context_ec2 = {}
            context_ec2['mainManager'] = self
            if 'df_cat' in context_pre:
                context_ec2['df_cat'] = context_pre['df_cat']  # copy object between contexts
            context_ec2['ec2_dict'] = ec2_dict
            context_ec2['ec2_id'] = ec2_id
            context_ec2['ec2_launchtime'] = ec2_launchtime
            context_ec2['ec2_obj'] = ec2_obj

            try:
                # call listeners
                # Listener can return None to break out of loop,
                # i.e. to stop processing with other listeners
                for l in self.listeners['ec2']:
                    context_ec2 = l(context_ec2)

                    # skip rest of listeners if one of them returned None
                    if context_ec2 is None:
                        logger.debug(
                            "Listener %s is breaking per_resource for resource %s"
                            % (l, ec2_id))
                        break

            except NoCloudtrailException:
                ec2_noCloudtrail.append(ec2_id)

            except IsitfitCliRunnerBreakIterator as e:
                # check request for breaking from the iterator loop
                # eg for isitfit cost optimize --n=1
                logger.debug("Breaking from the per-resource iterator")
                break

        # call listeners
        #logger.info("... done")
        #logger.info("")
        #logger.info("")

        # set up context
        context_all = {}
        context_all['n_ec2_total'] = n_ec2_total
        context_all['mainManager'] = self
        context_all['region_include'] = self.ec2_it.region_include
        if 'df_cat' in context_pre:
            context_all['df_cat'] = context_pre['df_cat']  # copy object between contexts

        # more
        context_all['ec2_noCloudwatch'] = ec2_noCloudwatch  # FIXME DEPRECATED
        context_all['ec2_noCloudtrail'] = ec2_noCloudtrail
        context_all['click_ctx'] = self.ctx

        # call listeners
        for l in self.listeners['all']:
            context_all = l(context_all)
            if context_all is None:
                raise Exception(
                    "Breaking the chain is not allowed in listener/all: %s" %
                    str(l))

        # done
        #logger.info("")
        return context_all
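For orientation, a minimal sketch of a listener in this chain, following only the contract visible above: each listener receives the context dict and returns it, and an 'ec2' listener may return None to skip the remaining 'ec2' listeners for that resource. The listener below and the registration line are hypothetical:

    def count_tags_listener(context_ec2):
        # annotate the per-instance context with the number of tags on the instance
        ec2_obj = context_ec2['ec2_obj']
        context_ec2['n_tags'] = 0 if ec2_obj.tags is None else len(ec2_obj.tags)
        return context_ec2

    # registration, assuming the runner exposes its listeners dict as in get_ifi above:
    # runner.listeners['ec2'].append(count_tags_listener)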
Example #30
    def processPush(self, dryRun: bool):
        # max ec2 per call is 20
        # but just doing 1 at a time for now
        # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/resourcegroupstaggingapi.html#ResourceGroupsTaggingAPI.Client.tag_resources
        import boto3
        tagging_client = boto3.client('resourcegroupstaggingapi')
        ec2_resource = boto3.resource('ec2')
        account_id = boto3.client('sts').get_caller_identity()['Account']

        import json
        preproc = lambda x: x[sorted(list(x.columns))].set_index('instance_id')
        self.latest_df = preproc(self.latest_df)
        self.csv_df = preproc(self.csv_df)
        from tqdm import tqdm
        runType_prefix = "Dry run" if dryRun else "Live"
        for instance_id, row_new in tqdm(self.csv_df.iterrows(),
                                         total=self.csv_df.shape[0],
                                         desc="Tag CSV row (%s)" %
                                         runType_prefix,
                                         initial=1):
            row_old = self.latest_df.loc[instance_id]
            tags_new = row_new.to_dict()
            tags_old = row_old.to_dict()
            if tags_new == tags_old:
                logger.debug("Skipping %s since no changes" % instance_id)
                continue

            # keeping only changed keys
            keys_dotag = {}
            for k in tags_new:
                if not tags_new[k]:
                    continue  # empty tags are skipped

                if k not in tags_old:
                    keys_dotag[k] = tags_new[k]
                    continue

                if tags_new[k] != tags_old[k]:
                    keys_dotag[k] = tags_new[k]
                    continue

            # proceed with untagging
            keys_untag = []
            for k in tags_old:
                if not tags_old[k]:
                    continue  # empty tags are skipped

                if k not in tags_new:
                    keys_untag.append(k)

            if not keys_dotag and not keys_untag:
                continue

            # if any of them set:
            instance_obj = ec2_resource.Instance(instance_id)
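            # placement['AvailabilityZone'] looks like "us-east-1a"; dropping the trailing letter gives the region used in the ARN below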
            instance_arn = 'arn:aws:ec2:%s:%s:instance/%s' % (
                instance_obj.placement['AvailabilityZone'][:-1], account_id,
                instance_id)

            if keys_dotag:
                logger.debug(
                    "[%s] Will tag %s with %s" %
                    (runType_prefix, instance_id, json.dumps(keys_dotag)))
                if not dryRun:
                    response = tagging_client.tag_resources(
                        ResourceARNList=[instance_arn], Tags=keys_dotag)

            if keys_untag:
                logger.debug(
                    "[%s] Will untag %s with %s" %
                    (runType_prefix, instance_id, json.dumps(keys_untag)))
                if not dryRun:
                    response = tagging_client.untag_resources(
                        ResourceARNList=[instance_arn], TagKeys=keys_untag)

        if dryRun:
            from termcolor import colored
            logger.info(
                colored(
                    "This was a dry run. Execute the same command again with `--not-dry-run` for actual tags push to aws ec2",
                    "red"))