def get(self):
    # get cloudtrail ec2 type changes for all instances

    # if not configured, just return
    if self.cache_man is None:
        df_fresh = super().get()
        return df_fresh

    # check cache first
    if self.cache_man.isReady():
        df_cache = self.cache_man.get(self.cache_key)
        if df_cache is not None:
            logger.debug("Found cloudtrail data in redis cache")
            return df_cache

    # if no cache, then download
    df_fresh = super().get()

    # if caching enabled, store it for later fetching
    # https://stackoverflow.com/a/57986261/4126114
    if self.cache_man.isReady():
        self.cache_man.set(self.cache_key, df_fresh)

    # done
    return df_fresh
def connect(self):
    logger.info("Connecting to redis cache")
    logger.debug(self.redis_args)

    import pyarrow as pa

    self.redis_client = redis.Redis(**self.redis_args)
    self.pyarrow_context = pa.default_serialization_context()
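# --- Illustrative sketch, not part of the original source ---
# How a pandas DataFrame could be round-tripped through the redis client and pyarrow
# serialization context created in connect() above, following the pattern linked from
# the cache get() (https://stackoverflow.com/a/57986261/4126114). The key name and
# expiry below are made up, and pa.default_serialization_context() is deprecated in
# recent pyarrow releases.
import pandas as pd
import pyarrow as pa
import redis

def _example_df_roundtrip(redis_args):
    redis_client = redis.Redis(**redis_args)
    context = pa.default_serialization_context()

    df_in = pd.DataFrame({'instance_id': ['i-123'], 'cpu_used_max': [42.0]})

    # DataFrame -> bytes -> redis (1-hour expiry, illustrative)
    payload = context.serialize(df_in).to_buffer().to_pybytes()
    redis_client.set('example:cloudtrail_df', payload, ex=3600)

    # redis -> bytes -> DataFrame
    df_out = context.deserialize(redis_client.get('example:cloudtrail_df'))
    return df_out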
def diffLatest(self):
    if self.latest_df is None:
        raise IsitfitCliError(
            "Internal dev error: Call TagsPush::pullLatest before TagsPush::diffLatest",
            self.ctx)

    if self.csv_df is None:
        raise IsitfitCliError(
            "Internal dev error: Call TagsPush::read_csv before TagsPush::diffLatest",
            self.ctx)

    # diff columns
    from .tagsCsvDiff import TagsCsvDiff
    td = TagsCsvDiff(self.latest_df, self.csv_df)
    td.noChanges()
    td.noNewInstances()
    td.getDiffCols()
    td.renamedTags()
    td.newTags()
    td.droppedTags()
    # print(td.migrations, td.old_minus_new, td.new_minus_old)
    td.anyRemaining()

    # get migrations
    import pandas as pd
    self.mig_df = pd.DataFrame(td.migrations, columns=['action', 'old', 'new'])
    logger.debug("")
    logger.debug("Tag migrations")
    if self.mig_df.shape[0] == 0:
        logger.debug("None")
    else:
        logger.debug(self.mig_df)

    logger.debug("")
def fetch(self):
    logger.debug("TagsSuggestBasic::fetch")
    logger.info("Counting EC2 instances")
    n_ec2_total = len(list(self.ec2_resource.instances.all()))
    msg_total = "Found a total of %i EC2 instances" % n_ec2_total
    if n_ec2_total == 0:
        from isitfit.cli.click_descendents import IsitfitCliError
        raise IsitfitCliError(msg_total, self.ctx)

    logger.warning(msg_total)

    self.tags_list = []
    from tqdm import tqdm
    desc = "Scanning EC2 instances"
    ec2_all = self.ec2_resource.instances.all()
    for ec2_obj in tqdm(ec2_all, total=n_ec2_total, desc=desc, initial=1):
        if ec2_obj.tags is None:
            tags_dict = {}
        else:
            tags_dict = self.tags_to_dict(ec2_obj)

        tags_dict['instance_id'] = ec2_obj.instance_id
        self.tags_list.append(tags_dict)

    # convert to pandas dataframe when done
    self.tags_df = self._list_to_df()
def _handleEvent(self, event):
    if 'CloudTrailEvent' not in event:
        logger.debug("No CloudTrailEvent key in event. Skipping")
        return None  # ignore this situation

    ce_dict = json.loads(event['CloudTrailEvent'])

    if 'requestParameters' not in ce_dict:
        logger.debug("No requestParameters key in event['CloudTrailEvent']. Skipping")
        return None  # ignore this situation

    rp_dict = ce_dict['requestParameters']

    import jmespath
    nodeType = jmespath.search('instanceType', rp_dict)
    numberOfNodes = jmespath.search('numberOfNodes', rp_dict)

    ts_obj = event['EventTime']
    # ts_obj = dt.datetime.utcfromtimestamp(ts_int)
    # ts_str = ts_obj.strftime('%Y-%m-%d %H:%M:%S')

    result = {
        'ServiceName': 'Redshift',
        'ResourceName': rp_dict['clusterIdentifier'],
        'EventTime': ts_obj,  # ts_str,
        'EventName': self.eventName,
        'ResourceSize1': nodeType,
        'ResourceSize2': numberOfNodes,
    }

    return result
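# --- Illustrative sketch, not part of the original source ---
# A minimal synthetic event of the shape this _handleEvent accepts, inferred purely from
# the keys read above (not from a captured CloudTrail record): a JSON-encoded
# 'CloudTrailEvent' whose requestParameters carries clusterIdentifier, instanceType
# and numberOfNodes.
import datetime as dt
import json

example_event = {
    'EventTime': dt.datetime(2019, 12, 17, 9, 0),
    'CloudTrailEvent': json.dumps({
        'requestParameters': {
            'clusterIdentifier': 'my-cluster',   # hypothetical identifier
            'instanceType': 'dc2.large',
            'numberOfNodes': 4,
        }
    }),
}
# _handleEvent(example_event) would then return a dict with ServiceName='Redshift',
# ResourceSize1='dc2.large' and ResourceSize2=4.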
def get(self):
    # get cloudtrail ec2 type changes for all instances
    logger.debug("Downloading cloudtrail data (from %i regions)" % len(self.region_include))

    df_2 = []
    import boto3

    # add some spaces for aligning the progress bars
    desc = "Cloudtrail events in all regions"
    desc = "%-50s" % desc

    iter_wrap = self.region_include
    iter_wrap = self.tqdmman(iter_wrap, desc=desc, total=len(self.region_include))
    for region_name in iter_wrap:
        boto3.setup_default_session(region_name=region_name)
        df_1 = super().get()
        df_1['Region'] = region_name  # bugfix, field name was "region" (lower-case)
        df_2.append(df_1.reset_index())

    # concatenate
    df_3 = pd.concat(df_2, axis=0, sort=False)

    # check if empty
    if df_3.shape[0] == 0:
        return df_3

    # sort again
    df_3 = df_3.set_index(["Region", "ServiceName", "ResourceName", "EventTime"]).sort_index()
    return df_3
def _try_cloudwatch(self, host_id, host_region, host_created):
    try:
        df_cw = self.cloudwatch.handle_main({'Region': host_region}, host_id, host_created)
        return df_cw, "ok"
    except NoCloudwatchException:
        logger.debug("Cloudwatch: data not found for %s" % host_id)
        return None, "no data"
def __init__(self, ctx): logger.debug("TagsSuggestAdvanced::constructor") # api manager self.api_man = ApiMan(tryAgainIn=2, ctx=ctx) # proceed with parent constructor return super().__init__(ctx)
def __init__(self, ctx): logger.debug("TagsSuggestBasic::constructor") # boto3 ec2 and cloudwatch data import boto3 self.ec2_resource = boto3.resource('ec2') self.tags_list = [] self.tags_df = None self.ctx = ctx
def set_ndays(self, ndays):
    self.ndays = ndays

    # set start/end dates
    dt_now_d = dt.datetime.now().replace(tzinfo=pytz.utc)
    self.StartTime = dt_now_d - dt.timedelta(days=self.ndays)
    self.EndTime = dt_now_d
    logger.debug("Metrics start..end: %s .. %s" % (self.StartTime, self.EndTime))
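# --- Illustrative note, not part of the original source ---
# .replace(tzinfo=pytz.utc) labels the local wall-clock time as UTC without converting it.
# If an actual UTC "now" were wanted, the usual spelling would be:
import datetime as dt
import pytz

utc_now = dt.datetime.now(pytz.utc)  # timezone-aware current time, converted to UTC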
def suggest(self):
    logger.debug("TagsSuggestBasic::suggest")
    logger.info("Generating suggested tags")
    from .tagsImplier import TagsImplierMain
    tags_implier = TagsImplierMain(self.tags_df)
    self.suggested_df = tags_implier.imply()
    self.csv_fn = dump_df_to_csv(self.suggested_df, 'isitfit-tags-suggestBasic-')
    self.suggested_shape = self.suggested_df.shape
def get_metrics_all(self, aws_id):
    # convert aws ID to datadog hostname
    if self.map_aws_dd is None:
        self.build_map_aws_dd()

    if self.map_aws_dd is None:
        raise Exception("Failed to build aws-datadog ID map")

    # fail if not found
    if aws_id not in self.map_aws_dd:
        raise HostNotFoundInDdg("Did not find host aws ID %s in datadog reverse map" % aws_id)

    dd_hostname = self.map_aws_dd[aws_id]

    # FIXME: we already have cpu from cloudwatch, so maybe just focus on ram from datadog
    logger.debug("Fetching datadog data for aws ID %s, datadog hostname %s" % (aws_id, dd_hostname))
    ddgL2 = DatadogAssistant(self.start, self.end, dd_hostname)
    df_cpu_max = ddgL2.get_metrics_cpu_max()
    df_cpu_min = ddgL2.get_metrics_cpu_min()
    df_cpu_avg = ddgL2.get_metrics_cpu_avg()
    df_ram_max = ddgL2.get_metrics_ram_max()
    df_ram_min = ddgL2.get_metrics_ram_min()
    df_ram_avg = ddgL2.get_metrics_ram_avg()
    df_count = ddgL2.get_metrics_count()

    df_all = (
        df_cpu_max
        .merge(df_cpu_min, how='outer', on=['ts_dt'])
        .merge(df_cpu_avg, how='outer', on=['ts_dt'])
        .merge(df_ram_max, how='outer', on=['ts_dt'])
        .merge(df_ram_min, how='outer', on=['ts_dt'])
        .merge(df_ram_avg, how='outer', on=['ts_dt'])
        .merge(df_count, how='outer', on=['ts_dt'])
    )
    df_all = df_all[[
        'ts_dt',
        'cpu_used_max', 'cpu_used_min', 'cpu_used_avg',
        'ram_used_max', 'ram_used_min', 'ram_used_avg',
        'nhours'
    ]]

    # convert from datetime to date to be able to merge with cloudtrail
    df_all['ts_dt'] = df_all.ts_dt.dt.date

    # rename like cloudwatch
    df_all.rename(columns={'ts_dt': 'Timestamp'}, inplace=True)

    return df_all
def _try_datadog(self, aws_id):
    if not self.datadog.is_configured():
        return None, "not configured"

    try:
        df_ddg = self.datadog.get_metrics_all(aws_id)
        return df_ddg, "ok"
    except HostNotFoundInDdg as e:
        logger.debug("Datadog: host not found for aws ID %s: %s" % (aws_id, str(e)))
        return None, "host not found"
    except DataNotFoundForHostInDdg as e:
        logger.debug("Datadog: data not found for aws ID %s: %s" % (aws_id, str(e)))
        return None, "no data"
def handle_host(self, host_id, host_region, host_created):
    logger.debug("host id, region, created: %s, %s, %s" % (host_id, host_region, host_created))

    self.status[host_id] = {
        'ID': host_id,
        'datadog': 'Did not try',
        'cloudwatch': 'Did not try'
    }

    df, status = self._try_datadog(host_id)
    self.status[host_id]['datadog'] = status

    if status != 'ok':
        # "df is None" and status != "ok" are equivalent
        df, status = self._try_cloudwatch(host_id, host_region, host_created)
        self.status[host_id]['cloudwatch'] = status

    return df
def iterator2metric(self, metrics_iterator, rc_id):
    # logger.debug("redshift cluster details")
    # logger.debug(rc_describe_entry)
    for m_i in metrics_iterator:
        # skip node stats for now, and focus on cluster stats
        # i.e. dimensions only ClusterIdentifier, without the NodeID key
        if len(m_i.dimensions) > 1:
            continue

        # exit the for loop and return this particular metric (cluster)
        return m_i

    # in case no cluster metrics found
    logger.debug("No cloudwatch metrics found for %s" % rc_id)
    raise_noCwExc(rc_id)
def handle_main(self, rc_describe_entry, rc_id, rc_created):
    logger.debug("Fetching cloudwatch data for resource %s" % rc_id)

    # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/cloudwatch.html#metric
    self.assistant.set_resource(region_name=rc_describe_entry['Region'])
    metrics_iterator = self.assistant.id2iterator(rc_id, self.cloudwatch_namespace, self.entry_keyId)

    # filter for 1 metric
    metric_single = self.assistant.iterator2metric(metrics_iterator, rc_id)
    response = self.assistant.metric2stats(metric_single)

    # dataframe of CPU Utilization, max and min, over 90 days
    df = self.assistant.stats2df(response, rc_id, rc_created, self.cloudwatch_namespace)

    return df
def handle_pre(self, context_pre):
    import requests
    from cachecontrol import CacheControl
    from cachecontrol.caches.file_cache import FileCache
    from isitfit.utils import logger

    logger.debug("Downloading ec2 catalog (cached to local file)")

    # based on URL = 'http://www.ec2instances.info/instances.json'
    # URL = 's3://...csv'
    # Edit 2019-09-10 use CDN link instead of direct gitlab link
    if self.allow_ec2_different_family:
        URL = 'https://cdn.jsdelivr.net/gh/autofitcloud/[email protected]/www.ec2instances.info/t3c_smaller_familyNone.json'
    else:
        # URL = 'https://gitlab.com/autofitcloud/www.ec2instances.info-ec2op/raw/master/www.ec2instances.info/t3b_smaller_familyL2.json'
        URL = 'https://cdn.jsdelivr.net/gh/autofitcloud/[email protected]/www.ec2instances.info/t3b_smaller_familyL2.json'

    # Update 2019-12-03: move into /tmp/isitfit/
    # fc_dir = '/tmp/isitfit_ec2info.cache'
    from isitfit.dotMan import DotMan
    import os
    fc_dir = os.path.join(DotMan().tempdir(), 'ec2info.cache')

    # cached https://cachecontrol.readthedocs.io/en/latest/
    sess = requests.session()
    cached_sess = CacheControl(sess, cache=FileCache(fc_dir))
    r = cached_sess.request('get', URL)

    # read catalog, copy from ec2op-cli/ec2op/optimizer/cwDailyMaxMaxCpu
    import json
    j = json.dumps(r.json(), indent=4, sort_keys=True)

    from pandas import read_json
    df = read_json(j, orient='split')

    # Edit 2019-09-13 no need to subsample the columns at this stage
    # df = df[['API Name', 'Linux On Demand cost']]

    df = df.rename(columns={'Linux On Demand cost': 'cost_hourly'})
    # df = df.set_index('API Name')  # need to use merge, not index

    context_pre['df_cat'] = df
    return context_pre
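# --- Illustrative sketch, not part of the original source ---
# How the catalog produced above ("merge, not index" per the comment) might be joined
# against per-instance data. The df_instances frame, its 'instanceType' column, and the
# price value are made up for illustration; 'API Name' and 'cost_hourly' are the catalog
# columns referenced above.
import pandas as pd

df_instances = pd.DataFrame({'instance_id': ['i-123'], 'instanceType': ['t3.medium']})
df_cat = pd.DataFrame({'API Name': ['t3.medium'], 'cost_hourly': [0.0416]})

df_joined = df_instances.merge(df_cat, left_on='instanceType', right_on='API Name', how='left')
# df_joined now carries a cost_hourly column per instance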
def read(self):
    # in case of first run
    self._create()

    # insert "new" migrations
    df_mer = self._insertNew()

    # append docstrings
    df_mer['description'] = df_mer.func.apply(
        lambda x: x.__doc__.strip() if x.__doc__ is not None else None)

    logger.debug("Migrations")
    logger.debug(df_mer[['migname', 'executed', 'description']])

    # subset for those that don't have an executed date yet
    df_mer = df_mer[df_mer.executed.isna()]

    # save
    self.df_mig = df_mer
def metric2stats(self, metric):
    """
    For newly created instances, the Timestamp field is not reliable from here.
    It needs postprocessing by stats2df.
    For example, if today is 2019-12-17, an instance created today could return
    Timestamp=datetime.datetime(2019, 12, 13, 9, 0, tzinfo=tzutc())
    """
    logger.debug("fetch cw")
    logger.debug(metric.dimensions)

    # util func
    # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/cloudwatch.html#CloudWatch.Metric.get_statistics
    # https://docs.aws.amazon.com/redshift/latest/mgmt/metrics-listing.html
    #
    # Note for redshift cluster
    # remember that max for redshift cluster = max of stats of all nodes
    response = metric.get_statistics(
        Dimensions=metric.dimensions,
        StartTime=self.StartTime,
        EndTime=self.EndTime,
        Period=SECONDS_IN_ONE_DAY,
        Statistics=['Minimum', 'Average', 'Maximum', 'SampleCount'],
        Unit='Percent')

    logger.debug(response)
    return response
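# --- Illustrative note, not part of the original source ---
# SECONDS_IN_ONE_DAY is presumably a module-level constant in the original file.
# With a one-day Period, get_statistics returns at most ~90 daily datapoints for the
# 90-day window configured in set_ndays.
SECONDS_IN_ONE_DAY = 60 * 60 * 24  # 86400, used as the cloudwatch statistics Period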
def _initCache(self):
    """
    Try to load region_include from cache
    """
    if self.filter_region is not None:
        self.region_include = [self.filter_region]
        self.regionInclude_ready = True
        return

    # need to use the profile name
    # because a profile could have ec2 in us-east-1
    # whereas another could have ec2 in us-west-1
    import boto3
    profile_name = boto3.session.Session().profile_name

    # cache filename and key to use
    # Update 2019-12-03: move from ~/.isitfit to /tmp/isitfit/
    from isitfit.dotMan import DotMan
    import os
    cache_filename = 'iterator_cache-%s-%s.pkl' % (profile_name, self.service_name)
    cache_filename = os.path.join(DotMan().tempdir(), cache_filename)

    # set of keys to save in local cache file with simple_cache
    self.simpleCacheMan = SimpleCacheMan(filename=cache_filename, namespace="iterator")

    # load cached keys
    ri_cached = self.simpleCacheMan.load_key(key='region_include')
    if ri_cached is not None:
        logger.debug("Loading regions containing EC2 from cache file")
        self.region_include = ri_cached
        self.regionInclude_ready = True

    ri_cached = self.simpleCacheMan.load_key(key='region_accessdenied')
    if ri_cached is not None:
        self.region_accessdenied = ri_cached
def _handleEvent(self, event):
    if 'CloudTrailEvent' not in event:
        logger.debug("No CloudTrailEvent key in event. Skipping")
        return None  # ignore this situation

    ce_dict = json.loads(event['CloudTrailEvent'])

    if 'requestParameters' not in ce_dict:
        logger.debug("No requestParameters key in event['CloudTrailEvent']. Skipping")
        return None  # ignore this situation

    rp_dict = ce_dict['requestParameters']

    newType = None
    # newType = jmespath.search('instanceType', rp_dict)
    # if newType is None:
    #     newType = jmespath.search('attributeName==`instanceType`', rp_dict)
    if 'instanceType' in rp_dict:
        # logging.error(json.dumps(rp_dict))
        newType = rp_dict['instanceType']['value']

    if 'attribute' in rp_dict:
        if rp_dict['attribute'] == 'instanceType':
            newType = rp_dict['value']

    if newType is None:
        return None

    ts_obj = event['EventTime']
    # ts_obj = dt.datetime.utcfromtimestamp(ts_int)
    # ts_str = ts_obj.strftime('%Y-%m-%d %H:%M:%S')

    if 'instanceId' not in rp_dict:
        logger.debug("No instanceId key in requestParameters. Skipping")
        return None  # ignore this situation

    result = {
        'ServiceName': 'EC2',
        'EventName': self.eventName,
        'EventTime': ts_obj,  # ts_str,
        'ResourceName': rp_dict['instanceId'],
        'ResourceSize1': newType,
        'ResourceSize2': None
    }

    return result
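# --- Illustrative sketch, not part of the original source ---
# Two synthetic requestParameters shapes that the branches above would accept, inferred
# from the keys this parser reads rather than from captured CloudTrail records.
import datetime as dt
import json

# shape 1: nested {'instanceType': {'value': ...}}
example_event_1 = {
    'EventTime': dt.datetime(2019, 12, 17, 9, 0),
    'CloudTrailEvent': json.dumps({
        'requestParameters': {
            'instanceId': 'i-0123456789abcdef0',
            'instanceType': {'value': 't3.large'},
        }
    }),
}

# shape 2: flat {'attribute': 'instanceType', 'value': ...}
example_event_2 = {
    'EventTime': dt.datetime(2019, 12, 17, 9, 0),
    'CloudTrailEvent': json.dumps({
        'requestParameters': {
            'instanceId': 'i-0123456789abcdef0',
            'attribute': 'instanceType',
            'value': 't3.large',
        }
    }),
}
# In both cases _handleEvent would return ResourceName='i-0123456789abcdef0'
# and ResourceSize1='t3.large'.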
def after_all(self, context_all):
    # for debugging
    df_all = pd.DataFrame(self.df_all)
    logger.debug("\ncapacity/used per instance")
    logger.debug(df_all)
    logger.debug("\n")

    # set n analysed
    context_all['n_ec2_analysed'] = len(self.df_all)

    # dump to csv for details
    if self.save_details:
        import click

        # display message for first file
        csvi_desc = 'Per ec2 and day'
        msg_info = "💾 Detail file 1/2: %s: %s" % (csvi_desc, self.csv_fn_intermediate.name)
        msg_info = colored(msg_info, "cyan")
        click.echo(msg_info)

        # save 2nd file and display message
        import tempfile
        from isitfit.dotMan import DotMan
        csvi_prefix = 'isitfit-cost-analyze-ec2-details-2-'
        csv_fh_final = tempfile.NamedTemporaryFile(prefix=csvi_prefix, suffix='.csv', delete=False, dir=DotMan().tempdir())
        df_all.to_csv(csv_fh_final.name, index=False)

        # display message about 2nd file
        csvi_desc = 'Per ec2 only   '  # 3 spaces just to align with "Per ec2 and day"
        msg_info = "💾 Detail file 2/2: %s: %s" % (csvi_desc, csv_fh_final.name)
        msg_info = colored(msg_info, "cyan")
        click.echo(msg_info)

        click.echo(colored("Consider viewing the CSVs in the terminal with visidata: `vd file.csv` (http://visidata.org/).", "cyan"))
        click.echo("")  # empty breather line

    return context_all
def tag_list(self):
    logger.info("Step 4: convert the set of tags to a list of tags")
    df_ori = self.df_ori

    # initialize
    # just doing [[None]*3]*len_ori doesn't work
    df_ori['tag_list'] = None
    for i1 in range(self.len_ori):
        df_ori.at[i1, 'tag_list'] = [None] * 3

    # distribute the tag_set to tag_1, tag_2, tag_3 in such a way that, for example, "app" lands at tag_1 for all the instances
    tag_processed = set()
    for i1 in range(self.len_ori):
        for tag_value in df_ori.iloc[i1].tag_set:
            if tag_value in tag_processed:
                continue

            tag_processed.add(tag_value)
            logger.debug("<<<<<<<<>>>>>>>>>>>>")
            logger.debug("%i: %s" % (i1, tag_value))
            logger.debug(df_ori)

            if tag_value in df_ori.at[i1, 'tag_list']:
                continue  # already inserted this tag

            # find free indices in current list
            if None not in df_ori.at[i1, 'tag_list']:
                raise Exception("No more space in list for %s" % tag_value)

            # https://stackoverflow.com/a/6294205/4126114
            free_indices = [i for i, x in enumerate(df_ori.at[i1, 'tag_list']) if x is None]

            # find the first free index which is ok for all entries having this tag
            free_chosen = None
            logger.debug("Searching for free index for %s" % tag_value)
            for free_i1 in free_indices:
                found_conflict = False
                for i2 in range(self.len_ori):
                    if found_conflict:
                        break
                    if i2 <= i1:
                        continue

                    logger.debug("Checking row %i" % i2)
                    # if tag in set of tags for this 2nd row
                    if tag_value in df_ori.loc[i2].tag_set:
                        # and if the value for this tag is not *already* set
                        if tag_value not in df_ori.loc[i2].tag_list:
                            if df_ori.loc[i2, 'tag_list'][free_i1] is not None:
                                logger.debug("Found conflict")
                                found_conflict = True

                if not found_conflict:
                    logger.debug("Found chosen free index at %i" % free_i1)
                    free_chosen = free_i1
                    break

            # if no free index chosen, raise Exception
            if free_chosen is None:
                raise Exception("Conflict found: %s didn't find a free index to use" % (tag_value))

            # otherwise use the chosen index
            # Old way of getting first None only
            # free_chosen = df_ori.at[i1, 'tag_list'].index(None)
            free_chosen = free_i1
            df_ori.at[i1, 'tag_list'][free_chosen] = tag_value

            # set this tag for all other rows at "free_chosen"
            for i2 in range(self.len_ori):
                if i2 <= i1:
                    continue
                if tag_value in df_ori.loc[i2].tag_set:
                    if tag_value not in df_ori.loc[i2].tag_list:
                        if df_ori.loc[i2, 'tag_list'][free_chosen] is not None:
                            raise Exception(
                                "Conflict found despite pre-check? %s wants to be at %i but found %s already"
                                % (tag_value, free_chosen, df_ori.loc[i2, 'tag_list'][free_chosen]))

                        df_ori.at[i2, 'tag_list'][free_chosen] = tag_value

    # mesh out the tag_list to tag_1 tag_2 tag_3
    df_ori['tag_1'] = df_ori.tag_list.apply(lambda x: x[1 - 1])
    df_ori['tag_2'] = df_ori.tag_list.apply(lambda x: x[2 - 1])
    df_ori['tag_3'] = df_ori.tag_list.apply(lambda x: x[3 - 1])

    # re-order columns
    df_ori = df_ori.rename(columns={'original': 'instance_name'})
    df_ori = df_ori[['instance_id', 'instance_name', 'tag_1', 'tag_2', 'tag_3']]

    # done
    # print("")
    # print("tagged")
    # print(df_ori)
    self.df_ori = df_ori
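# --- Illustrative sketch, not part of the original source ---
# A toy input for the distribution logic above; the instance names and tags are made up.
# The guarantee illustrated is that a shared tag such as "app" ends up in the same column
# for every instance that carries it (which column depends on set iteration order).
import pandas as pd

df_toy = pd.DataFrame({
    'instance_id': ['i-1', 'i-2', 'i-3'],
    'original': ['app-worker-1', 'app-db', 'cron'],
    'tag_set': [{'app', 'worker'}, {'app', 'db'}, {'cron'}],
})
# After tag_list(), 'app' occupies the same column (tag_1 or tag_2) for both i-1 and i-2,
# while 'worker', 'db' and 'cron' fill the remaining free columns of their own rows.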
def stats2df(self, response_metric, rc_id, ClusterCreateTime, cloudwatch_namespace):
    if len(response_metric['Datapoints']) == 0:
        raise_noCwExc(rc_id)

    # convert to dataframe
    df = pd.DataFrame(response_metric['Datapoints'])

    # edit 2019-09-13: no need to subsample columns
    # The initial goal was to drop the "Unit" column (which just said "Percent"),
    # but it's not such a big deal, and avoiding this subsampling simplifies the code a bit
    # df = df[['Timestamp', 'SampleCount', 'Average']]

    # sort and append in case of multiple metrics
    df = df.sort_values(['Timestamp'], ascending=True)

    # before returning, convert dateutil timezone to pytz
    # for https://github.com/pandas-dev/pandas/issues/25423
    # via https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.dt.tz_convert.html
    # Edit 2019-09-25 Instead of keeping the full timestamp, just truncate to date, especially that this is just daily data
    # df['Timestamp'] = df.Timestamp.dt.tz_convert(pytz.utc)
    df['Timestamp'] = df.Timestamp.dt.date

    # cloudwatch bug: a newly created instance today will return a Timestamp before today
    # In this case, correcting the timestamp
    # Update 2019-12-17 This was fixed by setting the EndTime.hour,minute,second to the next midnight
    # if df.shape[0] == 1:
    #     dt_now = dt.datetime.now().date()
    #     if ClusterCreateTime.date() == dt_now:
    #         if df.Timestamp.iloc[0] != dt_now:
    #             raise Exception("This cloudwatch bug was fixed by setting the hours/minutes/seconds of start/end time (location 1)")
    #             df.iloc[0, df.columns == 'Timestamp'] = dt_now

    # drop points "before create time" (bug in cloudwatch?)
    # Edit 2019-11-18 since this is daily data, and we don't really care about hours/minutes, just compare the y-m-d parts
    # Update 2019-12-16 This is a weird bug
    # idx_cwbug = df['Timestamp'] >= ClusterCreateTime.date()
    # if not idx_cwbug.all():
    #     raise Exception("This cloudwatch bug was fixed by setting the hours/minutes/seconds of start/end time (location 2)")
    #     logger.debug("Cloudwatch bug of metric data after resource creation time: %s" % rc_id)
    # df = df[idx_cwbug]
    # if df.shape[0] == 0: raise_noCwExc(rc_id)

    # calculate number of running hours
    # In the latest 90 days, sampling is per minute in cloudwatch
    # https://aws.amazon.com/cloudwatch/faqs/
    # Q: What is the minimum resolution for the data that Amazon CloudWatch receives and aggregates?
    # A: ... For example, if you request for 1-minute data for a day from 10 days ago, you will receive the 1440 data points ...
    if cloudwatch_namespace == 'AWS/EC2':
        df['nhours'] = np.ceil(df.SampleCount / 60)
    elif cloudwatch_namespace == 'AWS/Redshift':
        # Redshift cloudwatch metrics are every 30 seconds (this seems to be the case by trial and error)
        # X points * 0.5 mins/point / 60 minutes/hr = Y hours
        df['nhours'] = np.ceil(df.SampleCount / 60 / 2)

    # rename columns
    df.rename(columns={
        'Maximum': 'cpu_used_max',
        'Average': 'cpu_used_avg',
        'Minimum': 'cpu_used_min',
    }, inplace=True)

    # append nan for memory
    df['ram_used_max'] = np.nan
    df['ram_used_avg'] = np.nan
    df['ram_used_min'] = np.nan

    logger.debug("returning dataframe.head")
    logger.debug(df.head())  # print

    return df
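# --- Illustrative arithmetic, not part of the original source ---
# Why the SampleCount divisors above yield "hours running per day":
# EC2 publishes roughly one datapoint per minute, so a full day has ~1440 samples;
# Redshift appears to publish two per minute, so ~2880 samples per full day.
import numpy as np

assert np.ceil(1440 / 60) == 24        # EC2: full day running
assert np.ceil(2880 / 60 / 2) == 24    # Redshift: full day running
assert np.ceil(360 / 60) == 6          # EC2 instance that ran ~6 hours that day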
def display(self):
    logger.debug("TagsSuggestBasic::display")
    from ..utils import display_df
    display_df("Suggested tags:", self.suggested_df, self.csv_fn, self.suggested_shape, logger)
def iterate_core(self, display_tqdm=False):
    fx_l = [
        'service_name', 'service_description', 'paginator_name',
        'paginator_entryJmespath', 'paginator_exception', 'entry_keyId',
        'entry_keyCreated'
    ]
    for fx_i in fx_l:
        # https://stackoverflow.com/a/9058315/4126114
        if fx_i not in self.__class__.__dict__.keys():
            raise Exception("Derived class should set %s" % fx_i)

    # iterate on regions
    import botocore
    import boto3
    import jmespath
    redshift_regions_full = boto3.Session().get_available_regions(self.service_name)
    import copy
    redshift_regions_sub = copy.deepcopy(redshift_regions_full)
    # redshift_regions_sub = ['us-west-2']  # FIXME

    if self.filter_region is not None:
        if self.filter_region not in redshift_regions_sub:
            msg_err = "Invalid region specified: %s. Supported values: %s"
            msg_err = msg_err % (self.filter_region, ", ".join(redshift_regions_sub))
            raise IsitfitCliError(msg_err, None)  # passing None for click context

        # over-ride
        redshift_regions_sub = [self.filter_region]

    # Before iterating, display a message that skipping some regions due to load from cache
    # The following conditions = region_include was loaded from cache
    if self.regionInclude_ready and len(redshift_regions_sub) != len(self.region_include) and not self.displayed_willskip:
        msg1 = "%s: Will skip %i out of %i regions which were either empty or inaccessible. To re-check, delete the local cache file %s"
        msg1 = msg1 % (self.service_description,
                       len(redshift_regions_sub) - len(self.region_include),
                       len(redshift_regions_sub),
                       self.simpleCacheMan.filename)
        import click
        click.echo(colored(msg1, "yellow"))
        self.displayed_willskip = True

    # iterate
    region_iterator = redshift_regions_sub
    if display_tqdm:
        # add some spaces for aligning the progress bars
        desc = "%s, counting in all regions " % self.service_description
        desc = "%-50s" % desc
        region_iterator = self.tqdmman(region_iterator, total=len(redshift_regions_sub), desc=desc)

    for region_name in region_iterator:
        if self.regionInclude_ready and self.filter_region is None:
            if region_name not in self.region_include:
                # skip since already failed to use it
                continue

        logger.debug("Region %s" % region_name)
        boto3.setup_default_session(region_name=region_name)

        # boto3 clients
        # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/redshift.html#Redshift.Client.describe_logging_status
        # Update 2019-12-09
        # Unfolding the iterator can cause a rate limiting error for accounts with more than 200 EC2
        # as reported by u/moofishies on 2019-11-12
        # Similar to: https://github.com/boto/botocore/pull/891#issuecomment-303526763
        # The max_attempts config here is increased from the default 4 to decrease the rate limiting chances
        # https://github.com/boto/botocore/pull/1260
        # Note that with each extra retry, an exponential backoff is already implemented inside botocore
        # More: https://botocore.amazonaws.com/v1/documentation/api/latest/reference/config.html
        from botocore.config import Config
        service_client = boto3.client(self.service_name, config=Config(retries={'max_attempts': 10}))

        # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/cloudwatch.html#metric
        self.cloudwatch_resource = boto3.resource('cloudwatch')

        # iterate on service resources, eg ec2 instances, redshift clusters
        paginator = service_client.get_paginator(self.paginator_name)
        rc_iterator = paginator.paginate()
        try:
            region_anyClusterFound = False
            for rc_describe_page in rc_iterator:
                rc_describe_entries = jmespath.search(self.paginator_entryJmespath, rc_describe_page)
                for rc_describe_entry in rc_describe_entries:
                    region_anyClusterFound = True
                    # add field for region
                    rc_describe_entry['Region'] = region_name
                    # yield
                    yield rc_describe_entry

            if not self.regionInclude_ready and self.filter_region is None:
                if region_anyClusterFound:
                    # only include if found clusters in this region
                    self.region_include.append(region_name)

        except botocore.exceptions.ClientError as e:
            # Exception that means "no access to region"
            if e.response['Error']['Code'] == self.paginator_exception:
                continue

            # eg if user doesnt have access arn:aws:redshift:ap-northeast-1:974668457921:cluster:*
            # it could be because of specific access to region, or general access to the full redshift service
            # Note: capturing this exception means that the region is no longer included in the iterator, but it will still iterate over other regions
            if e.response['Error']['Code'] == 'AccessDenied':
                self.region_accessdenied.append(e)
                continue

            # Handle error:
            # botocore.exceptions.ClientError: An error occurred (InvalidClientTokenId) when calling the AssumeRole operation: The security token included in the request is invalid.
            # Not sure what this means, but maybe that a role is not allowed to assume into a region?
            # This error can be raised for example with using my local AWS profile "afc_external_readCur".
            # Here is an excerpt from my ~/.aws/credentials file
            #
            # # Role created in Autofitcloud giving access to shadiakiki1986 to read CUR S3
            # [afc_external_readCur]
            # role_arn = arn:aws:iam::123456789:role/external-read-athena-role-ExternalReadCURRole-abcdef
            # source_profile = a_user_profile_not_a_role
            # region = us-east-1
            if e.response['Error']['Code'] == 'InvalidClientTokenId':
                continue

            # after setting up the InvalidClientTokenId filter above on the profile afc_external_readCur,
            # faced error: botocore.exceptions.ClientError: An error occurred (UnauthorizedOperation) when calling the DescribeInstances operation: You are not authorized to perform this operation.
            if e.response['Error']['Code'] == 'UnauthorizedOperation':
                continue

            # all other exceptions raised
            raise e

    # before exiting, check if a count just completed, and mark region_include as usable
    if not self.regionInclude_ready and self.filter_region is None:
        self.regionInclude_ready = True

        # save to cache
        self.simpleCacheMan.save_key(key='region_include', value=self.region_include)
        self.simpleCacheMan.save_key(key='region_accessdenied', value=self.region_accessdenied)

    # before exiting, if got some AccessDenied errors, display to user
    # Note 1: originally, I wanted to break the iterator on the 1st AccessDenied error,
    # thinking that it's because the user doesn't have permission to the service as a whole.
    # Later, I figured out that maybe the user has permission to a subset of regions,
    # in which case getting an error on region R1 is normal,
    # and the iterator should still proceed to the next region R2.
    if not self.displayed_accessdenied and len(self.region_accessdenied) > 0:
        # 1st part goes to stdout
        msgx = "AWS returned AccessDenied errors on %i out of %i regions. Use `isitfit --verbose ...` and re-run the command for more details"
        msgx = msgx % (len(self.region_accessdenied), len(redshift_regions_sub))
        import click
        click.echo(colored(msgx, "yellow"))

        # 2nd part is too long, send it to --verbose
        msg2 = "\n".join(["- %s" % str(e) for e in self.region_accessdenied])
        msgx = "Here are the full error messages:\n%s"
        msgx = msgx % (msg2)
        logger.info(colored(msgx, "yellow"))

        self.displayed_accessdenied = True
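# --- Illustrative sketch, not part of the original source ---
# iterate_core() expects the derived class to define the attributes listed in fx_l.
# A hypothetical EC2 derivation might look like the following; the attribute names come
# from fx_l above, while the values here are assumptions for illustration only.
class Ec2IteratorExample:
    service_name = 'ec2'
    service_description = 'EC2 instances'
    paginator_name = 'describe_instances'
    paginator_entryJmespath = 'Reservations[].Instances[]'
    paginator_exception = 'AuthFailure'
    entry_keyId = 'InstanceId'
    entry_keyCreated = 'LaunchTime'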
def _handleEvent(self, event):
    # logger.debug("Cloudtrail event: %s" % json.dumps(event, default=json_serial))

    if 'Resources' not in event:
        logger.debug("No 'Resources' key in event. Skipping")
        return None  # ignore this situation

    instanceId = [x for x in event['Resources'] if x['ResourceType'] == 'AWS::EC2::Instance']
    if len(instanceId) == 0:
        logger.debug("No AWS EC2 instances in event. Skipping")
        return None  # ignore this situation

    # proceed
    instanceId = instanceId[0]
    if 'ResourceName' not in instanceId:
        logger.debug("No ResourceName key in event. Skipping")
        return None  # ignore this situation

    # proceed
    instanceId = instanceId['ResourceName']

    if 'CloudTrailEvent' not in event:
        logger.debug("No CloudTrailEvent key in event. Skipping")
        return None  # ignore this situation

    ce_dict = json.loads(event['CloudTrailEvent'])

    if 'requestParameters' not in ce_dict:
        logger.debug("No requestParameters key in event['CloudTrailEvent']. Skipping")
        return None  # ignore this situation

    if 'instanceType' not in ce_dict['requestParameters']:
        logger.debug("No instanceType key in event['CloudTrailEvent']['requestParameters']. Skipping")
        return None  # ignore this situation

    newType = ce_dict['requestParameters']['instanceType']

    if 'EventTime' not in event:
        logger.debug("No EventTime key in event. Skipping")
        return None  # ignore this situation

    ts_obj = event['EventTime']
    # ts_obj = dt.datetime.utcfromtimestamp(ts_int)
    # ts_str = ts_obj.strftime('%Y-%m-%d %H:%M:%S')

    result = {
        'ServiceName': 'EC2',
        'EventName': self.eventName,
        'EventTime': ts_obj,  # ts_str,
        'ResourceName': instanceId,
        'ResourceSize1': newType,
        'ResourceSize2': None
    }

    return result
def _handleEvent(self, event):
    # logger.debug("Cloudtrail event: %s" % json.dumps(event, default=json_serial))

    if 'Resources' not in event:
        logger.debug("No 'Resources' key in event. Skipping")
        return None  # ignore this situation

    instanceId = [x for x in event['Resources'] if x['ResourceType'] == 'AWS::Redshift::Cluster']
    if len(instanceId) == 0:
        logger.debug("No AWS redshift clusters in event. Skipping")
        return None  # ignore this situation

    # proceed
    instanceId = instanceId[0]
    if 'ResourceName' not in instanceId:
        logger.debug("No ResourceName key in event. Skipping")
        return None  # ignore this situation

    # proceed
    instanceId = instanceId['ResourceName']

    if 'CloudTrailEvent' not in event:
        logger.debug("No CloudTrailEvent key in event. Skipping")
        return None  # ignore this situation

    ce_dict = json.loads(event['CloudTrailEvent'])

    import jmespath
    nodeType = jmespath.search('requestParameters.nodeType', ce_dict)
    numberOfNodes = jmespath.search('requestParameters.numberOfNodes', ce_dict)
    if numberOfNodes is None:
        numberOfNodes = jmespath.search('responseElements.numberOfNodes', ce_dict)

    if nodeType is None:
        logger.debug("No nodeType key in event['CloudTrailEvent']['requestParameters']. Skipping")
        return None  # ignore this situation

    if numberOfNodes is None:
        logger.debug("No numberOfNodes key in event['CloudTrailEvent']['requestParameters']. Skipping")
        return None  # ignore this situation

    if 'EventTime' not in event:
        logger.debug("No EventTime key in event. Skipping")
        return None  # ignore this situation

    ts_obj = event['EventTime']
    # ts_obj = dt.datetime.utcfromtimestamp(ts_int)
    # ts_str = ts_obj.strftime('%Y-%m-%d %H:%M:%S')

    result = {
        'ServiceName': 'Redshift',  # bugfix: was using Ec2 instead of Redshift
        'EventName': self.eventName,
        'EventTime': ts_obj,  # ts_str,
        'ResourceName': instanceId,
        'ResourceSize1': nodeType,
        'ResourceSize2': numberOfNodes,
    }

    return result
def get_ifi(self, tqdml2_obj):
    # display name of runner
    logger.info(self.description)

    # 0th pass to count
    n_ec2_total = self.ec2_it.count()

    if n_ec2_total == 0:
        import click
        click.secho("No resources found in %s" % self.ec2_it.service_description, fg="red")
        return

    # context for pre listeners
    context_pre = {}
    context_pre['ec2_instances'] = self.ec2_it
    context_pre['region_include'] = self.ec2_it.get_regionInclude()
    context_pre['n_ec2_total'] = n_ec2_total
    context_pre['click_ctx'] = self.ctx
    context_pre['mainManager'] = self

    # call listeners
    for l in self.listeners['pre']:
        context_pre = l(context_pre)
        if context_pre is None:
            raise Exception("Breaking the chain is not allowed in listener/pre")

    # iterate over all ec2 instances
    sum_capacity = 0
    sum_used = 0
    df_all = []
    ec2_noCloudwatch = []  # FIXME DEPRECATED
    ec2_noCloudtrail = []

    # add some spaces for aligning the progress bars
    desc = "Pass 2/2 through %s" % self.ec2_it.service_description
    desc = "%-50s" % desc

    # Edit 2019-11-12 use "initial=0" instead of "=1". Check more details in a similar note in "cloudtrail_ec2type.py"
    iter_wrap = tqdml2_obj(self.ec2_it, total=n_ec2_total, desc=desc, initial=0)
    for ec2_dict, ec2_id, ec2_launchtime, ec2_obj in iter_wrap:
        # context dict to be passed between listeners
        context_ec2 = {}
        context_ec2['mainManager'] = self
        if 'df_cat' in context_pre:
            context_ec2['df_cat'] = context_pre['df_cat']  # copy object between contexts
        context_ec2['ec2_dict'] = ec2_dict
        context_ec2['ec2_id'] = ec2_id
        context_ec2['ec2_launchtime'] = ec2_launchtime
        context_ec2['ec2_obj'] = ec2_obj

        try:
            # call listeners
            # Listener can return None to break out of loop,
            # i.e. to stop processing with other listeners
            for l in self.listeners['ec2']:
                context_ec2 = l(context_ec2)

                # skip rest of listeners if one of them returned None
                if context_ec2 is None:
                    logger.debug("Listener %s is breaking per_resource for resource %s" % (l, ec2_id))
                    break

        except NoCloudtrailException:
            ec2_noCloudtrail.append(ec2_id)

        except IsitfitCliRunnerBreakIterator as e:
            # check request for breaking from the iterator loop
            # eg for isitfit cost optimize --n=1
            logger.debug("Breaking from the per-resource iterator")
            break

    # call listeners
    # logger.info("... done")
    # logger.info("")
    # logger.info("")

    # set up context
    context_all = {}
    context_all['n_ec2_total'] = n_ec2_total
    context_all['mainManager'] = self
    context_all['region_include'] = self.ec2_it.region_include
    if 'df_cat' in context_pre:
        context_all['df_cat'] = context_pre['df_cat']  # copy object between contexts

    # more
    context_all['ec2_noCloudwatch'] = ec2_noCloudwatch  # FIXME DEPRECATED
    context_all['ec2_noCloudtrail'] = ec2_noCloudtrail
    context_all['click_ctx'] = self.ctx

    # call listeners
    for l in self.listeners['all']:
        context_all = l(context_all)
        if context_all is None:
            raise Exception("Breaking the chain is not allowed in listener/all: %s" % str(l))

    # done
    # logger.info("")
    return context_all
def processPush(self, dryRun: bool):
    # max ec2 per call is 20
    # but just doing 1 at a time for now
    # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/resourcegroupstaggingapi.html#ResourceGroupsTaggingAPI.Client.tag_resources
    import boto3
    tagging_client = boto3.client('resourcegroupstaggingapi')
    ec2_resource = boto3.resource('ec2')
    account_id = boto3.client('sts').get_caller_identity()['Account']

    import json

    preproc = lambda x: x[sorted(list(x.columns))].set_index('instance_id')
    self.latest_df = preproc(self.latest_df)
    self.csv_df = preproc(self.csv_df)

    from tqdm import tqdm
    runType_prefix = "Dry run" if dryRun else "Live"
    for instance_id, row_new in tqdm(self.csv_df.iterrows(), total=self.csv_df.shape[0], desc="Tag CSV row (%s)" % runType_prefix, initial=1):
        row_old = self.latest_df.loc[instance_id]

        tags_new = row_new.to_dict()
        tags_old = row_old.to_dict()

        if tags_new == tags_old:
            logger.debug("Skipping %s since no changes" % instance_id)
            continue

        # keeping only changed keys
        keys_dotag = {}
        for k in tags_new:
            if not tags_new[k]:
                continue  # empty tags are skipped
            if k not in tags_old:
                keys_dotag[k] = tags_new[k]
                continue
            if tags_new[k] != tags_old[k]:
                keys_dotag[k] = tags_new[k]
                continue

        # proceed with untagging
        keys_untag = []
        for k in tags_old:
            if not tags_old[k]:
                continue  # empty tags are skipped
            if k not in tags_new:
                keys_untag.append(k)

        if not keys_dotag and not keys_untag:
            continue

        # if any of them set:
        instance_obj = ec2_resource.Instance(instance_id)
        instance_arn = 'arn:aws:ec2:%s:%s:instance/%s' % (
            instance_obj.placement['AvailabilityZone'][:-1], account_id, instance_id)

        if keys_dotag:
            logger.debug("[%s] Will tag %s with %s" % (runType_prefix, instance_id, json.dumps(keys_dotag)))
            if not dryRun:
                response = tagging_client.tag_resources(ResourceARNList=[instance_arn], Tags=keys_dotag)

        if keys_untag:
            logger.debug("[%s] Will untag %s with %s" % (runType_prefix, instance_id, json.dumps(keys_untag)))
            if not dryRun:
                response = tagging_client.untag_resources(ResourceARNList=[instance_arn], TagKeys=keys_untag)

    if dryRun:
        from termcolor import colored
        logger.info(colored("This was a dry run. Execute the same command again with `--not-dry-run` for actual tags push to aws ec2", "red"))
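# --- Illustrative sketch, not part of the original source ---
# The instance ARN built above derives the region by trimming the availability-zone
# letter; the IDs below are made up.
availability_zone = 'us-east-1a'
account_id = '123456789012'
instance_id = 'i-0123456789abcdef0'
instance_arn = 'arn:aws:ec2:%s:%s:instance/%s' % (availability_zone[:-1], account_id, instance_id)
# -> 'arn:aws:ec2:us-east-1:123456789012:instance/i-0123456789abcdef0'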