def add_instances(self, instances):
    ''' Register the given instance IDs with this ELB. '''
    logging.debug('Adding instances to ELB: %s' % instances, self.verbose)
    try:
        self.elb.register_instances(instances)
    except BotoServerError as error:
        # ELB API rejected the registration; report and carry on.
        logging.error(error.message)
def wait_for_servers(self, servers, start_time, success):
    """ Wait for servers that didn't fail to start to start up.

    :param servers: dict mapping hostname -> instance wrapper with
        analyze_state() (0=running, 1=pending, 2=failed) and get_az()
    :param start_time: epoch seconds when launching began (timeout base)
    :param success: number of servers expected to start
    Exits the process if every expected server failed; otherwise sets up
    the ELB for the AZs actually used (when enabled in config).
    """
    started = 0
    failed = 0
    azs_used = list()
    # Fixed log message: was "server(s)to" (missing space).
    logging.info("Waiting for %d server(s) to start" % success)
    stime = self.cconfig.get_sleep_step()
    for host in self.hostnames:
        status = servers[host].analyze_state()
        while status == 1:  # 1 == still pending; poll until resolved
            if check_timed_out(start_time, self.cconfig.get_launch_timeout(),
                               verbose=self.opts.verbose):
                logging.error("%s failed to start before time out" % host)
                break
            # Fixed typo: "Wating" -> "Waiting".
            logging.info("Waiting for %s to enter running state" % host)
            time.sleep(stime)
            status = servers[host].analyze_state()
    if status == 0:  # running
        started += 1
        if servers[host].get_az() not in azs_used:
            azs_used.append(servers[host].get_az())
        logging.info("%s is running" % host)
    elif status == 2:  # launch failed
        failed += 1
    logging.debug("%d started, %d failed, of %d total" %
                  (started, failed, success), self.opts.verbose)
    if failed == success:
        logging.error("All %d server(s) failed to start" % failed)
        sys.exit(1)
    elif started != success:
        logging.error("%d started, %d failed to start" % (started, failed))
    else:
        logging.info("%d of %d started" % (started, success))
    if self.cconfig.elb.enabled and azs_used:
        self.setup_elb(servers, azs_used)
def analyze_state(self, desired_state='up'):
    ''' Find out if we're done (0), waiting (1), or screwed (2).

    :param desired_state: 'up' to wait for running, anything else to
        wait for the instance to go down.
    :returns: 0 done, 1 still transitioning, 2 wrong direction,
        None when the state maps to nothing, False when no state at all.
    '''
    result = None
    state = self.get_state()
    if state:
        logging.debug(
            'Analyze state %s is %s' % (self._instance.id, state),
            self.verbose)
        pending_up = ['pending']
        pending_down = ['shutting-down', 'stopping']
        ec2_up = 'running'
        ec2_down = ['terminated', 'stopped']
        # Fixed: was "desired_state is 'up'" — identity comparison on a
        # string only works via CPython interning; use equality.
        if desired_state == 'up':
            if state in pending_up:
                result = 1
            elif state in pending_down:
                result = 2
            elif state == ec2_up:
                # Instance reached running: run post-start actions.
                self.finalize_start()
                result = 0
        else:
            if state in pending_down:
                result = 1
            elif state == ec2_up:
                result = 2
            elif state in ec2_down:
                result = 0
        return result
    else:
        logging.error('Unable to get status for %s' % self._instance.id)
        return False
def check_timed_out(start_time, timeout, verbose=False):
    ''' Report whether the application has exceeded its time budget. '''
    logging.debug(
        "check_timed_out(): Checking if %d - %d (%d) > %d" %
        (time.time(), start_time, time.time() - start_time, timeout),
        verbose)
    # Elapsed wall-clock time versus allowed timeout.
    return time.time() - start_time > timeout
def remove_instances(self, instances):
    ''' Deregister the given instance IDs from this ELB. '''
    logging.debug(
        "Removing instances from ELB: %s" % instances, self.verbose)
    try:
        self.elb.deregister_instances(instances)
    except BotoServerError as error:
        # Deregistration failed; log the API error.
        logging.error(error.message)
def new_eip(self):
    ''' Allocates a new EIP address to an EC2 instance.

    Side effect: clears self.allocateeips so only one allocation happens.
    Returns None when the allocation request fails.

    NOTE(review): the allocated ``eip`` is never used or returned and the
    success path has no explicit return — this looks truncated; confirm
    against the full source (an associate step likely follows).
    '''
    logging.debug('Allocating a new EIP!', self.verbose)
    self.allocateeips = False
    try:
        # Request a fresh Elastic IP from EC2.
        eip = self.conn.allocate_address()
    except EC2ResponseError, msg:
        logging.error(msg.message)
        return None
def init_elb(self):
    ''' Initialize ELB, create if does not exist by default.

    Looks up the load balancer by name; in find_only mode a missing ELB
    is reported and False returned instead of creating one.
    '''
    try:
        # IndexError when the lookup returns an empty list.
        self.elb = self.conn.get_all_load_balancers(
            load_balancer_names=[self.name])[0]
        logging.debug('Found existing ELB: %s' % self.name, self.verbose)
    except (IndexError, BotoServerError), msg:
        if self.find_only:
            logging.info('%s does not exist' % self.name)
            return False
        # NOTE(review): despite the docstring, no create path is visible
        # here — likely truncated; a create_elb() call is expected for
        # the non-find_only case. Confirm against the full source.
def create_elb(self):
    ''' Create an ELB from the configured name, AZs and listeners. '''
    logging.debug('Create ELB %s' % self.name, self.verbose)
    try:
        new_elb = self.conn.create_load_balancer(
            self.name, self.azs_used, self.elb_listeners)
    except BotoServerError as error:
        logging.error(error.message)
    else:
        # Only record the ELB and mark creation on success.
        self.elb = new_elb
        logging.info('Created %s: %s' % (self.name, self.elb))
        self.created = True
def set_azs(self):
    ''' Ensure the configured AZs are enabled on the ELB.

    Only zones not already enabled on the ELB are added; API errors are
    logged per zone and do not abort the loop.
    '''
    azs = self.azs_used
    logging.debug("Trying to add AZs to ELB: %s" % azs, self.verbose)
    for zone in azs:
        if zone not in self.elb.availability_zones:
            # Fixed log bug: message previously interpolated the whole
            # ``azs`` list instead of the single zone being added.
            logging.debug("Adding %s to ELB" % zone, self.verbose)
            try:
                self.elb.enable_zones(zone)
            except BotoServerError as error:
                logging.error(error.message)
def add_tags(self, rids, tagset, tag_type=None):
    ''' Apply a tagset to each resource ID; True only if all succeed. '''
    failures = 0
    for resource_id in rids:
        logging.debug('Tagging resource: %s' % resource_id, self.verbose)
        if not self._add_tags(resource_id, tagset, tag_type=tag_type):
            failures += 1
    # True iff every resource was tagged successfully.
    return failures == 0
def read_files(self, conf_files):
    ''' Read in ini files, keeping only the ones that exist on disk. '''
    logging.debug('Trying %s' % conf_files, self.verbose)
    for candidate in conf_files:
        if os.path.exists(candidate):
            self.ini_files.append(candidate)
    logging.debug('Reading %s' % self.ini_files, self.verbose)
    # Expose the AWS dir env vars as interpolation defaults.
    defaults = {
        'AWS_BASE_DIR': os.getenv('AWS_BASE_DIR'),
        'AWS_CONF_DIR': os.getenv('AWS_CONF_DIR')}
    self.ini = ConfigParser.ConfigParser(defaults)
    self.ini.read(self.ini_files)
def destroy(self):
    ''' Teardown and terminate an EC2 Instance.

    Destroys the associated EIP first; only terminates when that step
    reports success. Returns True on success, None when termination
    fails (implicit None when the EIP step blocks termination).
    '''
    # Fixed typo in log message: "destory" -> "destroy".
    logging.debug(
        'Checking to see if we need to destroy EIP', self.verbose)
    if self.destroy_eip():
        logging.debug('Terminating %s' % self.name, self.verbose)
        try:
            self._instance.terminate()
        except EC2ResponseError as error:
            logging.error(error.message)
            return None
        return True
def get_user_data(self, replacements=None):
    ''' Get userdata and set replacements.

    Reads the raw user-data file once and caches it on
    ``self.user_data_raw``. Returns None when the file cannot be opened.

    NOTE(review): ``replacements`` is unused and there is no success-path
    return in this chunk — it looks truncated; the full version
    presumably applies the replacements and returns the userdata.
    '''
    path = self.get_user_data_file()
    logging.debug('user_data_file: %s' % path, self.verbose)
    if not self.user_data_raw:
        if os.path.exists(path):
            try:
                # Py2 file() builtin, equivalent to open(path, "r").
                udfile = file(path, "r")
            except IOError, msg:
                logging.error(msg)
                return None
            # Cache so the file is only read once per object.
            self.user_data_raw = udfile.read()
            udfile.close()
def add_ingress(self, port_pair, protocol, src_cidr=None, src_acct=None, src_sg=None): """ Add Ingress rule """ # pylint: disable=too-many-arguments # Appropriate number arguments of for add_ingress if src_cidr: try: self.sgrp.authorize(protocol, port_pair[0], port_pair[1], src_cidr) return True except EC2ResponseError, msg: if msg.error_code == "InvalidPermission.Duplicate": logging.debug(msg.error_message, self.verbose) else: logging.error(msg.error_message) return False
def re_associate_eip(self, steal=False, eip=None):
    ''' Reassociates an EIP address to an EC2 instance '''
    if self.node_db:
        logging.debug('Checking for EIP', self.verbose)
        eip = self.get_eip(steal)
    else:
        logging.info('No external data source is defined, skipping')
    # Guard clause: nothing to re-associate.
    if not eip:
        self.reeip = None
        return
    logging.debug(
        'Will re-associate EIP %s to %s' % (eip.public_ip, self.inst_id),
        self.verbose)
    self.reeip = eip
def wake(self):
    ''' Wake instances in cluster. Returns the count actually started. '''
    count = 0
    for instance in self.c3instances:
        if not instance.wake():
            continue
        logging.info('Waiting for %s to start' % instance.name)
        if wait_for_instance(instance, verbose=self.verbose):
            logging.debug('Wait for %s successful' % instance.name,
                          self.verbose)
            count += 1
    expected = len(self.c3instances)
    if count != expected:
        # Partial wake: warn with expected vs actual counts.
        logging.warn(
            'Asked for %d but only %d started' % (expected, count))
    return count
def hibernate(self):
    ''' Hibernate an EC2 instance (stop it and record the state). '''
    # Only a running instance can be hibernated.
    if self.get_state() != 'running':
        return None
    name_tag = self._instance.tags.get('Name')
    try:
        self._instance.stop()
    except EC2ResponseError as error:
        logging.error(error.message)
        return None
    logging.debug(
        'stopped (%s: %s) state: %s' %
        (self.inst_id, name_tag, self.get_state()), self.verbose)
    if self.node_db:
        return self.set_state('hibernating')
    return True
def finalize_start(self):
    ''' Perform EC2 actions once the instance is finally 'running' '''
    logging.debug(
        'Finalize start: %s' % self.inst_id, self.verbose)
    # Either allocate a brand-new EIP...
    if self.allocateeips:
        return self.new_eip()
    # ...or re-associate a previously held one, if any.
    if not self.reeip:
        return None
    logging.debug('Reassociating EIP %s with instance %s' %
                  (self.reeip, self.inst_id), self.verbose)
    try:
        self.reeip.associate(self.inst_id)
    except EC2ResponseError as error:
        logging.error(error.message)
        return None
    self.reeip = None
    return True
def start(self, ami, sshkey, sgs, user_data, hostname, isize, zone, nodegroups, allocateeips, use_ebsoptimized): ''' Starts an EC2 instance ''' # pylint:disable=too-many-arguments # Required for boto API logging.debug( 'C3Instance.start(%s, %s, %s, %s, %s, %s, %s, %s)' % (ami, sshkey, sgs, len(user_data), hostname, isize, zone, nodegroups), self.verbose) try: self._reservation = self.conn.run_instances( ami, 1, 1, sshkey, sgs, user_data, None, isize, zone, None, None, False, None, None, ebs_optimized=use_ebsoptimized) except EC2ResponseError, msg: logging.error(msg.message) return None
def wake(self):
    ''' Start an EC2 instance that is stopped '''
    # Only a stopped instance can be woken.
    if self.get_state() != 'stopped':
        return None
    name_tag = self._instance.tags.get('Name')
    logging.debug('Attempt starting instance %s' % name_tag,
                  self.verbose)
    # Restore the instance's EIP association before starting.
    self.re_associate_eip()
    try:
        self._instance.start()
    except EC2ResponseError as error:
        logging.error(error.message)
        return None
    logging.debug(
        'Start succeeded (%s: %s) state: %s' %
        (self.inst_id, name_tag, self.get_state()), self.verbose)
    if self.node_db:
        return self.set_state('inservice')
    return True
def wait_for_instance(instance, desired_state="up", timeout=120,
                      verbose=False):
    ''' Waits for instance to enter desired state. '''
    logging.debug('Instance: %s, Desired State: %s' %
                  (instance.name, desired_state), verbose)
    deadline = time.time() + timeout
    while time.time() < deadline:
        # Poll every 10s; analyze_state: 0 done, 1 waiting, 2 failed.
        time.sleep(10)
        state = instance.analyze_state(desired_state)
        if state == 0:
            logging.debug('Instance entered desired state: %s' %
                          desired_state, verbose)
            return True
        if state == 2:
            logging.error('Instance failed to enter desired state: %s' %
                          desired_state)
            return False
    logging.error(
        'Waiting for %s timed out after %ds' % (instance.name, timeout))
    return False
def get_resolved_ami(self, node_db):
    ''' Return resolved AMI.

    Statically-set AMIs (``ami-*``) are returned as-is (with a nag to
    use graffiti values); otherwise the graffiti tag is resolved via
    node_db.

    :raises AMINotFoundError: lookup failed or returned nothing
    :raises TooManyAMIsError: tag resolved to more than one AMI
    '''
    ami = self.get_ami()
    if ami[:4] == 'ami-':
        logging.error(
            'AMI statically set to %s. Please use graffiti values' % ami)
        return ami
    try:
        amis = node_db.get_amis(self.get_cg_region(), ami)
    except Exception:
        # Fixed: bare "except:" also trapped SystemExit and
        # KeyboardInterrupt; narrow to Exception.
        raise AMINotFoundError("No AMI matching '%s' found" % ami)
    if amis is None:
        raise AMINotFoundError("No AMI matching '%s' found" % ami)
    if len(amis) == 1:
        # next(iter(...)) instead of .values()[0]: same single value,
        # works on both Py2 lists and Py3 dict views.
        newami = next(iter(amis.values()))
        self.set_ami(newami)
        logging.debug(
            "Converted '%s' to '%s'" % (ami, newami), self.verbose)
        return newami
    elif len(amis) > 1:
        raise TooManyAMIsError("%s matches too many AMIs: %s" %
                               (ami, amis))
def read_rds_sg_config(self):
    ''' Reads RDS SG authorizations from ini files. '''
    for key, value in self.ini.items('rds_securitygroup'):
        # Only keys matching "*rule" carry authorization entries.
        if not re.match('.*rule', key):
            continue
        (rtype, rvalue) = value.split(':')
        if rtype == 'Net':
            # Named network -> CIDR lookup.
            cidr = c3.utils.naming.get_cidr(rvalue)
            if cidr:
                logging.debug('Appending RDS CIDR rule %s' % cidr,
                              self.verbose)
                self.rds_sg.add_cidr(cidr)
        elif rtype == 'CIDR':
            logging.debug('Appending RDS CIDR rule %s' % rvalue,
                          self.verbose)
            self.rds_sg.add_cidr(rvalue)
        elif rtype == 'SG':
            (oid, sid) = rvalue.split('/')
            if oid == 'self':
                acctid = c3.utils.accounts.get_account_id(
                    self.get_aws_account())
            else:
                acctid = c3.utils.accounts.get_account_id(oid)
            if acctid:
                logging.debug(
                    'Appending RDS SG rule %s:%s' % (acctid, sid),
                    self.verbose)
                self.rds_sg.add_sg(acctid, sid)
            else:
                logging.warn("Can't find account for %s" % oid)
def destroy_eip(self):
    ''' Destroy the EIP address associated with an EC2 instance.

    :returns: True when there is no EIP to release or the release
        succeeded; None when disassociation fails or every release
        retry is exhausted.
    '''
    eip = self.get_associated_eip()
    if not eip:
        # Fixed: previously fell through returning None, which made the
        # caller (destroy) skip termination for EIP-less instances.
        return True
    logging.info('Disassociating EIP from %s' % self.name)
    try:
        eip.disassociate()
    except EC2ResponseError as error:
        logging.error(error.message)
        return None
    tries = 1
    while tries <= 10:
        try:
            eip.release()
            logging.debug(
                'EIP released on try %d' % tries, self.verbose)
            return True
        except EC2ResponseError as error:
            # Fixed: log and retry instead of returning immediately —
            # the old "return None" here left "tries += 1" and the
            # sleep unreachable, so the retry loop never retried.
            logging.error(error.message)
        tries += 1
        time.sleep(5)
    return None
def read_sg_config(self):
    ''' Reads in SG config options.

    Parses "ingress <proto> <ports> <remote>" entries from the
    [securitygroup] section and adds each to self.sgrp. Ports may be a
    single port, "lo-hi", or "None" (-1/-1). Remote is one of
    "CIDR:<block>", "Net:<name>" (resolved to a CIDR), or
    "SG:<account>/<group>".
    '''
    for item in self.ini.items("securitygroup"):
        if item[1][:7] == "ingress":
            (rtype, proto, ports, remote) = item[1].split(" ")
            if ports == "None":
                # No port restriction: boto convention is -1/-1.
                (prt1, prt2) = [-1, -1]
            elif '-' in ports:
                (prt1, prt2) = ports.split("-")
            else:
                prt1 = prt2 = ports
            prt1 = int(prt1)
            prt2 = int(prt2)
            if remote[:5] == 'CIDR:':
                self.sgrp.add_cidr(proto, prt1, prt2, remote[5:])
            elif remote[:4] == 'Net:':
                # Named network must resolve to a CIDR.
                cidr = c3.utils.naming.get_cidr(remote[4:])
                if not cidr:
                    raise InvalidCIDRNameError(
                        "Network '%s' is invalid" % remote[4:])
                self.sgrp.add_cidr(proto, prt1, prt2, cidr)
            elif remote[:3] == 'SG:':
                acct, sgrp = remote[3:].split("/")
                if acct == 'self':
                    # "self" means this config's own AWS account.
                    acctid = c3.utils.accounts.get_account_id(
                        account_name=self.get_aws_account())
                elif acct == 'amazon-elb':
                    logging.debug('acctid set to amazon-elb',
                                  self.verbose)
                    acctid = 'amazon-elb'
                else:
                    acctid = c3.utils.accounts.get_account_id(
                        account_name=acct)
                logging.debug('%s == %s' % (acct, acctid), self.verbose)
                if acctid:
                    self.sgrp.add_sg(proto, prt1, prt2, acctid, sgrp)
                else:
                    logging.error("Can't find my own account.")
            logging.debug(
                "Allowing %s %s for ports %d to %d from %s" %
                (rtype, proto, prt1, prt2, remote), self.verbose)
# NOTE(review): this chunk begins mid-function — it is the tail of an
# add_ingress(...) definition (the src_cidr branch's except handling),
# then the src_acct/src_sg branch, then the start of destroy(); the
# chunk ends mid-while. Code kept verbatim, comments only.
            return True
        except EC2ResponseError, msg:
            if msg.error_code == "InvalidPermission.Duplicate":
                # Duplicate rules are benign; log quietly.
                logging.debug(msg.error_message, self.verbose)
            else:
                logging.error(msg.error_message)
            return False
    elif src_acct and src_sg:
        try:
            # Account/SG-sourced rule via the connection-level call.
            self.conn.authorize_security_group(
                self.name, src_sg, src_acct, protocol,
                port_pair[0], port_pair[1], None
            )
            return True
        except EC2ResponseError, msg:
            if msg.error_code == "InvalidPermission.Duplicate":
                logging.debug(msg.error_message, self.verbose)
            else:
                logging.error(msg.error_message)
            return False
    else:
        # Neither a CIDR nor an account/SG source was supplied.
        logging.error(
            "add_ingress(port_pair=%s, protocol=%s, src_cidr=%s, "
            "rc_acct=%s, src_sg=%s) FAILED (missing data?)" %
            (port_pair, protocol, src_cidr, src_acct, src_sg)
        )
        return False

def destroy(self):
    """ Destroys a Security Group """
    stime = 10
    timeout = 120
    # NOTE(review): loop body continues in a later chunk.
    while timeout > 0:
# NOTE(review): chunk starts mid-function — tail of an EIP method's
# success/except handling — and ends mid-get_associated_eip. Code kept
# verbatim, comments only.
        return True
    except EC2ResponseError, msg:
        logging.error(msg.message)
        return None

def get_eip(self, steal=False):
    ''' Figure out if my hostname is an EIP.

    Resolves this node's public hostname (from node_db) to an IP and
    looks up the matching EIP. Returns the EIP only when it is free or
    ``steal`` is set; otherwise None.
    '''
    if self.eip:
        # Cached from a previous lookup.
        return self.eip
    data = self.node_db.get_node_by_instance_id(self.inst_id)[0]
    try:
        myip = socket.gethostbyname(data['ec2_public_hostname'])
    except socket.gaierror, msg:
        logging.error(msg)
        return None
    logging.debug('myip: %s' % myip, self.verbose)
    eip = self.get_eip_by_addr(myip)
    if eip:
        logging.debug('EIP: %s associated with %s' %
                      (eip, eip.instance_id), self.verbose)
        # Only take an EIP that is unattached, unless told to steal it.
        if steal or not eip.instance_id:
            return eip
    return None

def get_associated_eip(self):
    ''' Return EIP associated with EC2 instance '''
    try:
        eips = self.conn.get_all_addresses()
    except EC2ResponseError, msg:
        logging.error(msg.message)
        return None
# NOTE(review): chunk starts mid-get_user_data (no def line visible)
# and ends mid-get_tagset. Code kept verbatim, comments only.
    path = self.get_user_data_file()
    logging.debug('user_data_file: %s' % path, self.verbose)
    if not self.user_data_raw:
        if os.path.exists(path):
            try:
                udfile = file(path, "r")
            except IOError, msg:
                logging.error(msg)
                return None
            # Cache raw userdata so the file is read only once.
            self.user_data_raw = udfile.read()
            udfile.close()
    udata = self.user_data_raw
    if replacements:
        # Apply literal key -> value substitutions to the userdata.
        for key in replacements.keys():
            logging.debug(
                'Replacing %s with %s in %s' %
                (key, replacements[key], path), self.verbose)
            udata = udata.replace(key, replacements[key])
    return udata.strip()

def get_tagset(self):
    ''' Return the tagset cost tags '''
    self.tagset['BusinessUnit'] = self.get_ini(
        "tags", "business_unit", str)
    self.tagset['Team'] = self.get_ini("tags", "team", str)
    self.tagset['Project'] = self.get_ini("tags", "project", str)
    # meta.ini configs carry explicit Component/Env tags.
    if any(ent for ent in self.ini_files if ent.endswith('meta.ini')):
        self.tagset['Component'] = self.get_ini("tags", "component", str)
        self.tagset['Env'] = self.get_ini("tags", "env", str)
    else:
        comp = self.get_ini("tags", "component", str)
        # NOTE(review): chunk ends mid-statement below.
        if comp[:4] == self.server_class + ' ':
def cluster_create(self):
    """ Provisions a new cluster based on a config.

    Applies SG rules, launches the configured number of instances
    (optionally retrying across AZs), creates EBS volumes when
    configured, then waits for launches, tags instances and, for prd,
    whitelists them in puppet. Exits the process if every launch fails.
    """
    self.conn = self.aws_conn("ec2")
    node_db = nv_connect(self.opts.nv_ini)
    success = 0
    failed = 0
    self.check_config_types()
    logging.info("Applying SG Rules to %s" %
                 self.cconfig.get_primary_sg())
    self.sg_rules()
    if self.cconfig.get_count():
        servers = dict()
        logging.debug(
            "Creating %d %s in %s using %s." % (
                self.cconfig.get_count(), self.cconfig.get_size(),
                self.cconfig.get_azs(), self.cconfig.get_ami()),
            self.opts.verbose,
        )
        # Reserve hostnames for the new instances up front.
        self.hostnames = c3.utils.naming.find_available_hostnames(
            self.cconfig.get_primary_sg(),
            self.cconfig.get_count(),
            self.cconfig.get_aws_account(),
            self.cconfig.get_aws_region(),
            "ctgrd.com",
            node_db,
        )
        start_time = time.time()
        logging.debug("Creating new servers: %s" % self.hostnames,
                      self.opts.verbose)
        for host in self.hostnames:
            servers[host] = C3Instance(
                conn=self.conn, node_db=node_db,
                verbose=self.opts.verbose)
            userdata = self.cconfig.get_user_data(
                self.userdata_replacements(host))
            # With --substitute-zones, retry the launch once per AZ.
            tries = 1
            if self.opts.substitute_zones:
                tries = len(self.cconfig.get_azs())
            while tries > 0:
                tries -= 1
                used_az = self.cconfig.get_next_az()
                logging.info("Starting %s in %s" % (host, used_az))
                # NOTE(review): 11 arguments are passed here but a
                # start() in this file accepts only 10 (no placement
                # group) — version skew; confirm against full source.
                instance = servers[host].start(
                    self.cconfig.get_ami(),
                    self.cconfig.get_ssh_key(),
                    self.cconfig.get_sgs(),
                    userdata,
                    host,
                    self.cconfig.get_size(),
                    used_az,
                    self.cconfig.get_node_groups(),
                    self.cconfig.get_allocate_eips(),
                    self.cconfig.get_use_ebs_optimized(),
                    self.cconfig.get_placement_group(),
                )
                if instance:
                    success += 1
                    break
                else:
                    if tries:
                        logging.warn(
                            "Failed to create %s in %s, retrying" %
                            (host, used_az))
                    else:
                        logging.error(
                            "Failed to create %s in all AZs, "
                            "trying next instance" % host)
                        failed += 1
            # Provision configured EBS volumes in the AZ actually used.
            if len(self.cconfig.get_ebs_config()) > 0:
                self.create_ebs(used_az, host, servers[host].get_id())
        if failed == self.cconfig.get_count():
            logging.error("%d of %d failed to create, dying" %
                          (failed, self.cconfig.get_count()))
            sys.exit(1)
        logging.info("%d of %d server(s) created" % (success,
                     self.cconfig.get_count()))
        self.wait_for_servers(servers, start_time, success)
        if self.volume_instances:
            self.attach_ebs()
        self.tag_by_instance(servers)
        # Production hosts must be whitelisted in puppet.
        if self.cconfig.get_server_env() == "prd":
            self.puppet_whitelist()
        logging.info("Cluster config complete")
# NOTE(review): chunk begins with the tail of a tagging helper's
# success/failure return and ends mid-tag_s3_bucket. Code kept
# verbatim, comments only.
        return True
    else:
        return False

def tag_s3_bucket(self, rid, tagset):
    ''' Tags S3 buckets with complicated s3 tagging shenanigans.

    S3 has no incremental tag API: existing tags must be fetched and a
    whole new tag set written back.
    '''
    new_tags = list()
    failed = 0
    logging.info('Tagging S3 bucket %s' % rid)
    try:
        bucket = self.conn.get_bucket(rid)
    except S3ResponseError, msg:
        logging.error(msg.message)
        failed += 1
    try:
        logging.debug('Check for existing tags...', self.verbose)
        existing_tags = bucket.get_tags()[0]
        logging.debug('Existing tags: %s' % existing_tags.to_xml(),
                      self.verbose)
    except S3ResponseError, msg:
        # No tag set yet (or fetch failed): start from empty.
        logging.error(msg.message)
        existing_tags = dict()
    try:
        tags = boto.s3.tagging.Tags()
        tset = boto.s3.tagging.TagSet()
    except S3ResponseError, msg:
        logging.error(msg.message)
        failed += 1
    # NOTE(review): chunk ends inside this loop's try block.
    for tname, tvalue in tagset.items():
        logging.debug('%s:%s is a new tag' % (tname, tvalue),
                      self.verbose)
        try:
            tset.add_tag(tname, tvalue)