def _get_instance(self, vm_id): # contact EC2 API to get VM info try: reservations = self.conn.get_all_instances(instance_ids=[vm_id]) except boto.exception.EC2ResponseError as err: # scrape actual error kind and message out of the # exception; we do this mostly for sensible logging, but # could be an actual improvement to Boto to provide # different exception classes based on the <Code> # element... # XXX: is there a more robust way of doing this? match = _BOTO_ERRMSG_RE.search(str(err)) if match: raise UnrecoverableError( "Error getting info on VM %s: EC2ResponseError/%s: %s" % (vm_id, match.group('code'), match.group('message')), do_log=True) else: # fall back to normal reporting... raise UnrecoverableError("Error getting VM %s: %s" % (vm_id, err), do_log=True) if not reservations: raise InstanceNotFound("No instance with id %s has been found." % vm_id) instances = dict( (i.id, i) for i in reservations[0].instances if reservations) if vm_id not in instances: raise UnrecoverableError("No instance with id %s has been found." % vm_id) return instances[vm_id]
def get_vm(self, vm_id, force_reload=False):
    """
    Return the VM object with id `vm_id`.

    If it is found in the local cache, that object is returned.
    Otherwise a new VM object is searched for in the EC2 endpoint.
    """
    # serve from the local cache, unless a reload was requested
    if not force_reload:
        try:
            return self._vm_cache[vm_id]
        except KeyError:
            pass

    # XXX: should this be an `assert` instead?
    if not self.conn:
        raise UnrecoverableError(
            "No connection set for `VMPool('%s')`" % self.path)

    instance = self._get_instance(vm_id)
    if not hasattr(instance, 'preferred_ip'):
        # read from file
        instance.preferred_ip = gc3libs.utils.read_contents(
            os.path.join(self.path, instance.id))

    # record the freshly-loaded VM in the cache and ID set
    self._vm_cache[vm_id] = instance
    if vm_id not in self._vm_ids:
        self._vm_ids.add(vm_id)
        self.changed = True
    return instance
def _setup_security_groups(self):
    """
    Check the current configuration and set up the security group
    if it does not exist.
    """
    self._connect()
    name = self.security_group_name
    if not name:
        gc3libs.log.error("Group name in `security_group_name`"
                          " configuration option cannot be empty!")
        return

    try:
        self._get_security_group(name)
    except NotFound:
        # group is missing: create it, then check again that it is
        # actually visible on the endpoint
        try:
            gc3libs.log.info("Creating security group %s", name)
            self.client.security_groups.create(
                name, "GC3Pie_%s" % name)
        except Exception as err:
            gc3libs.log.error("Error creating security group %s: %s",
                              name, err)
            raise UnrecoverableError(
                "Error creating security group %s: %s"
                % (name, err))
        self._get_security_group(name)
def _import_keypair(self):
    """
    Create a new keypair and import the public key defined in the
    configuration file.
    """
    pubkey_path = os.path.expanduser(self.public_key)
    with open(pubkey_path) as pubkey_file:
        # keep the read inside the `try` so a failure reading the
        # key file is reported the same way as an import failure
        try:
            imported_key = self._conn.import_key_pair(
                self.keypair_name, pubkey_file.read())
            gc3libs.log.info(
                "Successfully imported key `%s`"
                " with fingerprint `%s` as keypair `%s`",
                imported_key.name, imported_key.fingerprint,
                self.keypair_name)
        except Exception as err:
            raise UnrecoverableError("Error importing keypair %s: %s"
                                     % (self.keypair_name, err))
def _import_keypair(self):
    """
    Create a new keypair and import the public key defined in the
    configuration file.

    :return: the newly created keypair object, as returned by
        ``self.client.keypairs.get()``
    :raise UnrecoverableError: if reading the public key file or
        talking to the OpenStack API fails.
    """
    # FIX: the file handle used to be closed only on the error path
    # (inside `except`), leaking it on every successful import; a
    # `with` block guarantees it is closed in all cases.
    with open(os.path.expanduser(self.public_key)) as fd:
        try:
            key_material = fd.read()
            self.client.keypairs.create(self.keypair_name, key_material)
            keypair = self.client.keypairs.get(self.keypair_name)
            gc3libs.log.info(
                "Successfully imported key `%s` with fingerprint `%s`"
                " as keypair `%s`"
                % (self.public_key, keypair.fingerprint,
                   self.keypair_name))
            return keypair
        except Exception as ex:
            raise UnrecoverableError("Error importing keypair %s: %s"
                                     % (self.keypair_name, ex))
def _setup_security_groups(self):
    """
    Check the current configuration and set up the security group
    if it does not exist.

    If the group named by `self.security_group_name` is missing on
    the endpoint, create it and authorize every rule listed in
    `self.security_group_rules`.  If the group already exists,
    authorize only the configured rules that are not already present.
    """
    if not self.security_group_name:
        gc3libs.log.error("Group name in `security_group_name`"
                          " configuration option cannot be empty!")
        return
    security_groups = self._conn.get_all_security_groups()
    groups = dict((g.name, g) for g in security_groups)
    # Check if the security group exists already
    if self.security_group_name not in groups:
        try:
            gc3libs.log.info("Creating security group %s",
                             self.security_group_name)
            security_group = self._conn.create_security_group(
                self.security_group_name,
                "GC3Pie_%s" % self.security_group_name)
        except Exception as ex:
            gc3libs.log.error("Error creating security group %s: %s",
                              self.security_group_name, ex)
            raise UnrecoverableError(
                "Error creating security group %s: %s"
                % (self.security_group_name, ex))
        # newly created group: authorize every configured rule,
        # tolerating individual failures when `error_ignored` says so
        for rule in self.security_group_rules:
            try:
                gc3libs.log.debug(
                    "Adding rule %s to security group %s.",
                    rule, self.security_group_name)
                security_group.authorize(**rule)
            except Exception as ex:
                if gc3libs.error_ignored(
                        # context:
                        # - module 'ec2',
                        # - class 'EC2Lrms',
                        # - method 'setup_security_groups',
                        # - actual error class
                        ex.__class__.__name__,
                        # - additional keywords
                        'setup', 'security', 'network', 'cloud',
                ):
                    gc3libs.log.info(
                        "Ignoring error adding rule %s"
                        " to security group %s: %s",
                        rule, self.security_group_name, ex)
                else:
                    # propagate exception to caller
                    raise
    else:
        # Check if the security group has all the rules we want
        security_group = groups[self.security_group_name]
        current_rules = []
        # NOTE(review): this assumes every existing rule has at least
        # one grant and integer-parsable ports -- TODO confirm this
        # holds e.g. for ICMP rules on the target EC2 endpoint
        for rule in security_group.rules:
            rule_dict = {
                'ip_protocol': rule.ip_protocol,
                'from_port': int(rule.from_port),
                'to_port': int(rule.to_port),
                'cidr_ip': str(rule.grants[0]),
            }
            current_rules.append(rule_dict)
        # authorize only the configured rules not already present;
        # errors here are *not* filtered through `error_ignored`
        for new_rule in self.security_group_rules:
            if new_rule not in current_rules:
                security_group.authorize(**new_rule)
def _create_instance(self, image_id, instance_type=None, user_data=None):
    """
    Create an instance using the image `image_id` and instance
    type `instance_type`. If not `instance_type` is defined, use
    the default.

    This method will also setup the keypair and the security
    groups, if needed.

    :param str image_id: EC2 AMI identifier to boot from.
    :param instance_type: EC2 instance type name; if `None`, the
        endpoint default is used (no `instance_type` arg is passed).
    :param user_data: opaque user-data blob handed to the new VM.
    :return: the newly started `boto` instance object (also added
        to `self._vmpool`).
    :raise UnrecoverableError: if starting the instance fails.
    """
    self._connect()
    args = {'key_name': self.keypair_name,
            'min_count': 1,
            'max_count': 1}
    if instance_type:
        args['instance_type'] = instance_type
    if user_data:
        args['user_data'] = user_data

    # Check if the desired keypair is present
    keypairs = dict((k.name, k) for k in self._conn.get_all_key_pairs())
    if self.keypair_name not in keypairs:
        gc3libs.log.info(
            "Keypair `%s` not found: creating it using public key `%s`"
            % (self.keypair_name, self.public_key))
        # Create keypair if it does not exist and give an error if it
        # exists but have different fingerprint
        self._import_keypair()
    else:
        self._have_keypair(keypairs[self.keypair_name])

    # Setup security groups
    if 'security_group_name' in self:
        self._setup_security_groups()
        args['security_groups'] = [self.security_group_name]

    # FIXME: we should add check/creation of proper security
    # groups
    gc3libs.log.debug("Create new VM using image id `%s`", image_id)
    try:
        reservation = self._conn.run_instances(image_id, **args)
    except boto.exception.EC2ResponseError as err:
        # scrape actual error kind and message out of the
        # exception; we do this mostly for sensible logging, but
        # could be an actual improvement to Boto to provide
        # different exception classes based on the <Code>
        # element...
        # XXX: is there a more robust way of doing this?
        match = _BOTO_ERRMSG_RE.search(str(err))
        if match:
            raise UnrecoverableError(
                "Error starting instance: EC2ResponseError/%s: %s"
                % (match.group('code'), match.group('message')))
        else:
            # fall back to normal reporting...
            raise UnrecoverableError("Error starting instance: %s" % err)
    except Exception as ex:
        # any non-EC2ResponseError failure gets the generic message
        raise UnrecoverableError("Error starting instance: %s" % ex)
    # min_count/max_count are both 1, so exactly one instance is expected
    vm = reservation.instances[0]
    self._vmpool.add_vm(vm)
    gc3libs.log.info(
        "VM with id `%s` has been created and is in %s state.",
        vm.id, vm.state)
    return vm
def __init__(
        self, name,
        # these parameters are inherited from the `LRMS` class
        architecture, max_cores, max_cores_per_job,
        max_memory_per_core, max_walltime,
        # these are specific of the EC2Lrms class
        ec2_region, keypair_name, public_key, vm_auth,
        image_id=None, image_name=None, ec2_url=None,
        instance_type=None, auth=None, vm_pool_max_size=None,
        user_data=None, **extra_args):
    """
    Create a new EC2-backed resource.

    Validates the configuration (keypair name, pool size, image
    selection), resolves the EC2 endpoint URL (falling back to the
    ``EC2_URL`` environment variable), and prepares the keyword
    arguments later used to instantiate per-VM sub-resources.
    No connection to the EC2 endpoint is made here.

    :raise ConfigurationError: on an invalid `vm_pool_max_size`,
        an invalid keypair name, or when neither `image_id` nor
        `image_name` is given.
    :raise UnrecoverableError: if the resource `type` does not name
        a known sub-resource type.
    """
    LRMS.__init__(
        self, name,
        architecture, max_cores, max_cores_per_job,
        max_memory_per_core, max_walltime, auth, **extra_args)

    self.free_slots = int(max_cores)
    self.user_run = 0
    self.user_queued = 0
    self.queued = 0
    self.vm_pool_max_size = vm_pool_max_size
    if vm_pool_max_size is not None:
        try:
            self.vm_pool_max_size = int(self.vm_pool_max_size)
        except ValueError:
            raise ConfigurationError(
                "Value for `vm_pool_max_size` must be an integer,"
                " was %s instead." % vm_pool_max_size)

    # resource type is of the form ``ec2+<subtype>``; keep the part
    # after the first `+`
    self.subresource_type = self.type.split('+', 1)[1]
    if self.subresource_type not in available_subresource_types:
        raise UnrecoverableError("Invalid resource type: %s" % self.type)

    self.region = ec2_region

    # Mapping of job.execution._lrms_vm_id => LRMS
    self.subresources = {}

    auth = self._auth_fn()
    self.ec2_access_key = auth.ec2_access_key
    self.ec2_secret_key = auth.ec2_secret_key
    # endpoint URL: explicit config wins, then the EC2_URL env var
    if ec2_url is None:
        ec2_url = os.getenv('EC2_URL')
    if ec2_url is None:
        raise gc3libs.exceptions.InvalidArgument(
            "Cannot connect to the EC2 API:"
            " No 'EC2_URL' environment variable defined,"
            " and no 'ec2_url' argument passed to the EC2 backend.")
    self.ec2_url = gc3libs.url.Url(ec2_url)

    # Keypair names can only contain alphanumeric chars!
    if re.match(r'.*\W.*', keypair_name):
        raise ConfigurationError(
            "Keypair name `%s` is invalid: keypair names can only contain "
            "alphanumeric chars: [a-zA-Z0-9_]" % keypair_name)
    self.keypair_name = keypair_name
    self.public_key = os.path.expanduser(
        os.path.expandvars(public_key.strip()))
    self.image_id = image_id
    self.image_name = image_name
    self.instance_type = instance_type
    self._instance_type_specs = {}
    self.user_data = user_data

    self._parse_security_group()
    # actual connection is established lazily, elsewhere
    self._conn = None

    # `self.subresource_args` is used to create subresources
    self.subresource_args = extra_args
    self.subresource_args['type'] = self.subresource_type
    self.subresource_args['architecture'] = self['architecture']
    self.subresource_args['max_cores'] = self['max_cores']
    self.subresource_args['max_cores_per_job'] = self['max_cores_per_job']
    self.subresource_args['max_memory_per_core'] = \
        self['max_memory_per_core']
    self.subresource_args['max_walltime'] = self['max_walltime']
    # SSH-specific configuration
    self.subresource_args['transport'] = 'ssh'
    self.subresource_args['auth'] = vm_auth
    self.subresource_args['ssh_timeout'] = 7  # FIXME: hard-coded!
    self.subresource_args['ignore_ssh_host_keys'] = True
    self.subresource_args['keyfile'] = self.public_key
    # the SSH transport needs the *private* key: strip a `.pub` suffix
    if self.subresource_args['keyfile'].endswith('.pub'):
        self.subresource_args['keyfile'] = \
            self.subresource_args['keyfile'][:-len('.pub')]
    # ShellcmdLrms by default trusts the configuration, instead of
    # checking the real amount of memory and number of cpus, but
    # we need the real values instead.
    if self.subresource_type == gc3libs.Default.SHELLCMD_LRMS:
        self.subresource_args['override'] = 'True'

    if not image_name and not image_id:
        raise ConfigurationError(
            "No `image_id` or `image_name` has been specified in the"
            " configuration file.")
def _create_instance(self, image_id, name='gc3pie-instance',
                     instance_type=None, user_data=None):
    """
    Create an instance using the image `image_id` and instance
    type `instance_type`. If no `instance_type` is defined, use
    the default.

    This method will also setup the keypair and the security
    groups, if needed.
    """
    create_args = {}
    if user_data:
        create_args['userdata'] = user_data

    # ensure the configured keypair exists on the remote endpoint
    try:
        keypair = self._get_keypair(self.keypair_name)
    except NotFound:
        gc3libs.log.info(
            "Keypair `%s` not found: creating it using public key `%s`"
            % (self.keypair_name, self.public_key))
        # Create keypair if it does not exist and give an error if it
        # exists but have different fingerprint
        self._import_keypair()
    else:
        self._have_keypair(keypair)

    # fall back to the resource-level default flavor
    if not instance_type:
        instance_type = self.instance_type

    # Setup security groups
    if 'security_group_name' in self:
        self._setup_security_groups()
        create_args['security_groups'] = [self.security_group_name]

    # attach the new VM to the configured networks, if any
    nics = None
    if self.network_ids:
        nics = [
            {'net-id': net_id.strip(), 'v4-fixed-ip': ''}
            for net_id in self.network_ids.split(',')
        ]
        gc3libs.log.debug(
            "Specifying networks for vm %s: %s",
            name, ', '.join(nic['net-id'] for nic in nics))
    create_args['nics'] = nics

    gc3libs.log.debug("Create new VM using image id `%s`", image_id)
    try:
        vm = self.client.servers.create(
            name, image_id, instance_type,
            key_name=self.keypair_name, **create_args)
    except Exception as err:
        # any failure from the nova client is fatal for this call
        raise UnrecoverableError("Error starting instance: %s" % err)

    self._vmpool.add_vm(vm)
    gc3libs.log.info(
        "VM with id `%s` has been created and is in %s state.",
        vm.id, vm.status)
    return vm
def __init__(self, name,
             # these parameters are inherited from the `LRMS` class
             architecture, max_cores, max_cores_per_job,
             max_memory_per_core, max_walltime,
             # these are specific of the OpenStackLrms class
             keypair_name, public_key, vm_auth,
             os_region=None, image_id=None, os_auth_url=None,
             instance_type=None, auth=None,
             vm_pool_max_size=None, user_data=None,
             vm_os_overhead=gc3libs.Default.VM_OS_OVERHEAD,
             # extra args are used to instanciate "sub-resources"
             **extra_args):
    """
    Create a new OpenStack-backed resource.

    Validates the configuration (keypair name, pool size, image id),
    resolves the auth URL (falling back to the ``OS_AUTH_URL``
    environment variable), consumes ``*_instance_type`` config items,
    prepares sub-resource arguments, and finally builds the API
    client and the persistent VM pool.

    :raise ConfigurationError: on an invalid `vm_pool_max_size`,
        an invalid keypair name, or a missing `image_id`.
    :raise UnrecoverableError: if the resource `type` does not name
        a known sub-resource type.
    """
    # Note: this creates attributes from key/value pairs given in the
    # `extra_args` parameters. In particular, the `self.type` attribute
    # (referenced below) is set in this chained constructor...
    LRMS.__init__(
        self, name,
        architecture, max_cores, max_cores_per_job,
        max_memory_per_core, max_walltime, auth, **extra_args)

    self.free_slots = int(max_cores)
    self.user_run = 0
    self.user_queued = 0
    self.queued = 0
    self._flavors = []
    self.vm_pool_max_size = vm_pool_max_size
    if vm_pool_max_size is not None:
        try:
            self.vm_pool_max_size = int(self.vm_pool_max_size)
        except ValueError:
            raise ConfigurationError(
                "Value for `vm_pool_max_size` must be an integer,"
                " was %s instead." % vm_pool_max_size)

    # pylint: disable=no-member
    self.subresource_type = self.type.split('+', 1)[1]
    if self.subresource_type not in available_subresource_types:
        raise UnrecoverableError("Invalid resource type: %s" % self.type)

    # Mapping of job.execution.instance_id => LRMS
    self.subresources = {}

    auth = self._auth_fn()
    # auth URL: explicit config wins, then the OS_AUTH_URL env var
    if os_auth_url is None:
        os_auth_url = os.getenv('OS_AUTH_URL')
    if os_auth_url is None:
        raise gc3libs.exceptions.InvalidArgument(
            "Cannot connect to the OpenStack API:"
            " No 'OS_AUTH_URL' environment variable defined,"
            " and no 'os_auth_url' argument passed"
            " to the OpenStack backend.")
    self.os_auth_url = os_auth_url
    self.os_username = auth.os_username
    self.os_password = auth.os_password
    self.os_tenant_name = auth.os_project_name
    self.os_region_name = os_region
    # (a second `self.os_auth_url is None` check used to live here;
    # it was unreachable -- a `None` URL already raised above -- and
    # has been removed)

    # Keypair names can only contain alphanumeric chars!
    if not set(keypair_name).issubset(set(ascii_letters + digits + '_')):
        raise ConfigurationError(
            "Keypair name `%s` is invalid: keypair names can only contain "
            "alphanumeric chars: [a-zA-Z0-9_]" % keypair_name)
    self.keypair_name = keypair_name
    self.public_key = os.path.expanduser(
        os.path.expandvars(public_key.strip()))
    self.image_id = image_id
    self.instance_type = instance_type
    self.user_data = user_data
    self.vm_os_overhead = gc3libs.quantity.Memory(vm_os_overhead)
    self._parse_security_group()
    self._conn = None

    # `*_instance_type` config items should be consumed here,
    # not in any sub-resource.
    # FIX: iterate over a snapshot of the items -- popping keys from
    # the dict while iterating `.items()` directly raises
    # `RuntimeError: dictionary changed size during iteration` on
    # Python 3.
    for key, value in list(extra_args.items()):
        if key.endswith('_instance_type'):
            self[key] = value
            extra_args.pop(key)

    # `self.subresource_args` is used to create subresources
    self.subresource_args = extra_args
    self.subresource_args['type'] = self.subresource_type
    self.subresource_args['architecture'] = self['architecture']
    self.subresource_args['max_cores'] = self['max_cores']
    self.subresource_args['max_cores_per_job'] = self['max_cores_per_job']
    self.subresource_args['max_memory_per_core'] = \
        self['max_memory_per_core']
    self.subresource_args['max_walltime'] = self['max_walltime']
    # SSH-specific configuration
    self.subresource_args['transport'] = 'ssh'
    self.subresource_args['auth'] = vm_auth
    self.subresource_args['ssh_timeout'] = 7  # FIXME: hard-coded!
    self.subresource_args['ignore_ssh_host_keys'] = True
    self.subresource_args['keyfile'] = self.public_key
    # the SSH transport needs the *private* key: strip a `.pub` suffix
    if self.subresource_args['keyfile'].endswith('.pub'):
        self.subresource_args['keyfile'] = \
            self.subresource_args['keyfile'][:-len('.pub')]
    # ShellcmdLrms by default trusts the configuration, instead of
    # checking the real amount of memory and number of cpus, but
    # we need the real values instead.
    if self.subresource_type == gc3libs.Default.SHELLCMD_LRMS:
        self.subresource_args['override'] = 'True'

    if image_id is None:
        raise ConfigurationError(
            "No `image_id` specified in the configuration file.")

    # "Connect" to the cloud (connection is actually performed
    # only when needed by the `Client` class.
    self.client = self._new_client()

    # Set up the VMPool persistent class. This has been delayed
    # until here because otherwise self._conn is None
    pooldir = os.path.join(os.path.expandvars(OpenStackLrms.RESOURCE_DIR),
                           'vmpool', self.name)
    self._vmpool = OpenStackVMPool(pooldir, self.client)