def _ensure_repos(self, updatedb=True):
    """Recreate the scalr package repository definition and optionally refresh
    the package manager cache.

    :param updatedb: when True, retry ``pkgmgr.updatedb()`` for up to 2 minutes.
    """
    if "release-latest" in self.repo_url or "release-stable" in self.repo_url:
        LOG.warn("Special branches release/latest and release/stable currently doesn't work")
        self.repo_url = devel_repo_url_for_branch("master")
    repo = pkgmgr.repository("scalr-{0}".format(self.repository), self.repo_url)
    # Delete previous repository files
    for filename in glob.glob(os.path.dirname(repo.filename) + os.path.sep + "scalr*"):
        if os.path.isfile(filename):
            os.remove(filename)
    if "buildbot.scalr-labs.com" in self.repo_url and not linux.os.windows:
        self._configure_devel_repo(repo)
    elif linux.os.debian_family:
        self._apt_pin_release("scalr")  # make downgrades possible
    elif linux.os.redhat_family or linux.os.oracle_family:
        self._yum_prioritize(repo)
    # Ensure new repository
    repo.ensure()
    if updatedb:
        LOG.info("Updating packages cache")

        def do_updatedb():
            try:
                self.pkgmgr.updatedb()
                return True
            except Exception:
                # [FIX] was a bare 'except:' which also swallowed
                # SystemExit/KeyboardInterrupt, making the retry loop
                # uninterruptible. Exception is still broad enough to
                # retry on any package manager error.
                LOG.warn("Package manager error", exc_info=sys.exc_info())

        wait_until(do_updatedb, sleep=10, timeout=120)
def create_volume(conn, name, zone_id, size=None, disk_offering_id=None, snap_id=None, logger=None, timeout=DEFAULT_TIMEOUT):
    """Create a CloudStack volume and block until it reaches an available state.

    :param conn: CloudStack API connection
    :param snap_id: when given, the volume is restored from this snapshot and
        any explicit disk offering is ignored (the snapshot defines it)
    :return: the created volume object
    """
    logger = logger or LOG
    if snap_id:
        # The snapshot dictates the disk offering
        disk_offering_id = None
    msg = "Creating volume '%s' in zone %s%s%s%s" % (
        name,
        zone_id,
        size and ' (size: %sG)' % size or '',
        snap_id and ' from snapshot %s' % snap_id or '',
        disk_offering_id and ' with disk offering %s' % disk_offering_id or '')
    logger.debug(msg)
    if snap_id:
        # Source snapshot must be fully backed up first
        wait_snapshot(conn, snap_id, logger)
    vol = conn.createVolume(name,
                            size=size,
                            diskOfferingId=disk_offering_id,
                            snapshotId=snap_id,
                            zoneId=zone_id)
    logger.debug('Volume %s created%s', vol.id,
                 snap_id and ' from snapshot %s' % snap_id or '')
    if vol.state not in AVAIL_STATES:
        logger.debug('Checking that volume %s is available', vol.id)
        wait_until(
            lambda: conn.listVolumes(id=vol.id)[0].state in AVAIL_STATES,
            logger=logger,
            timeout=timeout,
            error_text="Volume %s wasn't available in a reasonable time" % vol.id)
        logger.debug('Volume %s available', vol.id)
    return vol
def make_snapshot(self, volume):
    """Write the prepared image onto *volume*, clean it, then snapshot it.

    :return: the id of the completed snapshot
    """
    prepared_image_path = os.path.join(self.destination, self.image_name)
    LOG.debug('sgp_dd image into volume %s' % volume.device)
    # image_size is in GB; sgp_dd works in 8k blocks here
    system2(('sgp_dd',
             'if=' + prepared_image_path,
             'of=' + volume.device,
             'bs=8k',
             'count=%s' % (self.image_size * 1024 * 1024 / 8)))
    volume.mount()
    self.clean_snapshot(volume)
    LOG.debug('detaching volume')
    volume.detach()
    LOG.debug('Making snapshot of volume %s' % volume.device)
    snapshot = volume.snapshot()
    util.wait_until(
        lambda: snapshot.status() == 'completed',
        logger=LOG,
        error_text='EBS snapshot %s wasnt completed' % snapshot.id)
    LOG.debug('Snapshot is made')
    # Re-attach and mount the volume for subsequent use
    volume.ensure(mount=True)
    return snapshot.id
def on_BeforeHostTerminate(self, *args):
    """Gracefully remove this Cassandra node from the ring before termination.

    :raises HandlerError: when nodetool decommission reports an error
    """
    cassandra.start_service()
    err = system2('nodetool -h localhost decommission', shell=True)[2]
    if err:
        raise HandlerError('Cannot decommission node: %s' % err)
    wait_until(self._is_decommissioned,
               timeout=300,
               error_text="Node wasn't decommissioned in a reasonable time")
    cassandra.stop_service()
def on_host_init_response(self, hir): LOG.info('Configuring block device mountpoints') with bus.initialization_op as op: with op.phase(self._phase_plug_volume): wait_until( self._plug_all_volumes, sleep=10, timeout=600, error_text= 'Cannot attach and mount disks in a reasonable time') volumes = hir.body.get('volumes') or [] if volumes: LOG.debug('HIR volumes: %s', volumes) for i in range(0, len(volumes)): vol = volumes[i] template = vol.pop('template', None) from_template_if_missing = vol.pop('from_template_if_missing', None) vol = storage2.volume(**vol) LOG.info('Ensuring %s volume %s', vol.type, dict(vol)) try: vol.ensure(mount=bool(vol.mpoint), mkfs=True) except storage2.VolumeNotExistsError, e: if template and from_template_if_missing == '1': vol = storage2.volume(**template) LOG.warn( 'Volume %s not exists, re-creating %s volume from config: %s', str(e), vol.type, dict(vol)) vol.ensure(mount=bool(vol.mpoint), mkfs=True) else: raise self._volumes.append(dict(vol))
def _detach_volume(self): volume_id = self.id self._check_cinder_connection() volume = self._cinder.volumes.get(volume_id) LOG.debug('Detaching Cinder volume %s', volume_id) if volume.status != 'available': try: # self._cinder.volumes.detach(volume_id) self._check_nova_connection() server_id = volume.attachments[0]['server_id'] self._nova.volumes.delete_server_volume(server_id, volume_id) except BaseException, e: LOG.error('Exception caught when detaching volume: %s', e) LOG.debug('Checking that Cinder volume %s is detached ' 'and available', volume_id) def exit_condition(): vol = self._cinder.volumes.get(volume_id) return vol.status == 'available' msg = "Cinder volume %s is not in 'available' state. " \ "Timeout reached (%s seconds)" % \ (volume_id, self._global_timeout) util.wait_until( exit_condition, logger=LOG, timeout=self._global_timeout, error_text=msg) LOG.debug('Cinder volume %s is available', volume_id)
def start(self): try: if not self.running: #TODO: think about moving this code elsewhere if self.port == __redis__['defaults']['port']: base_dir = self.redis_conf.dir snap_src = os.path.join(base_dir, __redis__['db_filename']) snap_dst = os.path.join(base_dir, get_snap_db_filename(__redis__['defaults']['port'])) if os.path.exists(snap_src) and not os.path.exists(snap_dst): shutil.move(snap_src, snap_dst) if 'snapshotting' == __redis__["persistence_type"]: self.redis_conf.dbfilename = snap_dst aof_src = os.path.join(base_dir, __redis__['aof_filename']) aof_dst = os.path.join(base_dir, get_aof_db_filename(__redis__['defaults']['port'])) if os.path.exists(aof_src) and not os.path.exists(aof_dst): shutil.move(aof_src, aof_dst) if 'aof' == __redis__["persistence_type"]: self.redis_conf.appendfilename = aof_dst LOG.debug('Starting %s on port %s' % (__redis__['redis-server'], self.port)) system2('%s %s -s %s -c "%s %s"' % ( __redis__['su'], __redis__['defaults']['user'], __redis__['bash'], __redis__['redis-server'], self.config_path), shell=True, close_fds=True, preexec_fn=os.setsid) wait_until(lambda: self.running) wait_until(lambda: self.cli.test_connection()) LOG.debug('%s process has been started.' % SERVICE_NAME) except PopenError, e: LOG.error('Unable to start redis process: %s' % e) raise initdv2.InitdError(e)
def start(self): try: if not self.running: #TODO: think about moving this code elsewhere if self.port == DEFAULT_PORT: base_dir = self.redis_conf.dir snap_src = os.path.join(base_dir, DB_FILENAME) snap_dst = os.path.join(base_dir, get_snap_db_filename(DEFAULT_PORT)) if os.path.exists(snap_src) and not os.path.exists(snap_dst): shutil.move(snap_src, snap_dst) self.redis_conf.dbfilename = snap_dst aof_src = os.path.join(base_dir, AOF_FILENAME) aof_dst = os.path.join(base_dir, get_aof_db_filename(DEFAULT_PORT)) if os.path.exists(aof_src) and not os.path.exists(aof_dst): shutil.move(aof_src, aof_dst) self.redis_conf.appendfilename = aof_dst LOG.debug('Starting %s on port %s' % (BIN_PATH, self.port)) system2('%s %s -s %s -c "%s %s"'%(SU_EXEC, DEFAULT_USER, BASH, BIN_PATH, self.config_path), shell=True, close_fds=True, preexec_fn=os.setsid) wait_until(lambda: self.running, timeout=MAX_START_TIMEOUT) wait_until(lambda: self.cli.test_connection(), timeout=MAX_START_TIMEOUT) LOG.debug('%s process has been started.' % SERVICE_NAME) except PopenError, e: LOG.error('Unable to start redis process: %s' % e) raise initdv2.InitdError(e)
def _attach_volume(self, volume, device_name=None):
    """Attach an EBS volume under *device_name* and wait for the OS device.

    Waits twice: first for the cloud-side attachment, then for the mapped
    device node to become readable in the operating system.
    """
    ebs = self._ebs_volume(volume)
    LOG.debug('Attaching EBS volume %s (device: %s)', ebs.id, device_name)
    ebs.attach(self._instance_id(), device_name)

    LOG.debug('Checking that EBS volume %s is attached', ebs.id)
    msg = "EBS volume %s wasn't attached. Timeout reached (%s seconds)" % (
        ebs.id, self._global_timeout)
    util.wait_until(
        lambda: ebs.update() and ebs.attachment_state() == 'attached',
        logger=LOG,
        timeout=self._global_timeout,
        error_text=msg)
    LOG.debug('EBS volume %s attached', ebs.id)

    device = name2device(device_name)
    LOG.debug('EBS device name %s is mapped to %s in operation system', device_name, device)
    LOG.debug('Checking that device %s is available', device)
    msg = 'Device %s is not available in operation system. ' \
          'Timeout reached (%s seconds)' % (device, self._global_timeout)
    util.wait_until(
        lambda: os.access(device, os.F_OK | os.R_OK),
        sleep=1,
        logger=LOG,
        timeout=self._global_timeout,
        error_text=msg)
    LOG.debug('Device %s is available', device)
def _check_attachement(self):
    """Ensure the volume is attached to THIS instance.

    If it is attached elsewhere, wait for any 'Stopping' VM to settle,
    detach, then re-attach here.
    """
    self._native_vol = self._conn.listVolumes(id=self.id)[0]
    if self._attached():
        if self._native_vol.virtualmachineid == __cloudstack__['instance_id']:
            LOG.debug('Volume %s is attached to this instance', self.id)
            return
        self.device = None  # Volume will have a new device name
        LOG.warning('Volume %s is not available. '
                    'It is attached to different instance %s. '
                    'Now scalarizr will detach it',
                    self.id, self._native_vol.virtualmachineid)
        # Wait out the transient 'Stopping' VM state before detaching
        if self._native_vol.vmstate == 'Stopping':
            def vm_state_changed():
                self._native_vol = self._conn.listVolumes(id=self._native_vol.id)[0]
                return not hasattr(self._native_vol, 'virtualmachineid') or \
                    self._native_vol.vmstate != 'Stopping'
            util.wait_until(vm_state_changed)
        # If still attached after the wait, detach it
        if hasattr(self._native_vol, 'virtualmachineid'):
            self._detach()
            LOG.debug('Volume %s detached', self.id)
    return self._attach(__cloudstack__['instance_id'])
def _wait_snapshot(self, snapshot_id):
    """Block until the Cinder snapshot leaves the 'creating' state.

    :raises storage2.StorageError: when the snapshot ends in 'error' state
    """
    LOG.debug('Checking that Cinder snapshot %s is completed', snapshot_id)
    msg = "Cinder snapshot %s wasn't completed. " \
          "Timeout reached (%s seconds)" % (snapshot_id, self._global_timeout)
    snap = [None]  # mutable cell so exit_condition can expose the last seen snapshot

    def exit_condition():
        snap[0] = self._cinder.volume_snapshots.get(snapshot_id)
        return snap[0].status != 'creating'

    util.wait_until(
        exit_condition,
        logger=LOG,
        timeout=self._global_timeout,
        error_text=msg)
    if snap[0].status == 'error':
        # [FIX] previous message said 'AWS' for a Cinder snapshot (copy-paste
        # from the EBS driver) and was missing a space between the two
        # concatenated string literals.
        msg = 'Cinder snapshot %s creation failed. ' \
              'Status is "error"' % snapshot_id
        raise storage2.StorageError(msg)
    elif snap[0].status == 'available':
        LOG.debug('Snapshot %s completed', snapshot_id)
def _check_attachement(self):
    """Ensure the volume is attached to THIS instance, detaching from any
    other instance first, then attach under a freshly allocated device name."""
    self._native_vol = self._conn.listVolumes(id=self.id)[0]
    if self._attached():
        if self._native_vol.virtualmachineid == __cloudstack__['instance_id']:
            LOG.debug('Volume %s is attached to this instance', self.id)
            return
        LOG.warning('Volume %s is not available. '
                    'It is attached to different instance %s. '
                    'Now scalarizr will detach it',
                    self.id, self._native_vol.virtualmachineid)
        # Wait out the transient 'Stopping' VM state before detaching
        if self._native_vol.vmstate == 'Stopping':
            def vm_state_changed():
                # [FIX] listVolumes was called with a positional argument;
                # every other call site passes the keyword 'id=' — positional
                # would be interpreted as a different API parameter.
                self._native_vol = self._conn.listVolumes(id=self._native_vol.id)[0]
                return not hasattr(self._native_vol, 'virtualmachineid') or \
                    self._native_vol.vmstate != 'Stopping'
            wait_until(vm_state_changed)
        # If still attached after the wait, detach it
        if hasattr(self._native_vol, 'virtualmachineid'):
            self._detach()
            LOG.debug('Volume %s detached', self.id)
    LOG.debug('Attaching volume %s to this instance', self.id)
    with self._free_device_letter_mgr:
        letter = self._free_device_letter_mgr.get()
        devname = get_system_devname(letter)
        self._attach(__cloudstack__['instance_id'], devname_to_deviceid(devname))
def execute(self, query, silent=False): if not self.password: full_query = query else: full_query = 'AUTH %s\n%s' % (self.password, query) execute_query = lambda: system2([self.path, '-p', self.port], stdin=full_query,silent=True, warn_stderr=False) try: out = execute_query()[0] #fix for redis 2.4 AUTH if 'Client sent AUTH, but no password is set' in out: execute_query = lambda: system2([self.path], stdin=query, silent=True) out = execute_query()[0] if "Redis is loading the dataset in memory" in out: #[SCALARIZR-1604] #test until service becomes available: wait_until(lambda: "LOADING" not in system2([self.path], stdin='ping', silent=True)[0]) #run query again: out = execute_query()[0] elif out.startswith('ERR'): raise PopenError(out) elif out.startswith('OK\n'): out = out[3:] if out.endswith('\n'): out = out[:-1] return out except PopenError, e: if not silent: LOG.error('Unable to execute query %s with redis-cli: %s' % (query, e)) raise
def _attach(self, instance_id):
    """Attach this CloudStack volume to *instance_id* and return the new
    OS device path that appeared after the attachment.

    :raises Exception: when more than one new device appears while polling
    """
    self._check_connection()
    volume_id = self.id or self._native_vol.id
    with self.attach_lock:
        LOG.debug('Attaching CloudStack volume %s', volume_id)
        taken_before = base.taken_devices()
        self._conn.attachVolume(volume_id, instance_id)

        def device_plugged():
            # Rescan SCSI bus so the newly attached disk shows up
            scsi_host = '/sys/class/scsi_host'
            for name in os.listdir(scsi_host):
                with open(scsi_host + '/' + name + '/scan', 'w') as fp:
                    fp.write('- - -')
            return base.taken_devices() > taken_before

        util.wait_until(
            device_plugged,
            start_text='Checking that volume %s is available in OS' % volume_id,
            timeout=30,
            sleep=1,
            error_text='Volume %s attached but not available in OS' % volume_id)
        devices = list(base.taken_devices() - taken_before)
        if len(devices) > 1:
            # [FIX] the placeholder was '%s' but the string is built with
            # str.format(), so the device list was never substituted.
            msg = "While polling for attached device, got multiple new devices: {0}. " \
                  "Don't know which one to select".format(devices)
            raise Exception(msg)
        # [FIX] removed an unreachable LOG.debug that followed this return
        return devices[0]
def create_volume(ec2_conn, size, avail_zone, snap_id=None, volume_type=None, iops=None,
                  logger=None, timeout=DEFAULT_TIMEOUT, tags=None):
    """Create an EBS volume, wait until it is available and best-effort tag it.

    :param snap_id: optional source snapshot (waited on before creation)
    :param tags: optional dict of tags; tagging failures are logged, not raised
    :return: the created boto volume object
    """
    logger = logger or logging.getLogger(__name__)
    msg = 'Creating EBS volume%s%s in avail zone %s' % (
        size and ' (size: %sG)' % size or '',
        snap_id and ' from snapshot %s' % snap_id or '',
        avail_zone)
    logger.debug(msg)
    if snap_id:
        wait_snapshot(ec2_conn, snap_id, logger)
    vol = ec2_conn.create_volume(size, avail_zone, snapshot=snap_id,
                                 volume_type=volume_type, iops=iops)
    logger.debug('EBS volume %s created%s', vol.id,
                 snap_id and ' from snapshot %s' % snap_id or '')
    logger.debug('Checking that EBS volume %s is available', vol.id)
    wait_until(
        lambda: vol.update() == "available",
        logger=logger,
        timeout=timeout,
        error_text="EBS volume %s wasn't available in a reasonable time" % vol.id)
    logger.debug('EBS volume %s available', vol.id)
    if not tags:
        logger.debug('No tags to apply to volume %s' % vol.id)
    else:
        try:
            logger.debug('Applying tags to EBS volume %s : %s' % (vol.id, tags))
            ec2_conn.create_tags((vol.id, ), tags)
        except Exception:
            # [FIX] was a bare 'except:' which also swallowed
            # SystemExit/KeyboardInterrupt; tagging remains best-effort.
            logger.warn('Cannot apply tags to EBS volume %s', vol.id)
    return vol
def start(self): try: if not self.running: #TODO: think about moving this code elsewhere if self.port == __redis__['defaults']['port']: base_dir = self.redis_conf.dir snap_src = os.path.join(base_dir, __redis__['db_filename']) snap_dst = os.path.join(base_dir, get_snap_db_filename(__redis__['defaults']['port'])) if os.path.exists(snap_src) and not os.path.exists(snap_dst): shutil.move(snap_src, snap_dst) if 'snapshotting' == __redis__["persistence_type"]: self.redis_conf.dbfilename = snap_dst aof_src = os.path.join(base_dir, __redis__['aof_filename']) aof_dst = os.path.join(base_dir, get_aof_db_filename(__redis__['defaults']['port'])) if os.path.exists(aof_src) and not os.path.exists(aof_dst): shutil.move(aof_src, aof_dst) if 'aof' == __redis__["persistence_type"]: self.redis_conf.appendfilename = aof_dst LOG.debug('Starting %s on port %s' % (__redis__['redis-server'], self.port)) system2('%s %s -s %s -c "%s %s"' % ( __redis__['su'], __redis__['defaults']['user'], __redis__['bash'], __redis__['redis-server'], self.config_path), shell=True, close_fds=True, preexec_fn=os.setsid) wait_until(lambda: self.running) #wait_until(lambda: self.cli.test_connection()) LOG.debug('%s process has been started.' % SERVICE_NAME) except PopenError, e: LOG.error('Unable to start redis process: %s' % e) raise initdv2.InitdError(e)
def destroy(self, vol, force=False, **kwargs):
    """Destroy a loop volume: detach the loop device, optionally delete
    the backing file (when *force*)."""
    super(LoopVolumeProvider, self).destroy(vol, force, **kwargs)
    wait_until(self._rmloop,
               (vol.devname, ),
               sleep=1,
               timeout=60,
               error_text='Cannot detach loop device %s' % vol.devname)
    if force:
        os.remove(vol.file)
    vol.device = None
def _ensure_repos(self, updatedb=True):
    """Recreate the scalr package repository definition and optionally refresh
    the package manager cache.

    :param updatedb: when True, retry ``pkgmgr.updatedb()`` for up to 2 minutes.
    """
    if 'release-latest' in self.repo_url or 'release-stable' in self.repo_url:
        LOG.warn("Special branches release/latest and release/stable currently doesn't work")
        self.repo_url = devel_repo_url_for_branch('master')
    repo = pkgmgr.repository('scalr-{0}'.format(self.repository), self.repo_url)
    # Delete previous repository files
    for filename in glob.glob(os.path.dirname(repo.filename) + os.path.sep + 'scalr*'):
        if os.path.isfile(filename):
            os.remove(filename)
    if 'buildbot.scalr-labs.com' in self.repo_url and not linux.os.windows:
        self._configure_devel_repo(repo)
    elif linux.os.debian_family:
        self._apt_pin_release('scalr')  # make downgrades possible
    elif linux.os.redhat_family or linux.os.oracle_family:
        self._yum_prioritize(repo)
    # Ensure new repository
    repo.ensure()
    if updatedb:
        LOG.info('Updating packages cache')

        def do_updatedb():
            try:
                self.pkgmgr.updatedb()
                return True
            except Exception:
                # [FIX] was a bare 'except:' which also swallowed
                # SystemExit/KeyboardInterrupt, making the retry loop
                # uninterruptible.
                LOG.warn('Package manager error', exc_info=sys.exc_info())

        wait_until(do_updatedb, sleep=10, timeout=120)
def rebundle(self):
    """Create a server image via Nova and poll until it is ACTIVE.

    :return: the image id
    :raises handlers.HandlerError: when the image ends up FAILED
    """
    image_name = self._role_name + "-" + time.strftime("%Y%m%d%H%M%S")
    nova = __node__['openstack']['new_nova_connection']
    nova.connect()
    server_id = __node__['openstack']['server_id']
    system2("sync", shell=True)  # flush filesystem buffers before imaging
    LOG.info('Creating server image (server_id: %s)', server_id)
    image_id = nova.servers.create_image(server_id, image_name)
    LOG.info('Server image %s created', image_id)
    result = [None]  # mutable cell so the poller can expose the last image

    def image_completed():
        try:
            result[0] = nova.images.get(image_id)
            return result[0].status in ('ACTIVE', 'FAILED')
        except:
            e = sys.exc_info()[1]
            # Transient API hiccup: keep polling
            if 'Unhandled exception occurred during processing' in str(e):
                return
            raise

    wait_until(image_completed, start_text='Polling image status', sleep=30)
    image_id = result[0].id
    if result[0].status == 'FAILED':
        raise handlers.HandlerError('Image %s becomes FAILED', image_id)
    LOG.info('Image %s completed and available for use!', image_id)
    return image_id
def _run(self):
    """Snapshot ``self.volume`` with freeze/unfreeze hooks.

    :return: a restore object wrapping the completed snapshot
    :raises Error: when the snapshot ends in FAILED state
    """
    self.volume = storage2.volume(self.volume)
    LOG.debug("Volume obj: %s", self.volume)
    LOG.debug("Volume config: %s", dict(self.volume))
    state = {}
    self.fire("freeze", self.volume, state)
    try:
        snap = self.volume.snapshot(self.description, tags=self.tags)
    finally:
        # Always unfreeze, even when snapshot creation raised
        self.fire("unfreeze", self.volume, state)
    try:
        util.wait_until(
            lambda: snap.status() in (snap.COMPLETED, snap.FAILED),
            start_text="Polling snapshot status (%s)" % snap.id,
            logger=LOG)
    except:
        # Cloud API throttling is tolerated; anything else propagates
        if "Request limit exceeded" in str(sys.exc_info()[1]):
            pass
        else:
            raise
    if snap.status() == snap.FAILED:
        msg = "Backup failed because snapshot %s failed" % snap.id
        raise Error(msg)
    return restore(type=self.type, snapshot=snap, **state)
def start(self):
    """Start the redis init script and wait for processes and CLI connectivity."""
    initdv2.ParametrizedInitScript.start(self)
    wait_until(lambda: self._processes, timeout=10, sleep=1)
    redis_conf = RedisConf.find()
    cli = RedisCLI(redis_conf.requirepass)
    wait_until(lambda: cli.test_connection(), timeout=10, sleep=1)
def on_host_init_response(self, hir): LOG.info('Configuring block device mountpoints') with bus.initialization_op as op: with op.phase(self._phase_plug_volume): wait_until(self._plug_all_volumes, sleep=10, timeout=600, error_text='Cannot attach and mount disks in a reasonable time') volumes = hir.body.get('volumes') or [] if volumes: LOG.debug('HIR volumes: %s', volumes) for i in range(0, len(volumes)): vol = volumes[i] template = vol.pop('template', None) from_template_if_missing = vol.pop('from_template_if_missing', None) vol = storage2.volume(**vol) LOG.info('Ensuring %s volume %s', vol.type, dict(vol)) try: vol.ensure(mount=bool(vol.mpoint), mkfs=True) except storage2.VolumeNotExistsError, e: if template and from_template_if_missing == '1': vol = storage2.volume(**template) LOG.warn('Volume %s not exists, re-creating %s volume from config: %s', str(e), vol.type, dict(vol)) vol.ensure(mount=bool(vol.mpoint), mkfs=True) else: raise self._volumes.append(dict(vol))
def _create_volume(self, zone=None, size=None, snapshot=None, volume_type=None, iops=None, tags=None):
    """Create an EBS volume, wait until it is available and best-effort tag it.

    :return: the created boto volume object
    """
    LOG.debug('Creating EBS volume (zone: %s size: %s snapshot: %s '
              'volume_type: %s iops: %s)',
              zone, size, snapshot, volume_type, iops)
    if snapshot:
        self._wait_snapshot(snapshot)
    ebs = self._conn.create_volume(size, zone, snapshot, volume_type, iops)
    LOG.debug('EBS volume %s created', ebs.id)

    LOG.debug('Checking that EBS volume %s is available', ebs.id)
    msg = "EBS volume %s is not in 'available' state. " \
          "Timeout reached (%s seconds)" % (ebs.id, self._global_timeout)
    util.wait_until(
        lambda: ebs.update() == "available",
        logger=LOG,
        timeout=self._global_timeout,
        error_text=msg)
    LOG.debug('EBS volume %s available', ebs.id)

    if tags:
        try:
            LOG.debug('Applying tags to EBS volume %s (tags: %s)', ebs.id, tags)
            self._conn.create_tags([ebs.id], tags)
        except:
            # Tagging stays best-effort: failure is logged, not raised
            LOG.warn('Cannot apply tags to EBS volume %s. Error: %s',
                     ebs.id, sys.exc_info()[1])
    return ebs
def reload(self): try: if os.path.exists(self.pid_file): pid = self.pid() if pid: args = [ self.haproxy_exec, '-f', self.config_path, '-p', self.pid_file, '-D', '-sf', pid ] util.system2(args, close_fds=True, logger=LOG, preexec_fn=os.setsid) util.wait_until( lambda: self.pid() and self.pid() != pid, timeout=self.timeout, sleep=0.5, error_text="Error reloading HAProxy service process.") if self.status() != 0: raise initdv2.InitdError( "HAProxy service not running.") else: raise LookupError('File %s not exist' % self.pid_file) except: raise initdv2.InitdError, "HAProxy service not running can't reload it."\ " Details: %s" % sys.exc_info()[1], sys.exc_info()[2]
def _create_volume(self, zone=None, size=None, snapshot=None, volume_type=None, iops=None, tags=None, encrypted=False):
    """Create an (optionally encrypted) EBS volume, wait until available,
    then schedule asynchronous tagging.

    :return: the created boto volume object
    """
    LOG.debug('Creating EBS volume (zone: %s size: %s snapshot: %s '
              'volume_type: %s iops: %s encrypted: %s)',
              zone, size, snapshot, volume_type, iops, encrypted)
    if snapshot:
        self._wait_snapshot(snapshot)
    ebs = self._conn.create_volume(size, zone, snapshot, volume_type, iops, encrypted)
    LOG.debug('EBS volume %s created', ebs.id)

    LOG.debug('Checking that EBS volume %s is available', ebs.id)
    msg = "EBS volume %s is not in 'available' state. " \
          "Timeout reached (%s seconds)" % (ebs.id, self._global_timeout)
    util.wait_until(
        lambda: ebs.update() == "available",
        logger=LOG,
        timeout=self._global_timeout,
        error_text=msg)
    LOG.debug('EBS volume %s available', ebs.id)

    if tags:
        self._create_tags_async(ebs.id, tags)
    return ebs
def _create_volume(self, zone=None, size=None, snapshot=None, volume_type=None, iops=None, tags=None, encrypted=False):
    """Create an (optionally encrypted) EBS volume, wait until available,
    then schedule asynchronous tagging.

    :return: the created boto volume object
    """
    LOG.debug(
        'Creating EBS volume (zone: %s size: %s snapshot: %s '
        'volume_type: %s iops: %s encrypted: %s)',
        zone, size, snapshot, volume_type, iops, encrypted)
    if snapshot:
        self._wait_snapshot(snapshot)
    ebs = self._conn.create_volume(size, zone, snapshot, volume_type, iops, encrypted)
    LOG.debug('EBS volume %s created', ebs.id)

    LOG.debug('Checking that EBS volume %s is available', ebs.id)
    msg = "EBS volume %s is not in 'available' state. " \
          "Timeout reached (%s seconds)" % (ebs.id, self._global_timeout)
    util.wait_until(
        lambda: ebs.update() == "available",
        logger=LOG,
        timeout=self._global_timeout,
        error_text=msg)
    LOG.debug('EBS volume %s available', ebs.id)

    if tags:
        self._create_tags_async(ebs.id, tags)
    return ebs
def _attach_volume(self, volume):
    """Attach an EBS volume under a freshly allocated device name.

    :return: tuple (os_device, cloud_device_name); on Windows the cloud
        name is returned for both.
    """
    ebs = self._ebs_volume(volume)
    with self.attach_lock:
        device_name = get_free_name()
        taken_before = base.taken_devices()
        volume_id = ebs.id
        LOG.debug('Attaching EBS volume %s (name: %s)', volume_id, device_name)
        ebs.attach(self._instance_id(), device_name)

        LOG.debug('Checking that EBS volume %s is attached', volume_id)
        msg = "EBS volume %s wasn't attached. Timeout reached (%s seconds)" % (
            ebs.id, self._global_timeout)
        util.wait_until(
            lambda: ebs.update() and ebs.attachment_state() == 'attached',
            logger=LOG,
            timeout=self._global_timeout,
            error_text=msg)
        LOG.debug('EBS volume %s attached', volume_id)

        if not linux.os.windows:
            # Wait for the new device node to show up, then identify it by
            # diffing the taken-devices set
            util.wait_until(
                lambda: base.taken_devices() > taken_before,
                start_text='Checking that volume %s is available in OS' % volume_id,
                timeout=30,
                sleep=1,
                error_text='Volume %s attached but not available in OS' % volume_id)
            devices = list(base.taken_devices() - taken_before)
            if len(devices) > 1:
                msg = "While polling for attached device, got multiple new devices: {0}. " \
                      "Don't know which one to select".format(devices)
                raise Exception(msg)
            return devices[0], device_name
        else:
            return device_name, device_name
def bgrewriteaof(self, wait_until_complete=True):
    """Trigger BGREWRITEAOF unless one is already running; optionally block
    until the rewrite finishes (up to 15 minutes)."""
    if not self.bgrewriteaof_in_progress:
        self.execute('bgrewriteaof')
    if wait_until_complete:
        wait_until(lambda: not self.bgrewriteaof_in_progress, sleep=5, timeout=900)
def _wait_status_transition(self, volume_id=None):
    """
    Wait until volume enters stable state (not 'detaching' or 'attaching')
    :param volume_id:
    :return: volume status
    """
    if not volume_id:
        volume_id = self.id
    status = self._cinder.volumes.get(volume_id).status
    vol = [None]  # mutable cell so exit_condition can expose the last volume

    def exit_condition():
        vol[0] = self._cinder.volumes.get(volume_id)
        return vol[0].status not in ('attaching', 'detaching', 'creating')

    # Fast path: only start the wait loop when the state is transitional
    if not exit_condition():
        msg = 'Cinder volume %s hangs in transitional state. ' \
              'Timeout reached (%s seconds)' % (volume_id, self._global_timeout)
        util.wait_until(
            exit_condition,
            logger=LOG,
            timeout=self._global_timeout,
            error_text=msg)
    if vol[0].status == 'error':
        msg = 'Cinder volume %s enters error state after %s.' % (volume_id, status)
        raise storage2.StorageError(msg)
    return vol[0].status
def start(cls):
    """Launch the mongos router process and wait until it accepts connections."""
    if not cls.is_running():
        cls._logger.info("Starting %s process" % MONGOS)
        args = [
            "sudo",
            "-u", DEFAULT_USER,
            MONGOS,
            "--fork",
            "--logpath", ROUTER_LOG_PATH,
            "--configdb", "mongo-0-0:%s" % CONFIG_SERVER_DEFAULT_PORT,
        ]
        if cls.keyfile and os.path.exists(cls.keyfile):
            chown_r(cls.keyfile, DEFAULT_USER)
            args.append("--keyFile=%s" % cls.keyfile)
        if cls.verbose and isinstance(cls.verbose, int) and 0 < cls.verbose < 6:
            args.append("-" + "v" * cls.verbose)
        if os.path.exists(ROUTER_LOG_PATH):
            chown_r(ROUTER_LOG_PATH, DEFAULT_USER)
        system2(args, close_fds=True, preexec_fn=mongo_preexec_fn)
        # [FIX] 'lambda: cls.is_running' returned the bound method object
        # (always truthy), so the wait completed immediately without actually
        # checking the process; is_running is called with () elsewhere in this
        # method, so call it here too.
        wait_until(lambda: cls.is_running(), timeout=MAX_START_TIMEOUT)
        # NOTE(review): has_connection may be a property or a method; if it is
        # a method it has the same always-truthy problem — confirm and add ()
        # if needed.
        wait_until(lambda: cls.get_cli().has_connection, timeout=MAX_START_TIMEOUT)
        cls._logger.debug("%s process has been started." % MONGOS)
def _check_attachement(self):
    """Ensure the volume is attached to THIS instance, detaching from any
    other instance first, then attach under a freshly allocated device name."""
    self._native_vol = self._conn.listVolumes(id=self.id)[0]
    if self._attached():
        if self._native_vol.virtualmachineid == __cloudstack__[
                'instance_id']:
            LOG.debug('Volume %s is attached to this instance', self.id)
            return
        LOG.warning(
            'Volume %s is not available. '
            'It is attached to different instance %s. '
            'Now scalarizr will detach it',
            self.id, self._native_vol.virtualmachineid)
        # Wait out the transient 'Stopping' VM state before detaching
        if self._native_vol.vmstate == 'Stopping':
            def vm_state_changed():
                # [FIX] listVolumes was called with a positional argument;
                # every other call site passes the keyword 'id=' — positional
                # would be interpreted as a different API parameter.
                self._native_vol = self._conn.listVolumes(
                    id=self._native_vol.id)[0]
                return not hasattr(self._native_vol, 'virtualmachineid') or \
                    self._native_vol.vmstate != 'Stopping'
            wait_until(vm_state_changed)
        # If still attached after the wait, detach it
        if hasattr(self._native_vol, 'virtualmachineid'):
            self._detach()
            LOG.debug('Volume %s detached', self.id)
    LOG.debug('Attaching volume %s to this instance', self.id)
    with self._free_device_letter_mgr:
        letter = self._free_device_letter_mgr.get()
        devname = get_system_devname(letter)
        self._attach(__cloudstack__['instance_id'], devname_to_deviceid(devname))
def _attach(self, instance_id):
    """Attach this CloudStack volume to *instance_id* and return the new
    OS device path that appeared after the attachment.

    :raises Exception: when more than one new device appears while polling
    """
    self._check_connection()
    volume_id = self.id or self._native_vol.id
    with self.attach_lock:
        LOG.debug('Attaching CloudStack volume %s', volume_id)
        taken_before = base.taken_devices()
        self._conn.attachVolume(volume_id, instance_id)

        def device_plugged():
            # Rescan SCSI bus so the newly attached disk shows up
            scsi_host = '/sys/class/scsi_host'
            for name in os.listdir(scsi_host):
                with open(scsi_host + '/' + name + '/scan', 'w') as fp:
                    fp.write('- - -')
            return base.taken_devices() > taken_before

        util.wait_until(
            device_plugged,
            start_text='Checking that volume %s is available in OS' % volume_id,
            timeout=30,
            sleep=1,
            error_text='Volume %s attached but not available in OS' % volume_id)
        devices = list(base.taken_devices() - taken_before)
        if len(devices) > 1:
            # [FIX] the placeholder was '%s' but the string is built with
            # str.format(), so the device list was never substituted.
            msg = "While polling for attached device, got multiple new devices: {0}. " \
                  "Don't know which one to select".format(devices)
            raise Exception(msg)
        # [FIX] removed an unreachable LOG.debug that followed this return
        return devices[0]
def on_host_init_response(self, hir):
    """Plug old-style QueryEnv volumes, then the new-style ones from the HIR."""
    bus.init_op.logger.info('Configuring storage volumes')
    # volumes from QueryEnv.list_ebs_mountpoints()
    wait_until(self._plug_old_style_volumes, sleep=10)
    # volumes assigned to this role on Farm Designer
    volumes = hir.body.get('volumes', []) or []
    self._plug_new_style_volumes(volumes)
def shutdown(self, force=False):
    """Stop the HTTP server and the message handler.

    :param force: when False, give the handler up to 120 seconds to finish
        its current task before shutting down.
    """
    self._logger.debug('entring shutdown _server: %s, running: %s', self._server, self.running)
    self.running = False
    if not self._server:
        return
    self._logger.debug('Shutdown message consumer %s ...', self.endpoint)

    self._logger.debug("Shutdown HTTP server")
    self._server.shutdown()
    self._server.socket.shutdown(socket.SHUT_RDWR)
    self._server.socket.close()
    self._server = None
    self._logger.debug("HTTP server terminated")

    self._logger.debug("Shutdown message handler")
    self.handler_locked = True
    if not force:
        t = 120
        self._logger.debug('Waiting for message handler to complete it`s task. Timeout: %d seconds', t)
        wait_until(lambda: self.handler_status in ('idle', 'stopped'),
                   timeout=t,
                   error_text='Message consumer is busy',
                   logger=self._logger)
    if self.handing_message_id:
        # Make sure the in-flight message is not re-delivered after restart
        store = P2pMessageStore()
        store.mark_as_handled(self.handing_message_id)
    if self._handler_thread:
        self._handler_thread.join()
    self._logger.debug("Message handler terminated")
    self._logger.debug('Message consumer %s terminated', self.endpoint)
def _locate_nginx(self):
    """Poll for the nginx frontend server for up to 10 minutes."""
    util.wait_until(
        self._do_locate_nginx,
        timeout=600,
        logger=LOG,
        start_text="Locating nginx frontend server",
        error_text="Cannot locate nginx frontend server")
def initiate_rs(self):
    '''
    @return (host:port)
    '''
    self.cli.initiate_rs()
    # NOTE(review): is_replication_master is evaluated without () — presumably
    # a property; confirm it is not a plain method (always-truthy otherwise).
    wait_until(lambda: self.is_replication_master,
               sleep=5,
               logger=self._logger,
               timeout=120,
               start_text='Wait until node becomes replication primary')
    self._logger.debug('Server became replication master')
def _locate_cloud_controller(self):
    """Poll for the cloud_controller server for up to 10 minutes."""
    util.wait_until(
        self._do_locate_cloud_controller,
        timeout=600,
        logger=LOG,
        start_text="Locating cloud_controller server",
        error_text="Cannot locate cloud_controller server")
def stop(self, reason=None):
    """Terminate the redis-server process with SIGTERM and wait until it exits.

    :raises ServiceError: when the process is alive but its PID is unknown
    """
    if not self.running:
        return
    if not self.pid:
        #XXX: rare case when process is alive but scalarizr is unable to get PID
        raise ServiceError("Cannot stop redis process: PID file not found.")
    LOG.info('Stopping redis server on port %s (pid %s). Reason: %s' % (self.port, self.pid, reason))
    os.kill(int(self.pid), signal.SIGTERM)
    wait_until(lambda: not self.running)
def _wait_attachment_state_change(self, volume):
    """Wait until the EBS volume leaves the transitional attaching/detaching state."""
    ebs = self._ebs_volume(volume)
    msg = 'EBS volume %s hangs in attaching state. ' \
          'Timeout reached (%s seconds)' % (ebs.id, self._global_timeout)
    util.wait_until(
        lambda: ebs.update() and ebs.attachment_state() not in ('attaching', 'detaching'),
        logger=LOG,
        timeout=self._global_timeout,
        error_text=msg)
def start(self):
    """Start the service via the init script and wait until it reports RUNNING."""
    initdv2.ParametrizedInitScript.start(self)
    timeout = 60
    wait_until(
        lambda: self.status() == initdv2.Status.RUNNING,
        sleep=1,
        timeout=timeout,
        error_text="%s state still isn't 'Running' In %s seconds after start " % (SERVICE_NAME, timeout))
def wait_for_sync(self, link_timeout=None, sync_timeout=None):
    """Wait for the replication link to come up, then for the initial sync
    with the master to complete."""
    LOG.info('Waiting for link with master')
    wait_until(lambda: self.redis_cli.master_link_status == 'up',
               sleep=3, timeout=link_timeout)
    LOG.info('Waiting for sync with master to complete')
    wait_until(lambda: not self.redis_cli.master_sync_in_progress,
               sleep=10, timeout=sync_timeout)
    LOG.info('Sync with master completed')
def stop(self, reason=None):
    """Wait out any AOF rewrite, SIGTERM the redis-server and wait for exit.

    :raises ServiceError: when the process is alive but its PID is unknown
    """
    if not self.running:
        return
    if not self.pid:
        #XXX: rare case when process is alive but scalarizr is unable to get PID
        raise ServiceError("Cannot stop redis process: PID file not found.")
    LOG.debug("Waiting until redis service is ready to shut down")
    # http://redis.io/commands/shutdown
    wait_until(lambda: not self._is_aof_rewrite_running)
    LOG.info('Stopping redis server on port %s (pid %s). Reason: %s' % (self.port, self.pid, reason))
    os.kill(int(self.pid), signal.SIGTERM)
    #self.cli.execute("SHUTDOWN SAVE")
    wait_until(lambda: not self.running)
    LOG.debug("Redis process terminated.")
def _attach_volume(self, server_id=None): ''' :rtype: tuple(cloud_device_name, os_device_name) ''' if server_id is None: server_id = self._server_id() volume_id = self.id self._check_nova_connection() ops_delay = 10 with self.attach_lock: for _ in xrange(5): LOG.debug('Attaching Cinder volume %s', volume_id) taken_before = base.taken_devices() try: attachment = self._nova.volumes.create_server_volume(server_id, volume_id, None) except TypeError, e: if "'NoneType' object has no attribute '__getitem__'" not in str(e): # Very often (2/5 times) we got this error on RaxNG, because of incorrect API response raise #waiting for attaching transitional state LOG.debug('Checking that Cinder volume %s is attached', volume_id) new_status = self._wait_status_transition(volume_id) if new_status == 'in-use': LOG.debug('Cinder volume %s attached', volume_id) break elif new_status == 'available': LOG.warn('Volume %s status changed to "available" instead of "in-use"', volume_id) LOG.debug('Will try attach volume again after %d seconds', ops_delay) continue else: msg = 'Unexpected status transition "available" -> "{0}".' \ ' Cinder volume {1}'.format(new_status, volume_id) raise storage2.StorageError(msg) if not linux.os.windows_family: util.wait_until(lambda: base.taken_devices() > taken_before, start_text='Checking that volume %s is available in OS' % volume_id, timeout=30, sleep=1, error_text='Volume %s attached but not available in OS' % volume_id) devices = list(base.taken_devices() - taken_before) if len(devices) > 1: msg = "While polling for attached device, got multiple new devices: %s. " \ "Don't know which one to select".format(devices) raise Exception(msg) return devices[0] else: return attachment.device