Example #1
def check_lock(ctx, config, check_up=True):
    """
    Check lock status of remote machines.
    """
    if not teuth_config.lock_server or ctx.config.get('check-locks') is False:
        log.info('Lock checking disabled.')
        return
    log.info('Checking locks...')
    for machine in ctx.config['targets'].iterkeys():
        status = lockstatus.get_status(machine)
        log.debug('machine status is %s', repr(status))
        assert status is not None, \
            'could not read lock status for {name}'.format(name=machine)
        if check_up:
            assert status['up'], 'machine {name} is marked down'.format(
                name=machine
            )
        assert status['locked'], \
            'machine {name} is not locked'.format(name=machine)
        assert status['locked_by'] == ctx.owner, \
            'machine {name} is locked by {user}, not {owner}'.format(
                name=machine,
                user=status['locked_by'],
                owner=ctx.owner,
            )
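The status record returned by lockstatus.get_status is treated here as a plain mapping. As a minimal sketch, assuming only the keys that check_lock reads above (the lock server's full schema is not shown in these examples), the checks boil down to:

# Hypothetical status record, limited to the keys check_lock reads above;
# the owner string is a placeholder, not a real lock-server value.
status = {
    'up': True,
    'locked': True,
    'locked_by': 'ubuntu@example-owner',
}
owner = 'ubuntu@example-owner'
assert status is not None, 'could not read lock status'
assert status['up'], 'machine is marked down'
assert status['locked'], 'machine is not locked'
assert status['locked_by'] == owner, 'machine is locked by someone else'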
Example #2
def check_lock(ctx, config, check_up=True):
    """
    Check lock status of remote machines.
    """
    if not teuth_config.lock_server or ctx.config.get('check-locks') is False:
        log.info('Lock checking disabled.')
        return
    log.info('Checking locks...')
    for machine in ctx.config['targets'].iterkeys():
        status = lockstatus.get_status(machine)
        log.debug('machine status is %s', repr(status))
        assert status is not None, \
            'could not read lock status for {name}'.format(name=machine)
        if check_up:
            assert status['up'], 'machine {name} is marked down'.format(
                name=machine
            )
        assert status['locked'], \
            'machine {name} is not locked'.format(name=machine)
        assert status['locked_by'] == ctx.owner, \
            'machine {name} is locked by {user}, not {owner}'.format(
            name=machine,
            user=status['locked_by'],
            owner=ctx.owner,
            )
Example #3
    def machine_type(self):
        if not getattr(self, '_machine_type', None):
            remote_info = ls.get_status(self.hostname)
            if not remote_info:
                return None
            self._machine_type = remote_info.get("machine_type", None)
        return self._machine_type
Example #4
    def machine_type(self):
        if not getattr(self, '_machine_type', None):
            remote_info = ls.get_status(self.hostname)
            if not remote_info:
                return None
            self._machine_type = remote_info.get("machine_type", None)
        return self._machine_type
Example #5
def get_testdir(ctx):
    if 'test_path' in ctx.teuthology_config:
        return ctx.teuthology_config['test_path']

    basedir = ctx.teuthology_config.get('base_test_dir', '/home/ubuntu/cephtest')

    global global_jobid
    global checked_jobid

    # check if a jobid exists in the machine status for all our targets
    # and if its the same jobid, use that as the subdir for the test
    if not checked_jobid:
        jobids = {}
        for machine in ctx.config['targets'].iterkeys():
            status = lockstatus.get_status(ctx, machine)
            if status is None or 'description' not in status or status['description'] is None:
                continue
            jid = status['description'].split('/')[-1]
            if jid is None or jid == 'None':
                continue
            jobids[jid] = 1
            if len(jobids) > 1:
                break
        if len(jobids) == 1:
            # same job id on all machines, use that as the test subdir
            (jobid,) = jobids.iterkeys()
            if jobid is not None:
                global_jobid = jobid
                log.debug('setting my jobid to {jid}'.format(jid=global_jobid))
        checked_jobid = True

    # the subdir is chosen using the priority:
    # 1. jobid chosen by the teuthology beanstalk queue
    # 2. run name specified by teuthology schedule
    # 3. user@timestamp
    if global_jobid is not None:
        log.debug('with jobid basedir: {b}'.format(b=global_jobid))
        return '{basedir}/{jobid}'.format(
                    basedir=basedir,
                    jobid=global_jobid,
                    )
    elif hasattr(ctx, 'name') and ctx.name:
        log.debug('with name basedir: {b}'.format(b=basedir))
        # we need a short string to keep the path short
        import re
        m = re.match(r"(.*)-(.*)-(.*)-(.*)_(.*)-(.*)-(.*)-(.*)-(.*)", ctx.name)
        (u, y, m, d, hms, s, c, k, f) = m.groups()
        short = u[0:2] + y[2:4] + m[0:2] + d[0:2] + hms[0:2] + hms[3:5] + s[0] + c[0] + k[0] + f[0]
        return '{basedir}/{rundir}'.format(
                    basedir=basedir,
                    rundir=short,
                    )
    else:
        log.debug('basedir: {b}'.format(b=basedir))
        return '{basedir}/{user}{stamp}'.format(
                    basedir=basedir,
                    user=get_user()[0:2],
                    stamp=stamp)
Example #6
    def __init__(self, name, ipmiuser, ipmipass, ipmidomain, logfile=None, timeout=20):
        self.shortname = getShortName(name)
        status_info = ls.get_status('', self.shortname)
        try:
            phys_host = status_info['vpshost']
        except TypeError:
            return
        self.connection = libvirt.open(phys_host)
        for i in self.connection.listDomainsID():
            d = self.connection.lookupByID(i)
            if d.name() == self.shortname:
                self.vm_domain = d
                break
        return
Example #7
def check_lock(ctx, config):
    if ctx.config.get("check-locks") == False:
        log.info("Lock checking disabled.")
        return
    log.info("Checking locks...")
    for machine in ctx.config["targets"].iterkeys():
        status = lockstatus.get_status(ctx, machine)
        log.debug("machine status is %s", repr(status))
        assert status is not None, "could not read lock status for {name}".format(name=machine)
        assert status["up"], "machine {name} is marked down".format(name=machine)
        assert status["locked"], "machine {name} is not locked".format(name=machine)
        assert status["locked_by"] == ctx.owner, "machine {name} is locked by {user}, not {owner}".format(
            name=machine, user=status["locked_by"], owner=ctx.owner
        )
Example #8
def create_if_vm(ctx, machine_name):
    status_info = ls.get_status(ctx, machine_name)
    phys_host = status_info['vpshost']
    if not phys_host:
        return False
    try:
        vm_type = ctx.vm_type
    except AttributeError:
        vm_type = 'ubuntu'
    createMe = decanonicalize_hostname(machine_name)
    with tempfile.NamedTemporaryFile() as tmp:
        try:
            lcnfg = ctx.config['downburst']
        except KeyError:
            lcnfg = {}

        file_info = {}
        file_info['disk-size'] = lcnfg.get('disk-size', '30G')
        file_info['ram'] = lcnfg.get('ram', '1.9G')
        file_info['cpus'] = lcnfg.get('cpus', 1)
        file_info['networks'] = lcnfg.get('networks',
                [{'source' : 'front', 'mac' : status_info['mac']}])
        file_info['distro'] = lcnfg.get('distro', vm_type.lower())
        file_info['additional-disks'] = lcnfg.get(
                'additional-disks', 3)
        file_info['additional-disks-size'] = lcnfg.get(
                'additional-disks-size', '200G')
        file_info['arch'] = lcnfg.get('arch', 'x86_64')
        file_out = {'downburst': file_info}
        yaml.safe_dump(file_out, tmp)
        metadata = "--meta-data=%s" % tmp.name
        dbrst = _get_downburst_exec()
        if not dbrst:
            log.info("Error: no downburst executable found")
            return False
        p = subprocess.Popen([dbrst, '-c', phys_host,
                'create', metadata, createMe],
                stdout=subprocess.PIPE,stderr=subprocess.PIPE,)
        owt,err = p.communicate()
        if err:
            log.info("Downburst completed on %s: %s" %
                    (machine_name,err))
        else:
            log.info("%s created: %s" % (machine_name,owt))
    return True
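For reference, create_if_vm above serializes a 'downburst' stanza to a temporary file and hands it to the downburst CLI via --meta-data. A minimal sketch of that metadata, assuming ctx.config has no 'downburst' section so the defaults shown above apply; 'ubuntu' stands in for vm_type and the MAC address is a made-up placeholder:

import yaml

# Defaults taken from the lcnfg.get(...) fallbacks above.
file_info = {
    'disk-size': '30G',
    'ram': '1.9G',
    'cpus': 1,
    'networks': [{'source': 'front', 'mac': '52:54:00:00:00:01'}],
    'distro': 'ubuntu',
    'additional-disks': 3,
    'additional-disks-size': '200G',
    'arch': 'x86_64',
}
print(yaml.safe_dump({'downburst': file_info}, default_flow_style=False))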
Example #9
    def __init__(self, name, ipmiuser, ipmipass, ipmidomain, logfile=None,
                 timeout=20):
        if libvirt is None:
            raise RuntimeError("libvirt not found")

        self.shortname = getShortName(name)
        status_info = ls.get_status('', self.shortname)
        try:
            phys_host = status_info['vpshost']
        except TypeError:
            return
        self.connection = libvirt.open(phys_host)
        for i in self.connection.listDomainsID():
            d = self.connection.lookupByID(i)
            if d.name() == self.shortname:
                self.vm_domain = d
                break
        return
Example #10
    def __init__(self, name):
        if libvirt is None:
            raise RuntimeError("libvirt not found")

        self.shortname = remote.getShortName(name)
        status_info = ls.get_status(self.shortname)
        try:
            if status_info.get('is_vm', False):
                phys_host = status_info['vm_host']['name'].split('.')[0]
        except TypeError:
            return
        self.connection = libvirt.open(phys_host)
        for i in self.connection.listDomainsID():
            d = self.connection.lookupByID(i)
            if d.name() == self.shortname:
                self.vm_domain = d
                break
        return
Example #11
    def filter_hosts(self):
        """
        Exclude any non-RPM-based hosts, and any downburst VMs
        """
        super(SELinux, self).filter_hosts()
        new_cluster = Cluster()
        for (remote, roles) in self.cluster.remotes.iteritems():
            status_info = get_status(remote.name)
            if status_info and status_info.get('is_vm', False):
                msg = "Excluding {host}: VMs are not yet supported"
                log.info(msg.format(host=remote.shortname))
            elif remote.os.package_type == 'rpm':
                new_cluster.add(remote, roles)
            else:
                msg = "Excluding {host}: OS '{os}' does not support SELinux"
                log.debug(msg.format(host=remote.shortname, os=remote.os.name))
        self.cluster = new_cluster
        return self.cluster
Example #12
def check_lock(ctx, config, check_up=True):
    """
    Check lock status of remote machines.
    """
    if not teuth_config.lock_server or ctx.config.get("check-locks") is False:
        log.info("Lock checking disabled.")
        return
    log.info("Checking locks...")
    for machine in ctx.config["targets"].iterkeys():
        status = lockstatus.get_status(machine)
        log.debug("machine status is %s", repr(status))
        assert status is not None, "could not read lock status for {name}".format(name=machine)
        if check_up:
            assert status["up"], "machine {name} is marked down".format(name=machine)
        assert status["locked"], "machine {name} is not locked".format(name=machine)
        assert status["locked_by"] == ctx.owner, "machine {name} is locked by {user}, not {owner}".format(
            name=machine, user=status["locked_by"], owner=ctx.owner
        )
Example #13
    def filter_hosts(self):
        """
        Exclude any non-RPM-based hosts, and any downburst VMs
        """
        super(SELinux, self).filter_hosts()
        new_cluster = Cluster()
        for (remote, roles) in self.cluster.remotes.iteritems():
            status_info = get_status(remote.name)
            if status_info and status_info.get('is_vm', False):
                msg = "Excluding {host}: VMs are not yet supported"
                log.info(msg.format(host=remote.shortname))
            elif remote.os.package_type == 'rpm':
                new_cluster.add(remote, roles)
            else:
                msg = "Excluding {host}: OS '{os}' does not support SELinux"
                log.debug(msg.format(host=remote.shortname, os=remote.os.name))
        self.cluster = new_cluster
        return self.cluster
Example #14
    def __init__(self, name):
        if libvirt is None:
            raise RuntimeError("libvirt not found")

        self.shortname = remote.getShortName(name)
        status_info = ls.get_status(self.shortname)
        try:
            if status_info.get('is_vm', False):
                phys_host = status_info['vm_host']['name'].split('.')[0]
        except TypeError:
            return
        self.connection = libvirt.open(phys_host)
        for i in self.connection.listDomainsID():
            d = self.connection.lookupByID(i)
            if d.name() == self.shortname:
                self.vm_domain = d
                break
        return
Example #15
def check_lock(ctx, config):
    if ctx.config.get('check-locks') == False:
        log.info('Lock checking disabled.')
        return
    log.info('Checking locks...')
    for machine in ctx.config['targets'].iterkeys():
        status = lockstatus.get_status(ctx, machine)
        log.debug('machine status is %s', repr(status))
        assert status is not None, \
            'could not read lock status for {name}'.format(name=machine)
        assert status['up'], 'machine {name} is marked down'.format(name=machine)
        assert status['locked'], \
            'machine {name} is not locked'.format(name=machine)
        assert status['locked_by'] == ctx.owner, \
            'machine {name} is locked by {user}, not {owner}'.format(
            name=machine,
            user=status['locked_by'],
            owner=ctx.owner,
            )
Example #16
def check_lock(ctx, config):
    if ctx.config.get('check-locks') == False:
        log.info('Lock checking disabled.')
        return
    log.info('Checking locks...')
    for machine in ctx.config['targets'].iterkeys():
        status = lockstatus.get_status(ctx, machine)
        log.debug('machine status is %s', repr(status))
        assert status is not None, \
            'could not read lock status for {name}'.format(name=machine)
        assert status['up'], 'machine {name} is marked down'.format(
            name=machine)
        assert status['locked'], \
            'machine {name} is not locked'.format(name=machine)
        assert status['locked_by'] == ctx.owner, \
            'machine {name} is locked by {user}, not {owner}'.format(
            name=machine,
            user=status['locked_by'],
            owner=ctx.owner,
            )
Example #17
def update_lock(ctx, name, description=None, status=None, sshpubkey=None):
    status_info = ls.get_status(ctx, name)
    phys_host = status_info['vpshost']
    if phys_host:
        keyscan_out = ''
        while not keyscan_out:
            time.sleep(10)
            keyscan_out, _ = keyscan_check(ctx, [name])
    updated = {}
    if description is not None:
        updated['desc'] = description
    if status is not None:
        updated['status'] = status
    if sshpubkey is not None:
        updated['sshpubkey'] = sshpubkey

    if updated:
        success, _, _ = ls.send_request('PUT', ls._lock_url(ctx) + '/' + name,
                                  body=urllib.urlencode(updated),
                                  headers={'Content-type': 'application/x-www-form-urlencoded'})
        return success
    return True
Example #18
def destroy_if_vm(ctx, machine_name):
    """
    Return False only on vm downburst failures.
    """
    status_info = ls.get_status(ctx, machine_name)
    phys_host = status_info['vpshost']
    if not phys_host:
        return True
    destroyMe = decanonicalize_hostname(machine_name)
    dbrst = _get_downburst_exec()
    if not dbrst:
        log.info("Error: no downburst executable found")
        return False
    p = subprocess.Popen([dbrst, '-c', phys_host,
            'destroy', destroyMe],
            stdout=subprocess.PIPE,stderr=subprocess.PIPE,)
    owt,err = p.communicate()
    if err:
        log.info("Error occurred while deleting %s" % destroyMe)
        return False
    else:
        log.info("%s destroyed: %s" % (machine_name,owt))
    return True
Example #19
def lock_machines(ctx, config):
    """
    Lock machines.  Called when the teuthology run finds and locks
    new machines.  This is not called if the user has already teuthology-locked
    machines and placed those keys in the Targets section of a yaml file.
    """
    # It's OK for os_type and os_version to be None here.  If we're trying
    # to lock a bare metal machine, we'll take whatever is available.  If
    # we want a vps, defaults will be provided by misc.get_distro and
    # misc.get_distro_version in provision.create_if_vm
    os_type = ctx.config.get("os_type")
    os_version = ctx.config.get("os_version")
    arch = ctx.config.get('arch')
    log.info('Locking machines...')
    assert isinstance(config[0], int), 'config[0] must be an integer'
    machine_type = config[1]
    how_many = config[0]
    # We want to make sure there are always this many machines available
    to_reserve = 5

    # change the status during the locking process
    report.try_push_job_info(ctx.config, dict(status='waiting'))

    while True:
        # get a candidate list of machines
        machines = lock.list_locks(machine_type=machine_type,
                                   up=True,
                                   locked=False,
                                   count=how_many + to_reserve)
        if machines is None:
            if ctx.block:
                log.error('Error listing machines, trying again')
                time.sleep(20)
                continue
            else:
                raise RuntimeError('Error listing machines')

        # make sure there are machines for non-automated jobs to run
        if len(machines) < to_reserve + how_many and ctx.owner.startswith(
                'scheduled'):
            if ctx.block:
                log.info(
                    'waiting for more machines to be free (need %s + %s, have %s)...',
                    to_reserve,
                    how_many,
                    len(machines),
                )
                time.sleep(10)
                continue
            else:
                assert 0, ('not enough machines free; need %s + %s, have %s' %
                           (to_reserve, how_many, len(machines)))

        newly_locked = lock.lock_many(ctx, how_many, machine_type, ctx.owner,
                                      ctx.archive, os_type, os_version, arch)
        if not newly_locked and not isinstance(newly_locked, list):
            raise RuntimeError('Invalid parameters specified')
        if len(newly_locked) == how_many:
            vmlist = []
            for lmach in newly_locked:
                if misc.is_vm(lmach):
                    vmlist.append(lmach)
            if vmlist:
                log.info('Waiting for virtual machines to come up')
                keys_dict = dict()
                loopcount = 0
                while len(keys_dict) != len(vmlist):
                    loopcount += 1
                    time.sleep(10)
                    keys_dict = lock.ssh_keyscan(vmlist)
                    log.info('virtual machine is still unavailable')
                    if loopcount == 40:
                        loopcount = 0
                        log.info('virtual machine(s) still not up, ' +
                                 'recreating unresponsive ones.')
                        for guest in vmlist:
                            if guest not in keys_dict.keys():
                                log.info('recreating: ' + guest)
                                full_name = misc.canonicalize_hostname(guest)
                                provision.destroy_if_vm(ctx, full_name)
                                provision.create_if_vm(ctx, full_name)
                if lock.do_update_keys(keys_dict):
                    log.info("Error in virtual machine keys")
                newscandict = {}
                for dkey in newly_locked.iterkeys():
                    stats = lockstatus.get_status(dkey)
                    newscandict[dkey] = stats['ssh_pub_key']
                ctx.config['targets'] = newscandict
            else:
                ctx.config['targets'] = newly_locked
            locked_targets = yaml.safe_dump(
                ctx.config['targets'], default_flow_style=False).splitlines()
            log.info('\n  '.join([
                'Locked targets:',
            ] + locked_targets))
            # successfully locked machines, change status back to running
            report.try_push_job_info(ctx.config, dict(status='running'))
            break
        elif not ctx.block:
            assert 0, 'not enough machines are available'

        log.warn('Could not lock enough machines, waiting...')
        time.sleep(10)
    try:
        yield
    finally:
        if ctx.config.get('unlock_on_failure', False) or \
                get_status(ctx.summary) == 'pass':
            log.info('Unlocking machines...')
            for machine in ctx.config['targets'].iterkeys():
                lock.unlock_one(ctx, machine, ctx.owner, ctx.archive)
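The lock_machines variants in this example and the ones that follow all index config as a two-element sequence. As a sketch of the expected shape, inferred from the config[0]/config[1] accesses above (the machine type name is a placeholder):

# Hypothetical task configuration: number of machines to lock, then the
# machine type to lock them from.
config = [3, 'plana']
how_many, machine_type = config[0], config[1]
assert isinstance(how_many, int), 'config[0] must be an integer'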
Example #20
def lock_machines(ctx, config):
    log.info('Locking machines...')
    assert isinstance(config[0], int), 'config must be an integer'
    machine_type = config[1]
    config = config[0]

    while True:
        # make sure there are enough machines up
        machines = lock.list_locks(ctx)
        if machines is None:
            if ctx.block:
                log.warn('error listing machines, trying again')
                time.sleep(20)
                continue
            else:
                assert 0, 'error listing machines'
        num_up = len(
            filter(
                lambda machine: machine['up'] and machine['type'] ==
                machine_type, machines))
        assert num_up >= config, 'not enough machines are up'

        # make sure there are machines for non-automated jobs to run
        num_free = len(
            filter(
                lambda machine: machine['up'] and machine['locked'] == 0 and
                machine['type'] == machine_type, machines))
        if num_free < 6 and ctx.owner.startswith('scheduled'):
            if ctx.block:
                log.info('waiting for more machines to be free...')
                time.sleep(10)
                continue
            else:
                assert 0, 'not enough machines free'

        newly_locked = lock.lock_many(ctx, config, machine_type, ctx.owner,
                                      ctx.archive)
        if len(newly_locked) == config:
            vmlist = []
            for lmach in newly_locked:
                if lock.create_if_vm(ctx, lmach):
                    vmlist.append(lmach)
            if vmlist:
                log.info('Waiting for virtual machines to come up')
                keyscan_out = ''
                loopcount = 0
                while len(keyscan_out.splitlines()) != len(vmlist):
                    loopcount += 1
                    time.sleep(10)
                    keyscan_out, current_locks = lock.keyscan_check(
                        ctx, vmlist)
                    log.info('virtual machine is still unavailable')
                    if loopcount == 40:
                        loopcount = 0
                        log.info(
                            'virtual machine(s) still not up, recreating unresponsive ones.'
                        )
                        for guest in vmlist:
                            if guest not in keyscan_out:
                                log.info('recreating: ' + guest)
                                lock.destroy_if_vm(ctx, 'ubuntu@' + guest)
                                lock.create_if_vm(ctx, 'ubuntu@' + guest)
                if lock.update_keys(ctx, keyscan_out, current_locks):
                    log.info("Error in virtual machine keys")
                newscandict = {}
                for dkey in newly_locked.iterkeys():
                    stats = lockstatus.get_status(ctx, dkey)
                    newscandict[dkey] = stats['sshpubkey']
                ctx.config['targets'] = newscandict
            else:
                ctx.config['targets'] = newly_locked
            log.info('\n  '.join([
                'Locked targets:',
            ] + yaml.safe_dump(ctx.config['targets'],
                               default_flow_style=False).splitlines()))
            break
        elif not ctx.block:
            assert 0, 'not enough machines are available'

        log.warn('Could not lock enough machines, waiting...')
        time.sleep(10)
    try:
        yield
    finally:
        if ctx.summary.get('success', False):
            log.info('Unlocking machines...')
            for machine in ctx.config['targets'].iterkeys():
                lock.unlock(ctx, machine, ctx.owner)
Example #21
def lock_machines(ctx, config):
    log.info("Locking machines...")
    assert isinstance(config[0], int), "config[0] must be an integer"
    machine_type = config[1]
    how_many = config[0]

    while True:
        # make sure there are enough machines up
        machines = lock.list_locks()
        if machines is None:
            if ctx.block:
                log.warn("error listing machines, trying again")
                time.sleep(20)
                continue
            else:
                assert 0, "error listing machines"

        is_up = lambda machine: machine["up"] and machine["type"] == machine_type  # noqa
        num_up = len(filter(is_up, machines))
        assert num_up >= how_many, "not enough machines are up"

        # make sure there are machines for non-automated jobs to run
        is_up_and_free = (
            lambda machine: machine["up"] and machine["locked"] == 0 and machine["type"] == machine_type
        )  # noqa
        up_and_free = filter(is_up_and_free, machines)
        num_free = len(up_and_free)
        if num_free < 6 and ctx.owner.startswith("scheduled"):
            if ctx.block:
                log.info("waiting for more machines to be free (need %s see %s)...", how_many, num_free)
                time.sleep(10)
                continue
            else:
                assert 0, "not enough machines free"

        newly_locked = lock.lock_many(ctx, how_many, machine_type, ctx.owner, ctx.archive)
        if len(newly_locked) == how_many:
            vmlist = []
            for lmach in newly_locked:
                if lock.create_if_vm(ctx, lmach):
                    vmlist.append(lmach)
            if vmlist:
                log.info("Waiting for virtual machines to come up")
                keyscan_out = ""
                loopcount = 0
                while len(keyscan_out.splitlines()) != len(vmlist):
                    loopcount += 1
                    time.sleep(10)
                    keyscan_out, current_locks = lock.keyscan_check(ctx, vmlist)
                    log.info("virtual machine is stil unavailable")
                    if loopcount == 40:
                        loopcount = 0
                        log.info("virtual machine(s) still not up, " + "recreating unresponsive ones.")
                        for guest in vmlist:
                            if guest not in keyscan_out:
                                log.info("recreating: " + guest)
                                lock.destroy_if_vm(ctx, "ubuntu@" + guest)
                                lock.create_if_vm(ctx, "ubuntu@" + guest)
                if lock.update_keys(ctx, keyscan_out, current_locks):
                    log.info("Error in virtual machine keys")
                newscandict = {}
                for dkey in newly_locked.iterkeys():
                    stats = lockstatus.get_status(ctx, dkey)
                    newscandict[dkey] = stats["sshpubkey"]
                ctx.config["targets"] = newscandict
            else:
                ctx.config["targets"] = newly_locked
            # FIXME: Ugh.
            log.info(
                "\n  ".join(
                    ["Locked targets:"] + yaml.safe_dump(ctx.config["targets"], default_flow_style=False).splitlines()
                )
            )
            break
        elif not ctx.block:
            assert 0, "not enough machines are available"

        log.warn("Could not lock enough machines, waiting...")
        time.sleep(10)
    try:
        yield
    finally:
        if ctx.summary.get("success", False):
            log.info("Unlocking machines...")
            for machine in ctx.config["targets"].iterkeys():
                lock.unlock_one(ctx, machine, ctx.owner)
Example #22
def lock_machines(ctx, config):
    """
    Lock machines.  Called when the teuthology run finds and locks
    new machines.  This is not called if the user has already teuthology-locked
    machines and placed those keys in the Targets section of a yaml file.
    """
    # It's OK for os_type and os_version to be None here.  If we're trying
    # to lock a bare metal machine, we'll take whatever is available.  If
    # we want a vps, defaults will be provided by misc.get_distro and
    # misc.get_distro_version in provision.create_if_vm
    os_type = ctx.config.get("os_type")
    os_version = ctx.config.get("os_version")
    arch = ctx.config.get("arch")
    log.info("Locking machines...")
    assert isinstance(config[0], int), "config[0] must be an integer"
    machine_type = config[1]
    total_requested = config[0]
    # We want to make sure there are always this many machines available
    reserved = teuth_config.reserve_machines
    assert isinstance(reserved, int), "reserve_machines must be an integer"
    assert reserved >= 0, "reserve_machines should be >= 0"

    # change the status during the locking process
    report.try_push_job_info(ctx.config, dict(status="waiting"))

    all_locked = dict()
    requested = total_requested
    while True:
        # get a candidate list of machines
        machines = lock.list_locks(machine_type=machine_type, up=True, locked=False, count=requested + reserved)
        if machines is None:
            if ctx.block:
                log.error("Error listing machines, trying again")
                time.sleep(20)
                continue
            else:
                raise RuntimeError("Error listing machines")

        # make sure there are machines for non-automated jobs to run
        if len(machines) < reserved + requested and ctx.owner.startswith("scheduled"):
            if ctx.block:
                log.info(
                    "waiting for more %s machines to be free (need %s + %s, have %s)...",
                    machine_type,
                    reserved,
                    requested,
                    len(machines),
                )
                time.sleep(10)
                continue
            else:
                assert 0, "not enough machines free; need %s + %s, have %s" % (reserved, requested, len(machines))

        newly_locked = lock.lock_many(ctx, requested, machine_type, ctx.owner, ctx.archive, os_type, os_version, arch)
        all_locked.update(newly_locked)
        log.info(
            "{newly_locked} {mtype} machines locked this try, "
            "{total_locked}/{total_requested} locked so far".format(
                newly_locked=len(newly_locked),
                mtype=machine_type,
                total_locked=len(all_locked),
                total_requested=total_requested,
            )
        )
        if len(all_locked) == total_requested:
            vmlist = []
            for lmach in all_locked:
                if misc.is_vm(lmach):
                    vmlist.append(lmach)
            if vmlist:
                log.info("Waiting for virtual machines to come up")
                keys_dict = dict()
                loopcount = 0
                while len(keys_dict) != len(vmlist):
                    loopcount += 1
                    time.sleep(10)
                    keys_dict = misc.ssh_keyscan(vmlist)
                    log.info("virtual machine is still unavailable")
                    if loopcount == 40:
                        loopcount = 0
                        log.info("virtual machine(s) still not up, " + "recreating unresponsive ones.")
                        for guest in vmlist:
                            if guest not in keys_dict.keys():
                                log.info("recreating: " + guest)
                                full_name = misc.canonicalize_hostname(guest)
                                provision.destroy_if_vm(ctx, full_name)
                                provision.create_if_vm(ctx, full_name)
                if lock.do_update_keys(keys_dict):
                    log.info("Error in virtual machine keys")
                newscandict = {}
                for dkey in all_locked.iterkeys():
                    stats = lockstatus.get_status(dkey)
                    newscandict[dkey] = stats["ssh_pub_key"]
                ctx.config["targets"] = newscandict
            else:
                ctx.config["targets"] = all_locked
            locked_targets = yaml.safe_dump(ctx.config["targets"], default_flow_style=False).splitlines()
            log.info("\n  ".join(["Locked targets:"] + locked_targets))
            # successfully locked machines, change status back to running
            report.try_push_job_info(ctx.config, dict(status="running"))
            break
        elif not ctx.block:
            assert 0, "not enough machines are available"
        else:
            requested = requested - len(newly_locked)
            assert requested > 0, "lock_machines: requested counter went negative, this shouldn't happen"

        log.info(
            "{total} machines locked ({new} new); need {more} more".format(
                total=len(all_locked), new=len(newly_locked), more=requested
            )
        )
        log.warn("Could not lock enough machines, waiting...")
        time.sleep(10)
    try:
        yield
    finally:
        # If both unlock_on_failure and nuke-on-error are set, don't unlock now
        # because we're just going to nuke (and unlock) later.
        unlock_on_failure = ctx.config.get("unlock_on_failure", False) and not ctx.config.get("nuke-on-error", False)
        if get_status(ctx.summary) == "pass" or unlock_on_failure:
            log.info("Unlocking machines...")
            for machine in ctx.config["targets"].iterkeys():
                lock.unlock_one(ctx, machine, ctx.owner, ctx.archive)
Example #23
def lock_machines(ctx, config):
    log.info('Locking machines...')
    assert isinstance(config[0], int), 'config must be an integer'
    machine_type = config[1]
    config = config[0]

    while True:
        # make sure there are enough machines up
        machines = lock.list_locks(ctx)
        if machines is None:
            if ctx.block:
                log.warn('error listing machines, trying again')
                time.sleep(20)
                continue
            else:
                assert 0, 'error listing machines'
        num_up = len(filter(lambda machine: machine['up'] and machine['type'] == machine_type, machines))
        assert num_up >= config, 'not enough machines are up'

        # make sure there are machines for non-automated jobs to run
        num_free = len(filter(
                lambda machine: machine['up'] and machine['locked'] == 0 and machine['type'] == machine_type,
                machines
                ))
        if num_free < 6 and ctx.owner.startswith('scheduled'):
            if ctx.block:
                log.info('waiting for more machines to be free...')
                time.sleep(10)
                continue
            else:
                assert 0, 'not enough machines free'

        newly_locked = lock.lock_many(ctx, config, machine_type, ctx.owner, ctx.archive)
        if len(newly_locked) == config:
            vmlist = []
            for lmach in newly_locked:
                if lock.create_if_vm(ctx,lmach):
                    vmlist.append(lmach)
            if vmlist:
                log.info('Waiting for virtual machines to come up')
                keyscan_out = ''
                while len(keyscan_out.splitlines()) != len(vmlist):
                    time.sleep(10)
                    keyscan_out, current_locks = lock.keyscan_check(ctx, vmlist)
                    log.info('virtual machine is still unavailable')
                if lock.update_keys(ctx, keyscan_out, current_locks):
                    log.info("Error in virtual machine keys")
                newscandict = {}
                for dkey in newly_locked.iterkeys():
                    stats = lockstatus.get_status(ctx, dkey)
                    newscandict[dkey] = stats['sshpubkey']
                ctx.config['targets'] = newscandict
            else:
                ctx.config['targets'] = newly_locked
            log.info('\n  '.join(['Locked targets:', ] + yaml.safe_dump(ctx.config['targets'], default_flow_style=False).splitlines()))
            break
        elif not ctx.block:
            assert 0, 'not enough machines are available'

        log.warn('Could not lock enough machines, waiting...')
        time.sleep(10)
    try:
        yield
    finally:
        if ctx.summary.get('success', False):
            log.info('Unlocking machines...')
            for machine in ctx.config['targets'].iterkeys():
                lock.unlock(ctx, machine, ctx.owner)
Example #24
def lock_machines(ctx, config):
    """
    Lock machines.  Called when the teuthology run finds and locks
    new machines.  This is not called if the user has already teuthology-locked
    machines and placed those keys in the Targets section of a yaml file.
    """
    log.info('Locking machines...')
    assert isinstance(config[0], int), 'config[0] must be an integer'
    machine_type = config[1]
    machine_types = teuthology.get_multi_machine_types(machine_type)
    how_many = config[0]

    while True:
        # make sure there are enough machines up
        machines = lock.list_locks()
        if machines is None:
            if ctx.block:
                log.warn('error listing machines, trying again')
                time.sleep(20)
                continue
            else:
                assert 0, 'error listing machines'

        is_up = lambda machine: machine['up'] and machine['type'] in machine_types  # noqa
        num_up = len(filter(is_up, machines))
        assert num_up >= how_many, 'not enough machines are up'

        # make sure there are machines for non-automated jobs to run
        is_up_and_free = lambda machine: machine['up'] and machine['locked'] == 0 and machine['type'] in machine_types  # noqa
        up_and_free = filter(is_up_and_free, machines)
        num_free = len(up_and_free)
        if num_free < 6 and ctx.owner.startswith('scheduled'):
            if ctx.block:
                log.info(
                    'waiting for more machines to be free (need %s, have %s)...',
                    how_many,
                    num_free,
                )
                time.sleep(10)
                continue
            else:
                assert 0, 'not enough machines free'

        newly_locked = lock.lock_many(ctx, how_many, machine_type, ctx.owner,
                                      ctx.archive)
        if len(newly_locked) == how_many:
            vmlist = []
            for lmach in newly_locked:
                if teuthology.is_vm(lmach):
                    vmlist.append(lmach)
            if vmlist:
                log.info('Waiting for virtual machines to come up')
                keyscan_out = ''
                loopcount = 0
                while len(keyscan_out.splitlines()) != len(vmlist):
                    loopcount += 1
                    time.sleep(10)
                    keyscan_out, current_locks = lock.keyscan_check(ctx,
                                                                    vmlist)
                    log.info('virtual machine is still unavailable')
                    if loopcount == 40:
                        loopcount = 0
                        log.info('virtual machine(s) still not up, ' +
                                 'recreating unresponsive ones.')
                        for guest in vmlist:
                            if guest not in keyscan_out:
                                log.info('recreating: ' + guest)
                                lock.destroy_if_vm(ctx, 'ubuntu@' + guest)
                                lock.create_if_vm(ctx, 'ubuntu@' + guest)
                if lock.update_keys(ctx, keyscan_out, current_locks):
                    log.info("Error in virtual machine keys")
                newscandict = {}
                for dkey in newly_locked.iterkeys():
                    stats = lockstatus.get_status(ctx, dkey)
                    newscandict[dkey] = stats['sshpubkey']
                ctx.config['targets'] = newscandict
            else:
                ctx.config['targets'] = newly_locked
            # FIXME: Ugh.
            log.info('\n  '.join(['Locked targets:', ] + yaml.safe_dump(ctx.config['targets'], default_flow_style=False).splitlines()))
            break
        elif not ctx.block:
            assert 0, 'not enough machines are available'

        log.warn('Could not lock enough machines, waiting...')
        time.sleep(10)
    try:
        yield
    finally:
        if ctx.config.get('unlock_on_failure', False) or \
           ctx.summary.get('success', False):
            log.info('Unlocking machines...')
            for machine in ctx.config['targets'].iterkeys():
                lock.unlock_one(ctx, machine, ctx.owner)
Example #25
def lock_machines(ctx, config):
    """
    Lock machines.  Called when the teuthology run finds and locks
    new machines.  This is not called if the user has already teuthology-locked
    machines and placed those keys in the Targets section of a yaml file.
    """
    # It's OK for os_type and os_version to be None here.  If we're trying
    # to lock a bare metal machine, we'll take whatever is available.  If
    # we want a vps, defaults will be provided by misc.get_distro and
    # misc.get_distro_version in provision.create_if_vm
    os_type = ctx.config.get("os_type")
    os_version = ctx.config.get("os_version")
    arch = ctx.config.get('arch')
    log.info('Locking machines...')
    assert isinstance(config[0], int), 'config[0] must be an integer'
    machine_type = config[1]
    how_many = config[0]
    # We want to make sure there are always this many machines available
    to_reserve = teuth_config.reserve_machines
    assert isinstance(to_reserve, int), 'reserve_machines must be an integer'
    assert (to_reserve >= 0), 'reserve_machines should be >= 0'

    # change the status during the locking process
    report.try_push_job_info(ctx.config, dict(status='waiting'))

    while True:
        # get a candidate list of machines
        machines = lock.list_locks(machine_type=machine_type, up=True,
                                   locked=False, count=how_many + to_reserve)
        if machines is None:
            if ctx.block:
                log.error('Error listing machines, trying again')
                time.sleep(20)
                continue
            else:
                raise RuntimeError('Error listing machines')

        # make sure there are machines for non-automated jobs to run
        if len(machines) < to_reserve + how_many and ctx.owner.startswith('scheduled'):
            if ctx.block:
                log.info(
                    'waiting for more machines to be free (need %s + %s, have %s)...',
                    to_reserve,
                    how_many,
                    len(machines),
                )
                time.sleep(10)
                continue
            else:
                assert 0, ('not enough machines free; need %s + %s, have %s' %
                           (to_reserve, how_many, len(machines)))

        newly_locked = lock.lock_many(ctx, how_many, machine_type, ctx.owner,
                                      ctx.archive, os_type, os_version, arch)
        if not newly_locked and not isinstance(newly_locked, list):
            raise RuntimeError('Invalid parameters specified')
        if len(newly_locked) == how_many:
            vmlist = []
            for lmach in newly_locked:
                if misc.is_vm(lmach):
                    vmlist.append(lmach)
            if vmlist:
                log.info('Waiting for virtual machines to come up')
                keys_dict = dict()
                loopcount = 0
                while len(keys_dict) != len(vmlist):
                    loopcount += 1
                    time.sleep(10)
                    keys_dict = lock.ssh_keyscan(vmlist)
                    log.info('virtual machine is still unavailable')
                    if loopcount == 40:
                        loopcount = 0
                        log.info('virtual machine(s) still not up, ' +
                                 'recreating unresponsive ones.')
                        for guest in vmlist:
                            if guest not in keys_dict.keys():
                                log.info('recreating: ' + guest)
                                full_name = misc.canonicalize_hostname(guest)
                                provision.destroy_if_vm(ctx, full_name)
                                provision.create_if_vm(ctx, full_name)
                if lock.do_update_keys(keys_dict):
                    log.info("Error in virtual machine keys")
                newscandict = {}
                for dkey in newly_locked.iterkeys():
                    stats = lockstatus.get_status(dkey)
                    newscandict[dkey] = stats['ssh_pub_key']
                ctx.config['targets'] = newscandict
            else:
                ctx.config['targets'] = newly_locked
            locked_targets = yaml.safe_dump(
                ctx.config['targets'],
                default_flow_style=False
            ).splitlines()
            log.info('\n  '.join(['Locked targets:', ] + locked_targets))
            # successfully locked machines, change status back to running
            report.try_push_job_info(ctx.config, dict(status='running'))
            break
        elif not ctx.block:
            assert 0, 'not enough machines are available'
        else:
            how_many = how_many - len(newly_locked)
            assert how_many > 0, "lock_machines: how_many counter went " \
                                 "negative, this shouldn't happen"

        log.warn('Could not lock enough machines, waiting...')
        time.sleep(10)
    try:
        yield
    finally:
        # If both unlock_on_failure and nuke-on-error are set, don't unlock now
        # because we're just going to nuke (and unlock) later.
        unlock_on_failure = (
            ctx.config.get('unlock_on_failure', False)
            and not ctx.config.get('nuke-on-error', False)
        )
        if get_status(ctx.summary) == 'pass' or unlock_on_failure:
            log.info('Unlocking machines...')
            for machine in ctx.config['targets'].iterkeys():
                lock.unlock_one(ctx, machine, ctx.owner, ctx.archive)
Example #26
def lock_machines(ctx, config):
    """
    Lock machines.  Called when the teuthology run finds and locks
    new machines.  This is not called if the user has already teuthology-locked
    machines and placed those keys in the Targets section of a yaml file.
    """
    log.info('Locking machines...')
    assert isinstance(config[0], int), 'config[0] must be an integer'
    machine_type = config[1]
    how_many = config[0]
    # We want to make sure there are always this many machines available
    to_reserve = 5

    while True:
        # get a candidate list of machines
        machines = lock.list_locks(machine_type=machine_type, up=True,
                                   locked=False, count=how_many + to_reserve)
        if machines is None:
            if ctx.block:
                log.error('Error listing machines, trying again')
                time.sleep(20)
                continue
            else:
                raise RuntimeError('Error listing machines')

        # make sure there are machines for non-automated jobs to run
        if len(machines) <= to_reserve and ctx.owner.startswith('scheduled'):
            if ctx.block:
                log.info(
                    'waiting for more machines to be free (need %s, have %s)...',
                    how_many,
                    len(machines),
                )
                time.sleep(10)
                continue
            else:
                assert 0, 'not enough machines free'

        newly_locked = lock.lock_many(ctx, how_many, machine_type, ctx.owner,
                                      ctx.archive)
        if not newly_locked and not isinstance(newly_locked, list):
            raise RuntimeError('Invalid parameters specified')
        if len(newly_locked) == how_many:
            vmlist = []
            for lmach in newly_locked:
                if misc.is_vm(lmach):
                    vmlist.append(lmach)
            if vmlist:
                log.info('Waiting for virtual machines to come up')
                keys_dict = dict()
                loopcount = 0
                while len(keys_dict) != len(vmlist):
                    loopcount += 1
                    time.sleep(10)
                    keys_dict = lock.ssh_keyscan(vmlist)
                    log.info('virtual machine is still unavailable')
                    if loopcount == 40:
                        loopcount = 0
                        log.info('virtual machine(s) still not up, ' +
                                 'recreating unresponsive ones.')
                        for guest in vmlist:
                            if guest not in keys_dict.keys():
                                log.info('recreating: ' + guest)
                                full_name = misc.canonicalize_hostname(guest)
                                provision.destroy_if_vm(ctx, full_name)
                                provision.create_if_vm(ctx, full_name)
                if lock.do_update_keys(keys_dict):
                    log.info("Error in virtual machine keys")
                newscandict = {}
                for dkey in newly_locked.iterkeys():
                    stats = lockstatus.get_status(dkey)
                    newscandict[dkey] = stats['ssh_pub_key']
                ctx.config['targets'] = newscandict
            else:
                ctx.config['targets'] = newly_locked
            # FIXME: Ugh.
            log.info('\n  '.join(['Locked targets:', ] + yaml.safe_dump(ctx.config['targets'], default_flow_style=False).splitlines()))
            break
        elif not ctx.block:
            assert 0, 'not enough machines are available'

        log.warn('Could not lock enough machines, waiting...')
        time.sleep(10)
    try:
        yield
    finally:
        if ctx.config.get('unlock_on_failure', False) or \
           ctx.summary.get('success', False):
            log.info('Unlocking machines...')
            for machine in ctx.config['targets'].iterkeys():
                lock.unlock_one(ctx, machine, ctx.owner)
Example #27
def lock_machines(ctx, config):
    """
    Lock machines.  Called when the teuthology run finds and locks
    new machines.  This is not called if the user has already teuthology-locked
    machines and placed those keys in the Targets section of a yaml file.
    """
    log.info('Locking machines...')
    assert isinstance(config[0], int), 'config[0] must be an integer'
    machine_type = config[1]
    machine_types = teuthology.get_multi_machine_types(machine_type)
    how_many = config[0]

    while True:
        # make sure there are enough machines up
        machines = lock.list_locks()
        if machines is None:
            if ctx.block:
                log.warn('error listing machines, trying again')
                time.sleep(20)
                continue
            else:
                assert 0, 'error listing machines'

        is_up = lambda machine: machine['up'] and machine['type'] in machine_types  # noqa
        num_up = len(filter(is_up, machines))
        assert num_up >= how_many, 'not enough machines are up'

        # make sure there are machines for non-automated jobs to run
        is_up_and_free = lambda machine: machine['up'] and machine['locked'] == 0 and machine['type'] in machine_types  # noqa
        up_and_free = filter(is_up_and_free, machines)
        num_free = len(up_and_free)
        if num_free < 6 and ctx.owner.startswith('scheduled'):
            if ctx.block:
                log.info(
                    'waiting for more machines to be free (need %s, have %s)...',
                    how_many,
                    num_free,
                )
                time.sleep(10)
                continue
            else:
                assert 0, 'not enough machines free'

        newly_locked = lock.lock_many(ctx, how_many, machine_type, ctx.owner,
                                      ctx.archive)
        if len(newly_locked) == how_many:
            vmlist = []
            for lmach in newly_locked:
                if teuthology.is_vm(lmach):
                    vmlist.append(lmach)
            if vmlist:
                log.info('Waiting for virtual machines to come up')
                keyscan_out = ''
                loopcount = 0
                while len(keyscan_out.splitlines()) != len(vmlist):
                    loopcount += 1
                    time.sleep(10)
                    keyscan_out, current_locks = lock.keyscan_check(ctx,
                                                                    vmlist)
                    log.info('virtual machine is still unavailable')
                    if loopcount == 40:
                        loopcount = 0
                        log.info('virtual machine(s) still not up, ' +
                                 'recreating unresponsive ones.')
                        for guest in vmlist:
                            if guest not in keyscan_out:
                                log.info('recreating: ' + guest)
                                lock.destroy_if_vm(ctx, 'ubuntu@' + guest)
                                lock.create_if_vm(ctx, 'ubuntu@' + guest)
                if lock.update_keys(ctx, keyscan_out, current_locks):
                    log.info("Error in virtual machine keys")
                newscandict = {}
                for dkey in newly_locked.iterkeys():
                    stats = lockstatus.get_status(ctx, dkey)
                    newscandict[dkey] = stats['sshpubkey']
                ctx.config['targets'] = newscandict
            else:
                ctx.config['targets'] = newly_locked
            # FIXME: Ugh.
            log.info('\n  '.join(['Locked targets:', ] + yaml.safe_dump(ctx.config['targets'], default_flow_style=False).splitlines()))
            break
        elif not ctx.block:
            assert 0, 'not enough machines are available'

        log.warn('Could not lock enough machines, waiting...')
        time.sleep(10)
    try:
        yield
    finally:
        if ctx.config.get('unlock_on_failure', False) or \
           ctx.summary.get('success', False):
            log.info('Unlocking machines...')
            for machine in ctx.config['targets'].iterkeys():
                lock.unlock_one(ctx, machine, ctx.owner)
Example #28
def lock_machines(ctx, config):
    """
    Lock machines.  Called when the teuthology run finds and locks
    new machines.  This is not called if the user has already teuthology-locked
    machines and placed those keys in the Targets section of a yaml file.
    """
    # It's OK for os_type and os_version to be None here.  If we're trying
    # to lock a bare metal machine, we'll take whatever is available.  If
    # we want a vps, defaults will be provided by misc.get_distro and
    # misc.get_distro_version in provision.create_if_vm
    os_type = ctx.config.get("os_type")
    os_version = ctx.config.get("os_version")
    arch = ctx.config.get('arch')
    log.info('Locking machines...')
    assert isinstance(config[0], int), 'config[0] must be an integer'
    machine_type = config[1]
    total_requested = config[0]
    # We want to make sure there are always this many machines available
    reserved = teuth_config.reserve_machines
    assert isinstance(reserved, int), 'reserve_machines must be an integer'
    assert (reserved >= 0), 'reserve_machines should be >= 0'

    # change the status during the locking process
    report.try_push_job_info(ctx.config, dict(status='waiting'))

    all_locked = dict()
    requested = total_requested
    while True:
        # get a candidate list of machines
        machines = lock.list_locks(machine_type=machine_type, up=True,
                                   locked=False, count=requested + reserved)
        if machines is None:
            if ctx.block:
                log.error('Error listing machines, trying again')
                time.sleep(20)
                continue
            else:
                raise RuntimeError('Error listing machines')

        # make sure there are machines for non-automated jobs to run
        if len(machines) < reserved + requested and ctx.owner.startswith('scheduled'):
            if ctx.block:
                log.info(
                    'waiting for more %s machines to be free (need %s + %s, have %s)...',
                    machine_type,
                    reserved,
                    requested,
                    len(machines),
                )
                time.sleep(10)
                continue
            else:
                assert 0, ('not enough machines free; need %s + %s, have %s' %
                           (reserved, requested, len(machines)))

        newly_locked = lock.lock_many(ctx, requested, machine_type, ctx.owner,
                                      ctx.archive, os_type, os_version, arch)
        all_locked.update(newly_locked)
        log.info(
            '{newly_locked} {mtype} machines locked this try, '
            '{total_locked}/{total_requested} locked so far'.format(
                newly_locked=len(newly_locked),
                mtype=machine_type,
                total_locked=len(all_locked),
                total_requested=total_requested,
            )
        )
        if len(all_locked) == total_requested:
            vmlist = []
            for lmach in all_locked:
                if misc.is_vm(lmach):
                    vmlist.append(lmach)
            if vmlist:
                log.info('Waiting for virtual machines to come up')
                keys_dict = dict()
                loopcount = 0
                while len(keys_dict) != len(vmlist):
                    loopcount += 1
                    time.sleep(10)
                    keys_dict = misc.ssh_keyscan(vmlist)
                    log.info('virtual machine is still unavailable')
                    if loopcount == 40:
                        loopcount = 0
                        log.info('virtual machine(s) still not up, ' +
                                 'recreating unresponsive ones.')
                        for guest in vmlist:
                            if guest not in keys_dict.keys():
                                log.info('recreating: ' + guest)
                                full_name = misc.canonicalize_hostname(guest)
                                provision.destroy_if_vm(ctx, full_name)
                                provision.create_if_vm(ctx, full_name)
                if lock.do_update_keys(keys_dict):
                    log.info("Error in virtual machine keys")
                newscandict = {}
                for dkey in all_locked.iterkeys():
                    stats = lockstatus.get_status(dkey)
                    newscandict[dkey] = stats['ssh_pub_key']
                ctx.config['targets'] = newscandict
            else:
                ctx.config['targets'] = all_locked
            locked_targets = yaml.safe_dump(
                ctx.config['targets'],
                default_flow_style=False
            ).splitlines()
            log.info('\n  '.join(['Locked targets:', ] + locked_targets))
            # successfully locked machines, change status back to running
            report.try_push_job_info(ctx.config, dict(status='running'))
            break
        elif not ctx.block:
            assert 0, 'not enough machines are available'
        else:
            requested = requested - len(newly_locked)
            assert requested > 0, "lock_machines: requested counter went " \
                                  "negative, this shouldn't happen"

        log.info(
            "{total} machines locked ({new} new); need {more} more".format(
                total=len(all_locked), new=len(newly_locked), more=requested)
        )
        log.warn('Could not lock enough machines, waiting...')
        time.sleep(10)
    try:
        yield
    finally:
        # If both unlock_on_failure and nuke-on-error are set, don't unlock now
        # because we're just going to nuke (and unlock) later.
        unlock_on_failure = (
            ctx.config.get('unlock_on_failure', False)
            and not ctx.config.get('nuke-on-error', False)
        )
        if get_status(ctx.summary) == 'pass' or unlock_on_failure:
            log.info('Unlocking machines...')
            for machine in ctx.config['targets'].iterkeys():
                lock.unlock_one(ctx, machine, ctx.owner, ctx.archive)
Example #29
        assert ctx.owner is None, \
            '--all and --owner are mutually exclusive'
        assert not machines, \
            '--all and listing specific machines are incompatible'
    if ctx.num_to_lock:
        assert ctx.machine_type, \
            'must specify machine type to lock'

    if ctx.brief:
        assert ctx.list, '--brief only applies to --list'

    if ctx.list or ctx.list_targets:
        assert ctx.desc is None, '--desc does nothing with --list'

        if machines:
            statuses = [ls.get_status(ctx, machine) for machine in machines]
        else:
            statuses = list_locks(ctx)
        vmachines = []

        for vmachine in statuses:
            if vmachine['vpshost']:
                if vmachine['locked']:
                    vmachines.append(vmachine['name'])
        if vmachines:
            # Avoid ssh-keyscans for everybody when listing all machines
            # Listing specific machines will update the keys.
            if machines:
                scan_for_locks(ctx, vmachines)
                statuses = [ls.get_status(ctx, machine) for machine in machines]
            else: