def processEC2(ec2Kittens):
    """Tally running EC2 hosts per instance class and stop the idle ones.

    Per-class settings are loaded from every ``counts:*`` hash in ``db``.
    Each host that is enabled and in the ``running`` state bumps the
    ``current`` counter of its class.  A host whose ``lastseen['since']``
    exceeds 3600 seconds is gracefully shut down (when possible) and then
    stopped through the EC2 API.

    ec2Kittens -- iterable of ``(name, result)`` pairs; ``result`` must
                  provide ``'host'`` and may provide a ``'lastseen'`` dict
                  with ``hours``/``minutes``/``seconds``/``since`` entries.

    Returns None; the outcome is reported through ``log``.
    """
    counts = {}
    for item in db.keys('counts:*'):
        instanceType = item.replace('counts:', '')
        entry = {'current': 0}
        # stored fields are layered on top of the default 'current' value
        entry.update(db.hgetall(item))
        counts[instanceType] = entry

    for kitten, r in ec2Kittens:
        host = r['host']
        instanceType = host.info['class']
        isRunning = host.info['enabled'] and host.info['state'] == 'running'

        if instanceType not in counts:
            log.error('%s has a instance type [%s] not found in our counts, assuming minimum of 2 and max of 50' % (kitten, instanceType))
            # Create the entry in one step: assigning into
            # counts[instanceType][...] before the entry exists raises
            # KeyError.
            counts[instanceType] = {'max': 50, 'min': 2, 'current': 0}

        if isRunning:
            counts[instanceType]['current'] += 1

        # 'lastseen' may be absent, and may also be present but None when
        # the producer had no timestamp to convert.
        lastseen = r['lastseen'] if 'lastseen' in r else None
        if lastseen is None:
            continue

        log.info('%s: count = %d idle: %dh %dm %ss' % (instanceType,
                                                       counts[instanceType]['current'],
                                                       lastseen['hours'],
                                                       lastseen['minutes'],
                                                       lastseen['seconds']))

        if lastseen['since'] > 3600:
            if isRunning:
                log.info('shutting down ec2 instance')
                # if we can ssh to host, then try and do normal shutdowns
                if host.graceful_shutdown():
                    log.info("instance was graceful'd")
                # The instance is stopped whether or not the graceful
                # shutdown worked.
                try:
                    conn = connect_to_region(host.info['region'],
                                             aws_access_key_id=getPassword('aws_access_key_id'),
                                             aws_secret_access_key=getPassword('aws_secret_access_key'))
                    conn.stop_instances(instance_ids=[host.info['id']])
                except Exception:
                    # best effort: log the failure and move on to the next
                    # host, but let KeyboardInterrupt/SystemExit propagate
                    log.error('unable to stop ec2 instance %s [%s]' % (kitten, host.info['id']), exc_info=True)
            else:
                log.error('ec2 instance flagged for reboot/recovery but it is not running')
def processKittens(options, jobs, results):
    """Worker loop: take host names from ``jobs``, check each, publish results.

    For every job pulled from ``jobs`` a ``(job, result)`` tuple is put on
    ``results``.  ``result`` is an empty dict when the host was skipped
    (not listed, wrong environment, disabled, has notes, or unknown).

    options -- settings object; reads ``tools``, ``environ``, ``force``,
               ``verbose`` and ``dryrun``.
    jobs    -- queue of host names, polled without blocking.
    results -- queue receiving the ``(job, result)`` tuples.

    The loop never returns; the caller is expected to stop the worker.

    NOTE(review): the visible source defines processKittens a second time
    further down; at import time the later definition is the one bound to
    the name.
    """
    remoteEnv = releng.remote.RemoteEnvironment(options.tools, db=db)
    dNow = datetime.datetime.now()
    # The date/hour part of the storage key is fixed when the worker
    # starts, not recomputed per job.
    keyPrefix = 'kittenherder:%s.%s' % (dNow.strftime('%Y-%m-%d'),
                                        dNow.strftime('%H'))

    while True:
        # NOTE(review): non-blocking get with no sleep means this loop
        # spins while the queue is empty -- confirm how workers are
        # stopped before switching to a blocking get.
        try:
            job = jobs.get(False)
        except Empty:
            continue
        if job is None:
            continue

        results.put((job, _checkKitten(remoteEnv, options, job, keyPrefix)))


def _checkKitten(remoteEnv, options, job, keyPrefix):
    """Check a single host and return its result dict ({} when skipped)."""
    if job not in remoteEnv.hosts:
        if options.verbose:
            # No exception is being handled here, so exc_info is not
            # requested (it would log an empty or stale traceback).
            log.error('%s not listed in slavealloc, skipping' % job)
        return {}

    info = remoteEnv.hosts[job]
    if info['environment'] != options.environ:
        if options.verbose:
            log.info('%s not in requested environment %s (%s), skipping' % (job, options.environ, info['environment']))
        return {}

    if not info['enabled'] and not options.force:
        if options.verbose:
            log.info('%s not enabled, skipping' % job)
        return {}

    if len(info['notes']) > 0 and not options.force:
        if options.verbose:
            log.info('%s has a slavealloc notes field, skipping' % job)
        return {}

    log.info(job)
    host = remoteEnv.getHost(job)
    if host is None:
        log.error('unknown host for %s' % job)
        return {}

    r = remoteEnv.check(host, indent=' ', dryrun=options.dryrun, verbose=options.verbose)
    d = remoteEnv.rebootIfNeeded(host, lastSeen=r['lastseen'], indent=' ', dryrun=options.dryrun, verbose=options.verbose)

    for s in ('reboot', 'recovery', 'ipmi', 'pdu'):
        r[s] = d[s]
    r['output'] += d['output']

    # Persist the raw result fields before 'lastseen' is rewritten below.
    hostKey = '%s:%s' % (keyPrefix, job)
    for key in r:
        db.hset(hostKey, key, r[key])
    db.expire(hostKey, _keyExpire)

    # all this because json cannot dumps() the timedelta object
    td = r['lastseen']
    if td is not None:
        r['lastseen'] = _lastSeenSummary(td)
    log.info('%s: %s' % (job, json.dumps(r)))

    if host.farm == 'ec2' and (r['reboot'] or r['recovery']):
        _stopEC2Kitten(host, job)

    return r


def _lastSeenSummary(td):
    """Turn a timedelta into the JSON-serialisable 'lastseen' dict."""
    # timedelta.seconds holds only the sub-day remainder; include the days
    # so hosts idle for more than 24h are not reported as recently seen.
    secs = td.days * 86400 + td.seconds
    hours, remainder = divmod(secs, 3600)
    minutes, seconds = divmod(remainder, 60)
    return {
        'hours': hours,
        'minutes': minutes,
        'seconds': seconds,
        'relative': relative(td),
        'since': secs,
    }


def _stopEC2Kitten(host, job):
    """Ask EC2 to stop the instance behind ``host``; failures are only logged."""
    log.info('shutting down ec2 instance')
    try:
        conn = connect_to_region(host.info['region'],
                                 aws_access_key_id=getPassword('aws_access_key_id'),
                                 aws_secret_access_key=getPassword('aws_secret_access_key'))
        conn.stop_instances(instance_ids=[host.info['id']])
    except Exception:
        # best effort, but let KeyboardInterrupt/SystemExit propagate
        log.error('unable to stop ec2 instance %s [%s]' % (job, host.info['id']), exc_info=True)
def processKittens(options, jobs, results):
    """Poll ``jobs`` forever, check each named host and report on ``results``.

    Every job taken from ``jobs`` produces one ``(job, result)`` tuple on
    ``results``; ``result`` stays ``{}`` when the host is skipped (not
    listed, different environment, disabled, carries notes, or unknown).

    options -- settings object; reads ``tools``, ``environ``, ``force``,
               ``verbose`` and ``dryrun``.
    jobs    -- queue of host names, read with a non-blocking get.
    results -- queue receiving the ``(job, result)`` tuples.

    This function does not return.

    NOTE(review): the visible source also contains an earlier definition
    of processKittens; being the later one, this definition is what the
    name refers to after import.
    """
    remoteEnv = releng.remote.RemoteEnvironment(options.tools, db=db)
    dNow = datetime.datetime.now()
    dDate = dNow.strftime('%Y-%m-%d')
    dHour = dNow.strftime('%H')

    while True:
        # NOTE(review): with a non-blocking get and no sleep this loop
        # busy-waits whenever the queue is empty.
        try:
            job = jobs.get(False)
        except Empty:
            job = None

        if job is None:
            continue

        r = {}
        if job not in remoteEnv.hosts:
            if options.verbose:
                # exc_info is not requested: no exception is active here
                log.error('%s not listed in slavealloc, skipping' % job)
        else:
            info = remoteEnv.hosts[job]
            if info['environment'] != options.environ:
                if options.verbose:
                    log.info('%s not in requested environment %s (%s), skipping' % (job, options.environ, info['environment']))
            elif not info['enabled'] and not options.force:
                if options.verbose:
                    log.info('%s not enabled, skipping' % job)
            elif len(info['notes']) > 0 and not options.force:
                if options.verbose:
                    log.info('%s has a slavealloc notes field, skipping' % job)
            else:
                log.info(job)
                host = remoteEnv.getHost(job)
                if host is None:
                    log.error('unknown host for %s' % job)
                else:
                    r = remoteEnv.check(host, indent=' ',
                                        dryrun=options.dryrun,
                                        verbose=options.verbose)
                    d = remoteEnv.rebootIfNeeded(host, lastSeen=r['lastseen'],
                                                 indent=' ',
                                                 dryrun=options.dryrun,
                                                 verbose=options.verbose)

                    for s in ('reboot', 'recovery', 'ipmi', 'pdu'):
                        r[s] = d[s]
                    r['output'] += d['output']

                    # store the raw fields before 'lastseen' is rewritten
                    hostKey = 'kittenherder:%s.%s:%s' % (dDate, dHour, job)
                    for key in r:
                        db.hset(hostKey, key, r[key])
                    db.expire(hostKey, _keyExpire)

                    # all this because json cannot dumps() the timedelta object
                    td = r['lastseen']
                    if td is not None:
                        # td.seconds alone is only the sub-day remainder;
                        # add the days so long idle periods are not
                        # under-reported.
                        secs = td.days * 86400 + td.seconds
                        hours, remainder = divmod(secs, 3600)
                        minutes, seconds = divmod(remainder, 60)
                        r['lastseen'] = {
                            'hours': hours,
                            'minutes': minutes,
                            'seconds': seconds,
                            'relative': relative(td),
                            'since': secs,
                        }
                    log.info('%s: %s' % (job, json.dumps(r)))

                    if (host.farm == 'ec2') and (r['reboot'] or r['recovery']):
                        log.info('shutting down ec2 instance')
                        try:
                            conn = connect_to_region(
                                host.info['region'],
                                aws_access_key_id=getPassword('aws_access_key_id'),
                                aws_secret_access_key=getPassword('aws_secret_access_key'))
                            conn.stop_instances(instance_ids=[host.info['id']])
                        except Exception:
                            # best effort; KeyboardInterrupt/SystemExit
                            # are no longer swallowed
                            log.error('unable to stop ec2 instance %s [%s]' % (job, host.info['id']), exc_info=True)

        results.put((job, r))