Пример #1
0
def parse_test_file(test_root, test_path):
    """
    Import the Python file at test_path and return whatever
    find_test_methods() discovers in the imported module.

    @param test_root: directory the dotted module name is computed
        relative to; it is put at the front of sys.path and used as
        the working directory while importing
    @param test_path: path of the file to import; its last three
        characters are dropped as the extension (presumably '.py')
    @return: the result of find_test_methods(), or [] when the import
        or the discovery raised
    """
    logger = LogManager().getLogger('parse_test_file')
    test_root = os.path.abspath(test_root)
    test_path = os.path.abspath(test_path)

    # '<root>/pkg/mod.py' -> 'pkg.mod'
    relative = test_path[len(test_root) + 1:]
    python_path = relative[:-3].replace(os.path.sep, '.')
    file_name = python_path.rpartition('.')[2]

    if test_root not in sys.path:
        sys.path.insert(0, test_root)
    previous_dir = os.getcwd()
    os.chdir(test_root)
    reload_modules(test_root)

    try:
        # A non-empty fromlist makes __import__ hand back the leaf
        # module instead of the top-level package.
        module = __import__(python_path, globals(),
            locals(), fromlist=[file_name])
        return find_test_methods(module)
    except:
        # Deliberately broad: a test file may fail in any way on import.
        logger.warn("Could not load test %s, \n%s"
            %(test_path, traceback.format_exc()))
        return []
    finally:
        os.chdir(previous_dir)
Пример #2
0
    def __init__(self, version):
        """
        Create the UDP server, a temporary working directory and a
        SimpleHTTPServer child serving that directory, then install
        the SIGINT/SIGTERM handlers.

        @param version: value exported as the TAS_VERSION environment
            variable
        @raise ValueError: if no port could be read from the netstat
            output for the HTTP server
        """
        self.udpQueue = Queue()
        self.udpServer = UDPServer(self.udpQueue)
        self.tmpDir = tempfile.mkdtemp("tmpTAS")

        os.environ["TAS_TMP"] = self.tmpDir
        os.environ["TAS_VERSION"] = version

        self.logger = LogManager().getLogger("Main")
        self.peers = {}
        self.root = os.getcwd()

        # SimpleHTTPServer serves its working directory, so it is started
        # from inside the temporary directory. The finally clause makes
        # sure the original cwd is restored even if startup fails.
        os.chdir(self.tmpDir)
        try:
            # One handle for all children; they get their own duplicate of
            # the descriptor, so it can be closed once they are spawned.
            with open("/dev/null", "w") as devnull:
                self.httpServer = subprocess.Popen(
                    ["python", "-m", "SimpleHTTPServer", "0"],
                    stdout=devnull,
                    stderr=devnull,
                    preexec_fn=os.setsid,
                )
                # Give the server time to bind before asking netstat.
                time.sleep(1)

                # Port 0 lets the OS choose; look the chosen port up by pid.
                pid = str(self.httpServer.pid)
                httpPort = subprocess.Popen(
                    "netstat -tulpn | awk ' /" + pid + '\/python/ { gsub(/^[^:]+:/, "", $4); print $4 } \'',
                    shell=True,
                    stdout=subprocess.PIPE,
                    stderr=devnull,
                )
                # communicate() also reaps the helper and closes its pipe.
                output = httpPort.communicate()[0]
            self.httpPort = int(output.partition(b"\n")[0].strip())
        finally:
            os.chdir(self.root)

        # The handler uses shutdownLock and closed, so both have to exist
        # before a signal can be delivered to it.
        self.shutdownLock = threading.Lock()
        self.closed = False
        signal.signal(signal.SIGINT, self._shutdown)
        signal.signal(signal.SIGTERM, self._shutdown)
Пример #3
0
 def __init__(self, peer, rebootAttempts = 1, removeLock = False):
     """
     Store the peer, queue the reboot stages and start the process.

     @param peer: peer to recover; its ipAddr names the logger
     @param rebootAttempts: number of times the pair of reboot
         stages is queued
     @param removeLock: stored on the instance as given
     """
     self.peer = peer
     self.removeLock = removeLock

     # Both reboot stages, in this order, once per attempt.
     oneAttempt = [self.__rebootSSH, self.__rebootPWR]
     self.stages = oneAttempt * rebootAttempts

     loggerName = 'RecoveryWorker-%s' % peer.ipAddr
     self.logger = LogManager().getLogger(loggerName)

     Process.__init__(self, target=self.__restartPeer)
     # The worker runs as soon as it has been constructed.
     self.start()
Пример #4
0
    def __init__(self, config, defaultTimeout, iterations, srcLoc):
        """
        Read the discovery settings from the configuration object.

        @param config: configuration exposing DiscoveryWorker.enabled
            and SourceLocation.testRoot / testPath
        @param defaultTimeout: stored as defaultTestTimeout
        @param iterations: stored as given
        @param srcLoc: stored as given
        """
        self.test = 0
        self.config = config
        self.logger = LogManager().getLogger('TestDiscoveryWorker')

        # Discovery is on only when the setting is exactly 'true'.
        self.enabled = (config.DiscoveryWorker.enabled == 'true')

        location = config.SourceLocation
        self.testRoot = os.path.abspath(location.testRoot.PCDATA)
        # A falsy testPath falls back to the empty string.
        self.testPath = location.testPath.PCDATA or ""

        self.defaultTestTimeout = defaultTimeout
        self.iterations = iterations
        self.srcLoc = srcLoc
Пример #5
0
class RecoveryWorker(Process):
    """
    Process that runs the queued reboot stages for one peer.

    The process starts itself as soon as it is constructed.
    NOTE(review): the reboot stage methods referenced in __init__ are
    not part of the code visible here.
    """

    def __init__(self, peer, rebootAttempts = 1, removeLock = False):
        """
        @param peer: peer to recover; its ipAddr names the logger
        @param rebootAttempts: number of times the pair of reboot
            stages is queued
        @param removeLock: stored on the instance as given
        """
        self.peer = peer
        self.removeLock = removeLock
        self.stages = [self.__rebootSSH, self.__rebootPWR] * rebootAttempts
        self.logger = LogManager().getLogger('RecoveryWorker-%s' % peer.ipAddr)
        Process.__init__(self, target = self.__restartPeer)
        self.start()

    def __restartPeer(self):
        """
        Run every queued stage in order, sleeping 10 seconds after each.

        A stage that raises KeyError is logged and skipped.

        @return: does not return; once all stages have run the process
            exits with code -99. NOTE(review): a successful stage
            presumably ends the process with exit code 0 itself -
            confirm against the stage implementations.
        """
        for stage in self.stages:
            try:
                stage()
            except KeyError as e:
                self.logger.warn("Ignoring stage %s, invalid config: %s"
                    % (stage.__name__, e))

            time.sleep(10)
        sys.exit(-99)
Пример #6
0
    def __init__(self, username, password, host, cmdsToSend, port = 22, exitCmd = "exit", timeout = None):
        """
        Store the connection details and start the process.

        @param username: login name
        @param password: login password
        @param host: host to connect to; also names the logger
        @param cmdsToSend: a single command or a list of commands
        @param port: port number, converted with int()
        @param exitCmd: stored as given
        @param timeout: seconds; a falsy value selects SSHClient.TIMEOUT
        """
        Process.__init__(self, name="SSHClient")

        self.logger = LogManager().getLogger('SSHClient-%s' % host)

        # Connection details
        self.host = host
        self.port = int(port)
        self.username = username
        self.password = password

        # A single command is wrapped so cmdsToSend is always a list.
        if not isinstance(cmdsToSend, list):
            cmdsToSend = [cmdsToSend]
        self.cmdsToSend = cmdsToSend
        self.exitCmd = exitCmd

        # Output and bookkeeping
        self.queue = Queue()
        self.msg = ""
        self.status = Status.FAILURE
        self.cmdsSend = False
        self.startTime = Value('d', 0.0)
        self.endTime = Value('d', 0.0)
        self.timeout = timeout if timeout else SSHClient.TIMEOUT

        self.start()
Пример #7
0
class SSHClient(Process):
    """
    Process that connects to host:port through a Twisted reactor
    using SSHFactory / ClientTransport, while pinging the host to
    notice when it goes away.

    The process starts itself as soon as it is constructed and exits
    with self.status as its exit code.
    """
    # Default connection timeout in seconds.
    TIMEOUT = 10
    PING_RECEIVED = re.compile("1 received")

    def __init__(self, username, password, host, cmdsToSend, port = 22, exitCmd = "exit", timeout = None):
        """
        @param username: login name
        @param password: login password
        @param host: host to connect to; also names the logger
        @param cmdsToSend: a single command or a list of commands
        @param port: port number, converted with int()
        @param exitCmd: stored as given
        @param timeout: seconds; a falsy value selects SSHClient.TIMEOUT
        """
        Process.__init__(self, name = "SSHClient")

        self.logger = LogManager().getLogger('SSHClient-%s' % host)
        self.username = username
        self.password = password
        self.host = host
        self.port = int(port)
        self.cmdsToSend = cmdsToSend if isinstance(cmdsToSend, list) else [cmdsToSend]
        self.exitCmd = exitCmd

        self.queue = Queue()
        self.msg = ""
        self.status = Status.FAILURE
        self.startTime = Value('d', 0.0)
        self.endTime = Value('d', 0.0)
        self.timeout = timeout or SSHClient.TIMEOUT
        self.cmdsSend = False
        self.start()

    def isFinished(self):
        """ True if the process has finished """
        return not self.is_alive()

    def updateOutput(self):
        """
        Append everything currently waiting on the queue to msg.

        Reading stops once the queue stays empty for half a second
        or raises IOError. The queue is closed when the process has
        finished.
        @return: all output collected so far
        """
        try:
            while True:
                self.msg += self.queue.get(timeout = 0.5)
        except (Empty, IOError):
            pass

        if self.isFinished():
            self.queue.close()
        return self.msg

    def run(self):
        """
        Process body: connect, start the 2 second ping check and the
        timeout, run the reactor until it is stopped, then exit with
        self.status.
        """
        factory = SSHFactory(self)
        factory.protocol = ClientTransport
        reactor.connectTCP(self.host, self.port, factory)

        self.startTime.value = time.time()
        check = task.LoopingCall(self.__ping)
        check.start(2.0)
        reactor.callLater(self.timeout, self.__timeout)
        log.defaultObserver.stop()
        reactor.run()
        self.endTime.value = time.time()
        self.queue.close()
        sys.exit(self.status)

    def __timeout(self):
        """ Stop the reactor if the status is still FAILURE """
        if self.status != Status.FAILURE:
            return

        self.logger.error('Connection timeout to peer %s:%s'
            %(self.host, self.port))
        reactor.stop()

    def __ping(self):
        """
        Ping the host once and stop the reactor if it does not answer.
        The status is set to FAILURE when cmdsSend is still false.
        """
        # Opened for writing: the handle becomes the child's stdout and
        # stderr, and writes to a read-only descriptor fail, which some
        # ping implementations report as a non-zero exit status.
        with open('/dev/null', 'w') as null:
            ping = subprocess.Popen(["ping", "-c1", "-W1", self.host],
                stdout = null, stderr = null)
            ping.wait()

        if ping.returncode != 0 and reactor.running:
            if not self.cmdsSend:
                self.status = Status.FAILURE
            reactor.stop()

    def cleanup(self):
        """ Close the output queue """
        self.queue.close()

    def shutdown(self):
        """ Terminate the SSH process """
        self.terminate()
        self.join()
        self.endTime.value = time.time()
Пример #8
0
class ExecutionOrganiser(object):
    """
    Owns the UDP heartbeat server, the HTTP server serving the
    temporary directory, and the peers discovered through heartbeats.
    """

    def __init__(self, version):
        """
        Create the UDP server, a temporary working directory and a
        SimpleHTTPServer child serving that directory, then install
        the SIGINT/SIGTERM handlers.

        @param version: value exported as the TAS_VERSION environment
            variable
        @raise ValueError: if no port could be read from the netstat
            output for the HTTP server
        """
        self.udpQueue = Queue()
        self.udpServer = UDPServer(self.udpQueue)
        self.tmpDir = tempfile.mkdtemp("tmpTAS")

        os.environ["TAS_TMP"] = self.tmpDir
        os.environ["TAS_VERSION"] = version

        self.logger = LogManager().getLogger("Main")
        self.peers = {}
        self.root = os.getcwd()

        # SimpleHTTPServer serves its working directory, so it is started
        # from inside the temporary directory. The finally clause makes
        # sure the original cwd is restored even if startup fails.
        os.chdir(self.tmpDir)
        try:
            # One handle for all children; they get their own duplicate of
            # the descriptor, so it can be closed once they are spawned.
            with open("/dev/null", "w") as devnull:
                self.httpServer = subprocess.Popen(
                    ["python", "-m", "SimpleHTTPServer", "0"],
                    stdout=devnull,
                    stderr=devnull,
                    preexec_fn=os.setsid,
                )
                # Give the server time to bind before asking netstat.
                time.sleep(1)

                # Port 0 lets the OS choose; look the chosen port up by pid.
                pid = str(self.httpServer.pid)
                httpPort = subprocess.Popen(
                    "netstat -tulpn | awk ' /" + pid + '\/python/ { gsub(/^[^:]+:/, "", $4); print $4 } \'',
                    shell=True,
                    stdout=subprocess.PIPE,
                    stderr=devnull,
                )
                # communicate() also reaps the helper and closes its pipe.
                output = httpPort.communicate()[0]
            self.httpPort = int(output.partition(b"\n")[0].strip())
        finally:
            os.chdir(self.root)

        # _shutdown() uses shutdownLock and closed, so both have to exist
        # before a signal can be delivered to it.
        self.shutdownLock = threading.Lock()
        self.closed = False
        signal.signal(signal.SIGINT, self._shutdown)
        signal.signal(signal.SIGTERM, self._shutdown)

    def _process(self):
        """
        Monitor the overall progress and exchange data

        @return: True once every peer is done and the distributor does
            not want another iteration, otherwise False
        """
        heartBeats = self.udpServer.getHeartBeats(self.udpQueue.qsize())
        self.__processHeartBeats(heartBeats)
        if not self.peers:
            return False

        if not all(self.__processPeers(heartBeats)):
            return False
        if not self.testDistributor.continueIterations():
            return True

        # Start the next iteration with no known peers.
        # NOTE(review): this rebinds the attribute, so the dict handed to
        # TestDistributor in _go() is no longer the one filled here -
        # confirm that is intended.
        self.peers = {}
        return False

    def __processPeers(self, heartbeats):
        """
        Process all peer states

        @param heartbeats: (ipAddr, macAddr, randomBits) tuples
        @return: list with the isDone() result of every peer
        """
        beating = set(macAddr for _ip, macAddr, _bits in heartbeats)
        states = []
        for macAddr, peer in self.peers.items():
            peer.checkState(macAddr in beating)
            states.append(peer.isDone())
        return states

    def __processHeartBeats(self, heartbeats):
        """
        Process all new peer discoveries

        Known peers get their address checked; unknown senders are
        added when Peer.createPeer() returns a peer for them.
        @param heartbeats: (ipAddr, macAddr, randomBits) tuples
        """
        for ipAddr, macAddr, randomBits in heartbeats:
            if macAddr in self.peers:
                self.peers[macAddr].checkIP(ipAddr, randomBits)
                continue

            peer = Peer.createPeer(
                ipAddr, self.httpPort, macAddr, randomBits, self.testDistributor, self.resultWorker
            )
            if peer:
                self.peers[macAddr] = peer

    def _go(self):
        """ Gather the tests and create the distributor and result worker """
        self.testGather = TestGatherManager(self.tmpDir)
        source, _description = self.testGather.gatherTests()
        self.testDistributor = TestDistributor(self.peers, source)
        self.resultWorker = ResultWorker(self.testGather.getPackDetails(), source)

    def _shutdown(self, *args):
        """
        Teardown all components and a graceful shutdown

        Only the first call does any work. It also serves as the signal
        handler, hence the ignored positional arguments. The interpreter
        is ended with os._exit(0).
        """
        with self.shutdownLock:
            if self.closed:
                return
            self.closed = True

        self.logger.info("Shutdown called")

        processes = []
        peers = self.peers.values() if hasattr(self, "peers") else []
        for peer in peers:
            processes += peer.shutdown()

        # Wait until every process handed back by the peers has ended.
        while any(process.is_alive() for process in processes if process):
            time.sleep(1)

        if hasattr(self, "httpServer"):
            # The server was started with setsid, so its pid is also its
            # process group id.
            try:
                os.killpg(self.httpServer.pid, signal.SIGTERM)
            except OSError:
                # Group already gone; carry on with the remaining cleanup.
                pass
        for component in ["udpServer", "resultWorker"]:
            if not hasattr(self, component):
                continue
            getattr(self, component).shutdown()

        ConfigurationManager.destroySingleton()
        if os.path.exists(self.tmpDir):
            shutil.rmtree(self.tmpDir)

        os._exit(0)
Пример #9
0
 def __init__(self, source):
     """
     Start with an empty execution history and a logger named
     after the concrete class.

     @param source: accepted but not used here
     """
     self.execMap = {}
     loggerName = self.__class__.__name__
     self.logger = LogManager().getLogger(loggerName)
Пример #10
0
class AbstractDistributor(object):
    """
    Base class for test distributors: keeps the per-peer execution
    history and provides the search for a suitable test. Subclasses
    supply useTest() and executed().
    """

    def __init__(self, source):
        """
        @param source: accepted but not used here
        """
        self.execMap = {}
        self.logger = LogManager().getLogger(
            self.__class__.__name__)

    def recordExecution(self, peer, test):
        """
        Add the test to the map, which contains the history of
        the test distribution to the peers
        @param peer: peer the test was given to; keyed by its macAddr
        @param test: the distributed test
        """
        self.execMap.setdefault(peer.macAddr, []).append(test)

    def assignSuitableTest(self, peer, peers, source):
        '''
        Method, which finds the suitable test for the given peer and
        records it in the execution history
        @param peer: the peer, which is active and awaits the test
        @param peers: passed through to useTest()
        @param source: the source object with groups of tests
        @return: tuple of (deep copy of the found test or None, source)
        '''
        for group in source.groups:
            for test in group.tests:
                # True: useTest() may alter the source for this pick.
                if not self.useTest(peer, peers, source, group, test, True):
                    continue

                self.recordExecution(peer, test)
                return copy.deepcopy(test), source

        self.logger.warn('Could not find any test for peer %s' % peer.ipAddr)
        return None, source

    def peekSuitableTest(self, peer, peers, source):
        '''
        Method, which finds the suitable test for the given peer
        without recording it
        @param peer: the peer, which is active and awaits the test
        @param peers: passed through to useTest()
        @param source: the source object with groups of tests
        @return: deep copy of the found test, or None
        '''
        for group in source.groups:
            for test in group.tests:
                # False: useTest() is asked not to alter the source.
                if self.useTest(peer, peers, source, group, test, False):
                    return copy.deepcopy(test)

        return None

    def isSuitable(self, has, requires):
        '''
        Method copied from envutils/caps.py module
        @param requires: modulators required by test, separated by
            whitespace
        @param has: modulators provided by the box, separated by
            whitespace; one entry may list alternatives joined by '/'
        @return: True if every requirement can be given its own entry
            of the box that contains it, else False
        '''
        has = [ set(c.split('/')) for c in has.split() ]
        requires = [ set((r,)) for r in requires.split() ]
        if len(has) < len(requires): return False

        # Try every way of picking one distinct entry per requirement.
        return any(
            all(h >= r for r, h in zip(requires, perm))
            for perm in permutations(has, len(requires)))

    def whatsMissing(self, has, requires):
        '''
        List the requirements the box cannot satisfy. Each requirement
        uses up the first remaining entry of the box that contains it.
        @param has: modulators provided by the box (see isSuitable)
        @param requires: modulators required by test
        @return: the unmatched requirements, each preceded by a space;
            empty string if nothing is missing
        '''
        has = [ set(c.split('/')) for c in has.split() ]
        requires = [ set((r,)) for r in requires.split() ]

        missing = []
        for r in requires:
            for h in has:
                if any(x in h for x in r):
                    # An entry can serve only one requirement.
                    has.remove(h)
                    break
            else:
                missing.append(r)

        return ''.join(' %s' % '/'.join(r) for r in missing)

    def useTest(self, peer, peers, source, group, test, alterSource):
        ''' Decide whether the peer may run the test; for subclasses '''
        raise NotImplementedError()

    def executed(self, peers, test):
        ''' To be implemented by subclasses '''
        raise NotImplementedError()
Пример #11
0
class TestDiscoveryWorker(object):
    """
    Finds test cases in the Python files below the configured
    location, parsing the files in parallel worker processes.
    """
    # Configuration value that switches discovery on.
    CONFIG_ENABLED_TRUE = 'true'

    def __init__(self, config, defaultTimeout, iterations, srcLoc):
        """
        @param config: configuration exposing DiscoveryWorker.enabled
            and SourceLocation.testRoot / testPath
        @param defaultTimeout: timeout for tests whose docstring
            gives none
        @param iterations: passed to every created test
        @param srcLoc: passed to every created test
        """
        self.test = 0
        self.config = config
        self.logger = LogManager().getLogger('TestDiscoveryWorker')

        self.enabled = (self.config.DiscoveryWorker.enabled
            == TestDiscoveryWorker.CONFIG_ENABLED_TRUE)
        self.testRoot = os.path.abspath(self.config.SourceLocation.testRoot.PCDATA)
        self.testPath = self.config.SourceLocation.testPath.PCDATA or ""
        self.defaultTestTimeout = defaultTimeout
        self.iterations = iterations
        self.srcLoc = srcLoc

    def createTests(self):
        '''
        Method collects the tests found in the Python files below
        testRoot/testPath, using up to cpu_count() worker processes.
        @return: the list of created tests; an empty list if the
            discovery worker is disabled
        '''
        if not self.enabled:
            return []

        tests = []
        output_Q, input_Q = Queue(), Queue()
        processes, alive, work_avail = [], 0, True
        fullPath = os.path.join(self.testRoot, self.testPath)

        for path in find_python_files(fullPath):
            input_Q.put(path, block = False)

        while work_avail or alive > 0 or not output_Q.empty():
            time.sleep(0.1)

            work_avail = not input_Q.empty()
            alive = sum(1 for p in processes if p.is_alive())
            if alive == cpu_count() or not work_avail:
                # Nothing to start this round: collect finished tests.
                while not output_Q.empty():
                    tests.append(output_Q.get())
                continue

            process = Process(target = self.__processTest,
                args=(self.testRoot, output_Q, input_Q))
            process.daemon = True
            process.start()
            processes.append(process)

        input_Q.close()
        output_Q.close()
        return tests

    def __processTest(self, test_root, output_Q, input_Q):
        '''
        Worker body: convert file locations taken from input_Q into
        test objects put on output_Q
        '''
        # Name of the module differs between Python 2 and 3.
        try:
            from Queue import Empty
        except ImportError:
            from queue import Empty

        os.chdir(test_root)
        if test_root not in sys.path:
            sys.path.insert(0, test_root)

        while not input_Q.empty():
            try:
                file_path = input_Q.get(timeout = 0.5)
            except Empty:
                # Another worker took the path between empty() and get().
                continue
            test_methods = parse_test_file(test_root, file_path)

            for cls, method in test_methods:
                raw_doc = gather_doc_str(cls, method)
                doc_dict = process_docstr(raw_doc)

                try:
                    newTest = self.__createTestCase(
                        cls, method, file_path, raw_doc,
                        doc_dict, self.iterations)
                    if newTest: output_Q.put(newTest)
                except Exception:
                    self.logger.warn("Cannot create object from %s, \n%s\n%s\n\n"
                        %(file_path, doc_dict, traceback.format_exc()))

        output_Q.close()
        input_Q.close()

    def __createTestCase(self, cls, method, test_file, raw_doc, doc_dict, iterations):
        '''
        Build a PythonNoseTest from a test method and its parsed docstring
        @param cls: class owning the method, or a falsy value for a
            plain function
        @param method: the test method or function
        @param test_file: path of the file the test was found in
        @param raw_doc: unparsed docstring
        @param doc_dict: parsed docstring fields
        @param iterations: passed to the created test
        @return: the new test, or None if the method name, file or
            test id is missing
        '''
        description = doc_dict.get('summary', '')
        testId = doc_dict.get('test', '')
        environment = doc_dict.get('environment', '')
        testStatus = doc_dict.get('status', '')
        testTimeout = doc_dict.get('timeout', self.defaultTestTimeout)

        # Make the path relative to the current working directory.
        testFile = test_file.replace('%s/' % os.getcwd(), '')
        # __name__ works for functions and for methods alike.
        test_method = method.__name__
        test_class = cls.__name__ if cls else None

        mandatory_data = [ ('test method', test_method),
            ('test file', testFile), ('test id', testId) ]
        missing_data = [ key for key, value in mandatory_data
            if value in ['', None] ]

        if missing_data:
            self.logger.warn('Cannot create test for %s:%s.%s as %s is missing'
                %(testFile, test_class, test_method, ', '.join(missing_data) ))
            return None

        return PythonNoseTest(description, iterations, environment,
            test_class, testMethod = test_method, testFile = testFile,
            testId = testId, testStatus = testStatus,
            testTimeout = testTimeout, docstrings = raw_doc,
            srcLoc = self.srcLoc)
Пример #12
0
    def __init__(self, ipAddr, port, macAddr, randomBits, testDistributor, resultWorker):
        """
        Set up the stage pipeline, locations and state of one peer
        and fire NEW_PEER_EVENT for it.

        @param ipAddr: address of the peer; used for httpLoc
        @param port: port used to build masterHTTPLoc
        @param macAddr: MAC address; names the logger and the peer's
            directory below TAS_TMP
        @param randomBits: stored as given
        @param testDistributor: stored as given
        @param resultWorker: stored as given
        @raise KeyError: if the TAS_TMP environment variable is not set
        """
        # Ordered pipeline of stage methods.
        self.STAGES = [
            self.__retrieveConfigFile,
            self.__processConfigFile,
            self.__checkForLockedBox,
            self.__initRebootPeer,
            self.__syncCode,
            self.__executeTest,
            self.__archiveLogFiles,
            self.__retrieveLogFiles,
            self.__defineResults,
            self.__reportResults,
            self.__reaction,
            self.__gracePeriod,
            self.__postRebootPeer,
        ]

        self.recoverIndex = self.STAGES.index(self.__archiveLogFiles)
        self.graceIndex = self.STAGES.index(self.__gracePeriod)
        self.STAG_LEN = len(self.STAGES)

        self.testDistributor = testDistributor
        self.resultWorker = resultWorker

        self.config = {}
        self.ipAddr = ipAddr
        self.macAddr = macAddr
        self.randomBits = randomBits
        self.capabilities = ''
        self.gracePeriod = self.recoveries = 0
        self.timeJoined = time.time()
        self.envServer = ''
        self.envServerPort = 0

        execution = ConfigurationManager().getConfiguration('execution').configuration
        resConf = ConfigurationManager().getConfiguration('resultWorker').configuration

        # customLogFilter may be absent, a single entry or a list;
        # normalise it to a list.
        logFilters = []
        if hasattr(resConf.customLogFilters, 'customLogFilter'):
            logFilters = resConf.customLogFilters.customLogFilter
        if not isinstance(logFilters, list):
            logFilters = [logFilters]
        self.customLogFilters = logFilters

        self.logger = LogManager().getLogger('Peer-%s' % self.macAddr)
        self.hostIP = getIPAddressByInterface()
        self.masterHTTPLoc = 'http://%s:%s/' % (self.hostIP, port)
        self.httpLoc = 'http://%s:5005/' % ipAddr
        self.tmpDir = os.environ['TAS_TMP']
        self.peerDir = '%s/peer-%s' % (self.tmpDir, self.macAddr)
        self.resultLoc = '%s/results' % self.peerDir

        if not os.path.exists(self.resultLoc):
            os.makedirs(self.resultLoc)

        self.initReboot = execution.rebootNewBoxes.PCDATA == 'true'
        self.postReboot = execution.rebootPerTest.PCDATA == 'true'

        # Everything below is in place before the first event fires, so
        # event handlers receive a fully initialised peer.
        self.stage = 0
        self.lastHeartBeat = time.time()
        self.currentTest = self.testTimeout = None
        self.failureCodes = []
        self.processResults = []
        self.longRunningProcesses = []

        # Seed the state with ACTIVE so the change to PENDING is a real
        # transition and gets reported.
        self.state = PeerState.ACTIVE()
        self.__changeState(PeerState.PENDING())

        Peer.NEW_PEER_EVENT(self)
Пример #13
0
class Peer(object):
    HEARTBEAT_TIMEOUT = 300
    DEATH_TIMEOUT = 600
    TEST_TIMEOUT = 60
    TEST_GRACE = 3

    DEAD_STATES = [PeerState.UNRESPONSIVE(), PeerState.DEAD()]
    DONE_STATES = [PeerState.ACTIVE(), PeerState.DEAD()]
    REBOOT_STATES = [PeerState.REBOOTING(), PeerState.DEAD()]
    LOCKED_STATES = [PeerState.LOCKED(), PeerState.PENDING()]
    REQUIRED_CONF = ['user', 'password', 'tmpdir', 'envserver', 'envserverport', 'rebootcmd']
    
    IGNORED_PEER_EVENT = get_event_handler('ignored_peer')
    NEW_PEER_EVENT = get_event_handler('new_peer')
    PEER_STATE_CHANGE_EVENT = get_event_handler('peer_state_change')
    PEER_STAGE_CHANGE_EVENT= get_event_handler('peer_stage_change')
    TEST_RESULT_EVENT = get_event_handler('test_result')
    PEER_SLEEPING_EVENT = get_event_handler('peer_sleeping')

    @staticmethod
    def createPeer(ipAddr, port, macAddr, randomBits, testDistributor, resultWorker):
        """
        Create a Peer if the 'routes' configuration has an entry for
        the MAC address whose enabled setting is not 'false'.

        @return: the new Peer; None after firing IGNORED_PEER_EVENT
            when no such entry exists
        """
        routeConf = ConfigurationManager().getConfiguration('routes').configuration
        # A single route entry is not wrapped in a list.
        routes = routeConf.route
        if not isinstance(routes, list):
            routes = [routes]

        for route in routes:
            otherMac = route.macAddr.PCDATA != macAddr
            if otherMac or route.enabled == 'false':
                continue
            return Peer(ipAddr, port, macAddr, randomBits, testDistributor, resultWorker)

        Peer.IGNORED_PEER_EVENT(ipAddr, port, macAddr, randomBits)
 
    def __init__(self, ipAddr, port, macAddr, randomBits, testDistributor, resultWorker):
        """
        Set up the stage pipeline, locations and state of one peer
        and fire NEW_PEER_EVENT for it.

        @param ipAddr: address of the peer; used for httpLoc
        @param port: port used to build masterHTTPLoc
        @param macAddr: MAC address; names the logger and the peer's
            directory below TAS_TMP
        @param randomBits: stored as given
        @param testDistributor: stored as given
        @param resultWorker: stored as given
        @raise KeyError: if the TAS_TMP environment variable is not set
        """
        # Ordered pipeline of stage methods.
        self.STAGES = [
            self.__retrieveConfigFile,
            self.__processConfigFile,
            self.__checkForLockedBox,
            self.__initRebootPeer,
            self.__syncCode,
            self.__executeTest,
            self.__archiveLogFiles,
            self.__retrieveLogFiles,
            self.__defineResults,
            self.__reportResults,
            self.__reaction,
            self.__gracePeriod,
            self.__postRebootPeer,
        ]

        self.recoverIndex = self.STAGES.index(self.__archiveLogFiles)
        self.graceIndex = self.STAGES.index(self.__gracePeriod)
        self.STAG_LEN = len(self.STAGES)

        self.testDistributor = testDistributor
        self.resultWorker = resultWorker

        self.config = {}
        self.ipAddr = ipAddr
        self.macAddr = macAddr
        self.randomBits = randomBits
        self.capabilities = ''
        self.gracePeriod = self.recoveries = 0
        self.timeJoined = time.time()
        self.envServer = ''
        self.envServerPort = 0

        execution = ConfigurationManager().getConfiguration('execution').configuration
        resConf = ConfigurationManager().getConfiguration('resultWorker').configuration

        # customLogFilter may be absent, a single entry or a list;
        # normalise it to a list.
        logFilters = []
        if hasattr(resConf.customLogFilters, 'customLogFilter'):
            logFilters = resConf.customLogFilters.customLogFilter
        if not isinstance(logFilters, list):
            logFilters = [logFilters]
        self.customLogFilters = logFilters

        self.logger = LogManager().getLogger('Peer-%s' % self.macAddr)
        self.hostIP = getIPAddressByInterface()
        self.masterHTTPLoc = 'http://%s:%s/' % (self.hostIP, port)
        self.httpLoc = 'http://%s:5005/' % ipAddr
        self.tmpDir = os.environ['TAS_TMP']
        self.peerDir = '%s/peer-%s' % (self.tmpDir, self.macAddr)
        self.resultLoc = '%s/results' % self.peerDir

        if not os.path.exists(self.resultLoc):
            os.makedirs(self.resultLoc)

        self.initReboot = execution.rebootNewBoxes.PCDATA == 'true'
        self.postReboot = execution.rebootPerTest.PCDATA == 'true'

        # Everything below is in place before the first event fires, so
        # event handlers receive a fully initialised peer.
        self.stage = 0
        self.lastHeartBeat = time.time()
        self.currentTest = self.testTimeout = None
        self.failureCodes = []
        self.processResults = []
        self.longRunningProcesses = []

        # Seed the state with ACTIVE so the change to PENDING is a real
        # transition and gets reported.
        self.state = PeerState.ACTIVE()
        self.__changeState(PeerState.PENDING())

        Peer.NEW_PEER_EVENT(self)

    def __retrieveConfigFile(self):
        """
        Queue the download of the peer's 'config' file into the
        peer directory, creating that directory if needed.
        """
        targetDir = self.peerDir
        if not os.path.exists(targetDir):
            os.makedirs(targetDir)

        configUrl = '%sconfig' % self.httpLoc
        download = syncGetHTTPFile(configUrl, targetDir)
        self.longRunningProcesses.append(download)

    def __processConfigFile(self):
        """
        Process the configuration file as a key/value pair

        Reads 'key=value' lines from the downloaded config file into
        self.config (keys lower-cased), asks the environment server for
        the capabilities and queues the SSH command that takes or
        reports the lock on the box. The peer is marked DEAD when the
        file or a required setting is missing.
        """
        configLoc = '%s/config' % self.peerDir
        if not os.path.exists(configLoc):
            self.logger.warn('Could not find a configuration file')
            return self.__changeState(PeerState.DEAD())

        with open(configLoc, 'r') as lf:
            for line in lf:
                key, separator, value = line.strip().partition('=')
                if not separator:
                    # Blank line or no '=': not a setting, skip it.
                    continue
                self.config[key.lower()] = value

        # These keys are read right below; without them the config is
        # reported as invalid instead of failing with a KeyError.
        for key in ('envserver', 'envserverport', 'rebootcmd'):
            if key not in self.config:
                return self.__changeState(PeerState.DEAD(), 'Invalid config')

        self.envServer = self.config['envserver'].strip()
        self.envServerPort = self.config['envserverport'].strip()
        if self.envServerPort.isdigit():
            self.envServerPort = int(self.envServerPort)

        # Without a reboot command no reboot can be requested.
        if self.config['rebootcmd'] == '':
            self.postReboot = self.initReboot = False

        self.capabilities = envcatRequest(self.envServer,
            self.envServerPort, 'capabilities')

        if self.__configIsMissingKeys():
            return self.__changeState(PeerState.DEAD(), 'Invalid config')

        # Writes this host's IP into the 'locked' file unless one exists,
        # then prints UNLOCKED if the file holds our IP, else its content.
        cmd = 'mkdir -p %(T)s; cd %(T)s; if [ ! -f locked ]; then echo "%(I)s" > locked; sleep 2; fi; ' \
              'if [ "%(I)s" != `cat locked` ]; then echo `cat locked`; else echo "UNLOCKED"; fi;' \
              % { 'T' : self.config['tmpdir'], 'I' : self.hostIP }

        self.longRunningProcesses.append(SSHClient(self.config['user'],
            self.config['password'], self.ipAddr, cmd))

    def __checkForLockedBox(self):
        """
        Evaluate the output of the lock command.

        'UNLOCKED' in the output refreshes the heartbeat time. Otherwise
        the peer returns to stage 0 as ACTIVE when the distributor has
        no test for it, or is marked LOCKED with a grace period of 5.
        """
        output = self.processResults[0].updateOutput().strip()

        if 'UNLOCKED' in output:
            self.lastHeartBeat = time.time()
            return

        if not self.testDistributor.peekTest(self):
            self.stage = 0
            self.__changeState(PeerState.ACTIVE())
            return

        self.gracePeriod = 5
        # Position the pipeline just before the grace period stage.
        self.stage = self.graceIndex - 1
        lockInfo = 'current user %s' % output
        self.__changeState(PeerState.LOCKED(), lockInfo)

    def __initRebootPeer(self):
        """
        Reboot the peer before testing, at most once.

        @return: False when no initial reboot is pending, otherwise
            None after queueing a RecoveryWorker
        """
        if self.initReboot:
            # Clear the flag so the reboot is not repeated.
            self.initReboot = False
            self.__changeState(PeerState.REBOOTING())
            worker = RecoveryWorker(self)
            self.longRunningProcesses.append(worker)
            return None

        return False

    def __syncCode(self):
        """
        Queue the SSH command that cleans the peer's tmpdir, fetches
        the scripts archive from the master if it is not there yet
        and unpacks it.
        """
        # NOTE(review): __changeState() has no return value in the code
        # visible here, so status ends up as None - confirm intent.
        self.status = self.__changeState(PeerState.SYNC_CODE())

        template = (
            'mkdir -p %(T)s; for x in `ls %(T)s | grep -vE "(config|locked|scripts.%(J)s.tar.gz)"`; do rm -rf %(T)s/$x; done; '
            'if [ ! -f %(T)s/scripts.%(J)s.tar.gz ]; then wget %(H)sscripts.tar.gz -O %(T)s/scripts.%(J)s.tar.gz -q; fi; '
            'tar -xzf %(T)s/scripts.%(J)s.tar.gz -C %(T)s'
        )
        # T: tmpdir on the peer, J: join time used in the archive name,
        # H: base URL of the master's HTTP server
        values = {
            'T' : self.config['tmpdir'],
            'J' : self.timeJoined,
            'H' : self.masterHTTPLoc,
        }
        cmd = template % values

        client = SSHClient(self.config['user'],
            self.config['password'], self.ipAddr, cmd)
        self.longRunningProcesses.append(client)

    def __executeTest(self):
        """
        Execute a test script on the peer

        Without a test from the distributor the peer goes back to
        stage 0 as ACTIVE and the box is unlocked. Otherwise the test's
        timeout is extended by TEST_GRACE and the SSH command running
        it is queued.
        """
        test = self.testDistributor.getTest(self)
        if test is None:
            self.stage = 0
            self.longRunningProcesses.append(self.__unlockBox())
            return self.__changeState(PeerState.ACTIVE())

        test.testTimeout += Peer.TEST_GRACE
        self.__changeState(PeerState.TESTING(), 'while running test %s' % test.testId)
        self.currentTest = test

        # Shell trace and all output of the run go to execution.log.
        cmd = 'cd %s; bash -x %s >execution.log 2>&1' %(self.config['tmpdir'], self.currentTest.getExecutionString())
        self.longRunningProcesses.append(SSHClient(
            self.config['user'], self.config['password'],
            self.ipAddr, cmd))

        self.currentTest.startTime = time.time()

    def __archiveLogFiles(self):
        """
        Record the test duration and queue the SSH command that
        copies the custom log locations into the peer's logs
        directory and packs it into logs.tar.gz.
        """
        # Fallback timing, replaced by the first process result's
        # timing when one is available.
        endTime = time.time()
        startTime = self.currentTest.startTime
        if self.processResults:
            firstResult = self.processResults[0]
            endTime = firstResult.endTime.value
            startTime = firstResult.startTime.value
        self.currentTest.duration = endTime - startTime
        self.__changeState(PeerState.SYNC_LOGS())

        tmpDir = self.config['tmpdir']
        copyCmds = ''.join(
            'cp -r %s %s/logs; ' % (logFilter.PCDATA, self.config['tmpdir'])
            for logFilter in self.customLogFilters)
        cmd = copyCmds + 'cd %s/logs; tar -czf ../logs.tar.gz *;' % tmpDir

        client = SSHClient(self.config['user'],
            self.config['password'], self.ipAddr, cmd)
        self.longRunningProcesses.append(client)

    def __retrieveLogFiles(self):
        """
        Create the result directory for the current test and queue
        the download of logs.tar.gz and the 'getlogs' request to the
        environment server.
        """
        # Directory name: <resultLoc>-<timestamp>-<test id>
        resultDir = "%s-%s-%s" %(self.resultLoc, time.time(), self.currentTest.testId)
        self.lastResultLoc = resultDir
        os.makedirs(resultDir)

        archiveUrl = '%slogs.tar.gz' % self.httpLoc
        self.longRunningProcesses.append(
            syncGetHTTPFile(archiveUrl, self.lastResultLoc, True))

        envLogFile = '%s/envlog.txt' % self.lastResultLoc
        self.longRunningProcesses.append(
            EnvCat(self.envServer, self.envServerPort, envLogFile, 'getlogs'))

    def __defineResults(self):
        """
        Collect the files in the last result directory and queue a
        ResultDefiner for them and the current test.
        """
        resultDir = self.lastResultLoc
        self.lastResultFiles = [
            os.path.join(resultDir, entry)
            for entry in os.listdir(resultDir)
        ]

        definer = ResultDefiner(self.lastResultFiles, self.currentTest)
        self.longRunningProcesses.append(definer)

    def __reportResults(self):
        """
        Store the state and error from the first process result on the
        current test, fire TEST_RESULT_EVENT and queue a
        ReactionDefiner.
        """
        outcome = self.processResults[0].getResult()
        testState, errorMsg = outcome
        self.currentTest.state = testState
        self.currentTest.error = errorMsg

        Peer.TEST_RESULT_EVENT(self.currentTest, self.lastResultFiles, self)
        #TODO: replace me, self.resultWorker.report(self.currentTest, self.lastResultFiles, self)

        reaction = ReactionDefiner(self.lastResultFiles, self.currentTest)
        self.longRunningProcesses.append(reaction)

    def __reaction(self):
        """
        React upon the current test results: take the peer state and
        grace period from the first process result and drop the
        current test.
        """
        reaction = self.processResults[0].getResult()
        peerState, gracePeriod = reaction
        self.__changeState(peerState)
        self.gracePeriod = gracePeriod
        self.currentTest = None

    def __gracePeriod(self):
        """
        Sleep for the pending grace period, if there is one.
        @return: False when no grace period is pending, otherwise None
            after a 'sleep' process has been queued and the period reset
        """
        if self.gracePeriod == 0:
            return False

        Peer.PEER_SLEEPING_EVENT(self)

        sleepCmd = 'sleep %d' % self.gracePeriod
        sleeper = Popen(sleepCmd, shell = True)
        self.longRunningProcesses.append(sleeper)

        # The period is consumed once the sleep has been scheduled
        self.gracePeriod = 0

    def __postRebootPeer(self):
        """
        Post testing reboot if required.
        @return: False when no post reboot is requested or the peer is in
            a locked state, otherwise None after a RecoveryWorker
            (removeLock = True) has been queued
        """
        rebootWanted = self.postReboot
        if not rebootWanted or self.state in Peer.LOCKED_STATES:
            return False

        # NOTE(review): the guard reads postReboot but the flag cleared here
        # is initReboot -- confirm this is intentional
        self.initReboot = False
        self.__changeState(PeerState.REBOOTING())

        worker = RecoveryWorker(self, removeLock = True)
        self.longRunningProcesses.append(worker)

    def __changeState(self, state, comment = None):
        """
        Change the peer state and report the change.
        Nothing happens when the requested state equals the current one
        or is falsy.
        @param state: New peer state
        @param comment: Optional comment passed to PEER_STATE_CHANGE_EVENT
        """
        alreadyThere = self.state == state
        if alreadyThere or not state:
            return

        self.state = state
        Peer.PEER_STATE_CHANGE_EVENT(self, comment)

    def __hasDied(self, heartBeatFelt):
        """
        States if the peer has stopped sending heartbeats.
        The state is moved to DEAD once more than Peer.DEATH_TIMEOUT has
        passed since the last heartbeat (and the peer is not DEAD yet), or
        to UNRESPONSIVE once more than Peer.HEARTBEAT_TIMEOUT has passed
        (and the peer is not in a dead state). Otherwise the heartbeat
        timestamp is refreshed when a heartbeat was felt.
        @param heartBeatFelt: True if a heartbeat was felt else False
        @return True if one of the two timeout branches was taken else False
        """
        silence = time.time() - self.lastHeartBeat

        if silence > Peer.DEATH_TIMEOUT \
        and self.state != PeerState.DEAD():
            self.__changeState(PeerState.DEAD())
            return True

        if silence > Peer.HEARTBEAT_TIMEOUT \
        and self.state not in Peer.DEAD_STATES:
            self.__changeState(PeerState.UNRESPONSIVE(), 'within stage %d' % self.stage)
            return True

        # No timeout branch applied: only now is a felt heartbeat recorded
        if heartBeatFelt:
            self.lastHeartBeat = time.time()
        return False

    def checkState(self, heartBeatFelt):
        """
        Check the overall state and advance the stage machine when possible.
        Finished processes are sorted into failures and results, heartbeat
        and test timeouts are evaluated, recovery is started on failure and,
        when nothing is running any more, the current stage is executed.
        @param heartBeatFelt: True if a heart beat was felt else False
        @return: None; every return statement in this method is bare
        """
        # Finished processes with a non-zero exit code (recovery workers
        # excluded) are put in front of the failures already known
        self.failureCodes = [ p for p
            in self.longRunningProcesses
            if self.__getExitCode(p) not in [None, 0]
            and not isinstance(p, RecoveryWorker) ] \
        + self.failureCodes

        # Every finished process (recovery workers excluded) is put in front
        # of the results already known; this includes the failed ones above
        self.processResults = [ p for p
            in self.longRunningProcesses
            if not self.__isAlive(p) 
            and not isinstance(p, RecoveryWorker) ] \
        + self.processResults

        # Only processes that are still executing stay in the running list
        self.longRunningProcesses = [ p for p
            in self.longRunningProcesses
            if self.__isAlive(p) ]

        # Box doesn't belong to us yet. Try again
        if len(self.failureCodes) > 0 and self.state == PeerState.PENDING():
            self.gracePeriod = 5
            self.stage = self.graceIndex
            self.__clearProcessHistory()

        if self.state not in Peer.LOCKED_STATES:
            # The heartbeat is evaluated before the early return below, so
            # the timestamp/state update happens even while work is running
            hasDied = self.__hasDied(heartBeatFelt)
            if len(self.longRunningProcesses) > 0: return

            if len(self.failureCodes) > 0 or hasDied:
                # Same check as __configIsMissingKeys, negated
                hasConfig = not any(x not in self.config.keys() for x in Peer.REQUIRED_CONF)
                if self.state not in Peer.DEAD_STATES:
                    objNames = ','.join([ x.__class__.__name__ for x in self.failureCodes])
                    # NOTE(review): im_func / func_name are Python 2 only attributes
                    funcName = self.STAGES[self.stage].im_func.func_name
                    self.__changeState(PeerState.UNRESPONSIVE(), 'on stage %s, %s' % (funcName, objNames))
                # At most 5 recovery attempts; the counter is reset further
                # down once a stage has been executed
                if hasConfig and self.recoveries < 5:
                    self.longRunningProcesses.append(RecoveryWorker(self))
                    self.recoveries += 1
                return

            # Test ran longer than its timeout: flag it once and shut down
            # every process that offers a shutdown method
            # NOTE(review): because of the early return above,
            # longRunningProcesses appears to be empty at this point, so
            # only processResults would be visited -- confirm
            if self.state == PeerState.TESTING() \
            and time.time() - self.currentTest.startTime > self.currentTest.testTimeout \
            and self.currentTest.state != TestState.TIMEOUT():
                self.currentTest.state = TestState.TIMEOUT()
                for process in self.longRunningProcesses + self.processResults:
                    if hasattr(process, 'shutdown'): process.shutdown()

        if len(self.longRunningProcesses) > 0 or \
        self.state in Peer.REBOOT_STATES + Peer.DONE_STATES: 
            return # Still running processes # TODO: timeout?

        # Run the current stage; a stage returns False explicitly to be skipped
        if self.STAGES[self.stage]() == False:
           # Go to the next stage if asked to skip
           self.__incStage()
           self.checkState(heartBeatFelt)
        else:
            # The stage was executed: reset the recovery counter, move on and
            # release the finished processes of the previous stage
            self.recoveries = 0
            self.__incStage()
            self.__clearProcessHistory()

    def __incStage(self):
        """
        Move on to the next stage, wrapping around to stage 0 after the
        last one, and announce it via Peer.PEER_STAGE_CHANGE_EVENT.
        Nothing happens while the peer is in the ACTIVE state.
        """
        if self.state == PeerState.ACTIVE():
            return

        following = self.stage + 1
        self.stage = following if following < self.STAG_LEN else 0
        Peer.PEER_STAGE_CHANGE_EVENT(self)

    def __getExitCode(self, process):
        """
        Returns the exit code from a process
        @param process: Popen object, or any object exposing an
            'exitcode' attribute
        @return: result of poll() for a Popen, otherwise the value of
            the 'exitcode' attribute
        """
        polled = isinstance(process, Popen)
        return process.poll() if polled else process.exitcode

    def __isAlive(self, process):
        """
        Returns true if the process or Popen is
        still executing code else false
        @param process: Popen object, or any object exposing is_alive()
        @return True if executing else false
        """
        if isinstance(process, Popen):
            # poll() yields None for as long as the child has not exited;
            # None is tested by identity, not by equality
            return process.poll() is None
        return process.is_alive()
 
    def __killAllProcesses(self):
        """
        Terminate every long running process, forget them and clear the
        history of finished processes.
        """
        for running in self.longRunningProcesses:
            try:
                running.terminate()
            except OSError:
                # Best effort: an OSError raised by terminate() is ignored
                pass

        self.longRunningProcesses = []
        self.__clearProcessHistory()

    def __clearProcessHistory(self):
        """
        Forget all finished processes.
        Every entry of failureCodes and processResults that offers a
        'cleanup' method gets it called, then both lists are emptied.
        """
        for finished in self.failureCodes + self.processResults:
            if hasattr(finished, 'cleanup'):
                finished.cleanup()

        self.failureCodes = []
        self.processResults = []

    def checkIP(self, ipAddr, randomBits):
        """
        Check the reported IP address and random bits against the stored
        ones, react on changes and store the new values.
        @param ipAddr: New IP address
        @param randomBits: Random bits reported by the peer; a changed value
            is treated as a reboot
        """
        if ipAddr != self.ipAddr:
            # IP address has changed
            ipMessage = 'IP address for peer %s has changed to %s from %s!' \
                %(self.macAddr, ipAddr, self.ipAddr)
            self.logger.warn(ipMessage)
            # TODO: restart SSH ? What does this mean? Recovery?

        elif randomBits != self.randomBits and self.currentTest:
            # Box rebooted randomly while a test was assigned
            rebootMessage = 'Peer (%s,%s) has rebooted! Random bits have changed: %s - %s' \
                %(self.ipAddr, self.macAddr, self.randomBits, randomBits)
            self.logger.warn(rebootMessage)

            # Switch to log recovery and mark the running test as crashed
            self.__changeState(PeerState.RECOVER_LOGS())
            self.__killAllProcesses()
            self.stage = self.recoverIndex
            self.currentTest.state = TestState.CRASH()
            self.lastHeartBeat = time.time()

        # Expected and controlled reboot; checked independently of the
        # branches above
        if self.randomBits != randomBits and self.state in Peer.REBOOT_STATES:
            self.__changeState(PeerState.PENDING())

        # Remember the latest values for the next comparison
        self.ipAddr = ipAddr
        self.randomBits = randomBits

    def __configIsMissingKeys(self):
        """
        States if the configuration lacks a required key.
        @return: True if at least one key of Peer.REQUIRED_CONF is not
            part of the configuration else False
        """
        for requiredKey in Peer.REQUIRED_CONF:
            if requiredKey not in self.config.keys():
                return True
        return False

    def __unlockBox(self):
        """
        Remove the lock from the box and reboot.
        The remote command only acts when the content of the 'locked' file
        in the peer's tmpdir equals this host's IP: it then touches
        'unlock' and runs the configured reboot command.
        @return: SSHClient running the unlock command, or None when the
            configuration is missing required keys
        """
        if self.__configIsMissingKeys():
            return

        user = self.config['user']
        password = self.config['password']
        target = self.ipAddr

        substitutions = {
            'H': self.hostIP,
            'T': self.config['tmpdir'],
            'R': self.config['rebootcmd'],
        }
        unlockCmd = 'if [ "`cat %(T)s/locked`" == "%(H)s" ]; then touch %(T)s/unlock; %(R)s; fi' \
            % substitutions

        return SSHClient(user, password, target, unlockCmd, timeout = 2)

    def shutdown(self):
        """
        Shutdown all operations that are related to this peer.
        All running processes are killed. A test that is still assigned to
        a DEAD peer is reported as DEADBOX and released.
        @return: List of processes started for the shutdown: an EnvCat
            'play' request and, unless the peer is in a locked state or
            its configuration is incomplete, the SSH client unlocking
            the box. The list never contains None.
        """
        self.__killAllProcesses()
        if self.currentTest and self.state == PeerState.DEAD():
            self.currentTest.state = TestState.DEADBOX()
            # NOTE(review): results are reported through resultWorker here
            # but through Peer.TEST_RESULT_EVENT in __reportResults -- confirm
            self.resultWorker.report(self.currentTest, [], self)
            self.currentTest = None

        processes = [ EnvCat(self.envServer,
            self.envServerPort, '/dev/null', 'play') ]

        if self.state not in Peer.LOCKED_STATES:
            unlocker = self.__unlockBox()
            # __unlockBox returns None when required configuration keys are
            # missing; such a placeholder must not reach the caller
            if unlocker is not None:
                processes.append(unlocker)
        return processes

    def isDone(self):
        """
        States if the peer has finished all required work
        @return: True if the current state is one of Peer.DONE_STATES
            else False
        """
        doneStates = Peer.DONE_STATES
        return self.state in doneStates