Exemplo n.º 1
0
 def test_temporarysshkey_create(self):
     """
     Verify that TemporarySSHKey.create() leaves a key file on disk.
     """
     ssh_key = TemporarySSHKey(TEST_HOST, logging.getLogger())
     ssh_key.create()
     key_file = ssh_key.path
     self.assertTrue(os.path.isfile(key_file))
     # This test never calls remove(), so delete the file by hand.
     os.unlink(key_file)
Exemplo n.º 2
0
 def test_temporarysshkey_create(self):
     """
     Verify that TemporarySSHKey.create() leaves a key file on disk.
     """
     log = logging.getLogger()
     temp_key = TemporarySSHKey(TEST_HOST, log)
     temp_key.create()
     created_path = temp_key.path
     self.assertTrue(os.path.isfile(created_path))
     # remove() is not exercised here; clean up the file directly.
     os.unlink(created_path)
Exemplo n.º 3
0
 def test_temporarysshkey_remove(self):
     """
     Verify that TemporarySSHKey.remove() deletes the key file.
     """
     ssh_key = TemporarySSHKey(TEST_HOST, logging.getLogger())
     ssh_key.create()
     # Precondition: create() put a file on disk.
     present_before = os.path.isfile(ssh_key.path)
     self.assertTrue(present_before)
     ssh_key.remove()
     # The path is re-read after remove() on purpose.
     present_after = os.path.isfile(ssh_key.path)
     self.assertFalse(present_after)
Exemplo n.º 4
0
 def test_temporarysshkey_contextmanager(self):
     """
     Verify the key file exists inside a ``with`` block and is gone after.
     """
     log = logging.getLogger()
     with TemporarySSHKey(TEST_HOST, log) as ssh_key:
         exists_inside = os.path.isfile(ssh_key.path)
         self.assertTrue(exists_inside)
     # Leaving the block must have removed the file.
     exists_after = os.path.isfile(ssh_key.path)
     self.assertFalse(exists_after)
Exemplo n.º 5
0
 def test_temporarysshkey__init(self):
     """
     Verify init of TemporarySSHKey sets up the instances.
     """
     key = TemporarySSHKey(TEST_HOST, logging.getLogger())
     # There should be no path yet.
     # assertIsNone replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertIsNone(key.path)
Exemplo n.º 6
0
    def _check(self, address):
        """
        Initiates a check on the requested host.

        The host record is fetched from storage and probed through the
        transport using a temporary SSH key. Once the probe has been
        attempted the host model is saved back to storage, whether the
        probe succeeded or failed.

        :param address: Host address to investigate
        :type address: str
        :raises Exception: Re-raises whatever the storage lookup or the
                           availability check raised.
        """
        # Statuses follow:
        # http://commissaire.readthedocs.org/en/latest/enums.html#host-statuses

        self.logger.info('Checking host "{}".'.format(address))
        try:
            response = self.request('storage.get',
                                    params={
                                        'model_type_name':
                                        'Host',
                                        'model_json_data':
                                        Host.new(address=address).to_json(),
                                        'secure':
                                        True,
                                    })
            host = Host.new(**response['result'])
        except Exception as error:
            self.logger.warn('Unable to continue for host "{}" due to '
                             '{}: {}. Returning...'.format(
                                 address, type(error), error))
            # Bare raise keeps the original traceback intact.
            raise

        transport = ansibleapi.Transport(host.remote_user)

        with TemporarySSHKey(host, self.logger) as key:
            try:
                self.logger.debug(
                    'Starting watcher run for host "{}"'.format(address))
                result = transport.check_host_availability(host, key.path)
                host.last_check = datetime.utcnow().isoformat()
                self.logger.debug('Watcher result for host {}: {}'.format(
                    address, result))
            except Exception as error:
                self.logger.warn(
                    'Failed to connect to host node "{}"'.format(address))
                # Log the exception type followed by its message; passing
                # str(error) here printed the message twice.
                self.logger.debug(
                    'Watcher failed for host node "{}" with {}: {}'.format(
                        address, type(error), error))
                host.status = 'failed'
                raise
            finally:
                # Save the model on both the success and the failure path
                self.request('storage.save',
                             params={
                                 'model_type_name': host.__class__.__name__,
                                 'model_json_data': host.to_json(),
                             })
            # Only reached when the availability check did not raise
            self.logger.info(
                'Finished watcher run for host "{}"'.format(address))
Exemplo n.º 7
0
 def test_temporarysshkey_remove_failure(self):
     """
     Verify TemporarySSHKey.remove reacts properly to failure.

     ``os.unlink`` is patched to raise, so ``remove()`` must leave the
     file in place and report the problem through ``logger.warn``.
     """
     mock_logger = mock.MagicMock(logging.Logger('test'))
     key = TemporarySSHKey(TEST_HOST, mock_logger)
     key.create()
     try:
         with mock.patch('os.unlink') as _unlink:
             _unlink.side_effect = Exception
             self.assertTrue(os.path.isfile(key.path))
             key.remove()
             self.assertTrue(os.path.isfile(key.path))
             # We should have a warning in the log. call_count is checked
             # explicitly because Mock.assert_called_once() is missing from
             # older mock releases, where the call silently passes.
             self.assertEqual(1, mock_logger.warn.call_count)
     finally:
         # Clean up the file, even when an assertion above failed. The
         # patch is no longer active here, so the real os.unlink runs.
         key.remove()
Exemplo n.º 8
0
 def test_temporarysshkey_remove(self):
     """
     Verify that TemporarySSHKey.remove() deletes the key file.
     """
     log = logging.getLogger()
     temp_key = TemporarySSHKey(TEST_HOST, log)
     temp_key.create()
     # Precondition: the key file is on disk after create().
     on_disk = os.path.isfile(temp_key.path)
     self.assertTrue(on_disk)
     temp_key.remove()
     # Look the path up again rather than reusing the earlier value.
     still_on_disk = os.path.isfile(temp_key.path)
     self.assertFalse(still_on_disk)
Exemplo n.º 9
0
 def test_temporarysshkey_remove_failure(self):
     """
     Verify that a failing unlink leaves the key file and logs a warning.
     """
     fake_log = mock.MagicMock(logging.Logger('test'))
     ssh_key = TemporarySSHKey(TEST_HOST, fake_log)
     ssh_key.create()
     with mock.patch('os.unlink') as patched_unlink:
         # Every attempt to delete the file raises while the patch is on
         patched_unlink.side_effect = Exception
         self.assertTrue(os.path.isfile(ssh_key.path))
         ssh_key.remove()
         # The failed removal must leave the file where it was ...
         self.assertTrue(os.path.isfile(ssh_key.path))
         # ... and must be reported with exactly one warning
         fake_log.warn.assert_called_once_with(mock.ANY)
     # The patch is gone here, so this call really deletes the file
     ssh_key.remove()
Exemplo n.º 10
0
    def _check(self, address):
        """
        Initiates a check on the requested host.

        The host and its credentials are loaded from storage, the host is
        probed through the transport using a temporary SSH key, and the
        host model is saved back to storage whether the probe succeeded or
        failed.

        :param address: Host address to investigate
        :type address: str
        :raises Exception: Re-raises whatever the availability check
                           raised; storage lookup errors propagate as-is.
        """
        # Statuses follow:
        # http://commissaire.readthedocs.org/en/latest/enums.html#host-statuses

        self.logger.info('Checking host "{}".'.format(address))

        host = self.storage.get_host(address)
        host_creds = self.storage.get(HostCreds.new(address=host.address))

        transport = ansibleapi.Transport(host_creds.remote_user)

        with TemporarySSHKey(host_creds, self.logger) as key:
            try:
                self.logger.debug(
                    'Starting watcher run for host "{}"'.format(address))
                result = transport.check_host_availability(host, key.path)
                host.last_check = formatted_dt()
                self.logger.debug('Watcher result for host {}: {}'.format(
                    address, result))
            except Exception as error:
                self.logger.warn(
                    'Failed to connect to host node "{}"'.format(address))
                # Log the exception type followed by its message; passing
                # str(error) here printed the message twice.
                self.logger.debug(
                    'Watcher failed for host node "{}" with {}: {}'.format(
                        address, type(error), error))
                host.status = C.HOST_STATUS_FAILED
                # Bare raise keeps the original traceback intact.
                raise
            finally:
                # Save the model on both the success and the failure path
                self.storage.save(host)
            # Only reached when the availability check did not raise
            self.logger.info(
                'Finished watcher run for host "{}"'.format(address))
Exemplo n.º 11
0
    def on_investigate(self, message, address, cluster_data={}):
        """
        Initiates an investigation of the requested host.

        Runs three phases in order: gather facts from the host, bootstrap
        it, and (when the cluster names a container manager) register it
        with that manager. The host model is saved to storage after every
        phase, whether the phase succeeded or failed. The temporary SSH
        key is removed on every exit path once it has been created.

        :param message: A message instance
        :type message: kombu.message.Message
        :param address: Host address to investigate
        :type address: str
        :param cluster_data: Optional data for the associated cluster
        :type cluster_data: dict
        :returns: The final host model as produced by ``host.to_json()``
        :raises Exception: Re-raises the error of whichever step failed.
        """
        # Statuses follow:
        # http://commissaire.readthedocs.org/en/latest/enums.html#host-statuses

        self.logger.info('{} is now in investigating.'.format(address))
        self.logger.debug('Investigating: {}'.format(address))
        if cluster_data:
            self.logger.debug('Related cluster: {}'.format(cluster_data))

        host = self.storage.get_host(address)
        host_creds = self.storage.get(HostCreds.new(address=host.address))
        # NOTE(review): the remote user is read from the credentials record,
        # the same record that supplies the SSH key below -- confirm that
        # HostCreds (not Host) is the model carrying remote_user.
        transport = ansibleapi.Transport(host_creds.remote_user)

        key = TemporarySSHKey(host_creds, self.logger)
        try:
            key.create()
        except Exception as error:
            self.logger.warn('Unable to continue for {} due to '
                             '{}: {}. Returning...'.format(
                                 address, type(error), error))
            raise

        # The key exists from here on. A single outer ``finally`` removes
        # it, so no exit path (including a failing storage.save) leaks it.
        try:
            try:
                facts = transport.get_info(address, key.path)
                # recreate the host instance with new data
                data = json.loads(host.to_json())
                data.update(facts)
                host = Host.new(**data)
                host.last_check = formatted_dt()
                host.status = C.HOST_STATUS_BOOTSTRAPPING
                self.logger.info('Facts for {} retrieved'.format(address))
                self.logger.debug('Data: {}'.format(host.to_json()))
            except Exception as error:
                self.logger.warn('Getting info failed for {}: {}'.format(
                    address, str(error)))
                host.status = C.HOST_STATUS_FAILED
                raise
            finally:
                # Save the updated host model.
                self.storage.save(host)

            self.logger.info(
                'Finished and stored investigation data for {}'.format(
                    address))
            self.logger.debug(
                'Finished investigation update for {}: {}'.format(
                    address, host.to_json()))

            self.logger.info('{} is now in bootstrapping'.format(address))
            oscmd = get_oscmd(host.os)
            try:
                etcd_config = self._get_etcd_config()
                cluster, network = self._get_cluster_and_network_models(
                    cluster_data)

                container_manager = None
                if cluster:
                    if cluster.container_manager:
                        container_manager = cluster.container_manager
                        self.logger.info(
                            'Using cluster "{}" managed by "{}"'.format(
                                cluster.name, container_manager))
                    else:
                        self.logger.info(
                            'Using unmanaged cluster "{}"'.format(
                                cluster.name))

                self.logger.info('Using network "{}" of type "{}"'.format(
                    network.name, network.type))
                transport.bootstrap(
                    address, key.path, oscmd, etcd_config, network)
                host.status = C.HOST_STATUS_DISASSOCIATED
            except Exception as error:
                self.logger.warn(
                    'Unable to start bootstraping for {}: {}'.format(
                        address, str(error)))
                host.status = C.HOST_STATUS_FAILED
                raise
            finally:
                # Save the updated host model.
                self.storage.save(host)

            # Register with container manager (if applicable).
            try:
                if container_manager:
                    self.request('container.register_node',
                                 container_manager, address)
                    host.status = C.HOST_STATUS_ACTIVE
            except Exception as error:
                # Format the exception itself: error.args[0] raises
                # IndexError for exceptions created without arguments.
                self.logger.warn(
                    'Unable to register {} to container manager '
                    '"{}": {}'.format(address, container_manager, error))
                raise
            finally:
                # Save the updated host model.
                self.storage.save(host)

            self.logger.info('Finished bootstrapping for {}'.format(address))
            self.logger.debug('Finished bootstrapping for {}: {}'.format(
                address, host.to_json()))

            # XXX TEMPORARILY DISABLED
            # WATCHER_QUEUE.put_nowait((host, datetime.datetime.utcnow()))

            return host.to_json()
        finally:
            key.remove()
Exemplo n.º 12
0
def _parse_last_check(value):
    """
    Parses a timestamp in the layout produced by ``datetime.isoformat()``.

    ``isoformat()`` leaves out the fractional part when the microsecond
    field is zero, so both layouts are accepted.

    :param value: Timestamp string to parse.
    :type value: str
    :returns: The parsed, naive timestamp.
    :rtype: datetime.datetime
    :raises ValueError: If the string matches neither layout.
    """
    try:
        return datetime.datetime.strptime(value, '%Y-%m-%dT%H:%M:%S.%f')
    except ValueError:
        return datetime.datetime.strptime(value, '%Y-%m-%dT%H:%M:%S')


def watcher(queue, store_manager, run_once=False):
    """
    Attempts to connect and check hosts for status.

    When the queue starts out empty it is first populated from the store.
    Each queued item is a ``(host, last_check)`` tuple; hosts checked less
    than ``delta`` ago are requeued untouched, all others are probed,
    saved and requeued with the current time.

    :param queue: Queue to pull work from.
    :type queue: Queue.Queue
    :param store_manager: Proxy object for remtote stores
    :type store_manager: commissaire.store.StoreHandlerManager
    :param run_once: If only one run should occur.
    :type run_once: bool
    """
    logger = logging.getLogger('watcher')
    logger.info('Watcher started')
    # TODO: should be configurable
    delta = datetime.timedelta(seconds=20)
    # TODO: should be configurable
    throttle = 60  # 1 minute

    # If the queue is empty attempt to populated it with known hosts
    if queue.qsize() == 0:
        logger.info('The WATCHER_QUEUE is empty. '
                    'Attempting to populate it from the store.')
        try:
            hosts = store_manager.list(Hosts(hosts=[]))
            for host in hosts.hosts:
                # Parse per host so that one bad timestamp does not stop
                # the remaining hosts from being queued.
                try:
                    last_check = _parse_last_check(host.last_check)
                except (TypeError, ValueError):
                    logger.warning(
                        'Skipping {0}: unparsable last_check {1!r}'.format(
                            host.address, host.last_check))
                    continue
                queue.put_nowait((host, last_check))
                logger.debug('Inserted {0} into WATCHER_QUEUE'.format(
                    host.address))
        except Exception as error:
            # Population stays best-effort, but the cause is now visible
            # and KeyboardInterrupt/SystemExit are no longer swallowed.
            logger.info('No hosts found in the store.')
            logger.debug('Populating from the store failed with '
                         '{0}: {1}'.format(type(error), error))

    while True:
        try:
            host, last_run = queue.get_nowait()
        except Empty:
            # Honor run_once here too; otherwise an empty queue would
            # keep a single-run watcher looping forever.
            if run_once:
                logger.info('Exiting watcher due to run_once request.')
                break
            time.sleep(throttle)
            continue

        logger.debug('Retrieved {0} from queue. Last check was {1}'.format(
            host.address, last_run))
        now = datetime.datetime.utcnow()
        if last_run > now - delta:
            logger.debug('{0} not ready to check. {1}'.format(
                host.address, last_run))
            # Requeue the host with the same last_run
            queue.put_nowait((host, last_run))
        else:
            logger.info('Checking {0} for availability'.format(
                host.address))
            transport = ansibleapi.Transport(host.remote_user)
            with TemporarySSHKey(host, logger) as key:
                results = transport.check_host_availability(host, key.path)
                if results[0] == 0:  # This means the host is available
                    # Only flip the bit on failed only
                    if host.status == 'failed':
                        try:
                            cluster_type = util.cluster_for_host(
                                host.address, store_manager).type
                        except Exception:
                            logger.debug(
                                '{0} has no cluster type. Assuming {1}'.format(
                                    host.address, C.CLUSTER_TYPE_HOST))
                            cluster_type = C.CLUSTER_TYPE_HOST
                        # If the type is CLUSTER_TYPE_HOST then it should be
                        if cluster_type == C.CLUSTER_TYPE_HOST:
                            host.status = 'disassociated'
                        else:
                            host.status = 'active'
                else:
                    # If we can not access the host at all throw it to failed
                    host.status = 'failed'
                host.last_check = now.isoformat()
                host = store_manager.save(host)
                # Requeue the host
                queue.put_nowait((host, now))
                logger.debug('{0} has been requeued for next check run'.format(
                    host.address))

        if run_once:
            logger.info('Exiting watcher due to run_once request.')
            break

        logger.debug('Sleeping for {0} seconds.'.format(throttle))
        time.sleep(throttle)

    logger.info('Watcher stopping')
0
    def _execute(self, message, model_instance, command_args,
                 finished_hosts_key):
        """
        Remotely executes OS-specific shell commands across a cluster.

        Saves the initial model to storage, replies to the caller when the
        message carries a ``reply_to`` property, then runs the command on
        each host of the cluster in turn. Processing stops at the first
        host whose command raises. The model is saved again with its final
        status and ``finished_at`` timestamp.

        :param message: A message instance
        :type message: kombu.message.Message
        :param model_instance: Initial model for the async operation
        :type model_instance: commissaire.models.Model
        :param command_args: Command name + arguments as a tuple
        :type command_args: tuple
        :param finished_hosts_key: Model attribute name for finished hosts
        :type finished_hosts_key: str
        :raises Exception: Re-raises any error hit while saving the initial
                           state or responding to the caller.
        """
        # Split out the command name.
        command_name = command_args[0]
        command_args = command_args[1:]

        end_status = 'finished'

        # XXX We assume the model instance names a cluster.
        #     Note, cluster_name is used in the except clause,
        #     so it must be reliably defined.
        cluster_name = getattr(model_instance, 'name', None)

        try:
            # NOTE(review): this assert is the only guard against a model
            # without a name; asserts are skipped when Python runs with -O.
            assert cluster_name is not None
            model_json_data = model_instance.to_dict()

            # Set the initial status in the store.
            self.logger.info('Setting initial status.')
            self.logger.debug('Status={}'.format(model_json_data))
            self.storage.save(model_instance)

            # Respond to the caller with the initial status.
            if message.properties.get('reply_to'):
                # XXX Have to dig up the message ID again.
                #     CommissaireService.on_message() already
                #     does this, but doesn't pass it to us.
                body = message.body
                if isinstance(body, bytes):
                    body = json.loads(body.decode())
                # Falls back to an id of -1 when the body carries none
                self.respond(message.properties['reply_to'],
                             body.get('id', -1), model_json_data)
        except Exception as error:
            self.logger.error(
                'Unable to save initial state for "{}" clusterexec due to '
                '{}: {}'.format(cluster_name, type(error), error))
            raise error

        # Collect all host addresses in the cluster.

        cluster = self.storage.get_cluster(cluster_name)

        n_hosts = len(cluster.hostset)
        if n_hosts:
            self.logger.debug('{} hosts in cluster "{}"'.format(
                n_hosts, cluster_name))
        else:
            self.logger.warn('No hosts in cluster "{}"'.format(cluster_name))

        for address in cluster.hostset:
            host = self.storage.get_host(address)

            oscmd = get_oscmd(host.os)

            # os_command is only used for logging
            os_command = getattr(oscmd, command_name)(*command_args)
            self.logger.info('Executing {} on {}...'.format(
                os_command, host.address))

            # Record the host as in-process before running the command
            model_instance.in_process.append(host.address)
            self.storage.save(model_instance)

            with TemporarySSHKey(host, self.logger) as key:
                try:
                    transport = ansibleapi.Transport(host.remote_user)
                    # The transport method is looked up by command name
                    method = getattr(transport, command_name)
                    method(host.address, key.path, oscmd, command_args)
                except Exception as error:
                    # If there was a failure, set the end_status and break.
                    # The break leaves the host loop, so the bookkeeping
                    # below is skipped and the failed host stays listed in
                    # in_process.
                    end_status = C.HOST_STATUS_FAILED
                    self.logger.error(
                        'Clusterexec {} for {} failed: {}: {}'.format(
                            command_name, host.address, type(error), error))
                    break

            # Set the finished hosts.
            finished_hosts = getattr(model_instance, finished_hosts_key)
            finished_hosts.append(host.address)
            try:
                index = model_instance.in_process.index(host.address)
                model_instance.in_process.pop(index)
            except ValueError:
                self.logger.warn('Host {} was not in_process for {} {}'.format(
                    host.address, command_name, cluster_name))
            self.storage.save(model_instance)

            self.logger.info('Finished executing {} for {} in {}'.format(
                command_name, host.address, cluster_name))

        # Final set of command result.

        model_instance.finished_at = formatted_dt()
        model_instance.status = end_status

        self.logger.info('Cluster {} final {} status: {}'.format(
            cluster_name, command_name, model_instance.to_json()))

        self.storage.save(model_instance)
Exemplo n.º 14
0
    def on_investigate(self, message, address, cluster_data={}):
        """
        Initiates an investigation of the requested host.

        Fetches the host from storage, gathers facts over the transport,
        bootstraps the host and finally asks storage whether the node is
        registered for the cluster type. The host model is saved after the
        fact-gathering and the bootstrapping phase, whether the phase
        succeeded or failed. The temporary SSH key is removed on every
        exit path once it has been created.

        :param message: A message instance
        :type message: kombu.message.Message
        :param address: Host address to investigate
        :type address: str
        :param cluster_data: Optional data for the associated cluster
        :type cluster_data: dict
        :returns: The final host model as produced by ``host.to_json()``
        :raises Exception: Re-raises the error of whichever step failed.
        """
        # Statuses follow:
        # http://commissaire.readthedocs.org/en/latest/enums.html#host-statuses

        self.logger.info('{0} is now in investigating.'.format(address))
        self.logger.debug('Investigating: {0}'.format(address))
        if cluster_data:
            self.logger.debug('Related cluster: {0}'.format(cluster_data))

        try:
            params = {
                'model_type_name': 'Host',
                'model_json_data': Host.new(address=address).to_json(),
                'secure': True
            }
            response = self.request('storage.get', params=params)
            host = Host.new(**response['result'])
        except Exception as error:
            self.logger.warn(
                'Unable to continue for {0} due to '
                '{1}: {2}. Returning...'.format(address, type(error), error))
            raise

        transport = ansibleapi.Transport(host.remote_user)

        key = TemporarySSHKey(host, self.logger)
        try:
            key.create()
        except Exception as error:
            self.logger.warn(
                'Unable to continue for {0} due to '
                '{1}: {2}. Returning...'.format(address, type(error), error))
            raise

        # The key exists from here on. A single outer ``finally`` removes
        # it, so a failing storage request can no longer leak the file.
        try:
            try:
                facts = transport.get_info(address, key.path)
                # recreate the host instance with new data
                data = json.loads(host.to_json(secure=True))
                data.update(facts)
                host = Host.new(**data)
                host.last_check = datetime.datetime.utcnow().isoformat()
                host.status = 'bootstrapping'
                self.logger.info('Facts for {0} retrieved'.format(address))
                self.logger.debug('Data: {0}'.format(host.to_json()))
            except Exception as error:
                self.logger.warn('Getting info failed for {0}: {1}'.format(
                    address, str(error)))
                host.status = 'failed'
                raise
            finally:
                # Save the updated host model.
                params = {
                    'model_type_name': host.__class__.__name__,
                    'model_json_data': host.to_json()
                }
                self.request('storage.save', params=params)

            self.logger.info(
                'Finished and stored investigation data for {0}'.format(
                    address))
            self.logger.debug(
                'Finished investigation update for {0}: {1}'.format(
                    address, host.to_json()))

            self.logger.info('{0} is now in bootstrapping'.format(address))
            oscmd = get_oscmd(host.os)
            try:
                etcd_config = self._get_etcd_config()
                cluster, network = self._get_cluster_and_network_models(
                    cluster_data)
                self.logger.info(
                    'Using cluster "{0}" of type "{1}"'.format(
                        cluster.name, cluster.type))
                self.logger.info(
                    'Using network "{0}" of type "{1}"'.format(
                        network.name, network.type))
                transport.bootstrap(
                    address, key.path, oscmd, etcd_config, cluster, network)
                host.status = 'inactive'
            except Exception as error:
                self.logger.warn(
                    'Unable to start bootstraping for {0}: {1}'.format(
                        address, str(error)))
                host.status = 'disassociated'
                raise
            finally:
                # Save the updated host model.
                params = {
                    'model_type_name': host.__class__.__name__,
                    'model_json_data': host.to_json()
                }
                self.request('storage.save', params=params)

            # Verify association with relevant container managers
            params = {
                'cluster_type': cluster.type,
                'address': address
            }
            response = self.request('storage.node_registered', params=params)
            if response['result']:
                # NOTE(review): this status change is returned to the
                # caller but not saved to storage here -- confirm that is
                # intended.
                host.status = 'active'

            self.logger.info(
                'Finished bootstrapping for {0}'.format(address))
            self.logger.debug('Finished bootstrapping for {0}: {1}'.format(
                address, host.to_json()))

            # XXX TEMPORARILY DISABLED
            # WATCHER_QUEUE.put_nowait((host, datetime.datetime.utcnow()))

            return host.to_json()
        finally:
            key.remove()
Exemplo n.º 15
0
def investigator(queue, config, run_once=False):
    """
    Investigates new hosts to retrieve and store facts.

    Each queue item is a ``(store_manager, to_investigate, ssh_priv_key,
    remote_user)`` tuple. For every item the host is loaded from the
    store, facts are gathered, the host is bootstrapped and, for
    Kubernetes clusters, its registration with the container manager is
    polled. The host model is saved after each phase.

    :param queue: Queue to pull work from.
    :type queue: Queue.Queue
    :param config: Configuration information.
    :type config: commissaire.config.Config
    :param run_once: If the loop should stop after handling one item.
    :type run_once: bool
    """
    logger = logging.getLogger('investigator')
    logger.info('Investigator started')

    while True:
        # Statuses follow:
        # http://commissaire.readthedocs.org/en/latest/enums.html#host-statuses
        store_manager, to_investigate, ssh_priv_key, remote_user = queue.get()
        address = to_investigate['address']
        logger.info('{0} is now in investigating.'.format(address))
        # Arguments are ordered to match the labels in the message.
        # NOTE(review): this writes the private key to the debug log --
        # confirm that is acceptable.
        logger.debug(
            'Investigation details: key={0}, data={1}, remote_user={2}'.format(
                ssh_priv_key, to_investigate, remote_user))

        transport = ansibleapi.Transport(remote_user)

        # Reset per item so the handler below never touches an unbound
        # name or the key left over from a previous iteration.
        key = None
        try:
            host = store_manager.get(
                Host(
                    address=address,
                    status='',
                    os='',
                    cpus=0,
                    memory=0,
                    space=0,
                    last_check='',
                    ssh_priv_key='',
                    remote_user=''))
            key = TemporarySSHKey(host, logger)
            key.create()
        except Exception as error:
            logger.warn(
                'Unable to continue for {0} due to '
                '{1}: {2}. Returning...'.format(address, type(error), error))
            if key is not None:
                key.remove()
            # Honor run_once like the other failure paths do; otherwise
            # the loop would block on queue.get() again.
            if run_once:
                break
            continue

        try:
            result, facts = transport.get_info(address, key.path)
            # recreate the host instance with new data
            data = json.loads(host.to_json(secure=True))
            data.update(facts)
            host = Host(**data)
            host.last_check = datetime.datetime.utcnow().isoformat()
            host.status = 'bootstrapping'
            logger.info('Facts for {0} retrieved'.format(address))
            logger.debug('Data: {0}'.format(host.to_json()))
        except Exception as error:
            logger.warn('Getting info failed for {0}: {1}'.format(
                address, error))
            host.status = 'failed'
            store_manager.save(host)
            key.remove()
            if run_once:
                break
            continue

        store_manager.save(host)
        logger.info(
            'Finished and stored investigation data for {0}'.format(address))
        logger.debug('Finished investigation update for {0}: {1}'.format(
            address, host.to_json()))

        logger.info('{0} is now in bootstrapping'.format(address))
        oscmd = get_oscmd(host.os)
        try:
            result, facts = transport.bootstrap(
                address, key.path, config, oscmd, store_manager)
            host.status = 'inactive'
            store_manager.save(host)
        except Exception as error:
            logger.warn('Unable to start bootstraping for {0}: {1}'.format(
                address, error))
            host.status = 'disassociated'
            store_manager.save(host)
            key.remove()
            if run_once:
                break
            continue

        # NOTE(review): the original chained assignment also overwrote
        # host.status with this cluster type constant. The status set by
        # the bootstrap step is kept instead -- confirm the intended status
        # for hosts outside a Kubernetes cluster.
        cluster_type = C.CLUSTER_TYPE_HOST
        try:
            cluster = util.cluster_for_host(address, store_manager)
            cluster_type = cluster.type
        except KeyError:
            # Not part of a cluster
            pass

        # Verify association with the container manager
        if cluster_type == C.CLUSTER_TYPE_KUBERNETES:
            try:
                container_mgr = KubeContainerManager(config)
                # Try 3 times waiting 5 seconds each time before giving up
                attempts = 3
                for cnt in range(attempts):
                    if container_mgr.node_registered(address):
                        logger.info(
                            '{0} has been registered with the '
                            'container manager.'.format(address))
                        host.status = 'active'
                        break
                    # Give up after the last attempt; the former check
                    # against 3 could never match inside range(0, 3).
                    if cnt == attempts - 1:
                        msg = 'Could not register with the container manager'
                        logger.warn(msg)
                        raise Exception(msg)
                    logger.debug(
                        '{0} has not been registered with the container '
                        ' manager. Checking again in 5 seconds...'.format(
                            address))
                    sleep(5)
            except Exception as error:
                logger.warn(
                    'Unable to finish bootstrap for {0} while associating '
                    'with the container manager: {1}'.format(
                        address, error))
                host.status = 'inactive'

        store_manager.save(host)
        logger.info(
            'Finished bootstrapping for {0}'.format(address))
        # Use the job's own logger rather than the root logging module
        logger.debug('Finished bootstrapping for {0}: {1}'.format(
            address, host.to_json()))

        key.remove()
        if run_once:
            logger.info('Exiting due to run_once request.')
            break

    logger.info('Investigator stopping')
Exemplo n.º 16
0
def clusterexec(store_manager, cluster_name, command, kwargs=None):
    """
    Remotely executes a top-level command across the hosts of a cluster.

    A status record (ClusterUpgrade, ClusterRestart or ClusterDeploy,
    depending on ``command``) is saved through ``store_manager`` before the
    run, after each host starts and finishes, and once more at the end with
    the final status ('finished' or 'failed'). The first host whose command
    fails stops the run. Store failures are logged and end the function
    early; nothing is raised for them.

    :param store_manager: Proxy object for remote stores
    :type store_manager: commissaire.store.StoreHandlerManager
    :param cluster_name: Name of the cluster to act on
    :type cluster_name: str
    :param command: Top-level command to execute. One of 'upgrade',
                    'restart' or 'deploy'; anything else is logged as an
                    error and the function returns without doing any work.
    :type command: str
    :param kwargs: Keyword arguments for the command. Only 'version' is
                   read here (for 'deploy'); the dict is also handed to the
                   transport call unchanged.
    :type kwargs: dict or None
    """
    logger = logging.getLogger('clusterexec')

    # Avoid a shared mutable default: the dict is passed on to the transport.
    if kwargs is None:
        kwargs = {}

    # TODO: This is a hack and should really be done elsewhere
    command_args = ()
    if command == 'upgrade':
        finished_hosts_key = 'upgraded'
        model_instance = ClusterUpgrade.new(
            name=cluster_name,
            status='in_process',
            started_at=datetime.datetime.utcnow().isoformat(),
            upgraded=[],
            in_process=[],
        )
    elif command == 'restart':
        finished_hosts_key = 'restarted'
        model_instance = ClusterRestart.new(
            name=cluster_name,
            status='in_process',
            started_at=datetime.datetime.utcnow().isoformat(),
            restarted=[],
            in_process=[],
        )
    elif command == 'deploy':
        finished_hosts_key = 'deployed'
        version = kwargs.get('version', '')
        command_args = (version,)
        model_instance = ClusterDeploy.new(
            name=cluster_name,
            status='in_process',
            started_at=datetime.datetime.utcnow().isoformat(),
            version=version,
            deployed=[],
            in_process=[],
        )
    else:
        # Without a status model there is nothing to track or execute.
        # Bail out explicitly instead of tripping over unbound locals below.
        logger.error(
            'Unsupported clusterexec command "{0}" for cluster "{1}". '
            'Returning...'.format(command, cluster_name))
        return

    end_status = 'finished'

    try:
        # Set the initial status in the store
        logger.info('Setting initial status.')
        logger.debug('Status={0}'.format(model_instance.to_json()))
        store_manager.save(model_instance)
    except Exception as error:
        logger.error(
            'Unable to save initial state for "{0}" clusterexec due to '
            '{1}: {2}'.format(cluster_name, type(error), error))
        return

    # Collect all host addresses in the cluster
    try:
        cluster = store_manager.get(Cluster.new(
            name=cluster_name, status='', hostset=[]))
    except Exception as error:
        logger.warning(
            'Unable to continue for cluster "{0}" due to '
            '{1}: {2}. Returning...'.format(cluster_name, type(error), error))
        return

    if cluster.hostset:
        logger.debug(
            '{0} hosts in cluster "{1}"'.format(
                len(cluster.hostset), cluster_name))
    else:
        logger.warning('No hosts in cluster "{0}"'.format(cluster_name))

    # TODO: Find better way to do this
    try:
        hosts = store_manager.list(Hosts(hosts=[]))
    except Exception as error:
        logger.warning(
            'No hosts in the cluster. Error: {0}. Exiting clusterexec'.format(
                error))
        return

    for host in hosts.hosts:
        if host.address not in cluster.hostset:
            logger.debug(
                'Skipping {0} as it is not in this cluster.'.format(
                    host.address))
            continue  # Move on to the next one
        oscmd = get_oscmd(host.os)

        # command_list is only used for logging
        command_list = getattr(oscmd, command)(*command_args)
        logger.info('Executing {0} on {1}...'.format(
            command_list, host.address))

        # Record the host as in-flight before touching it
        model_instance.in_process.append(host.address)
        try:
            store_manager.save(model_instance)
        except Exception as error:
            logger.error(
                'Unable to save in_process state for "{0}" clusterexec due to '
                '{1}: {2}'.format(cluster_name, type(error), error))
            return

        key = TemporarySSHKey(host, logger)
        key.create()

        try:
            transport = ansibleapi.Transport(host.remote_user)
            exe = getattr(transport, command)
            result, facts = exe(
                host.address, key.path, oscmd, kwargs)
        # XXX: ansibleapi explicitly raises Exception()
        except Exception as ex:
            # If there was a failure set the end_status and break out.
            # The failed host is intentionally left in in_process.
            end_status = 'failed'
            logger.error('Clusterexec {0} for {1} failed: {2}: {3}'.format(
                command, host.address, type(ex), ex))
            break
        finally:
            # Best-effort cleanup: a leftover key file must not abort the run,
            # but interpreter-exit signals should still propagate.
            try:
                key.remove()
                logger.debug('Removed temporary key file {0}'.format(key.path))
            except Exception:
                logger.warning(
                    'Unable to remove the temporary key file: {0}'.format(
                        key.path))

        # Set the finished hosts
        new_finished_hosts = getattr(
            model_instance, finished_hosts_key) + [host.address]
        setattr(
            model_instance,
            finished_hosts_key,
            new_finished_hosts)
        try:
            model_instance.in_process.remove(host.address)
        except ValueError:
            logger.warning('Host {0} was not in_process for {1} {2}'.format(
                host.address, command, cluster_name))
        try:
            store_manager.save(model_instance)
            logger.info('Finished executing {0} for {1} in {2}'.format(
                command, host.address, cluster_name))
        except Exception as error:
            logger.error(
                'Unable to save cluster state for "{0}" clusterexec due to '
                '{1}: {2}'.format(cluster_name, type(error), error))
            return

    # Final set of command result
    model_instance.finished_at = datetime.datetime.utcnow().isoformat()
    model_instance.status = end_status

    logger.info('Cluster {0} final {1} status: {2}'.format(
        cluster_name, command, model_instance.to_json()))

    try:
        store_manager.save(model_instance)
    except Exception as error:
        logger.error(
            'Unable to save final state for "{0}" clusterexec due to '
            '{1}: {2}'.format(cluster_name, type(error), error))

    logger.info('Clusterexec stopping')