示例#1
0
 def perform_outlet_action(self, outlet, action):
     """
     Apply ``action`` to ``outlet`` by replaying a fixed dialogue against
     the device's text console at ``self.address``:``self.port``.

     :param outlet: Outlet identifier; sent to the device verbatim.
     :param action: One of "on", "off" or "reboot".
     :raises KeyError: if ``action`` is not one of the known actions. This
      is raised before any connection is opened.
     """
     log.debug(
         "%s: Running %s -> %s on %s:%s" %
         (self.__class__.__name__, outlet, action, self.address, self.port))
     actions = {"on": "1", "off": "2", "reboot": "3"}
     # Resolve the action before connecting, so that an unknown action
     # fails cleanly instead of part-way through the dialogue.
     action_code = actions[action]

     # Quick and dirty: a canned list of replies, each one sent and then
     # followed by a read that discards the device's response.
     # NOTE(review): presumably these are a login followed by menu
     # selections -- confirm against the device being driven.
     replies = [
         "%s%s%sfoo\n\n" % (chr(255), chr(253), chr(5)),
         "bar\n\n",
         "1\n\n",
         "2\n\n",
         "1\n\n",
         "%s\n\n" % outlet,
         "1\n\n",
         "%s\n\n" % action_code,
         "YES\n\n",
         "\n\n",
     ]

     sock = socket.create_connection((self.address, self.port))
     try:
         # Discard whatever the device sends on connect.
         sock.recv(4096)
         for reply in replies:
             sock.send(reply)
             sock.recv(4096)
         sock.send("".join(CTRL_C))
     finally:
         # Always release the socket, even if the dialogue fails.
         sock.close()
 def reboot_server(self, fqdn):
     """
     Reboot the server registered under ``fqdn``.

     A server that is not running is simply started; a running server is
     shut down with ``reboot=True``.
     """
     log.debug("reboot %s" % fqdn)
     target = self.servers[fqdn]
     if target.running:
         target.shutdown(reboot=True)
         return
     target.startup()
 def start(self, ha_label):
     """
     Mark the resource ``ha_label`` as started on its primary node, save
     the state, and return the resource's record.

     :raises KeyError: if ``ha_label`` is not a known resource.
     """
     with self._lock:
         record = self.state['resources'][ha_label]
         primary = record['primary_node']
         record['started_on'] = primary
         log.debug("Starting resource %s on %s" % (ha_label, primary))
         self.save()
         return record
 def mount(self, nids, filesystem_name):
     """
     Record a mount of ``filesystem_name`` reached via ``nids``.

     The (nids, filesystem_name) pair is stored at most once in
     ``self.state['mounts']``; the state is saved whether or not the pair
     was newly added.
     """
     log.debug("FakeClient.mount %s %s" % (nids, filesystem_name))
     # Look up NIDs to an MGT
     # Check the MGT and targets are really started
     # Add an entry to self.state['mounts']
     # (only the last of these steps is actually performed here)
     entry = (nids, filesystem_name)
     known_mounts = self.state['mounts']
     if entry not in known_mounts:
         known_mounts.append(entry)
     self.save()
 def del_client_mount(self, mountspec):
     """
     Forget the client mount recorded for ``mountspec``.

     Removing a mountspec that is not recorded is a silent no-op. The
     state is only saved when an entry was actually removed.

     :param mountspec: Key of the entry in ``self.state['client_mounts']``.
     """
     with self._lock:
         try:
             del self.state['client_mounts'][mountspec]
         except KeyError:
             # Not mounted: nothing to remove and nothing to save.
             pass
         else:
             # Kept outside the try body so that a KeyError raised while
             # saving is not mistaken for "not mounted" and swallowed.
             self.save()
     log.debug("Unmounted %s" % mountspec)
 def add_client_mount(self, mountspec, mountpoint):
     """
     Record ``mountspec`` as mounted beneath ``mountpoint``.

     The recorded path is ``mountpoint`` joined with the text that follows
     the first ``:/`` in ``mountspec``. ``start_lnet`` is called before the
     state is updated and saved.

     :raises IndexError: if ``mountspec`` contains no ``:/`` separator.
     """
     fsname = mountspec.split(':/')[1]
     full_path = "%s/%s" % (mountpoint, fsname)
     self.start_lnet()
     with self._lock:
         client_mounts = self.state['client_mounts']
         client_mounts[mountspec] = full_path
         self.save()
     log.debug("Mounted %s on %s" % (mountspec, full_path))
示例#7
0
    def server_poweron_hook(self, outlet):
        """
        Called when an outlet has been turned on: looks up the server
        associated with ``outlet`` and passes its FQDN to
        ``start_server_fn``.

        What happens for a server that is already started is decided by
        that callback, not by this hook.
        """
        server_fqdn = self.state['outlet_servers'][outlet]
        log.debug("starting %s in poweron_hook" % server_fqdn)
        self.start_server_fn(server_fqdn)
 def start_server(self, fqdn, simulate_bootup=False):
     """
     Start the server registered under ``fqdn``.

     :param simulate_bootup: Whether to simulate a bootup, delays and all.
      When set, a server that is already running is started again instead
      of being rejected.
     :raises RuntimeError: if the server is already running and
      ``simulate_bootup`` is false.
     """
     log.debug("start %s" % fqdn)
     target = self.servers[fqdn]
     if target.running:
         if not simulate_bootup:
             raise RuntimeError(
                 "Can't start %s, it is already running" % fqdn)
     target.startup(simulate_bootup)
 def inject_log_message(self, message):
     """
     Append a log entry carrying ``message`` to ``self._log_messages``.

     The entry uses fixed source, severity and facility values and is
     stamped with ``IMLDateTime.utcnow()`` in ISO format.
     """
     log.debug("Injecting log message %s/%s" % (self.fqdn, message))
     entry = {
         'source': 'cluster_sim',
         'severity': 1,
         'facility': 1,
         'message': message,
         'datetime': IMLDateTime.utcnow().isoformat()
     }
     self._log_messages.append(entry)
示例#10
0
    def server_poweroff_hook(self, outlet):
        """
        Called when an outlet has been turned off: if the server associated
        with ``outlet`` no longer has power according to
        ``server_has_power``, its FQDN is passed to ``stop_server_fn``.
        Otherwise nothing happens.
        """
        server_fqdn = self.state['outlet_servers'][outlet]

        if self.server_has_power(server_fqdn):
            # Still powered from somewhere else; leave the server alone.
            return

        log.debug("stopping %s in poweroff_hook" % server_fqdn)
        self.stop_server_fn(server_fqdn)
示例#11
0
 def login(self):
     """
     Run the login dialogue over ``self.fd`` and then hand over to
     ``control_console``.

     NOTE(review): the password-prompt statement below does not parse as
     Python; it looks as if several statements were fused together by
     credential scrubbing. Presumably the lost code flushed the prompt and
     read the password the same way the user name is read -- restore it
     from the upstream source before relying on this method.
     """
     # Prompt for the user name and make sure the prompt is sent.
     self.fd.write("\n\rUser Name : ")
     self.fd.flush()
     # We don't really need to do this, but it's nice to get rid of the
     # telnet control characters for logging.
     username = self._strip_controls(self.fd.readline().rstrip())
     # Read one more line and discard it.
     self.fd.readline()
     log.debug("Received username: %s" % username)
     # NOTE(review): corrupted line, see the docstring.
     self.fd.write("\rPassword : "******"Received password: %s" % password)
     self.control_console()
    def stop_server(self, fqdn, shutdown=False, simulate_shutdown=False):
        """
        Stop the server registered under ``fqdn``; does nothing beyond
        logging when the server is not running.

        :param shutdown: Whether to treat this like a server shutdown (leave the
         HA cluster) rather than just an agent shutdown.
        :param simulate_shutdown: Whether to simulate a shutdown, delays and all.
         Only used when ``shutdown`` is true.
        """
        log.debug("stop %s" % fqdn)
        target = self.servers[fqdn]
        if target.running:
            if shutdown:
                target.shutdown(simulate_shutdown)
            else:
                target.shutdown_agent()
        else:
            log.debug("not running")
    def start_all(self):
        """
        Start every server in ``self.servers``, sleeping between starts so
        that they are spread over ``Session.POLL_PERIOD``.

        ``pre_server_start`` runs first and ``post_server_start`` runs last,
        including when there are no servers to start.
        """
        self.pre_server_start()

        if not self.servers:
            log.info("start_all: No servers yet")
        else:
            # Spread out starts to avoid everyone sending their update
            # at the same moment
            delay = Session.POLL_PERIOD / float(len(self.servers))

            log.debug("Start all (%.2f dispersion)" % delay)

            for position, fqdn in enumerate(self.servers.keys(), 1):
                self.start_server(fqdn)
                # No pause is needed after the final server.
                if position != len(self.servers):
                    time.sleep(delay)

        self.post_server_start()
    def join(self, nodename, **kwargs):
        """
        Bring ``nodename`` online and start any stopped resources on it.

        A node that is already known is just flagged online, and ``kwargs``
        are ignored for it. An unknown node gets a new record holding
        ``online``, ``nodename`` and any ``kwargs``. Every resource that is
        not currently started and names this node as its primary or
        secondary node is then started here, and the state is saved.
        """
        with self._lock:
            nodes = self.state['nodes']
            if nodename not in nodes:
                record = {'online': True, 'nodename': nodename}
                record.update(**kwargs)
                nodes[nodename] = record
            else:
                nodes[nodename]['online'] = True

            for ha_label, resource in self.state['resources'].items():
                if resource['started_on'] is not None:
                    # Already running somewhere; leave it where it is.
                    continue

                if resource['primary_node'] == nodename:
                    template = "Starting %s on primary %s"
                elif resource['secondary_node'] == nodename:
                    template = "Starting %s on secondary %s"
                else:
                    continue

                log.debug(template % (ha_label, nodename))
                resource['started_on'] = nodename
            self.save()
    def register(self, fqdn, secret):
        """
        Register the server ``fqdn`` using the registration ``secret``, then
        start it.

        A running agent is shut down first. On success the certificate from
        the registration result is installed, the server is started and the
        registration result is returned.

        :return: The registration result, or None on any failure. Failures
         are logged rather than raised, including the case where a
         non-worker server has no power.
        """
        try:
            log.debug("register %s" % fqdn)
            target = self.servers[fqdn]

            if target.agent_is_running:
                # e.g. if the server was added then force-removed then re-added
                target.shutdown_agent()

            if not target.is_worker and not self.power.server_has_power(fqdn):
                raise RuntimeError(
                    "Not registering %s, none of its PSUs are powered" % fqdn)

            register_url = self.url + "register/%s/" % secret
            agent_client = AgentClient(
                url=register_url,
                action_plugins=FakeActionPlugins(self, target),
                device_plugins=FakeDevicePlugins(target),
                server_properties=target,
                crypto=target.crypto)

            try:
                outcome = agent_client.register()
            except ConnectionError as e:
                log.error("Registration connection failed for %s: %s" %
                          (fqdn, e))
                return
            except HttpError as e:
                log.error("Registration request failed for %s: %s" % (fqdn, e))
                return

            target.crypto.install_certificate(outcome['certificate'])

            # Immediately start the agent after registration, to pick up the
            # setup actions that will be waiting for us on the manager.
            self.start_server(fqdn)
            return outcome
        except Exception:
            log.error(traceback.format_exc())
    def register_many(self, fqdns, secret):
        """
        Register every host in ``fqdns`` concurrently, one thread per host.

        :return: A list with the result of ``register`` for each host, in
         the same order as ``fqdns``.
        """
        simulator = self

        class RegistrationThread(threading.Thread):
            """Registers a single host and keeps the outcome in ``result``."""

            def __init__(self, fqdn, secret):
                super(RegistrationThread, self).__init__()
                self.fqdn = fqdn
                self.secret = secret

            def run(self):
                self.result = simulator.register(self.fqdn, self.secret)

        log.debug("register_many: spawning threads")
        workers = [RegistrationThread(fqdn, secret) for fqdn in fqdns]
        for worker in workers:
            worker.start()

        total = len(workers)
        for finished, worker in enumerate(workers, 1):
            worker.join()
            log.debug("register_many: joined %s/%s" % (finished, total))

        return [worker.result for worker in workers]
    def leave(self, nodename):
        """
        Take ``nodename`` offline and move its resources elsewhere.

        Each resource started on ``nodename`` is moved to the other of its
        primary/secondary nodes. When there is no other node it is marked
        as not started. The state is saved afterwards.
        """
        with self._lock:
            log.debug("leave: %s" % nodename)
            self.state['nodes'][nodename]['online'] = False
            for ha_label, resource in self.state['resources'].items():
                if not resource['started_on'] == nodename:
                    continue

                # Candidate nodes are the configured pair minus the leaver.
                candidates = {
                    resource['primary_node'], resource['secondary_node']
                } - {nodename}
                if not candidates:
                    log.debug("stopping %s" % (ha_label))
                    resource['started_on'] = None
                    continue

                destination = candidates.pop()
                log.debug("migrating %s to %s" % (ha_label, destination))
                resource['started_on'] = destination

            self.save()
示例#18
0
 def start_sim_server(self, pdu_name):
     """
     Create and start a ``PDUSimulatorServer`` for the PDU called
     ``pdu_name``. The new server is stored in ``self.sim_servers`` before
     it is started.

     Asserts that no server has been stored for ``pdu_name`` already.
     """
     log.debug("starting server for %s" % pdu_name)
     assert pdu_name not in self.sim_servers
     sim_server = PDUSimulatorServer(self.pdu_sims[pdu_name])
     self.sim_servers[pdu_name] = sim_server
     sim_server.start()
    def detect_scan(self, target_devices):
        """
        Build a detection report for the given devices.

        :param target_devices: Iterable of dicts, each with a ``path`` key.
        :return: A dict with three keys. ``local_targets`` has one entry per
         device that is a target with an HA resource; its ``mounted`` flag
         says whether the resource is located on this node. ``mgs_targets``
         maps filesystem name to target entries and is only filled in when
         one of the local targets is labelled ``MGS``. ``mgs_conf_params``
         is always empty.
        """
        # Dump the known targets and cluster resources to the log.
        for serial, target in self._devices.state['targets'].items():
            log.info("targets: %s: %s %s" %
                     (serial, target['label'], target['uuid']))
        for ha_label, resource in self._cluster.state['resources'].items():
            log.info("cluster: %s %s %s" %
                     (ha_label, resource['uuid'], resource['device_path']))

        local_targets = []
        mgs_target = None
        for device in target_devices:
            path = device['path']

            try:
                target = self._devices.get_target_by_path(self.fqdn, path)
            except KeyError:
                # Key error means this is not a target
                continue

            label = target['label']
            uuid = target['uuid']

            try:
                ha_resource = self._cluster.get_by_uuid(uuid)
            except KeyError:
                log.warning("No HA resource for target %s/%s" % (label, uuid))
                continue

            locations = self._cluster.resource_locations()
            is_mounted = locations[ha_resource['ha_label']] == self.nodename
            local_targets.append({
                "name": label,
                "uuid": uuid,
                "params": {},
                "device_paths": [path],
                "mounted": is_mounted
            })

            if label == 'MGS':
                mgs_target = target

        mgs_targets = defaultdict(list)
        if mgs_target is not None:
            labels = self._devices.mgt_get_target_labels(mgs_target['mgsnode'])
            for target_label in labels:
                fs_target = self._devices.get_target_by_label(target_label)
                entry = {
                    'uuid': fs_target['uuid'],
                    'name': fs_target['label'],
                    'nid': fs_target['primary_nid']
                }
                mgs_targets[fs_target['fsname']].append(entry)

        result = {
            "local_targets": local_targets,
            "mgs_targets": mgs_targets,
            "mgs_conf_params": {}
        }
        log.debug("detect_scan: %s" % json.dumps(result, indent=2))

        return result
示例#20
0
    def create_pdu_entries(self, simulator, args):
        """
        Create PDU records on the manager at ``args.url`` and associate
        their outlets with servers.

        Steps: create a power control type with one outlet per simulated
        server, create a power control device for each simulated PDU, then
        walk the manager's hosts in FQDN order (skipping those whose server
        profile contains ``posix_copytool_worker``) and attach the n-th
        host to outlet ``n`` of every PDU.

        Exits the process with status -1 when ``args`` lacks a username or
        password. Returns without creating anything when the simulator has
        no servers. Each HTTP response is asserted to have a 2xx status.
        """
        def _require_success(response):
            # Same check as every request below needs: any 2xx is success.
            assert 200 <= response.status_code < 300, response.text

        if not args.username or not args.password:
            sys.stderr.write(
                "Username and password required to create PDU entries\n")
            sys.exit(-1)

        session = self._get_authenticated_session(args.url, args.username,
                                                  args.password)

        log.info(
            "Creating PDU entries and associating PDU outlets with servers...")
        outlet_count = len(simulator.servers)

        if outlet_count < 1:
            log.error("Skipping PDU creation (no servers)")
            return

        # Create a custom type to ensure that it has enough outlets.
        # NB: If more servers are added later this won't work correctly,
        # but it should handle most use cases for simulated clusters.
        type_payload = {
            'agent': "fence_apc",
            'make': "Fake",
            'model': "PDU",
            'default_username': "******",
            'default_password': "******",
            'max_outlets': outlet_count
        }
        response = session.post("%s/api/power_control_type/" % args.url,
                                data=json.dumps(type_payload))
        _require_success(response)
        fence_apc = json.loads(response.text)

        log.debug("Created power_control_type: %s" % fence_apc['name'])

        pdu_entries = []
        for pdu_sim in simulator.power.pdu_sims.values():
            device_payload = {
                'device_type': fence_apc['resource_uri'],
                'name': pdu_sim.name,
                'address': pdu_sim.address,
                'port': pdu_sim.port
            }
            response = session.post("%s/api/power_control_device/" % args.url,
                                    data=json.dumps(device_payload))
            _require_success(response)
            created = json.loads(response.text)
            pdu_entries.append(created)
            log.debug("Created power_control_device: %s" % created['name'])

        response = session.get("%s/api/host/" % args.url,
                               data=json.dumps({'limit': 0}))
        _require_success(response)
        hosts = json.loads(response.text)['objects']
        servers = [
            s for s in hosts
            if 'posix_copytool_worker' not in s['server_profile']
        ]

        ordered = sorted(servers, key=lambda server: server['fqdn'])
        for i, server in enumerate(ordered):
            # Outlet identifiers are compared as 1-based strings.
            wanted = str(i + 1)
            for pdu in pdu_entries:
                outlet = [
                    o for o in pdu['outlets'] if o['identifier'] == wanted
                ][0]
                response = session.patch(
                    "%s/%s" % (args.url, outlet['resource_uri']),
                    data=json.dumps({'host': server['resource_uri']}))
                _require_success(response)
                log.debug("Created association %s <=> %s:%s" %
                          (server['fqdn'], pdu['name'], outlet['identifier']))
 def _open_fifo(self):
     """
     Open the wrapper's event FIFO for writing and keep the file object in
     ``self.fifo``.
     """
     path = self.wrapper.event_fifo
     # Third argument 1 selects line buffering.
     self.fifo = open(path, "w", 1)
     log.debug("Opened %s for write" % path)
 def unmount(self, nids, filesystem_name):
     """
     Drop the recorded mount of ``filesystem_name`` via ``nids``, if there
     is one. The state is saved whether or not an entry was removed.
     """
     log.debug("FakeClient.unmount %s %s" % (nids, filesystem_name))
     entry = (nids, filesystem_name)
     known_mounts = self.state['mounts']
     if entry in known_mounts:
         known_mounts.remove(entry)
     self.save()
示例#23
0
    def run(self, cmd, agent_daemon_context, kwargs):
        """
        Dispatch the agent command ``cmd`` against the simulated server.

        :param cmd: Name of the command to run.
        :param agent_daemon_context: Not used by this implementation.
        :param kwargs: Dict of arguments for the command.
        :return: Depends on the command: a dict of plugin session data, the
         ``agent_result_ok`` value, a fixed dict, the return value of the
         matching server method, or None for commands that only have side
         effects.
        :raises CallbackAfterResponse: for ``deregister_server``,
         ``shutdown_server`` and ``reboot_server``, carrying the callable
         that does the actual work.
        :raises PacemakerConfigurationError: for the fencing commands when
         the server is a worker.
        :raises RuntimeError: when ``cmd`` matches no branch and is not an
         attribute of the server.
        """

        # This is a little hackish: we don't actually separate the thread_state for
        # each simulated agent (they mostly don't even shell out when simulated) but
        # do this to avoid the subprocess log building up indefinitely.
        AgentShell.thread_state = ResultStore()

        log.debug("FakeActionPlugins: %s %s" % (cmd, kwargs))
        # The whole dispatch, including any return or raise, happens while
        # holding the lock.
        with self._lock:
            if cmd == 'device_plugin':
                device_plugins = FakeDevicePlugins(self._server)
                if kwargs['plugin']:
                    # A specific plugin was named: start a session for it only.
                    return {
                        kwargs['plugin']:
                        device_plugins.get(
                            kwargs['plugin'])(None).start_session()
                    }
                else:
                    # No plugin named: start a session for every plugin.
                    data = {}
                    for plugin, klass in device_plugins.get_plugins().items():
                        data[plugin] = klass(None).start_session()
                    return data

            # Commands that are acknowledged without doing anything.
            elif cmd in [
                    'configure_ntp', 'unconfigure_ntp', 'unconfigure_corosync',
                    'unconfigure_corosync2', 'initialise_block_device_drivers'
            ]:
                return agent_result_ok
            elif cmd == 'deregister_server':
                sim = self._simulator
                server = self._server

                class StopServer(threading.Thread):
                    def run(self):
                        sim.stop_server(server.fqdn)

                def kill():
                    server.crypto.delete()
                    # Got to go and run stop_server in another thread, because it will try
                    # to join all the agent threads (including the one that is running this
                    # callback)
                    StopServer().start()

                # NOTE(review): presumably the callback is invoked once the
                # response has been delivered -- confirm against
                # CallbackAfterResponse's handler.
                raise CallbackAfterResponse(None, kill)
            elif cmd == 'shutdown_server':
                server = self._server

                def _shutdown():
                    server.shutdown(simulate_shutdown=True)

                raise CallbackAfterResponse(None, _shutdown)
            elif cmd == 'reboot_server':
                server = self._server

                def _reboot():
                    server.shutdown(simulate_shutdown=True, reboot=True)

                raise CallbackAfterResponse(None, _reboot)
            elif cmd == 'failover_target':
                self._server._cluster.failover(kwargs['ha_label'])
                return agent_result_ok
            elif cmd == 'failback_target':
                self._server._cluster.failback(kwargs['ha_label'])
                return agent_result_ok
            elif cmd == 'set_conf_param':
                # A missing 'value' is passed on as None. Falls through and
                # returns None.
                self._server.set_conf_param(kwargs['key'],
                                            kwargs.get('value', None))
            # Commands that are accepted and ignored, returning None.
            elif cmd in [
                    'configure_pacemaker', 'unconfigure_pacemaker',
                    'enable_pacemaker', 'configure_target_store',
                    'unconfigure_target_store', 'configure_repo'
            ]:
                return
            elif cmd == 'kernel_status':
                return {
                    'running': 'fake_kernel-0.1',
                    'required': 'fake_kernel-0.1',
                    'available': ['fake_kernel-0.1']
                }
            elif cmd in ['configure_fencing', 'unconfigure_fencing']:
                # This shouldn't happen if the fence reconfiguration logic
                # is working. Good to simulate a failure here in case of
                # regressions, though.
                if self._server.is_worker:
                    raise PacemakerConfigurationError()
                return
            elif cmd == "host_corosync_config":
                return {}
            elif cmd == 'mount_lustre_filesystems':
                # 'filesystems' is iterated as (mountspec, mountpoint) pairs.
                for mountspec, mountpoint in kwargs['filesystems']:
                    self._server.add_client_mount(mountspec, mountpoint)
            elif cmd == 'unmount_lustre_filesystems':
                for mountspec, _ in kwargs['filesystems']:
                    self._server.del_client_mount(mountspec)
            elif cmd == 'configure_copytool':
                self._simulator.configure_hsm_copytool(self._server, **kwargs)
            elif cmd == 'unconfigure_copytool':
                self._simulator.unconfigure_hsm_copytool(kwargs['id'])
            elif cmd == 'start_monitored_copytool':
                self._simulator.start_monitored_copytool(
                    self._server, kwargs['id'])
            elif cmd == 'stop_monitored_copytool':
                self._simulator.stop_monitored_copytool(kwargs['id'])
            else:
                # Anything else is looked up as a method on the server and
                # called with kwargs as keyword arguments.
                try:
                    fn = getattr(self._server, cmd)
                except AttributeError:
                    raise RuntimeError("Unknown command %s" % cmd)
                else:
                    return fn(**kwargs)
示例#24
0
 def handle(self):
     """
     Pass the incoming request and the client's address to the PDU
     simulator attached to the server.
     """
     log.debug("Handling PDU request from %s:%s" % self.client_address)
     simulator = self.server.pdu_simulator
     simulator.handle_client(self.request, self.client_address)