def _shrink_cluster():
    db_instances = DBInstance.find_all(cluster_id=cluster_id,
                                       deleted=False).all()
    all_instance_ids = [db_instance.id for db_instance in db_instances]

    remove_instances = [Instance.load(context, instance_id)
                        for instance_id in instance_ids]

    left_instances = [Instance.load(context, instance_id)
                      for instance_id in all_instance_ids
                      if instance_id not in instance_ids]

    remove_member_ips = [self.get_ip(instance)
                         for instance in remove_instances]

    k = VerticaCluster.k_safety(len(left_instances))

    for db_instance in db_instances:
        if db_instance['type'] == 'master':
            master_instance = Instance.load(context, db_instance.id)
            if self.get_ip(master_instance) in remove_member_ips:
                raise RuntimeError(_("Cannot remove master instance!"))
            LOG.debug(_("Marking cluster k-safety: %s") % k)
            self.get_guest(master_instance).mark_design_ksafe(k)
            self.get_guest(master_instance).shrink_cluster(
                remove_member_ips)
            break

    for r in remove_instances:
        Instance.delete(r)

def _shrink_cluster():
    db_instances = DBInstance.find_all(cluster_id=cluster_id,
                                       deleted=False).all()
    all_instance_ids = [db_instance.id for db_instance in db_instances]

    remove_instances = [Instance.load(context, instance_id)
                        for instance_id in instance_ids]

    left_instances = [Instance.load(context, instance_id)
                      for instance_id in all_instance_ids
                      if instance_id not in instance_ids]

    remove_member_ips = [self.get_ip(instance)
                         for instance in remove_instances]

    k = VerticaCluster.k_safety(len(left_instances))

    for db_instance in db_instances:
        if db_instance['type'] == 'master':
            master_instance = Instance.load(context, db_instance.id)
            if self.get_ip(master_instance) in remove_member_ips:
                raise RuntimeError(_("Cannot remove master instance!"))
            LOG.debug("Marking cluster k-safety: %s", k)
            self.get_guest(master_instance).mark_design_ksafe(k)
            self.get_guest(master_instance).shrink_cluster(
                remove_member_ips)
            break

    for r in remove_instances:
        Instance.delete(r)

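# The two Vertica variants above are written as nested closures: they take no
# arguments and capture context, cluster_id and instance_ids from an enclosing
# shrink_cluster() method. The sketch below shows roughly how such a closure is
# driven; it assumes the same Timeout / reset_task / update_statuses_on_failure
# pattern used by the K2hdkc variant at the end of this section, and the exact
# wrapper differs per datastore strategy.
def shrink_cluster(self, context, cluster_id, instance_ids):
    def _shrink_cluster():
        ...  # one of the datastore-specific bodies shown in this section

    timeout = Timeout(CONF.cluster_usage_timeout)
    try:
        _shrink_cluster()
        self.reset_task()
    except Timeout as t:
        if t is not timeout:
            raise  # not my timeout
        LOG.exception("Timeout for shrinking cluster.")
        self.update_statuses_on_failure(cluster_id)
    finally:
        timeout.cancel()
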
def _shrink_cluster():
    cluster_node_ids = self.find_cluster_node_ids(cluster_id)
    cluster_nodes = self.load_cluster_nodes(context, cluster_node_ids)

    removed_nodes = CassandraClusterTasks.load_cluster_nodes(context,
                                                             removal_ids)

    LOG.debug("All nodes ready, proceeding with cluster setup.")

    # Update the list of seeds on remaining nodes if necessary.
    # Once all nodes are configured, decommission the removed nodes.
    # Cassandra will stream data from decommissioned nodes to the
    # remaining ones.
    try:
        # All nodes should have the same seeds.
        # We retrieve current seeds from the first node.
        test_node = self.load_cluster_nodes(context,
                                            cluster_node_ids[:1])[0]
        current_seeds = test_node["guest"].get_seeds()
        # The seeds will have to be updated on all remaining instances
        # if any of the seed nodes is going to be removed.
        update_seeds = any(node["ip"] in current_seeds
                           for node in removed_nodes)

        LOG.debug("Decommissioning removed nodes.")
        for node in removed_nodes:
            node["guest"].node_decommission()
            node["instance"].update_db(cluster_id=None)

        # Recompute the seed nodes based on the updated cluster
        # geometry if any of the existing seed nodes was removed.
        if update_seeds:
            LOG.debug("Updating seeds on the remaining nodes.")
            cluster_nodes = self.load_cluster_nodes(context,
                                                    cluster_node_ids)
            remaining_nodes = [node for node in cluster_nodes
                               if node not in removed_nodes]
            seeds = self.choose_seed_nodes(remaining_nodes)
            LOG.debug("Selected seed nodes: %s" % seeds)
            for node in remaining_nodes:
                LOG.debug("Configuring node: %s." % node["id"])
                node["guest"].set_seeds(seeds)

        # Wait for the removed nodes to go SHUTDOWN.
        LOG.debug("Waiting for all decommissioned nodes to shutdown.")
        if not self._all_instances_shutdown(removal_ids, cluster_id):
            # Now detached, failed nodes will stay available
            # in the list of standalone instances.
            return

        # Delete decommissioned instances only when the cluster is in a
        # consistent state.
        LOG.debug("Deleting decommissioned instances.")
        for node in removed_nodes:
            Instance.delete(node["instance"])

        LOG.debug("Cluster configuration finished successfully.")
    except Exception:
        LOG.exception(_("Error shrinking cluster."))
        self.update_statuses_on_failure(cluster_id)

def _shrink_cluster():
    removal_instances = [Instance.load(context, instance_id)
                         for instance_id in removal_instance_ids]
    for instance in removal_instances:
        Instance.delete(instance)

    # wait for instances to be deleted
    def all_instances_marked_deleted():
        non_deleted_instances = DBInstance.find_all(
            cluster_id=cluster_id, deleted=False).all()
        non_deleted_ids = [db_instance.id for db_instance
                           in non_deleted_instances]
        return not bool(
            set(removal_instance_ids).intersection(
                set(non_deleted_ids)))

    try:
        LOG.info("Deleting instances (%s)", removal_instance_ids)
        utils.poll_until(all_instances_marked_deleted,
                         sleep_time=2,
                         time_out=CONF.cluster_delete_time_out)
    except PollTimeOut:
        LOG.error("timeout for instances to be marked as deleted.")
        return

    db_instances = DBInstance.find_all(cluster_id=cluster_id,
                                       deleted=False).all()
    leftover_instances = [Instance.load(context, db_inst.id)
                          for db_inst in db_instances
                          if db_inst.id not in removal_instance_ids]
    leftover_cluster_ips = [self.get_ip(instance)
                            for instance in leftover_instances]

    # Get config changes for left over instances
    rnd_cluster_guest = self.get_guest(leftover_instances[0])
    cluster_context = rnd_cluster_guest.get_cluster_context()

    # apply the new config to all leftover instances
    for instance in leftover_instances:
        guest = self.get_guest(instance)
        # render the conf.d/cluster.cnf configuration
        cluster_configuration = self._render_cluster_config(
            context,
            instance,
            ",".join(leftover_cluster_ips),
            cluster_context['cluster_name'],
            cluster_context['replication_user'])
        guest.write_cluster_configuration_overrides(
            cluster_configuration)

def _shrink_cluster():
    removal_instances = [Instance.load(context, instance_id)
                         for instance_id in removal_instance_ids]
    for instance in removal_instances:
        Instance.delete(instance)

    # wait for instances to be deleted
    def all_instances_marked_deleted():
        non_deleted_instances = DBInstance.find_all(
            cluster_id=cluster_id, deleted=False).all()
        non_deleted_ids = [db_instance.id for db_instance
                           in non_deleted_instances]
        return not bool(
            set(removal_instance_ids).intersection(
                set(non_deleted_ids)))

    try:
        LOG.info(_("Deleting instances (%s)") % removal_instance_ids)
        utils.poll_until(all_instances_marked_deleted,
                         sleep_time=2,
                         time_out=CONF.cluster_delete_time_out)
    except PollTimeOut:
        LOG.error(_("timeout for instances to be marked as deleted."))
        return

    db_instances = DBInstance.find_all(cluster_id=cluster_id).all()
    leftover_instances = [Instance.load(context, db_inst.id)
                          for db_inst in db_instances
                          if db_inst.id not in removal_instance_ids]
    leftover_cluster_ips = [self.get_ip(instance)
                            for instance in leftover_instances]

    # Get config changes for left over instances
    rnd_cluster_guest = self.get_guest(leftover_instances[0])
    cluster_context = rnd_cluster_guest.get_cluster_context()

    # apply the new config to all leftover instances
    for instance in leftover_instances:
        guest = self.get_guest(instance)
        # render the conf.d/cluster.cnf configuration
        cluster_configuration = self._render_cluster_config(
            context,
            instance,
            ",".join(leftover_cluster_ips),
            cluster_context['cluster_name'],
            cluster_context['replication_user'])
        guest.write_cluster_configuration_overrides(
            cluster_configuration)

def _remove_nodes(self, coordinator, removed_nodes):
    LOG.debug("Decommissioning nodes and rebalancing the cluster.")
    guest_node_info = self.build_guest_node_info(removed_nodes)
    result = coordinator['guest'].remove_nodes(guest_node_info)
    if not result or len(result) < 2:
        raise exception.TroveError(
            _("No status returned from removing nodes from cluster."))

    if result[0]:
        for node in removed_nodes:
            instance = node['instance']
            LOG.debug("Deleting decommissioned instance %s." %
                      instance.id)
            instance.update_db(cluster_id=None)
            Instance.delete(instance)
    else:
        raise exception.TroveError(
            _("Could not remove nodes from cluster: %s") % result[1])

def _remove_nodes(self, coordinator, removed_nodes):
    LOG.debug("Decommissioning nodes and rebalancing the cluster.")
    coordinator['guest'].remove_nodes(
        {node['ip'] for node in removed_nodes})

    # Always remove decommissioned instances from the cluster,
    # irrespective of the result of rebalancing.
    for node in removed_nodes:
        node['instance'].update_db(cluster_id=None)

    LOG.debug("Waiting for the rebalancing process to finish.")
    self._wait_for_rebalance_to_finish(coordinator)

    # Delete decommissioned instances only when the cluster is in a
    # consistent state.
    LOG.debug("Deleting decommissioned instances.")
    for node in removed_nodes:
        Instance.delete(node['instance'])

def _shrink_cluster():
    cluster_node_ids = self.find_cluster_node_ids(cluster_id)
    cluster_nodes = self.load_cluster_nodes(context, cluster_node_ids)

    removed_nodes = CassandraClusterTasks.load_cluster_nodes(
        context, removal_ids)

    LOG.debug("All nodes ready, proceeding with cluster setup.")

    # Update the list of seeds on remaining nodes if necessary.
    # Once all nodes are configured, decommission the removed nodes.
    # Cassandra will stream data from decommissioned nodes to the
    # remaining ones.
    try:
        # All nodes should have the same seeds.
        # We retrieve current seeds from the first node.
        test_node = self.load_cluster_nodes(context,
                                            cluster_node_ids[:1])[0]
        current_seeds = test_node['guest'].get_seeds()
        # The seeds will have to be updated on all remaining instances
        # if any of the seed nodes is going to be removed.
        update_seeds = any(node['ip'] in current_seeds
                           for node in removed_nodes)

        LOG.debug("Decommissioning removed nodes.")
        for node in removed_nodes:
            node['guest'].node_decommission()
            node['instance'].update_db(cluster_id=None)

        # Recompute the seed nodes based on the updated cluster
        # geometry if any of the existing seed nodes was removed.
        if update_seeds:
            LOG.debug("Updating seeds on the remaining nodes.")
            cluster_nodes = self.load_cluster_nodes(
                context, cluster_node_ids)
            remaining_nodes = [
                node for node in cluster_nodes
                if node['id'] not in removal_ids
            ]
            seeds = self.choose_seed_nodes(remaining_nodes)
            LOG.debug("Selected seed nodes: %s", seeds)
            for node in remaining_nodes:
                LOG.debug("Configuring node: %s.", node['id'])
                node['guest'].set_seeds(seeds)

        # Wait for the removed nodes to go SHUTDOWN.
        LOG.debug("Waiting for all decommissioned nodes to shutdown.")
        if not self._all_instances_shutdown(removal_ids, cluster_id):
            # Now detached, failed nodes will stay available
            # in the list of standalone instances.
            return

        # Delete decommissioned instances only when the cluster is in a
        # consistent state.
        LOG.debug("Deleting decommissioned instances.")
        for node in removed_nodes:
            Instance.delete(node['instance'])

        LOG.debug("Cluster configuration finished successfully.")
    except Exception:
        LOG.exception(_("Error shrinking cluster."))
        self.update_statuses_on_failure(cluster_id)

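# Both Cassandra variants gate the final deletes on a base-class helper,
# _all_instances_shutdown(), which confirms that the decommissioned nodes have
# actually stopped. That helper is not shown in this section; the sketch below
# only illustrates the polling pattern, reusing utils.poll_until from the
# snippets above and a hypothetical get_service_status() accessor, so it is not
# Trove's actual implementation.
def _all_instances_shutdown_sketch(context, instance_ids,
                                   sleep_time=3, time_out=300):
    """Illustrative only: poll until every instance reports SHUTDOWN."""
    def _all_down():
        # get_service_status() is a stand-in for however the deployment
        # exposes the guest service state.
        return all(Instance.load(context, iid).get_service_status() ==
                   'SHUTDOWN' for iid in instance_ids)

    try:
        utils.poll_until(_all_down, sleep_time=sleep_time,
                         time_out=time_out)
        return True
    except PollTimeOut:
        return False
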
def shrink_cluster(self, context, cluster_id, removal_ids):
    """Shrink a K2hdkc Cluster."""
    LOG.debug("Begins shrink_cluster for %s. removal_ids: %s",
              cluster_id, removal_ids)

    # 1. Validates args
    if context is None:
        LOG.error("no context")
        return
    if cluster_id is None:
        LOG.error("no cluster_id")
        return
    if removal_ids is None:
        LOG.error("no removal_ids")
        return

    timeout = Timeout(CONF.cluster_usage_timeout)
    try:
        # 2. Retrieves db_instances from the database
        db_instances = DBInstance.find_all(cluster_id=cluster_id,
                                           deleted=False).all()
        # 3. Retrieves instance ids from the db_instances
        instance_ids = [db_instance.id for db_instance in db_instances]
        # 4. Checks if all cluster instances are running
        if not self._all_instances_running(instance_ids, cluster_id):
            LOG.error("instances are not ready yet")
            return
        # 5. Loads the instances to be removed
        instances = [Instance.load(context, instance_id)
                     for instance_id in removal_ids]
        LOG.debug("len(instances) {}".format(len(instances)))
        # 6. Checks if the instances to be removed are shut down
        #    (currently disabled)
        # if not self._all_instances_shutdown(removal_ids, cluster_id):
        #     LOG.error("removing instances are not shutdown yet")
        #     return
        # 7. Calls cluster_complete endpoint of K2hdkcGuestAgent
        LOG.debug("Calling cluster_complete as a final hook to each "
                  "node in the cluster")
        for instance in instances:
            self.get_guest(instance).cluster_complete()
        # 8. Deletes the removed nodes from OpenStack
        LOG.debug("delete node from OpenStack")
        for instance in instances:
            Instance.delete(instance)
        # 9. Resets the current cluster task status to None
        LOG.debug("reset cluster task to None")
        self.reset_task()
    except Timeout as t:
        # Note: administrators should reset the task via CLI in this case.
        if t is not timeout:
            raise  # not my timeout
        LOG.exception("Timeout for shrink cluster.")
        self.update_statuses_on_failure(
            cluster_id,
            status=inst_tasks.InstanceTasks.SHRINKING_ERROR)
    finally:
        timeout.cancel()

    LOG.debug("Completed shrink_cluster for %s.", cluster_id)