def _grow_cluster():
    # Pick an existing (non-new) member to act as the cluster head.
    db_instances = DBInstance.find_all(cluster_id=cluster_id).all()
    # next() with a None default so the guard below can actually fire.
    cluster_head = next(
        (Instance.load(context, db_inst.id)
         for db_inst in db_instances
         if db_inst.id not in new_instance_ids), None)
    if not cluster_head:
        raise TroveError(_("Unable to determine existing Redis cluster"
                           " member"))
    (cluster_head_ip, cluster_head_port) = (
        self.get_guest(cluster_head).get_node_ip())

    # Wait for cluster members to get to cluster-ready status.
    if not self._all_instances_ready(new_instance_ids, cluster_id):
        return

    LOG.debug("All members ready, proceeding for cluster setup.")
    new_insts = [Instance.load(context, instance_id)
                 for instance_id in new_instance_ids]
    # Use a list rather than a lazy map() so the guests can be
    # iterated twice below.
    new_guests = [self.get_guest(inst) for inst in new_insts]

    # Connect nodes to the cluster head.
    for guest in new_guests:
        guest.cluster_meet(cluster_head_ip, cluster_head_port)

    for guest in new_guests:
        guest.cluster_complete()
def _add_shard_cluster():
    db_instances = DBInstance.find_all(cluster_id=cluster_id,
                                       deleted=False,
                                       shard_id=shard_id).all()
    instance_ids = [db_instance.id for db_instance in db_instances]
    LOG.debug("instances in shard %(shard_id)s: %(instance_ids)s",
              {'shard_id': shard_id, 'instance_ids': instance_ids})
    if not self._all_instances_ready(instance_ids, cluster_id,
                                     shard_id):
        return

    members = [Instance.load(context, instance_id)
               for instance_id in instance_ids]

    db_query_routers = DBInstance.find_all(cluster_id=cluster_id,
                                           type='query_router',
                                           deleted=False).all()
    query_routers = [Instance.load(context, db_query_router.id)
                     for db_query_router in db_query_routers]

    if not self._create_shard(query_routers[0], members):
        return

    for member in members:
        self.get_guest(member).cluster_complete()
def _add_shard_cluster():
    db_instances = DBInstance.find_all(cluster_id=cluster_id,
                                       shard_id=shard_id).all()
    instance_ids = [db_instance.id for db_instance in db_instances]
    LOG.debug("instances in shard %s: %s", shard_id, instance_ids)
    if not self._all_instances_ready(instance_ids, cluster_id,
                                     shard_id):
        return

    members = [Instance.load(context, instance_id)
               for instance_id in instance_ids]

    if not self._create_replica_set(members, cluster_id, shard_id):
        return

    db_query_routers = DBInstance.find_all(cluster_id=cluster_id,
                                           type='query_router',
                                           deleted=False).all()
    query_routers = [Instance.load(context, db_query_router.id)
                     for db_query_router in db_query_routers]

    # replica_set_name is a closure variable supplied by the
    # enclosing add_shard_cluster() call.
    if not self._create_shard(query_routers, replica_set_name,
                              members, cluster_id, shard_id):
        return

    for member in members:
        self.get_guest(member).cluster_complete()
def test_create_sg_ha(self):
    name = "NAME"
    flavor_id = "flavor_id"
    image_id = "image_id"
    databases = "databases"
    users = "users"
    service_type = "mysql"
    volume_size = "10"
    backup_id = "backup_id"
    master_id = "master_id"
    extend = ""
    when(KSC_Instance)._create_instance(
        any(), any(), any(), any(), any(), any(),
        any(), any(), any(), any(), any(), any()).thenReturn(None)

    instance_type = InstanceType.SG
    self.assertEqual(None, KSC_Instance.create(
        self.context, name, flavor_id, image_id, databases, users,
        service_type, volume_size, backup_id, instance_type, extend,
        master_id))

    instance_type = InstanceType.HA
    self.assertEqual(None, KSC_Instance.create(
        self.context, name, flavor_id, image_id, databases, users,
        service_type, volume_size, backup_id, instance_type, extend,
        master_id))
def _shrink_cluster():
    db_instances = DBInstance.find_all(cluster_id=cluster_id,
                                       deleted=False).all()
    all_instance_ids = [db_instance.id for db_instance in db_instances]
    remove_instances = [Instance.load(context, instance_id)
                        for instance_id in instance_ids]
    left_instances = [Instance.load(context, instance_id)
                      for instance_id in all_instance_ids
                      if instance_id not in instance_ids]
    remove_member_ips = [self.get_ip(instance)
                         for instance in remove_instances]
    k = VerticaCluster.k_safety(len(left_instances))
    for db_instance in db_instances:
        if db_instance['type'] == 'master':
            master_instance = Instance.load(context, db_instance.id)
            if self.get_ip(master_instance) in remove_member_ips:
                raise RuntimeError(_("Cannot remove master instance!"))
            LOG.debug("Marking cluster k-safety: %s", k)
            self.get_guest(master_instance).mark_design_ksafe(k)
            self.get_guest(master_instance).shrink_cluster(
                remove_member_ips)
            break
    for r in remove_instances:
        Instance.delete(r)
def _grow_cluster():
    new_instances = [db_instance for db_instance in self.db_instances
                     if db_instance.id in instance_ids]
    new_members = [db_instance for db_instance in new_instances
                   if db_instance.type == 'member']
    new_query_routers = [db_instance for db_instance in new_instances
                         if db_instance.type == 'query_router']
    instances = []
    if new_members:
        shard_ids = set([db_instance.shard_id
                         for db_instance in new_members])
        query_router_id = self._get_running_query_router_id()
        if not query_router_id:
            return
        for shard_id in shard_ids:
            LOG.debug('growing cluster by adding shard %(shard_id)s '
                      'on query router %(router_id)s',
                      {'shard_id': shard_id,
                       'router_id': query_router_id})
            member_ids = [db_instance.id
                          for db_instance in new_members
                          if db_instance.shard_id == shard_id]
            if not self._all_instances_ready(member_ids, cluster_id,
                                             shard_id):
                return
            members = [Instance.load(context, member_id)
                       for member_id in member_ids]
            query_router = Instance.load(context, query_router_id)
            if not self._create_shard(query_router, members):
                return
            instances.extend(members)
    if new_query_routers:
        query_router_ids = [db_instance.id
                            for db_instance in new_query_routers]
        config_servers_ids = [db_instance.id
                              for db_instance in self.db_instances
                              if db_instance.type == 'config_server']
        LOG.debug('growing cluster by adding query routers '
                  '%(router)s, with config servers %(server)s',
                  {'router': query_router_ids,
                   'server': config_servers_ids})
        if not self._all_instances_ready(query_router_ids, cluster_id):
            return
        query_routers = [Instance.load(context, instance_id)
                         for instance_id in query_router_ids]
        config_servers_ips = [
            self.get_ip(Instance.load(context, config_server_id))
            for config_server_id in config_servers_ids]
        if not self._add_query_routers(
                query_routers, config_servers_ips,
                admin_password=self.get_cluster_admin_password(
                    context)):
            return
        instances.extend(query_routers)
    for instance in instances:
        self.get_guest(instance).cluster_complete()
def _shrink_cluster():
    removal_instances = [Instance.load(context, instance_id)
                         for instance_id in removal_instance_ids]
    for instance in removal_instances:
        Instance.delete(instance)

    # Wait for the instances to be deleted.
    def all_instances_marked_deleted():
        non_deleted_instances = DBInstance.find_all(
            cluster_id=cluster_id, deleted=False).all()
        non_deleted_ids = [db_instance.id
                           for db_instance in non_deleted_instances]
        return not bool(set(removal_instance_ids).intersection(
            set(non_deleted_ids)))

    try:
        LOG.info("Deleting instances (%s)", removal_instance_ids)
        utils.poll_until(all_instances_marked_deleted,
                         sleep_time=2,
                         time_out=CONF.cluster_delete_time_out)
    except PollTimeOut:
        LOG.error("Timed out waiting for instances to be marked "
                  "as deleted.")
        return

    db_instances = DBInstance.find_all(cluster_id=cluster_id,
                                       deleted=False).all()
    leftover_instances = [Instance.load(context, db_inst.id)
                          for db_inst in db_instances
                          if db_inst.id not in removal_instance_ids]
    leftover_cluster_ips = [self.get_ip(instance)
                            for instance in leftover_instances]

    # Get the config changes from one of the remaining instances.
    rnd_cluster_guest = self.get_guest(leftover_instances[0])
    cluster_context = rnd_cluster_guest.get_cluster_context()

    # Apply the new config to all leftover instances.
    for instance in leftover_instances:
        guest = self.get_guest(instance)
        # Render the conf.d/cluster.cnf configuration.
        cluster_configuration = self._render_cluster_config(
            context, instance, ",".join(leftover_cluster_ips),
            cluster_context['cluster_name'],
            cluster_context['replication_user'])
        guest.write_cluster_configuration_overrides(
            cluster_configuration)
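# The deletion wait above relies on trove.common.utils.poll_until. As a
# rough illustration of the polling pattern only -- this is NOT Trove's
# actual implementation, and poll_until_sketch is a hypothetical name --
# a minimal helper could look like this:
import time


def poll_until_sketch(retriever, sleep_time=1, time_out=None):
    """Call `retriever` until it returns truthy or `time_out` elapses.

    Sketch of the poll-until-condition idiom; Trove's real helper
    raises PollTimeOut rather than RuntimeError.
    """
    start = time.time()
    while True:
        if retriever():
            return
        if time_out is not None and time.time() - start > time_out:
            raise RuntimeError("polling timed out")
        time.sleep(sleep_time)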
def _create_cluster():
    # Fetch instances by cluster_id against instances table.
    db_instances = DBInstance.find_all(cluster_id=cluster_id,
                                       deleted=False).all()
    instance_ids = [db_instance.id for db_instance in db_instances]

    # Wait for cluster members to get to cluster-ready status.
    if not self._all_instances_ready(instance_ids, cluster_id):
        return

    LOG.debug("All members ready, proceeding for cluster setup.")
    instances = [Instance.load(context, instance_id)
                 for instance_id in instance_ids]
    member_ips = [self.get_ip(instance) for instance in instances]
    guests = [self.get_guest(instance) for instance in instances]

    # Users to be configured for password-less SSH.
    authorized_users_without_password = ['root', 'dbadmin']

    # Configuring password-less SSH for cluster members.
    # Strategy for setting up SSH:
    # get public keys for user from member-instances in cluster,
    # combine them, finally push it back to all instances,
    # and member instances add them to authorized keys.
    LOG.debug("Configuring password-less SSH on cluster members.")
    try:
        for user in authorized_users_without_password:
            pub_key = [guest.get_public_keys(user) for guest in guests]
            for guest in guests:
                guest.authorize_public_keys(user, pub_key)

        LOG.debug("Installing cluster with members: %s.", member_ips)
        for db_instance in db_instances:
            if db_instance['type'] == 'master':
                master_instance = Instance.load(context,
                                                db_instance.id)
                self.get_guest(master_instance).install_cluster(
                    member_ips)
                break

        LOG.debug("Finalizing cluster configuration.")
        for guest in guests:
            guest.cluster_complete()
    except Exception:
        LOG.exception(_("Error creating cluster."))
        self.update_statuses_on_failure(cluster_id)
def _create_instances(context, db_info, datastore, datastore_version,
                      instances, extended_properties, locality):
    member_config = {"id": db_info.id,
                     "instance_type": "member"}
    name_index = 1
    for instance in instances:
        if not instance.get("name"):
            instance['name'] = "%s-member-%s" % (db_info.name,
                                                 str(name_index))
            name_index += 1

    # Build a list (not a lazy map) so the instances are created
    # eagerly even under Python 3.
    return [Instance.create(context,
                            instance['name'],
                            instance['flavor_id'],
                            datastore_version.image_id,
                            [], [],
                            datastore, datastore_version,
                            instance.get('volume_size', None),
                            None,
                            availability_zone=instance.get(
                                'availability_zone', None),
                            nics=instance.get('nics', None),
                            configuration_id=None,
                            cluster_config=member_config,
                            locality=locality)
            for instance in instances]
def shrink(self, instances):
    """Removes instances from a cluster."""
    LOG.debug("Shrinking cluster %s.", self.id)

    self.validate_cluster_available()
    removal_instances = [Instance.load(self.context, inst_id)
                         for inst_id in instances]
    db_instances = DBInstance.find_all(cluster_id=self.db_info.id,
                                       deleted=False).all()
    if len(db_instances) - len(removal_instances) < 1:
        raise exception.ClusterShrinkMustNotLeaveClusterEmpty()

    self.db_info.update(task_status=ClusterTasks.SHRINKING_CLUSTER)
    try:
        task_api.load(self.context,
                      self.ds_version.manager).shrink_cluster(
            self.db_info.id,
            [instance.id for instance in removal_instances])
    except Exception:
        self.db_info.update(task_status=ClusterTasks.NONE)
        raise

    return self.__class__(self.context, self.db_info,
                          self.ds, self.ds_version)
def _create_instances(context, db_info, datastore, datastore_version,
                      instances):
    member_config = {"id": db_info.id, "instance_type": "member"}
    name_index = 1
    for instance in instances:
        if not instance.get("name"):
            instance["name"] = "%s-member-%s" % (db_info.name,
                                                 str(name_index))
            name_index += 1

    # Build a list (not a lazy map) so the instances are created
    # eagerly even under Python 3.
    return [Instance.create(context,
                            instance["name"],
                            instance["flavor_id"],
                            datastore_version.image_id,
                            [], [],
                            datastore, datastore_version,
                            instance.get("volume_size", None),
                            None,
                            availability_zone=instance.get(
                                "availability_zone", None),
                            nics=instance.get("nics", None),
                            configuration_id=None,
                            cluster_config=member_config)
            for instance in instances]
def test_create_sg_ha(self):
    name = "NAME"
    flavor_id = "flavor_id"
    image_id = "image_id"
    databases = "databases"
    users = "users"
    service_type = "mysql"
    volume_size = "10"
    backup_id = None  # "backup_id"
    master_id = "master_id"
    extend = {"autobackup_at": 2300,
              "duration": 1440,
              "expire_after": 7,
              "admin_user": "******",
              "admin_password": "******",
              "port": "3306"}
    # template_config_id = "0c4a1148-5cfd-463a-b205-a0b7d3d2ebd6"
    template_config_id = None
    # when(KSC_Instance)._create_instance(
    #     any(), any(), any(), any(), any(), any(),
    #     any(), any(), any(), any(), any(), any()).thenReturn(None)
    when(KSC_Instance)._check_flavor(any(), any()).thenReturn(
        {'image_id': "123"})

    instance_type = InstanceType.SG
    self.assertEqual(None, KSC_Instance.create(
        self.context, name, flavor_id, image_id, databases, users,
        service_type, volume_size, backup_id, instance_type,
        template_config_id, extend, master_id))
def _create_cluster():
    # Fetch instances by cluster_id against instances table.
    db_instances = DBInstance.find_all(cluster_id=cluster_id).all()
    instance_ids = [db_instance.id for db_instance in db_instances]
    LOG.debug("instances in cluster %(cluster_id)s: %(instance_ids)s",
              {'cluster_id': cluster_id, 'instance_ids': instance_ids})
    if not self._all_instances_ready(instance_ids, cluster_id):
        return
    LOG.debug("all instances in cluster %s ready.", cluster_id)

    instances = [Instance.load(context, instance_id)
                 for instance_id in instance_ids]

    # Filter tidb_server instances into a new list: tidb_server.
    tidb_server = [instance for instance in instances
                   if instance.type == 'tidb_server']
    # Log the tidb_server list itself (the flattened original
    # mistakenly referenced an undefined query_routers here).
    LOG.debug("tidb_server: %s",
              [instance.id for instance in tidb_server])

    # Filter pd_server instances into a new list: pd_server.
    pd_server = [instance for instance in instances
                 if instance.type == 'pd_server']
    LOG.debug("pd_server: %s", [instance.id for instance in pd_server])

    # Filter tikv instances into a new list: tikv.
    tikv = [instance for instance in instances
            if instance.type == 'tikv']
    LOG.debug("tikv: %s", [instance.id for instance in tikv])
def _create_resources():
    # Parse the ID from the Ref.
    instance_id = utils.get_id_from_href(instance)

    # Verify that the instance exists and can perform actions.
    from trove.instance.models import Instance
    instance_model = Instance.load(context, instance_id)
    instance_model.validate_can_perform_action()

    cls.verify_swift_auth_token(context)

    try:
        db_info = DBBackup.create(name=name,
                                  description=description,
                                  tenant_id=context.tenant,
                                  state=BackupState.NEW,
                                  instance_id=instance_id,
                                  deleted=False)
    except exception.InvalidModelError as ex:
        LOG.exception("Unable to create Backup record:")
        raise exception.BackupCreationError(str(ex))

    backup_info = {'id': db_info.id,
                   'name': name,
                   'description': description,
                   'instance_id': instance_id,
                   'backup_type': db_info.backup_type,
                   'checksum': db_info.checksum,
                   }
    api.API(context).create_backup(backup_info, instance_id)
    return db_info
def _create_instances(context, db_info, datastore, datastore_version,
                      instances, extended_properties, locality,
                      configuration_id):
    member_config = {"id": db_info.id,
                     "instance_type": "member"}
    name_index = int(time.time())
    for instance in instances:
        if not instance.get("name"):
            instance['name'] = "%s-member-%s" % (db_info.name,
                                                 str(name_index))
            name_index += 1

    return [Instance.create(context,
                            instance['name'],
                            instance['flavor_id'],
                            datastore_version.image_id,
                            [], [],
                            datastore, datastore_version,
                            instance.get('volume_size', None),
                            None,
                            availability_zone=instance.get(
                                'availability_zone', None),
                            nics=instance.get('nics', None),
                            configuration_id=configuration_id,
                            cluster_config=member_config,
                            volume_type=instance.get(
                                'volume_type', None),
                            modules=instance.get('modules'),
                            locality=locality,
                            region_name=instance.get('region_name'))
            for instance in instances]
def _grow_cluster():
    # Wait for new nodes to get to cluster-ready status.
    LOG.debug("Waiting for new nodes to become ready.")
    if not self._all_instances_ready(new_instance_ids, cluster_id):
        return

    new_instances = [Instance.load(context, instance_id)
                     for instance_id in new_instance_ids]
    added_nodes = [self.build_node_info(instance)
                   for instance in new_instances]

    LOG.debug("All nodes ready, proceeding with cluster setup.")

    cluster_node_ids = self.find_cluster_node_ids(cluster_id)
    cluster_nodes = self.load_cluster_nodes(context, cluster_node_ids)

    # Rebalance the cluster via one of the existing nodes.
    # Clients can continue to store and retrieve information and
    # do not need to be aware that a rebalance operation is taking
    # place.
    # The new nodes are marked active only if the rebalancing
    # completes.
    try:
        coordinator = cluster_nodes[0]
        self._add_nodes(coordinator, added_nodes)
        LOG.debug("Cluster configuration finished successfully.")
    except Exception:
        LOG.exception(_("Error growing cluster."))
        self.update_statuses_on_failure(cluster_id)
def _grow_cluster():
    # Wait for new nodes to get to cluster-ready status.
    LOG.debug("Waiting for new nodes to become ready.")
    if not self._all_instances_ready(new_instance_ids, cluster_id):
        return

    new_instances = [Instance.load(context, instance_id)
                     for instance_id in new_instance_ids]
    add_node_info = [self.build_node_info(instance)
                     for instance in new_instances]

    LOG.debug("All nodes ready, proceeding with cluster setup.")

    cluster_node_ids = self.find_cluster_node_ids(cluster_id)
    cluster_nodes = self.load_cluster_nodes(context, cluster_node_ids)
    old_node_info = [node for node in cluster_nodes
                     if node['id'] not in new_instance_ids]

    # Rebalance the cluster via one of the existing nodes.
    # Clients can continue to store and retrieve information and
    # do not need to be aware that a rebalance operation is taking
    # place.
    coordinator = old_node_info[0]
    self._add_nodes(coordinator, add_node_info)
    LOG.debug("Cluster grow finished successfully.")
def test_upgrade_ha(self):
    instance_type = InstanceType.STANDBY
    instance = fake()
    instance.id = 1
    instance.context = self.context
    instance.name = 'name'
    instance.flavor_id = 'flavor_id'
    instance.service_type = 'service_type'
    instance.volume_size = 'volume_size'
    instance.group_id = 'group_id'
    when(KSC_Instance)._create_instance(
        any(), any(), any(), any(), any(), any(),
        any(), any(), any(), any(), any(), any()).thenReturn(None)

    group_item = fake()
    group_item.id = 1
    group_item.group_id = 1
    group_item.type = DBInstanceType.SINGLE
    when(InstanceGroupItem).get_by_instance_id(
        any(), any()).thenReturn(group_item)

    service = {'image_id': ''}
    when(ServiceImage).find_by(service_name=any()).thenReturn(service)
    when(Backup).get_latest_backup(
        any(), group_id=any()).thenReturn(None)
    when(KSC_Instance).is_service_active(
        any(), instance_id=any()).thenReturn(None)
    when(KSC_Instance).is_lastbackup_ready(
        any(), instance_id=any()).thenReturn(None)

    self.assertEqual(None, KSC_Instance.upgrade_ha(instance))
def _remove_nodes(self, coordinator, removed_nodes):
    LOG.debug("Decommissioning nodes and rebalancing the cluster.")
    coordinator['guest'].remove_nodes({node['ip']
                                       for node in removed_nodes})

    # Always remove decommissioned instances from the cluster,
    # irrespective of the result of rebalancing.
    for node in removed_nodes:
        node['instance'].update_db(cluster_id=None)

    LOG.debug("Waiting for the rebalancing process to finish.")
    self._wait_for_rebalance_to_finish(coordinator)

    # Delete decommissioned instances only when the cluster is in a
    # consistent state.
    LOG.debug("Deleting decommissioned instances.")
    for node in removed_nodes:
        Instance.delete(node['instance'])
def _create_cluster():
    # Fetch instances by cluster_id against instances table.
    db_instances = DBInstance.find_all(cluster_id=cluster_id).all()
    instance_ids = [db_instance.id for db_instance in db_instances]
    LOG.debug("instances in cluster %s: %s", cluster_id, instance_ids)
    if not self._all_instances_ready(instance_ids, cluster_id):
        return
    instances = [Instance.load(context, instance_id)
                 for instance_id in instance_ids]

    # Filter query routers in instances into a new list:
    # query_routers.
    query_routers = [instance for instance in instances
                     if instance.type == 'query_router']
    LOG.debug("query routers: %s",
              [instance.id for instance in query_routers])

    # Filter config servers in instances into a new list:
    # config_servers.
    config_servers = [instance for instance in instances
                      if instance.type == 'config_server']
    LOG.debug("config servers: %s",
              [instance.id for instance in config_servers])

    # Filter members (non router/configsvr) into a new list: members.
    members = [instance for instance in instances
               if instance.type == 'member']
    LOG.debug("members: %s", [instance.id for instance in members])

    # Collect the ip/hostname of each config server so the query
    # routers can be pointed at them.
    config_server_ips = [self.get_ip(instance)
                         for instance in config_servers]
    LOG.debug("config server ips: %s", config_server_ips)

    LOG.debug("calling add_config_servers on query_routers")
    try:
        for query_router in query_routers:
            (self.get_guest(query_router)
             .add_config_servers(config_server_ips))
    except Exception:
        LOG.exception(_("error adding config servers"))
        self.update_statuses_on_failure(cluster_id)
        return

    if not self._create_replica_set(members, cluster_id):
        return

    replica_set_name = "rs1"
    if not self._create_shard(query_routers, replica_set_name,
                              members, cluster_id):
        return

    # Call to start checking status.
    for instance in instances:
        self.get_guest(instance).cluster_complete()
def _remove_nodes(self, coordinator, removed_nodes):
    LOG.debug("Decommissioning nodes and rebalancing the cluster.")
    guest_node_info = self.build_guest_node_info(removed_nodes)
    result = coordinator['guest'].remove_nodes(guest_node_info)
    if not result or len(result) < 2:
        raise exception.TroveError(
            _("No status returned from removing nodes from cluster."))
    if result[0]:
        for node in removed_nodes:
            instance = node['instance']
            LOG.debug("Deleting decommissioned instance %s.",
                      instance.id)
            instance.update_db(cluster_id=None)
            Instance.delete(instance)
    else:
        raise exception.TroveError(
            _("Could not remove nodes from cluster: %s") % result[1])
def _grow_cluster():
    LOG.debug("begin grow_cluster for Vertica cluster %s", cluster_id)

    db_instances = DBInstance.find_all(cluster_id=cluster_id,
                                       deleted=False).all()
    instance_ids = [db_instance.id for db_instance in db_instances]

    # Wait for new cluster members to get to cluster-ready status.
    if not self._all_instances_ready(new_instance_ids, cluster_id):
        return

    new_insts = [Instance.load(context, instance_id)
                 for instance_id in new_instance_ids]
    existing_instances = [Instance.load(context, instance_id)
                          for instance_id in instance_ids
                          if instance_id not in new_instance_ids]
    existing_guests = [self.get_guest(i) for i in existing_instances]
    new_guests = [self.get_guest(i) for i in new_insts]
    all_guests = new_guests + existing_guests

    authorized_users_without_password = ['root', 'dbadmin']
    new_ips = [self.get_ip(instance) for instance in new_insts]

    # Exchange public keys so all members trust one another.
    for user in authorized_users_without_password:
        pub_key = [guest.get_public_keys(user) for guest in all_guests]
        for guest in all_guests:
            guest.authorize_public_keys(user, pub_key)

    for db_instance in db_instances:
        if db_instance['type'] == 'master':
            LOG.debug("Found 'master' instance, calling grow on guest")
            master_instance = Instance.load(context, db_instance.id)
            self.get_guest(master_instance).grow_cluster(new_ips)
            break

    for guest in new_guests:
        guest.cluster_complete()
def _create_cluster():
    # Fetch instances by cluster_id against instances table.
    db_instances = DBInstance.find_all(cluster_id=cluster_id).all()
    instance_ids = [db_instance.id for db_instance in db_instances]
    LOG.debug("instances in cluster %s: %s", cluster_id, instance_ids)
    if not self._all_instances_ready(instance_ids, cluster_id):
        return
    LOG.debug("all instances in cluster %s ready.", cluster_id)
    instances = [Instance.load(context, instance_id)
                 for instance_id in instance_ids]

    # Filter query routers in instances into a new list:
    # query_routers.
    query_routers = [instance for instance in instances
                     if instance.type == 'query_router']
    LOG.debug("query routers: %s",
              [instance.id for instance in query_routers])

    # Filter config servers in instances into a new list:
    # config_servers.
    config_servers = [instance for instance in instances
                      if instance.type == 'config_server']
    LOG.debug("config servers: %s",
              [instance.id for instance in config_servers])

    # Filter members (non router/configsvr) into a new list: members.
    members = [instance for instance in instances
               if instance.type == 'member']
    LOG.debug("members: %s", [instance.id for instance in members])

    # Collect the ip/hostname of each config server so the query
    # routers can be pointed at them.
    config_server_ips = [self.get_ip(instance)
                         for instance in config_servers]
    LOG.debug("config server ips: %s", config_server_ips)

    if not self._add_query_routers(query_routers, config_server_ips):
        return

    if not self._create_shard(query_routers[0], members):
        return

    # Call to start checking status.
    for instance in instances:
        self.get_guest(instance).cluster_complete()
def grow_cluster(self, context, cluster_id, new_instance_ids):
    """Grow a K2hdkc Cluster."""
    LOG.debug("Begins grow_cluster for %(cluster)s. "
              "new_instance_ids: %(ids)s",
              {'cluster': cluster_id, 'ids': new_instance_ids})

    # 1. validates args
    if context is None:
        LOG.error("no context")
        return
    if cluster_id is None:
        LOG.error("no cluster_id")
        return
    if new_instance_ids is None:
        LOG.error("no new_instance_ids")
        return

    timeout = Timeout(CONF.cluster_usage_timeout)
    try:
        # 2. Retrieves db_instances from the database
        db_instances = DBInstance.find_all(cluster_id=cluster_id,
                                           deleted=False).all()
        LOG.debug("len(db_instances) %s", len(db_instances))

        # 3. Checks if new instances are ready
        if not self._all_instances_running(new_instance_ids,
                                           cluster_id):
            LOG.error("instances are not ready yet")
            return

        # 4. Loads instances
        instances = [Instance.load(context, instance_id)
                     for instance_id in new_instance_ids]
        LOG.debug("len(instances) %s", len(instances))

        # 5. Instantiates the GuestAgent class and
        # 6. calls the cluster_complete endpoint of K2hdkcGuestAgent.
        LOG.debug("Calling cluster_complete as a final hook to each "
                  "node in the cluster")
        for instance in instances:
            self.get_guest(instance).cluster_complete()

        # 7. Resets the current cluster task status to None.
        LOG.debug("reset cluster task to None")
        self.reset_task()
    except Timeout as t:
        # Note: administrators should reset the task via CLI in this
        # case.
        if t is not timeout:
            raise  # not my timeout
        LOG.exception("Timeout for growing cluster.")
        self.update_statuses_on_failure(
            cluster_id, status=inst_tasks.InstanceTasks.GROWING_ERROR)
    finally:
        timeout.cancel()

    LOG.debug("Completed grow_cluster for %s.", cluster_id)
def _create_resources():
    # Parse the ID from the Ref.
    instance_id = utils.get_id_from_href(instance)

    # Verify that the instance exists and can perform actions.
    from trove.instance.models import Instance
    instance_model = Instance.load(context, instance_id)
    instance_model.validate_can_perform_action()
    cls.validate_can_perform_action(instance_model, 'backup_create')

    cls.verify_swift_auth_token(context)

    if instance_model.cluster_id is not None:
        raise exception.ClusterInstanceOperationNotSupported()

    ds = instance_model.datastore
    ds_version = instance_model.datastore_version
    parent = None
    if parent_id:
        # Look up the parent info or fail early if not found or if
        # the user does not have access to the parent.
        _parent = cls.get_by_id(context, parent_id)
        parent = {
            'location': _parent.location,
            'checksum': _parent.checksum,
        }

    try:
        db_info = DBBackup.create(name=name,
                                  description=description,
                                  tenant_id=context.tenant,
                                  state=BackupState.NEW,
                                  instance_id=instance_id,
                                  parent_id=parent_id,
                                  datastore_version_id=ds_version.id,
                                  deleted=False)
    except exception.InvalidModelError as ex:
        LOG.exception(_("Unable to create backup record for "
                        "instance: %s"), instance_id)
        raise exception.BackupCreationError(str(ex))

    backup_info = {
        'id': db_info.id,
        'name': name,
        'description': description,
        'instance_id': instance_id,
        'backup_type': db_info.backup_type,
        'checksum': db_info.checksum,
        'parent': parent,
        'datastore': ds.name,
        'datastore_version': ds_version.name,
    }
    api.API(context).create_backup(backup_info, instance_id)
    return db_info
def create_cluster(self, context, cluster_id):
    """Create K2hdkcClusterTasks.

    This function is called in trove.taskmanager.Manager.create_cluster.
    """
    LOG.debug("Begins create_cluster for %s.", cluster_id)

    # 1. validates args
    if context is None:
        LOG.error("no context")
        return
    if cluster_id is None:
        LOG.error("no cluster_id")
        return

    timeout = Timeout(CONF.cluster_usage_timeout)
    LOG.debug("CONF.cluster_usage_timeout %s.", timeout)
    try:
        # 2. Retrieves db_instances from the database
        db_instances = DBInstance.find_all(cluster_id=cluster_id,
                                           deleted=False).all()
        # 3. Retrieves instance ids from the db_instances
        instance_ids = [db_instance.id
                        for db_instance in db_instances]
        # 4. Checks if instances are ready
        if not self._all_instances_running(instance_ids, cluster_id):
            LOG.error("instances are not ready yet")
            return
        # 5. Loads instances
        instances = [Instance.load(context, instance_id)
                     for instance_id in instance_ids]
        # 6. Instantiates GuestAgent for each guest instance and
        # 7. calls the cluster_complete endpoint of K2hdkcGuestAgent.
        for instance in instances:
            self.get_guest(instance).cluster_complete()
        # 8. Resets the current cluster task status to None.
        LOG.debug("reset cluster task to None")
        self.reset_task()
    except Timeout as t:
        # Note: administrators should reset the task via CLI in this
        # case.
        if t is not timeout:
            raise  # not my timeout
        LOG.exception("Timeout for building cluster.")
        self.update_statuses_on_failure(cluster_id)
    finally:
        timeout.cancel()

    LOG.debug("Completed create_cluster for %s.", cluster_id)
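# Both K2hdkc tasks above guard a long-running block with an eventlet
# Timeout, and the flattened originals compared the Timeout *class* to
# the instance ("if Timeout is not timeout"), which can never re-raise
# a foreign timeout. A minimal standalone sketch of the correct idiom,
# assuming eventlet is installed (the sleep stands in for the real
# cluster work):
import eventlet
from eventlet.timeout import Timeout

work_timeout = Timeout(5)  # arms immediately on construction
try:
    eventlet.sleep(1)  # placeholder for the long-running cluster work
except Timeout as t:
    if t is not work_timeout:
        raise  # some other timer fired; propagate it
    print("cluster operation timed out")
finally:
    work_timeout.cancel()  # always disarm the timer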
def shrink(self, instances):
    """Removes instances from a cluster."""
    LOG.debug("Shrinking cluster %s.", self.id)

    self.validate_cluster_available()
    removal_instances = [Instance.load(self.context, inst_id)
                         for inst_id in instances]
    db_instances = DBInstance.find_all(
        cluster_id=self.db_info.id).all()
    if len(db_instances) - len(removal_instances) < 1:
        raise exception.ClusterShrinkMustNotLeaveClusterEmpty()

    self.db_info.update(task_status=ClusterTasks.SHRINKING_CLUSTER)
    task_api.load(self.context,
                  self.ds_version.manager).shrink_cluster(
        self.db_info.id,
        [instance.id for instance in removal_instances])

    return PXCCluster(self.context, self.db_info, self.ds,
                      self.ds_version)
def _create_security_group(self, type):
    instance_type = type
    CONF.trove_security_groups_support = True
    master_id = 1
    security_group = {'name': 'security_group'}
    when(SecurityGroup).get_security_group_by_id_or_instance_id(
        any(), any()).thenReturn(security_group)
    when(SecurityGroup).create_for_instance(
        any(), any()).thenReturn(security_group)
    when(SecurityGroupRule).create_sec_group_rule(
        any(), any(), any(), any(), any(), any()).thenReturn(None)

    self.assertEqual([security_group['name']],
                     KSC_Instance._create_security_group(
                         self.context, type, master_id))
def test_is_lastbackup_ready(self):
    instance_id = "instance_id"
    group_item = fake()
    group_item.group_id = 1
    when(InstanceGroupItem).get_by_instance_id(
        any(), any()).thenReturn(group_item)

    when(Backup).get_latest_backup(
        any(), group_id=any()).thenReturn(None)
    self.assertRaises(exception.TroveError,
                      KSC_Instance.is_lastbackup_ready,
                      self.context, instance_id)

    backup_item = fake()
    backup_item.id = 1
    backup_item.state = BackupState.FAILED
    when(Backup).get_latest_backup(
        any(), group_id=any()).thenReturn(backup_item)
    self.assertRaises(exception.TroveError,
                      KSC_Instance.is_lastbackup_ready,
                      self.context, instance_id)

    backup_item.state = BackupState.COMPLETED
    self.assertEqual(True, KSC_Instance.is_lastbackup_ready(
        self.context, instance_id))
def _create_cluster():
    # Fetch instances by cluster_id against instances table.
    db_instances = DBInstance.find_all(cluster_id=cluster_id).all()
    instance_ids = [db_instance.id for db_instance in db_instances]

    # Wait for cluster members to get to cluster-ready status.
    if not self._all_instances_ready(instance_ids, cluster_id):
        return

    LOG.debug("All members ready, proceeding for cluster setup.")
    instances = [Instance.load(context, instance_id)
                 for instance_id in instance_ids]

    # Connect nodes to the first node.
    guests = [self.get_guest(instance) for instance in instances]
    try:
        cluster_head = instances[0]
        cluster_head_port = '6379'
        cluster_head_ip = self.get_ip(cluster_head)
        for guest in guests[1:]:
            guest.cluster_meet(cluster_head_ip, cluster_head_port)

        # Distribute the 16384 hash slots evenly; the first
        # (total_slots % num_nodes) nodes each take one extra slot.
        # Integer division is required here (`//`, not `/`) so the
        # slot boundaries stay integral under Python 3.
        num_nodes = len(instances)
        total_slots = 16384
        slots_per_node = total_slots // num_nodes
        leftover_slots = total_slots % num_nodes
        first_slot = 0
        for guest in guests:
            last_slot = first_slot + slots_per_node
            if leftover_slots > 0:
                leftover_slots -= 1
            else:
                last_slot -= 1
            guest.cluster_addslots(first_slot, last_slot)
            first_slot = last_slot + 1

        for guest in guests:
            guest.cluster_complete()
    except Exception:
        LOG.exception(_("Error creating cluster."))
        self.update_statuses_on_failure(cluster_id)
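# A quick standalone check of the slot arithmetic above. split_slots is
# a hypothetical helper written only for illustration (not part of
# Trove): the ranges it returns must cover 0..16383 exactly once.
def split_slots(total_slots=16384, num_nodes=3):
    ranges = []
    per_node, leftover = divmod(total_slots, num_nodes)
    first = 0
    for _ in range(num_nodes):
        last = first + per_node
        if leftover > 0:
            leftover -= 1  # this node takes one extra slot
        else:
            last -= 1
        ranges.append((first, last))
        first = last + 1
    return ranges

# split_slots(16384, 3) -> [(0, 5461), (5462, 10922), (10923, 16383)]
# i.e. 5462 + 5461 + 5461 = 16384 slots, contiguous and non-overlapping.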
def test_create_rr(self):
    name = "NAME"
    flavor_id = "flavor_id"
    image_id = "image_id"
    databases = "databases"
    users = "users"
    service_type = "mysql"
    volume_size = "10"
    backup_id = "backup_id"
    master_id = "master_id"
    extend = ""
    when(KSC_Instance)._create_instance(
        any(), any(), any(), any(), any(), any(),
        any(), any(), any(), any(), any(), any()).thenReturn(None)

    instance_type = InstanceType.RR
    group_item = fake()
    group_item.id = 1
    group_item.type = DBInstanceType.MASTER
    when(InstanceGroupItem).get_by_instance_id(
        any(), any()).thenReturn(group_item)
    when(KSC_Instance).is_service_active(
        any(), instance_id=any()).thenReturn(None)
    when(KSC_Instance).is_lastbackup_ready(
        any(), instance_id=any()).thenReturn(None)
    self.assertEqual(None, KSC_Instance.create(
        self.context, name, flavor_id, image_id, databases, users,
        service_type, volume_size, backup_id, instance_type, extend,
        master_id))
    verify(InstanceGroupItem).get_by_instance_id(any(), any())

    group_item.type = DBInstanceType.STANDBY
    self.assertRaises(Exception, KSC_Instance.create, self.context,
                      name, flavor_id, image_id, databases, users,
                      service_type, volume_size, backup_id,
                      instance_type, extend, master_id)

    when(KSC_Instance).is_service_active(
        any(), instance_id=any()).thenRaise(
        Exception("is_service_active"))
    group_item.type = DBInstanceType.MASTER
    self.assertRaises(Exception, KSC_Instance.create, self.context,
                      name, flavor_id, image_id, databases, users,
                      service_type, volume_size, backup_id,
                      instance_type, extend, master_id)
def _create_resources():
    # Parse the ID from the Ref.
    instance_id = utils.get_id_from_href(instance)

    # Verify that the instance exists and can perform actions.
    from trove.instance.models import Instance
    instance_model = Instance.load(context, instance_id)
    instance_model.validate_can_perform_action()

    cls.verify_swift_auth_token(context)

    parent = None
    if parent_id:
        # Look up the parent info or fail early if not found or if
        # the user does not have access to the parent.
        _parent = cls.get_by_id(context, parent_id)
        parent = {"location": _parent.location,
                  "checksum": _parent.checksum}

    try:
        db_info = DBBackup.create(name=name,
                                  description=description,
                                  tenant_id=context.tenant,
                                  state=BackupState.NEW,
                                  instance_id=instance_id,
                                  parent_id=parent_id,
                                  deleted=False)
    except exception.InvalidModelError as ex:
        LOG.exception("Unable to create Backup record:")
        raise exception.BackupCreationError(str(ex))

    backup_info = {"id": db_info.id,
                   "name": name,
                   "description": description,
                   "instance_id": instance_id,
                   "backup_type": db_info.backup_type,
                   "checksum": db_info.checksum,
                   "parent": parent}
    api.API(context).create_backup(backup_info, instance_id)
    return db_info
def test_get_root_on_create(self):
    root_on_create_val = Instance.get_root_on_create('redis')
    self.assertFalse(root_on_create_val)
def load_cluster_nodes(cls, context, node_ids):
    return [cls.build_node_info(Instance.load(context, node_id))
            for node_id in node_ids]
def get_cluster_admin_password(self, context):
    """The cluster admin's user credentials are stored on all query
    routers. Find one and get the guest to return the password.
    """
    instance = Instance.load(context,
                             self._get_running_query_router_id())
    return self.get_guest(instance).get_admin_password()
def _grow_cluster():
    # Wait for new nodes to get to cluster-ready status.
    LOG.debug("Waiting for new nodes to become ready.")
    if not self._all_instances_ready(new_instance_ids, cluster_id):
        return

    new_instances = [Instance.load(context, instance_id)
                     for instance_id in new_instance_ids]
    added_nodes = [self.build_node_info(instance)
                   for instance in new_instances]

    LOG.debug("All nodes ready, proceeding with cluster setup.")

    cluster_node_ids = self.find_cluster_node_ids(cluster_id)
    cluster_nodes = self.load_cluster_nodes(context, cluster_node_ids)
    old_nodes = [node for node in cluster_nodes
                 if node['id'] not in new_instance_ids]

    try:
        # All nodes should have the same seeds and credentials.
        # Retrieve the information from the first node.
        test_node = old_nodes[0]
        current_seeds = test_node['guest'].get_seeds()
        admin_creds = test_node['guest'].get_admin_credentials()

        # Bootstrap new nodes.
        # Seed nodes do not bootstrap. Current running nodes
        # must be used as seeds during the process.
        # Since we are adding to an existing cluster, ensure that the
        # new nodes have auto-bootstrapping enabled.
        # Start the added nodes.
        LOG.debug("Starting new nodes.")
        for node in added_nodes:
            node['guest'].set_auto_bootstrap(True)
            node['guest'].set_seeds(current_seeds)
            node['guest'].store_admin_credentials(admin_creds)
            node['guest'].restart()
            node['guest'].cluster_complete()

        # Recompute the seed nodes based on the updated cluster
        # geometry.
        seeds = self.choose_seed_nodes(cluster_nodes)

        # Configure each cluster node with the updated list of seeds.
        LOG.debug("Updating all nodes with new seeds: %s", seeds)
        for node in cluster_nodes:
            node['guest'].set_seeds(seeds)

        # Run nodetool cleanup on each of the previously existing
        # nodes to remove the keys that no longer belong to those
        # nodes. Wait for cleanup to complete on one node before
        # running it on the next node.
        LOG.debug("Cleaning up orphan data on old cluster nodes.")
        for node in old_nodes:
            nid = node['id']
            node['guest'].node_cleanup_begin()
            node['guest'].node_cleanup()
            LOG.debug("Waiting for node to finish its cleanup: %s",
                      nid)
            if not self._all_instances_running([nid], cluster_id):
                LOG.warning(_("Node did not complete cleanup "
                              "successfully: %s"), nid)

        LOG.debug("Cluster configuration finished successfully.")
    except Exception:
        LOG.exception(_("Error growing cluster."))
        self.update_statuses_on_failure(cluster_id)
def _create_cluster():
    # Fetch instances by cluster_id against the instances table.
    db_instances = DBInstance.find_all(cluster_id=cluster_id).all()
    instance_ids = [db_instance.id for db_instance in db_instances]
    LOG.debug("instances in cluster %s: %s" % (cluster_id, instance_ids))

    if not self._all_instances_ready(instance_ids, cluster_id):
        return
    LOG.debug("all instances in cluster %s ready." % cluster_id)

    instances = [Instance.load(context, instance_id)
                 for instance_id in instance_ids]

    # Filter query routers in instances into a new list: query_routers.
    query_routers = [instance for instance in instances
                     if instance.type == 'query_router']
    LOG.debug("query routers: %s" %
              [instance.id for instance in query_routers])

    # Filter config servers in instances into a new list: config_servers.
    config_servers = [instance for instance in instances
                      if instance.type == 'config_server']
    LOG.debug("config servers: %s" %
              [instance.id for instance in config_servers])

    # Filter members (non router/configsvr) into a new list: members.
    members = [instance for instance in instances
               if instance.type == 'member']
    LOG.debug("members: %s" % [instance.id for instance in members])

    # Collect the ip/hostname of each config server.
    config_server_ips = [self.get_ip(instance)
                         for instance in config_servers]
    LOG.debug("config server ips: %s" % config_server_ips)

    # Give the query routers the configsvr ips to connect to.
    # Create the admin user on the query routers.
    # The first will create the user, and the others will just reset
    # the password to the same value.
    LOG.debug("calling add_config_servers on, and sending admin user "
              "password to, query_routers")
    try:
        admin_created = False
        admin_password = utils.generate_random_password()
        for query_router in query_routers:
            guest = self.get_guest(query_router)
            guest.add_config_servers(config_server_ips)
            if admin_created:
                guest.store_admin_password(admin_password)
            else:
                guest.create_admin_user(admin_password)
                admin_created = True
    except Exception:
        LOG.exception(_("error adding config servers"))
        self.update_statuses_on_failure(cluster_id)
        return

    if not self._create_replica_set(members, cluster_id):
        return

    replica_set_name = "rs1"
    if not self._create_shard(query_routers, replica_set_name,
                              members, cluster_id):
        return

    # Call cluster_complete to start checking status.
    for instance in instances:
        self.get_guest(instance).cluster_complete()

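# For context on what _create_shard() ultimately has a query router do:
# mongos exposes the addShard admin command, which takes a
# "replica_set/host:port,..." member string. A minimal, hypothetical
# sketch using pymongo directly (add_shard_sketch and its parameters are
# illustrative stand-ins, not Trove's API):
from pymongo import MongoClient

def add_shard_sketch(router_ip, replica_set_name, member_ips, port=27017):
    client = MongoClient(router_ip, port)
    members = ",".join("%s:%d" % (ip, port) for ip in member_ips)
    # e.g. "rs1/10.0.0.5:27017,10.0.0.6:27017"
    return client.admin.command("addShard",
                                "%s/%s" % (replica_set_name, members))
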
def _shrink_cluster():
    cluster_node_ids = self.find_cluster_node_ids(cluster_id)
    cluster_nodes = self.load_cluster_nodes(context, cluster_node_ids)

    removed_nodes = CassandraClusterTasks.load_cluster_nodes(
        context, removal_ids)

    LOG.debug("All nodes ready, proceeding with cluster setup.")

    # Update the list of seeds on remaining nodes if necessary.
    # Once all nodes are configured, decommission the removed nodes.
    # Cassandra will stream data from decommissioned nodes to the
    # remaining ones.
    try:
        # All nodes should have the same seeds.
        # We retrieve current seeds from the first node.
        test_node = self.load_cluster_nodes(
            context, cluster_node_ids[:1])[0]
        current_seeds = test_node['guest'].get_seeds()
        # The seeds will have to be updated on all remaining instances
        # if any of the seed nodes is going to be removed.
        update_seeds = any(node['ip'] in current_seeds
                           for node in removed_nodes)

        LOG.debug("Decommissioning removed nodes.")
        for node in removed_nodes:
            node['guest'].node_decommission()
            node['instance'].update_db(cluster_id=None)

        # Recompute the seed nodes based on the updated cluster
        # geometry if any of the existing seed nodes was removed.
        if update_seeds:
            LOG.debug("Updating seeds on the remaining nodes.")
            cluster_nodes = self.load_cluster_nodes(
                context, cluster_node_ids)
            remaining_nodes = [
                node for node in cluster_nodes
                if node['id'] not in removal_ids
            ]
            seeds = self.choose_seed_nodes(remaining_nodes)
            LOG.debug("Selected seed nodes: %s", seeds)
            for node in remaining_nodes:
                LOG.debug("Configuring node: %s.", node['id'])
                node['guest'].set_seeds(seeds)

        # Wait for the removed nodes to go SHUTDOWN.
        LOG.debug("Waiting for all decommissioned nodes to shutdown.")
        if not self._all_instances_shutdown(removal_ids, cluster_id):
            # Now detached, failed nodes will stay available
            # in the list of standalone instances.
            return

        # Delete decommissioned instances only when the cluster is in a
        # consistent state.
        LOG.debug("Deleting decommissioned instances.")
        for node in removed_nodes:
            Instance.delete(node['instance'])

        LOG.debug("Cluster configuration finished successfully.")
    except Exception:
        LOG.exception(_("Error shrinking cluster."))
        self.update_statuses_on_failure(cluster_id)

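# Helpers such as _all_instances_ready(), _all_instances_running() and
# _all_instances_shutdown() used throughout these snippets are, at heart,
# status polls. A self-contained sketch of that shape (wait_for_status
# and get_status are illustrative stand-ins, not Trove's implementation):
import time

def wait_for_status(get_status, instance_ids, desired,
                    timeout=300, interval=5):
    # Poll until every instance reports the desired status or the
    # timeout expires; returns True on success.
    deadline = time.time() + timeout
    pending = set(instance_ids)
    while pending and time.time() < deadline:
        pending = {i for i in pending if get_status(i) != desired}
        if pending:
            time.sleep(interval)
    return not pending
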
def shrink_cluster(self, context, cluster_id, removal_ids):
    """Shrink a K2hdkc Cluster."""
    LOG.debug("Begins shrink_cluster for %s. removal_ids: %s",
              cluster_id, removal_ids)

    # 1. Validates args.
    if context is None:
        LOG.error("no context")
        return
    if cluster_id is None:
        LOG.error("no cluster_id")
        return
    if removal_ids is None:
        LOG.error("no removal_ids")
        return

    timeout = Timeout(CONF.cluster_usage_timeout)
    try:
        # 2. Retrieves db_instances from the database.
        db_instances = DBInstance.find_all(cluster_id=cluster_id,
                                           deleted=False).all()

        # 3. Retrieves instance ids from the db_instances.
        instance_ids = [db_instance.id for db_instance in db_instances]

        # 4. Checks if instances are running.
        if not self._all_instances_running(instance_ids, cluster_id):
            LOG.error("instances are not ready yet")
            return

        # 5. Loads the instances that are to be removed.
        instances = [Instance.load(context, instance_id)
                     for instance_id in removal_ids]
        LOG.debug("len(instances): %s", len(instances))

        # 6. Checks if the removing instances have shut down; this
        # GuestAgent-side check is currently disabled.
        # if not self._all_instances_shutdown(removal_ids, cluster_id):
        #     LOG.error("removing instances are not shutdown yet")
        #     return

        # 7. Calls the cluster_complete endpoint of K2hdkcGuestAgent.
        LOG.debug("Calling cluster_complete as a final hook to each "
                  "node in the cluster")
        for instance in instances:
            self.get_guest(instance).cluster_complete()

        # 8. Deletes the nodes from OpenStack.
        LOG.debug("delete node from OpenStack")
        for instance in instances:
            Instance.delete(instance)

        # 9. Resets the current cluster task status to None.
        LOG.debug("reset cluster task to None")
        self.reset_task()
    except Timeout as t:
        # Note: administrators should reset the task via CLI in this
        # case.
        if t is not timeout:
            raise  # not my timeout
        LOG.exception("Timeout for shrink cluster.")
        self.update_statuses_on_failure(
            cluster_id,
            status=inst_tasks.InstanceTasks.SHRINKING_ERROR)
    finally:
        timeout.cancel()

    LOG.debug("Completed shrink_cluster for %s.", cluster_id)

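# The "t is not timeout: raise" test in the handler above is the standard
# eventlet pattern: only swallow the Timeout instance you armed yourself
# and re-raise any nested one. A minimal sketch, assuming eventlet's
# Timeout is the one in use (run_with_timeout is illustrative):
from eventlet.timeout import Timeout

def run_with_timeout(work, seconds):
    timeout = Timeout(seconds)
    try:
        work()
    except Timeout as t:
        if t is not timeout:
            raise  # a different (nested) timeout; not ours to handle
        return False  # our timer fired
    finally:
        timeout.cancel()
    return True
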
def _grow_cluster():
    db_instances = DBInstance.find_all(cluster_id=cluster_id,
                                       deleted=False).all()
    existing_instances = [
        Instance.load(context, db_inst.id)
        for db_inst in db_instances
        if db_inst.id not in new_instance_ids
    ]
    if not existing_instances:
        raise TroveError(_("Unable to determine existing cluster "
                           "member(s)"))

    # Get the list of ips of existing cluster members.
    existing_cluster_ips = [
        self.get_ip(instance) for instance in existing_instances
    ]
    existing_instance_guests = [
        self.get_guest(instance) for instance in existing_instances
    ]

    # Get the cluster context to set up the new members.
    cluster_context = existing_instance_guests[0].get_cluster_context()

    # Wait for cluster members to get to cluster-ready status.
    if not self._all_instances_ready(new_instance_ids, cluster_id):
        raise TroveError(_("Instances in cluster did not report "
                           "ACTIVE"))

    LOG.debug("All members ready, proceeding for cluster setup.")

    # Get the new instances to join the cluster.
    new_instances = [
        Instance.load(context, instance_id)
        for instance_id in new_instance_ids
    ]
    new_cluster_ips = [
        self.get_ip(instance) for instance in new_instances
    ]
    for instance in new_instances:
        guest = self.get_guest(instance)
        guest.reset_admin_password(cluster_context['admin_password'])

        # Render the conf.d/cluster.cnf configuration.
        cluster_configuration = self._render_cluster_config(
            context, instance, ",".join(existing_cluster_ips),
            cluster_context['cluster_name'],
            cluster_context['replication_user'])

        # Push the cluster config; joining nodes never bootstrap.
        bootstrap = False
        guest.install_cluster(cluster_context['replication_user'],
                              cluster_configuration, bootstrap)

    self._check_cluster_for_root(context, existing_instances,
                                 new_instances)

    # Apply the new config to all instances.
    for instance in existing_instances + new_instances:
        guest = self.get_guest(instance)
        # Render the conf.d/cluster.cnf configuration.
        cluster_configuration = self._render_cluster_config(
            context, instance,
            ",".join(existing_cluster_ips + new_cluster_ips),
            cluster_context['cluster_name'],
            cluster_context['replication_user'])
        guest.write_cluster_configuration_overrides(
            cluster_configuration)

    for instance in new_instances:
        guest = self.get_guest(instance)
        guest.cluster_complete()

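# _render_cluster_config() above fills a conf.d/cluster.cnf template with
# the member IPs joined into a comma-separated address list. A hedged
# sketch of the kind of fragment it could produce; the wsrep option names
# are Galera's, but the helper itself and its exact output are
# assumptions, not Trove's template:
def render_cluster_config_sketch(cluster_ips, cluster_name, repl_user):
    return ("[mysqld]\n"
            "wsrep_cluster_name=%s\n"
            "wsrep_cluster_address=gcomm://%s\n"
            "wsrep_sst_auth=%s:%s\n"
            % (cluster_name, ",".join(cluster_ips),
               repl_user["name"], repl_user["password"]))
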
def _create_cluster():
    # Fetch instances by cluster_id against the instances table.
    db_instances = DBInstance.find_all(cluster_id=cluster_id).all()
    instance_ids = [db_instance.id for db_instance in db_instances]

    LOG.debug("Waiting for instances to get to cluster-ready status.")
    # Wait for cluster members to get to cluster-ready status.
    if not self._all_instances_ready(instance_ids, cluster_id):
        raise TroveError(_("Instances in cluster did not report "
                           "ACTIVE"))

    LOG.debug("All members ready, proceeding for cluster setup.")
    instances = [
        Instance.load(context, instance_id)
        for instance_id in instance_ids
    ]
    cluster_ips = [self.get_ip(instance) for instance in instances]
    instance_guests = [
        self.get_guest(instance) for instance in instances
    ]

    # Create the replication user and password for synchronizing the
    # galera cluster.
    replication_user = {
        "name": self.CLUSTER_REPLICATION_USER,
        "password": utils.generate_random_password(),
    }

    # Galera cluster name must be unique and be shorter than a full
    # uuid string so we remove the hyphens and chop it off. It was
    # recommended to be 16 chars or less.
    # (This is not currently documented in the Galera docs.)
    cluster_name = utils.generate_uuid().replace("-", "")[:16]

    LOG.debug("Configuring cluster configuration.")
    try:
        # Set the admin password for all the instances because the
        # password in the my.cnf will be wrong after the joiner
        # instances sync with the donor instance.
        admin_password = str(utils.generate_random_password())
        for guest in instance_guests:
            guest.reset_admin_password(admin_password)

        bootstrap = True
        for instance in instances:
            guest = self.get_guest(instance)

            # Render the conf.d/cluster.cnf configuration.
            cluster_configuration = self._render_cluster_config(
                context, instance, ",".join(cluster_ips),
                cluster_name, replication_user)

            # Push the cluster config and bootstrap the first instance.
            guest.install_cluster(replication_user,
                                  cluster_configuration, bootstrap)
            bootstrap = False

        LOG.debug("Finalizing cluster configuration.")
        for guest in instance_guests:
            guest.cluster_complete()
    except Exception:
        LOG.exception(_("Error creating cluster."))
        self.update_statuses_on_failure(cluster_id)

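# The 16-character cluster name computed above can be reproduced with the
# standard library alone; uuid4().hex is already hyphen-free, so a
# truncation is all that is needed (make_cluster_name is an illustrative
# stand-in for the utils.generate_uuid() call):
import uuid

def make_cluster_name():
    return uuid.uuid4().hex[:16]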