def get_partition_sizes(self):
    """Collect partition sizes from every broker by running ``du`` over SSH.

    For each broker in ``self.cluster.brokers`` the shared ``self._client`` is
    connected to the broker's hostname and ``du -sk <datadir>/*`` is executed.
    Every output line shaped like ``<size> <path>/<topic>-<partition>`` is
    parsed; sizes of partitions the cluster knows about are stored through
    ``set_size``, while unknown topics or partition numbers are only logged.

    Raises:
        UnknownBrokerException: if a broker has no hostname to connect to.
    """
    # Raw string: the regex escapes must not be interpreted as string escapes
    size_re = re.compile(r"^([0-9]+)\s+.*?\/([a-z0-9_-]+)-([0-9]+)\s*$", re.I)

    for broker_id, broker in self.cluster.brokers.items():
        if broker.hostname is None:
            raise UnknownBrokerException("Cannot get sizes for broker ID {0} which has no hostname. "
                                         "Remove the broker from the cluster before balance".format(broker_id))

        log.info("Getting partition sizes via SSH for {0}".format(broker.hostname))
        self._client.connect(broker.hostname, allow_agent=True)
        try:
            stdin, stdout, stderr = self._client.exec_command('du -sk {0}/*'.format(self.args.datadir))
            for ln in stdout.readlines():
                m = size_re.match(ln)
                if not m:
                    # Not a "<size> .../<topic>-<partition>" line
                    continue

                size = int(m.group(1))
                topic = m.group(2)
                pnum = int(m.group(3))

                if topic not in self.cluster.topics:
                    log.warning("Unknown topic found on disk on broker {0}: {1}".format(broker, topic))
                elif pnum >= len(self.cluster.topics[topic].partitions):
                    log.warning("Unknown partition found on disk on broker {0}: {1}:{2}".format(broker, topic, pnum))
                else:
                    self.cluster.topics[topic].partitions[pnum].set_size(size)
        finally:
            # Release the SSH connection even if the command or parsing fails
            self._client.close()
def log_broker_summary(self):
    """Log one summary line per broker, in ascending broker ID order.

    Each line reports the broker's leader count, partition count, leader
    percentage and total size, as returned by the broker object.
    """
    template = "Broker {0}: partitions={1}/{2} ({3:.2f}%), size={4}"
    for broker_id in sorted(self.brokers):
        broker = self.brokers[broker_id]
        leaders = broker.num_leaders()
        partition_count = broker.num_partitions()
        leader_pct = broker.percent_leaders()
        size = broker.total_size()
        log.info(template.format(broker_id, leaders, partition_count, leader_pct, size))
def execute(self, num, total, zookeeper, tools_path, plugins=None, dry_run=True):
    """Run this reassignment batch, notifying plugins before and after.

    Args:
        num: number of this batch, used in log output and passed to plugins.
        total: total number of batches, used in log output.
        zookeeper: Zookeeper connect string handed to the Kafka tool.
        tools_path: directory containing ``kafka-reassign-partitions.sh``.
        plugins: optional list of plugin objects; each gets
            ``before_execute_batch(num)`` and ``after_execute_batch(num)``.
        dry_run: when true, only the plugin hooks run and nothing is executed.
    """
    # A None default avoids sharing one mutable list object between calls
    if plugins is None:
        plugins = []

    for plugin in plugins:
        plugin.before_execute_batch(num)

    if not dry_run:
        # The temporary file only exists inside this block, so the tool invocation and the
        # completion polling (which both read it by name) must stay within it
        with NamedTemporaryFile(mode='w') as assignfile:
            json.dump(self.dict_for_reassignment(), assignfile)
            assignfile.flush()
            proc = subprocess.Popen(['{0}/kafka-reassign-partitions.sh'.format(tools_path), '--execute',
                                     '--zookeeper', zookeeper,
                                     '--reassignment-json-file', assignfile.name])
            proc.wait()

            # Wait until finished
            while True:
                remaining_partitions = check_reassignment_completion(zookeeper, tools_path, assignfile.name)
                if remaining_partitions == 0:
                    break

                log.info('Partition reassignment {0}/{1} in progress [ {2}/{3} partitions remain ]. Sleeping {4} seconds'.format(num, total, remaining_partitions, len(self.partitions), self.pause_time))
                time.sleep(self.pause_time)

    for plugin in plugins:
        plugin.after_execute_batch(num)
def log_broker_summary(self):
    """Write a leadership/size summary to the log for each broker, lowest broker ID first."""
    ordered_brokers = sorted(self.brokers.items(), key=lambda entry: entry[0])
    for broker_id, broker in ordered_brokers:
        summary = "Broker {0}: partitions={1}/{2} ({3:.2f}%), size={4}".format(
            broker_id,
            broker.num_leaders(),
            broker.num_partitions(),
            broker.percent_leaders(),
            broker.total_size(),
        )
        log.info(summary)
def run_preferred_replica_elections(batches, args, tools_path, plugins, dry_run):
    """Execute every preferred replica election batch in order.

    On real (non dry-run) runs, ``args.ple_wait`` seconds are slept before each
    batch except the first. Each batch is run through its ``execute`` method.
    """
    batch_total = len(batches)
    for batch_num, batch in enumerate(batches, 1):
        is_first = batch_num == 1
        # Sleep between PLEs
        if not is_first and not dry_run:
            log.info("Waiting {0} seconds for replica election to complete".format(args.ple_wait))
            time.sleep(args.ple_wait)

        log.info("Executing preferred replica election {0}/{1}".format(batch_num, batch_total))
        batch.execute(batch_num, batch_total, args.zookeeper, tools_path, plugins, dry_run)
def check_and_get_sizes(action_cls, args, cluster, sizer_map):
    """Fetch partition sizes when the chosen action needs them.

    The sizer class is looked up in ``sizer_map`` by ``args.sizer``, built with
    ``(args, cluster)`` and asked to populate the sizes. When ``args.size`` is
    set, the size of every partition is then written to the log.
    """
    if not action_cls.needs_sizes:
        return

    sizer = sizer_map[args.sizer](args, cluster)
    sizer.get_partition_sizes()

    if not args.size:
        return

    log.info("Partition Sizes:")
    for topic_name in cluster.topics:
        topic_partitions = cluster.topics[topic_name].partitions
        for part in topic_partitions:
            log.info("{0} {1}:{2}".format(part.size, topic_name, part.num))
def process_cluster(self):
    """Balance partitions by rack, one replica position at a time.

    Raises:
        BalanceException: if every broker reports the same rack value, which
            is treated as the cluster having no rack information.
    """
    log.info("Starting partition balance by rack")

    # Check if rack information is set for the cluster
    distinct_racks = set(broker.rack for broker in self.cluster.brokers.values())
    if len(distinct_racks) == 1:
        raise BalanceException("Cannot balance cluster by rack as it has no rack information")

    # Balance partitions at each position separately, up to the max RF for the cluster
    replica_positions = range(self.cluster.max_replication_factor())
    for pos in replica_positions:
        self._process_partitions_at_pos(pos)
def get_partition_sizes(self):
    """Collect partition sizes from every broker via the ``ssh`` command line client.

    Runs ``du -sk <datadir>/*`` on each broker, using ``self.properties['datadir']``,
    and feeds every output line, matched against ``self.size_re``, to
    ``self.process_df_match`` together with the broker ID.

    Raises:
        UnknownBrokerException: if a broker has no hostname to connect to.
    """
    # stderr of ssh is discarded; the context manager guarantees the handle is closed again
    with open(os.devnull, 'w') as fnull:
        for broker_id, broker in self.cluster.brokers.items():
            if broker.hostname is None:
                raise UnknownBrokerException("Cannot get sizes for broker ID {0} which has no hostname. "
                                             "Remove the broker from the cluster before balance".format(broker_id))

            log.info("Getting partition sizes via SSH for {0}".format(broker.hostname))
            proc = subprocess.Popen(['ssh', broker.hostname, 'du -sk {0}/*'.format(self.properties['datadir'])],
                                    stdout=subprocess.PIPE, stderr=fnull)
            try:
                for line in proc.stdout:
                    self.process_df_match(self.size_re.match(line.decode()), broker_id)
            finally:
                # Close our end of the pipe and reap the child so neither a descriptor
                # nor a zombie process is left behind for each broker
                proc.stdout.close()
                proc.wait()
def get_partition_sizes(self):
    """Collect partition sizes from every broker over JMX.

    For each broker a JMX connection is opened to ``hostname:jmx_port``, all
    MBeans matching ``kafka.log:name=Size,*`` are queried, and each one is
    handed to ``self._fetch_bean``. The connector is closed again before the
    next broker is contacted, also when a query fails.
    """
    for broker in self.cluster.brokers.values():
        _validate_broker(broker)

        log.info("Getting partition sizes via JMX for {0}".format(broker.hostname))
        jmxurl = self._java_provider.javax.management.remote.JMXServiceURL(
            "service:jmx:rmi:///jndi/rmi://{0}:{1}/jmxrmi".format(broker.hostname, broker.jmx_port))
        jmxsoc = self._java_provider.javax.management.remote.JMXConnectorFactory.connect(jmxurl, self._envhash)
        try:
            connection = jmxsoc.getMBeanServerConnection()
            beans = connection.queryNames(
                self._java_provider.javax.management.ObjectName("kafka.log:name=Size,*"), None)
            for bean in beans:
                self._fetch_bean(connection, bean)
        finally:
            # Do not leak the JMX connector if querying or fetching raises
            jmxsoc.close()
def get_partition_sizes(self):
    """Collect partition sizes from every broker via the ``ssh`` command line client.

    Runs ``du -sk <datadir>/*`` on each broker, using ``self.args.datadir``, and
    passes each output line, matched against ``self.size_re``, to
    ``self.process_df_match`` along with the broker ID.

    Raises:
        UnknownBrokerException: if a broker has no hostname to connect to.
    """
    # ssh's stderr is thrown away; open devnull once and make sure it is closed afterwards
    with open(os.devnull, "w") as devnull:
        for broker_id, broker in self.cluster.brokers.items():
            if broker.hostname is None:
                raise UnknownBrokerException(
                    "Cannot get sizes for broker ID {0} which has no hostname. "
                    "Remove the broker from the cluster before balance".format(broker_id)
                )

            log.info("Getting partition sizes via SSH for {0}".format(broker.hostname))
            proc = subprocess.Popen(
                ["ssh", broker.hostname, "du -sk {0}/*".format(self.args.datadir)],
                stdout=subprocess.PIPE,
                stderr=devnull,
            )
            try:
                for line in proc.stdout:
                    self.process_df_match(self.size_re.match(line.decode()), broker_id)
            finally:
                # Release the pipe and wait for ssh to exit, so no descriptor or
                # zombie process accumulates per broker
                proc.stdout.close()
                proc.wait()
def process_cluster(self):
    """Run the rack-aware balance for each replica position of the cluster.

    Raises:
        BalanceException: when all brokers share a single rack value, which the
            code treats as missing rack information.
    """
    log.info("Starting partition balance by rack")

    # Check if rack information is set for the cluster
    seen_racks = set()
    for broker in self.cluster.brokers.values():
        seen_racks.add(broker.rack)

    has_single_rack = len(seen_racks) == 1
    if has_single_rack:
        raise BalanceException(
            "Cannot balance cluster by rack as it has no rack information")

    # Figure out the max RF for the cluster, then balance each position separately
    max_rf = self.cluster.max_replication_factor()
    for position in range(max_rf):
        self._process_partitions_at_pos(position)
def main():
    """Entry point: plan a new partition assignment and, unless dry-running, apply it.

    Loads the action and sizer modules and the plugins, reads the cluster from
    Zookeeper, runs the selected action on a clone of the cluster, executes the
    resulting reassignment batches and finally (unless skipped) the preferred
    replica elections. Plugins are notified at each step.

    Returns:
        ``os.EX_OK``.
    """
    # Start by loading all the modules
    actions_pkg = kafka.tools.assigner.actions
    sizers_pkg = kafka.tools.assigner.sizers
    action_map = get_module_map(actions_pkg, actions_pkg.ActionModule)
    sizer_map = get_module_map(sizers_pkg, sizers_pkg.SizerModule)
    plugins = get_all_plugins()

    # Set up and parse all CLI arguments
    args = set_up_arguments(action_map, sizer_map, plugins)
    run_plugins_at_step(plugins, 'set_arguments', args)

    tools_path = get_tools_path(args.tools_path)
    check_java_home()

    cluster = Cluster.create_from_zookeeper(args.zookeeper)
    run_plugins_at_step(plugins, 'set_cluster', cluster)

    # If the module needs the partition sizes, call a size module to get the information
    check_and_get_sizes(action_map[args.action], args, cluster, sizer_map)
    run_plugins_at_step(plugins, 'after_sizes')
    print_leadership("before", cluster, args.leadership)

    # Clone the cluster, and run the action to generate a new cluster state
    newcluster = cluster.clone()
    action_to_run = action_map[args.action](args, newcluster)
    action_to_run.process_cluster()
    run_plugins_at_step(plugins, 'set_new_cluster', action_to_run.cluster)
    print_leadership("after", newcluster, args.leadership)

    move_partitions = cluster.changed_partitions(action_to_run.cluster)
    batches = split_partitions_into_batches(move_partitions, batch_size=args.moves, use_class=Reassignment)
    run_plugins_at_step(plugins, 'set_batches', batches)

    log.info("Partition moves required: {0}".format(len(move_partitions)))
    log.info("Number of batches: {0}".format(len(batches)))
    dry_run = is_dry_run(args)

    for batch_num, batch in enumerate(batches, 1):
        log.info("Executing partition reassignment {0}/{1}: {2}".format(batch_num, len(batches), repr(batch)))
        batch.execute(batch_num, len(batches), args.zookeeper, tools_path, plugins, dry_run)

    run_plugins_at_step(plugins, 'before_ple')

    if not args.skip_ple:
        # Elections are run over every partition of the new cluster state, not just the moved ones
        all_cluster_partitions = list(action_to_run.cluster.partitions())
        batches = split_partitions_into_batches(all_cluster_partitions, batch_size=args.ple_size,
                                                use_class=ReplicaElection)
        log.info("Number of replica elections: {0}".format(len(batches)))
        run_preferred_replica_elections(batches, args, tools_path, plugins, dry_run)

    run_plugins_at_step(plugins, 'finished')

    return os.EX_OK
def create_from_zookeeper(cls, zkconnect):
    """Build a cluster object from the state stored in Zookeeper.

    Brokers are loaded through ``add_brokers_from_zk`` and every topic found
    under ``/brokers/topics`` is added through ``add_topic_with_replicas``.

    Args:
        zkconnect: Zookeeper connect string passed to ``KazooClient``.

    Returns:
        A new instance of ``cls`` populated with brokers and topics.

    Raises:
        ZookeeperException: if the connection cannot be established or the
            cluster has no topics.
    """
    log.info("Connecting to zookeeper {0}".format(zkconnect))
    try:
        zk = KazooClient(zkconnect)
        zk.start()
    except Exception as e:
        raise ZookeeperException(
            "Cannot connect to Zookeeper: {0}".format(e))

    try:
        # Get broker list
        cluster = cls()
        add_brokers_from_zk(cluster, zk)

        # Get current partition state
        log.info("Getting partition list from Zookeeper")
        for topic in zk.get_children("/brokers/topics"):
            zdata, zstat = zk.get("/brokers/topics/{0}".format(topic))
            add_topic_with_replicas(cluster, topic, json.loads(zdata))

        if cluster.num_topics() == 0:
            raise ZookeeperException(
                "The cluster specified does not have any topics")
    finally:
        # Always release the Zookeeper session, including when loading or validation fails
        log.info("Closing connection to zookeeper")
        zk.stop()
        zk.close()

    return cluster
def create_from_zookeeper(cls, zkconnect):
    """Create a cluster populated from Zookeeper.

    Connects with ``KazooClient``, loads the brokers via ``add_brokers_from_zk``
    and each topic under ``/brokers/topics`` via ``add_topic_with_replicas``.

    Args:
        zkconnect: Zookeeper connect string.

    Returns:
        The populated ``cls`` instance.

    Raises:
        ZookeeperException: when connecting fails or no topics exist.
    """
    log.info("Connecting to zookeeper {0}".format(zkconnect))
    try:
        zk = KazooClient(zkconnect)
        zk.start()
    except Exception as e:
        raise ZookeeperException("Cannot connect to Zookeeper: {0}".format(e))

    try:
        # Get broker list
        cluster = cls()
        add_brokers_from_zk(cluster, zk)

        # Get current partition state
        log.info("Getting partition list from Zookeeper")
        for topic in zk.get_children("/brokers/topics"):
            zdata, zstat = zk.get("/brokers/topics/{0}".format(topic))
            add_topic_with_replicas(cluster, topic, json.loads(zdata))

        if cluster.num_topics() == 0:
            raise ZookeeperException("The cluster specified does not have any topics")
    finally:
        # The client must be stopped and closed on every path, or a failed load
        # would leave the session open
        log.info("Closing connection to zookeeper")
        zk.stop()
        zk.close()

    return cluster
def _execute(self, num, total, zookeeper, tools_path):
    """Run ``kafka-reassign-partitions.sh --execute`` for this batch and wait for completion.

    The reassignment JSON is written to a temporary file, the Kafka tool is
    invoked with its output discarded, and ``self.check_completion`` is then
    polled every ``self.pause_time`` seconds until it reports no remaining
    partitions.

    Args:
        num: number of this batch, used in log output.
        total: total number of batches, used in log output.
        zookeeper: Zookeeper connect string handed to the Kafka tool.
        tools_path: directory containing ``kafka-reassign-partitions.sh``.
    """
    with NamedTemporaryFile(mode='w') as assignfile:
        json.dump(self.dict_for_reassignment(), assignfile)
        assignfile.flush()

        # Discard the tool's output; the handle is closed as soon as the tool has exited
        with open(os.devnull, 'w') as fnull:
            proc = subprocess.Popen(['{0}/kafka-reassign-partitions.sh'.format(tools_path), '--execute',
                                     '--zookeeper', zookeeper,
                                     '--reassignment-json-file', assignfile.name],
                                    stdout=fnull, stderr=fnull)
            proc.wait()

        # Wait until finished (still inside the outer block: the check reads the temp file by name)
        while True:
            remaining_partitions = self.check_completion(zookeeper, tools_path, assignfile.name)
            if remaining_partitions == 0:
                break

            log.info('Partition reassignment {0}/{1} in progress [ {2}/{3} partitions remain ]. Sleeping {4} seconds'.format(num, total, remaining_partitions, len(self.partitions), self.pause_time))
            time.sleep(self.pause_time)
def create_from_zookeeper(cls, zkconnect):
    """Build a cluster object from the broker and topic state stored in Zookeeper.

    Brokers are read from ``/brokers/ids`` and topics from ``/brokers/topics``.
    A replica that refers to a broker ID missing from the broker list is added
    as a broker without a hostname.

    Args:
        zkconnect: Zookeeper connect string passed to ``KazooClient``.

    Returns:
        A new instance of ``cls`` populated with brokers and topics.

    Raises:
        ZookeeperException: if the connection cannot be established, or the
            cluster has no brokers or no topics.
    """
    log.info("Connecting to zookeeper {0}".format(zkconnect))
    try:
        zk = KazooClient(zkconnect)
        zk.start()
    except Exception as e:
        raise ZookeeperException("Cannot connect to Zookeeper: {0}".format(e))

    try:
        # Get broker list
        cluster = cls()
        for b in zk.get_children("/brokers/ids"):
            broker_data, bstat = zk.get("/brokers/ids/{0}".format(b))
            cluster.add_broker(Broker.create_from_json(int(b), broker_data))
        if cluster.num_brokers() == 0:
            raise ZookeeperException("The cluster specified does not have any brokers")

        # Get current partition state
        log.info("Getting partition list from Zookeeper")
        for topic in zk.get_children("/brokers/topics"):
            zdata, zstat = zk.get("/brokers/topics/{0}".format(topic))
            zj = json.loads(zdata)
            newtopic = Topic(topic, len(zj['partitions']))
            for partition in zj['partitions']:
                for i, replica in enumerate(zj['partitions'][partition]):
                    if replica not in cluster.brokers:
                        # Hit a replica that's not in the ID list (which means it's dead)
                        # We'll add it, but trying to get sizes will fail as we don't have a hostname
                        cluster.add_broker(Broker(replica, None))
                    newtopic.partitions[int(partition)].add_replica(cluster.brokers[replica], i)
            cluster.add_topic(newtopic)

        if cluster.num_topics() == 0:
            raise ZookeeperException("The cluster specified does not have any topics")
    finally:
        # Always release the Zookeeper session, including when a validation above raises
        log.info("Closing connection to zookeeper")
        zk.stop()
        zk.close()

    return cluster
def process_cluster(self):
    """Balance the cluster so brokers hold similar total partition size per replica position.

    For every replica position the target is the summed size of the partitions
    considered at that position divided by the number of brokers, and the
    allowed margin is half the median partition size. Partitions are then moved
    (or swapped between positions) onto brokers that are below ``target - margin``.
    """
    log.info("Starting partition balance by size")

    # Figure out the max RF for the cluster
    max_rf = self.cluster.max_replication_factor()

    # Calculate cluster information and sorted partition lists first
    partitions = {}
    sizes = {}
    targets = {}
    margins = {}
    for pos in range(max_rf):
        sizes[pos] = {}

        # Create a sorted list of partitions to use at this position (descending size)
        # Throw out partitions that are 4K or less in size, as they are effectively empty
        partitions[pos] = [p for p in self.cluster.partitions() if (len(p.replicas) > pos) and (p.size > 4)]
        partitions[pos].sort(key=attrgetter('size'), reverse=True)

        # Calculate broker size at this position
        for broker in self.cluster.brokers:
            if pos in self.cluster.brokers[broker].partitions:
                sizes[pos][broker] = sum([p.size for p in self.cluster.brokers[broker].partitions[pos]], 0)
            else:
                sizes[pos][broker] = 0

        # Calculate the median size of partitions (margin is median/2) and the average size per broker to target
        # Yes, I know the median calculation is slightly broken (it keeps integers). This is OK
        targets[pos] = sum([p.size for p in partitions[pos]], 0) // len(self.cluster.brokers)
        sizelen = len(partitions[pos])
        if sizelen == 0:
            # Nothing above the 4K cutoff at this position: there is no median to take
            margins[pos] = 0
        elif not sizelen % 2:
            margins[pos] = (partitions[pos][sizelen // 2].size + partitions[pos][sizelen // 2 - 1].size) // 4
        else:
            margins[pos] = partitions[pos][sizelen // 2].size // 2

    # Balance partitions for each replica position separately
    for pos in range(max_rf):
        log.info("Calculating ideal state for replica position {0}".format(pos))
        log.debug("Target average size per-broker is {0} kibibytes (+/- {1})".format(targets[pos], margins[pos]))

        for broker_id in self.cluster.brokers:
            broker = self.cluster.brokers[broker_id]

            # Skip brokers that are larger than our minimum target size
            min_move = targets[pos] - margins[pos] - sizes[pos][broker_id]
            max_move = min_move + (margins[pos] * 2)
            if min_move <= 0:
                continue
            log.debug("Moving between {0} and {1} kibibytes to broker {2}".format(min_move, max_move, broker_id))

            # Find partitions to move to this broker
            for partition in partitions[pos]:
                # We can use this partition if all of the following are true: the partition has a replica at this position,
                # its size is less than or equal to the max move size, the broker at this replica position would not go out
                # of range, and it doesn't already exist on this broker at this position
                if ((len(partition.replicas) <= pos) or
                        (partition.size > max_move) or
                        ((sizes[pos][partition.replicas[pos].id] - partition.size) < (targets[pos] - margins[pos])) or
                        (partition.replicas[pos] == broker)):
                    continue

                # We can only use a partition that this replica exists on if swapping positions wouldn't hurt balance of the other position or broker
                source = partition.replicas[pos]
                if broker in partition.replicas:
                    other_pos = partition.replicas.index(broker)
                    # NOTE(review): the second bound below compares a size at other_pos against
                    # targets[pos] + margins[pos] - confirm whether other_pos was intended
                    if ((sizes[other_pos][broker_id] - partition.size < targets[other_pos] - margins[other_pos]) or
                            (sizes[other_pos][source.id] + partition.size > targets[pos] + margins[pos]) or
                            (sizes[pos][broker_id] + partition.size > targets[pos] + margins[pos]) or
                            (sizes[pos][source.id] - partition.size < targets[pos] - margins[pos])):
                        continue

                    partition.swap_replica_positions(source, broker)
                    sizes[other_pos][broker_id] -= partition.size
                    sizes[other_pos][source.id] += partition.size
                else:
                    # Move the partition and adjust sizes
                    partition.swap_replicas(source, broker)

                # In both cases the target broker gained this partition at pos and the source lost it
                sizes[pos][broker_id] += partition.size
                sizes[pos][source.id] -= partition.size
                min_move -= partition.size
                max_move -= partition.size

                # If we have moved enough partitions, stop for this broker
                if min_move <= 0:
                    break
def process_cluster(self):
    """Even out the total partition size held by each broker, per replica position.

    Per position, the target is the summed size of the considered partitions
    divided by the broker count and the margin is half the median partition
    size. Brokers below ``target - margin`` receive partitions, either by
    moving a replica or by swapping replica positions within a partition.
    """
    log.info("Starting partition balance by size")

    # Figure out the max RF for the cluster
    max_rf = self.cluster.max_replication_factor()

    # Calculate cluster information and sorted partition lists first
    partitions = {}
    sizes = {}
    targets = {}
    margins = {}
    for pos in range(max_rf):
        sizes[pos] = {}

        # Create a sorted list of partitions to use at this position (descending size)
        # Throw out partitions that are 4K or less in size, as they are effectively empty
        partitions[pos] = [
            p for p in self.cluster.partitions()
            if (len(p.replicas) > pos) and (p.size > 4)
        ]
        partitions[pos].sort(key=attrgetter('size'), reverse=True)

        # Calculate broker size at this position
        for broker in self.cluster.brokers:
            if pos in self.cluster.brokers[broker].partitions:
                sizes[pos][broker] = sum([
                    p.size
                    for p in self.cluster.brokers[broker].partitions[pos]
                ], 0)
            else:
                sizes[pos][broker] = 0

        # Calculate the median size of partitions (margin is median/2) and the average size per broker to target
        # Yes, I know the median calculation is slightly broken (it keeps integers). This is OK
        targets[pos] = sum([p.size for p in partitions[pos]], 0) // len(
            self.cluster.brokers)
        sizelen = len(partitions[pos])
        if sizelen == 0:
            # No partition passed the 4K filter, so indexing the middle element would fail
            margins[pos] = 0
        elif not sizelen % 2:
            margins[pos] = (partitions[pos][sizelen // 2].size +
                            partitions[pos][sizelen // 2 - 1].size) // 4
        else:
            margins[pos] = partitions[pos][sizelen // 2].size // 2

    # Balance partitions for each replica position separately
    for pos in range(max_rf):
        log.info(
            "Calculating ideal state for replica position {0}".format(pos))
        log.debug(
            "Target average size per-broker is {0} kibibytes (+/- {1})".
            format(targets[pos], margins[pos]))

        for broker_id in self.cluster.brokers:
            broker = self.cluster.brokers[broker_id]

            # Skip brokers that are larger than our minimum target size
            min_move = targets[pos] - margins[pos] - sizes[pos][broker_id]
            max_move = min_move + (margins[pos] * 2)
            if min_move <= 0:
                continue
            log.debug("Moving between {0} and {1} kibibytes to broker {2}".
                      format(min_move, max_move, broker_id))

            # Find partitions to move to this broker
            for partition in partitions[pos]:
                # We can use this partition if all of the following are true: the partition has a replica at this position,
                # its size is less than or equal to the max move size, the broker at this replica position would not go out
                # of range, and it doesn't already exist on this broker at this position
                if ((len(partition.replicas) <= pos) or
                        (partition.size > max_move) or
                        ((sizes[pos][partition.replicas[pos].id] - partition.size) < (targets[pos] - margins[pos])) or
                        (partition.replicas[pos] == broker)):
                    continue

                # We can only use a partition that this replica exists on if swapping positions wouldn't hurt balance of the other position or broker
                source = partition.replicas[pos]
                if broker in partition.replicas:
                    other_pos = partition.replicas.index(broker)
                    # NOTE(review): the second bound checks a size at other_pos against the
                    # limit for pos (targets[pos] + margins[pos]) - confirm this is intended
                    if ((sizes[other_pos][broker_id] - partition.size < targets[other_pos] - margins[other_pos]) or
                            (sizes[other_pos][source.id] + partition.size > targets[pos] + margins[pos]) or
                            (sizes[pos][broker_id] + partition.size > targets[pos] + margins[pos]) or
                            (sizes[pos][source.id] - partition.size < targets[pos] - margins[pos])):
                        continue

                    partition.swap_replica_positions(source, broker)
                    sizes[other_pos][broker_id] -= partition.size
                    sizes[other_pos][source.id] += partition.size
                else:
                    # Move the partition and adjust sizes
                    partition.swap_replicas(source, broker)

                # Either way the receiving broker now holds the partition at pos instead of the source
                sizes[pos][broker_id] += partition.size
                sizes[pos][source.id] -= partition.size
                min_move -= partition.size
                max_move -= partition.size

                # If we have moved enough partitions, stop for this broker
                if min_move <= 0:
                    break
def main():
    """Entry point: compute a new partition assignment and, unless dry-running, apply it.

    Loads the action/sizer maps and plugins, reads the cluster from Zookeeper,
    runs the chosen action against a clone of the cluster, executes the
    reassignment batches and then (unless skipped) the preferred replica
    elections. Every plugin is called at each step along the way.

    Returns:
        0.
    """
    # Start by loading all the modules
    action_map = get_action_map()
    sizer_map = get_sizer_map()
    plugin_classes = get_plugins_list()

    # Instantiate all plugins
    plugins = [plugin_cls() for plugin_cls in plugin_classes]

    # Set up and parse all CLI arguments
    args = set_up_arguments(action_map, sizer_map, plugins)

    for plugin in plugins:
        plugin.set_arguments(args)

    tools_path = get_tools_path(args.tools_path)
    check_java_home()

    cluster = Cluster.create_from_zookeeper(args.zookeeper)
    for plugin in plugins:
        plugin.set_cluster(cluster)

    # If the module needs the partition sizes, call a size module to get the information
    check_and_get_sizes(action_map[args.action], args, cluster, sizer_map)
    for plugin in plugins:
        plugin.after_sizes()

    show_leadership = args.leadership
    if show_leadership:
        log.info("Cluster Leadership Balance (before):")
        cluster.log_broker_summary()

    # Clone the cluster, and run the action to generate a new cluster state
    newcluster = cluster.clone()
    action_to_run = action_map[args.action](args, newcluster)
    action_to_run.process_cluster()
    for plugin in plugins:
        plugin.set_new_cluster(action_to_run.cluster)

    if args.leadership:
        log.info("Cluster Leadership Balance (after):")
        newcluster.log_broker_summary()

    move_partitions = cluster.changed_partitions(action_to_run.cluster)
    batches = split_partitions_into_batches(move_partitions, batch_size=args.moves, use_class=Reassignment)
    for plugin in plugins:
        plugin.set_batches(batches)

    log.info("Partition moves required: {0}".format(len(move_partitions)))
    log.info("Number of batches: {0}".format(len(batches)))

    # Anything other than an explicit --execute (without --generate) is a dry run
    dry_run = args.generate or not args.execute
    if dry_run:
        log.info("--execute flag NOT specified. DRY RUN ONLY")

    for batch_num, batch in enumerate(batches, 1):
        log.info("Executing partition reassignment {0}/{1}: {2}".format(batch_num, len(batches), repr(batch)))
        batch.execute(batch_num, len(batches), args.zookeeper, tools_path, plugins, dry_run)

    for plugin in plugins:
        plugin.before_ple()

    if not args.skip_ple:
        # Elections are batched over the moved partitions, using the same batch size as the moves
        batches = split_partitions_into_batches(move_partitions, batch_size=args.moves, use_class=ReplicaElection)
        log.info("Number of replica elections: {0}".format(len(batches)))
        run_preferred_replica_elections(batches, args, tools_path, plugins, dry_run)

    for plugin in plugins:
        plugin.finished()

    return 0
def process_cluster(self):
    """Balance the number of partitions each broker holds at every replica position.

    For each position the per-broker quota is the floor of the average
    partition count; the remainder is handed out one extra partition per
    broker, in broker iteration order. Brokers below their quota pull
    partitions from other brokers, brokers above it push partitions away.
    Topics listed in ``self.args.exclude_topics`` are not pulled.
    """
    log.info("Starting partition balance by count")

    # Figure out the max RF for the cluster and sort all partition lists by size (ascending)
    max_pos = self.cluster.max_replication_factor()
    for broker in self.cluster.brokers:
        for pos in self.cluster.brokers[broker].partitions:
            self.cluster.brokers[broker].partitions[pos].sort(
                key=attrgetter('size'))

    # Calculate partition counts for each position first
    max_count = {}
    for pos in range(max_pos):
        # Calculate the maximum number of partitions each broker should have (floor(average))
        # We'll also track a remainder and make sure they only go 1 per broker
        pcount = 0
        for broker in self.cluster.brokers:
            if pos in self.cluster.brokers[broker].partitions:
                pcount += self.cluster.brokers[
                    broker].num_partitions_at_position(pos)
        # divmod keeps the quota an integer; "/" would yield a float on Python 3 and the
        # "diff == 0" exit conditions below could then never be met
        quota, remainder = divmod(pcount, len(self.cluster.brokers))
        max_count[pos] = [quota, remainder]
        log.info(
            "Calculating ideal state for replica position {0} - max {1} partitions"
            .format(pos, max_count[pos][0] + 1))

    # Balance partition counts for each replica position separately
    for pos in range(max_pos):
        for broker_id in self.cluster.brokers:
            broker = self.cluster.brokers[broker_id]

            # Figure out how many more partitions this broker needs
            diff = max_count[pos][0]
            if max_count[pos][1]:
                diff += 1
                max_count[pos][1] -= 1
            if pos in broker.partitions:
                diff -= broker.num_partitions_at_position(pos)

            if diff > 0:
                log.debug("Moving {0} partitions to broker {1}".format(
                    diff, broker_id))

                # Iterate through the largest brokers to find diff partitions to move to this broker
                for source_id in self.cluster.brokers:
                    source = self.cluster.brokers[source_id]
                    if diff == 0:
                        break
                    if pos not in source.partitions:
                        continue

                    # Iterate over a copy: swapping replicas changes the broker's partition list
                    iterlist = list(source.partitions[pos])
                    for partition in iterlist:
                        # If we have moved enough partitions from this broker, exit out of the inner loop
                        if (source.num_partitions_at_position(pos) <
                                max_count[pos][0]) or (diff == 0):
                            break

                        # Skip topics that are being excluded
                        if partition.topic.name in self.args.exclude_topics:
                            continue

                        # If the partition is already on the target, swap positions only if it makes the balance better
                        if broker in partition.replicas:
                            other_pos = partition.replicas.index(broker)
                            if (other_pos in source.partitions
                                ) and (source.num_partitions_at_position(
                                    other_pos) < max_count[other_pos][0]):
                                partition.swap_replica_positions(
                                    source, broker)
                        else:
                            partition.swap_replicas(source, broker)

                        # NOTE(review): this also counts the case where the position swap
                        # above was skipped - confirm that is intended
                        diff -= 1

                log.debug("Finish broker {0} with {1} partitions".format(
                    broker_id, broker.num_partitions_at_position(pos)))
            elif diff < 0:
                log.debug("Moving {0} partitions off broker {1}".format(
                    -diff, broker_id))

                # Iterate through the smallest brokers to find diff partitions to move off this broker
                for target_id in self.cluster.brokers:
                    target = self.cluster.brokers[target_id]
                    if diff == 0:
                        break
                    if (pos in target.partitions) and (
                            target.num_partitions_at_position(pos) >
                            (max_count[pos][0] + 1)):
                        continue

                    # Iterate over a copy: swapping replicas changes the broker's partition list
                    iterlist = list(broker.partitions[pos])
                    for partition in iterlist:
                        # If we have moved enough partitions to this broker, exit out of the inner loop
                        if ((pos in target.partitions) and
                                (target.num_partitions_at_position(pos) >=
                                 max_count[pos][0])) or (diff == 0):
                            break

                        # Skip partitions that are already on the target broker
                        if target in partition.replicas:
                            continue

                        partition.swap_replicas(broker, target)
                        diff += 1

                log.debug("Finish broker {0} with {1} partitions".format(
                    broker, broker.num_partitions_at_position(pos)))
            else:
                log.debug(
                    "Skipping broker {0} which has {1} partitions".format(
                        broker, broker.num_partitions_at_position(pos)))
                continue
def process_cluster(self):
    """Even out how many partitions each broker holds at every replica position.

    The quota per broker and position is the floor of the average count; any
    remainder is distributed as one extra partition per broker in iteration
    order. Brokers under quota take partitions from others, brokers over quota
    give partitions away. Topics in ``self.args.exclude_topics`` are not taken.
    """
    log.info("Starting partition balance by count")

    # Figure out the max RF for the cluster and sort all partition lists by size (ascending)
    max_pos = self.cluster.max_replication_factor()
    for broker in self.cluster.brokers:
        for pos in self.cluster.brokers[broker].partitions:
            self.cluster.brokers[broker].partitions[pos].sort(key=attrgetter('size'))

    # Calculate partition counts for each position first
    max_count = {}
    for pos in range(max_pos):
        # Calculate the maximum number of partitions each broker should have (floor(average))
        # We'll also track a remainder and make sure they only go 1 per broker
        pcount = 0
        for broker in self.cluster.brokers:
            if pos in self.cluster.brokers[broker].partitions:
                pcount += self.cluster.brokers[broker].num_partitions_at_position(pos)
        # Floor division: with "/" the quota becomes a float on Python 3, which breaks the
        # integer "diff == 0" checks used to stop moving partitions
        max_count[pos] = [pcount // len(self.cluster.brokers), pcount % len(self.cluster.brokers)]
        log.info("Calculating ideal state for replica position {0} - max {1} partitions".format(pos, max_count[pos][0] + 1))

    # Balance partition counts for each replica position separately
    for pos in range(max_pos):
        for broker_id in self.cluster.brokers:
            broker = self.cluster.brokers[broker_id]

            # Figure out how many more partitions this broker needs
            diff = max_count[pos][0]
            if max_count[pos][1]:
                diff += 1
                max_count[pos][1] -= 1
            if pos in broker.partitions:
                diff -= broker.num_partitions_at_position(pos)

            if diff > 0:
                log.debug("Moving {0} partitions to broker {1}".format(diff, broker_id))

                # Iterate through the largest brokers to find diff partitions to move to this broker
                for source_id in self.cluster.brokers:
                    source = self.cluster.brokers[source_id]
                    if diff == 0:
                        break
                    if pos not in source.partitions:
                        continue

                    # Work on a snapshot, since swapping replicas alters the source's list
                    iterlist = list(source.partitions[pos])
                    for partition in iterlist:
                        # If we have moved enough partitions from this broker, exit out of the inner loop
                        if (source.num_partitions_at_position(pos) < max_count[pos][0]) or (diff == 0):
                            break

                        # Skip topics that are being excluded
                        if partition.topic.name in self.args.exclude_topics:
                            continue

                        # If the partition is already on the target, swap positions only if it makes the balance better
                        if broker in partition.replicas:
                            other_pos = partition.replicas.index(broker)
                            if (other_pos in source.partitions) and (source.num_partitions_at_position(other_pos) < max_count[other_pos][0]):
                                partition.swap_replica_positions(source, broker)
                        else:
                            partition.swap_replicas(source, broker)

                        # NOTE(review): diff is decremented even if the position swap above
                        # did not happen - confirm that is intended
                        diff -= 1

                log.debug("Finish broker {0} with {1} partitions".format(broker_id, broker.num_partitions_at_position(pos)))
            elif diff < 0:
                log.debug("Moving {0} partitions off broker {1}".format(-diff, broker_id))

                # Iterate through the smallest brokers to find diff partitions to move off this broker
                for target_id in self.cluster.brokers:
                    target = self.cluster.brokers[target_id]
                    if diff == 0:
                        break
                    if (pos in target.partitions) and (target.num_partitions_at_position(pos) > (max_count[pos][0] + 1)):
                        continue

                    # Work on a snapshot, since swapping replicas alters this broker's list
                    iterlist = list(broker.partitions[pos])
                    for partition in iterlist:
                        # If we have moved enough partitions to this broker, exit out of the inner loop
                        if ((pos in target.partitions) and (target.num_partitions_at_position(pos) >= max_count[pos][0])) or (diff == 0):
                            break

                        # Skip partitions that are already on the target broker
                        if target in partition.replicas:
                            continue

                        partition.swap_replicas(broker, target)
                        diff += 1

                log.debug("Finish broker {0} with {1} partitions".format(broker, broker.num_partitions_at_position(pos)))
            else:
                log.debug("Skipping broker {0} which has {1} partitions".format(broker, broker.num_partitions_at_position(pos)))
                continue
def print_leadership(type_str, cluster, dont_skip):
    """Log the per-broker leadership summary of ``cluster`` when requested.

    Args:
        type_str: label shown in the header line, e.g. "before" or "after".
        cluster: cluster object providing ``log_broker_summary()``.
        dont_skip: when false, nothing is logged.
    """
    if dont_skip:
        # Use the caller's label; a fixed "before" mislabelled the summary of the new state
        log.info("Cluster Leadership Balance ({0}):".format(type_str))
        cluster.log_broker_summary()
def is_dry_run(args):
    """Return True when nothing should be executed.

    That is the case when ``args.generate`` is set or ``args.execute`` is not;
    a notice is logged in that case.
    """
    dry_run = bool(args.generate or not args.execute)
    if dry_run:
        log.info("--execute flag NOT specified. DRY RUN ONLY")
    return dry_run