def balance(self):
    """Rebalance ``self.current_assignment`` in place.

    The steps run in this order:

    1. assign every unassigned partition that has at least one potential consumer;
    2. take partitions and consumers that cannot participate in a reassignment
       out of the working scope;
    3. snapshot the working state and run the reassignment passes;
    4. restore the snapshot if the passes changed something without lowering
       the balance score (skipped when nothing was assigned to begin with);
    5. put the out-of-scope consumers and their partitions back.
    """
    self._initialize_current_subscriptions()

    # if even the consumer with the most subscriptions owns nothing, nothing was assigned before
    most_subscribed = self._get_consumer_with_most_subscriptions()
    initializing = not self.current_assignment[most_subscribed]

    # hand out every unassigned partition that some consumer could take
    for partition in self.unassigned_partitions:
        if self.partition_to_all_potential_consumers[partition]:
            self._assign_partition(partition)

    # partitions that cannot be reassigned leave the reassignment scope
    immovable_partitions = {
        partition
        for partition in six.iterkeys(self.partition_to_all_potential_consumers)
        if not self._can_partition_participate_in_reassignment(partition)
    }
    for partition in immovable_partitions:
        remove_if_present(self.sorted_partitions, partition)
        remove_if_present(self.unassigned_partitions, partition)

    # consumers that cannot be part of a reassignment are parked, with their partitions,
    # until the end of the method
    parked_assignments = {}
    for consumer in six.iterkeys(self.consumer_to_all_potential_partitions):
        if self._can_consumer_participate_in_reassignment(consumer):
            continue
        self._remove_consumer_from_current_subscriptions_and_maintain_order(consumer)
        parked_assignments[consumer] = self.current_assignment[consumer]
        del self.current_assignment[consumer]

    # deep copies of the working state, kept so the passes below can be undone
    snapshot_assignment = deepcopy(self.current_assignment)
    snapshot_partition_consumer = deepcopy(self.current_partition_consumer)

    # unless subscription changes already force a revocation,
    # first try to balance by moving only the newly added partitions
    if not self.revocation_required:
        self._perform_reassignments(self.unassigned_partitions)
    changed = self._perform_reassignments(self.sorted_partitions)

    # a reassignment that did not produce a more balanced result is rolled back
    if not initializing and changed:
        score_after = self._get_balance_score(self.current_assignment)
        score_before = self._get_balance_score(snapshot_assignment)
        if score_after >= score_before:
            self.current_assignment = snapshot_assignment
            self.current_partition_consumer.clear()
            self.current_partition_consumer.update(snapshot_partition_consumer)

    # bring the parked consumers (those that could not change) back
    for consumer, partitions in six.iteritems(parked_assignments):
        self.current_assignment[consumer] = partitions
        self._add_consumer_to_current_subscriptions_and_maintain_order(consumer)
def test_assignment_with_conflicting_previous_generations(mocker, execution_number):
    """Overlapping ownership claims from different generations must still
    yield a valid, balanced and sticky assignment.

    Partition 0 is claimed by C1 and C2 (both generation 1); partitions 3 and 4
    are claimed by a generation-1 member and by C3 (generation 2).

    ``execution_number`` is not used in the body; presumably it comes from a
    parametrize decorator that repeats the test -- not visible here.
    """
    cluster = create_cluster(mocker, topics={'t'}, topics_partitions={0, 1, 2, 3, 4, 5})

    # member -> (generation, partition numbers of topic 't' it claims to own)
    previous_state = {
        'C1': (1, {0, 1, 4}),
        'C2': (1, {0, 2, 3}),
        'C3': (2, {3, 4, 5}),
    }
    member_metadata = {
        member: StickyPartitionAssignor._metadata(
            {'t'},
            [TopicPartition('t', p) for p in owned],
            generation,
        )
        for member, (generation, owned) in six.iteritems(previous_state)
    }

    assignment = StickyPartitionAssignor.assign(cluster, member_metadata)

    expected_subscriptions = {'C1': {'t'}, 'C2': {'t'}, 'C3': {'t'}}
    verify_validity_and_balance(expected_subscriptions, assignment)
    assert StickyPartitionAssignor._latest_partition_movements.are_sticky()
def verify_validity_and_balance(subscriptions, assignment):
    """
    Assert that an assignment is valid for, and balanced across, the given subscriptions.

    Validity requirements:
    - every partition held by a consumer belongs to a topic that consumer subscribes to, and
    - no partition is held by more than one consumer

    Balance requirements, checked for every pair of consumers:
    - their partition counts differ by at most one, or
    - the consumer holding more partitions holds no topic that the consumer
      holding fewer partitions also holds

    :param subscriptions: topic subscriptions of each consumer
    :param assignment: given assignment for balance check
    """
    assert six.viewkeys(subscriptions) == six.viewkeys(assignment)

    consumers = sorted(six.viewkeys(assignment))
    for position, consumer in enumerate(consumers):
        partitions = assignment[consumer].partitions()

        # validity: the consumer must be subscribed to the topic of each of its partitions
        for partition in partitions:
            assert partition.topic in subscriptions[consumer], (
                'Error: Partition {} is assigned to consumer {}, '
                'but it is not subscribed to topic {}\n'
                'Subscriptions: {}\n'
                'Assignments: {}'.format(partition, consumer, partition.topic,
                                         subscriptions, assignment))

        # compare with every consumer that sorts after this one, so each pair is visited once
        for other_consumer in consumers[position + 1:]:
            other_partitions = assignment[other_consumer].partitions()

            # validity: a partition may not be owned twice
            shared_partitions = set(partitions) & set(other_partitions)
            assert not shared_partitions, (
                'Error: Consumers {} and {} have common partitions '
                'assigned to them: {}\n'
                'Subscriptions: {}\n'
                'Assignments: {}'.format(consumer, other_consumer, shared_partitions,
                                         subscriptions, assignment))

            # balance: a difference of at most one partition needs no further checking
            if abs(len(partitions) - len(other_partitions)) <= 1:
                continue

            topics = group_partitions_by_topic(partitions)
            other_topics = group_partitions_by_topic(other_partitions)

            # the counts differ by two or more here, so one side is strictly larger
            if len(partitions) > len(other_partitions):
                larger, larger_partitions, larger_topics = consumer, partitions, topics
                smaller, smaller_partitions, smaller_topics = (
                    other_consumer, other_partitions, other_topics)
            else:
                larger, larger_partitions, larger_topics = (
                    other_consumer, other_partitions, other_topics)
                smaller, smaller_partitions, smaller_topics = consumer, partitions, topics

            # a topic held by both sides means a partition could still be moved to the smaller one
            for topic in six.iterkeys(larger_topics):
                assert topic not in smaller_topics, (
                    'Error: Some partitions can be moved from {} ({} partitions) '
                    'to {} ({} partitions) '
                    'to achieve a better balance\n'
                    'Subscriptions: {}\n'
                    'Assignments: {}'.format(larger, len(larger_partitions),
                                             smaller, len(smaller_partitions),
                                             subscriptions, assignment))
def _populate_sorted_partitions(self):
    """Rebuild ``self.sorted_partitions``, the order in which partitions are
    considered for reassignment.

    All known partitions are first ordered with ``partitions_comparator_key``
    (by number of potential consumers, going by the original naming).

    * Fresh assignment, or subscriptions that are not identical: that ordering
      is used as is.
    * Reassignment with identical subscriptions: partitions are listed round
      robin, each time taking one partition from the consumer that
      ``subscriptions_comparator_key`` ranks last (the one with the most
      remaining partitions, per the original comments). A partition that
      appears in ``self.previous_assignment`` is preferred. Partitions not
      reached that way are appended afterwards in the base ordering.

    The resulting order is the same as with the previous implementation; only
    the quadratic list operations (``pop(0)`` draining, ``in`` on a list,
    repeated ``remove``) and the per-iteration rebuild of a loop-invariant set
    were replaced.
    """
    # set of topic partitions with their respective potential consumers
    # (consumers are turned into tuples so that the pairs are hashable)
    all_partitions = set(
        (tp, tuple(consumers))
        for tp, consumers in six.iteritems(self.partition_to_all_potential_consumers)
    )
    partitions_by_potential_consumers = [
        tp for tp, _ in sorted(all_partitions, key=partitions_comparator_key)
    ]

    self.sorted_partitions = []
    if self.is_fresh_assignment or not self._are_subscriptions_identical():
        self.sorted_partitions.extend(partitions_by_potential_consumers)
        return

    # this is a reassignment and the subscriptions are identical (all consumers can
    # consume from all topics), so partitions are simply listed in a round robin fashion
    # (from consumers with most assigned partitions to those with least)
    assignments = deepcopy(self.current_assignment)
    for partitions in six.itervalues(assignments):
        # drop, in place, partitions that are no longer known
        partitions[:] = [
            partition for partition in partitions
            if partition in self.partition_to_all_potential_consumers
        ]

    sorted_consumers = SortedSet(
        iterable=[
            (consumer, tuple(partitions))
            for consumer, partitions in six.iteritems(assignments)
        ],
        key=subscriptions_comparator_key,
    )

    # previous_assignment is not modified below, so its key set is built only once
    previously_assigned = set(six.iterkeys(self.previous_assignment))

    while sorted_consumers:
        # take the consumer with the most partitions
        consumer, _ = sorted_consumers.pop_last()
        # partitions still assigned to this consumer in the working copy
        remaining_partitions = assignments[consumer]
        # of the partitions that had a different consumer before,
        # keep only those that are assigned to this consumer now
        previous_partitions = previously_assigned.intersection(set(remaining_partitions))
        if previous_partitions:
            # a partition that was assigned to another consumer before
            # is a good option for reassignment
            partition = previous_partitions.pop()
            remaining_partitions.remove(partition)
        elif remaining_partitions:
            # otherwise any other current partition becomes the reassignment candidate
            partition = remaining_partitions.pop()
        else:
            # this consumer has nothing left; it is not put back into the set
            continue
        self.sorted_partitions.append(partition)
        sorted_consumers.add((consumer, tuple(remaining_partitions)))

    # append whatever was not listed above, keeping the base ordering
    already_listed = set(self.sorted_partitions)
    for partition in partitions_by_potential_consumers:
        if partition not in already_listed:
            already_listed.add(partition)
            self.sorted_partitions.append(partition)