def test_replicas_delayed_startup():
    """Check decisions while the running replica count lags behind the target.

    Upscaling is immediate (``upscale_delay_s=0``) and the downscale delay is
    set very large, so the test asserts that the decision never drops during
    the sequence below.
    """
    policy = BasicAutoscalingPolicy(
        AutoscalingConfig(
            min_replicas=1,
            max_replicas=200,
            target_num_ongoing_requests_per_replica=1,
            upscale_delay_s=0,
            downscale_delay_s=100000,
        )
    )

    # Each step: (ongoing requests per running replica,
    #             current target replica count,
    #             expected decision).
    steps = [
        # One replica with 100 ongoing requests: target jumps to 100.
        ([100], 1, 100),
        # Target is already 100, but no new replica finished starting yet.
        ([100], 100, 100),
        # Two more replicas came up during this timestep.
        ([100, 20, 3], 100, 123),
        # Many requests drained and more replicas started; the decision is
        # expected to hold at 123 because of the downscale delay.
        ([6, 2, 1, 1], 123, 123),
    ]

    for ongoing, current_target, expected in steps:
        decision = policy.get_decision_num_replicas(ongoing, current_target)
        assert decision == expected
def test_imbalanced_replicas(ongoing_requests):
    """Check the decision depends on the average load across replicas.

    ``ongoing_requests`` is supplied from outside this function (presumably a
    fixture or parametrization -- not visible here). Starting from a target of
    4 replicas, the expected decision is:

    * 4 when the average equals the per-replica target,
    * 4 when the average is below the target by at most 1, otherwise 3,
    * 5 when the average is above the target.
    """
    config = AutoscalingConfig(
        min_replicas=1,
        max_replicas=10,
        target_num_ongoing_requests_per_replica=5,
        upscale_delay_s=0.0,
        downscale_delay_s=0.0,
    )
    policy = BasicAutoscalingPolicy(config)

    average = sum(ongoing_requests) / len(ongoing_requests)
    per_replica_target = config.target_num_ongoing_requests_per_replica

    decision = policy.get_decision_num_replicas(
        current_num_ongoing_requests=ongoing_requests,
        curr_target_num_replicas=4,
        current_handle_queued_queries=0,
    )

    if average == per_replica_target:
        # Average load matches the target: replica count stays the same.
        expected = 4
    elif average < per_replica_target:
        if per_replica_target - average <= 1:
            # Autoscaling uses a ceiling operator, so a slightly low average
            # is not enough to downscale.
            expected = 4
        else:
            expected = 3
    else:
        # Average load above the target: scale up.
        expected = 5

    assert decision == expected
def test_fluctuating_ongoing_requests(delay_s):
    """Alternate between an overloaded and an underloaded workload.

    ``delay_s`` is used as both the upscale and the downscale delay. With a
    positive delay the test asserts that the decision always equals the
    current target; with no delay it asserts that the decision flips between
    1 and 2 replicas on every call.
    """
    config = AutoscalingConfig(
        min_replicas=1,
        max_replicas=10,
        target_num_ongoing_requests_per_replica=50,
        upscale_delay_s=delay_s,
        downscale_delay_s=delay_s,
    )
    policy = BasicAutoscalingPolicy(config)

    delayed = delay_s > 0
    if delayed:
        # The scenario only makes sense if the delay spans several periods.
        wait_periods = int(delay_s / CONTROL_LOOP_PERIOD_S)
        assert wait_periods > 1

    # Each phase: (ongoing requests, current target, target when undelayed).
    overload_phase = ([100], 1, 2)
    underload_phase = ([20, 20], 2, 1)

    for trial in range(1000):
        phase = overload_phase if trial % 2 == 0 else underload_phase
        ongoing, current_target, undelayed_target = phase

        decision = policy.get_decision_num_replicas(
            current_num_ongoing_requests=ongoing,
            curr_target_num_replicas=current_target,
            current_handle_queued_queries=0,
        )

        expected = current_target if delayed else undelayed_target
        assert decision == expected, trial
def test_single_replica_receives_all_requests(ongoing_requests):
    """Check the decision equals total ongoing requests over the target.

    ``ongoing_requests`` is supplied from outside this function (presumably a
    fixture or parametrization -- not visible here). With no scaling delays,
    the decision is asserted to equal ``sum(ongoing_requests)`` divided by the
    per-replica target of 5.
    """
    per_replica_target = 5

    policy = BasicAutoscalingPolicy(
        AutoscalingConfig(
            min_replicas=1,
            max_replicas=50,
            target_num_ongoing_requests_per_replica=per_replica_target,
            upscale_delay_s=0.0,
            downscale_delay_s=0.0,
        )
    )

    decision = policy.get_decision_num_replicas(
        current_num_ongoing_requests=ongoing_requests,
        curr_target_num_replicas=4,
    )

    expected = sum(ongoing_requests) / per_replica_target
    assert decision == expected
def test_upscale_downscale_delay():
    """Unit test for upscale_delay_s and downscale_delay_s.

    Verifies that the policy changes its decision only after more than
    ``delay / CONTROL_LOOP_PERIOD_S`` consecutive decisions in the same
    direction, and that a single decision in the opposite direction resets
    that count.
    """
    upscale_delay_s = 30.0
    downscale_delay_s = 600.0

    # Build the config from the same locals the wait periods are derived
    # from, so the two cannot drift apart.
    config = AutoscalingConfig(
        min_replicas=1,
        max_replicas=2,
        target_num_ongoing_requests_per_replica=1,
        upscale_delay_s=upscale_delay_s,
        downscale_delay_s=downscale_delay_s,
    )
    policy = BasicAutoscalingPolicy(config)

    upscale_wait_periods = int(upscale_delay_s / CONTROL_LOOP_PERIOD_S)
    downscale_wait_periods = int(downscale_delay_s / CONTROL_LOOP_PERIOD_S)

    overload_requests = [100]
    no_requests = [0, 0]

    def decide(ongoing_requests, current_target):
        """Ask the policy for one decision."""
        return policy.get_decision_num_replicas(
            current_num_ongoing_requests=ongoing_requests,
            curr_target_num_replicas=current_target,
        )

    def assert_target_held(ongoing_requests, current_target, periods):
        """Assert the decision stays at ``current_target`` for ``periods`` calls."""
        for i in range(periods):
            assert decide(ongoing_requests, current_target) == current_target, i

    # We should scale up only after enough consecutive scale-up decisions.
    assert_target_held(overload_requests, 1, upscale_wait_periods)
    assert decide(overload_requests, 1) == 2

    # We should scale down only after enough consecutive scale-down decisions.
    assert_target_held(no_requests, 2, downscale_wait_periods)
    assert decide(no_requests, 2) == 1

    # Get some scale-up decisions, but not enough to trigger a scale up.
    assert_target_held(overload_requests, 1, upscale_wait_periods // 2)

    # Interrupt with a scale-down decision.
    decide([0], 1)

    # The counter should be reset, so it should require `upscale_wait_periods`
    # more periods before we actually scale up.
    assert_target_held(overload_requests, 1, upscale_wait_periods)
    assert decide(overload_requests, 1) == 2

    # Get some scale-down decisions, but not enough to trigger a scale down.
    assert_target_held(no_requests, 2, downscale_wait_periods // 2)

    # Interrupt with a scale-up decision.
    decide([100, 100], 2)

    # The counter should be reset so it should require `downscale_wait_periods`
    # more periods before we actually scale down.
    assert_target_held(no_requests, 2, downscale_wait_periods)
    assert decide(no_requests, 2) == 1