def test_allocate_job():
    nodes = {
        "0": NodeInfo({"gpu": 1, "cpu": 500, "pods": 32}, preemptible=False),
        "1": NodeInfo({"gpu": 2, "cpu": 2000, "pods": 32}, preemptible=False),
        "2": NodeInfo({"gpu": 2, "cpu": 3000, "pods": 32}, preemptible=True),
    }
    perf_params = PerfParams(0.121, 0.00568, 0.0236, 0.00634,
                             0.0118, 0.00317, 1.14)
    grad_params = GradParams(sqr=0.00136, var=0.000502)
    goodput_fn = GoodputFunction(perf_params, grad_params, 128)
    speedup_fn = SpeedupFunction(goodput_fn, max_batch_size=1280,
                                 atomic_bsz_range=(64, 256))
    now = datetime.now()
    min_replicas = 0
    job_1 = JobInfo({"gpu": 1, "cpu": 500, "pods": 1}, speedup_fn,
                    now + timedelta(minutes=0), min_replicas, max_replicas=1)
    job_2 = JobInfo({"gpu": 1, "cpu": 1000, "pods": 1}, speedup_fn,
                    now + timedelta(minutes=1), min_replicas, max_replicas=1)
    job_3 = JobInfo({"gpu": 1, "cpu": 1000, "pods": 1}, speedup_fn,
                    now + timedelta(minutes=1), 2, max_replicas=2)
    job_4 = JobInfo({"gpu": 1, "cpu": 2000, "pods": 1}, speedup_fn,
                    now + timedelta(minutes=1), 2, max_replicas=2)
    policy = PolluxPolicy()
    # job_1 (500 cpu) fits on node "0"; job_2 (1000 cpu) does not, so it
    # lands on node "1".
    assert policy.allocate_job(job_1, nodes) == ["0"]
    assert policy.allocate_job(job_2, nodes) == ["1"]
    # Both of job_3's required replicas fit on node "1" (2 gpu, 2000 cpu).
    assert policy.allocate_job(job_3, nodes) == ["1", "1"]
    # job_4 needs two 2000-cpu replicas, more than any single node offers,
    # so the allocation comes back empty.
    assert policy.allocate_job(job_4, nodes) == []
def test_unusable_node():
    # Test where one of the nodes can't be used due to one resource type.
    nodes = {
        0: NodeInfo({"gpu": 1, "cpu": 500, "pods": 32}, preemptible=False),
        1: NodeInfo({"gpu": 1, "cpu": 8000, "pods": 32}, preemptible=False),
        2: NodeInfo({"gpu": 1, "cpu": 8000, "pods": 32}, preemptible=False),
    }
    template = NodeInfo({"gpu": 1, "cpu": 8000, "pods": 32}, preemptible=True)
    perf_params = PerfParams(0.121, 0.00568, 0.0236, 0.00634,
                             0.0118, 0.00317, 1.14)
    grad_params = GradParams(sqr=0.00136, var=0.000502)
    goodput_fn = GoodputFunction(perf_params, grad_params, 128)
    speedup_fn = SpeedupFunction(goodput_fn, max_batch_size=1280,
                                 atomic_bsz_range=(64, 256))
    now = datetime.now()
    min_replicas = 0
    jobs = {
        0: JobInfo({"gpu": 1, "cpu": 1000, "pods": 1}, speedup_fn,
                   now + timedelta(minutes=0), min_replicas, max_replicas=1),
        1: JobInfo({"gpu": 1, "cpu": 1000, "pods": 1}, speedup_fn,
                   now + timedelta(minutes=1), min_replicas, max_replicas=1),
        2: JobInfo({"gpu": 1, "cpu": 1000, "pods": 1}, speedup_fn,
                   now + timedelta(minutes=2), min_replicas, max_replicas=1),
    }
    policy = PolluxPolicy()
    allocations, desired_nodes = policy.optimize(jobs, nodes, {}, template)
    # Check that more nodes are asked for.
    assert desired_nodes > 3
    # Check no job was allocated more than 1 replica.
    assert max(len(alloc) for alloc in allocations.values()) == 1
    # Check two jobs were allocated.
    assert sum(len(alloc) for alloc in allocations.values()) == 2
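# For reference, a sketch of the result shapes checked above (inferred from
# these assertions, not an authoritative description of the API): optimize()
# returns "allocations", mapping each job key to the list of node keys that
# host its replicas, and "desired_nodes", the node count the policy would
# like the cluster to grow to. One plausible outcome for this test:
#     allocations == {0: [1], 1: [2], 2: []}  # job 2 left unallocated
#     desired_nodes == 4                      # one node beyond the current 3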
def hints(self):
    hints = copy.deepcopy(self._last_metrics)
    if hints:
        if hints["gradParams"]:
            # The metrics report this statistic under "norm", but the
            # GradParams namedtuple calls the field "sqr".
            hints["gradParams"]["sqr"] = hints["gradParams"]["norm"]
            del hints["gradParams"]["norm"]
        else:
            hints["gradParams"] = {"sqr": 1.0, "var": 1.0}
        hints["gradParams"] = GradParams(**hints["gradParams"])
        hints["perfParams"] = PerfParams(**hints["perfParams"])
    return hints
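# A minimal standalone sketch of the "norm" -> "sqr" rename above, using
# made-up metric values; GradParams takes exactly the fields "sqr" and "var":
from adaptdl.goodput import GradParams

raw = {"norm": 0.00136, "var": 0.000502}  # as stored in _last_metrics
raw["sqr"] = raw.pop("norm")              # rename to match GradParams
assert GradParams(**raw) == GradParams(sqr=0.00136, var=0.000502)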
def _get_job_info(self, job):
    job["spec"]["template"]["spec"] = \
        set_default_resources(job["spec"]["template"]["spec"])
    resources = get_pod_requests(job["spec"]["template"]["spec"])
    hints = job.get("status", {}).get("train", {})
    max_replicas = max(2 * hints.get("maxProfiledReplicas", 0), 1)
    if job["spec"].get("maxReplicas"):
        max_replicas = min(max_replicas, job["spec"]["maxReplicas"])
    min_replicas = job["spec"].get("minReplicas", 0)
    # max_replicas must be greater than or equal to min_replicas.
    max_replicas = max(max_replicas, min_replicas)
    preemptible = job["spec"].get("preemptible", True)
    if {"perfParams", "initBatchSize"} <= hints.keys() and preemptible:
        max_batch_size = (hints.get("maxBatchSize") or
                          hints["initBatchSize"])
        if hints.get("localBszBounds"):
            min_local_bsz = hints["localBszBounds"][0] or 1
            # Make sure max_batch_size / replicas >= min_local_bsz.
            if max_batch_size < min_local_bsz * max_replicas:
                max_replicas = int(max_batch_size / min_local_bsz)
        perf_params = PerfParams(
            *[hints["perfParams"][k] for k in PERF_PARAMS.keys()])
        if "gradParams" in hints:
            grad_params = GradParams(hints["gradParams"]["norm"],
                                     hints["gradParams"]["var"])
        else:
            grad_params = GradParams(0.0, 1.0)
        goodput_fn = GoodputFunction(perf_params, grad_params,
                                     hints["initBatchSize"],
                                     self._metrics_options)
        speedup_fn = SpeedupFunction(
            goodput_fn, hints.get("maxBatchSize"),
            hints.get("localBszBounds"),
            hints.get("gradientAccumulation", False))
    else:
        # No profile yet: assume speedup is linear in the replica count.
        speedup_fn = lambda n, r: r  # noqa: E731
    creation_ts = dateutil.parser.isoparse(
        job["metadata"]["creationTimestamp"])
    return JobInfo(resources, speedup_fn, creation_ts, min_replicas,
                   max_replicas, preemptible)
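# For illustration, a minimal sketch of the AdaptDLJob manifest shape that
# _get_job_info() consumes. The field names come from the accesses above;
# the pod template contents are placeholders, and the real CRD carries more
# fields than shown here:
example_job = {
    "metadata": {"creationTimestamp": "2021-01-01T00:00:00Z"},
    "spec": {
        "template": {"spec": {"containers": []}},  # pod spec (placeholder)
        "minReplicas": 0,
        "maxReplicas": 8,
        "preemptible": True,
    },
    "status": {
        "train": {
            # "perfParams" and "gradParams" appear here once profiling has
            # fitted the goodput model.
            "initBatchSize": 128,
            "maxBatchSize": 1280,
            "localBszBounds": [64, 256],
            "maxProfiledReplicas": 4,
            "gradientAccumulation": False,
        },
    },
}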
def test_optimize(num_nodes, total_devices=16):
    assert total_devices % num_nodes == 0
    num_devices = total_devices // num_nodes
    print("{}x{} nodes:".format(num_nodes, num_devices))
    # Make up a realistic speedup function.
    perf_params = PerfParams(0.121, 0.00568, 0.0236, 0.00634,
                             0.0118, 0.00317, 1.14)
    grad_params = GradParams(sqr=0.00136, var=0.000502)
    goodput_fn = GoodputFunction(perf_params, grad_params, 128)
    speedup_fn = SpeedupFunction(goodput_fn, max_batch_size=1280,
                                 atomic_bsz_range=(64, 256))
    now = datetime.now()
    jobs = {}
    # Add a few jobs.
    job_resources = {"nvidia.com/gpu": 1, "pods": 1}
    for i in range(16):
        creation_timestamp = now + timedelta(minutes=len(jobs))
        max_replicas = 8
        min_replicas = 0
        key = len(jobs)
        jobs[key] = JobInfo(job_resources, speedup_fn, creation_timestamp,
                            min_replicas, max_replicas)
    # Add a few nodes.
    node_resources = {"nvidia.com/gpu": num_devices, "pods": 32}
    nodes = {i: NodeInfo(node_resources, preemptible=False)
             for i in range(num_nodes)}
    # Add a node template.
    node_template = NodeInfo(node_resources, preemptible=True)
    policy = PolluxPolicy()
    prev_allocs = {}
    for i in range(3):
        start = time.time()
        allocations, desired_nodes = \
            policy.optimize(jobs, nodes, prev_allocs, node_template)
        duration = time.time() - start
        print("optimize {}x ({:.3f} sec):".format(i + 1, duration))
        node_count = Counter()
        for job_key, placement in allocations.items():
            assert len(placement) <= jobs[job_key].max_replicas
            for node_key in placement:
                node_count[node_key] += 1
        for node_key, count in node_count.items():
            assert count <= nodes[node_key].resources["nvidia.com/gpu"]
            assert count <= nodes[node_key].resources["pods"]
def job_info(self) -> JobInfo:
    metrics = self._fetch_metrics()
    if metrics is not None:
        perf_params = metrics.perf_params
        if metrics.grad_params is not None:
            grad_params = metrics.grad_params
        else:
            grad_params = GradParams(0.0, 1.0)
        goodput_fn = GoodputFunction(perf_params, grad_params,
                                     metrics.init_batch_size)
        speedup_fn = SpeedupFunction(goodput_fn, metrics.max_batch_size,
                                     metrics.local_bsz_bounds,
                                     metrics.gradient_accumulation)
    else:
        speedup_fn = lambda n, r: r  # noqa: E731
    return JobInfo(config.job_resources(), speedup_fn,
                   self.creation_timestamp, config._JOB_MIN_REPLICAS,
                   config._JOB_MAX_REPLICAS)
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


from adaptdl.goodput import GoodputFunction, PerfParams, GradParams
import itertools
import numpy as np
import pytest

RNG = np.random.RandomState(0)
PERF_PARAMS = [PerfParams(*RNG.gamma(2.0, 2.0, [7])) for i in range(10)]
GRAD_PARAMS = [GradParams(*RNG.gamma(2.0, 2.0, [2])) for i in range(10)]


def groupby_indices(*args):
    _, indices = np.unique(np.stack(args), axis=1, return_inverse=True)
    groups = {}
    for i, g in enumerate(indices):
        groups.setdefault(g, []).append(i)
    return list(groups.values())


@pytest.mark.parametrize("perf_params", PERF_PARAMS)
@pytest.mark.parametrize("grad_params", GRAD_PARAMS)
def test_evaluate(perf_params, grad_params):
    init_batch_size = 16
    goodput_fn = GoodputFunction(perf_params, grad_params, init_batch_size)
def test_optimize(num_nodes, total_devices=16):
    # Globals
    N_JOBS = 10
    JOBS = list(range(N_JOBS))
    random.shuffle(JOBS)
    PREEMPTIBLE_IDXS = JOBS[:len(JOBS) // 2]
    NON_PREEMPTIBLE_IDXS = JOBS[len(JOBS) // 2:]
    assert total_devices % num_nodes == 0
    num_devices = total_devices // num_nodes
    print(f"{num_nodes}x{num_devices} nodes:")
    # Make up a realistic speedup function.
    perf_params = PerfParams(0.121, 0.00568, 0.0236, 0.00634,
                             0.0118, 0.00317, 1.14)
    grad_params = GradParams(sqr=0.00136, var=0.000502)
    goodput_fn = GoodputFunction(perf_params, grad_params, 128)
    speedup_fn = SpeedupFunction(goodput_fn, max_batch_size=1280,
                                 atomic_bsz_range=(64, 256))
    now = datetime.now()
    policy = PolluxPolicy()
    job_resources = {"nvidia.com/gpu": 1, "pods": 1}
    # Add a few nodes.
    node_resources = {"nvidia.com/gpu": num_devices, "pods": 32}
    nodes = {i: NodeInfo(node_resources, preemptible=False)
             for i in range(num_nodes)}
    # Add a node template.
    node_template = NodeInfo(node_resources, preemptible=True)
    # Empty allocations.
    prev_allocs = {i: [] for i in JOBS}
    for cycle in range(3):
        # Start allocation cycle.
        jobs = {}
        for i in PREEMPTIBLE_IDXS:
            creation_timestamp = now + timedelta(minutes=i)
            jobs[i] = JobInfo(job_resources, speedup_fn, creation_timestamp,
                              min_replicas=0, max_replicas=8)
        for i in NON_PREEMPTIBLE_IDXS:
            creation_timestamp = now + timedelta(minutes=i)
            jobs[i] = JobInfo(job_resources, speedup_fn, creation_timestamp,
                              min_replicas=2, max_replicas=4,
                              preemptible=False)
        assert len(jobs) > 0
        start = time.time()
        allocations, desired_nodes = \
            policy.optimize(jobs, nodes, prev_allocs, node_template)
        duration = time.time() - start
        print(f"optimize {cycle + 1}x ({duration:.3f} sec)")
        node_count = Counter()
        for job_key, placement in allocations.items():
            assert len(placement) <= jobs[job_key].max_replicas
            if placement:
                assert len(placement) >= jobs[job_key].min_replicas
            for node_key in placement:
                node_count[node_key] += 1
        for node_key, count in node_count.items():
            assert count <= nodes[node_key].resources["nvidia.com/gpu"]
            assert count <= nodes[node_key].resources["pods"]
        # Check that allocations for non-preemptible jobs are maintained.
        for i in NON_PREEMPTIBLE_IDXS:
            if (i in allocations) and prev_allocs[i]:
                assert allocations[i] == prev_allocs[i]
        prev_allocs = copy.deepcopy(allocations)
        # Remove one random job.
        remove = random.choice(list(allocations))
        if remove in NON_PREEMPTIBLE_IDXS:
            NON_PREEMPTIBLE_IDXS.remove(remove)
            print(f"Deleting non-preemptible job {remove}")
        else:
            PREEMPTIBLE_IDXS.remove(remove)
            print(f"Deleting preemptible job {remove}")
        prev_allocs.pop(remove)