Example #1
0
import numpy as np

# Parameters, Machine, Task and Task_Dist are assumed to be defined in the
# project's own modules; they are not part of this snippet.
class Env1:
    def __init__(self, cur_time, time_step):
        self.params = Parameters()
        self.cur_time = cur_time
        self.time_step = time_step
        self.machines = []
        self.waiting_tasks = []
        #initialize machines
        for i in range(self.params.num_machines):
            self.machines.append(Machine(i, self.params.machine_res_cap))

        self.task_dist = Task_Dist()
        self.workload_seq = None
        self.seq_id = 0

    #Generate workload and populate self.waiting_tasks after each interval
    def generate_workload(self):
        if len(self.workload_seq) <= self.seq_id:
            return

    #	print('Incoming Tasks: ',self.seq_id, self.workload_seq[self.seq_id])
        if self.workload_seq[self.seq_id]:
            for task_type in self.workload_seq[self.seq_id]:
                task_color, task_cpu_limit, task_finish_time = self.task_dist.get_task_details(
                    task_type)

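                # Choose a colour strictly greater than that of any waiting or
                # running task of the same service, so tasks of one service stay
                # distinguishable in the image built by observe().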
                max_color = task_color
                for tsk in self.waiting_tasks:
                    if task_type == tsk.service:
                        if max_color <= tsk.color:
                            max_color = tsk.color
                for mcn in self.machines:
                    for tsk in mcn.running_tasks:
                        if task_type == tsk.service and max_color <= tsk.color:
                            max_color = tsk.color
                task_color = max_color + 0.01

                self.waiting_tasks.append(
                    Task(task_type, task_color, task_cpu_limit,
                         task_finish_time, self.cur_time))

    def observe(self):
        img_repr = np.zeros((self.params.state_len, self.params.state_width))
        #add machines
        used_width = 0
        for res in range(self.params.num_res):
            for machine in self.machines:
                img_repr[:, used_width:used_width +
                         self.params.machine_res_cap] = machine.canvas[
                             res, :, :]
                used_width += self.params.machine_res_cap
        #add backlog queue
        if len(self.waiting_tasks) > 0:
            t = 0
            for i in range(self.params.state_len):
                for j in range(self.params.backlog_width):
                    img_repr[i, used_width + j] = self.waiting_tasks[t].color
                    t += 1
                    if (t == len(self.waiting_tasks)):
                        break
                if (t == len(self.waiting_tasks)):
                    break

        used_width += self.params.backlog_width
        assert used_width == self.params.state_width

        return img_repr

    def step(self, action, episode_time, rewards, c, log=False):
        status = None
        done = False
        reward = 0

        if len(self.waiting_tasks) == 0:
            status = 'Backlog_Empty'

        elif action == self.params.num_machines:
            status = 'Invalid'
        else:
            allocated = self.machines[action].allocate_task(
                self.waiting_tasks[c], episode_time)
            if allocated:
                status = 'Allocation_Success'
                self.waiting_tasks[c].start_time = self.cur_time
                self.waiting_tasks = self.waiting_tasks[
                    0:c] + self.waiting_tasks[c + 1:]
            else:
                status = 'Allocation_Failed'

        if status in ('Allocation_Success', 'Invalid', 'Allocation_Failed', 'Backlog_Empty'):
            self.seq_id += 1
            # self.generate_workload()
            self.update()
            #TODO fix max no of jobs, so when all jobs complete episode ends
            unfinished = 0
            for machine in self.machines:
                if len(machine.running_tasks) != 0:
                    unfinished += 1
            if unfinished == 0 and len(self.waiting_tasks) == 0:
                done = True
            if self.cur_time > self.params.episode_max_length:  # run too long, force termination
                done = True
            rewards = self.get_reward(rewards, log)
            self.generate_workload()
            # if status == 'Allocation_Failed':
            # 	reward += -100
            # if status == 'No_More_Jobs' and action < self.params.num_machines:
            # 	reward += -100
            # if len(self.waiting_tasks) != 0 and action == self.params.num_machines:
            # 	reward += -100

        ob = self.observe()
        if done:
            self.reset()
        return ob, rewards, done, status

    def reset(self):
        self.cur_time = 0
        self.machines = []
        self.waiting_tasks = []
        for i in range(self.params.num_machines):
            self.machines.append(Machine(i, self.params.machine_res_cap))
        self.seq_id = 0
        #self.workload_seq = self.task_dist.gen_seq_workload()

    def get_suitable_machines(self, task):
        return [
            machine for machine in self.machines
            if machine.cpus_left > task.cpu_limit
        ]

    def update(self):
        self.cur_time += self.time_step
        for machine in self.machines:
            machine.update(self.task_dist, self.cur_time)

    def schedule(self):
        unscheduled_tasks = []
        for task in self.waiting_tasks:
            suitable_machines = self.get_suitable_machines(task)
            if not suitable_machines:
                # no machine can host this task right now; keep it in the backlog
                unscheduled_tasks.append(task)
                continue

            # find_best_machine returns a length-1 array (or None), hence machine[0]
            machine = self.find_best_machine(task, suitable_machines)
            if machine is not None:
                task.start_time = self.cur_time
                machine[0].allocate_task(task)
            else:
                unscheduled_tasks.append(task)
        self.waiting_tasks = unscheduled_tasks

    def find_best_machine(self, task, suitable_machines):
        if np.random.randint(2):
            return np.random.choice(suitable_machines,
                                    1,
                                    p=[1 / len(suitable_machines)] *
                                    len(suitable_machines))
        else:
            return None

    def get_reward(self, rewards, log):
        rewards.append(0)
        #Penalty for putting a task on hold
        rewards[-1] += self.params.hold_penalty * len(self.waiting_tasks) / (
            self.params.state_len * self.params.state_width
        )  #TODO add some penalty factor
        # print('Hold : ', self.params.hold_penalty * len(self.waiting_tasks))
        #Penalty using cross-correlation of cpu_util
        for i, machine in enumerate(self.machines):
            # print('cpus left:', machine.cpus_left)
            if len(machine.running_tasks) == 1 and len(
                    machine.running_tasks[0].cpu_util) == 1:
                # print(i+1, 'New machine allocated')
                rewards[-1] += self.params.machine_used_penalty / (
                    self.params.state_len * self.params.state_width)
            if machine.cpus_left < 0 and not machine.running_tasks[
                    -1].already_overshoot:
                if log:
                    print(i + 1, 'Overshoot', self.params.overshoot_penalty)
                machine.running_tasks[-1].already_overshoot = True
                rewards[machine.running_tasks[-1].
                        episode_time] += self.params.overshoot_penalty / (
                            self.params.state_len * self.params.state_width)
            for task in machine.running_tasks:
                for tsk in task.conf_at_scheduling:
                    if tsk in machine.running_tasks:
                        rewards[
                            task.
                            episode_time] += self.params.interference_penalty * (
                                task.cpu_util[-1] *
                                tsk.cpu_util[-1]) / (self.params.state_len *
                                                     self.params.state_width)
                        # print('Other : ', self.params.interference_penalty * (task.cpu_util[-1] * tsk.cpu_util[-1]))
            # for i in range(len(machine.running_tasks)):
            # 	for j in range(i+1, len(machine.running_tasks)):
            # 		task_i, task_j = machine.running_tasks[i], machine.running_tasks[j]
            # 		# if task_i != task_j and len(task_i.cpu_util) > self.params.hist_wind_len and len(task_j.cpu_util) > self.params.hist_wind_len:
            # 		# 	reward += self.params.interference_penalty * (np.correlate(task_i.cpu_util[-self.params.hist_wind_len:], task_j.cpu_util[-self.params.hist_wind_len:]))
            # 		if task_i != task_j:
            # 			# m = min(self.params.hist_wind_len, min(len(task_i.cpu_util), len(task_j.cpu_util)))
            # 			# reward += self.params.interference_penalty * (np.correlate(task_i.cpu_util[-m:], task_j.cpu_util[-m:]))
            # 			reward += self.params.interference_penalty * (task_i.cpu_util[-1] * task_j.cpu_util[-1])
        return rewards
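
A minimal driver sketch for Env1 (not part of the original code): pick_action is a hypothetical policy stub, the gen_seq_workload() call follows the commented-out hint in reset(), and c=0 always schedules the first waiting task.

def run_episode(pick_action, time_step=1):
    # Sketch only: drive one episode of Env1 under the assumptions above.
    env = Env1(cur_time=0, time_step=time_step)
    env.workload_seq = env.task_dist.gen_seq_workload()  # assumption, see reset()
    env.generate_workload()

    rewards = []
    done = False
    episode_time = 0
    while not done:
        ob = env.observe()
        action = pick_action(ob)  # machine index, or num_machines to wait
        ob, rewards, done, status = env.step(action, episode_time, rewards, c=0)
        episode_time += 1
    return rewards
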
Example #2
0
import numpy as np

# Params, Machine, Task and Task_Dist are assumed to be defined in the
# project's own modules; they are not part of this snippet.
class Env:
    def __init__(self, cur_time, time_step):
        self.params = Params()
        self.cur_time = cur_time
        self.time_step = time_step
        self.machines = []
        self.waiting_tasks = []
        #initialize machines
        for i in range(self.params.num_machines):
            self.machines.append(
                Machine(i, self.params.machine_res_cap[0],
                        self.params.machine_res_cap[1]))

        self.task_dist = Task_Dist()
        self.workload_seq = None
        self.seq_id = 0

    #Generate workload and populate self.waiting_tasks after each interval
    def generate_workload(self):
        if len(self.workload_seq) <= self.seq_id:
            return

    #	print('Incoming Tasks: ',self.seq_id, self.workload_seq[self.seq_id])
        if self.workload_seq[self.seq_id]:
            for task_type in self.workload_seq[self.seq_id]:
                task_color, task_cpu_limit, task_mem_limit, task_finish_time = self.task_dist.get_task_details(
                    task_type)

                max_color = task_color
                for tsk in self.waiting_tasks:
                    if task_type == tsk.service:
                        if max_color <= tsk.color:
                            max_color = tsk.color
                for mcn in self.machines:
                    for tsk in mcn.running_tasks:
                        if task_type == tsk.service and max_color <= tsk.color:
                            max_color = tsk.color
                task_color = max_color + 0.01

                self.waiting_tasks.append(
                    Task(task_type, task_color, task_cpu_limit, task_mem_limit,
                         task_finish_time, self.cur_time))

    #return the state of the environment as a 2D matrix

    def observe(self):
        img_repr = np.zeros((self.params.state_len, self.params.state_width))
        #add machines
        used_width = 0
        for res in range(self.params.num_res):
            for machine in self.machines:
                #	print(res,machine.canvas[res][0,:,:].shape)
                if res == 0:
                    img_repr[:, used_width:used_width + self.params.
                             machine_res_cap[res]] = machine.canvas1[0, :, :]
                else:
                    img_repr[:, used_width:used_width + self.params.
                             machine_res_cap[res]] = machine.canvas2[0, :, :]
            #	print('image',img_repr)
                used_width += self.params.machine_res_cap[res] + 1
        #add backlog queue
        if len(self.waiting_tasks) > 0:
            t = 0
            for i in range(self.params.state_len):
                for j in range(self.params.backlog_width):
                    img_repr[i, used_width + j] = self.waiting_tasks[t].color
                    t += 1
                    if (t == len(self.waiting_tasks)):
                        break
                if (t == len(self.waiting_tasks)):
                    break

        used_width += self.params.backlog_width
        assert used_width == self.params.state_width

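        # Mark the separator column after each machine's resource block with the
        # colours (offset by machine.mid) of the tasks currently running on it.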
        k = -1
        for res in range(self.params.num_res):
            for machine in self.machines:
                k += self.params.machine_res_cap[res] + 1
                j = 0

                for m in machine.running_tasks:
                    img_repr[j, k] = m.color + machine.mid
                    j += 1

        return img_repr

    #apply the action, advance the environment, and return the new state, rewards, done flag and status
    def step(self, action, episode_time, rewards, task_no):
        status = None
        done = False
        reward = 0

        if len(self.waiting_tasks) == 0:
            status = 'Backlog_Empty'
        elif action == self.params.num_machines:
            status = 'Invalid'
        else:
            allocated = self.machines[action].allocate_task(
                self.waiting_tasks[task_no], episode_time)
            if allocated:
                status = 'Allocation_Success'
                #	print('Current Time>>',self.cur_time)
                self.waiting_tasks[task_no].start_time = self.cur_time
                self.waiting_tasks = self.waiting_tasks[
                    0:task_no] + self.waiting_tasks[task_no + 1:]
            else:
                status = 'Allocation_Failed'

        if (status == 'Invalid') or (status == 'Allocation_Failed') or (
                status == 'Backlog_Empty'):
            self.seq_id += 1
            # self.generate_workload()
            self.update()
            #TODO fix max no of jobs, so when all jobs complete episode ends
            unfinished = 0
            for machine in self.machines:
                if len(machine.running_tasks) != 0:
                    unfinished += 1
            if unfinished == 0 and len(self.waiting_tasks) == 0:
                done = True
            if self.cur_time > self.params.episode_max_length:  # run too long, force termination
                done = True
            rewards = self.get_reward(rewards)
            self.generate_workload()
            # if status == 'Allocation_Failed':
            # 	reward += -100
            # if status == 'No_More_Jobs' and action < self.params.num_machines:
            # 	reward += -100
            # if len(self.waiting_tasks) != 0 and action == self.params.num_machines:

            # 	reward += -100

        else:
            unfinished = 0
            for machine in self.machines:
                if len(machine.running_tasks) != 0:
                    unfinished += 1
            if unfinished == 0 and len(self.waiting_tasks) == 0:
                done = True
            if self.cur_time > self.params.episode_max_length:  # run too long, force termination
                done = True
            rewards = self.get_reward(rewards)

        ob = self.observe()
        if done:
            self.reset()

        return ob, rewards, done, status

    #reset the system by emptying all machines and setting time back to 0
    def reset(self):
        self.cur_time = 0
        self.machines = []
        self.waiting_tasks = []
        for i in range(self.params.num_machines):
            self.machines.append(
                Machine(i, self.params.machine_res_cap[0],
                        self.params.machine_res_cap[1]))
        self.seq_id = 0

    #update time, status of running tasks in every machine
    def update(self):
        self.cur_time += self.time_step
        for machine in self.machines:
            machine.update(self.task_dist, self.cur_time)

    def get_reward(self, rewards):
        rewards.append(0)

        #Penalty for putting a task on hold
        rewards[-1] += self.params.hold_penalty * len(
            self.waiting_tasks)  #TODO add some penalty factor
        # print('Hold : ', self.params.hold_penalty * len(self.waiting_tasks))
        #Penalty using cross-correlation of cpu_util
        for i, machine in enumerate(self.machines):
            tasks = []
            # print('cpus left:', machine.cpus_left)
            if len(machine.running_tasks) > 0 and machine.cpus_left > 0:
                # print(i+1, 'New machine allocated')
                rewards[-1] += (-1) * pow(machine.cpus_left,
                                          self.params.machine_used_penalty)
            if len(machine.running_tasks) > 0 and machine.mems_left > 0:
                # print(i+1, 'New machine allocated')
                rewards[-1] += (-1) * pow(machine.mems_left,
                                          self.params.machine_used_penalty)
        #	if len(machine.running_tasks) == 1 and len(machine.running_tasks[0].cpu_util) == 0:
        #		print(i+1, 'New machine allocated')
        #		rewards[-1] += self.params.machine_used_penalty
            for j, task in enumerate(reversed(machine.running_tasks)):
                tasks.append(task)
                if j == 0:
                    if machine.cpus_left < 0 and not task.already_overshoot_cpu:
                        print(i + 1, 'OvershootA_CPU', abs(machine.cpus_left))
                        task.already_overshoot_cpu = True
                        rewards[
                            task.episode_time] += self.params.overshoot_penalty
                else:
                    util_sum = 0
                    for m in tasks[0:j]:
                        if len(m.cpu_util) > 0:
                            util_sum += m.cpu_util[-1]
                    if (machine.cpus_left +
                            util_sum) < 0 and not task.already_overshoot_cpu:
                        print(i + 1, 'OvershootB_CPU',
                              abs(machine.cpus_left + util_sum))
                        task.already_overshoot_cpu = True
                        rewards[
                            task.episode_time] += self.params.overshoot_penalty

            tasks = []
            for j, task in enumerate(reversed(machine.running_tasks)):
                tasks.append(task)
                if j == 0:
                    if machine.mems_left < 0 and not task.already_overshoot_mem:
                        print(i + 1, 'OvershootA_MEM', abs(machine.mems_left))
                        task.already_overshoot_mem = True
                        rewards[
                            task.episode_time] += self.params.overshoot_penalty
                else:
                    util_sum = 0
                    for m in tasks[0:j]:
                        if len(m.mem_util) > 0:
                            util_sum += m.mem_util[-1]
                    if (machine.mems_left +
                            util_sum) < 0 and not task.already_overshoot_mem:
                        print(i + 1, 'OvershootB_MEM',
                              abs(machine.mems_left + util_sum))
                        task.already_overshoot_mem = True
                        rewards[
                            task.episode_time] += self.params.overshoot_penalty

            for task in machine.running_tasks:
                for tsk in task.conf_at_scheduling:
                    if tsk in machine.running_tasks:
                        if len(task.cpu_util) > 0 and len(tsk.cpu_util) > 0:
                            rewards[
                                task.
                                episode_time] += self.params.interference_penalty_cpu * (
                                    task.cpu_util[-1] * tsk.cpu_util[-1])
                        if len(task.mem_util) > 0 and len(tsk.mem_util) > 0:
                            rewards[
                                task.
                                episode_time] += self.params.interference_penalty_mem * (
                                    task.mem_util[-1] * tsk.mem_util[-1])
                        # print('Other : ', self.params.interference_penalty * (task.cpu_util[-1] * tsk.cpu_util[-1]))
            # for i in range(len(machine.running_tasks)):
            # 	for j in range(i+1, len(machine.running_tasks)):
            # 		task_i, task_j = machine.running_tasks[i], machine.running_tasks[j]
            # 		# if task_i != task_j and len(task_i.cpu_util) > self.params.hist_wind_len and len(task_j.cpu_util) > self.params.hist_wind_len:
            # 		# 	reward += self.params.interference_penalty * (np.correlate(task_i.cpu_util[-self.params.hist_wind_len:], task_j.cpu_util[-self.params.hist_wind_len:]))
            # 		if task_i != task_j:
            # 			# m = min(self.params.hist_wind_len, min(len(task_i.cpu_util), len(task_j.cpu_util)))
            # 			# reward += self.params.interference_penalty * (np.correlate(task_i.cpu_util[-m:], task_j.cpu_util[-m:]))
            # 			reward += self.params.interference_penalty * (task_i.cpu_util[-1] * task_j.cpu_util[-1])
        return rewards
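
In both examples, get_reward appends one entry per environment step and back-patches overshoot and interference penalties at index task.episode_time, i.e. the step at which the offending task was scheduled. One plausible way to consume such a list for training is sketched below; the discount factor gamma is an assumption and not taken from the original code.

def discounted_returns(rewards, gamma=0.99):
    # Sketch only: fold the per-step rewards list into discounted returns.
    returns = [0.0] * len(rewards)
    running = 0.0
    for t in reversed(range(len(rewards))):
        running = rewards[t] + gamma * running
        returns[t] = running
    return returns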