def genDAG(cls, alpha=0.2, taskCount=30):
    """Generate 100 CyberShake environments and pickle them to disk.

    Writes the list of Environment objects to 'Cybershake-<alpha>-ENVs'.

    Args:
        alpha: deadline-tightness factor forwarded to Workflow/Environment.
        taskCount: number of tasks in each generated workflow.
    """
    envs = []
    dag = CybershakeDAGGen.getDAG()
    for _ in range(100):
        # Each environment wraps a fresh Workflow built on the shared DAG.
        cybershake_workflow = Workflow(taskCount=taskCount, alpha=alpha, DAG=dag)
        env = Environment(taskCount=taskCount, save=False, alpha=alpha,
                          workflow=cybershake_workflow)
        envs.append(env)
    # Context manager guarantees the handle is closed even if dump raises
    # (original used bare open()/close()).
    with open('Cybershake-' + str(alpha) + '-ENVs', 'wb') as dbfile:
        pickle.dump(envs, dbfile)
"""Build 100 LIGO environments from a pickled origin workflow and
persist them as 'LIGO-<alpha>-ENVs'."""
from Env.Workflow import Workflow
from Env.Environment import Environment
import numpy as np
import pickle

taskCount = 30
alpha = 0.8  # deadline-tightness factor
ENVs = []
# NOTE(review): pickle.load can execute arbitrary code — only load
# trusted files.
with open('./LIGO-Origin', 'rb') as file:
    OriginWorkflow = pickle.load(file)
for i in range(100):
    # used for other
    env = Environment(taskCount=taskCount, save=False, alpha=alpha,
                      workflow=OriginWorkflow[i].workflow)
    # used for LIGO
    # env = Environment(taskCount=taskCount, save=False, alpha=alpha, workflow=OriginWorkflow[i])
    ENVs.append(env)
# with-statement guarantees the dump file is closed even if dump raises
# (original used bare open()/close()).
with open('LIGO-' + str(alpha) + '-ENVs', 'wb') as dbfile:
    pickle.dump(ENVs, dbfile)

#name = 'Epig'
#alpha = 0.8
#ENVs = []
#DAG = XMLtoDAG("Epigenomics_24.xml", taskCount=24).getDAG()
#
# Smoke-test script: builds a single 10-task environment and steps it
# until completion, printing critical-path/deadline diagnostics.
from Env.Environment import Environment
import numpy as np
import time

env = Environment(taskCount=10, alpha=0.5)
env.workflow.print()
cpt = env.workflow.CPTime  # critical-path time of the generated workflow
print('cptime:', cpt)
print('forwardCP:', env.workflow.forwardCP)
# NOTE(review): 'workflowbak' — presumably a backup copy of the workflow;
# confirm this is not a typo for 'workflow'.
print('DL: ', env.workflowbak.DeadLine)
print('\n\n')
t = time.time()
actions = []
while True:
    taskNos = env.getNewTasks()
    if len(taskNos) == 0:
        # No ready tasks: advance simulated time, then check termination.
        env.spanTimeProcess()
        done, reward = env.isDone()
        if done:
            ob = env.getObservation()
            print('Actions: ', actions, 'Reward: ', reward)
            print('DeadLine:', env.workflow.DeadLine, ' Makespan: ', env.currentTime)
            print('Cost: ', env.totalCost)
            break
    else:
        # Source truncated here: the branch that schedules newly ready
        # tasks is not visible in this chunk.
def run(self):
    """Improver main loop.

    Steps the environment forever with the epsilon-greedy behavior
    policy, pushes transitions into the bounded replay memory, and swaps
    in fresh network weights whenever the evaluator publishes them via
    self.shared. Never returns (the loop has no exit condition).
    """
    POPULATE_FLAG = True           # True while we owe the evaluator fresh samples
    MEMORY_SIZE = self.MEMORY_SIZE
    POPULATE_MAX = 1               # samples to gather per weight refresh
    populate_num = 0
    PLOT_FREQ = 20
    R = 0.                         # accumulated reward, for plotting
    step = 0                       # episodes finished since last plot
    Repi = 0                       # reward of the current episode
    self.env = Environment()
    self.env.set_all()
    self.env.reset()
    last_time = time.time()
    prev_plot = self.steps_done
    pretrain = True
    PRETRAIN_MAX = 2000            # warm-up samples before the first hand-off

    ckpt_path = 'save/checkpoint_improver.pth.tar'
    if os.path.isfile(ckpt_path):
        print("=> loading checkpoint '{}'".format(ckpt_path))
        checkpoint = torch.load(ckpt_path)
        # BUG FIX: the checkpoint was loaded but never applied to the
        # network; restore the weights saved by save_checkpoint().
        self.net.load_state_dict(checkpoint['state_dict'])
        print("=> loaded checkpoint '{}'".format(ckpt_path))
    else:
        print("=> no checkpoint found at '{}'".format(ckpt_path))

    while True:
        if POPULATE_FLAG:
            if pretrain:
                if populate_num == PRETRAIN_MAX:
                    pretrain = False
                    # Signal the evaluator that samples are ready.
                    self.shared['SENT_FLAG'] = 0
                    POPULATE_FLAG = False
            else:
                if populate_num == POPULATE_MAX:
                    # reset populate num
                    self.shared['SENT_FLAG'] = 0
                    POPULATE_FLAG = False
        else:
            if self.shared['SENT_FLAG']:
                # evaluator sent the weights
                print('copying...')
                self.net.load_state_dict(self.shared['weights'])
                POPULATE_FLAG = True
                populate_num = 0
                # After evaluating the policy for one round, make epsilon
                # smaller (steps_done drives the decay in behavior_policy).
                next_step = self.steps_done + 1
                if next_step != 0:  # guard against wrap-around
                    self.steps_done = next_step
                self.save_checkpoint({
                    'state_dict': self.net.state_dict(),
                })
        state = self.env.get_state()  # fetch state late to avoid time delay
        last_time = time.time()
        # Convert the numpy state to a batched torch tensor.  (~0.003s)
        state_torch = torch.from_numpy(state).type(FloatTensor)
        state_torch = state_torch.unsqueeze(0).type(FloatTensor)
        action = self.behavior_policy(state_torch)
        next_state, r, done = self.env.step(action)  # ~0.03s
        # Bounded replay memory: drop the oldest transition when full.
        if len(self.memory) == MEMORY_SIZE:
            self.memory.pop(0)
        self.memory.append((state, [action], [r], next_state, [1 - done]))
        if POPULATE_FLAG:
            populate_num += 1
        # Track per-episode reward for the average-reward plot.
        Repi += r
        if done:
            step += 1
            R += Repi
            Repi = 0.  # reset episode reward
            if step and self.steps_done > prev_plot:
                print('average rewards after {} train: {}' .
                      format(self.steps_done - prev_plot, R / step))
                self.plotter.plot_train_rewards(R / step)  # 2-3s
                R = 0.
                step = 0
                prev_plot = self.steps_done
    self.plotter.terminate()  # unreachable: the loop above never breaks
class Improver(object):
    """Experience-gathering half of a distributed DQN setup.

    Repeatedly steps the environment with an epsilon-greedy behavior
    policy, pushes transitions into the shared replay memory, and swaps
    in fresh network weights whenever the evaluator publishes them.
    """

    def __init__(self, net, memory_size, memory, shared):
        self.net = net        # Deep Net (Q-network)
        self.memory = memory  # replay buffer (plain list, bounded in run())
        self.shared = shared  # shared resources, {'memory', 'SENT_FLAG', 'weights'}
        self.plotter = Plotter(folder='DQN/plot/cartpole_simple/exp4')
        self.steps_done = 0   # evaluator hand-offs so far; drives epsilon decay
        # hyperparameters:
        self.EPS_START = 1.
        self.EPS_END = 0.05
        self.EPS_DECAY = 50   # DECAY larger: slower
        self.MEMORY_SIZE = memory_size

    def behavior_policy(self, state):
        """Epsilon-greedy action selection (epsilon decays with steps_done).

        Returns the greedy action from the network, or a random sample
        from the environment's action space.

        NOTE(review): uses self.env, which is only created inside run() —
        this method must not be called before run() starts.
        """
        sample = random.random()
        eps_threshold = self.EPS_END + (self.EPS_START - self.EPS_END) * \
            math.exp(-1. * self.steps_done / self.EPS_DECAY)
        if sample > eps_threshold:
            return self.policy(state)
        else:
            return self.env.action_space.sample()

    def policy(self, state):
        """Greedy action: argmax over the network's Q-values.

        Uses the legacy (pre-0.4) PyTorch Variable/volatile API, matching
        the rest of this file.
        """
        res = self.net(Variable(state, volatile=True).type(FloatTensor)).data
        return res.max(1)[1][0]

    def save_checkpoint(self, state, filename='save/checkpoint_improver.pth.tar'):
        """Persist a checkpoint dict (expects {'state_dict': ...})."""
        torch.save(state, filename)

    def run(self):
        """Main loop: gather experience forever; never returns."""
        POPULATE_FLAG = True           # True while we owe the evaluator samples
        MEMORY_SIZE = self.MEMORY_SIZE
        POPULATE_MAX = 1               # samples to gather per weight refresh
        populate_num = 0
        PLOT_FREQ = 20
        R = 0.                         # accumulated reward, for plotting
        step = 0                       # episodes finished since last plot
        Repi = 0                       # reward of the current episode
        self.env = Environment()
        self.env.set_all()
        self.env.reset()
        last_time = time.time()
        prev_plot = self.steps_done
        pretrain = True
        PRETRAIN_MAX = 2000            # warm-up samples before first hand-off

        ckpt_path = 'save/checkpoint_improver.pth.tar'
        if os.path.isfile(ckpt_path):
            print("=> loading checkpoint '{}'".format(ckpt_path))
            checkpoint = torch.load(ckpt_path)
            # BUG FIX: the checkpoint was loaded but never applied to the
            # network; restore the weights saved by save_checkpoint().
            self.net.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}'".format(ckpt_path))
        else:
            print("=> no checkpoint found at '{}'".format(ckpt_path))

        while True:
            if POPULATE_FLAG:
                if pretrain:
                    if populate_num == PRETRAIN_MAX:
                        pretrain = False
                        # Signal the evaluator that samples are ready.
                        self.shared['SENT_FLAG'] = 0
                        POPULATE_FLAG = False
                else:
                    if populate_num == POPULATE_MAX:
                        # reset populate num
                        self.shared['SENT_FLAG'] = 0
                        POPULATE_FLAG = False
            else:
                if self.shared['SENT_FLAG']:
                    # evaluator sent the weights
                    print('copying...')
                    self.net.load_state_dict(self.shared['weights'])
                    POPULATE_FLAG = True
                    populate_num = 0
                    # After one evaluation round, make epsilon smaller.
                    next_step = self.steps_done + 1
                    if next_step != 0:  # guard against wrap-around
                        self.steps_done = next_step
                    self.save_checkpoint({
                        'state_dict': self.net.state_dict(),
                    })
            state = self.env.get_state()  # fetch late to avoid time delay
            last_time = time.time()
            # Convert the numpy state to a batched torch tensor. (~0.003s)
            state_torch = torch.from_numpy(state).type(FloatTensor)
            state_torch = state_torch.unsqueeze(0).type(FloatTensor)
            action = self.behavior_policy(state_torch)
            next_state, r, done = self.env.step(action)  # ~0.03s
            # Bounded replay memory: drop the oldest transition when full.
            if len(self.memory) == MEMORY_SIZE:
                self.memory.pop(0)
            self.memory.append((state, [action], [r], next_state, [1 - done]))
            if POPULATE_FLAG:
                populate_num += 1
            # Track per-episode reward for the average-reward plot.
            Repi += r
            if done:
                step += 1
                R += Repi
                Repi = 0.  # reset episode reward
                if step and self.steps_done > prev_plot:
                    print('average rewards after {} train: {}' .
                          format(self.steps_done - prev_plot, R / step))
                    self.plotter.plot_train_rewards(R / step)  # 2-3s
                    R = 0.
                    step = 0
                    prev_plot = self.steps_done
        self.plotter.terminate()  # unreachable: the loop never breaks
"""Train the RL agent on the 24-task Epigenomics scientific workflow."""
from learn_Scientific import RLagent_Scientific
from Env.Environment import Environment, Workflow
from ScientificWorkflow.XMLProcess import XMLtoDAG
import copy

taskCount = 24
alpha = 0.4  # deadline-tightness factor
DAG = XMLtoDAG("../ScientificWorkflow/Epigenomics_24.xml", taskCount=taskCount).getDAG()
# Fixed misleading name: this loads the Epigenomics workflow, not Montage
# (the original local was 'montageWorklfow' — also a typo).
epigenomicsWorkflow = Workflow(taskCount=taskCount, alpha=alpha, DAG=DAG)
env = Environment(taskCount=taskCount, alpha=alpha, workflow=epigenomicsWorkflow)
# NOTE(review): 'perfix' is presumably a typo for 'prefix' — keyword name
# kept as-is to match the RLagent_Scientific signature.
mt = RLagent_Scientific(env, taskCount, alpha, hiddenSize=60, perfix='Epig')
# Epsilon-greedy exploration schedule.
mt.epsilon = 0.3
mt.epsilon_end = 0.05
mt.epsilon_decay = 200
mt.train()
# Evaluation script (truncated in this chunk): loads a trained DQN
# checkpoint and compares RL scheduling against baselines over 100 runs.
from model import DQN
from Env.Environment import Environment
import torch
import torch.autograd as autograd
from utils import sample_action
import numpy as np
from ICPCP import ICPCP

# NOTE(review): torch.load unpickles arbitrary data — only load trusted
# checkpoint files.
the_model = torch.load('./tmp/100-0.8.pth', map_location='cpu')
dqn = DQN()
dqn.load_state_dict(the_model)
env = Environment(taskCount=100, alpha=0.8)
done = False
# volatile=True: legacy (pre-0.4) PyTorch idiom for inference-only tensors.
var_phi = autograd.Variable(torch.Tensor(6), volatile=True)
RL_fail = 0       # presumably counts RL runs that miss the deadline — confirm
RL_cost = []
Random_fail = 0
Random_cost = []
for i in range(100):
    RL_actions = []
    He_actions = []
    while True:
        taskNos = env.getNewTasks()
        if len(taskNos) == 0:
            # No ready tasks: advance simulated time, then check if done.
            env.spanTimeProcess()
            done, r = env.isDone()
        else:
            # Source truncated here: the scheduling branch is not visible
            # in this chunk.
"""Train the RL agent on a 30-task CyberShake workflow."""
from learn_Scientific import RLagent_Scientific
from Env.Environment import Environment, Workflow
from ScientificWorkflow import generateCybershakeENV
from ScientificWorkflow.XMLProcess import XMLtoDAG
import copy
import numpy as np

task_count = 30
deadline_alpha = 0.6

# Build the workflow from the shared CyberShake DAG template.
dag = generateCybershakeENV.CybershakeDAGGen.getDAG()
cybershake_workflow = Workflow(taskCount=task_count, alpha=deadline_alpha, DAG=dag)
environment = Environment(taskCount=task_count, alpha=deadline_alpha,
                          workflow=cybershake_workflow)

agent = RLagent_Scientific(environment, task_count, deadline_alpha,
                           hiddenSize=80, perfix='CyberShake')
# Epsilon-greedy exploration schedule.
agent.epsilon = 0.3
agent.epsilon_end = 0.05
agent.epsilon_decay = 200
agent.train()