def addReward(self, rnnkey, reward):
    if self.nn_model:  # i.e. a pretrained model was given, no training
        return
    assert rnnkey in self.keyedSBatch and rnnkey in self.keyedActionProb
    state = self.keyedSBatch[rnnkey]
    action_prob = self.keyedActionProb[rnnkey]
    action = self.keyedAction[rnnkey]
    myprint(
        "Training dataset:", {
            "input": self.keyedInputParam[rnnkey],
            "action": self._vActionset[self.keyedAction[rnnkey]],
            "key": rnnkey,
            "reward": reward,
            "action_prob": action_prob.tolist()
        })
    del self.keyedSBatch[rnnkey]
    del self.keyedActionProb[rnnkey]
    del self.keyedAction[rnnkey]
    del self.keyedInputParam[rnnkey]

    self.r_batch.append(reward)
    self.entropy_record.append(a3c.compute_entropy(action_prob[0]))
    self.s_batch.append(state)

    action_vec = np.zeros(self.a_dim)
    action_vec[action] = 1
    self.a_batch.append(action_vec)

    if len(self.r_batch) >= TRAIN_SEQ_LEN:  # do training once
        self.saveModel()
def saveModel(self, s_batch, a_batch, r_batch, entropy_record, end_of_video=False):
    actor_gradient, critic_gradient, td_batch = \
        a3c.compute_gradients(s_batch=np.stack(s_batch, axis=0),  # ignore the first chunk
                              a_batch=np.vstack(a_batch),         # since we don't have
                              r_batch=np.vstack(r_batch),         # control over it
                              terminal=end_of_video,
                              actor=self.actor,
                              critic=self.critic)
    td_loss = np.mean(td_batch)

    self.actor_gradient_batch.append(actor_gradient)
    self.critic_gradient_batch.append(critic_gradient)

    myprint("====")
    myprint("Master: Quality: Epoch", self.epoch)
    myprint("TD_loss", td_loss, "Avg_reward", np.mean(r_batch),
            "Avg_entropy", np.mean(entropy_record))
    myprint("====")

    summary_str = self.sess.run(self.summary_ops, feed_dict={
        self.summary_vars[0]: td_loss,
        self.summary_vars[1]: np.mean(r_batch),
        self.summary_vars[2]: np.mean(entropy_record)
    })

    self.writer.add_summary(summary_str, self.epoch)
    self.writer.flush()

    self.entropy_record = []

    if len(self.actor_gradient_batch) >= GRADIENT_BATCH_SIZE:
        assert len(self.actor_gradient_batch) == len(self.critic_gradient_batch)

        for i in range(len(self.actor_gradient_batch)):
            self.actor.apply_gradients(self.actor_gradient_batch[i])
            self.critic.apply_gradients(self.critic_gradient_batch[i])

        self.actor_gradient_batch = []
        self.critic_gradient_batch = []

        self.epoch += 1
        if self.epoch % MODEL_SAVE_INTERVAL == 0:
            # Save the neural net parameters to disk.
            save_path = self.saver.save(
                self.sess,
                self.summary_dir + "/nn_model_ep_" + str(self.epoch) + ".ckpt")
            myprint("Model saved in file: %s" % save_path)

    return self.getParams()
def getNextAction(self, rnnkey, state):
    # peerId and segId are identifiers
    # pendings_, curbufs_, pbdelay_, uploaded_, lastDlAt_, players_, deadline = state
    # lastPlayerId_, lastQl_, lastClens_, lastStartsAt_, lastFinishAt_, pendings_, deadline = state
    thrpt_, lastQl_, lastClens_, clens_, wthrghpt, buf, deadline = state
    # myprint("thrpt_:", thrpt_, '\n', " lastQl_:", lastQl_, '\n',
    #         " lastClens_:", lastClens_, '\n', " clens_:", clens_, '\n',
    #         " wthrghpt:", wthrghpt, '\n', " buf:", buf, '\n', " deadline:", deadline, '\n')
    inputset = state
    v_dim = len(thrpt_)

    # reward is video quality - rebuffer penalty - smooth penalty

    # retrieve previous state
    if len(self.s_batch) == 0:
        state = np.zeros((self._vInfoDim, self._vInfoDept))
    else:
        state = np.array(self.s_batch[-1], copy=True)

    # dequeue history record
    state = np.roll(state, -1, axis=1)

    state[0, :len(thrpt_)] = thrpt_
    state[1, :len(lastQl_)] = lastQl_
    state[2, :len(lastClens_)] = lastClens_
    state[3, :len(clens_)] = clens_
    state[4, -1] = wthrghpt
    state[5, -1] = buf
    state[6, -1] = deadline

    reshapedInput = np.reshape(state, (1, self._vInfoDim, self._vInfoDept))
    action_prob = self.actor.predict(reshapedInput)
    action_cumsum = np.cumsum(action_prob)
    action = (action_cumsum >
              np.random.randint(1, RAND_RANGE) / float(RAND_RANGE)).argmax()
    myprint("action:", action, "action cumsum:", action_cumsum.tolist(),
            "reshapedInput:", reshapedInput.tolist())
    # for i, x in enumerate(state):
    #     if np.count_nonzero(x) <= 0:
    #         myprint("Some error=======================================")
    #         myprint(f"\033[1;31mError in param {i}\033[m")

    for x in action_prob[0]:
        if math.isnan(x):
            myprint(inputset, "batch len", len(self.s_batch),
                    "actor out", self.actor.out)
        assert not math.isnan(x)
    # Note: we need to discretize the probability into 1/RAND_RANGE steps,
    # because there is an intrinsic discrepancy in passing single state and batch states

    if not self.nn_model:  # i.e. only for training
        self.keyedSBatch[rnnkey] = state
        self.keyedActionProb[rnnkey] = action_prob
        self.keyedAction[rnnkey] = action
        self.keyedInputParam[rnnkey] = inputset

    return self._vActionset[action]
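# Illustrative sketch (not part of the class above): how the cumulative-sum draw in
# getNextAction() samples an action index from the actor's probability vector.
# The names sample_action/probs are hypothetical; rand_range stands in for the
# RAND_RANGE constant, assumed here to be a large integer (e.g. 1000).
import numpy as np

def sample_action(probs, rand_range=1000):
    cumsum = np.cumsum(probs)  # running total of probabilities, ends near 1.0
    u = np.random.randint(1, rand_range) / float(rand_range)  # discretized uniform draw in (0, 1)
    return int((cumsum > u).argmax())  # first index whose cumulative mass exceeds u

# e.g. sample_action([0.1, 0.6, 0.3]) returns index 1 roughly 60% of the time.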
def stopAbr(self):
    self.count -= 1
    if self.count != 0:
        return
    if self.proc:
        self._rSend(("cleanup", None, None))
        self.send = None
        pop = self._rRecv()
        self.recv = None
        self.proc.join()
        self.proc = None
        AbrPensieveClass.__instance = None
        myprint("=" * 30, "cleaned up", "=" * 30, sep="\n")
        # import pdb; pdb.set_trace()
        return pop
def handle(self, recv, send, *args, **kwargs):
    self.orig = AbrPensieveClassProc(*args, **kwargs)
    funcs = {
        func: getattr(self.orig, func)
        for func in dir(self.orig)
        if not func.startswith("__") and not func.endswith("__")
        and callable(getattr(self.orig, func))
    }
    while True:
        func, args, kwargs = recv.get()
        try:
            if func in funcs:
                res = funcs[func](*args, **kwargs)
                send.put({"st": True, "res": res})
            elif func == "cleanup":
                send.put({"st": True, "res": "exit"})
                myprint("proc cleanup")
                recv = None
                send = None
                return
            else:
                send.put({"st": True, "res": None})
                myprint("unknown:", func, args, kwargs)
        except:
            trace = sys.exc_info()
            simpTrace = getTraceBack(trace)
            send.put({"st": False, "trace": simpTrace})
            myprint(simpTrace)
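# Illustrative sketch (hypothetical, not in the source): the caller side that pairs with
# handle() above. A proxy method serializes (func, args, kwargs) onto one multiprocessing
# queue and blocks on the other for the {"st": ..., "res": ...} reply, mirroring the
# _rSend/_rRecv helpers used elsewhere in this class. The name _remoteCall is an assumption.
def _remoteCall(self, func, *args, **kwargs):
    self.send.put((func, list(args), dict(kwargs)))  # consumed by handle()'s recv.get()
    reply = self.recv.get(timeout=60)
    if not reply.get("st", False):  # the worker process reported an exception
        raise Exception(reply.get("trace", ""))
    return reply["res"]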
def _rFinish(self):
    if self._vDead:
        return
    # assert self.playbackTime > 0
    if self._vAbr and "stopAbr" in dir(self._vAbr) and callable(self._vAbr.stopAbr):
        self._vAbr.stopAbr()
    self._vFinished = True
    self._vBufferLenOverTime.append((self._vEnv.getNow(), 0))
    self._vQualitiesPlayedOverTime.append((self._vEnv.getNow(), 0, -1))
    myprint("Simulation finished at:", self._vEnv.getNow(),
            "totalStallTime:", self._vTotalStallTime,
            "startUpDelay:", self._vStartUpDelay,
            "firstSegDlTime:", self._vFirstSegmentDlTime,
            "segSkipped:", self._vSegmentSkiped)
    myprint("QoE:", self._rCalculateQoE())
    myprint("stallTime:", self._vStallsAt)
def _rFinish(self):
    myprint(self._vTraceFile)
    self._vAgent._rFinish()
    self._vFinished = True
def _rRecv(self):
    dt = self.recv.get(timeout=60)
    if not dt.get("st", False):
        myprint(dt.get("trace", ""))
        raise Exception(dt.get("trace", ""))
    return dt["res"]
def saveModel(self, end_of_video=False):
    if self.ipcQueue:
        # hand the collected experience to the central learner and wait for
        # the updated network parameters addressed to this worker's pid
        self.ipcQueue[0].put({
            "id": self.ipcId,
            "cmd": IPC_CMD_UPDATE,
            "pid": self.pid,
            "data": [
                self.s_batch, self.a_batch, self.r_batch,
                self.entropy_record, end_of_video
            ]
        })
        res = None
        while True:
            res = self.ipcQueue[1].get()
            pid = res["pid"]
            res = res["res"]
            if pid == self.pid:
                break
        actor_net_params, critic_net_params = res
        self.actor.set_network_params(actor_net_params)
        self.critic.set_network_params(critic_net_params)
        del self.s_batch[:]
        del self.a_batch[:]
        del self.r_batch[:]
        del self.entropy_record[:]
        return

    actor_gradient, critic_gradient, td_batch = \
        a3c.compute_gradients(s_batch=np.stack(self.s_batch, axis=0),  # ignore the first chunk
                              a_batch=np.vstack(self.a_batch),         # since we don't have
                              r_batch=np.vstack(self.r_batch),         # control over it
                              terminal=end_of_video,
                              actor=self.actor,
                              critic=self.critic)
    td_loss = np.mean(td_batch)

    self.actor_gradient_batch.append(actor_gradient)
    self.critic_gradient_batch.append(critic_gradient)

    myprint("====")
    myprint("Quality: Epoch", self.epoch)
    myprint("TD_loss", td_loss, "Avg_reward", np.mean(self.r_batch),
            "Avg_entropy", np.mean(self.entropy_record))
    myprint("====")

    summary_str = self.sess.run(self.summary_ops, feed_dict={
        self.summary_vars[0]: td_loss,
        self.summary_vars[1]: np.mean(self.r_batch),
        self.summary_vars[2]: np.mean(self.entropy_record)
    })

    self.writer.add_summary(summary_str, self.epoch)
    self.writer.flush()

    self.entropy_record = []

    if len(self.actor_gradient_batch) >= GRADIENT_BATCH_SIZE:
        assert len(self.actor_gradient_batch) == len(self.critic_gradient_batch)

        for i in range(len(self.actor_gradient_batch)):
            self.actor.apply_gradients(self.actor_gradient_batch[i])
            self.critic.apply_gradients(self.critic_gradient_batch[i])

        self.actor_gradient_batch = []
        self.critic_gradient_batch = []

        self.epoch += 1
        if self.epoch % MODEL_SAVE_INTERVAL == 0:
            # Save the neural net parameters to disk.
            save_path = self.saver.save(
                self.sess,
                self.summary_dir + "/nn_model_ep_" + str(self.epoch) + ".ckpt")
            myprint("Model saved in file: %s" % save_path)

    del self.s_batch[:]
    del self.a_batch[:]
    del self.r_batch[:]
def __init__(self, actionset=[], infoDept=S_LEN, infoDim=S_INFO,
             log_path=None, summary_dir=None, nn_model=None):
    assert summary_dir
    myprint("Central init Params:", actionset, infoDept, log_path,
            summary_dir, nn_model)
    self.summary_dir = summary_dir  # os.path.join(summary_dir, "rnnQuality")
    self.nn_model = nn_model
    self.a_dim = len(actionset)
    self._vActionset = actionset
    self._vInfoDim = infoDim
    self._vInfoDept = infoDept

    if not os.path.exists(self.summary_dir):
        os.makedirs(self.summary_dir)

    self.sess = tf.Session()
    # log_file = open(os.path.join(log_path, "PensiveLearner", "wb"))

    self.actor = a3c.ActorNetwork(
        self.sess,
        state_dim=[self._vInfoDim, self._vInfoDept],
        action_dim=self.a_dim,
        learning_rate=ACTOR_LR_RATE)
    self.critic = a3c.CriticNetwork(
        self.sess,
        state_dim=[self._vInfoDim, self._vInfoDept],
        action_dim=self.a_dim,
        learning_rate=CRITIC_LR_RATE)

    self.summary_ops, self.summary_vars = a3c.build_summaries()

    self.sess.run(tf.global_variables_initializer())
    self.writer = tf.summary.FileWriter(
        self.summary_dir, self.sess.graph)  # training monitor
    self.saver = tf.train.Saver()  # save neural net parameters

    self.epoch = 0
    # restore neural net parameters
    if self.nn_model is None:
        nn_model, epoch = guessSavedSession(self.summary_dir)
        if nn_model:
            self.nn_model = nn_model
            self.epoch = epoch
    # nn_model = NN_MODEL
    if self.nn_model is not None:  # nn_model is the path to file
        self.saver.restore(self.sess, self.nn_model)
        myprint("Model restored.")

    self.actor_gradient_batch = []
    self.critic_gradient_batch = []
def __init__(self, actionset=[], infoDept=S_LEN, infoDim=S_INFO,
             log_path=None, summary_dir=None, nn_model=None,
             ipcQueue=None, ipcId=None):
    assert summary_dir
    assert (not ipcQueue and not ipcId) or (ipcQueue and ipcId)
    myprint("Pensieproc init Params:", actionset, infoDept, log_path,
            summary_dir, nn_model)
    self.ipcQueue = ipcQueue
    self.pid = os.getpid()
    self.ipcId = ipcId
    self.summary_dir = os.path.join(summary_dir, "rnnQuality")
    self.nn_model = None if not nn_model else os.path.join(
        self.summary_dir, nn_model)
    self.a_dim = len(actionset)
    self._vActionset = actionset
    self._vInfoDim = infoDim
    self._vInfoDept = infoDept

    if not os.path.exists(self.summary_dir):
        os.makedirs(self.summary_dir)

    self.sess = tf.Session()
    # log_file = open(os.path.join(log_path, "PensiveLearner", "wb"))

    self.actor = a3c.ActorNetwork(
        self.sess,
        state_dim=[self._vInfoDim, self._vInfoDept],
        action_dim=self.a_dim,
        learning_rate=ACTOR_LR_RATE)
    self.critic = a3c.CriticNetwork(
        self.sess,
        state_dim=[self._vInfoDim, self._vInfoDept],
        action_dim=self.a_dim,
        learning_rate=CRITIC_LR_RATE)

    self.summary_ops, self.summary_vars = a3c.build_summaries()

    self.sess.run(tf.global_variables_initializer())
    self.writer = tf.summary.FileWriter(
        self.summary_dir, self.sess.graph)  # training monitor
    self.saver = tf.train.Saver()  # save neural net parameters

    # restore neural net parameters
    self.epoch = 0
    if self.nn_model is None and not self.ipcQueue:
        nn_model, epoch = guessSavedSession(self.summary_dir)
        if nn_model:
            self.nn_model = nn_model
            self.epoch = epoch
    # nn_model = NN_MODEL
    if self.nn_model is not None and not self.ipcQueue:
        # nn_model is the path to file
        self.saver.restore(self.sess, self.nn_model)
        myprint("Model restored with `" + self.nn_model + "'")

    if self.ipcQueue:
        # fetch the current network parameters from the central learner
        self.ipcQueue[0].put({
            "id": self.ipcId,
            "pid": self.pid,
            "cmd": IPC_CMD_PARAM
        })
        myprint("=" * 50)
        myprint(self.ipcId, ": waiting for ipc")
        myprint("=" * 50)
        res = None
        while True:
            res = self.ipcQueue[1].get()
            pid = res["pid"]
            res = res["res"]
            if pid == self.pid:
                break
        actor_net_params, critic_net_params = res
        self.actor.set_network_params(actor_net_params)
        self.critic.set_network_params(critic_net_params)
        myprint("=" * 50)
        myprint(self.ipcId, ": ipcOver")
        myprint("=" * 50)

    self.s_batch = []
    self.a_batch = []
    self.r_batch = []
    self.entropy_record = []

    self.actor_gradient_batch = []
    self.critic_gradient_batch = []

    self.keyedSBatch = {}
    self.keyedActionProb = {}
    self.keyedAction = {}
    self.keyedInputParam = {}