Example #1
    def addReward(self, rnnkey, reward):
        if self.nn_model:  #i.e. no training
            return
        assert rnnkey in self.keyedSBatch and rnnkey in self.keyedActionProb

        state = self.keyedSBatch[rnnkey]
        action_prob = self.keyedActionProb[rnnkey]
        action = self.keyedAction[rnnkey]

        myprint(
            "Training dataset:", {
                "input": self.keyedInputParam[rnnkey],
                "action": self._vActionset[self.keyedAction[rnnkey]],
                "key": rnnkey,
                "reward": reward,
                "action_prob": action_prob.tolist()
            })
        del self.keyedSBatch[rnnkey]
        del self.keyedActionProb[rnnkey]
        del self.keyedAction[rnnkey]
        del self.keyedInputParam[rnnkey]

        self.r_batch.append(reward)

        self.entropy_record.append(a3c.compute_entropy(action_prob[0]))

        self.s_batch.append(state)

        action_vec = np.zeros(self.a_dim)
        action_vec[action] = 1
        self.a_batch.append(action_vec)

        if len(self.r_batch) >= TRAIN_SEQ_LEN:  # do training once
            self.saveModel()
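
For context, getNextAction (Example #3) stores the state, action, and action probability under rnnkey, and addReward above later pairs them with the observed reward. A minimal usage sketch, assuming a learner object that exposes both methods and an illustrative (peerId, segId) key:

    key = ("peer-0", 42)  # illustrative rnnkey; the excerpts only require that it is hashable
    quality = learner.getNextAction(key, state)
    # ... fetch the segment at `quality`, measure its QoE, compute the reward ...
    learner.addReward(key, reward)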
Example #2
    def saveModel(self,
                  s_batch,
                  a_batch,
                  r_batch,
                  entropy_record,
                  end_of_video=False):
        actor_gradient, critic_gradient, td_batch = \
            a3c.compute_gradients(s_batch=np.stack(s_batch, axis=0),  # ignore the first chunk
                                  a_batch=np.vstack(a_batch),  # since we don't have the
                                  r_batch=np.vstack(r_batch),  # control over it
                                  terminal=end_of_video, actor=self.actor, critic=self.critic)
        td_loss = np.mean(td_batch)

        self.actor_gradient_batch.append(actor_gradient)
        self.critic_gradient_batch.append(critic_gradient)

        myprint("====")
        myprint("Master: Quality: Epoch", self.epoch)
        myprint("TD_loss", td_loss, "Avg_reward", np.mean(r_batch),
                "Avg_entropy", np.mean(entropy_record))
        myprint("====")

        summary_str = self.sess.run(self.summary_ops,
                                    feed_dict={
                                        self.summary_vars[0]: td_loss,
                                        self.summary_vars[1]: np.mean(r_batch),
                                        self.summary_vars[2]: np.mean(entropy_record)
                                    })

        self.writer.add_summary(summary_str, self.epoch)
        self.writer.flush()

        self.entropy_record = []

        if len(self.actor_gradient_batch) >= GRADIENT_BATCH_SIZE:

            assert len(self.actor_gradient_batch) == len(
                self.critic_gradient_batch)

            for i in range(len(self.actor_gradient_batch)):
                self.actor.apply_gradients(self.actor_gradient_batch[i])
                self.critic.apply_gradients(self.critic_gradient_batch[i])

            self.actor_gradient_batch = []
            self.critic_gradient_batch = []

            self.epoch += 1
            if self.epoch % MODEL_SAVE_INTERVAL == 0:
                # Save the neural net parameters to disk.
                save_path = self.saver.save(
                    self.sess, self.summary_dir + "/nn_model_ep_" +
                    str(self.epoch) + ".ckpt")
                myprint("Model saved in file: %s" % save_path)

        return self.getParams()
Example #3
    def getNextAction(self, rnnkey, state):  #peerId and segId are identifiers

        #pendings_, curbufs_, pbdelay_, uploaded_, lastDlAt_, players_, deadline = state
        #         lastPlayerId_, lastQl_, lastClens_, lastStartsAt_, lastFinishAt_, pendings_, deadline = state
        thrpt_, lastQl_, lastClens_, clens_, wthrghpt, buf, deadline = state
        #         myprint("thrpt_:", thrpt_, '\n'," lastQl_:",  lastQl_, '\n'," lastClens_:",  lastClens_, '\n'," clens_:",  clens_, '\n'," wthrghpt:",  wthrghpt, '\n'," buf:",  buf, '\n'," deadline:",  deadline, '\n')
        inputset = state

        v_dim = len(thrpt_)

        # reward is video quality - rebuffer penalty - smooth penalty
        # retrieve previous state
        if len(self.s_batch) == 0:
            state = np.zeros((self._vInfoDim, self._vInfoDept))
        else:
            state = np.array(self.s_batch[-1], copy=True)

        # dequeue history record
        state = np.roll(state, -1, axis=1)

        state[0, :len(thrpt_)] = thrpt_
        state[1, :len(lastQl_)] = lastQl_
        state[2, :len(lastClens_)] = lastClens_
        state[3, :len(clens_)] = clens_
        state[4, -1] = wthrghpt
        state[5, -1] = buf
        state[6, -1] = deadline

        reshapedInput = np.reshape(state, (1, self._vInfoDim, self._vInfoDept))
        action_prob = self.actor.predict(reshapedInput)
        action_cumsum = np.cumsum(action_prob)
        action = (action_cumsum > np.random.randint(1, RAND_RANGE) /
                  float(RAND_RANGE)).argmax()
        myprint("action:", action, "action cumsum:", action_cumsum.tolist(),
                "reshapedInput:", reshapedInput.tolist())

        #         for i, x in enumerate(state):
        #             if np.count_nonzero(x) <= 0:
        #                 myprint("Some error=======================================")
        #                 myprint(f"\033[1;31mError in param {i}\033[m")

        for x in action_prob[0]:
            if math.isnan(x):
                myprint(inputset, "batch len", len(self.s_batch), "actor out",
                        self.actor.out)
            assert not math.isnan(x)
        # Note: we need to discretize the probability into 1/RAND_RANGE steps,
        # because there is an intrinsic discrepancy in passing single state and batch states

        if not self.nn_model:  #i.e. only for training
            self.keyedSBatch[rnnkey] = state
            self.keyedActionProb[rnnkey] = action_prob
            self.keyedAction[rnnkey] = action
            self.keyedInputParam[rnnkey] = inputset

        return self._vActionset[action]
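
The comment near the end of getNextAction explains the sampling trick: the uniform draw is discretized into 1/RAND_RANGE steps and compared against the cumulative distribution. A self-contained sketch of the same idea, with an illustrative RAND_RANGE (the repo defines its own constant):

    import numpy as np

    RAND_RANGE = 1000  # illustrative value

    def sample_action(action_prob):
        # cumulative-sum sampling: return the first index whose cumulative
        # probability exceeds a quantized uniform draw
        action_cumsum = np.cumsum(action_prob)
        u = np.random.randint(1, RAND_RANGE) / float(RAND_RANGE)
        return int((action_cumsum > u).argmax())

    # with probabilities [0.1, 0.3, 0.6], index 2 is returned roughly 60% of the time
    print(sample_action(np.array([0.1, 0.3, 0.6])))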
Example #4
File: Pensiev.py Project: abhimp/FLiDASH
    def stopAbr(self):

        self.count -= 1
        if self.count != 0:
            return

        if self.proc:
            self._rSend(("cleanup", None, None))
            self.send = None
            pop = self._rRecv()
            self.recv = None
            self.proc.join()
            self.proc = None
            AbrPensieveClass.__instance = None
            myprint("=" * 30, "cleaned up", "=" * 30, sep="\n")
            #             import pdb; pdb.set_trace()
            return pop
Example #5
File: Pensiev.py Project: abhimp/FLiDASH
    def handle(self, recv, send, *args, **kwargs):
        self.orig = AbrPensieveClassProc(*args, **kwargs)
        funcs = {
            func: getattr(self.orig, func)
            for func in dir(self.orig) if not func.startswith("__")
            and not func.endswith("__") and callable(getattr(self.orig, func))
        }
        while True:
            func, args, kwargs = recv.get()
            try:
                if func in funcs:
                    res = funcs[func](*args, **kwargs)
                    send.put({"st": True, "res": res})
                elif func == "cleanup":
                    send.put({"st": True, "res": "exit"})
                    myprint("proc cleanup")
                    recv = None
                    send = None
                    return
                else:
                    send.put({"st": True, "res": None})
                    myprint("unknown:", func, args, kwargs)

            except:
                trace = sys.exc_info()
                simpTrace = getTraceBack(trace)
                send.put({"st": False, "trace": simpTrace})
                myprint(simpTrace)
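
handle() above is a small RPC loop: it pulls (func, args, kwargs) tuples off one queue, dispatches them to the wrapped AbrPensieveClassProc, and answers with {"st", "res"} (or {"st", "trace"}) dicts on the other queue. The client side is only partly visible in these excerpts (_rRecv in Example #8; _rSend is used in Example #4 but not shown). A rough wiring sketch, assuming _rSend simply enqueues the request tuple and that both ends share a pair of multiprocessing queues:

    import multiprocessing as mp

    req_q, res_q = mp.Queue(), mp.Queue()
    # proc = mp.Process(target=handler.handle, args=(req_q, res_q)); proc.start()

    def _rSend(payload):
        # assumed counterpart of _rRecv: just enqueue the (func, args, kwargs) tuple
        req_q.put(payload)

    def _rRecv():
        dt = res_q.get(timeout=60)
        if not dt.get("st", False):
            raise Exception(dt.get("trace", ""))
        return dt["res"]

    # _rSend(("getNextAction", (rnnkey, state), {}))
    # action = _rRecv()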
Example #6
    def _rFinish(self):
        if self._vDead: return
        #         assert self.playbackTime > 0
        if self._vAbr and "stopAbr" in dir(self._vAbr) and callable(
                self._vAbr.stopAbr):
            self._vAbr.stopAbr()
        self._vFinished = True
        self._vBufferLenOverTime.append((self._vEnv.getNow(), 0))
        self._vQualitiesPlayedOverTime.append((self._vEnv.getNow(), 0, -1))
        myprint("Simulation finished at:", self._vEnv.getNow(),
                "totalStallTime:", self._vTotalStallTime, "startUpDelay:",
                self._vStartUpDelay, "firstSegDlTime:",
                self._vFirstSegmentDlTime, "segSkipped:", self._vSegmentSkiped)
        myprint("QoE:", self._rCalculateQoE())
        myprint("stallTime:", self._vStallsAt)
Example #7
File: Simple.py Project: abhimp/FLiDASH
    def _rFinish(self):
        myprint(self._vTraceFile)
        self._vAgent._rFinish()
        self._vFinished = True
Example #8
File: Pensiev.py Project: abhimp/FLiDASH
    def _rRecv(self):
        dt = self.recv.get(timeout=60)
        if not dt.get("st", False):
            myprint(dt.get("trace", ""))
            raise Exception(dt.get("trace", ""))
        return dt["res"]
Example #9
    def saveModel(self, end_of_video=False):
        if self.ipcQueue:
            self.ipcQueue[0].put({
                "id": self.ipcId,
                "cmd": IPC_CMD_UPDATE,
                "pid": self.pid,
                "data": [
                    self.s_batch, self.a_batch, self.r_batch,
                    self.entropy_record, end_of_video
                ]
            })
            res = None
            while True:
                res = self.ipcQueue[1].get()
                pid = res["pid"]
                res = res["res"]
                if pid == self.pid:
                    break
            actor_net_params, critic_net_params = res
            self.actor.set_network_params(actor_net_params)
            self.critic.set_network_params(critic_net_params)

            del self.s_batch[:]
            del self.a_batch[:]
            del self.r_batch[:]
            del self.entropy_record[:]

            return

        actor_gradient, critic_gradient, td_batch = \
            a3c.compute_gradients(s_batch=np.stack(self.s_batch, axis=0),  # ignore the first chunk
                                  a_batch=np.vstack(self.a_batch),  # since we don't have the
                                  r_batch=np.vstack(self.r_batch),  # control over it
                                  terminal=end_of_video, actor=self.actor, critic=self.critic)
        td_loss = np.mean(td_batch)

        self.actor_gradient_batch.append(actor_gradient)
        self.critic_gradient_batch.append(critic_gradient)

        myprint("====")
        myprint("Quality: Epoch", self.epoch)
        myprint("TD_loss", td_loss, "Avg_reward", np.mean(self.r_batch),
                "Avg_entropy", np.mean(self.entropy_record))
        myprint("====")

        summary_str = self.sess.run(self.summary_ops,
                                    feed_dict={
                                        self.summary_vars[0]: td_loss,
                                        self.summary_vars[1]: np.mean(self.r_batch),
                                        self.summary_vars[2]: np.mean(self.entropy_record)
                                    })

        self.writer.add_summary(summary_str, self.epoch)
        self.writer.flush()

        self.entropy_record = []

        if len(self.actor_gradient_batch) >= GRADIENT_BATCH_SIZE:

            assert len(self.actor_gradient_batch) == len(
                self.critic_gradient_batch)

            for i in range(len(self.actor_gradient_batch)):
                self.actor.apply_gradients(self.actor_gradient_batch[i])
                self.critic.apply_gradients(self.critic_gradient_batch[i])

            self.actor_gradient_batch = []
            self.critic_gradient_batch = []

            self.epoch += 1
            if self.epoch % MODEL_SAVE_INTERVAL == 0:
                # Save the neural net parameters to disk.
                save_path = self.saver.save(
                    self.sess, self.summary_dir + "/nn_model_ep_" +
                    str(self.epoch) + ".ckpt")
                myprint("Model saved in file: %s" % save_path)

        del self.s_batch[:]
        del self.a_batch[:]
        del self.r_batch[:]
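
In the IPC branch above, the worker only ships its batches to a central learner and then waits for a reply keyed by its pid. The master-side loop is not part of these excerpts; a rough sketch of what it could look like, assuming the central object is the one from Examples #2 and #10 and that getParams() returns the (actor, critic) parameter tuple that Examples #9 and #11 unpack:

    def masterLoop(central, reqQueue, resQueue):
        # answer each worker request with up-to-date actor/critic network parameters
        while True:
            msg = reqQueue.get()
            if msg["cmd"] == IPC_CMD_UPDATE:
                # msg["data"] = [s_batch, a_batch, r_batch, entropy_record, end_of_video]
                params = central.saveModel(*msg["data"])
            elif msg["cmd"] == IPC_CMD_PARAM:
                params = central.getParams()
            else:
                continue
            resQueue.put({"pid": msg["pid"], "res": params})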
Example #10
    def __init__(self,
                 actionset=[],
                 infoDept=S_LEN,
                 infoDim=S_INFO,
                 log_path=None,
                 summary_dir=None,
                 nn_model=None):

        assert summary_dir
        myprint("Central init Params:", actionset, infoDept, log_path,
                summary_dir, nn_model)
        self.summary_dir = summary_dir  #os.path.join(summary_dir, "rnnQuality")
        self.nn_model = nn_model

        self.a_dim = len(actionset)
        self._vActionset = actionset

        self._vInfoDim = infoDim
        self._vInfoDept = infoDept

        if not os.path.exists(self.summary_dir):
            os.makedirs(self.summary_dir)

        self.sess = tf.Session()
        #         log_file = open(os.path.join(log_path, "PensiveLearner", "wb"))

        self.actor = a3c.ActorNetwork(
            self.sess,
            state_dim=[self._vInfoDim, self._vInfoDept],
            action_dim=self.a_dim,
            learning_rate=ACTOR_LR_RATE)

        self.critic = a3c.CriticNetwork(
            self.sess,
            state_dim=[self._vInfoDim, self._vInfoDept],
            action_dim=self.a_dim,
            learning_rate=CRITIC_LR_RATE)

        self.summary_ops, self.summary_vars = a3c.build_summaries()

        self.sess.run(tf.global_variables_initializer())
        self.writer = tf.summary.FileWriter(
            self.summary_dir, self.sess.graph)  # training monitor
        self.saver = tf.train.Saver()  # save neural net parameters

        self.epoch = 0

        # restore neural net parameters
        if self.nn_model is None:
            nn_model, epoch = guessSavedSession(self.summary_dir)
            if nn_model:
                self.nn_model = nn_model
                self.epoch = epoch


        # nn_model = NN_MODEL
        if self.nn_model is not None:  # nn_model is the path to file
            self.saver.restore(self.sess, self.nn_model)
            myprint("Model restored.")

        self.actor_gradient_batch = []
        self.critic_gradient_batch = []
Example #11
    def __init__(self,
                 actionset=[],
                 infoDept=S_LEN,
                 infoDim=S_INFO,
                 log_path=None,
                 summary_dir=None,
                 nn_model=None,
                 ipcQueue=None,
                 ipcId=None):
        assert summary_dir
        assert (not ipcQueue and not ipcId) or (ipcQueue and ipcId)
        myprint("Pensieproc init Params:", actionset, infoDept, log_path,
                summary_dir, nn_model)

        self.ipcQueue = ipcQueue
        self.pid = os.getpid()
        self.ipcId = ipcId
        self.summary_dir = os.path.join(summary_dir, "rnnQuality")
        self.nn_model = None if not nn_model else os.path.join(
            self.summary_dir, nn_model)

        self.a_dim = len(actionset)
        self._vActionset = actionset

        self._vInfoDim = infoDim
        self._vInfoDept = infoDept

        if not os.path.exists(self.summary_dir):
            os.makedirs(self.summary_dir)

        self.sess = tf.Session()
        #         log_file = open(os.path.join(log_path, "PensiveLearner", "wb"))

        self.actor = a3c.ActorNetwork(
            self.sess,
            state_dim=[self._vInfoDim, self._vInfoDept],
            action_dim=self.a_dim,
            learning_rate=ACTOR_LR_RATE)

        self.critic = a3c.CriticNetwork(
            self.sess,
            state_dim=[self._vInfoDim, self._vInfoDept],
            action_dim=self.a_dim,
            learning_rate=CRITIC_LR_RATE)

        self.summary_ops, self.summary_vars = a3c.build_summaries()

        self.sess.run(tf.global_variables_initializer())
        self.writer = tf.summary.FileWriter(
            self.summary_dir, self.sess.graph)  # training monitor
        self.saver = tf.train.Saver()  # save neural net parameters

        # restore neural net parameters
        self.epoch = 0
        if self.nn_model is None and not self.ipcQueue:
            nn_model, epoch = guessSavedSession(self.summary_dir)
            if nn_model:
                self.nn_model = nn_model
                self.epoch = epoch


        # nn_model = NN_MODEL
        if self.nn_model is not None and not self.ipcQueue:  # nn_model is the path to file
            self.saver.restore(self.sess, self.nn_model)
            myprint("Model restored with `" + self.nn_model + "'")

        if self.ipcQueue:
            self.ipcQueue[0].put({
                "id": self.ipcId,
                "pid": self.pid,
                "cmd": IPC_CMD_PARAM
            })
            myprint("=" * 50)
            myprint(self.ipcId, ": waiting for ipc")
            myprint("=" * 50)
            res = None
            while True:
                res = self.ipcQueue[1].get()
                pid = res["pid"]
                res = res["res"]
                if pid == self.pid:
                    break
            actor_net_params, critic_net_params = res
            self.actor.set_network_params(actor_net_params)
            self.critic.set_network_params(critic_net_params)
            myprint("=" * 50)
            myprint(self.ipcId, ": ipcOver")
            myprint("=" * 50)

        self.s_batch = []
        self.a_batch = []
        self.r_batch = []
        self.entropy_record = []

        self.actor_gradient_batch = []
        self.critic_gradient_batch = []

        self.keyedSBatch = {}
        self.keyedActionProb = {}
        self.keyedAction = {}
        self.keyedInputParam = {}