Exemplo n.º 1
0
    def __init__(self, A_DIM, **kws):
        """Create a session, build the actor/critic networks and load weights.

        Args:
            A_DIM: size of the action space. It is stored as ``self.A_DIM``
                and passed to both networks as ``action_dim``.
            **kws: accepted so callers may pass extra keyword arguments;
                nothing in this constructor reads them.

        When the module-level ``NN_MODEL`` is not None it is used as the
        checkpoint path and restored into the new session.
        """
        #=====================================
        # SETUP
        #=====================================

        self.sess = tf.Session()

        self.A_DIM = A_DIM

        self.actor = a3c.ActorNetwork(self.sess,
                                      state_dim=[S_INFO, S_LEN],
                                      action_dim=A_DIM,
                                      learning_rate=ACTOR_LR_RATE)
        self.critic = a3c.CriticNetwork(self.sess,
                                        state_dim=[S_INFO, S_LEN],
                                        action_dim=A_DIM,
                                        learning_rate=CRITIC_LR_RATE)

        # tf.initialize_all_variables() is deprecated; this is its documented
        # replacement.
        self.sess.run(tf.global_variables_initializer())
        self.saver = tf.train.Saver()  # save neural net parameters

        # restore neural net parameters
        nn_model = NN_MODEL
        if nn_model is not None:  # nn_model is the path to file
            self.saver.restore(self.sess, nn_model)
            print("Model restored.")
Exemplo n.º 2
0
    def __init__(self, actionset=None, infoDept=S_LEN, infoDim=S_INFO, log_path=None, summary_dir=None, nn_model=None):
        """Build the actor/critic networks and restore a checkpoint if one exists.

        Args:
            actionset: sequence of available actions; its length becomes the
                action dimension of both networks. Defaults to a fresh empty
                list per instance.
            infoDept: second component of the networks' ``state_dim``.
            infoDim: first component of the networks' ``state_dim``.
            log_path: only echoed through ``myprint``; not otherwise used here.
            summary_dir: required (asserted truthy). Summaries are written to
                the ``rnnQuality`` sub-directory, which is created on demand.
            nn_model: checkpoint path to restore. When None,
                ``guessSavedSession`` is asked for a checkpoint and epoch in
                the summary directory.
        """
        assert summary_dir
        # A literal [] default would be one list shared by every instance
        # (it is stored on self below), so create it per call instead.
        if actionset is None:
            actionset = []
        myprint("Central init Params:", actionset, infoDept, log_path, summary_dir, nn_model)
        self.summary_dir = os.path.join(summary_dir, "rnnQuality")
        self.nn_model = nn_model

        self.a_dim = len(actionset)
        self._vActionset = actionset

        self._vInfoDim = infoDim
        self._vInfoDept = infoDept

        if not os.path.exists(self.summary_dir):
            os.makedirs(self.summary_dir)

        self.sess = tf.Session()

        self.actor = a3c.ActorNetwork(self.sess,
                                 state_dim=[self._vInfoDim, self._vInfoDept], action_dim=self.a_dim,
                                 learning_rate=ACTOR_LR_RATE)

        self.critic = a3c.CriticNetwork(self.sess,
                                   state_dim=[self._vInfoDim, self._vInfoDept], action_dim=self.a_dim,
                                   learning_rate=CRITIC_LR_RATE)

        self.summary_ops, self.summary_vars = a3c.build_summaries()

        self.sess.run(tf.global_variables_initializer())
        self.writer = tf.summary.FileWriter(self.summary_dir, self.sess.graph)  # training monitor
        self.saver = tf.train.Saver()  # save neural net parameters

        self.epoch = 0

        # No explicit checkpoint given: look for a previously saved session.
        # A falsy path from guessSavedSession means there is nothing to restore.
        if self.nn_model is None:
            nn_model, epoch = guessSavedSession(self.summary_dir)
            if nn_model:
                self.nn_model = nn_model
                self.epoch = epoch

        # restore neural net parameters
        if self.nn_model is not None:  # nn_model is the path to file
            self.saver.restore(self.sess, self.nn_model)
            myprint("Model restored.")

        self.actor_gradient_batch = []
        self.critic_gradient_batch = []
Exemplo n.º 3
0
    def __init__(self,
                 actionset=None,
                 infoDept=S_LEN,
                 infoDim=S_INFO,
                 log_path=None,
                 summary_dir=None,
                 nn_model=None,
                 ipcQueue=None,
                 ipcId=None,
                 readOnly=False):
        """Build the networks, then load weights from disk or over IPC.

        Args:
            actionset: sequence of available actions; its length becomes the
                action dimension of both networks. Defaults to a fresh empty
                list per instance.
            infoDept: second component of the networks' ``state_dim``.
            infoDim: first component of the networks' ``state_dim``.
            log_path: only echoed through ``myprint``; not otherwise used here.
            summary_dir: required (asserted truthy). Summaries are written to
                the ``rnnBuffer`` sub-directory, which is created on demand.
            nn_model: checkpoint file name, joined onto the ``rnnBuffer``
                directory. Only restored when no ``ipcQueue`` is given.
            ipcQueue: indexable pair of queues. A parameter request is put on
                ``ipcQueue[0]`` and the reply is read from ``ipcQueue[1]``.
            ipcId: identifier sent with the request. ``ipcQueue`` and
                ``ipcId`` must be both truthy or both falsy (asserted).
            readOnly: stored as ``self._vReadOnly``; not read here.
        """
        assert summary_dir
        assert (not ipcQueue and not ipcId) or (ipcQueue and ipcId)
        # A literal [] default would be one list shared by every instance
        # (it is stored on self below), so create it per call instead.
        if actionset is None:
            actionset = []
        myprint("Pensieproc init Params:", actionset, infoDept, log_path,
                summary_dir, nn_model)

        self.ipcQueue = ipcQueue
        self.pid = os.getpid()
        self.ipcId = ipcId
        self.summary_dir = os.path.join(summary_dir, "rnnBuffer")
        self.nn_model = None if not nn_model else os.path.join(
            self.summary_dir, nn_model)

        self.a_dim = len(actionset)
        self._vActionset = actionset

        self._vInfoDim = infoDim
        self._vInfoDept = infoDept

        self._vReadOnly = readOnly

        if not os.path.exists(self.summary_dir):
            os.makedirs(self.summary_dir)

        self.sess = tf.Session()

        self.actor = a3c.ActorNetwork(
            self.sess,
            state_dim=[self._vInfoDim, self._vInfoDept],
            action_dim=self.a_dim,
            learning_rate=ACTOR_LR_RATE)

        self.critic = a3c.CriticNetwork(
            self.sess,
            state_dim=[self._vInfoDim, self._vInfoDept],
            action_dim=self.a_dim,
            learning_rate=CRITIC_LR_RATE)

        self.summary_ops, self.summary_vars = a3c.build_summaries()

        self.sess.run(tf.global_variables_initializer())
        self.writer = tf.summary.FileWriter(
            self.summary_dir, self.sess.graph)  # training monitor
        self.saver = tf.train.Saver()  # save neural net parameters

        # restore neural net parameters (skipped entirely in IPC mode, where
        # the weights arrive over the queue instead)
        self.epoch = 0
        if self.nn_model is None and not self.ipcQueue:
            nn_model, epoch = guessSavedSession(self.summary_dir)
            if nn_model:
                self.nn_model = nn_model
                self.epoch = epoch
        if self.nn_model is not None and not self.ipcQueue:  # nn_model is the path to file
            self.saver.restore(self.sess, self.nn_model)
            myprint("Model restored with `" + self.nn_model + "'")

        if self.ipcQueue:
            # Ask the other end for the current network parameters.
            self.ipcQueue[0].put({
                "id": self.ipcId,
                "pid": self.pid,
                "cmd": IPC_CMD_PARAM
            })
            myprint("=" * 50)
            myprint(self.ipcId, ": waiting for ipc")
            myprint("=" * 50)
            # Block until a reply tagged with our own pid arrives.
            # NOTE(review): replies addressed to other pids are consumed and
            # dropped here, not put back -- confirm the reply queue is not
            # shared between several waiting processes.
            while True:
                reply = self.ipcQueue[1].get()
                if reply["pid"] == self.pid:
                    break
            actor_net_params, critic_net_params = reply["res"]
            self.actor.set_network_params(actor_net_params)
            self.critic.set_network_params(critic_net_params)
            myprint("=" * 50)
            myprint(self.ipcId, ": ipcOver")
            myprint("=" * 50)

        self.s_batch = []
        self.a_batch = []
        self.r_batch = []
        self.entropy_record = []

        self.actor_gradient_batch = []
        self.critic_gradient_batch = []

        self.keyedSBatch = {}
        self.keyedActionProb = {}
        self.keyedAction = {}
        self.keyedInputParam = {}
Exemplo n.º 4
0
    def __init__(self,
                 actionset=None,
                 infoDept=S_LEN,
                 infoDim=S_INFO,
                 log_path=None,
                 summary_dir=None,
                 nn_model=None,
                 readOnly=False,
                 master=False,
                 ipcQueues=None):
        """Create either a slave proxy or the master that owns the networks.

        Slave (``master`` false, no ``ipcQueues``): creates two
        ``mp.Queue`` objects, starts an ``mp.Process`` whose target is
        ``PensiveLearner`` called with the same arguments plus
        ``master=True`` and the queue pair, waits until ``"ready"`` is
        received, and returns without building any network.

        Master (``master`` true, ``ipcQueues`` given): builds the actor and
        critic networks, restores a checkpoint if one is available, then
        runs ``self.runCmd()``. Any ``Exception`` raised by it is printed as
        a traceback and not propagated.

        Args:
            actionset: sequence of available actions; its length becomes the
                action dimension. Defaults to a fresh empty list per instance.
            infoDept: second component of the networks' ``state_dim``.
            infoDim: first component of the networks' ``state_dim``.
            log_path: forwarded to the spawned process; not otherwise used.
            summary_dir: required (asserted truthy); used as-is for
                summaries and created on demand.
            nn_model: checkpoint file name, joined onto ``summary_dir``.
            readOnly: must be truthy (asserted).
            master: must be true exactly when ``ipcQueues`` is given
                (asserted).
            ipcQueues: ``(recv, send)`` queue pair for the master side.
        """
        assert summary_dir
        assert readOnly

        assert master == (ipcQueues is not None)

        # A literal [] default would be one list shared by every instance
        # (it is stored on self below), so create it per call instead.
        if actionset is None:
            actionset = []

        self.master = master
        cprint.cyan("!!CREATING OBJECT!!")
        if not master:
            cprint.blue("!!SLAVE!!")
            self.recv, self.send = [mp.Queue(), mp.Queue()]
            # The child receives the queues swapped, so that what this side
            # sends is what the child receives and vice versa.
            self.proc = mp.Process(
                target=PensiveLearner,
                args=(actionset, infoDept, infoDim, log_path, summary_dir,
                      nn_model, readOnly, True, [self.send, self.recv]))
            self.proc.start()
            # Block until the child reports it is ready; anything else that
            # arrives before that is discarded.
            while True:
                cmd = self.recv.get()
                if cmd == "ready":
                    break
            return

        cprint.red("!!MASTER!!")

        self.recv, self.send = ipcQueues

        self.pid = os.getpid()
        self.summary_dir = summary_dir
        self.nn_model = None if not nn_model else os.path.join(
            self.summary_dir, nn_model)

        self.a_dim = len(actionset)
        self._vActionset = actionset

        self._vInfoDim = infoDim
        self._vInfoDept = infoDept

        self._vReadOnly = readOnly

        if not os.path.exists(self.summary_dir):
            os.makedirs(self.summary_dir)

        self.sess = tf.Session()

        self.actor = a3c.ActorNetwork(
            self.sess,
            state_dim=[self._vInfoDim, self._vInfoDept],
            action_dim=self.a_dim,
            learning_rate=ACTOR_LR_RATE)

        self.critic = a3c.CriticNetwork(
            self.sess,
            state_dim=[self._vInfoDim, self._vInfoDept],
            action_dim=self.a_dim,
            learning_rate=CRITIC_LR_RATE)

        self.summary_ops, self.summary_vars = a3c.build_summaries()

        self.sess.run(tf.global_variables_initializer())
        self.writer = tf.summary.FileWriter(
            self.summary_dir, self.sess.graph)  # training monitor
        self.saver = tf.train.Saver()  # save neural net parameters

        # restore neural net parameters
        self.epoch = 0
        if self.nn_model is None and self.master:
            nn_model, epoch = guessSavedSession(self.summary_dir)
            if nn_model:
                self.nn_model = nn_model
                self.epoch = epoch

        if self.nn_model is not None:  # nn_model is the path to file
            self.saver.restore(self.sess, self.nn_model)
            cprint.red("Model restored with `" + self.nn_model + "'")

        # Runs inside the constructor; failures are reported via the
        # traceback only, so the constructor itself does not raise.
        try:
            self.runCmd()
        except Exception:
            track = tb.format_exc()
            cprint.red(track)
Exemplo n.º 5
0
    def __init__(self, videoInfo, agent, log_file_path=LOG_FILE, *kw, **kws):
        """Create a session, build the actor/critic networks and initial batches.

        Args:
            videoInfo: object whose ``bitratesKbps`` length defines the
                action dimension.
            agent: not read by this constructor.
            log_file_path: when truthy, opened in ``'wb'`` mode and kept open
                on ``self.log_file``; otherwise ``self.log_file`` is None.
            *kw, **kws: accepted and ignored.

        When the module-level ``NN_MODEL`` is not None it is used as the
        checkpoint path and restored into the new session. The collected
        objects are also exposed together as ``self.input_dict``.
        """
        self.video = None
        #=====================================
        # SETUP
        #=====================================
        # The handle is deliberately left open: it is stored on the instance.
        log_file = None if not log_file_path else open(log_file_path, 'wb')
        sess = tf.Session()

        A_DIM = len(videoInfo.bitratesKbps)

        actor = a3c.ActorNetwork(sess,
                                 state_dim=[S_INFO, S_LEN],
                                 action_dim=A_DIM,
                                 learning_rate=ACTOR_LR_RATE)
        critic = a3c.CriticNetwork(sess,
                                   state_dim=[S_INFO, S_LEN],
                                   action_dim=A_DIM,
                                   learning_rate=CRITIC_LR_RATE)

        # tf.initialize_all_variables() is deprecated; this is its documented
        # replacement.
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()  # save neural net parameters

        # restore neural net parameters
        nn_model = NN_MODEL
        if nn_model is not None:  # nn_model is the path to file
            saver.restore(sess, nn_model)
            print("Model restored.")

        # One-hot vector selecting DEFAULT_QUALITY as the initial action.
        init_action = np.zeros(A_DIM)
        init_action[DEFAULT_QUALITY] = 1

        s_batch = [np.zeros((S_INFO, S_LEN))]
        a_batch = [init_action]
        r_batch = []

        train_counter = 0

        #=====================================
        # INIT
        #=====================================

        # train_counter is only reachable through input_dict; it has no
        # attribute of its own.
        self.input_dict = {
            'sess': sess,
            'log_file': log_file,
            'actor': actor,
            'critic': critic,
            'saver': saver,
            'train_counter': train_counter,
            's_batch': s_batch,
            'a_batch': a_batch,
            'r_batch': r_batch,
        }
        self.sess = sess
        self.log_file = log_file
        self.actor = actor
        self.critic = critic
        self.saver = saver
        self.s_batch = s_batch
        self.a_batch = a_batch
        self.r_batch = r_batch