Example #1
    def run(self):
        TIMEOUT = 1000
        buffer = experience_buffer()

        self.poller.modify(self.sock, ALL_FLAGS)
        curr_flags = ALL_FLAGS
        rall = 0

        while self.running:
            if self.window_is_open():
                if curr_flags != ALL_FLAGS:
                    self.poller.modify(self.sock, ALL_FLAGS)
                    curr_flags = ALL_FLAGS
            else:
                if curr_flags != READ_ERR_FLAGS:
                    self.poller.modify(self.sock, READ_ERR_FLAGS)
                    curr_flags = READ_ERR_FLAGS

            # Poll the registered file descriptors; returns a possibly-empty list
            # of (fd, event) tuples for descriptors with events or errors to report.
            events = self.poller.poll(TIMEOUT)

            if not events:  # timed out
                self.send()

            for fd, flag in events:
                # fileno() returns the socket's file descriptor (a small integer).
                assert self.sock.fileno() == fd

                if flag & ERR_FLAGS:
                    sys.exit('Error occurred on the channel')

                if flag & READ_FLAGS:
                    s0 = self.state
                    norm_state = normalize(s0)
                    one_hot_action = one_hot(self.action, self.action_cnt)
                    s0 = norm_state + one_hot_action

                    s1, action, reward, done = self.recv()

                    norm_state = normalize(s1)
                    one_hot_action = one_hot(self.action, self.action_cnt)
                    s1 = norm_state + one_hot_action

                    buffer.add([[s0, action, reward, s1, done]])

                    rall += reward

                if flag & WRITE_FLAGS:
                    if self.window_is_open():
                        self.send()

        return buffer, rall
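Example #1 builds and returns an `experience_buffer`, but the helper itself is not shown. Below is a rough, hedged sketch: the class name and the `add` signature follow the usage above, while the capacity and the sampling policy are assumptions, not the project's actual implementation.

import random
from collections import deque

class experience_buffer(object):
    # Minimal replay buffer: stores [s0, action, reward, s1, done] transitions.
    def __init__(self, buffer_size=50000):
        self.buffer = deque(maxlen=buffer_size)

    def add(self, experience):
        # `experience` is a list of transitions, e.g. [[s0, action, reward, s1, done]].
        self.buffer.extend(experience)

    def sample(self, size):
        # Uniformly sample a training batch of `size` transitions.
        return random.sample(self.buffer, size)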
Example #2
    def sample_action(self, state):
        norm_state = normalize(state)

        one_hot_action = one_hot(self.prev_action, self.action_cnt)
        aug_state = norm_state + one_hot_action
        # debug
        # print("entry")
        # print("dagger-runsender: aug_state: " + str(aug_state))
        # debug
        # Get probability of each action from the local network.
        pi = self.model
        feed_dict = {
            pi.input: [[aug_state]],
            pi.state_in: self.lstm_state,
        }
        # debug
        self.logger.warning("RUN_SENDER: aug_state is: " + str(aug_state))
        # debug
        ops_to_run = [pi.action_probs, pi.state_out]
        action_probs, self.lstm_state = self.sess.run(ops_to_run, feed_dict)

        # Choose an action to take
        action = np.argmax(action_probs[0][0])
        self.prev_action = action
        return action
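Every `sample_action` variant on this page augments a normalized state with a one-hot encoding of the previous action before feeding the network. A minimal sketch of what `normalize` and `one_hot` could look like, assuming the four-feature state `[delay_ewma, delivery_rate_ewma, send_rate_ewma, cwnd]` from Example #3 and purely illustrative scaling bounds:

import numpy as np

def one_hot(action, action_cnt):
    # Length-`action_cnt` list with a 1.0 at the chosen action's index.
    encoding = [0.0] * action_cnt
    encoding[action] = 1.0
    return encoding

def normalize(state):
    # Hypothetical min-max scaling; the real feature ranges are project-specific.
    state_min = np.array([0.0, 0.0, 0.0, 0.0])
    state_max = np.array([500.0, 1000.0, 1000.0, 5000.0])
    clipped = np.clip(state, state_min, state_max)
    return list((clipped - state_min) / (state_max - state_min))

Both helpers return plain lists, so `norm_state + one_hot_action` concatenates into the augmented state the snippets expect.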
Example #3
    def recv(self):
        serialized_ack, addr = self.sock.recvfrom(1600)

        if addr != self.peer_addr:
            return

        ack = datagram_pb2.Ack()
        ack.ParseFromString(serialized_ack)

        action = self.action

        self.update_state(ack)

        if self.step_start_ms is None:
            self.step_start_ms = curr_ts_ms()

        done = False
        reward = 0
        # At the end of each step, rebuild the state and choose a new action:
        if curr_ts_ms() - self.step_start_ms > self.step_len_ms:  # step's end
            self.state = [
                self.delay_ewma, self.delivery_rate_ewma, self.send_rate_ewma,
                self.cwnd
            ]
            # print(self.state)

            # time how long it takes to get an action from the NN
            if self.debug:
                start_sample = time.time()

            norm_state = normalize(self.state)
            one_hot_action = one_hot(self.action, self.action_cnt)
            state = norm_state + one_hot_action

            self.action = self.sample_action(state)

            if self.debug:
                self.sampling_file.write('%.2f ms\n' %
                                         ((time.time() - start_sample) * 1000))

            self.take_action(self.action)
            '''
            self.delay_ewma = None
            self.delivery_rate_ewma = None
            self.send_rate_ewma = None
            '''

            self.step_start_ms = curr_ts_ms()

            done = False
            if self.train:
                self.step_cnt += 1
                reward = self.compute_performance()
                if self.step_cnt >= Sender.max_steps:
                    self.step_cnt = 0
                    self.running = False
                    done = True
                # print(self.state, self.action, reward, done)

        return self.state, action, reward, done
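`recv()` measures step boundaries with a `curr_ts_ms()` helper that is not shown here; a minimal sketch, assuming it simply returns the current wall-clock time in milliseconds:

import time

def curr_ts_ms():
    # Current wall-clock time in integer milliseconds.
    return int(time.time() * 1000)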
Example #4
    def sample_action(self, state):
        norm_state = normalize(state)

        one_hot_action = one_hot(self.prev_action, self.action_cnt)
        aug_state = norm_state + one_hot_action

        btime = time.time()
        # Get probability of each action from the local network.
        pi = self.model
        feed_dict = {
            pi.input: [[aug_state]],
            pi.state_in: self.lstm_state,
        }
        ops_to_run = [pi.action_probs, pi.state_out]
        action_probs, self.lstm_state = self.sess.run(ops_to_run, feed_dict)

        # Choose an action to take
        action = np.argmax(action_probs[0][0])
        self.prev_action = action

        info = 'made decision {} at cwnd {} in {}s\n'.format(
            action, state[3], time.time() - btime)
        self.log.write(info)

        # action = np.argmax(np.random.multinomial(1, action_probs[0] - 1e-5))
        # temperature = 1.0
        # temp_probs = softmax(action_probs[0] / temperature)
        # action = np.argmax(np.random.multinomial(1, temp_probs - 1e-5))
        return action, aug_state
Example #5
    def sample_action(self, state):
        norm_state = normalize(state)

        one_hot_action = one_hot(self.prev_action, self.action_cnt)
        aug_state = norm_state + one_hot_action

        # Get probability of each action from the local network.
        pi = self.mainQN
        feed_dict = {pi.input: [[aug_state]]}
        ops_to_run = pi.action_probs
        action_probs = self.sess.run(ops_to_run, feed_dict)

        # Choose an action to take
        action = np.argmax(action_probs[0][0])
        self.prev_action = action

        return action
Example #6
    def sample_action(self, state):
        """ Given a state buffer in the past step, returns an action
        to perform.

        Appends to the state/action buffers the state and the
        "correct" action to take according to the expert.
        """
        cwnd = state[self.state_dim - 1]
        expert_action = self.expert.sample_action(cwnd)

        # For decision-making, normalize.
        norm_state = normalize(state)

        one_hot_action = one_hot(self.prev_action, self.action_cnt)
        aug_state = norm_state + one_hot_action

        # Fill in state_buf, action_buf
        self.state_buf.append(aug_state)
        self.action_buf.append(expert_action)

        # Always use the expert on the first episode to get our bearings.
        if self.curr_ep == 0:
            self.prev_action = expert_action
            return expert_action

        # Get probability of each action from the local network.
        pi = self.local_network
        feed_dict = {
            pi.input: [[aug_state]],
            pi.state_in: self.lstm_state,
        }
        ops_to_run = [pi.action_probs, pi.state_out]
        action_probs, self.lstm_state = self.sess.run(ops_to_run, feed_dict)

        # Choose an action to take and update current LSTM state
        # action = np.argmax(np.random.multinomial(1, action_probs[0][0] - 1e-5))
        action = np.argmax(action_probs[0][0])
        self.prev_action = action

        return action
Example #7
    def sample_action(self, state):
        if np.random.rand(1) < e:
            action = np.random.randint(0, self.env.action_cnt)
        else:
            norm_state = normalize(state)

            one_hot_action = one_hot(self.prev_action, self.action_cnt)
            aug_state = norm_state + one_hot_action

            # Get probability of each action from the local network.
            pi = self.mainQN
            feed_dict = {
                pi.state: [[aug_state]],
            }
            ops_to_run = [pi.action_probs]
            action_probs = self.sess.run(ops_to_run, feed_dict)

            # Choose an action to take
            action = np.argmax(action_probs[0][0])

        self.prev_action = action
        return action
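The epsilon-greedy branch in Example #7 compares against an exploration rate `e` that is defined outside the snippet. A hedged sketch of one possible schedule (a linear decay; the constants and the `epsilon_at` helper are assumptions, not the project's actual settings):

# Hypothetical epsilon schedule: start fully random, decay to 5% exploration.
E_START, E_END, ANNEAL_STEPS = 1.0, 0.05, 10000

def epsilon_at(step):
    # Linear decay from E_START to E_END over ANNEAL_STEPS, then constant.
    frac = min(float(step) / ANNEAL_STEPS, 1.0)
    return E_START + frac * (E_END - E_START)

e = epsilon_at(0)  # would be updated as training steps accumulate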
Example #8
    def sample_action(self, state):
        """ Given a state buffer in the past step, returns an action
        to perform.

        Appends to the state/action buffers the state and the
        "correct" action to take according to the expert.
        """

        cwnd = state[3]
        # expert_action = self.expert.sample_action(cwnd)

        # For decision-making, normalize.

        norm_state = normalize(state)

        one_hot_action = one_hot(self.prev_action, self.action_cnt)
        aug_state = norm_state + one_hot_action

        # Fill in state_buf, action_buf
        # self.state_buf.append(aug_state)

        r = self.utility(aug_state) - self.prev_utility
        transition = np.hstack((aug_state, [self.prev_action,
                                            r], self.prev_state))

        # replace the old memory with new memory
        index = self.memory_counter % self.memory_size
        self.memory[index, :] = transition  # sample action
        self.memory_counter += 1
        # refresh previous state and utility
        self.prev_utility = self.utility(aug_state)
        self.prev_state = aug_state

        # sample batch memory from all memory
        if self.memory_counter > self.memory_size:
            sample_index = np.random.choice(self.memory_size,
                                            size=self.batch_size)
        else:
            sample_index = np.random.choice(self.memory_counter,
                                            size=self.batch_size)
        batch_memory = self.memory[sample_index, :]

        # TODO: train the current network

        # self.action_buf.append(expert_action)

        # Always use the expert on the first episode to get our bearings.
        #if self.curr_ep == 0:
        #   self.prev_action = expert_action
        #   return expert_action

        # Get probability of each action from the local network.
        pi = self.local_network

        feed_dict = {
            pi.input: [[aug_state]],
            pi.state_in: self.lstm_state,
        }
        ops_to_run = [pi.action_probs, pi.state_out]
        action_probs, self.lstm_state = self.sess.run(ops_to_run, feed_dict)

        # Choose an action to take and update current LSTM state
        # action = np.argmax(np.random.multinomial(1, action_probs[0][0] - 1e-5))
        action = np.argmax(action_probs[0][0])
        self.prev_action = action

        return action
Example #9
    def run(self):
        TIMEOUT = 1000

        self.poller.modify(self.sock, ALL_FLAGS)
        curr_flags = ALL_FLAGS
        # Replay buffer and reward/throughput/delay statistics.
        buffer = experience_buffer()
        rList = []
        r_ave = []
        tput_list = []
        delay_list = []
        steps = 1

        while self.running:
            if self.window_is_open():
                if curr_flags != ALL_FLAGS:
                    self.poller.modify(self.sock, ALL_FLAGS)
                    curr_flags = ALL_FLAGS
            else:
                if curr_flags != READ_ERR_FLAGS:
                    self.poller.modify(self.sock, READ_ERR_FLAGS)
                    curr_flags = READ_ERR_FLAGS

            # Poll the registered file descriptors; returns a possibly-empty list
            # of (fd, event) tuples for descriptors with events or errors to report.
            events = self.poller.poll(TIMEOUT)

            if not events:  # timed out
                self.send()

            for fd, flag in events:
                # fileno() returns the socket's file descriptor (a small integer).
                assert self.sock.fileno() == fd

                if flag & ERR_FLAGS:
                    sys.exit('Error occurred on the channel')

                if flag & READ_FLAGS:

                    s0 = self.state
                    norm_state = normalize(s0)
                    one_hot_action = one_hot(self.action, self.action_cnt)
                    s0 = norm_state + one_hot_action

                    step_end, s1, action, reward, done, tput, perc_delay = self.recv()

                    if step_end:
                        norm_state = normalize(s1)
                        one_hot_action = one_hot(self.action, self.action_cnt)
                        s1 = norm_state + one_hot_action

                        buffer.add([[s0, action, reward, s1, done]])
                        if steps > 500 and steps % 4 == 0:
                            self.update_Qnet(buffer)

                        rList.append(reward)
                        tput_list.append(tput)
                        delay_list.append(perc_delay)

                        #print(reward)

                        if steps % 1000 == 0:
                            r_ave.append(sum(rList)/1000.0)
                            print("average reward on last 1000 steps", r_ave[-1])
                            print("average tput on last 1000 steps", sum(tput_list[-1000:])/1000.0)
                            print("average delay on last 1000 steps", sum(delay_list[-1000:])/1000.0)
                            rList = []

                        steps += 1

                if flag & WRITE_FLAGS:
                    if self.window_is_open():
                        self.send()