Exemplo n.º 1
0
 def test_neigh_indexing(self):
     """Check that `NGF` and `GF` neighbor lookups index a grid identically.

     If neighs are np arrays, they index differently than tuples, so the
     values selected from the same random grid through both lookups are
     compared element-wise.
     """
     # Smoke call on an all-False grid; the result is not inspected.
     # Builtin `bool` is used because the `np.bool` alias was deprecated in
     # NumPy 1.20 and removed in 1.24.
     NGF.get_eligible_chs(np.zeros((7, 7, 70), dtype=bool), (3, 2))
     somegrid = np.random.uniform(size=(7, 7, 70))
     # All neighbors at once, using the "separate" index form.
     n1 = somegrid[NGF.neighbors_sep(2, 3, 2, False)]
     n2 = somegrid[GF.neighbors(2, 3, 2, separate=True, include_self=False)]
     assert (n1 == n2).all()
     # Only the first neighbor returned by each lookup.
     n1 = somegrid[NGF.neighbors(2, 3, 2, False)[0]]
     n2 = somegrid[GF.neighbors(2, 3, 2, include_self=False)[0]]
     assert (n1 == n2).all()
Exemplo n.º 2
0
    def optimal_ch(self, ce_type, cell) -> tuple:
        """Select a channel for an event of type `ce_type` at `cell`.

        For NEW and HOFF events the candidates are the channels returned by
        `NGF.get_eligible_chs`; for any other event type they are the channels
        that are set in `self.grid[cell]`. END events pick the candidate with
        the highest q-value; all other events go through
        `self.exploration_policy`, after which `self.epsilon` is multiplied by
        `self.epsilon_decay`.

        Side effect: the mean of the candidate q-values is appended to
        `self.qval_means`.

        Returns:
            A 5-tuple `(ch, qval, max_ch, freps, astates)` where `ch` is the
            selected channel, `qval` its q-value, `max_ch` the candidate with
            the highest q-value, and `freps`/`astates` are passed through from
            `self.get_qvals`. For NEW/HOFF events with no eligible channel
            the result is `(None, None, None, None, None)`.
        """
        if ce_type in (CEvent.NEW, CEvent.HOFF):
            chs = NGF.get_eligible_chs(self.grid, cell)
            if len(chs) == 0:
                return (None, ) * 5
        else:
            chs = np.nonzero(self.grid[cell])[0]

        qvals_dense, _, freps, astates = self.get_qvals(
            self.grid, cell, ce_type, chs)
        self.qval_means.append(np.mean(qvals_dense))
        if ce_type == CEvent.END:
            # Greedy choice: selected and greedy channel coincide.
            idx = np.argmax(qvals_dense)
            ch = max_ch = chs[idx]
        else:
            ch, idx, _ = self.exploration_policy(self.epsilon, chs,
                                                 qvals_dense, cell)
            max_ch = chs[np.argmax(qvals_dense)]
            self.epsilon *= self.epsilon_decay

        if ch is None:
            # Only logged, not raised: execution continues to the return.
            self.logger.error(
                f"ch is none for {ce_type}\n{chs}\n{qvals_dense}\n")

        return ch, qvals_dense[idx], max_ch, freps, astates
Exemplo n.º 3
0
    def optimal_ch_val(self, ce_type, cell) -> tuple:
        """Select a channel for `ce_type` at `cell` from afterstate values.

        Candidates are the eligible channels (NEW/HOFF) or the channels set
        in `self.grid[cell]` (any other event type). Each candidate's
        afterstate feature representation is evaluated with
        `self.net.forward_value`. END events pick the highest-valued
        candidate; other events use `self.exploration_policy` and then decay
        `self.epsilon` by `self.epsilon_decay`.

        Returns:
            A 5-tuple `(ch, qval, old_frep, frep, neglogpac)`: the selected
            channel, its value, the first item returned by
            `self.afterstate_freps`, the feature representation at the
            selected index, and the result of
            `self.net.get_neglogpac(old_frep, cell, ch)`. For NEW/HOFF events
            with no eligible channel the result is `(None, ) * 5`.
        """
        if ce_type in (CEvent.NEW, CEvent.HOFF):
            chs = NGF.get_eligible_chs(self.grid, cell)
            if len(chs) == 0:
                return (None, ) * 5
        else:
            chs = np.nonzero(self.grid[cell])[0]
        old_frep, freps = self.afterstate_freps(self.grid, cell, ce_type, chs)
        # One value per channel in 'chs'
        qvals_dense = self.net.forward_value(freps).reshape(len(chs))
        if ce_type == CEvent.END:
            idx = np.argmax(qvals_dense)
            ch = chs[idx]
        else:
            ch, idx, _ = self.exploration_policy(self.epsilon, chs,
                                                 qvals_dense, cell)
            self.epsilon *= self.epsilon_decay

        if ch is None:
            # Only logged, not raised: execution continues to the return.
            self.logger.error(
                f"ch is none for {ce_type}\n{chs}\n{qvals_dense}\n")
        neglogpac = self.net.get_neglogpac(old_frep, cell, ch)
        return ch, qvals_dense[idx], old_frep, freps[idx], neglogpac
Exemplo n.º 4
0
Arquivo: base.py Projeto: namch29/dca
    def optimal_ch(self, ce_type, cell) -> Tuple[int, float, int]:
        """Select the channel fitting for assignment that has the maximum
        q-value according to an exploration policy, or select the channel for
        termination that has the minimum q-value in a greedy fashion.

        Side effect: for non-END events `self.epsilon` is updated, either from
        `self.epsilon0`, `self.t` and `self.eps_log_decay` (when the latter is
        truthy) or by multiplying with `self.epsilon_decay`.

        Returns:
            `(ch, max_ch, p)` where 'ch' is the channel selected by the
            exploration policy, 'max_ch' is the greedy (still eligible)
            channel and 'p' is the third value returned by
            `self.exploration_policy` (1 for END events). If no channel is
            eligible for assignment the result is `(None, None, 0)`.

        Raises:
            Exception: if the selected channel is None.
            AssertionError: on a non-NEW/HOFF event when no channel is in use
                at `cell`.
        """
        # NOTE(review): the annotated element order (int, float, int) does not
        # obviously match the returned (ch, max_ch, p) -- confirm and adjust.
        inuse = np.nonzero(self.grid[cell])[0]
        n_used = len(inuse)

        if ce_type in (CEvent.NEW, CEvent.HOFF):
            chs = NGF.get_eligible_chs(self.grid, cell)
            if len(chs) == 0:
                # No channels available for assignment,
                return (None, None, 0)
        else:
            # Channels in use at cell, including channel scheduled
            # for termination. The latter is included because it might
            # be the least valuable channel, in which case no
            # reassignment is done on call termination.
            chs = inuse
            # or no channels in use to reassign
            assert n_used > 0

        # TODO If 'max_ch' turns out not to be useful, then don't return it and
        # avoid running a forward pass through the net if a random action is selected.
        qvals_dense = self.get_qvals(cell=cell,
                                     n_used=n_used,
                                     ce_type=ce_type,
                                     chs=chs)
        # Selecting a ch for reassignment is always greedy because no learning
        # is done on the reassignment actions.
        if ce_type == CEvent.END:
            amin_idx = np.argmin(qvals_dense)
            ch = max_ch = chs[amin_idx]
            p = 1
        else:
            ch, idx, p = self.exploration_policy(self.epsilon, chs,
                                                 qvals_dense, cell)
            if self.eps_log_decay:
                self.epsilon = self.epsilon0 / np.sqrt(
                    self.t * 60 / self.eps_log_decay)
            else:
                self.epsilon *= self.epsilon_decay
            amax_idx = np.argmax(qvals_dense)
            max_ch = chs[amax_idx]

        # If qvals blow up ('NaN's and 'inf's), ch becomes none.
        if ch is None:
            msg = f"ch is none for {ce_type}\n{chs}\n{qvals_dense}\n"
            self.logger.error(msg)
            # Same exception type as before, now carrying the diagnostic text.
            raise Exception(msg)
        self.logger.debug(
            f"Optimal ch: {ch} for event {ce_type} of possibilities {chs}")
        return (ch, max_ch, p)
Exemplo n.º 5
0
 def test_get_free_chs(self):
     """Clear two channels around a cell and expect them reported eligible.

     The grid starts fully set; channels 0 and 4 are cleared in the cell and
     in every entry yielded by `NGF.neighbors(2, *cell, False)`. The result
     of `NGF.get_eligible_chs` is then checked against those two channels
     via `self._li_set_eq`.
     """
     cell = (3, 4)
     chs = [0, 4]
     grid = np.ones((7, 7, 70), dtype=bool)
     grid[cell][chs] = False
     for neigh in NGF.neighbors(2, *cell, False):
         grid[neigh][chs] = False
     self._li_set_eq(NGF.get_eligible_chs(grid, cell), chs)
Exemplo n.º 6
0
 def get_action(self, next_cevent, *args):
     """Choose a channel for the upcoming event uniformly at random.

     `next_cevent[1]` is the event type and `next_cevent[2]` the cell.
     NEW/HOFF events return a random eligible channel, or None when there
     is none. END events return `next_cevent[3]` unchanged, i.e. no
     rearrangement is done when a call terminates. Any other event type
     yields None. Extra positional arguments are ignored.
     """
     ce_type, next_cell = next_cevent[1:3]
     if ce_type in (CEvent.NEW, CEvent.HOFF):
         candidates = NGF.get_eligible_chs(self.grid, next_cell)
         if len(candidates) > 0:
             return np.random.choice(candidates)
         return None
     if ce_type == CEvent.END:
         return next_cevent[3]
     return None
Exemplo n.º 7
0
 def get_hoff_qvals(self, cell, n_used, ce_type, chs, h_cell, grid):
     """Look ahead for handoffs.

     For every afterstate produced by `NGF.afterstates(grid, cell, ce_type,
     chs)`, the entry for the corresponding channel becomes the maximum
     q-value obtainable in `h_cell` on that afterstate. Entries whose
     afterstate leaves no eligible channel in `h_cell` keep the negated
     stored q-value `-self.qvals[cell][ch]`.

     Returns an array with one value per entry of `chs`.
     """
     # NOTE(review): `n_used` is not used here, and the in-use count for
     # `h_cell` is taken from `self.grid` rather than from `grid` or the
     # afterstate -- confirm this is intended.
     end_astates = NGF.afterstates(grid, cell, ce_type, chs)
     h_n_used = np.count_nonzero(self.grid[h_cell])
     lookahead_qvals = -np.copy(self.qvals[cell][chs])
     for pos, end_astate in enumerate(end_astates):
         h_chs = NGF.get_eligible_chs(end_astate, h_cell)
         if len(h_chs) == 0:
             continue
         # NOTE(review): positional call; verify the argument order against
         # the signature of `get_qvals`.
         h_qvals = self.get_qvals(h_cell, h_n_used, h_chs)
         lookahead_qvals[pos] = np.max(h_qvals)
     return lookahead_qvals
Exemplo n.º 8
0
    def optimal_ch_pol(self, ce_type, cell) -> tuple:
        """Select a channel for `ce_type` at `cell` using the policy net.

        Candidates are the eligible channels (NEW/HOFF) or the channels set
        in `self.grid[cell]` (any other event type). The channel is chosen by
        `self.net.forward_action` on the feature representation of the
        current grid, and the resulting next feature representation is
        evaluated with `self.net.forward_value`.

        Returns:
            A 5-tuple `(ch, val, frep, next_frep, neglogpac)`. For NEW/HOFF
            events with no eligible channel the result is `(None, ) * 5`.
        """
        if ce_type in (CEvent.NEW, CEvent.HOFF):
            chs = NGF.get_eligible_chs(self.grid, cell)
            if len(chs) == 0:
                return (None, ) * 5
        else:
            chs = np.nonzero(self.grid[cell])[0]

        frep = self.feature_rep(self.grid)
        ch, neglogpac = self.net.forward_action(frep, cell, ce_type, chs)
        # Feature representation after executing only the chosen channel.
        next_frep = NGF.incremental_freps(self.grid, frep, cell, ce_type,
                                          np.array([ch]))
        val = self.net.forward_value(next_frep)
        return ch, val, frep, next_frep, neglogpac
Exemplo n.º 9
0
    def optimal_ch(self, ce_type, cell):
        """Select a channel for `ce_type` at `cell`, with optional hand-off
        look-ahead on END events.

        NEW/HOFF events choose among eligible channels through
        `self.exploration_policy` and then update `self.epsilon`. Other
        events choose among the channels in use at `cell`: with
        `self.pp['hoff_lookahead']` enabled and a HOFF event next in the
        event queue, the channel with the highest look-ahead q-value is
        taken; otherwise the channel with the lowest q-value is taken.

        Returns:
            `(ch, max_ch, p, qval, max_qval)`, or
            `(None, None, 0, None, None)` when a NEW/HOFF event has no
            eligible channel.
        """
        inuse = np.nonzero(self.grid[cell])[0]
        n_used = len(inuse)
        if ce_type in (CEvent.NEW, CEvent.HOFF):
            chs = NGF.get_eligible_chs(self.grid, cell)
            if len(chs) == 0:
                return (None, None, 0, None, None)
        else:
            chs = inuse
            assert n_used > 0

        # Arguments shared by every q-value lookup below.
        q_args = dict(cell=cell, n_used=n_used, ce_type=ce_type, chs=chs)

        if ce_type == CEvent.END:
            next_event = self.env.eventgen.peek()
            if self.pp['hoff_lookahead'] and next_event[1] == CEvent.HOFF:
                qvals_dense = self.get_hoff_qvals(h_cell=next_event[2],
                                                  grid=self.grid,
                                                  **q_args)
                idx = np.argmax(qvals_dense)
            else:
                qvals_dense = self.get_qvals(**q_args)
                idx = np.argmin(qvals_dense)
            # Greedy selection: chosen and "max" entries coincide.
            amax_idx = idx
            ch = max_ch = chs[idx]
            p = 1
        else:
            qvals_dense = self.get_qvals(**q_args)
            ch, idx, p = self.exploration_policy(self.epsilon, chs,
                                                 qvals_dense, cell)
            if self.eps_log_decay:
                decay_base = np.sqrt(self.t * 60 / self.eps_log_decay)
                self.epsilon = self.epsilon0 / decay_base
            else:
                self.epsilon *= self.epsilon_decay
            amax_idx = np.argmax(qvals_dense)
            max_ch = chs[amax_idx]

        assert ch is not None
        return (ch, max_ch, p, qvals_dense[idx], qvals_dense[amax_idx])
Exemplo n.º 10
0
    def get_hoff_qvals(self, grid, cell, ce_type, chs, h_cell):
        """Look ahead for handoffs.

        h_cell: target hand-off cell

        Every afterstate of executing `ce_type` at `cell` for a channel in
        `chs` is expanded into the afterstates of a subsequent HOFF into
        `h_cell`. The value for a channel is the best network output among
        its hand-off afterstates, adjusted by a 2-step return term. If no
        hand-off afterstate exists at all, the network output on the
        first-step afterstates is returned instead.

        Returns `(qvals_dense, cur_frep, freps)`: the per-channel values, the
        feature representation of `grid`, and the feature representations of
        the first-step afterstates.
        """
        end_astates = NGF.afterstates(grid, cell, ce_type, chs)
        all_hoff_astates = []
        # Number of hand-off afterstates contributed by each end afterstate.
        hoff_counts = []
        for end_astate in end_astates:
            h_chs = NGF.get_eligible_chs(end_astate, h_cell)
            if len(h_chs) == 0:
                hoff_counts.append(0)
                continue
            h_astates = NGF.afterstates(end_astate, h_cell, CEvent.HOFF,
                                        h_chs)
            all_hoff_astates.extend(h_astates)
            hoff_counts.append(len(h_astates))

        cur_frep = self.feature_rep(grid)
        freps = self.feature_reps(end_astates)

        if not all_hoff_astates:
            # Not possible to assign HOFF for any reass on END.
            qvals_dense = self.net.forward(freps=freps, grids=end_astates)
            return qvals_dense, cur_frep, freps

        hoff_astates = np.array(all_hoff_astates)
        hfreps = self.feature_reps(hoff_astates)
        hqvals_dense = self.net.forward(freps=hfreps, grids=hoff_astates)
        assert hqvals_dense.shape == (
            len(hoff_astates), ), hqvals_dense.shape

        # Best hand-off value per channel; channels without any hand-off
        # afterstate stay at 0.
        qvals_dense = np.zeros(len(chs))
        start = 0
        for pos, count in enumerate(hoff_counts):
            if count > 0:
                qvals_dense[pos] = np.max(hqvals_dense[start:start + count])
            start += count

        # Nasty hack for 2-step returns
        # All relevant hoff astates have same count
        reward = np.count_nonzero(hoff_astates[0])
        if self.pp['target'] == 'avg':
            qvals_dense += reward - self.avg_reward
        else:
            qvals_dense *= self.pp['gamma']**2
            qvals_dense += self.pp['gamma'] * reward
        return qvals_dense, cur_frep, freps
Exemplo n.º 11
0
 def get_action(self, next_cevent, *args):
     """Choose a channel, preferring those flagged in `GF.nom_chs_mask`.

     `next_cevent[1]` is the event type and `next_cevent[2]` the cell.

     NEW/HOFF: return the first eligible channel flagged in the mask for
     the cell; otherwise a random eligible channel, or None when no channel
     is eligible.
     END: return the first in-use channel flagged in the mask for the cell;
     otherwise `next_cevent[3]`.
     Any other event type yields None. Extra positional arguments are
     ignored.
     """
     # NOTE(review): presumably the mask marks each cell's nominal
     # (pre-assigned) channels -- confirm against GF.
     ce_type, next_cell = next_cevent[1:3]
     if ce_type in (CEvent.NEW, CEvent.HOFF):
         eligible_chs = NGF.get_eligible_chs(self.grid, next_cell)
         flagged = next(
             (ch for ch in eligible_chs if GF.nom_chs_mask[next_cell][ch]),
             None)
         if flagged is not None:
             return flagged
         if len(eligible_chs) == 0:
             return None
         return np.random.choice(eligible_chs)
     if ce_type == CEvent.END:
         inuse_chs = np.nonzero(self.grid[next_cell])[0]
         flagged = next(
             (ch for ch in inuse_chs if GF.nom_chs_mask[next_cell][ch]),
             None)
         if flagged is not None:
             return flagged
         return next_cevent[3]
     return None
0
    def optimal_ch(self, ce_type, cell):
        """Select a channel for `ce_type` at `cell` from the action
        distribution returned by `self.forward`.

        Candidates are the eligible channels (NEW/HOFF) or the channels set
        in `self.grid[cell]` (any other event type). Selection is currently
        always greedy (`greedy` is hard-coded to True): the lowest-scoring
        candidate for END events, the highest-scoring one otherwise. The
        sampling branches are kept for when `greedy` is switched off.

        Returns:
            `(ch, val)`, or `(None, None)` if there are no candidates.

        Raises:
            AssertionError: if an END event has no channel in use.
            Exception: if `val` is infinite or NaN.
        """
        if ce_type == CEvent.NEW or ce_type == CEvent.HOFF:
            # Calls eligible for assignment
            chs = NGF.get_eligible_chs(self.grid, cell)
        else:
            # Calls in progress
            chs = np.nonzero(self.grid[cell])[0]
        if len(chs) == 0:
            assert ce_type != CEvent.END
            return (None, None)

        a_dist, val = self.forward(cell=cell, ce_type=ce_type)
        greedy = True
        if ce_type == CEvent.END:
            if greedy:
                idx = np.argmin(a_dist[chs])
            else:
                valid_a_dist = softmax(1 - a_dist[chs])
                # np.arange, not the non-existent np.range
                idx = np.random.choice(np.arange(len(valid_a_dist)),
                                       p=valid_a_dist)
        else:
            if greedy:
                idx = np.argmax(a_dist[chs])
            else:
                valid_a_dist = softmax(a_dist[chs])
                idx = np.random.choice(np.arange(len(valid_a_dist)),
                                       p=valid_a_dist)
        ch = chs[idx]
        # TODO NOTE verify the above

        # Abort if the value estimate has blown up ('NaN' or 'inf').
        if np.isinf(val) or np.isnan(val):
            msg = f"{ce_type}\n{chs}\n{val}\n\n"
            self.logger.error(msg)
            raise Exception(msg)

        self.logger.debug(
            f"Optimal ch: {ch} for event {ce_type} of possibilities {chs}")
        return (ch, val)
Exemplo n.º 13
0
    def optimal_ch(self, ce_type, cell) -> tuple:
        """Select a channel for an event of type `ce_type` at `cell`.

        Candidates are the eligible channels (NEW/HOFF) or the channels set
        in `self.grid[cell]` (any other event type). END events pick the
        candidate with the lowest q-value; other events use
        `self.exploration_policy` and then multiply `self.epsilon` by
        `self.epsilon_decay`.

        Side effect: the mean of the candidate q-values is appended to
        `self.qval_means`.

        Returns:
            A 5-tuple `(ch, qval, max_ch, max_qval, p)`. For END events
            `max_ch` equals `ch` and `p` is 1. For NEW/HOFF events with no
            eligible channel the result is `(None, ) * 5`.

        Raises:
            AssertionError: if the selected channel is None.
        """
        if ce_type in (CEvent.NEW, CEvent.HOFF):
            chs = NGF.get_eligible_chs(self.grid, cell)
            if len(chs) == 0:
                return (None, ) * 5
        else:
            chs = np.nonzero(self.grid[cell])[0]

        qvals_dense = self.get_qvals(self.grid, cell, ce_type, chs)
        self.qval_means.append(np.mean(qvals_dense))
        if ce_type == CEvent.END:
            # Greedy choice: selected and "max" entries coincide.
            idx = max_idx = np.argmin(qvals_dense)
            ch = max_ch = chs[idx]
            p = 1
        else:
            ch, idx, p = self.exploration_policy(self.epsilon, chs,
                                                 qvals_dense, cell)
            max_idx = np.argmax(qvals_dense)
            max_ch = chs[max_idx]
            self.epsilon *= self.epsilon_decay

        assert ch is not None, f"ch is none for {ce_type}\n{chs}\n{qvals_dense}\n"
        return ch, qvals_dense[idx], max_ch, qvals_dense[max_idx], p
Exemplo n.º 14
0
    def optimal_ch(self, ce_type, cell) -> tuple:
        """Select a channel and return selected channel, greedy channel, qval
        and frep for both, in addition to prob of picking selected channel,
        and current frep.

        Candidates are the eligible channels (NEW/HOFF) or the channels set
        in `self.grid[cell]` (any other event type). When
        `self.pp['hoff_lookahead']` is set and the next queued event is a
        HOFF, q-values come from `self.get_hoff_qvals`; otherwise from
        `self.get_qvals`. END events pick the highest q-value; other events
        use `self.exploration_policy` and then update `self.epsilon`.

        Side effect: the mean of the candidate q-values is appended to
        `self.qval_means`.

        Returns:
            A 3-tuple `(Chs(ch, max_ch, p), Qvals(qval, max_qval),
            Freps(cur_frep, frep, max_frep))`. For NEW/HOFF events with no
            eligible channel the result is
            `(Chs(None, None, 1), Qvals(None, None), Freps(None, None, None))`.

        Raises:
            AssertionError: if look-ahead is triggered for a non-END event,
                or if the selected channel is None.
        """
        if ce_type in (CEvent.NEW, CEvent.HOFF):
            chs = NGF.get_eligible_chs(self.grid, cell)
            if len(chs) == 0:
                return Chs(None, None,
                           1), Qvals(None, None), Freps(None, None, None)
        else:
            chs = np.nonzero(self.grid[cell])[0]

        next_event = self.env.eventgen.peek()
        if self.pp['hoff_lookahead'] and next_event[1] == CEvent.HOFF:
            assert ce_type == CEvent.END
            qvals_dense, cur_frep, freps = self.get_hoff_qvals(
                self.grid, cell, ce_type, chs, next_event[2])
        else:
            qvals_dense, cur_frep, freps = self.get_qvals(
                self.grid, cell, ce_type, chs)
        self.qval_means.append(np.mean(qvals_dense))
        if ce_type == CEvent.END:
            # Greedy choice: selected and "max" entries coincide.
            idx = max_idx = np.argmax(qvals_dense)
            ch = max_ch = chs[idx]
            p = 1
            if self.pp['debug']:
                # Diagnostic only: compares network outputs for the same
                # afterstate computed through different batching paths.
                val = qvals_dense[idx]
                _, freps1 = self.afterstate_freps(self.grid, cell, ce_type,
                                                  np.array([ch]))
                v1 = self.net.forward(freps1)[0]
                frep2 = GF.afterstate_freps_naive(self.grid, cell, ce_type,
                                                  chs)[idx]
                v2 = self.net.forward([frep2])[0]
                v3 = self.net.forward([freps1[0], freps1[0]])[0]
                v4 = self.net.forward([frep2, frep2])[0]
                # val: multi freps, multi tf
                # v1: single frep, single tf
                # v2: multi freps, single tf
                # v3: single freps, multi tf
                # v4: multi freps, multi tf
                # ON CPU:
                # (val == v3 == v4) != (v1 == v2)
                # ON GPU: All the same...
                # CONCLUSION: Running multiple samples at once through TF
                # yields different (higher accuracy) results
                print(val, v1, v2, v3, v4, "\n")
        else:
            ch, idx, p = self.exploration_policy(self.epsilon, chs,
                                                 qvals_dense, cell)
            max_idx = np.argmax(qvals_dense)
            max_ch = chs[max_idx]
            if self.eps_log_decay:
                self.epsilon = self.epsilon0 / np.sqrt(
                    self.t * 60 / self.eps_log_decay)
            else:
                self.epsilon *= self.epsilon_decay

        assert ch is not None, f"ch is none for {ce_type}\n{chs}\n{qvals_dense}\n"
        return Chs(ch, max_ch,
                   p), Qvals(qvals_dense[idx], qvals_dense[max_idx]), Freps(
                       cur_frep, freps[idx], freps[max_idx])