Example #1
 def compute(self, tensor_in: Tensor, train_mode=False):
     if train_mode:
         # Batch statistics; marked diff=False so they stay out of the graph.
         m = Tensor(mean(tensor_in.data), diff=False)
         v = Tensor(var(tensor_in.data, m.data), diff=False)
         # Update the running statistics used at inference time.
         self.ravg_mean = running_avg(self.ravg_mean, 0.9, m.data)
         self.ravg_var = running_avg(self.ravg_var, 0.9, v.data)
     else:
         # At inference, normalize with the running statistics instead.
         m = Tensor(self.ravg_mean)
         v = Tensor(self.ravg_var)
     # Normalize, then apply the learned scale (gamma) and shift (beta).
     x = (tensor_in - m) / op.sqrt(v + self.epsilon)
     y = self.gamma * x + self.beta
     return y
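
Note: the running_avg helper is not shown on this page. Given how it is called here (old value, decay, new value), it is presumably an exponential moving average; a minimal sketch under that assumption:

def running_avg(avg, decay, new_value):
    # Exponential moving average: keep `decay` of the old estimate and
    # blend in (1 - decay) of the new observation. (Hypothetical helper.)
    return decay * avg + (1 - decay) * new_value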
Example #2
 def step(self, loss: Tensor):
     # Backpropagate to populate parameter gradients.
     loss.calc_gradients()
     self.t += 1
     for i in range(len(self.parameters)):
         # First-moment (momentum) estimate of the gradient.
         self.m[i] = running_avg(self.m[i], self.beta, self.parameters[i].grad)
         # Optionally undo the zero-initialization bias, as in Adam.
         m_hat = (running_avg_bias_correction(self.m[i], self.beta, self.t)
                  if self.bias_correction else self.m[i])
         self.parameters[i].data -= self.learning_rate * m_hat
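
Note: running_avg_bias_correction is also not shown. Because the moment estimate starts at zero, it is biased toward zero for small t; Adam-style bias correction divides by 1 - beta^t. A plausible sketch, assuming that convention:

def running_avg_bias_correction(avg, decay, t):
    # Undo the zero-initialization bias of an EMA after t updates.
    # (Hypothetical helper, following the Adam convention.)
    return avg / (1 - decay ** t)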
Example #3
 def step(self, loss: Tensor):
     # Backpropagate to populate parameter gradients.
     loss.calc_gradients()
     self.t += 1
     for i in range(len(self.parameters)):
         g = self.parameters[i].grad
         # First- and second-moment estimates of the gradient (Adam-style).
         self.m[i] = running_avg(self.m[i], self.beta_1, g)
         self.v[i] = running_avg_squared(self.v[i], self.beta_2, g)
         # Optionally undo the zero-initialization bias of both moments.
         m_hat = (running_avg_bias_correction(self.m[i], self.beta_1, self.t)
                  if self.bias_correction else self.m[i])
         v_hat = (running_avg_bias_correction(self.v[i], self.beta_2, self.t)
                  if self.bias_correction else self.v[i])
         # Update scaled by the square root of the second moment.
         self.parameters[i].data -= grad_square_delta(
             self.learning_rate, m_hat, v_hat, self.epsilon)
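
Note: two more helpers are assumed by this Adam-style step. If it follows the standard Adam update, they would look roughly like this (a sketch, not the project's actual code):

def running_avg_squared(avg, decay, new_value):
    # EMA of the element-wise squared gradient (Adam's second moment).
    return decay * avg + (1 - decay) * new_value ** 2

def grad_square_delta(learning_rate, m_hat, v_hat, epsilon):
    # Adam parameter delta: lr * m_hat / (sqrt(v_hat) + eps).
    return learning_rate * m_hat / (v_hat ** 0.5 + epsilon)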
Example #4
def train_plan(args, data, DNC, lstm_state, optimizer):
    """
        Things to test after some iterations:
         - on planning phase and on

         with goals - chose a goal and work toward that
        :param args:
        :return:
        """
    criterion = nn.CrossEntropyLoss().cuda() if args.cuda else nn.CrossEntropyLoss()
    cum_correct, cum_total, prob_times, n_success = [], [], [], 0
    penalty = 1.1

    for trial in range(args.iters):
        start_prob = time.time()
        phase_masks = data.make_new_problem()
        n_total, n_correct, prev_action, loss, stats = 0, 0, None, 0, []
        dnc_state = DNC.init_state(grad=False)
        lstm_state = DNC.init_rnn(grad=False)
        optimizer.zero_grad()

        for phase_idx in phase_masks:

            if phase_idx == 0 or phase_idx == 1:
                inputs = _variable(data.getitem_combined())
                logits, dnc_state, lstm_state = DNC(inputs, lstm_state,
                                                    dnc_state)
                _, prev_action = data.strip_ix_mask(logits)

            elif phase_idx == 2:
                mask = _variable(data.getmask())
                inputs = torch.cat([mask, prev_action], 1)
                logits, dnc_state, lstm_state = DNC(inputs, lstm_state,
                                                    dnc_state)
                _, prev_action = data.strip_ix_mask(logits)

            else:
                # sample from best moves
                actions_star, all_actions = data.get_actions(mode='both')
                if not actions_star:
                    break
                if args.zero_at == 'step':
                    optimizer.zero_grad()

                mask = data.getmask()
                prev_action = prev_action.cuda() if args.cuda else prev_action
                pr = u.depackage(prev_action)

                final_inputs = _variable(torch.cat([mask, pr], 1))
                logits, dnc_state, lstm_state = DNC(final_inputs, lstm_state,
                                                    dnc_state)
                exp_logits = data.ix_input_to_ixs(logits)

                # With probability args.beta, train against the expert moves;
                # otherwise score the model's own candidate moves.
                guided = random.random() < args.beta
                if guided:  # guided loss
                    final_action, lstep = L.naive_loss(exp_logits,
                                                       actions_star,
                                                       criterion,
                                                       log=True)
                else:  # pick own move
                    final_action, lstep = L.naive_loss(exp_logits,
                                                       all_actions,
                                                       criterion,
                                                       log=True)

                # Penalize predictions outside the legal move set (TODO: test this).
                action_own = u.get_prediction(exp_logits)
                if args.penalty and action_own not in [tuple(flat(t)) for t in all_actions]:
                    final_loss = lstep * _variable([args.penalty])
                else:
                    final_loss = lstep

                if args.opt_at == 'problem':
                    loss += final_loss
                else:
                    final_loss.backward(retain_graph=args.ret_graph)
                    if args.clip:
                        torch.nn.utils.clip_grad_norm(DNC.parameters(),
                                                      args.clip)
                    optimizer.step()
                    loss = lstep

                data.send_action(final_action)

                if (trial + 1) % args.show_details == 0:
                    action_accs = u.human_readable_res(data, all_actions,
                                                       actions_star,
                                                       action_own, guided,
                                                       lstep.data[0])
                    stats.append(action_accs)
                n_total, _ = tick(n_total, n_correct, action_own,
                                  flat(final_action))
                n_correct += 1 if action_own in [
                    tuple(flat(t)) for t in actions_star
                ] else 0
                prev_action = data.vec_to_ix(final_action)

        if stats:
            arr = np.array(stats)
            correct = len([
                1 for i in list(arr.sum(axis=1)) if i == len(stats[0])
            ]) / len(stats)
            sl.log_acc(list(arr.mean(axis=0)), correct)

        if args.opt_at == 'problem':
            floss = loss / n_total
            floss.backward(retain_graph=args.ret_graph)
            if args.clip:
                torch.nn.utils.clip_grad_norm(DNC.parameters(), args.clip)
            optimizer.step()
            sl.writer.add_scalar('losses.end', floss.data[0], sl.global_step)

        n_success += 1 if n_correct / n_total > args.passing else 0
        cum_total.append(n_total)
        cum_correct.append(n_correct)
        sl.add_scalar('recall.pct_correct', n_correct / n_total,
                      sl.global_step)
        print(
            "trial {}, step {} trial accy: {}/{}, {:0.2f}, running total {}/{}, running avg {:0.4f}, loss {:0.4f}"
            .format(trial, sl.global_step, n_correct, n_total,
                    n_correct / n_total, n_success, trial + 1,
                    running_avg(cum_correct, cum_total), loss.data[0]))
        end_prob = time.time()
        # seconds spent on this problem
        prob_times.append(end_prob - start_prob)
    print("solved {} out of {} -> {}".format(n_success, args.iters,
                                             n_success / args.iters))
    return DNC, optimizer, lstm_state, running_avg(cum_correct, cum_total)
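
Note: unlike the optimizer examples above, running_avg here takes two lists (per-problem correct counts and totals), so it presumably reports cumulative accuracy rather than an EMA; a sketch under that reading:

def running_avg(correct_counts, total_counts):
    # Cumulative accuracy over all problems seen so far (hypothetical helper).
    return sum(correct_counts) / max(sum(total_counts), 1)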
Example #5
def train_qa2(args, data, DNC, optimizer):
    """
        I am jacks liver. This is a sanity test

        0 - describe state.
        1 - describe goal.
        2 - do actions.
        3 - ask some questions
        :param args:
        :return:
        """
    criterion = nn.CrossEntropyLoss()
    cum_correct, cum_total = [], []

    for trial in range(args.iters):
        phase_masks = data.make_new_problem()
        n_total, n_correct, loss = 0, 0, 0
        dnc_state = DNC.init_state(grad=False)
        optimizer.zero_grad()

        for phase_idx in phase_masks:
            if phase_idx == 0 or phase_idx == 1:
                inputs = _variable(data.getitem_combined())
                logits, dnc_state = DNC(inputs, dnc_state)
            else:
                final_moves = data.get_actions(mode='one')
                if not final_moves:
                    break
                data.send_action(final_moves[0])
                mask = data.phase_oh[2].unsqueeze(0)
                inputs2 = _variable(
                    torch.cat([mask, data.vec_to_ix(final_moves[0])], 1))
                logits, dnc_state = DNC(inputs2, dnc_state)

                for _ in range(args.num_tests):
                    # ask where is ---?
                    if args.zero_at == 'step':
                        optimizer.zero_grad()
                    masked_input, mask_chunk, ground_truth = data.masked_input()
                    logits, dnc_state = DNC(_variable(masked_input), dnc_state)
                    expanded_logits = data.ix_input_to_ixs(logits)

                    # losses
                    lstep = L.action_loss(expanded_logits,
                                          ground_truth,
                                          criterion,
                                          log=True)
                    if args.opt_at == 'problem':
                        loss += lstep
                    else:
                        lstep.backward(retain_graph=args.ret_graph)
                        optimizer.step()
                        loss = lstep

                    # update counters
                    prediction = u.get_prediction(expanded_logits, [3, 4])
                    n_total, n_correct = tick(n_total, n_correct, mask_chunk,
                                              prediction)

        if args.opt_at == 'problem':
            loss.backward(retain_graph=args.ret_graph)
            optimizer.step()
            sl.writer.add_scalar('losses.end', loss.data[0], sl.global_step)

        cum_total.append(n_total)
        cum_correct.append(n_correct)
        sl.writer.add_scalar('recall.pct_correct', n_correct / n_total,
                             sl.global_step)
        print(
            "trial: {}, step:{}, accy {:0.4f}, cum_score {:0.4f}, loss: {:0.4f}"
            .format(trial, sl.global_step, n_correct / n_total,
                    running_avg(cum_correct, cum_total), loss.data[0]))
    return DNC, optimizer, dnc_state, running_avg(cum_correct, cum_total)
Example #6
def play_qa_readable(args, data, DNC):
    criterion = nn.CrossEntropyLoss()
    cum_correct, cum_total = [], []

    for trial in range(args.iters):
        phase_masks = data.make_new_problem()
        n_total, n_correct, loss = 0, 0, 0
        dnc_state = DNC.init_state(grad=False)

        for phase_idx in phase_masks:
            if phase_idx == 0 or phase_idx == 1:

                inputs, msk = data.getitem()
                print(data.human_readable(inputs, msk))

                inputs = Variable(torch.cat([msk, inputs], 1))
                logits, dnc_state = DNC(inputs, dnc_state)
            else:
                final_moves = data.get_actions(mode='one')
                if not final_moves:
                    break
                data.send_action(final_moves[0])
                mask = data.phase_oh[2].unsqueeze(0)
                vec = data.vec_to_ix(final_moves[0])
                print('\n')
                print(data.human_readable(vec, mask))

                inputs2 = Variable(torch.cat([mask, vec], 1))
                logits, dnc_state = DNC(inputs2, dnc_state)

                for _ in range(args.num_tests):
                    # ask where is ---?

                    masked_input, mask_chunk, ground_truth = data.masked_input()
                    print("Context:", data.human_readable(ground_truth))
                    print("Q:")

                    logits, dnc_state = DNC(Variable(masked_input), dnc_state)
                    expanded_logits = data.ix_input_to_ixs(logits)

                    # losses (keep the last step's loss for the summary print below)
                    lstep = l.action_loss(expanded_logits,
                                          ground_truth,
                                          criterion,
                                          log=True)
                    loss = lstep

                    # update counters
                    prediction = u.get_prediction(expanded_logits, [3, 4])
                    print("A:")
                    n_total, n_correct = tick(n_total, n_correct, mask_chunk,
                                              prediction)
                    print("correct:", mask_chunk == prediction)

        cum_total.append(n_total)
        cum_correct.append(n_correct)
        sl.writer.add_scalar('recall.pct_correct', n_correct / n_total,
                             sl.global_step)
        print(
            "trial: {}, step:{}, accy {:0.4f}, cum_score {:0.4f}, loss: {:0.4f}"
            .format(trial, sl.global_step, n_correct / n_total,
                    u.running_avg(cum_correct, cum_total), loss.data[0]))
    return DNC, dnc_state, u.running_avg(cum_correct, cum_total)
Example #7
File: SE_UL.py Project: szhang104/pycomm
def SE_UL(Hhat: ndarray, C: ndarray, R, tau_c, tau_p, realization_cnt, M, K, L,
          p):
    """
    calculates the uplink spectral efficiency for different receive combining
    Parameters
    ----------
    Hhat (M, realization_cnt, K, L, L)
        MMSE channel estimates
    C (M, M, K, L, L)
        estimation error correlation matrix with MMSE estimation
    R
    tau_c
    tau_p
    realization_cnt
    M
    K
    L
    p: float
        uplink transmit power (same for all here)

    Returns
    -------

    """
    methods = ['MR', 'RZF', 'MMMSE', 'ZF', 'SMMSE']
    V = {}
    # sum of all estimation error correlation matrices at every BS
    # shape (M, M, L)
    C_totM = np.reshape(p * C.sum(axis=(2, 3)), (M, M, L))
    # sum of intra-cell estimation error correlation matrices at every BS
    CR_totS = np.zeros([M, M, L], dtype=complex)
    for j in range(L):
        all_other_cells = np.ones((L,), dtype=bool)
        all_other_cells[j] = False
        CR_totS[:, :, j] = p * (C[:, :, :, j, j].sum(axis=2) +
                                R[:, :, :, all_other_cells, j].sum(axis=(2, 3)))

    prelog_factor = (tau_c - tau_p) / tau_c
    SE = {}
    for method in methods:
        SE[method] = np.zeros([K, L])

    for n in range(realization_cnt):
        for j in range(L):
            # MATLAB uses Fortran (column-major) order, hence order='F'; shape (M, K*L)
            Hhat_allj = Hhat[:, n, :, :, j].reshape(M, K * L, order='F')
            # indices of the UEs served by BS j
            ue = np.arange(K * j, K * j + K)
            Hhat_j = Hhat_allj[:, K * j:K * j + K]
            V['MR'] = mr_combining(Hhat_allj, ue)
            V['RZF'] = rzf_combining(Hhat_allj, ue, p)
            V['ZF'] = zf_combining(Hhat_allj, ue)
            V['MMMSE'] = mmmse_combining(Hhat_allj, ue, p, C_totM[:, :, j])
            V['SMMSE'] = smmse_combining(Hhat_allj, ue, p, CR_totS[:, :, j])

            for k in range(K):
                for method in methods:
                    v = V[method][:, k]
                    # v: (M, ), Hhat: (M,)
                    numerator = p * (np.abs(v.conj() @ Hhat[:, n, k, j, j])**2)
                    # Hhat_allj: (M, K*L)
                    denominator = p * np.sum(np.abs(v.conj() @ Hhat_allj) ** 2) + \
                                  v.conj() @ (C_totM[:, :, j] + np.eye(M)) @ v - numerator
                    SE[method][k, j] = running_avg(
                        n, SE[method][k, j],
                        prelog_factor *
                        np.log2(1 + numerator / denominator).real)
    return SE
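
Note: in this file running_avg takes the realization index first, so it is presumably an incremental arithmetic mean over channel realizations rather than an EMA; a minimal sketch under that assumption:

def running_avg(n, avg, new_value):
    # Incremental mean after n previous samples (hypothetical signature).
    return avg + (new_value - avg) / (n + 1)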