def compute(self, tensor_in: Tensor, train_mode=False):
    """Batch-normalize ``tensor_in`` and apply the learned affine transform.

    In training mode the statistics come from the current batch and are
    folded into the running averages; in inference mode the stored running
    averages are used instead.

    :param tensor_in: input activations to normalize
    :param train_mode: True during training (updates running statistics)
    :return: gamma * normalized + beta
    """
    if train_mode:
        mu = Tensor(mean(tensor_in.data), diff=False)
        variance = Tensor(var(tensor_in.data, mu.data), diff=False)
        # fold the batch statistics into the exponential running estimates
        self.ravg_mean = running_avg(self.ravg_mean, 0.9, mu.data)
        self.ravg_var = running_avg(self.ravg_var, 0.9, variance.data)
    else:
        mu = Tensor(self.ravg_mean)
        variance = Tensor(self.ravg_var)
    # epsilon guards against division by zero for near-constant inputs
    normalized = (tensor_in - mu) / op.sqrt(variance + self.epsilon)
    return self.gamma * normalized + self.beta
def step(self, loss: Tensor):
    """Perform one momentum update of every tracked parameter.

    Backpropagates through ``loss``, advances the step counter, then moves
    each parameter against its (optionally bias-corrected) first-moment
    gradient estimate, scaled by the learning rate.

    :param loss: scalar loss tensor to differentiate
    """
    loss.calc_gradients()
    self.t += 1
    for idx, param in enumerate(self.parameters):
        # exponential moving average of the gradient (first moment)
        self.m[idx] = running_avg(self.m[idx], self.beta, param.grad)
        if self.bias_correction:
            m_hat = running_avg_bias_correction(self.m[idx], self.beta, self.t)
        else:
            m_hat = self.m[idx]
        param.data -= self.learning_rate * m_hat
def step(self, loss: Tensor):
    """Perform one Adam-style update of every tracked parameter.

    Backpropagates through ``loss``, advances the step counter, then updates
    each parameter using running estimates of the first and second gradient
    moments (optionally bias-corrected).

    :param loss: scalar loss tensor to differentiate
    """
    loss.calc_gradients()
    self.t += 1
    for idx, param in enumerate(self.parameters):
        grad = param.grad
        # first moment: EMA of the gradient; second moment: EMA of its square
        self.m[idx] = running_avg(self.m[idx], self.beta_1, grad)
        self.v[idx] = running_avg_squared(self.v[idx], self.beta_2, grad)
        if self.bias_correction:
            m_hat = running_avg_bias_correction(self.m[idx], self.beta_1, self.t)
            v_hat = running_avg_bias_correction(self.v[idx], self.beta_2, self.t)
        else:
            m_hat, v_hat = self.m[idx], self.v[idx]
        param.data -= grad_square_delta(
            self.learning_rate, m_hat, v_hat, self.epsilon)
def train_plan(args, data, DNC, lstm_state, optimizer):
    """Run the planning training loop.

    Phases 0/1 present the problem, phase 2 replays the previous action
    under the phase mask, and every later phase scores the network's move
    against the best available actions (teacher-guided with probability
    ``args.beta``).

    Things to test after some iterations:
     - on planning phase and on with goals
     - chose a goal and work toward that

    :param args: experiment flags (cuda, iters, beta, opt_at, zero_at, clip, ...)
    :param data: problem generator / environment adapter
    :param DNC: model; called as DNC(inputs, lstm_state, dnc_state)
    :param lstm_state: controller state (re-initialized at each trial)
    :param optimizer: torch optimizer over DNC.parameters()
    :return: (DNC, optimizer, lstm_state, running accuracy over all trials)
    """
    criterion = nn.CrossEntropyLoss().cuda(
    ) if args.cuda is True else nn.CrossEntropyLoss()
    cum_correct, cum_total, prob_times, n_success = [], [], [], 0
    for trial in range(args.iters):
        start_prob = time.time()
        phase_masks = data.make_new_problem()
        n_total, n_correct, prev_action, loss, stats = 0, 0, None, 0, []
        dnc_state = DNC.init_state(grad=False)
        lstm_state = DNC.init_rnn(grad=False)
        optimizer.zero_grad()
        for phase_idx in phase_masks:
            if phase_idx == 0 or phase_idx == 1:
                # description phases: feed the combined problem item straight in
                inputs = _variable(data.getitem_combined())
                logits, dnc_state, lstm_state = DNC(inputs, lstm_state,
                                                    dnc_state)
                _, prev_action = data.strip_ix_mask(logits)
            elif phase_idx == 2:
                # planning phase: replay the previous action under the mask
                mask = _variable(data.getmask())
                inputs = torch.cat([mask, prev_action], 1)
                logits, dnc_state, lstm_state = DNC(inputs, lstm_state,
                                                    dnc_state)
                _, prev_action = data.strip_ix_mask(logits)
            else:
                # action phase: sample from best moves
                actions_star, all_actions = data.get_actions(mode='both')
                if not actions_star:
                    break
                if args.zero_at == 'step':
                    optimizer.zero_grad()
                mask = data.getmask()
                prev_action = prev_action.cuda(
                ) if args.cuda is True else prev_action
                pr = u.depackage(prev_action)
                final_inputs = _variable(torch.cat([mask, pr], 1))
                logits, dnc_state, lstm_state = DNC(final_inputs, lstm_state,
                                                    dnc_state)
                exp_logits = data.ix_input_to_ixs(logits)
                # teacher-guide the loss with probability args.beta
                guided = random.random() < args.beta
                if guided:  # guided loss against the optimal actions
                    final_action, lstep = L.naive_loss(exp_logits,
                                                       actions_star,
                                                       criterion, log=True)
                else:  # pick own move among all legal actions
                    final_action, lstep = L.naive_loss(exp_logits,
                                                       all_actions,
                                                       criterion, log=True)
                action_own = u.get_prediction(exp_logits)
                # NOTE(review): this condition is True only when all_actions
                # is empty; it looks like it was meant to penalize
                # `action_own not in [...]` — confirm intent before changing.
                if args.penalty and not [tuple(flat(t)) for t in all_actions]:
                    final_loss = lstep * _variable([args.penalty])
                else:
                    final_loss = lstep
                if args.opt_at == 'problem':
                    # accumulate; one optimizer step at the end of the problem
                    loss += final_loss
                else:
                    # step-wise optimization
                    final_loss.backward(retain_graph=args.ret_graph)
                    if args.clip:
                        torch.nn.utils.clip_grad_norm(DNC.parameters(),
                                                      args.clip)
                    optimizer.step()
                    loss = lstep
                data.send_action(final_action)
                if (trial + 1) % args.show_details == 0:
                    action_accs = u.human_readable_res(data, all_actions,
                                                       actions_star,
                                                       action_own, guided,
                                                       lstep.data[0])
                    stats.append(action_accs)
                # tick only supplies the new total; correctness is tallied by
                # membership in the optimal-action set below
                n_total, _ = tick(n_total, n_correct, action_own,
                                  flat(final_action))
                n_correct += 1 if action_own in [
                    tuple(flat(t)) for t in actions_star
                ] else 0
                prev_action = data.vec_to_ix(final_action)
        if stats:
            arr = np.array(stats)
            correct = len([
                1 for i in list(arr.sum(axis=1)) if i == len(stats[0])
            ]) / len(stats)
            sl.log_acc(list(arr.mean(axis=0)), correct)
        if args.opt_at == 'problem':
            floss = loss / n_total
            floss.backward(retain_graph=args.ret_graph)
            if args.clip:
                torch.nn.utils.clip_grad_norm(DNC.parameters(), args.clip)
            optimizer.step()
            sl.writer.add_scalar('losses.end', floss.data[0], sl.global_step)
        n_success += 1 if n_correct / n_total > args.passing else 0
        cum_total.append(n_total)
        cum_correct.append(n_correct)
        # fixed: was sl.add_scalar, inconsistent with every other call site
        sl.writer.add_scalar('recall.pct_correct', n_correct / n_total,
                             sl.global_step)
        print(
            "trial {}, step {} trial accy: {}/{}, {:0.2f}, running total {}/{}, running avg {:0.4f}, loss {:0.4f} "
            .format(trial, sl.global_step, n_correct, n_total,
                    n_correct / n_total, n_success, trial,
                    running_avg(cum_correct, cum_total), loss.data[0]))
        end_prob = time.time()
        # fixed: was start_prob - end_prob, which recorded negative durations
        prob_times.append(end_prob - start_prob)
    print("solved {} out of {} -> {}".format(n_success, args.iters,
                                             n_success / args.iters))
    return DNC, optimizer, lstm_state, running_avg(cum_correct, cum_total)
def train_qa2(args, data, DNC, optimizer):
    """
    I am jacks liver. This is a sanity test

    Phase protocol:
    0 - describe state. 1 - describe goal. 2 - do actions. 3 - ask some questions

    Feeds description phases into the DNC, replays one chosen action per
    action phase, then asks ``args.num_tests`` masked questions and trains
    on the answer loss (per step, or accumulated per problem depending on
    ``args.opt_at``).

    :param args: experiment flags (iters, num_tests, zero_at, opt_at, ret_graph)
    :param data: problem generator / environment adapter
    :param DNC: model; called as DNC(inputs, dnc_state)
    :param optimizer: torch optimizer over DNC's parameters
    :return: (DNC, optimizer, dnc_state, running accuracy over all trials)
    """
    criterion = nn.CrossEntropyLoss()
    cum_correct, cum_total = [], []
    for trial in range(args.iters):
        phase_masks = data.make_new_problem()
        n_total, n_correct, loss = 0, 0, 0
        dnc_state = DNC.init_state(grad=False)
        optimizer.zero_grad()
        for phase_idx in phase_masks:
            if phase_idx == 0 or phase_idx == 1:
                # description phases: present state/goal to the network
                inputs = _variable(data.getitem_combined())
                logits, dnc_state = DNC(inputs, dnc_state)
            else:
                final_moves = data.get_actions(mode='one')
                if final_moves == []:
                    # no legal move left: abandon this problem
                    break
                data.send_action(final_moves[0])
                # phase-2 one-hot mask prepended to the chosen action vector
                mask = data.phase_oh[2].unsqueeze(0)
                inputs2 = _variable(
                    torch.cat([mask, data.vec_to_ix(final_moves[0])], 1))
                logits, dnc_state = DNC(inputs2, dnc_state)
                for _ in range(args.num_tests):
                    # ask where is ---?
                    if args.zero_at == 'step':
                        optimizer.zero_grad()
                    masked_input, mask_chunk, ground_truth = data.masked_input(
                    )
                    logits, dnc_state = DNC(_variable(masked_input), dnc_state)
                    expanded_logits = data.ix_input_to_ixs(logits)
                    # losses
                    lstep = L.action_loss(expanded_logits, ground_truth,
                                          criterion, log=True)
                    if args.opt_at == 'problem':
                        # accumulate; one backward/step at end of the problem
                        loss += lstep
                    else:
                        # optimize per question
                        lstep.backward(retain_graph=args.ret_graph)
                        optimizer.step()
                        loss = lstep
                    # update counters
                    prediction = u.get_prediction(expanded_logits, [3, 4])
                    n_total, n_correct = tick(n_total, n_correct, mask_chunk,
                                              prediction)
        if args.opt_at == 'problem':
            loss.backward(retain_graph=args.ret_graph)
            optimizer.step()
            sl.writer.add_scalar('losses.end', loss.data[0], sl.global_step)
        cum_total.append(n_total)
        cum_correct.append(n_correct)
        sl.writer.add_scalar('recall.pct_correct', n_correct / n_total,
                             sl.global_step)
        print(
            "trial: {}, step:{}, accy {:0.4f}, cum_score {:0.4f}, loss: {:0.4f}"
            .format(trial, sl.global_step, n_correct / n_total,
                    running_avg(cum_correct, cum_total), loss.data[0]))
    return DNC, optimizer, dnc_state, running_avg(cum_correct,
                                                  cum_total)
def play_qa_readable(args, data, DNC):
    """Run the QA task in evaluation mode, printing a human-readable trace.

    Same phase protocol as ``train_qa2`` but with no optimizer: each input,
    action, question and answer is echoed to stdout so a human can follow
    the dialogue.

    :param args: experiment flags (iters, num_tests)
    :param data: problem generator / environment adapter
    :param DNC: model; called as DNC(inputs, dnc_state)
    :return: (DNC, dnc_state, running accuracy over all trials)
    """
    criterion = nn.CrossEntropyLoss()
    cum_correct, cum_total = [], []
    for trial in range(args.iters):
        phase_masks = data.make_new_problem()
        n_total, n_correct, loss = 0, 0, 0
        dnc_state = DNC.init_state(grad=False)
        for phase_idx in phase_masks:
            if phase_idx == 0 or phase_idx == 1:
                inputs, msk = data.getitem()
                print(data.human_readable(inputs, msk))
                inputs = Variable(torch.cat([msk, inputs], 1))
                logits, dnc_state = DNC(inputs, dnc_state)
            else:
                final_moves = data.get_actions(mode='one')
                if final_moves == []:
                    break
                data.send_action(final_moves[0])
                mask = data.phase_oh[2].unsqueeze(0)
                vec = data.vec_to_ix(final_moves[0])
                print('\n')
                print(data.human_readable(vec, mask))
                inputs2 = Variable(torch.cat([mask, vec], 1))
                logits, dnc_state = DNC(inputs2, dnc_state)
                for _ in range(args.num_tests):
                    # ask where is ---?
                    masked_input, mask_chunk, ground_truth = data.masked_input(
                    )
                    print("Context:", data.human_readable(ground_truth))
                    print("Q:")
                    logits, dnc_state = DNC(Variable(masked_input), dnc_state)
                    expanded_logits = data.ix_input_to_ixs(logits)
                    # losses — fixed: was `l.action_loss` (lowercase `l` is not
                    # the loss module used everywhere else, cf. train_qa2)
                    lstep = L.action_loss(expanded_logits, ground_truth,
                                          criterion, log=True)
                    # fixed: track the last step loss; previously `loss` stayed
                    # the int 0 and `loss.data[0]` below raised AttributeError
                    loss = lstep
                    # update counters
                    prediction = u.get_prediction(expanded_logits, [3, 4])
                    print("A:")
                    n_total, n_correct = tick(n_total, n_correct, mask_chunk,
                                              prediction)
                    print("correct:", mask_chunk == prediction)
        cum_total.append(n_total)
        cum_correct.append(n_correct)
        sl.writer.add_scalar('recall.pct_correct', n_correct / n_total,
                             sl.global_step)
        print(
            "trial: {}, step:{}, accy {:0.4f}, cum_score {:0.4f}, loss: {:0.4f}"
            .format(trial, sl.global_step, n_correct / n_total,
                    u.running_avg(cum_correct, cum_total), loss.data[0]))
    return DNC, dnc_state, u.running_avg(cum_correct, cum_total)
def SE_UL(Hhat: ndarray, C: ndarray, R, tau_c, tau_p, realization_cnt, M, K,
          L, p):
    """
    calculates the uplink spectral efficiency for different receive combining

    Parameters
    ----------
    Hhat
        (M, realization_cnt, K, L, L) MMSE channel estimates
    C
        (M, M, K, L, L) estimation error correlation matrix with MMSE estimation
    R
        spatial correlation matrices (indexed [:, :, k, l, j])
    tau_c
        coherence block length (samples)
    tau_p
        pilot length (samples)
    realization_cnt
        number of channel realizations to average over
    M
        antennas per BS
    K
        UEs per cell
    L
        number of cells
    p: float
        uplink transmit power (same for all here)

    Returns
    -------
    dict mapping combining-method name ('MR', 'RZF', 'MMMSE', 'ZF', 'SMMSE')
    to a (K, L) array of per-UE spectral efficiencies
    """
    methods = ['MR', 'RZF', 'MMMSE', 'ZF', 'SMMSE']
    V = {}
    # sum of all estimation error correlation matrices at every BS
    # shape (M, M, L)
    C_totM = np.reshape(p * C.sum(axis=(2, 3)), (M, M, L))
    # sum of intra-cell estimation error correlation matrices at every BS
    # (np.complex / np.bool aliases were removed in NumPy >= 1.24; use the
    # explicit dtypes instead)
    CR_totS = np.zeros([M, M, L], dtype=np.complex128)
    for j in range(L):
        all_other_cells = np.ones((L, ), dtype=bool)
        all_other_cells[j] = False
        CR_totS[:, :, j] = p * (C[:, :, :, j, j].sum(axis=2) +
                                R[:, :, :, all_other_cells, j].sum(axis=(2,
                                                                         3)))
    prelog_factor = (tau_c - tau_p) / tau_c
    SE = {method: np.zeros([K, L]) for method in methods}
    for n in range(realization_cnt):
        for j in range(L):
            # matlab uses F order, shape(M, KL)
            Hhat_allj = Hhat[:, n, :, :, j].reshape(M, K * L, order='F')
            # indices of the UEs served by cell j
            ue = np.arange(K * j, K * j + K)
            V['MR'] = mr_combining(Hhat_allj, ue)
            V['RZF'] = rzf_combining(Hhat_allj, ue, p)
            V['ZF'] = zf_combining(Hhat_allj, ue)
            V['MMMSE'] = mmmse_combining(Hhat_allj, ue, p, C_totM[:, :, j])
            V['SMMSE'] = smmse_combining(Hhat_allj, ue, p, CR_totS[:, :, j])
            for k in range(K):
                for method in methods:
                    v = V[method][:, k]
                    # v: (M, ), Hhat: (M,)
                    numerator = p * (np.abs(v.conj() @ Hhat[:, n, k, j, j])**2)
                    # Hhat_allj: (M, K*L)
                    denominator = p * np.sum(np.abs(v.conj() @ Hhat_allj) ** 2) + \
                        v.conj() @ (C_totM[:, :, j] + np.eye(M)) @ v - numerator
                    # NOTE(review): this running_avg takes (n, old, new) — a
                    # different helper than the 2-arg running_avg used in the
                    # training loops; verify the intended one is imported
                    SE[method][k, j] = running_avg(
                        n, SE[method][k, j],
                        prelog_factor * np.log2(1 +
                                                numerator / denominator).real)
    return SE