def train(model, params, optimizer, q_a_data, q_target_data, answer_data):
    """Train ``model`` for one epoch over shuffled (question/answer) sequences.

    :param model: network returning (loss, filtered_pred, filtered_target)
    :param params: config with batch_size, n_question, gpu, max_grad_norm
    :param optimizer: torch optimizer stepping model's parameters
    :param q_a_data: combined question-answer id sequences, shape (n_students, seq_len)
    :param q_target_data: target question id sequences, same shape
    :param answer_data: raw answer ids, same shape
    :return: (mean batch loss, accuracy, AUC) over the whole epoch
    """
    N = int(math.floor(len(q_a_data) / params.batch_size))
    # Shuffle all three arrays with the same permutation so rows stay aligned.
    shuffle_index = np.random.permutation(q_a_data.shape[0])
    q_a_data = q_a_data[shuffle_index]
    q_target_data = q_target_data[shuffle_index]
    answer_data = answer_data[shuffle_index]
    pred_list = []
    target_list = []
    epoch_loss = 0
    model.train()
    for idx in range(N):
        q_a_seq = q_a_data[idx * params.batch_size:(idx + 1) * params.batch_size, :]
        q_target_seq = q_target_data[idx * params.batch_size:(idx + 1) * params.batch_size, :]
        answer_seq = answer_data[idx * params.batch_size:(idx + 1) * params.batch_size, :]
        # Recover the 0/1 correctness label from the 1-based answer encoding.
        target = (answer_seq - 1) / params.n_question
        target = np.floor(target)
        input_q_target = utils.variable(torch.LongTensor(q_target_seq), params.gpu)
        input_x = utils.variable(torch.LongTensor(q_a_seq), params.gpu)
        target = utils.variable(torch.FloatTensor(target), params.gpu)
        # Flatten (batch, seq) targets into a single column, batch-major.
        target_to_1d = torch.chunk(target, params.batch_size, 0)
        target_1d = torch.cat(
            [target_to_1d[i] for i in range(params.batch_size)], 1)
        target_1d = target_1d.permute(1, 0)
        # Same flattening for the target-question ids.
        input_q_target_to_1d = torch.chunk(input_q_target, params.batch_size, 0)
        input_q_target_1d = torch.cat(
            [input_q_target_to_1d[i] for i in range(params.batch_size)], 1)
        input_q_target_1d = input_q_target_1d.permute(1, 0)
        model.zero_grad()
        loss, filtered_pred, filtered_target = model(input_x, input_q_target_1d, target_1d)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), params.max_grad_norm)
        optimizer.step()
        epoch_loss += utils.to_scalar(loss)
        right_target = np.asarray(filtered_target.data.tolist())
        right_pred = np.asarray(filtered_pred.data.tolist())
        pred_list.append(right_pred)
        target_list.append(right_target)
    all_pred = np.concatenate(pred_list, axis=0)
    all_target = np.concatenate(target_list, axis=0)
    auc = metrics.roc_auc_score(all_target, all_pred)
    # Threshold probabilities at 0.5 for accuracy (mutates all_pred in place).
    all_pred[all_pred >= 0.5] = 1.0
    all_pred[all_pred < 0.5] = 0.0
    accuracy = metrics.accuracy_score(all_target, all_pred)
    return epoch_loss / N, accuracy, auc
def eval_ICNN_WDR_Omer(icnn, extremums_of_action_space, states_sequence, actions_sequence, rewards_sequence, fence_posts, trans_as_tuples, gamma, state_values=None):
    """Evaluate an ICNN policy with weighted doubly-robust off-policy evaluation.

    Builds a greedy behaviour policy from the clinician Q-table and a
    discretized evaluation policy from the ICNN, then delegates to the
    per-decision WDR estimator.

    :param icnn: trained input-convex NN Q-function
    :param extremums_of_action_space: (min0, max0, min1, max1) action bounds
    :param state_values: optional state centroids; loaded from disk when None
    :return: the WDR off-policy value estimate
    """
    # Bug fix: identity comparison with None must use `is`, not `==`
    # (== can be overloaded and is ambiguous for numpy arrays).
    if state_values is None:
        state_values = np.load('cluster_centers_750.npy')
    q_clinician = np.load('q_clinician.npy')
    num_states = q_clinician.shape[0]
    num_actions = q_clinician.shape[1]
    pi_behavior_table = np.zeros((num_states, num_actions))
    pi_evaluation_table = np.zeros((num_states, num_actions))
    min0, max0, min1, max1 = extremums_of_action_space
    for i, s in enumerate(state_values):
        # print('computing integral for state:',s)
        Q_integral = ICNN.compute_integral(icnn, variable(s), min0, max0, min1, max1)
        # print('Q_integral for state', i, ':',Q_integral.data.numpy())
        # print('type Q_integral', type(Q_integral), ' ---- Value Q_integral:',Q_integral)
        # Behaviour policy: deterministic greedy w.r.t. the clinician Q-table.
        pi_behavior_table[i, np.argmax(q_clinician[i, :])] = 1
        # We want pi_evaluation to be a discretized policy. It will take a state as input
        # and will return a discrete proba distro over the 25 possible actions.
        '''
        IMPORTANT TODO: discretize actions following the bins used in HW3
        '''
        pi_evaluation_table[i, :] = ICNN.discretize_Q(icnn, variable(s), Q_integral, min0, max0, min1, max1).squeeze().data.numpy()
    print('pi_eval computed. First row:', pi_evaluation_table[0, :])
    # Bug fix: the original summed a generator of rows into one array
    # (sum(... for i in range(20))); the message promises one sum per row.
    print('How close is each row to an actual distribution? Lets see if the first 20 rows sum to one:',
          [sum(pi_evaluation_table[i, :]) for i in range(20)])
    print('Now starting WDR calculations')
    return off_policy_per_decision_weighted_doubly_robust(states_sequence, actions_sequence, rewards_sequence, fence_posts, trans_as_tuples, gamma, pi_evaluation_table, pi_behavior_table)
def zero_state(self, batch_size):
    """
    Create an initial hidden state of zeros.

    :param batch_size: the batch size
    :return: a tuple of two tensors (h and c) of zeros, each of shape
             (num_layers, batch_size, hidden_size)
    """
    # The axes semantics are (num_layers, batch_size, hidden_dim).
    nb_layers = 1
    state_shape = (nb_layers, batch_size, self.hidden_size)
    # Dead commented-out Variable/weight.new code removed; the docstring's
    # claimed shape (batch_size x hidden_size) was also wrong and is fixed.
    h0 = variable(torch.zeros(state_shape))
    c0 = variable(torch.zeros(state_shape))
    return h0, c0
def validation(model: nn.Module, criterion, valid_loader) -> Dict[str, float]:
    '''
    Run one pass over the validation loader and report average metrics.

    :param model: network to evaluate (switched to eval mode here)
    :param criterion: loss callable applied to (outputs, targets)
    :param valid_loader: iterable of (inputs, targets) batches
    :return: dict with 'valid_loss' (mean criterion loss) and
             'dice_loss' (mean dice of outputs thresholded at 0.5)
    '''
    model.eval()
    losses = []
    dice = []
    for inputs, targets in valid_loader:
        # volatile=True: legacy (pre-0.4) PyTorch way to disable autograd at inference
        inputs = utils.variable(inputs, volatile=True)
        targets = utils.variable(targets)
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        losses.append(loss.data[0])
        dice += [get_dice(targets, (outputs > 0.5).float()).data[0]]
    valid_loss = np.mean(losses)  # type: float
    valid_dice = np.mean(dice)
    print('Valid loss: {:.5f}, dice: {:.5f}'.format(valid_loss, valid_dice))
    metrics = {'valid_loss': valid_loss, 'dice_loss': valid_dice}
    return metrics
def validate(args, model: CharRNN, criterion, char_to_id, pbar=False):
    """Evaluate a character-level RNN over the validation corpus.

    Walks the corpus in windows of 4096 chars, feeding one character at a
    time and carrying the hidden state across windows.

    :param args: needs ``valid_corpus`` (path) and ``valid_chars`` (optional cap)
    :param model: the character RNN under evaluation
    :param criterion: per-character loss
    :param char_to_id: mapping from characters to vocabulary ids
    :param pbar: show a tqdm progress bar when True
    :return: dict with mean per-char loss and word-level perplexity
    """
    model.eval()
    valid_corpus = Path(args.valid_corpus).read_text(encoding='utf8')
    batch_size = 1
    window_size = 4096
    hidden = model.init_hidden(batch_size)
    total_loss = n_chars = 0
    total_word_loss = n_words = 0
    r = tqdm.trange if pbar else range
    for idx in r(
            0, min(args.valid_chars or len(valid_corpus),
                   len(valid_corpus) - 1), window_size):
        # One extra char so inputs/targets are shifted by one position.
        chunk = valid_corpus[idx:idx + window_size + 1]
        inputs = variable(char_tensor(chunk[:-1], char_to_id).unsqueeze(0), volatile=True)
        targets = variable(char_tensor(chunk[1:], char_to_id).unsqueeze(0))
        losses = []
        for c in range(inputs.size(1)):
            output, hidden = model(inputs[:, c], hidden)
            loss = criterion(output.view(batch_size, -1), targets[:, c])
            losses.append(loss.data[0])
            n_chars += 1
        total_loss += np.sum(losses)
        # Aggregate per-char losses into per-word losses for perplexity.
        word_losses = word_loss(chunk, losses)
        total_word_loss += np.sum(word_losses)
        n_words += len(word_losses)
    mean_loss = total_loss / n_chars
    mean_word_perplexity = np.exp(total_word_loss / n_words)
    print('Validation loss: {:.3}, word perplexity: {:.1f}'.format(
        mean_loss, mean_word_perplexity))
    return {
        'valid_loss': mean_loss,
        'valid_word_perplexity': mean_word_perplexity
    }
def validation_binary(model: nn.Module, criterion, valid_loader, num_classes=None):
    """Run one validation pass; report mean loss and mean Jaccard index.

    ``num_classes`` is accepted for interface compatibility and unused here.
    """
    model.eval()
    batch_losses = []
    batch_jaccards = []
    for inputs, targets in valid_loader:
        inputs = utils.variable(inputs, volatile=True)
        targets = utils.variable(targets)
        outputs = model(inputs)
        batch_losses.append(criterion(outputs, targets).data[0])
        hard_mask = (outputs > 0).float()
        batch_jaccards.append(get_jaccard(targets, hard_mask).data[0])
    valid_loss = np.mean(batch_losses)  # type: float
    valid_jaccard = np.mean(batch_jaccards)
    print('Valid loss: {:.5f}, jaccard: {:.5f}'.format(valid_loss, valid_jaccard))
    return {'valid_loss': valid_loss, 'jaccard_loss': valid_jaccard}
def train_on_task(self, train_loader, ind_task, epoch, additional_loss):
    """Train the network for one epoch on a single task.

    :param train_loader: task-aware loader exposing ``shuffle_task()``
    :param ind_task: task index (used for logging only)
    :param epoch: current epoch (unused here; kept for interface compatibility)
    :param additional_loss: optional callable net -> regularization term, or None
    :return: (mean cross-entropy loss per batch, accuracy in percent)
    """
    self.net.train()
    epoch_loss = 0
    correct = 0
    train_loader.shuffle_task()
    for data, target in train_loader:
        data, target = variable(data), variable(target)
        if self.gpu_mode:
            data, target = data.cuda(self.device), target.cuda(self.device)
        self.optimizer.zero_grad()
        output = self.net(data)
        loss = F.cross_entropy(output, target)
        # NOTE(review): epoch_loss tracks the plain cross-entropy, i.e. the
        # regularization term added below is deliberately excluded — confirm.
        epoch_loss += loss.item()
        if additional_loss is not None:
            regularization = additional_loss(self.net)
            if regularization is not None:
                loss += regularization
        loss.backward()
        self.optimizer.step()
        correct += (output.max(dim=1)[1] == target).data.sum()
    if self.verbose:
        print('Train eval : task : ' + str(ind_task) + " - correct : " + str(correct) + ' / ' + str(len(train_loader)))
    # Bug fix: np.float was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin float is the supported equivalent.
    return epoch_loss / float(
        len(train_loader)), 100. * correct / float(len(train_loader))
def visualize_results(self, epoch, classe=None, fix=True):
    """Generate sample images from the generator and save them to disk.

    :param epoch: epoch number, used in the output filename
    :param classe: optional class index; when given, images go to a per-class folder
    :param fix: use the fixed noise ``self.sample_z_`` when True, fresh noise otherwise
    """
    # Bug fix: the original computed
    #   index = int(self.num_classes / self.num_task) * (classe + 1)
    # unconditionally, which raised TypeError for the default classe=None,
    # and the value was never used — removed (along with unused sample_size).
    self.G.eval()
    dir_path = self.result_dir
    if classe is not None:
        dir_path = self.result_dir + '/classe-' + str(classe)
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)
    image_frame_dim = int(np.floor(np.sqrt(self.sample_num)))
    if self.conditional:
        # One-hot labels cycling over all classes across the sample grid.
        y = torch.LongTensor(range(self.sample_num)) % self.num_classes
        y = y.view(self.sample_num, 1)
        y_onehot = torch.FloatTensor(self.sample_num, self.num_classes)
        y_onehot.zero_()
        y_onehot.scatter_(1, y, 1.0)
        y_onehot = variable(y_onehot)
    else:
        y_onehot = None
    if fix:
        """ fixed noise """
        if self.conditional:
            samples = self.G(self.sample_z_, y_onehot)
        else:
            samples = self.G(self.sample_z_)
    else:
        """ random noise """
        sample_z_ = variable(self.random_tensor(self.sample_num, self.z_dim), volatile=True)
        if self.conditional:
            samples = self.G(sample_z_, y_onehot)
        else:
            # Bug fix: previously fed the fixed noise self.sample_z_ here,
            # so the "random noise" path never produced random samples.
            samples = self.G(sample_z_)
    if self.input_size == 1:
        if self.gpu_mode:
            samples = samples.cpu().data.numpy()
        else:
            samples = samples.data.numpy()
        # NCHW -> NHWC for the image writer.
        samples = samples.transpose(0, 2, 3, 1)
        save_images(
            samples[:image_frame_dim * image_frame_dim, :, :, :],
            [image_frame_dim, image_frame_dim],
            dir_path + '/' + self.model_name + '_epoch%03d' % epoch + '.png')
    else:
        save_image(samples[:self.sample_num].data,
                   dir_path + '/' + self.model_name + '_epoch%03d' % epoch + '.png',
                   padding=0)
def predict(model, test_iter, cuda=CUDA_DEFAULT):
    """Compute the average loss of ``model`` over ``test_iter`` (no training).

    :param model: model exposing ``model_str`` and a kwargs-driven forward
    :param test_iter: iterable of (image, label) batches
    :param cuda: move tensors to GPU when truthy
    :return: mean loss per sample (float-like tensor scalar)
    """
    # Monitoring loss
    total_loss = 0
    count = 0
    criterion = get_criterion(model.model_str)
    for batch in test_iter:
        # Get data
        img, label = batch
        label = one_hot(label)
        # Flatten each image to a vector.
        img = img.view(img.size(0), -1)
        img, label = variable(img, cuda=cuda), variable(label, to_float=False, cuda=cuda)
        batch_size = img.size(0)
        # NOTE(review): variable() above already takes a cuda flag, so this
        # second .cuda() move looks redundant — confirm against utils.variable.
        if cuda:
            img = img.cuda()
            label = label.cuda()
        # predict
        kwargs = _get_kwargs(model.model_str, img, label)
        output = model.forward(**kwargs)
        output = (output[0].view(batch_size, -1), output[1], output[2])
        # Criterion compares the reconstruction against the input image.
        loss = criterion(img, output)
        # monitoring
        count += batch_size
        total_loss += t.sum(loss.data)  # cut graph with .data
    # monitoring
    avg_loss = total_loss / count
    print("Validation loss is %.4f" % avg_loss)
    return avg_loss
def validation_binary(model: nn.Module, criterion, valid_loader):
    """One validation pass; reports mean loss, Jaccard and Dice scores."""
    model.eval()
    batch_losses = []
    jaccard_scores = []
    dice_scores = []
    for inputs, targets in valid_loader:
        inputs = utils.variable(inputs, volatile=True)
        targets = utils.variable(targets)
        outputs = model(inputs)
        batch_losses.append(criterion(outputs, targets).data[0])
        jaccard_scores.extend(get_jaccard(targets, outputs))
        dice_scores.extend(get_dice(targets, outputs))
    valid_loss = np.mean(batch_losses)  # type: float
    valid_jaccard = np.mean(jaccard_scores).astype(np.float64)
    valid_dice = np.mean(dice_scores).astype(np.float64)
    print('Valid loss: {:.5f}, jaccard: {:.5f}, dice: {:.5f}'.format(
        valid_loss, valid_jaccard, valid_dice))
    return {
        'valid_loss': valid_loss,
        'jaccard_loss': valid_jaccard,
        'dice_loss': valid_dice
    }
def train(model, params, optimizer, q_data, qa_data): N = int(math.floor(len(q_data) / params.batch_size)) # batch的数量 # shuffle data shuffle_index = np.random.permutation(q_data.shape[0]) q_data = q_data[shuffle_index] qa_data = qa_data[shuffle_index] pred_list = [] target_list = [] epoch_loss = 0 model.train() for idx in range(N): q_one_seq = q_data[idx * params.batch_size:(idx + 1) * params.batch_size, :] qa_batch_seq = qa_data[idx * params.batch_size:(idx + 1) * params.batch_size, :] target = qa_data[idx * params.batch_size:(idx + 1) * params.batch_size, :] target = (target - 1) / params.n_question target = np.floor(target) # 向下取整 input_q = utils.variable(torch.LongTensor(q_one_seq), params.gpu) input_qa = utils.variable(torch.LongTensor(qa_batch_seq), params.gpu) target = utils.variable(torch.FloatTensor(target), params.gpu) target_to_1d = torch.chunk(target, params.batch_size, 0) target_1d = torch.cat( [target_to_1d[i] for i in range(params.batch_size)], 1) target_1d = target_1d.permute(1, 0) # 维度换位 model.zero_grad() loss, filtered_pred, filtered_target = model(input_q, input_qa, target_1d) loss.backward() # 每一个batch做一次反向传播 nn.utils.clip_grad_norm_(model.parameters(), params.max_grad_norm) optimizer.step() epoch_loss += utils.to_scalar(loss) right_target = np.asarray(filtered_target.data.tolist()) right_pred = np.asarray(filtered_pred.data.tolist()) pred_list.append(right_pred) target_list.append(right_target) all_pred = np.concatenate(pred_list, axis=0) all_target = np.concatenate(target_list, axis=0) # if (idx + 1) % params.decay_epoch == 0: # utils.adjust_learning_rate(optimizer, params.init_lr * params.lr_decay) # print('lr: ', params.init_lr / (1 + 0.75)) auc = metrics.roc_auc_score(all_target, all_pred) all_pred[all_pred >= 0.5] = 1.0 all_pred[all_pred < 0.5] = 0.0 accuracy = metrics.accuracy_score(all_target, all_pred) # f1 = metrics.f1_score(all_target, all_pred) return epoch_loss / N, accuracy, auc
def random_batch(corpus: str, *, batch_size: int, window_size: int, char_to_id: CharToId) -> Tuple[Variable, Variable]:
    """Sample ``batch_size`` random windows from ``corpus``.

    Returns (inputs, targets) where targets are the inputs shifted one
    character ahead.
    """
    input_batch = torch.LongTensor(batch_size, window_size)
    target_batch = torch.LongTensor(batch_size, window_size)
    for row in range(batch_size):
        start = random.randint(0, len(corpus) - window_size)
        window = corpus[start:start + window_size + 1]
        input_batch[row] = char_tensor(window[:-1], char_to_id)
        target_batch[row] = char_tensor(window[1:], char_to_id)
    return variable(input_batch), variable(target_batch)
def argmin(icnn, s, min0, max0, min1, max1):
    """
    The minimum of the icnn on the rectangle is attained in one of the
    corners because it is concave — so only the four corners are evaluated.
    """
    corner_values = [
        icnn.forward(s, variable([a0, a1])).data.numpy()[0]
        for a0 in (min0, max0)
        for a1 in (min1, max1)
    ]
    return float(min(corner_values))
def produce(self, start_tokens=None, max_len=20):
    """
    Generate a tweet, optionally forcing the provided start tokens as the
    inputs on the initial timesteps.

    :param start_tokens: optional tensor of shape (n,) with forced start tokens
    :param max_len: maximum length of the tweet
    :return: indices of the tokens of the generated tweet
    """
    hidden = self.cell_zero_state(1)
    x_i = variable(np.full((1, ), self.init_token))
    if start_tokens is not None:
        start_tokens = variable(start_tokens)
    outputs = []
    for i in range(max_len):
        if i == 0:
            # First step always consumes the init token.
            inputs = self.embedding(x_i)
        elif start_tokens is not None and i < start_tokens.size(0):
            # Bug fix: the original called start_tokens.size(0) without a None
            # guard, crashing with AttributeError whenever no start tokens
            # were supplied (the documented default).
            inputs = self.embedding(start_tokens[i])
        else:
            # Feed back the token sampled on the previous step.
            inputs = self.embedding(x_i)
        inputs = inputs.view(1, 1, 200)
        output, hn = self.gru(inputs, hidden)
        hidden = hn
        output = self.projection(output)
        output = F.softmax(output, dim=2)
        # Sample the next token from the output distribution.
        x_i = torch.multinomial(output.squeeze(), 1)
        outputs.append(x_i)
        if x_i == self.eos_token:
            break
    outputs = torch.cat(outputs)
    return outputs
def validation_multi(model: nn.Module, criterion, valid_loader, num_classes):
    """Multi-class validation pass: mean loss and mean per-class Jaccard.

    :param model: network to evaluate
    :param criterion: loss callable applied to (outputs, targets)
    :param valid_loader: iterable of (inputs, targets) batches
    :param num_classes: number of output channels/classes
    :return: dict with 'valid_loss' and 'jaccard_loss'
    """
    model.eval()
    losses = []
    jaccard = []
    # confusion_matrix = np.zeros(
    #     (num_classes, num_classes), dtype=np.uint32)
    for inputs, targets in valid_loader:
        inputs = utils.variable(inputs, volatile=True)
        targets = utils.variable(targets)
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        losses.append(loss.data[0])
        for cls in range(num_classes):
            # Class 0 (background) is encoded in channel 0 by value; the other
            # classes are one-hot channels — NOTE(review): confirm this target
            # layout against the dataset code.
            if cls == 0:
                jaccard_target = (targets[:, 0] == cls).float()
            else:
                jaccard_target = (targets[:, cls - 1] == 1).float()
            # jaccard_output = outputs[:, cls].exp()
            jaccard_output = F.sigmoid(outputs[:, cls])
            jaccard += [get_jaccard(jaccard_target, jaccard_output)]
            # intersection = (jaccard_output * jaccard_target).sum()
            #
            # union = jaccard_output.sum() + jaccard_target.sum() + eps
        # output_classes = outputs[:, 0].data.cpu().numpy().argmax(axis=1)
        # target_classes = targets[:, 0].data.cpu().numpy()
        # confusion_matrix += calculate_confusion_matrix_from_arrays(
        #     output_classes, target_classes, num_classes)
    # confusion_matrix = confusion_matrix[1:, 1:]  # exclude background
    valid_loss = np.mean(losses)  # type: float
    valid_jaccard = np.mean(jaccard)
    # ious = {'iou_{}'.format(cls + 1): iou
    #         for cls, iou in enumerate(calculate_iou(confusion_matrix))}
    #
    # dices = {'dice_{}'.format(cls + 1): dice
    #          for cls, dice in enumerate(calculate_dice(confusion_matrix))}
    #
    # average_iou = np.mean(list(ious.values()))
    # average_dices = np.mean(list(dices.values()))
    # print(
    #     'Valid loss: {:.4f}, average IoU: {:.4f}, average Dice: {:.4f}'.format(valid_loss, average_iou, average_dices))
    # print(
    #     'Valid loss: {:.4f}'.format(valid_loss))
    print('Valid loss: {:.5f}, jaccard: {:.5f}'.format(valid_loss, valid_jaccard.data[0]))
    # metrics = {'valid_loss': valid_loss, 'iou': average_iou}
    # metrics = {'valid_loss': valid_loss}
    metrics = {'valid_loss': valid_loss, 'jaccard_loss': valid_jaccard.data[0]}
    # metrics.update(ious)
    # metrics.update(dices)
    return metrics
def test(model, params, optimizer, q_data, qa_data, a_data):
    """Evaluate the model over the test sequences (no gradient updates).

    :param model: network returning (loss, filtered_pred, filtered_target)
    :param params: config with batch_size, n_question, gpu
    :param optimizer: unused; kept for signature parity with train()
    :param q_data: question id sequences, shape (n_students, seq_len)
    :param qa_data: combined question-answer id sequences, same shape
    :param a_data: answer sequences, same shape
    :return: (mean batch loss, accuracy, AUC)
    """
    N = int(math.floor(len(q_data) / params.batch_size))
    pred_list = []
    target_list = []
    epoch_loss = 0
    model.eval()
    for idx in range(N):
        q_one_seq = q_data[idx * params.batch_size:(idx + 1) * params.batch_size, :]
        qa_batch_seq = qa_data[idx * params.batch_size:(idx + 1) * params.batch_size, :]
        a_batch_seq = a_data[idx * params.batch_size:(idx + 1) * params.batch_size, :]
        target = qa_data[idx * params.batch_size:(idx + 1) * params.batch_size, :]
        # Recover the 0/1 correctness label from the combined qa encoding.
        target = (target - 1) / params.n_question
        target = np.floor(target)
        input_q = utils.variable(torch.LongTensor(q_one_seq), params.gpu)
        input_qa = utils.variable(torch.LongTensor(qa_batch_seq), params.gpu)
        input_a = utils.variable(torch.LongTensor(a_batch_seq), params.gpu)
        target = utils.variable(torch.FloatTensor(target), params.gpu)
        # Flatten (batch, seq) targets into a single column, batch-major.
        target_to_1d = torch.chunk(target, params.batch_size, 0)
        target_1d = torch.cat(
            [target_to_1d[i] for i in range(params.batch_size)], 1)
        target_1d = target_1d.permute(1, 0)
        loss, filtered_pred, filtered_target = model.forward(
            input_q, input_qa, input_a, target_1d)
        right_target = np.asarray(filtered_target.data.tolist())
        right_pred = np.asarray(filtered_pred.data.tolist())
        pred_list.append(right_pred)
        target_list.append(right_target)
        epoch_loss += utils.to_scalar(loss)
        # print("testing : batch " + str(idx) + " finished!")
    all_pred = np.concatenate(pred_list, axis=0)
    all_target = np.concatenate(target_list, axis=0)
    auc = metrics.roc_auc_score(all_target, all_pred)
    # Threshold probabilities at 0.5 for accuracy (mutates all_pred in place).
    all_pred[all_pred >= 0.5] = 1.0
    all_pred[all_pred < 0.5] = 0.0
    accuracy = metrics.accuracy_score(all_target, all_pred)
    return epoch_loss / N, accuracy, auc
def Q_minimize(Q_func, s, extremums_of_action_space):
    """
    The minimum of the icnn on the action space is attained on one of the
    edges of that space, because it is concave — so only the four corner
    actions need to be evaluated.
    """
    min0, max0, min1, max1 = extremums_of_action_space
    corner_values = [
        Q_func(s, variable([a0, a1])).data.numpy()[0]
        for a0 in (min0, max0)
        for a1 in (min1, max1)
    ]
    return float(min(corner_values))
def train(num_epochs, model, params, optimizer, q_data, qa_data):
    """Train the model for one epoch over q/qa sequences (no shuffling here).

    :param num_epochs: unused; kept for interface compatibility
    :param model: network returning (loss, filtered_pred, filtered_target)
    :param params: config with batch_size, n_question, gpu, maxgradnorm
    :param optimizer: torch optimizer
    :param q_data: question id sequences, shape (n_students, seq_len)
    :param qa_data: combined question-answer id sequences, same shape
    :return: (mean batch loss, accuracy, AUC)
    """
    N = len(q_data) // params.batch_size
    pred_list = []
    target_list = []
    epoch_loss = 0
    # turn the status of model to the train status
    model.train()
    for idx in range(N):
        q_one_seq = q_data[idx * params.batch_size:(idx + 1) * params.batch_size, :]
        qa_batch_seq = qa_data[idx * params.batch_size:(idx + 1) * params.batch_size, :]
        target = qa_data[idx * params.batch_size:(idx + 1) * params.batch_size, :]
        # Recover the 0/1 correctness label from the combined qa encoding.
        target = (target - 1) / params.n_question
        target = np.floor(target)
        input_q = utils.variable(torch.LongTensor(q_one_seq), params.gpu)
        input_qa = utils.variable(torch.LongTensor(qa_batch_seq), params.gpu)
        target = utils.variable(torch.FloatTensor(target), params.gpu)
        # Flatten (batch, seq) targets into a single column, batch-major.
        target_to_1d = torch.chunk(target, params.batch_size, 0)
        target_1d = torch.cat(
            [target_to_1d[i] for i in range(params.batch_size)], 1)
        target_1d = target_1d.permute(1, 0)
        model.zero_grad()
        loss, filtered_pred, filtered_target = model.forward(
            input_q, input_qa, target_1d)
        loss.backward()
        # Fix: clip_grad_norm was deprecated (and later removed) in favour of
        # the in-place clip_grad_norm_, which the sibling train() already uses.
        nn.utils.clip_grad_norm_(model.parameters(), params.maxgradnorm)
        optimizer.step()
        epoch_loss += utils.to_scalar(loss)
        right_target = np.asarray(filtered_target.data.tolist())
        right_pred = np.asarray(filtered_pred.data.tolist())
        pred_list.append(right_pred)
        target_list.append(right_target)
    all_pred = np.concatenate(pred_list, axis=0)
    all_target = np.concatenate(target_list, axis=0)
    auc = metrics.roc_auc_score(all_target, all_pred)
    # Threshold probabilities at 0.5 for accuracy (mutates all_pred in place).
    all_pred[all_pred >= 0.5] = 1.0
    all_pred[all_pred < 0.5] = 0.0
    accuracy = metrics.accuracy_score(all_target, all_pred)
    return epoch_loss / N, accuracy, auc
def zero_state(self, batch_size):
    """
    Create an initial hidden state of zeros.

    :param batch_size: the batch size
    :return: a tuple (h0, c0) of zero tensors, each of shape
             (num_layers, batch_size, hidden_size)
    """
    # Axes semantics: (num_layers, batch_size, hidden_dim); this net uses 2 layers.
    num_layers = 2
    shape = (num_layers, batch_size, self.hidden_size)
    return variable(torch.zeros(shape)), variable(torch.zeros(shape))
def validation(model, criterion, valid_loader):
    """One validation pass: mean loss and F2 score (sigmoid output > 0.2)."""
    model.eval()
    batch_losses = []
    batch_f2 = []
    for inputs, targets in valid_loader:
        inputs = utils.variable(inputs, volatile=True)
        targets = utils.variable(targets)
        outputs = model(inputs)
        batch_losses.append(criterion(outputs, targets).data[0])
        predictions = F.sigmoid(outputs).data.cpu().numpy() > 0.2
        batch_f2.append(f2_score(y_true=targets.data.cpu().numpy(),
                                 y_pred=predictions))
    valid_loss = np.mean(batch_losses)  # type: float
    valid_f2 = np.mean(batch_f2)  # type: float
    print('Valid loss: {:.4f}, F2: {:.4f}'.format(valid_loss, valid_f2))
    return {'valid_loss': valid_loss, 'valid_f2': valid_f2}
def validation(model, criterion, valid_loader):
    """One validation pass: mean loss and top-1 accuracy."""
    model.eval()
    batch_losses = []
    hits = []
    for inputs, targets in valid_loader:
        inputs = utils.variable(inputs, volatile=True)
        targets = utils.variable(targets)
        outputs = model(inputs)
        batch_losses.append(criterion(outputs, targets).data[0])
        predicted = np.argmax(outputs.data.cpu().numpy(), axis=1)
        hits += list(targets.data.cpu().numpy() == predicted)
    valid_loss = np.mean(batch_losses)  # type: float
    valid_accuracy = np.mean(hits)  # type: float
    print('Valid loss: {:.4f}, accuracy: {:.4f}'.format(valid_loss, valid_accuracy))
    return {'valid_loss': valid_loss, 'accuracy': valid_accuracy}
def forward(self, q_data, qa_data, target, student_id=None):
    """Run the key-value memory network over a batch of sequences.

    :param q_data: LongTensor (batch, seqlen) of question ids; 0 marks padding
    :param qa_data: LongTensor (batch, seqlen) of combined question-answer ids
    :param target: correctness labels; entries < 0 are masked out of the loss
    :param student_id: unused; kept for interface compatibility
    :return: (BCE loss, sigmoid predictions on unmasked steps, unmasked targets)
    """
    # Bug fix: `q_data.shpae[0]` (typo) raised AttributeError on every call.
    batch_size = q_data.shape[0]
    seqlen = q_data.shape[1]
    q_embed_data = self.q_embed(q_data)
    qa_embed_data = self.qa_embed(qa_data)
    # Replicate the initial value memory once per batch element.
    memory_value = nn.Parameter(
        torch.cat([
            self.init_memory_value.unsqueeze(0) for _ in range(batch_size)
        ], 0).data)
    self.mem.set_memory_value(memory_value)
    slice_q_data = torch.chunk(q_data, seqlen, 1)
    slice_q_embed_data = torch.chunk(q_embed_data, seqlen, 1)
    slice_qa_embed_data = torch.chunk(qa_embed_data, seqlen, 1)
    value_read_content_1 = []
    input_embed_1 = []
    predict_logs = []
    for i in range(seqlen):
        # attention
        q = slice_q_embed_data[i].squeeze(1)
        correlation_weight = self.mem.attention(q)
        # 1.0 where the step holds a real question (id >= 1), 0.0 for padding.
        if_memory_write = slice_q_data[i].squeeze(1).ge(1)
        if_memory_write = utils.variable(
            torch.FloatTensor(if_memory_write.data.tolist()), 1)
        # read
        read_content = self.mem.read(correlation_weight)
        value_read_content_1.append(read_content)
        input_embed_1.append(q)
        # write
        qa = slice_qa_embed_data[i].squeeze(1)
        new_memory_value = self.mem.write(correlation_weight, qa)
    all_read_value_content = torch.cat(
        [value_read_content_1[i].squeeze(1) for i in range(seqlen)], 1)
    input_embed_content = torch.cat(
        [input_embed_1[i].squeeze(1) for i in range(seqlen)], 1)
    # Concatenate read content with the question embedding for prediction.
    predict_input = torch.cat(
        [all_read_value_content, input_embed_content], 2)
    read_content_embed = torch.tanh(
        self.read_embed_linear(
            predict_input.reshape(batch_size * seqlen, -1)))
    pred = self.predict_linear(read_content_embed)
    target_1d = target
    # Steps with negative targets (padding) are excluded from the loss.
    mask = target_1d.ge(0)
    pred_1d = pred.reshape(-1, 1)
    filtered_pred = torch.masked_select(pred_1d, mask)
    filtered_target = torch.masked_select(target_1d, mask)
    loss = nn.functional.binary_cross_entropy_with_logits(
        filtered_pred, filtered_target)
    return loss, torch.sigmoid(filtered_pred), filtered_target
def validation(model, criterion, valid_loader):
    """Evaluate on the validation set; return mean loss and accuracy."""
    model.eval()
    loss_values = []
    correct_flags = []
    for inputs, targets in valid_loader:
        inputs = utils.variable(inputs, volatile=True)
        targets = utils.variable(targets)
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss_values.append(loss.data[0])
        labels = targets.data.cpu().numpy()
        predictions = np.argmax(outputs.data.cpu().numpy(), axis=1)
        correct_flags.extend(labels == predictions)
    valid_loss = np.mean(loss_values)  # type: float
    valid_accuracy = np.mean(correct_flags)  # type: float
    print('Valid loss: {:.4f}, accuracy: {:.4f}'.format(valid_loss, valid_accuracy))
    return {'valid_loss': valid_loss, 'accuracy': valid_accuracy}
def predict(model, from_file_names, batch_size: int, to_path, problem_type):
    """Run segmentation inference and write one PNG mask per input image.

    :param model: trained segmentation network
    :param from_file_names: image paths for the prediction dataset
    :param batch_size: inference batch size
    :param to_path: root output directory (per-instrument subfolders created)
    :param problem_type: 'binary', 'parts' or 'instruments' — selects how raw
        outputs are converted to a uint8 mask
    """
    loader = DataLoader(
        dataset=RoboticsDataset(from_file_names, transform=img_transform, mode='predict', problem_type=problem_type),
        shuffle=False,
        batch_size=batch_size,
        num_workers=args.workers,
        pin_memory=torch.cuda.is_available()
    )
    for batch_num, (inputs, paths) in enumerate(tqdm(loader, desc='Predict')):
        # volatile=True: legacy pre-0.4 way to disable autograd at inference
        inputs = utils.variable(inputs, volatile=True)
        outputs = model(inputs)
        for i, image_name in enumerate(paths):
            if problem_type == 'binary':
                factor = prepare_data.binary_factor
                t_mask = (F.sigmoid(outputs[i, 0]).data.cpu().numpy() * factor).astype(np.uint8)
            elif problem_type == 'parts':
                factor = prepare_data.parts_factor
                t_mask = (outputs[i].data.cpu().numpy().argmax(axis=0) * factor).astype(np.uint8)
            elif problem_type == 'instruments':
                factor = prepare_data.instrument_factor
                t_mask = (outputs[i].data.cpu().numpy().argmax(axis=0) * factor).astype(np.uint8)
            h, w = t_mask.shape
            # Paste the cropped prediction back into a full-size canvas
            # (original_height/width and h_start/w_start are module globals).
            full_mask = np.zeros((original_height, original_width))
            full_mask[h_start:h_start + h, w_start:w_start + w] = t_mask
            instrument_folder = Path(paths[i]).parent.parent.name
            (to_path / instrument_folder).mkdir(exist_ok=True, parents=True)
            cv2.imwrite(str(to_path / instrument_folder / (Path(paths[i]).stem + '.png')), full_mask)
def predict(model, from_file_names, batch_size: int, to_path):
    """Run binary segmentation inference and save 0-255 sigmoid masks as PNGs.

    :param model: trained segmentation network
    :param from_file_names: image paths for the prediction dataset
    :param batch_size: inference batch size
    :param to_path: root output directory; a subfolder named after
        args.model_type is created underneath it
    """
    loader = DataLoader(dataset=AngyodysplasiaDataset(from_file_names,
                                                      transform=img_transform,
                                                      mode='predict'),
                        shuffle=False,
                        batch_size=batch_size,
                        num_workers=args.workers,
                        pin_memory=torch.cuda.is_available())
    for batch_num, (inputs, paths) in enumerate(tqdm(loader, desc='Predict')):
        # volatile=True: legacy pre-0.4 way to disable autograd at inference
        inputs = utils.variable(inputs, volatile=True)
        outputs = model(inputs)
        for i, image_name in enumerate(paths):
            # Sigmoid probability scaled to the full uint8 range.
            mask = (F.sigmoid(outputs[i, 0]).data.cpu().numpy() * 255).astype(
                np.uint8)
            h, w = mask.shape
            # Paste the crop into a 576x576 canvas with a 32 px border.
            full_mask = np.zeros((576, 576))
            full_mask[32:32 + h, 32:32 + w] = mask
            (to_path / args.model_type).mkdir(exist_ok=True, parents=True)
            cv2.imwrite(
                str(to_path / args.model_type / (Path(paths[i]).stem + '.png')),
                full_mask)
def predict(arg):
    """Sliding-window prediction over one large image.

    ``arg`` is an (img_meta, img) pair. Tiles of side ``patch_size`` are taken
    with overlapping stride, each tile is run through the closed-over
    ``model``, and per-pixel outputs are accumulated then averaged over the
    number of tiles covering each pixel.

    :return: (img_meta, pred_img) with pred_img shaped (N_CLASSES + 1, h, w)
    """
    img_meta, img = arg
    h, w = img.shape[:2]
    s = patch_size
    # step = s // 2 - 32
    step = s - 64  # 64 px overlap between neighbouring tiles
    # Tile origins; the final entry guarantees the image edge is covered.
    xs = list(range(0, w - s, step)) + [w - s]
    ys = list(range(0, h - s, step)) + [h - s]
    all_xy = [(x, y) for x in xs for y in ys]
    pred_img = np.zeros((utils.N_CLASSES + 1, h, w), dtype=np.float32)
    pred_count = np.zeros((h, w), dtype=np.int32)

    def make_batch(xy_batch_):
        # Crop and normalise each tile of one batch.
        return (xy_batch_, torch.stack([
            utils.img_transform(img[y: y + s, x: x + s])
            for x, y in xy_batch_]))

    for xy_batch, inputs in utils.imap_fixed_output_buffer(
            make_batch, tqdm.tqdm(list(utils.batches(all_xy, batch_size))),
            threads=1):
        outputs = model(utils.variable(inputs, volatile=True))
        # exp() applied to the raw outputs — presumably the model emits
        # log-probabilities; TODO confirm against the model definition.
        outputs_data = np.exp(outputs.data.cpu().numpy())
        for (x, y), pred in zip(xy_batch, outputs_data):
            pred_img[:, y: y + s, x: x + s] += pred
            pred_count[y: y + s, x: x + s] += 1
    # Average overlapping tiles; maximum() guards against division by zero.
    pred_img /= np.maximum(pred_count, 1)
    return img_meta, pred_img
def decoder_initial_inputs(self, batch_size):
    """
    Create the initial input for the decoder on the first timestep.

    :param batch_size: the size of the batch
    :return: a vector of size (batch_size,) filled with the index of
             self.init_idx
    """
    # Build a single-element index tensor and broadcast it across the batch.
    inputs = variable(np.full((1,), self.init_idx)).expand((batch_size,))
    # Bug fix: the original computed `inputs` but fell off the end of the
    # function, implicitly returning None to every caller.
    return inputs
def predict(arg):
    """Single-shot (untiled) prediction for one image, with debug images.

    ``arg`` is an (img_meta, img) pair. The whole image is pushed through the
    closed-over ``model`` in one batch; a coloured prediction and the rescaled
    input are written under ``_runs/`` for inspection.

    :return: (img_meta, pred_img) where pred_img is the (2, s, s) sum of the
             exponentiated model outputs
    """
    img_meta, img = arg
    h, w = img.shape[:2]
    s = patch_size

    def make_batch(xy_batch_):
        # Leftover from the tiled version; unused here — crops are not taken.
        return (xy_batch_, torch.stack([utils.img_transform(img)]))

    pred_img = np.zeros((2, s, s), dtype=np.float32)
    # np.zeros((utils.N_CLASSES + 1, h, w), dtype=np.float32)
    # pred_count = np.zeros((s, s), dtype=np.int32)
    inputs = torch.stack([utils.img_transform(img)])
    outputs = model(utils.variable(inputs, volatile=True))
    # print("outputs", outputs.shape)
    # exp() applied to raw outputs — presumably log-probabilities; TODO confirm.
    outputs_data = np.exp(outputs.data.cpu().numpy())
    # print("o_data", outputs_data.shape)
    for pred in outputs_data:
        pred_img += pred
        # print("pred", pred)
    # print("pred_img", pred_img)
    # for idx, i in enumerate(pred_img):
    #     utils.save_image('_runs/pred-{}-{}.png'.format(img_meta[0].stem, idx), (i > 0.25+idx*0.5).astype(np.float32))
    utils.save_image('_runs/pred-{}.png'.format(img_meta[0].stem),
                     colored_prediction(outputs_data[0]))
    utils.save_image(
        '_runs/{}-input.png'.format(prefix),
        skimage.exposure.rescale_intensity(img, out_range=(0, 1)))
    # utils.save_image(
    #     '_runs/{}-target.png'.format(prefix),
    #     colored_prediction(target_one_hot.astype(np.float32)))
    return img_meta, pred_img
def predict(model, paths, batch_size: int, aug=False, transform=None):
    """Predict class probabilities for all images as a sparse matrix.

    Probabilities below a small threshold are zeroed so the result compresses
    well in CSR form.

    :param model: trained classifier
    :param paths: image paths for the prediction dataset
    :param batch_size: inference batch size
    :param aug: whether the dataset applies test-time augmentation
    :param transform: optional image transform passed to the dataset
    :return: (stacked sparse probability matrix, list of file stems)
    """
    loader = DataLoader(
        dataset=PredictionDataset(paths, aug, transform),
        shuffle=False,
        batch_size=batch_size,
        num_workers=args.workers,
        pin_memory=torch.cuda.is_available()
    )
    threshold = 1e-5  # will cut off 99% of the data
    model.eval()
    all_outputs = []
    all_stems = []
    for inputs, stems in tqdm(loader, desc='Predict'):
        # volatile=True: legacy pre-0.4 way to disable autograd at inference
        inputs = utils.variable(inputs, volatile=True)
        outputs = F.softmax(model(inputs))
        # Zero out tiny probabilities so the CSR matrix stays sparse.
        outputs[outputs < threshold] = 0
        sparse_outputs = csr_matrix(outputs.data.cpu().numpy())
        all_outputs.append(sparse_outputs)
        all_stems.extend(stems)
    return vstack(all_outputs), all_stems
def pi_times_Q(s, a):
    """Elementwise pi_evaluation(s_i, a_i) * Q(s_i, a_i) over the sequence,
    concatenated into a single tensor."""
    products = [
        variable(pi_evaluation_func(s[i], a[i])) * Q_func(s[i], a[i])
        for i in range(len(s))
    ]
    return torch.cat(products)
def reparameterize(self, mu, logvar):
    """VAE reparameterization trick.

    During training, sample z = mu + sigma * eps with eps ~ N(0, 1);
    at evaluation time simply return the mean.
    """
    if not self.training:
        return mu
    sigma = t.exp(.5 * logvar)
    noise = variable(np.random.normal(0, 1, (len(mu), self.latent_dim)),
                     cuda=self.is_cuda)
    return mu + sigma * noise
def generate_digit(model, n, digit):
    """Decode an n x n grid of samples of one digit class and display it.

    :param model: conditional VAE exposing ``decode`` and ``latent_dim``
    :param n: grid side length (n*n samples are drawn)
    :param digit: class label to condition every sample on
    """
    # @todo: modify the function so that it can save the generated images
    # generate new samples
    figure = np.zeros((28 * n, 28 * n))
    sample = variable(t.randn(n * n, model.latent_dim))
    digits = variable(one_hot_np(np.array(n * n * [digit])))
    model.eval()
    sample = model.decode(sample, digits).cpu()
    model.train()
    for k, s in enumerate(sample):
        i = k // n
        j = k % n
        # NOTE(review): this rebinds the `digit` parameter after its last use;
        # harmless here but a rename would be clearer.
        digit = s.data.numpy().reshape(28, 28)
        figure[i * 28:(i + 1) * 28, j * 28:(j + 1) * 28] = digit
    plt.figure(figsize=(10, 10))
    plt.imshow(figure, cmap='Greys_r')
    plt.show()
def run_batch(self, x_, t_, additional_loss=None):
    """One conditional-GAN training step on a batch: update D, then G.

    :param x_: batch of real images, reshaped here to (N, 1, 28, 28)
    :param t_: integer class labels for the batch
    :param additional_loss: optional callable(self) adding a regularization
        term to the generator loss
    :return: generator loss for this batch (float)
    """
    for p in self.D.parameters():  # reset requires_grad
        p.requires_grad = True  # they are set to False below in netG update
    self.G.train()
    self.D.train()
    x_ = x_.view((-1, 1, 28, 28))
    y_onehot = variable(self.get_one_hot(t_))
    z_ = variable(torch.rand((x_.size(0), self.z_dim)))
    # update D network
    self.D_optimizer.zero_grad()
    D_real = self.D(x_, y_onehot)
    D_real_loss = self.BCELoss(D_real[0], self.y_real_[:x_.size(0)])
    G_ = self.G(z_, y_onehot)
    D_fake = self.D(G_, y_onehot)
    D_fake_loss = self.BCELoss(D_fake[0], self.y_fake_[:x_.size(0)])
    D_loss = D_real_loss + D_fake_loss
    D_loss.backward()
    self.D_optimizer.step()
    for p in self.D.parameters():  # freeze D while updating G
        p.requires_grad = False
    # update G network: fool D into labelling fakes as real
    self.G_optimizer.zero_grad()
    G_ = self.G(z_, y_onehot)
    D_fake = self.D(G_, y_onehot)
    G_loss = self.BCELoss(D_fake[0], self.y_real_[:x_.size(0)])
    if additional_loss is not None:
        regularization = additional_loss(self)
        G_loss += regularization
    G_loss.backward()
    self.G_optimizer.step()
    return G_loss.item()