def __init__(self, labels, img_train, labels_train, img_test, labels_test,
             **kwargs):
    self.labels = labels
    self.x_train = flatten(img_train)
    self.y_train = to_one_hot(labels_train, len(labels))
    self.x_test = flatten(img_test)
    self.y_test = to_one_hot(labels_test, len(labels))
    self.x_train, self.x_val, self.y_train, self.y_val = \
        train_test_split(self.x_train, self.y_train, test_size=0.1)
    self.n = self.x_train.shape[1]
    self.m = self.y_train.shape[1]
    self.k = kwargs.pop('hidden_layer_size', 256)
    self.lr = kwargs.pop('learning_rate', 1e-3)
    self.dropout = kwargs.pop('dropout', False)
    self.regularization = kwargs.pop('regularization', False)
    self.adaptive_lr = kwargs.pop('adaptive_lr', False)
    self.history_path = kwargs.pop('results_path')
    self.model_path = kwargs.pop('model_path')
    self.init_nn()
    self.saver = tf.train.Saver()

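# None of the snippets in this section define to_one_hot or flatten
# themselves. A minimal NumPy sketch consistent with the calls above
# (an assumption for illustration, not the source's actual helpers):
import numpy as np

def to_one_hot(labels, num_classes):
    # map integer labels (scalar or any-shape array) to one-hot float rows
    labels = np.asarray(labels, dtype=np.int64)
    return np.eye(num_classes, dtype=np.float32)[labels]

def flatten(images):
    # flatten (N, H, W[, C]) image batches to (N, H*W*...)
    images = np.asarray(images)
    return images.reshape(images.shape[0], -1)
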
def fit(self, train_lbs):
    _, loss = self.sess.run(
        [self.train, self.loss], {
            self.x: to_one_hot(add_swap_noise(train_lbs)),
            self.y: to_one_hot(train_lbs)
        })
    return loss

def filter_pair(union_bbox, subject_bbox, object_bbox, subject_id, object_id,
                img):
    union_mask = torch.zeros(*img.shape[:2])
    union_mask[union_bbox[0]:union_bbox[1], union_bbox[2]:union_bbox[3]] = 1
    subject_mask = torch.zeros(*img.shape[:2])
    subject_mask[subject_bbox[0]:subject_bbox[1],
                 subject_bbox[2]:subject_bbox[3]] = 1
    object_mask = torch.zeros(*img.shape[:2])
    object_mask[object_bbox[0]:object_bbox[1],
                object_bbox[2]:object_bbox[3]] = 1
    mask = torch.cat([
        union_mask.unsqueeze(0),
        subject_mask.unsqueeze(0),
        object_mask.unsqueeze(0)
    ], 0)
    mask = spatial_transform(mask).unsqueeze(0)
    object_dist = torch.FloatTensor([to_one_hot(object_id, 100)])
    subject_dist = torch.FloatTensor([to_one_hot(subject_id, 100)])
    inputs = [mask, subject_dist, object_dist]
    inputs = [torch.autograd.Variable(x.cuda(), volatile=True)
              for x in inputs]
    output = model.net.forward(inputs)
    _, result = output.max(1)
    return result.data[0] == 1

def source_target_separate(datasets, sources, targets):
    N1 = len(sources.keys())
    N_domain = N1 + len(targets.keys())
    domain_idx = 0
    sets = {}
    for key in sources.keys():
        sources[key] = domain_idx
        sets[key] = domain_idx
        domain_idx += 1
    for key in targets.keys():
        targets[key] = domain_idx
        sets[key] = domain_idx
        domain_idx += 1
    for key in datasets.keys():
        for split in ('train', 'valid', 'test'):
            n_examples = datasets[key][split]['images'].shape[0]
            datasets[key][split]['domains'] = to_one_hot(
                sets[key] * np.ones((n_examples, )).astype('float32'),
                N_domain)
    return datasets

def _data(data_pth, split_val=True, verbose=1):
    data = np.load(data_pth, allow_pickle=True)
    x, y = data['x'], data['y']
    x = x[:, :, np.newaxis]
    x_train, y_train, x_test, y_test = split_data(x, y)
    class_weights_dict = calc_class_weights(y_train)
    if split_val:
        x_train, y_train, x_val, y_val = split_data(x_train, y_train)
        y_train = to_one_hot(y_train, dimension=10)
        y_test = to_one_hot(y_test, dimension=10)
        y_val = to_one_hot(y_val, dimension=10)
        if verbose:
            print('\nx_train shape: %s'
                  '\ny_train shape: %s'
                  '\nx_test shape: %s'
                  '\ny_test shape: %s'
                  '\nx_val shape: %s'
                  '\ny_val shape: %s' %
                  (x_train.shape, y_train.shape, x_test.shape, y_test.shape,
                   x_val.shape, y_val.shape))
        return (x_train, y_train, x_test, y_test, x_val, y_val,
                class_weights_dict)
    else:
        y_train = to_one_hot(y_train, dimension=10)
        y_test = to_one_hot(y_test, dimension=10)
        if verbose:
            print('\nx_train shape: %s'
                  '\ny_train shape: %s'
                  '\nx_test shape: %s'
                  '\ny_test shape: %s' %
                  (x_train.shape, y_train.shape, x_test.shape, y_test.shape))
        return x_train, y_train, x_test, y_test, class_weights_dict

def forward(self, states, action):
    cuda = states.is_cuda
    batch_size = states.size(0)
    num_nodes = states.size(1)

    if self.immovable_bit:
        # add movable/immovable bit (the first two objects are immovable,
        # this is hardcoded for now)
        tmp = torch.zeros_like(states[:, :, 0:1])
        tmp[:, :2, :] = 1.0
        states = torch.cat([states, tmp], dim=2)

    # states: [batch_size (B), num_objects, embedding_dim]
    # node_attr: flatten states tensor to [B * num_objects, embedding_dim]
    node_attr = states.view(-1, self.input_dim)

    edge_attr = None
    edge_index = None

    if num_nodes > 1:
        # edge_index: [B * (num_objects*[num_objects-1]), 2] edge list
        edge_index = self._get_edge_list_fully_connected(
            batch_size, num_nodes, cuda)
        row, col = edge_index
        edge_attr = self._edge_model(
            node_attr[row], node_attr[col], edge_attr,
            source_indices=(row % self.num_objects).cpu().numpy(),
            target_indices=(col % self.num_objects).cpu().numpy())

    if not self.ignore_action:
        if self.copy_action:
            action_vec = utils.to_one_hot(
                action, self.action_dim).repeat(1, self.num_objects)
            action_vec = action_vec.view(-1, self.action_dim)
        else:
            if not self.factored_continuous_action:
                action_vec = utils.to_one_hot(action,
                                              self.action_dim * num_nodes)
            else:
                action_vec = action
            action_vec = action_vec.view(-1, self.action_dim)

        # Attach action to each state
        node_attr = torch.cat([node_attr, action_vec], dim=-1)

    node_attr = self._node_model(node_attr, edge_index, edge_attr)

    # [batch_size, num_nodes, hidden_dim]
    node_attr = node_attr.view(batch_size, num_nodes, -1)

    if self.immovable_bit:
        # object embeddings have an additional bit for movable/immovable
        # objects; we do not need to predict that
        node_attr = node_attr[:, :, :self.input_dim - 1]

    return node_attr

def fit(self, mov_ims, fix_ims, mov_lbs, fix_lbs):
    _, loss = self.sess.run(
        [self.train, self.loss], {
            self.x: mov_ims,
            self.y: fix_ims,
            self.xlabel: to_one_hot(mov_lbs),
            self.ylabel: to_one_hot(fix_lbs)
        })
    return loss

def forward(self, states, action):
    cuda = states.is_cuda
    batch_size = states.size(0)
    num_nodes = states.size(1)

    # states: [batch_size (B), num_objects, embedding_dim]
    # node_attr: flatten states tensor to [B * num_objects, embedding_dim]
    node_attr = states.view(-1, self.input_dim)

    edge_attr = None
    edge_index = None

    if num_nodes > 1:
        # edge_index: [B * (num_objects*[num_objects-1]), 2] edge list
        edge_index = self._get_edge_list_fully_connected(
            batch_size, num_nodes, cuda)
        row, col = edge_index
        edge_attr = self._edge_model(node_attr[row], node_attr[col],
                                     edge_attr)

    if not self.ignore_action:
        if self.action_type == "action_one_hot":
            if self.copy_action:
                action_vec = utils.to_one_hot(
                    action, self.action_dim).repeat(1, self.num_objects)
                action_vec = action_vec.view(-1, self.action_dim)
            else:
                action_vec = utils.to_one_hot(action,
                                              self.action_dim * num_nodes)
                action_vec = action_vec.view(-1, self.action_dim)
        else:
            assert self.action_type == "action_image"
            assert not self.copy_action
            # here each object's action vec is learned separately
            # action_vec = self.act_extractor(action)
            action_vec = self.act_encoder(action)
            action_vec = action_vec.view(-1, self.action_dim).repeat(
                self.num_objects, 1)

        # Attach action to each state
        node_attr = torch.cat([node_attr, action_vec], dim=-1)

    node_attr = self._node_model(node_attr, edge_index, edge_attr)

    # [batch_size, num_nodes, hidden_dim]
    return node_attr.view(batch_size, num_nodes, -1)

def cw(model, X, y, verbose=False, params={}):
    C = params.get('C', 0.0001)
    niters = params.get('niters', 50)
    step_size = params.get('step_size', 0.01)
    confidence = params.get('confidence', 0.0001)
    img_min = params.get('img_min', 0.0)
    img_max = params.get('img_max', 1.0)

    # optimize in tanh-space so the adversarial image stays in [img_min, img_max]
    Xt = arctanh_rescale(X, img_min, img_max)
    Xt_adv = Variable(Xt.data, requires_grad=True)
    y_onehot = to_one_hot(y, model.num_actions).float()
    optimizer = optim.Adam([Xt_adv], lr=step_size)

    for i in range(niters):
        logits = model.forward(tanh_rescale(Xt_adv, img_min, img_max))
        real = (y_onehot * logits).sum(dim=1)
        other = ((1.0 - y_onehot) * logits -
                 (y_onehot * TARGET_MULT)).max(1)[0]
        # hinge loss on the margin between the true class and the runner-up
        loss1 = torch.clamp(real - other + confidence, min=0.)
        # squared L2 distance to the original image
        loss2 = torch.sum((X - tanh_rescale(Xt_adv, img_min, img_max)).pow(2),
                          dim=[1, 2, 3])
        # sum over the batch so backward() gets a scalar
        loss = (loss1 + loss2 * C).sum()
        optimizer.zero_grad()
        model.features.zero_grad()
        loss.backward()
        optimizer.step()
        if verbose:
            print('loss1: {}, loss2: {}'.format(loss1, loss2))

    return tanh_rescale(Xt_adv, img_min, img_max).data

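# The attack above optimizes in an unconstrained tanh-space (the
# change-of-variables trick from Carlini & Wagner) via arctanh_rescale /
# tanh_rescale. A plausible sketch of those helpers, assumed rather than
# taken from the source:
import torch

def tanh_rescale(x, x_min=0.0, x_max=1.0):
    # map unconstrained values back into the pixel range [x_min, x_max]
    return (torch.tanh(x) + 1.0) / 2.0 * (x_max - x_min) + x_min

def arctanh_rescale(x, x_min=0.0, x_max=1.0):
    # map pixels in [x_min, x_max] to unconstrained tanh-space
    x = (x - x_min) / (x_max - x_min) * 2.0 - 1.0
    x = torch.clamp(x, -1.0 + 1e-6, 1.0 - 1e-6)  # keep atanh finite
    return 0.5 * torch.log((1.0 + x) / (1.0 - x))
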
def do_test(model, data, measures):
    start_time = time.time()
    input_size = len(_g.vocab)
    if not _g.args.quiet:
        print('Testing...')
    criterion = nn.NLLLoss(ignore_index=_g.vocab.stoi[_g.padding_symbol])
    losses = None
    so_far = 0
    try:
        # TODO necessary for now to do it this way
        for i, batch in zip(range(len(data)), data):
            loss = _t.evaluate(model, criterion,
                               _u.to_one_hot(batch.before, input_size),
                               batch.after, measures)
            loss = loss.unsqueeze(dim=1)
            losses = loss if losses is None else torch.cat((losses, loss),
                                                           dim=1)
            so_far = i + 1
        if not _g.args.quiet:
            print('Testing done successfully')
    except KeyboardInterrupt:
        print('\nExiting earlier than expected. Wait a moment!')
    losses = losses.mean(dim=1)
    text = 'Test {} elements in {}.'.format(
        so_far * data.batch_size,
        _u.pretty_print_time(time.time() - start_time))
    eval_measures = _u.to_builtin({
        n: (x, y)
        for n, x, y in zip(['loss'] + list(measures.keys()), losses[::2],
                           losses[1::2])
    })
    for name, pair in eval_measures.items():
        text += ' ' + name + ' {:5.6f}({:5.6f}).'.format(pair[0], pair[1])
    if not _g.args.quiet:
        print(text)

def test_text_discriminator():
    d_batch = 4
    d_max_seq_len = 52
    quora_default_args = torch.load(
        QUORA_PARAPHRASE_PRETRAINED_DEFAULT_CONFIG_PATH)
    quora_word_id_to_word = torch.load(
        QUORA_PARAPHRASE_PRETRAINED_WORD_ID_TO_WORD_PATH)
    word_id_to_word = quora_word_id_to_word
    d_vocab = len(word_id_to_word)
    args = edict(quora_default_args)
    d_text_feature = args.txtSize
    dis_dropout = args.drop_prob_lm
    d_dis_cnn = args.cnn_dim

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    text_dis = TextDiscriminator(d_vocab=d_vocab,
                                 d_text_feature=d_text_feature,
                                 dis_dropout=dis_dropout,
                                 d_dis_cnn=d_dis_cnn).to(device).train()
    text_dis.load_state_dict(torch.load('faster_text_dis_v1.pth'))
    assert text_dis

    texts = torch.randint(low=0, high=d_vocab,
                          size=(d_batch, d_max_seq_len)).to(device)
    text_features, valids = text_dis(to_one_hot(texts, d_vocab))
    assert text_features.size() == (
        d_batch, d_text_feature) and text_features.dtype == torch.float
    assert valids.size() == (d_batch, ) and valids.dtype == torch.float

    text_log_probs = -torch.rand(d_batch, d_max_seq_len, d_vocab).to(device)
    text_features, valids = text_dis(text_log_probs)
    assert text_features.size() == (
        d_batch, d_text_feature) and text_features.dtype == torch.float
    assert valids.size() == (d_batch, ) and valids.dtype == torch.float

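# Both text-model tests feed to_one_hot(texts, d_vocab) into the network.
# For (d_batch, d_max_seq_len) token-id tensors, a one-line sketch of that
# helper (an assumption; the project's own version may differ):
import torch.nn.functional as F

def to_one_hot(texts, d_vocab):
    # (d_batch, d_max_seq_len) int64 ids -> (d_batch, d_max_seq_len, d_vocab)
    return F.one_hot(texts, num_classes=d_vocab).float()
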
def train_one_epoch(self, dataloader):
    self.net.train()
    self.hook.flag_hook = False
    for mb, (x, y) in enumerate(dataloader):
        x, y = x.to(self.device), y.to(self.device)
        y_one_hot = utils.to_one_hot(y, self.c_dim)
        if self.cfg.train_random > 0 and (mb + 1) % 10 == 0:
            with torch.no_grad():
                x_rand = torch.randn(size=x.shape).to(self.device)
                logits_rand = self.net(x_rand)
                entropy_rand = metrics.entropy(
                    utils.logits_to_probs(logits_rand))
                if torch.mean(entropy_rand).item() <= self.thresh_entropy:
                    print('training on random inputs & random labels '
                          'for minibatch {}'.format(mb + 1))
                    x = torch.randn(size=x.shape).to(self.device)
                    y_one_hot = torch.ones(size=(x.shape[0], self.c_dim)).to(
                        self.device) / self.c_dim
        self.optimizer.zero_grad()
        logits = self.net(x)
        losses = self.criterion(logits, y_one_hot)
        torch.mean(losses).backward()
        self.optimizer.step()

def predict(self, thetas, x_test, y_test, modeldir='../model/LR',
            Onsave=True):
    x = np.insert(x_test, 0, 1, axis=1)
    y_real = y_test
    y_pred = [
        np.argmax([self._sigmoid(xi @ theta) for theta in thetas])
        for xi in x
    ]
    p_real = to_one_hot(y_test)
    p_pred = [[self._sigmoid(xi @ theta) for theta in thetas] for xi in x]
    # save thetas and p_pred of each binary classifier
    if Onsave:
        try:
            np.savez('%s/test_rst.npz' % modeldir,
                     y_real=y_real,
                     y_pred=y_pred,
                     p_real=p_real,
                     p_pred=p_pred)
            np.savez('%s/thetas.npz' % modeldir, thetas=thetas)
        except Exception:
            print('save thetas and test_rst failed')
    return y_pred

def test_simple(self):
    mask: torch.Tensor = torch.randint(low=0, high=2, size=(2, 15, 10))
    num_classes: int = 3
    oh_mask: torch.Tensor = utils.to_one_hot(mask, num_classes)
    self.assertEqual(oh_mask.size(), torch.Size((2, num_classes, 15, 10)))
    self.assertTrue(_is_same_tensor(oh_mask.argmax(dim=1), mask))

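# One implementation that would satisfy this test (a sketch; the actual
# utils.to_one_hot may differ): scatter ones into a new class dimension.
import torch

def to_one_hot(mask: torch.Tensor, num_classes: int) -> torch.Tensor:
    # (B, H, W) integer mask -> (B, num_classes, H, W) float one-hot
    shape = (mask.size(0), num_classes) + mask.shape[1:]
    one_hot = torch.zeros(shape, dtype=torch.float)
    return one_hot.scatter_(1, mask.unsqueeze(1).long(), 1.0)
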
def forward(self, text_features):
    '''
    encoded: (batch_size, feat_size)
    seq: (batch_size, seq_len)
    lengths: (batch_size, )
    '''
    d_batch = text_features.size(0)
    device = text_features.device
    d_max_seq_len = self.d_max_seq_len

    text_log_probs = torch.zeros(d_batch, d_max_seq_len, self.d_vocab,
                                 device=device)
    text_log_probs[:, 0, :] = torch.log(
        to_one_hot(torch.tensor(self.start_token), self.d_vocab))[None, :]
    texts = torch.full((d_batch, d_max_seq_len), self.pad_token,
                       dtype=torch.long, device=device)
    texts[:, 0] = self.start_token

    hs = None
    for i in range(1, d_max_seq_len):
        if i > 1:
            # (d_batch, 1) -> (d_batch, 1, d_text_feature)
            word_embeddings = self.embed(words)
        else:
            # (d_batch, d_text_feature) -> (d_batch, 1, d_text_feature)
            word_embeddings = text_features.unsqueeze(1)
        log_probs, hs = self.step(word_embeddings, hs)
        # (d_batch, 1, d_vocab) -> (d_batch, d_vocab)
        log_probs = log_probs.squeeze(1)
        text_log_probs[:, i] = log_probs
        words = torch.multinomial(torch.exp(log_probs), 1)
        texts[:, i] = words.squeeze(1)  # (d_batch, 1) -> (d_batch,)

    texts, text_log_probs, text_lens = mask_in_place_and_calc_length(
        texts, text_log_probs, self.end_token, self.pad_token)
    return texts, text_log_probs, text_lens

def validate(val_loader, model, log):
    '''Evaluate trained model.'''
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # Switch to evaluate mode
    model.eval()

    for i, (input, target) in enumerate(val_loader):
        if args.use_cuda:
            input = input.cuda()
            target = target.cuda()

        with torch.no_grad():
            output = model(input)
            target_reweighted = to_one_hot(target, args.num_classes)
            loss = bce_loss(softmax(output), target_reweighted)

        # Measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        losses.update(loss.item(), input.size(0))
        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))

    print_log(
        '**Test ** Prec@1 {top1.avg:.2f} Prec@5 {top5.avg:.2f} '
        'Error@1 {error1:.2f} Loss: {losses.avg:.3f} '.format(
            top1=top1, top5=top5, error1=100 - top1.avg, losses=losses), log)
    return top1.avg, losses.avg

def forward_n_layers(self, x, target=None, mixup=False, mixup_hidden=False,
                     mixup_alpha=None, layer_num=None):
    if self.per_img_std:
        x = per_image_standardization(x)
    out = x
    if target is not None:
        target_reweighted = to_one_hot(target, self.num_classes)
    out = self.conv1(out)
    out = self.layer1(out)
    if layer_num == 1:
        return out, target_reweighted
    out = self.layer2(out)
    if layer_num == 2:
        return out, target_reweighted
    out = self.layer3(out)
    if layer_num == 3:
        return out, target_reweighted
    out = act(self.bn1(out))
    out = F.avg_pool2d(out, 8)
    out = out.view(out.size(0), -1)
    out = self.linear(out)
    return out, target_reweighted

def get_latents(self, encodings, probs_b):
    """Read out latents (z) from input encodings for a single segment."""
    readout_mask = probs_b[:, 1:, None]  # Offset readout by 1 to the left.
    readout = (encodings[:, :-1] * readout_mask).sum(1)
    hidden = F.relu(self.head_z_1(readout))
    logits_z = self.head_z_2(hidden)

    # Gaussian latents.
    if self.latent_dist == 'gaussian':
        if self.training:
            mu, log_var = torch.split(logits_z, self.latent_dim, dim=1)
            sample_z = utils.gaussian_sample(mu, log_var)
        else:
            sample_z = logits_z[:, :self.latent_dim]
    # Concrete / Gumbel-softmax latents.
    elif self.latent_dist == 'concrete':
        if self.training:
            sample_z = utils.gumbel_softmax_sample(logits_z,
                                                   temp=self.temp_z)
        else:
            sample_z_idx = torch.argmax(logits_z, dim=1)
            sample_z = utils.to_one_hot(sample_z_idx, logits_z.size(1))
    else:
        raise ValueError('Invalid argument for `latent_dist`.')

    return logits_z, sample_z

def update_sr(sr, sequence, discount, learning_rate):
    """Update SR matrix.

    Args:
        sr: n_states x n_states matrix
        sequence: iterable containing a sequence of state indices
        discount: scalar discount factor in [0, 1)
        learning_rate: scalar learning rate in [0, 1]

    Returns:
        Updated SR matrix.
    """
    n_states = sr.shape[0]
    for state_ind, state_ind_next in zip(sequence[:-1], sequence[1:]):
        if (state_ind is not None) and (state_ind_next is not None):
            state_vec = to_one_hot(state_ind, n_states)
            # compute the successor prediction error:
            # state observed + discount * SR at next state,
            # minus the previous estimate
            pred_err = (state_vec + discount * sr[state_ind_next, :] -
                        sr[state_ind, :])
            # use the prediction error to update
            sr[state_ind, :] = sr[state_ind, :] + learning_rate * pred_err
    return sr

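# A minimal usage sketch for update_sr (the initialization and trajectory
# are illustrative assumptions; to_one_hot as in the NumPy sketch above):
import numpy as np

n_states = 4
sr = np.eye(n_states)  # a common choice: start the SR at the identity
trajectory = [0, 1, 2, 3, 0]
for _ in range(100):
    sr = update_sr(sr, trajectory, discount=0.9, learning_rate=0.1)
print(sr[0])  # expected discounted occupancy of each state, starting from 0
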
def forward(self, x):
    state, action = x
    action = utils.to_one_hot(action, self.action_size)

    # flatten state
    batch_size = state.size(0)
    obj_size = state.size(1)
    state_r = state.reshape(batch_size * obj_size, state.size(2))

    # create keys and queries
    key_r = self.fc_key(state_r)
    query = self.fc_query(action)
    value = self.fc_value(action)
    key = key_r.reshape(batch_size, obj_size, self.key_query_size)

    # compute a vector of attention weights, one for each object slot
    if self.sqrt_scale:
        weights = F.softmax(
            (key * query[:, None]).sum(dim=2) *
            (1 / np.sqrt(self.key_query_size)), dim=-1)
    else:
        weights = F.softmax((key * query[:, None]).sum(dim=2), dim=-1)

    if self.ablate_weights:
        # set uniform weights to check if they provide any benefit
        weights = torch.ones_like(weights) / weights.shape[1]

    # create a separate action for each object slot
    # weights: [|B|, |O|], value: [|B|, value_size]
    # => [|B|, |O|, value_size]
    return weights[:, :, None] * value[:, None, :]

def __init__(self, labels, img_train, labels_train, img_test, labels_test,
             **kwargs):
    self.labels = labels
    self.x_train = np.reshape(img_train, img_train.shape + (1, ))
    self.y_train = to_one_hot(labels_train, len(labels))
    self.x_test = np.reshape(img_test, img_test.shape + (1, ))
    self.y_test = to_one_hot(labels_test, len(labels))
    self.n = self.x_train.shape[1:]
    self.x_train, self.x_val, self.y_train, self.y_val = \
        train_test_split(self.x_train, self.y_train, test_size=0.1)
    self.lr = kwargs.pop('learning_rate', 1e-3)
    self.history_path = kwargs.pop('results_path')
    self.model_path = kwargs.pop('model_path')
    self.init_nn()

def _get_files_and_labels(img_dir):
    files = utils.list_files(img_dir)
    labels = [CLASS_MAP[x.split('_')[2][:-4]] for x in files]
    files, labels = utils.resample_unbalanced_data(files, labels)
    unique, counts = np.unique(labels, return_counts=True)
    print('{} labels have counts: {}'.format(unique, counts))
    return ([os.path.join(img_dir, x) for x in files],
            [utils.to_one_hot(x, 3) for x in labels])

def train_tcn(in_path, nn_path, n_epochs=5):
    dataset = data.seqs.read_seqs(in_path)
    train, test = dataset.split()
    X, y, params = to_dataset(train)
    if "n_cats" in params:
        y = utils.to_one_hot(y, params["n_cats"])
    model = make_tcn(params)
    model.fit(X, y, epochs=n_epochs, batch_size=32)
    model.save_weights(nn_path)

def forward(self, ins):
    obj_ids = torch.arange(self.num_objects)
    obj_ids = utils.to_one_hot(obj_ids, self.num_objects).unsqueeze(0)
    obj_ids = obj_ids.repeat((ins.size(0), 1, 1)).to(ins.get_device())
    h = torch.cat((ins, obj_ids), -1)
    h = self.act1(self.fc1(h))
    h = self.act2(self.fc2(h))
    h = self.fc3(h).sum(1)
    return h.view(-1, self.output_size[0], self.output_size[1],
                  self.output_size[2])

def preprocess_save_to_queue(preprocess_fn, q, list_of_lists, output_files,
                             segs_from_prev_stage, classes,
                             transpose_forward):
    errors_in = []
    for i, l in enumerate(list_of_lists):
        try:
            output_file = output_files[i]
            print("preprocessing", output_file)
            d, _, dct = preprocess_fn(l)
            print(output_file, dct)
            if segs_from_prev_stage[i] is not None:
                assert isfile(segs_from_prev_stage[i]) and \
                    segs_from_prev_stage[i].endswith(".nii.gz"), \
                    "segs_from_prev_stage must point to a segmentation file"
                seg_prev = sitk.GetArrayFromImage(
                    sitk.ReadImage(segs_from_prev_stage[i]))
                # check to see if shapes match
                img = sitk.GetArrayFromImage(sitk.ReadImage(l[0]))
                assert all([i == j for i, j in
                            zip(seg_prev.shape, img.shape)]), \
                    "image and segmentation from previous stage don't have " \
                    "the same pixel array shape! image: %s, seg_prev: %s" % \
                    (l[0], segs_from_prev_stage[i])
                seg_prev = seg_prev.transpose(transpose_forward)
                seg_reshaped = resize_segmentation(seg_prev, d.shape[1:],
                                                   order=1, cval=0)
                seg_reshaped = to_one_hot(seg_reshaped, classes)
                d = np.vstack((d, seg_reshaped)).astype(np.float32)
            print(d.shape)
            # *0.85 just to be safe, 4 because float32 is 4 bytes
            if np.prod(d.shape) > (2e9 / 4 * 0.85):
                print("This output is too large for python process-process "
                      "communication. Saving output temporarily to disk")
                np.save(output_file[:-7] + ".npy", d)
                d = output_file[:-7] + ".npy"
            q.put((output_file, (d, dct)))
        except KeyboardInterrupt:
            raise
        except Exception as e:
            print("error in", l)
            print(e)
            errors_in.append(l)  # collect the case so it is reported below
    q.put("end")
    if len(errors_in) > 0:
        print("There were some errors in the following cases:", errors_in)
        print("These cases were ignored.")
    else:
        print("This worker has ended successfully, no errors to report")

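# Here to_one_hot(seg, classes) turns an integer label map into one channel
# per class so it can be stacked onto the preprocessed image data. A
# plausible sketch (an assumption; the project's helper may differ in dtype
# or label handling):
import numpy as np

def to_one_hot(seg, all_seg_labels):
    # (X, Y, Z) integer label map -> (len(all_seg_labels), X, Y, Z) one-hot
    result = np.zeros((len(all_seg_labels),) + seg.shape, dtype=np.float32)
    for i, label in enumerate(all_seg_labels):
        result[i][seg == label] = 1.0
    return result
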
def forward(self, encoder_outputs, inputs, final_encoder_hidden,
            targets=None, keep_prob=1.0, teacher_forcing=0.0):
    batch_size = encoder_outputs.data.shape[0]
    seq_length = encoder_outputs.data.shape[1]

    hidden = final_encoder_hidden
    if next(self.parameters()).is_cuda:
        hidden = hidden.cuda()

    # every decoder output seq starts with <SOS>
    sos_output = Variable(torch.zeros(
        (batch_size, self.embedding.num_embeddings + seq_length)))
    sampled_idx = Variable(torch.ones((batch_size, 1)).long())
    if next(self.parameters()).is_cuda:
        sos_output = sos_output.cuda()
        sampled_idx = sampled_idx.cuda()
    sos_output[:, 1] = 1.0  # index 1 is the <SOS> token, one-hot encoding

    decoder_outputs = [sos_output]
    sampled_idxs = [sampled_idx]

    if keep_prob < 1.0:
        dropout_mask = (Variable(torch.rand(
            batch_size, 1,
            2 * self.hidden_size + self.embedding.embedding_dim)) <
            keep_prob).float() / keep_prob
    else:
        dropout_mask = None

    selective_read = Variable(torch.zeros(batch_size, 1, self.hidden_size))
    one_hot_input_seq = to_one_hot(inputs,
                                   len(self.lang.tok_to_idx) + seq_length)
    if next(self.parameters()).is_cuda:
        selective_read = selective_read.cuda()
        one_hot_input_seq = one_hot_input_seq.cuda()

    for step_idx in range(1, self.max_length):
        if (targets is not None and teacher_forcing > 0.0
                and step_idx < targets.shape[1]):
            # replace some inputs with the targets (i.e. teacher forcing)
            teacher_forcing_mask = Variable(
                (torch.rand((batch_size, 1)) < teacher_forcing),
                requires_grad=False)
            if next(self.parameters()).is_cuda:
                teacher_forcing_mask = teacher_forcing_mask.cuda()
            sampled_idx = sampled_idx.masked_scatter(
                teacher_forcing_mask, targets[:, step_idx - 1:step_idx])

        sampled_idx, output, hidden, selective_read = self.step(
            sampled_idx, hidden, encoder_outputs, selective_read,
            one_hot_input_seq, dropout_mask=dropout_mask)

        decoder_outputs.append(output)
        sampled_idxs.append(sampled_idx)

    decoder_outputs = torch.stack(decoder_outputs, dim=1)
    sampled_idxs = torch.stack(sampled_idxs, dim=1)
    return decoder_outputs, sampled_idxs

def deploy(self, dir_path, test_lbs):
    z = to_hard_seg(self.sess.run(self.z, {self.x: to_one_hot(test_lbs)}))
    if dir_path is not None:
        temp_path = dir_path + '/{:02d}_{}.png'
        for i in range(z.shape[0]):
            save_image(temp_path.format(i + 1, 'x'), test_lbs[i, :, :, 0],
                       is_integer=True)
            save_image(temp_path.format(i + 1, 'y'), z[i, :, :, 0],
                       is_integer=True)
    return z

def new_canvas(self, goal):
    self.goal = goal
    self.canvas = np.zeros(self.dim)
    self.previous_score = None
    self.stroke_count = 0
    self.terminal = False
    return (U.to_one_hot(self.goal, self.classifier.number_of_goals),
            self.canvas, 0, self.terminal)

def test_text_encoder():
    d_batch = 2
    d_max_seq_len = 26
    d_vocab = 27699
    d_text_feature = 512
    text_enc_dropout = 0.5
    d_text_enc_cnn = 512

    text_enc = TextEncoder(d_vocab=d_vocab,
                           d_text_feature=d_text_feature,
                           text_enc_dropout=text_enc_dropout,
                           d_text_enc_cnn=d_text_enc_cnn)
    text_enc.load_state_dict(torch.load('new_text_enc.pth'))

    texts = torch.randint(low=0, high=d_vocab, size=(d_batch, d_max_seq_len))
    text_features = text_enc(to_one_hot(texts, d_vocab))
    assert text_features.size() == (d_batch, d_text_feature) and \
        text_features.dtype == torch.float

def run_test_with_mixup(cuda, C, loader, mix_rate, mix_layer, num_trials=1):
    correct = 0
    total = 0
    loss = 0.0
    softmax = torch.nn.Softmax()
    bce_loss = torch.nn.CrossEntropyLoss()  # torch.nn.BCELoss()
    lam = np.array(mix_rate)
    lam = Variable(torch.from_numpy(np.array([lam]).astype('float32')).cuda())

    for i in range(0, num_trials):
        for batch_idx, (data, target) in enumerate(loader):
            if cuda:
                data, target = data.cuda(), target.cuda()
            data, target = Variable(data, volatile=True), Variable(target)
            output, reweighted_target = C(data, lam=lam, target=target,
                                          layer_mix=mix_layer)
            # take the original with probability lam. First goal is to
            # recover the target indices of the other batch.
            pred = output.data.max(
                1, keepdim=True)[1]  # index of the max log-probability
            correct += pred.eq(target.data.view_as(pred)).cpu().numpy().sum()
            total += target.size(0)
            # these are the original targets in one-hot space;
            # reweighted_target puts probability lam on the first set of
            # indexed values, so subtracting recovers the second target
            target1_onehot = to_one_hot(target, 10)
            target2 = (reweighted_target - target1_onehot * lam).max(1)[1]
            loss += mixup_criterion(target, target2, lam)(
                bce_loss, output) * target.size(0)

    t_accuracy = 100. * correct / total
    average_loss = loss / total
    return t_accuracy, average_loss

def compute_y(self, x, targets, mixup=False, visible_mixup=False):
    target_soft = to_one_hot(targets, 10)
    if visible_mixup:
        lam = Variable(torch.from_numpy(
            np.array([np.random.beta(0.5, 0.5)]).astype('float32')).cuda())
        x, target_soft = mixup_process(x, target_soft, lam=lam)
    h = self.h1(x)
    if mixup:
        lam = Variable(torch.from_numpy(
            np.array([np.random.beta(0.5, 0.5)]).astype('float32')).cuda())
        h, target_soft = mixup_process(h, target_soft, lam=lam)
    y = self.sm(self.h2(h))
    return y, target_soft

# train_dataset's length
print(len(train_data))
# test_dataset's length
print(len(test_data))

# decoding a sequence
print(decoding_newswires(reuters, train_data[0]))
# each label is an integer between 0 and 45

# preparing the data
x_train = vectorize_sequences(train_data)
x_test = vectorize_sequences(test_data)

# set the labels to one-hot encoding
one_hot_train_labels = to_one_hot(train_labels)
one_hot_test_labels = to_one_hot(test_labels)
# there is a built-in way to do this in Keras
one_hot_train_labels_ = to_categorical(train_labels)
one_hot_test_labels_ = to_categorical(test_labels)


# model definition
def model(x, y, x_val, y_val):
    model_ = models.Sequential()
    model_.add(layers.Dense(64, activation='relu', input_shape=(10000, )))
    model_.add(layers.Dense(64, activation='relu'))
    model_.add(layers.Dense(46, activation='softmax'))
    # compile (loss/metrics assumed: the original snippet was cut off
    # mid-call; categorical_crossentropy is the standard choice for
    # one-hot 46-class targets)
    model_.compile(optimizer=optimizers.Adam(lr=0.001),
                   loss='categorical_crossentropy',
                   metrics=['accuracy'])
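    # NOTE (assumed completion, not from the source): the original snippet
    # ends mid-definition. A plausible rest of the body, following the
    # standard Keras workflow of training on (x, y) and validating on
    # (x_val, y_val):
    history = model_.fit(x, y,
                         epochs=20,
                         batch_size=512,
                         validation_data=(x_val, y_val))
    return model_, history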