def generate_integer_tuples(self, means, rng=None, use_cuda=False, relative_range=None, seed=None):
    """Produce integer index tuples for a batch of continuous index tuples.

    Three groups of integer tuples are built for every point in ``means`` and
    concatenated along dim 2, in this order:

    1. the ``2**rank`` floor/ceil combinations selected by ``self.floor_mask``,
    2. ``self.gadditional`` tuples drawn uniformly from ``[0, rng)``,
    3. ``self.radditional`` tuples drawn uniformly from a window of size
       ``self.region`` around the rounded mean, shifted to stay inside
       ``[0, rng)``.

    :param means: tensor whose ``size()`` unpacks to ``(batchsize, n, rank)``.
    :param rng: per-dimension upper bounds; passed to the FloatTensor
        constructor, so the ``None`` default is not usable as-is.
    :param use_cuda: build the sampling buffers with ``torch.cuda.FloatTensor``.
    :param relative_range: accepted but never read by this method.
    :param seed: if given, seeds torch's global RNG before sampling.
    :return: long tensor viewed as ``(batchsize, -1, rank)``.
    """
    if seed is not None:
        torch.manual_seed(seed)

    # Choose the tensor constructor once rather than at every allocation.
    make_float = torch.cuda.FloatTensor if use_cuda else FloatTensor

    batchsize, n, rank = means.size()
    corner_shape = (batchsize, n, 2 ** rank, rank)

    # --- Group 1: the neighbouring integers (floor or ceil per dimension) ---
    take_floor = self.floor_mask.unsqueeze(0).unsqueeze(0).expand(*corner_shape)
    corners = means.data.unsqueeze(2).expand(*corner_shape).contiguous()
    corners[take_floor] = corners[take_floor].floor()
    corners[~take_floor] = corners[~take_floor].ceil()
    corners = corners.long()

    # --- Group 3 (drawn first): uniform samples from a small window around
    # the rounded index tuple. The draw order matters for reproducibility
    # under a fixed seed, so this buffer is filled before the global one.
    local = make_float(batchsize, n, self.radditional, rank)
    local.uniform_()
    # Scale just below 1 so the later truncation never lands on the upper bound.
    local *= (1.0 - EPSILON)

    rng = make_float(rng)
    # bounds of the tensor
    size_limit = rng.unsqueeze(0).unsqueeze(0).unsqueeze(0).expand_as(local)
    # bounds of the range from which to sample
    window = make_float(self.region)
    window = window.unsqueeze(0).unsqueeze(0).unsqueeze(0).expand_as(local)

    centre = means.round().unsqueeze(2).expand_as(local)

    # upper and lower bounds of the window
    lower = centre - window * 0.5
    upper = centre + window * 0.5

    # Pull any window that sticks out of [0, size_limit) back inside.
    too_low = lower < 0.0
    lower[too_low] = 0.0
    too_high = upper > size_limit
    lower[too_high] = size_limit[too_high] - window[too_high]

    local = (local * window + lower).long()

    # --- Group 2: uniform samples over all possible index tuples,
    # with replacement ---
    scattered = make_float(batchsize, n, self.gadditional, rank)
    scattered.uniform_()
    scattered *= (1.0 - EPSILON)
    size_limit = rng.unsqueeze(0).unsqueeze(0).unsqueeze(0).expand_as(scattered)
    scattered = torch.floor(scattered * size_limit).long()

    combined = torch.cat([corners, scattered, local], dim=2)
    return combined.view(batchsize, -1, rank)
def variablelize(instances):
    """Turn a batch of essay instances into padded tensors and length arrays.

    Each instance is indexed positionally; this function reads
    ``ins[0]`` (the essay's sentences, each a sequence that is zero-padded on
    the right), ``ins[1]`` (a label, shifted down by one here), ``ins[3]``
    (one mask sequence per sentence), ``ins[4]`` (number of sentences) and
    ``ins[5]`` (sentence lengths).

    Side effect: ``config.max_num_sent`` and ``config.max_length_sent`` are
    overwritten with the maxima found in this batch.

    :param instances: non-empty sequence of instances as described above.
    :return: ``(data, mask, label, num_sent, sent_lengths, one_hot_label)``
        where ``data`` holds all padded sentences of all essays stacked in
        order, ``mask`` is padded to ``(len(instances), max_num_sent,
        max_length_sent)``, ``label`` is the zero-based label tensor,
        ``num_sent`` / ``sent_lengths`` are numpy arrays and
        ``one_hot_label`` has 4 columns. Tensors are moved to the GPU when the
        module-level ``use_cuda`` flag is set.
    """
    max_num_sent = 0
    max_length_sent = 0
    for ins in instances:
        max_num_sent = max(max_num_sent, len(ins[0]))
        max_length_sent = max(max_length_sent, len(max(ins[0], key=len)))
    config.max_num_sent = max_num_sent
    config.max_length_sent = max_length_sent

    def pad_sentence(sent):
        # Right-pad one sentence-level sequence with zeros to the batch maximum.
        return np.pad(sent, (0, config.max_length_sent - len(sent)),
                      'constant', constant_values=0)

    # Flatten essays -> sentences in one pass; the former
    # reduce(lambda x, y: x + y, ...) re-copied the growing list per essay.
    data = [pad_sentence(sent) for essay in instances for sent in essay[0]]
    # `np.int` was only an alias of the builtin `int` and was removed in
    # NumPy 1.24; the builtin yields the same dtype.
    data = Variable(LongTensor(np.asarray(data, dtype=int)))
    data = data.cuda() if use_cuda else data

    num_sent = np.array([essay[4] for essay in instances])
    # Kept as a `+` reduction because the element type of essay[5] is not
    # visible from here (presumably lists that are concatenated -- confirm).
    sent_lengths = reduce(lambda x, y: x + y,
                          [essay[5] for essay in instances])
    sent_lengths = np.array(sent_lengths)

    mask = [[pad_sentence(sent) for sent in essay[3]] for essay in instances]
    # Pad every essay with all-zero rows up to the largest sentence count.
    mask = [
        np.pad(essay, ((0, config.max_num_sent - len(essay)), (0, 0)),
               'constant', constant_values=0)
        for essay in mask
    ]
    # `np.float` was an alias of the builtin `float`, removed in NumPy 1.24.
    mask = Variable(FloatTensor(np.asarray(mask, dtype=float)))
    mask = mask.cuda() if use_cuda else mask

    label = np.asarray([ins[1] - 1 for ins in instances])
    label = Variable(LongTensor(label))
    # Column index for scatter_; taken before `label` may move to the GPU so
    # the one-hot matrix is assembled on the CPU.
    inp = torch.unsqueeze(label, 1)
    label = label.cuda() if use_cuda else label

    one_hot_label = torch.FloatTensor(len(instances), 4).zero_()
    one_hot_label.scatter_(1, inp.data, 1)
    one_hot_label = Variable(FloatTensor(one_hot_label))
    one_hot_label = one_hot_label.cuda() if use_cuda else one_hot_label

    return data, mask, label, num_sent, sent_lengths, one_hot_label
def _train_a(self, s):
    """Run one optimizer step on the actor.

    The loss is the negated mean of ``self.critic_e`` evaluated on the state
    batch ``s`` and on the action that ``self.actor_e`` proposes for it, so
    stepping ``self.optimizer_a`` pushes the actor towards higher critic
    output.

    :param s: state batch, anything accepted by the ``FloatTensor`` constructor.
    """
    self.optimizer_a.zero_grad()

    # Two separate tensors are built from `s`, one per network input.
    critic_state = Variable(FloatTensor(s))
    proposed_action = self.actor_e(Variable(FloatTensor(s)))

    critic_value = self.critic_e(critic_state, proposed_action)
    loss_a = -critic_value.mean()

    loss_a.backward()
    self.optimizer_a.step()
def gabor_training_wrapper(x: np.ndarray, y: np.ndarray, class_this,
                           class_params=None,
                           num_batch=None,
                           batch_size=None,
                           num_epoch=1000, lr=0.01, verbose=False,
                           seed_bias=1000,  # making complex and simple different seeds.
                           optimizer='adam'):
    """Fit Gabor models to (x, y) with several seeded restarts and keep the best.

    For each of ``num_batch`` restarts a fresh network is built with seed
    ``i_batch``, trained with Adam on an MSE loss for ``num_epoch`` full-batch
    steps, and scored by the Pearson correlation between each row of its
    output and ``y``. The best-correlated row over all restarts wins.

    The inputs and every network are moved to the GPU with ``.cuda()``, so a
    CUDA device is required.

    :param x: images of shape ``(num_im, 1, height, width)`` with
        ``height == width`` (asserted).
    :param y: responses of shape ``(num_im,)`` (asserted).
    :param class_this: key into the module-level ``__gabor_class_dict``; the
        code handles ``'complex'``, ``'simple'`` and ``'multi'``.
    :param class_params: extra constructor keyword arguments. Defaults to
        ``{'num_unit': 1}`` for ``'complex'``/``'simple'``; mandatory for
        ``'multi'``, where ``'num_simple'`` and ``'num_complex'`` are read.
        Must not contain the seed keys, which are supplied here.
    :param num_batch: how many batches (restarts). more means potentially more
        accurate fitting. If None, derived from the unit counts in
        ``class_params``.
    :param batch_size: how large is each batch. if None, it's designed to run
        on a 8G card, with 20x20 input, 10000 images. Overrides the
        ``'batch_size'`` entry passed to the model constructor.
    :param num_epoch: number of optimisation steps per restart.
    :param lr: Adam learning rate.
    :param verbose: print progress and per-restart summaries.
    :param seed_bias: offset added to ``i_batch`` for ``seed_complex`` in the
        ``'multi'`` case so the two seeds differ.
    :param optimizer: only ``'adam'`` is accepted (asserted).
    :return: ``(best_corr, best_params, best_predict)``.
    """
    num_im, num_c, height, width = x.shape
    assert num_c == 1 and num_im > 0 and height == width and height > 0
    assert y.shape == (num_im,)
    class_this_to_use = __gabor_class_dict[class_this]

    if class_this in {'complex', 'simple'}:
        if class_params is None:
            class_params = {'num_unit': 1}
    else:
        assert class_this == 'multi'
        assert class_params is not None

    # Default per-model batch size, scaled down by the number of units.
    if class_this == 'complex':
        class_params_to_use = {
            'batch_size': 192 // class_params['num_unit'],  # 6G
        }
    elif class_this == 'simple':
        class_params_to_use = {
            'batch_size': 384 // class_params['num_unit'],  # 6G
        }
    else:
        assert class_this == 'multi'
        # this way, it will consume same amount of memory as complex and simple.
        # When one of them is zero, it degrades to complex and simple.
        class_params_to_use = {
            'batch_size': 384 // (class_params['num_simple'] + 2 * class_params['num_complex']),
        }
    # Caller-supplied params win over the defaults computed above ...
    class_params_to_use.update(class_params)
    # ... and an explicit batch_size argument wins over both.
    if batch_size is not None:
        class_params_to_use['batch_size'] = batch_size

    if num_batch is None:
        # 192 for complex and 384 for simple. should take similar amount of time.
        if class_this in {'complex', 'simple'}:
            num_batch = class_params['num_unit']
        else:
            assert class_this == 'multi'
            num_batch = class_params['num_simple'] + class_params['num_complex']

    # Seeds are injected per restart below, so callers must not pass them.
    if class_this in {'simple', 'complex'}:
        assert 'seed' not in class_params_to_use
    else:
        assert class_this == 'multi'
        assert 'seed_complex' not in class_params_to_use
        assert 'seed_simple' not in class_params_to_use

    pytorch_input_x = Variable(FloatTensor(x).cuda())
    pytorch_input_y = Variable(FloatTensor(y).cuda())

    best_corr = -np.inf
    best_params = None
    best_predict = None

    for i_batch in range(num_batch):
        if class_this in {'simple', 'complex'}:
            net_this: GaborBase = class_this_to_use(imsize=height, seed=i_batch, **class_params_to_use)
        else:
            assert class_this == 'multi'
            net_this: GaborBase = class_this_to_use(imsize=height, seed_complex=i_batch + seed_bias, seed_simple=i_batch, **class_params_to_use)
        # net_this: GaborBase = class_this_to_use(imsize=height, seed=i_batch, **class_params_to_use)
        # initialize
        net_this.init_output_bias(x, y)
        # training.
        net_this.cuda()
        assert optimizer == 'adam'
        optimizer_this = Adam(net_this.parameters(), lr=lr)
        # else:
        #     assert optimizer == 'lbfgs'
        #     optimizer_this = LBFGS(net_this.parameters(), lr=lr)
        criterion_this = MSELoss(size_average=True)
        # Pre-bound so that `del loss` below is valid even when num_epoch == 0.
        loss = None
        # if optimizer == 'adam':
        for epoch in range(num_epoch):  # loop over the dataset multiple times
            # zero the parameter gradients
            optimizer_this.zero_grad()
            # forward + backward + optimize
            outputs = net_this(pytorch_input_x)
            # y is expanded to the shape of `outputs` before the loss is taken.
            loss = criterion_this(outputs, pytorch_input_y.expand_as(outputs))
            loss.backward()
            optimizer_this.step()
            net_this.adjust_params()
            if verbose and (epoch + 1) % 200 == 0:
                print('epoch {}, loss {}'.format(epoch + 1, loss))
        # else:
        #     # THIS DOESN'T WORK, PERIOD.
        #     # it may work if it can exploit the block structure in the hessian.
        #     # as I actually train many sets of parameters together.
        #     # however, I don't know how to do it.
        #     assert optimizer == 'lbfgs'
        #     loss_dict = {'loss': None}
        #
        #     def closure():
        #         # correct the values of updated input image
        #         del loss_dict['loss']
        #         net_this.adjust_params()
        #         optimizer_this.zero_grad()
        #         # forward + backward + optimize
        #         outputs = net_this(pytorch_input_x)
        #         loss_this = criterion_this(outputs, pytorch_input_y.expand_as(outputs))
        #         loss_this.backward()
        #         loss_dict['loss'] = loss_this
        #         return loss_this
        #
        #     for epoch in range(num_epoch):
        #         optimizer_this.step(closure=closure)
        #         if verbose and (epoch + 1) % 20 == 0:
        #             print('epoch {}, loss {}'.format(epoch + 1, loss_dict['loss']))
        #     del loss_dict
        del loss
        # final adjustment.
        net_this.adjust_params()
        outputs = net_this(pytorch_input_x).data.cpu().numpy()
        # One correlation per output row; the comprehension's `x` is local to
        # it and does not rebind the image array `x`.
        all_corrs = np.array([pearsonr(x, y)[0] for x in outputs])
        # Non-finite correlations are treated as 0.
        all_corrs[np.logical_not(np.isfinite(all_corrs))] = 0
        # then take the index of the max.
        # save the params.
        assert np.all(np.isfinite(all_corrs))
        best_idx = np.argmax(all_corrs)
        best_corr_this = all_corrs[best_idx]
        if best_corr_this > best_corr:
            if verbose:
                print(f'update best corr from {best_corr} to {best_corr_this}')
            best_params = fetch_params(net_this, best_idx)
            best_corr = best_corr_this
            best_predict = outputs[best_idx].copy()
        else:
            if verbose:
                print(f'no update best corr as {best_corr} >= {best_corr_this}')
        if verbose:
            print(f'batch {i_batch+1}/{num_batch}, high corr cases {(all_corrs>0.99).sum()}/{all_corrs.size}')
            print(f'best corr up to now {best_corr}')
        # Drop references before the next restart allocates a new network.
        del net_this
        del optimizer_this
        del criterion_this
    # Fails when no restart ran (num_batch == 0), since nothing was selected.
    assert np.isfinite(best_corr) and best_params is not None
    # maybe more del helps memory releasing.
    del pytorch_input_x
    del pytorch_input_y
    return best_corr, best_params, best_predict
f_atoms=FloatTensor([[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 
0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000, 1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.1201, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000, 1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.1201, 0.0000, 1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 
0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000, 1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.1201, 0.0000, 1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000, 1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.1201, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 
0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000, 1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.1201, 1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 
0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000, 1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.1600, 1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]]),
def build_model(cfg: dict = None,
                src_vocab: Vocabulary = None,
                trg_vocab: Vocabulary = None,
                trv_vocab: Vocabulary = None,
                canonizer=None) -> Model:
    """
    Build and initialize the model according to the configuration.

    Embeddings, encoder, decoder and generator are constructed from ``cfg``,
    assembled into a ``Model``, optionally weight-tied, and finally passed to
    ``initialize_model``.

    Although ``cfg``, ``src_vocab`` and ``trg_vocab`` default to ``None``,
    all three are dereferenced unconditionally, so they must be supplied.

    :param cfg: dictionary configuration containing model specifications;
        the ``"encoder"`` and ``"decoder"`` sub-dictionaries are required
    :param src_vocab: source vocabulary
    :param trg_vocab: target vocabulary
    :param trv_vocab: kb true value lookup vocabulary
    :param canonizer: optional callable; when given it is called with
        ``copy_from_source=...`` and the result is passed to the model as
        ``canonize``
    :return: built and initialized model
    :raises NotImplementedError: whenever ``cfg`` has an
        ``"embedding_files"`` key
    :raises ConfigurationError: if embeddings are to be tied but the
        vocabularies differ, or if ``tied_softmax`` is set and the target
        embedding and output layer weight shapes differ
    """
    src_padding_idx = src_vocab.stoi[PAD_TOKEN]
    trg_padding_idx = trg_vocab.stoi[PAD_TOKEN]

    if "embedding_files" in cfg.keys():  #init from pretrained
        assert not cfg.get(
            "tied_embeddings", False
        ), "TODO implement tied embeddings along with pretrained initialization"
        # NOTE(review): this raise is unconditional, so everything below it in
        # this branch is currently unreachable. The branch also never binds
        # `kbsrc_embed`, which the decoder/model construction further down
        # needs.
        raise NotImplementedError(
            "TODO implement kbsrc embed loading for embedding files")
        weight_tensors = []
        for weight_file in cfg["embedding_files"]:
            with open(weight_file, "r") as f:
                weight = []
                for line in f.readlines():
                    line = line.split()
                    line = [float(x) for x in line]
                    weight.append(line)
            weight = FloatTensor(weight)
            weight_tensors.append(weight)
        # Set source Embeddings to Pretrained Embeddings
        src_embed = Embeddings(
            int(weight_tensors[0][0].shape[0]),
            False,  #TODO transformer: change to True
            len(weight_tensors[0]),
        )
        src_embed.lut.weight.data = weight_tensors[0]

        # Set target Embeddings to Pretrained Embeddings
        trg_embed = Embeddings(
            int(weight_tensors[1][0].shape[0]),
            False,  #TODO transformer: change to True
            len(weight_tensors[1]),
        )
        trg_embed.lut.weight.data = weight_tensors[1]
    else:
        src_embed = Embeddings(**cfg["encoder"]["embeddings"],
                               vocab_size=len(src_vocab),
                               padding_idx=src_padding_idx)
        # KB keys get their own embedding table only when asked for; otherwise
        # they share the source embeddings. Both use the encoder's embedding
        # config and the source vocabulary.
        if cfg.get("kb_embed_separate", False):
            kbsrc_embed = Embeddings(**cfg["encoder"]["embeddings"],
                                     vocab_size=len(src_vocab),
                                     padding_idx=src_padding_idx)
        else:
            kbsrc_embed = src_embed

        # this ties source and target embeddings
        # for softmax layer tying, see further below
        if cfg.get("tied_embeddings", False):
            if src_vocab.itos == trg_vocab.itos:
                # share embeddings for src and trg
                trg_embed = src_embed
            else:
                raise ConfigurationError(
                    "Embedding cannot be tied since vocabularies differ.")
        else:
            # Latest TODO: init embeddings with vocab_size = len(trg_vocab joined with kb_vocab)
            trg_embed = Embeddings(**cfg["decoder"]["embeddings"],
                                   vocab_size=len(trg_vocab),
                                   padding_idx=trg_padding_idx)

    # build encoder
    enc_dropout = cfg["encoder"].get("dropout", 0.)
    # Embedding dropout falls back to the encoder dropout when not set.
    enc_emb_dropout = cfg["encoder"]["embeddings"].get("dropout", enc_dropout)
    if cfg["encoder"].get("type", "recurrent") == "transformer":
        assert cfg["encoder"]["embeddings"]["embedding_dim"] == \
               cfg["encoder"]["hidden_size"], \
               "for transformer, emb_size must be hidden_size"

        encoder = TransformerEncoder(**cfg["encoder"],
                                     emb_size=src_embed.embedding_dim,
                                     emb_dropout=enc_emb_dropout)
    else:
        encoder = RecurrentEncoder(**cfg["encoder"],
                                   emb_size=src_embed.embedding_dim,
                                   emb_dropout=enc_emb_dropout)

    # retrieve kb task info
    kb_task = bool(cfg.get("kb", False))
    # k number of kvr attention layers in decoder (eric et al/default: 1)
    k_hops = int(
        cfg.get("k_hops", 1)
    )
    same_module_for_all_hops = bool(cfg.get("same_module_for_all_hops", False))
    do_postproc = bool(cfg.get("do_postproc", True))
    copy_from_source = bool(cfg.get("copy_from_source", True))
    canonization_func = None if canonizer is None else canonizer(
        copy_from_source=copy_from_source)
    kb_input_feeding = bool(cfg.get("kb_input_feeding", True))
    kb_feed_rnn = bool(cfg.get("kb_feed_rnn", True))
    kb_multihead_feed = bool(cfg.get("kb_multihead_feed", False))
    # NOTE(review): the config key is spelled "posEncdKBkeys" (extra "d")
    # while the variable is posEncKBkeys -- confirm which spelling config
    # files are expected to use.
    posEncKBkeys = cfg.get("posEncdKBkeys", False)
    tfstyletf = cfg.get("tfstyletf", True)
    infeedkb = bool(cfg.get("infeedkb", False))
    outfeedkb = bool(cfg.get("outfeedkb", False))
    add_kb_biases_to_output = bool(cfg.get("add_kb_biases_to_output", True))
    kb_max_dims = cfg.get("kb_max_dims", (16, 32))  # should be tuple
    double_decoder = cfg.get("double_decoder", False)
    # actually use separate linear layers, tying only the main one
    tied_side_softmax = cfg.get(
        "tied_side_softmax",
        False)
    # doesnt need to be true for 1 hop (=>BIG PERFORMANCE SAVE), needs to be true for >= 2 hops
    do_pad_kb_keys = cfg.get(
        "pad_kb_keys", True
    )

    # Normalise kb_max_dims to a tuple: iterables are converted, a bare int
    # becomes a 1-tuple, anything else trips the assertion.
    if hasattr(kb_max_dims, "__iter__"):
        kb_max_dims = tuple(kb_max_dims)
    else:
        assert type(kb_max_dims) == int, kb_max_dims
        kb_max_dims = (kb_max_dims, )

    assert cfg["decoder"]["hidden_size"]
    dec_dropout = cfg["decoder"].get("dropout", 0.)
    dec_emb_dropout = cfg["decoder"]["embeddings"].get("dropout", dec_dropout)

    # Decoder selection:
    #   transformer + tfstyletf      -> TransformerDecoder
    #   transformer + not tfstyletf  -> TransformerKBrnnDecoder
    #   otherwise, no kb task        -> RecurrentDecoder
    #   otherwise, kb task           -> KeyValRetRNNDecoder
    if cfg["decoder"].get("type", "recurrent") == "transformer":
        if tfstyletf:
            decoder = TransformerDecoder(
                **cfg["decoder"],
                encoder=encoder,
                vocab_size=len(trg_vocab),
                emb_size=trg_embed.embedding_dim,
                emb_dropout=dec_emb_dropout,
                kb_task=kb_task,
                kb_key_emb_size=kbsrc_embed.embedding_dim,
                feed_kb_hidden=kb_input_feeding,
                infeedkb=infeedkb,
                outfeedkb=outfeedkb,
                double_decoder=double_decoder)
        else:
            decoder = TransformerKBrnnDecoder(
                **cfg["decoder"],
                encoder=encoder,
                vocab_size=len(trg_vocab),
                emb_size=trg_embed.embedding_dim,
                emb_dropout=dec_emb_dropout,
                kb_task=kb_task,
                k_hops=k_hops,
                kb_max=kb_max_dims,
                same_module_for_all_hops=same_module_for_all_hops,
                kb_key_emb_size=kbsrc_embed.embedding_dim,
                kb_input_feeding=kb_input_feeding,
                kb_feed_rnn=kb_feed_rnn,
                kb_multihead_feed=kb_multihead_feed)
    else:
        if not kb_task:
            decoder = RecurrentDecoder(**cfg["decoder"],
                                       encoder=encoder,
                                       vocab_size=len(trg_vocab),
                                       emb_size=trg_embed.embedding_dim,
                                       emb_dropout=dec_emb_dropout)
        else:
            decoder = KeyValRetRNNDecoder(
                **cfg["decoder"],
                encoder=encoder,
                vocab_size=len(trg_vocab),
                emb_size=trg_embed.embedding_dim,
                emb_dropout=dec_emb_dropout,
                k_hops=k_hops,
                kb_max=kb_max_dims,
                same_module_for_all_hops=same_module_for_all_hops,
                kb_key_emb_size=kbsrc_embed.embedding_dim,
                kb_input_feeding=kb_input_feeding,
                kb_feed_rnn=kb_feed_rnn,
                kb_multihead_feed=kb_multihead_feed,
                do_pad_kb_keys=do_pad_kb_keys)

    # specify generator which is mostly just the output layer
    generator = Generator(dec_hidden_size=cfg["decoder"]["hidden_size"],
                          vocab_size=len(trg_vocab),
                          add_kb_biases_to_output=add_kb_biases_to_output,
                          double_decoder=double_decoder)

    model = Model(
                  encoder=encoder, decoder=decoder, generator=generator,
                  src_embed=src_embed, trg_embed=trg_embed,
                  src_vocab=src_vocab, trg_vocab=trg_vocab,\
                  kb_key_embed=kbsrc_embed,\
                  trv_vocab=trv_vocab,
                  k_hops=k_hops,
                  do_postproc=do_postproc,
                  canonize=canonization_func,
                  kb_att_dims=len(kb_max_dims),
                  posEncKBkeys=posEncKBkeys
                  )

    # tie softmax layer with trg embeddings
    if cfg.get("tied_softmax", False):
        # Tying is only possible when both weight matrices have equal shape.
        if trg_embed.lut.weight.shape == \
                model.generator.output_layer.weight.shape:
            # (also) share trg embeddings and softmax layer:
            model.generator.output_layer.weight = trg_embed.lut.weight
            if model.generator.double_decoder:
                # (also also) share trg embeddings and side softmax layer
                assert hasattr(model.generator, "side_output_layer")
                if tied_side_softmax:
                    # because of distributivity this becomes O (x_1+x_2) instead of O_1 x_1 + O_2 x_2
                    model.generator.side_output_layer.weight = trg_embed.lut.weight
        else:
            raise ConfigurationError(
                "For tied_softmax, the decoder embedding_dim and decoder "
                "hidden_size must be the same."
                "The decoder must be a Transformer.")

    # custom initialization of model parameters
    initialize_model(model, cfg, src_padding_idx, trg_padding_idx)

    return model
def _initialize_params(self, init_dict):
    """Overwrite the parameter tensors in place from ``init_dict``.

    ``loc``, ``orientation`` and ``sigma`` are copied as given;
    ``frequency`` is wrapped in a one-element list first, so a scalar entry
    ends up as a length-1 tensor. All values are converted to float32.

    :param init_dict: mapping with keys ``'loc'``, ``'orientation'``,
        ``'sigma'`` and ``'frequency'``.
    """
    # Same treatment for the three entries that are used as-is, in the
    # original assignment order.
    for key in ('loc', 'orientation', 'sigma'):
        values = np.asarray(init_dict[key], dtype=np.float32)
        getattr(self, key).data[...] = FloatTensor(values)

    frequency = np.array([init_dict['frequency']], dtype=np.float32)
    self.frequency.data[...] = FloatTensor(frequency)
def test_orthonormal(self):
    """A 5x5 matrix from ``init.orthonormal`` (gain 1) has all singular values 1."""
    weights = init.orthonormal(FloatTensor(5, 5), gain=1)
    # svd returns (u, s, vh); only the singular values are checked.
    _, singular_values, _ = np.linalg.svd(weights.numpy())
    self.assertTrue(np.allclose(singular_values, 1))
def test_sparse(self):
    """``init.sparse`` leaves no all-zero row or column, for several shapes."""
    for rows, cols in ((1, 1), (2, 1), (1, 2), (10, 7), (7, 10)):
        w = init.sparse(FloatTensor(rows, cols).zero_())
        magnitudes = np.abs(w.numpy())
        # Sum along axis 1 (one total per row) first, then axis 0 (per column);
        # a total close to zero means that row/column received no weight.
        for axis in (1, 0):
            totals = magnitudes.sum(axis)
            self.assertFalse(np.any(np.isclose(totals, 0)))
def eval_quality(self, state, action):
    """Evaluate ``self.quality_function`` for one state/action pair.

    :param state: anything accepted by the ``FloatTensor`` constructor.
    :param action: single value; wrapped into a one-element tensor.
    :return: whatever ``self.quality_function`` returns.
    """
    state_var = Variable(FloatTensor(state))
    action_var = Variable(FloatTensor([action]))
    return self.quality_function(state_var, action_var)
def eval_salience(self, state, action):
    """Evaluate ``self.salience_function`` for one state/action pair.

    :param state: anything accepted by the ``FloatTensor`` constructor.
    :param action: single value; wrapped into a one-element tensor.
    :return: whatever ``self.salience_function`` returns.
    """
    state_var = Variable(FloatTensor(state))
    action_var = Variable(FloatTensor([action]))
    return self.salience_function(state_var, action_var)
def train(self, tasks, model_name, criterion, n_epoch=30, lr=0.1, device="cpu", verbose=True,
          inner_lr=0.1, inner_iter=10, meta_size=50):
    """
    trains the given model with a meta-learning (inner/outer loop) scheme

    One network is shared by all tasks. In every epoch, for each task, the
    first ``meta_size`` training samples drive ``inner_iter`` gradient steps
    on a functional copy of the weights ("fast weights"); the remaining
    training samples give the task's outer loss. The mean outer loss over all
    tasks is back-propagated through the inner steps and applied with Adam.

    NOTE(review): the nesting of the loops below was reconstructed; it assumes
    the meta-update, the evaluation and the metric bookkeeping happen once per
    epoch, and that the evaluation therefore uses the last task of ``tasks``.
    Please confirm against the intended behaviour.

    args:
        tasks: tasks to train the model on; each has 'train' and 'test' lists
               of samples with 'input' and 'output' grids
        model_name: class of the model to train, called as
                    model_name(device, max_height, max_width)
        criterion: loss class for training (instantiated here without arguments)
        n_epoch: number of epochs for training
        lr: learning rate for the outer (Adam) optimization
        device: whether to use CPU or GPU
        verbose: if set to True prints additional info
        inner_lr: the learning rate of the inner loop
        inner_iter: the iterations of the inner loop
        meta_size: how many training samples of a task feed the inner loop;
                   the rest feed the outer loss. If a task has no more than
                   meta_size training samples the outer set is empty and the
                   averaging below divides by zero.

    returns:
        the obtained metrics, the predictions on the test samples and the
        wrong ones, each collected once per epoch
    """
    total_loss_train = []
    total_loss_val = []
    total_accuracy_val = []
    total_accuracy_train = []
    total_accuracy_val_pix = []
    total_accuracy_train_pix = []
    total_predictions = []
    total_wrong_predictions = []

    criterion = criterion()

    # Largest input height and width over every sample of every task; all
    # grids are padded to this size so one network fits them all.
    sh1_big = 0
    sh2_big = 0
    for task in tasks:
        for split in ('train', 'test'):
            for sample in task[split]:
                sh1_big = max(sh1_big, sample['input'].shape[0])
                sh2_big = max(sh2_big, sample['input'].shape[1])

    net = model_name(device, sh1_big, sh2_big).to(device)
    optimizer = Adam(net.parameters(), lr=lr)

    for epoch in tqdm(range(n_epoch)):
        losses = []
        for task in tasks:
            loss_train = []
            loss_val = []
            accuracy_val = []
            accuracy_train = []
            accuracy_val_pix = []
            accuracy_train_pix = []

            inputs = []
            outputs = []
            for sample in task["train"]:
                x = Tensor(sample['input'])
                x = pad_crop(x, sh1_big, sh2_big, x.shape[0], x.shape[1], goal="pad")
                inputs.append(FloatTensor(expand(x).float()).to(device))

                y = Tensor(sample['output'])
                y = pad_crop(y, sh1_big, sh2_big, y.shape[0], y.shape[1], goal="pad")
                outputs.append(LongTensor(y.long()).unsqueeze(0).to(device))

            # Support set for the inner loop, query set for the outer loss.
            inputs_train = inputs[:meta_size]
            inputs_val = inputs[meta_size:]
            outputs_train = outputs[:meta_size]
            outputs_val = outputs[meta_size:]

            # Inner loop: differentiable SGD steps on a copy of the weights.
            fast_weights = OrderedDict(net.named_parameters())
            for _ in range(inner_iter):
                loss = 0
                for x, y in zip(inputs_train, outputs_train):
                    logits = net._forward(x.to(device), fast_weights)
                    loss += criterion(logits.to(device), y.to(device))
                loss /= len(inputs_train)
                # create_graph=True keeps the step itself differentiable so
                # the outer loss can back-propagate through it.
                gradients = torch.autograd.grad(loss, fast_weights.values(), create_graph=True)
                fast_weights = OrderedDict(
                    (name, param - inner_lr * grad)
                    for ((name, param), grad) in zip(fast_weights.items(), gradients)
                )

            # Outer loss of this task, evaluated with the adapted weights.
            loss = 0
            for x, y in zip(inputs_val, outputs_val):
                logits = net._forward(x.to(device), fast_weights)
                loss += criterion(logits.to(device), y.to(device))
            loss /= len(inputs_val)
            # A per-task backward pass and an extra autograd.grad call used to
            # follow here. Their results were never used: the gradients were
            # cleared by optimizer.zero_grad() before the meta-loss backward
            # below, so both were dropped.
            losses.append(loss.float())

        # Meta-update: one Adam step on the mean outer loss over all tasks.
        net.train()
        optimizer.zero_grad()
        meta_loss = torch.stack(losses).mean()
        meta_loss.backward()
        optimizer.step()

        net.eval()
        with torch.no_grad():
            correct_val = 0
            correct_val_pix = 0
            total_val = 0
            loss_iter_val = 0
            predictions = []
            wrong_pred = []
            n_pixels_val = 0
            for sample in task['test']:
                img = FloatTensor(expand(sample['input'])).to(device)
                y = LongTensor(sample['output'])
                labels = pad_crop(y, sh1_big, sh2_big, y.shape[0], y.shape[1], "pad").unsqueeze(0).to(device)
                outputs = net(img)
                _, pred = torch.max(outputs.data, 1)
                predictions.append((img, pred))

                n_pixels_val += pred.shape[1] * pred.shape[2]
                total_val += labels.size(0)

                # 1 when every pixel matches, 0 otherwise.
                flag = (torch.all(torch.eq(pred, labels))).sum().item()
                correct_val += flag
                if flag == 0:
                    wrong_pred.append((img, pred))
                correct_val_pix += (torch.eq(pred, labels)).sum().item()
                loss = criterion(outputs, labels)
                loss_iter_val += loss.item()

            correct_train = 0
            correct_train_pix = 0
            total_train = 0
            loss_iter_train = 0
            n_pixels_train = 0
            for sample in task['train']:
                img = FloatTensor(expand(sample['input'])).to(device)
                y = LongTensor(sample['output'])
                labels = pad_crop(y, sh1_big, sh2_big, y.shape[0], y.shape[1], "pad").unsqueeze(0).to(device)
                outputs = net(img)
                _, pred = torch.max(outputs.data, 1)

                n_pixels_train += pred.shape[1] * pred.shape[2]
                total_train += labels.size(0)
                correct_train += (torch.all(torch.eq(pred, labels))).sum().item()
                correct_train_pix += (torch.eq(pred, labels)).sum().item()
                loss = criterion(outputs, labels)
                loss_iter_train += loss.item()

        loss_train.append(loss_iter_train / len(task['train']))
        loss_val.append(loss_iter_val / len(task['test']))

        val_accuracy = 100 * correct_val / total_val
        val_accuracy_pix = 100 * correct_val_pix / (n_pixels_val)
        accuracy_val.append(val_accuracy)
        accuracy_val_pix.append(val_accuracy_pix)

        train_accuracy = 100 * correct_train / total_train
        train_accuracy_pix = 100 * correct_train_pix / (n_pixels_train)
        accuracy_train.append(train_accuracy)
        accuracy_train_pix.append(train_accuracy_pix)

        if verbose:
            print('\nEpoch: ['+str(epoch+1)+'/'+str(n_epoch)+']')
            print('Train loss is: {}'.format(loss_train[-1]))
            print('Validation loss is: {}'.format(loss_val[-1]))
            print('Train accuracy is: {} %'.format(accuracy_train[-1]))
            print('Train accuracy for pixels is: {} %'.format(accuracy_train_pix[-1]))
            print('Validation accuracy is: {} %'.format(accuracy_val[-1]))
            print('Validation accuracy for pixels is: {} %'.format(accuracy_val_pix[-1]))

        total_loss_train.append(loss_train)
        total_loss_val.append(loss_val)
        total_accuracy_train.append(accuracy_train)
        total_accuracy_train_pix.append(accuracy_train_pix)
        total_accuracy_val.append(accuracy_val)
        total_accuracy_val_pix.append(accuracy_val_pix)
        # Bug fix: this used to append `total_predictions` to itself, so the
        # returned value was a self-referential list with no predictions.
        total_predictions.append(predictions)
        total_wrong_predictions.append(wrong_pred)

    metrics = {'loss_train': total_loss_train, 'loss_val': total_loss_val,
               'accuracy_train': total_accuracy_train, 'accuracy_train_pix': total_accuracy_train_pix,
               'accuracy_val': total_accuracy_val, 'accuracy_val_pix': total_accuracy_val_pix}
    final_pred = total_predictions

    return metrics, final_pred, total_wrong_predictions
def train(self, tasks, model_name, criterion, attention, device, n_epoch=30, lr=0.1, verbose=True):
    """
    Trains a fresh model on every task and evaluates it after each epoch.

    args:
        tasks: iterable of tasks; every task is indexed with 'train' and
            'test', each giving a sequence of samples that carry an 'input'
            and an 'output' entry
        model_name: model factory, called as
            model_name(task['train'], max_dim0, max_dim1, attention)
        criterion: loss factory, instantiated once with no arguments
        attention: forwarded unchanged to the model factory
        device: target passed to every `.to(...)` call
        n_epoch: number of epochs per task
        lr: learning rate handed to Adam
        verbose: if set to True prints the metrics after every epoch

    returns:
        (metrics, final_pred, total_wrong_predictions) where `metrics` maps a
        metric name to the per-epoch values of all tasks concatenated, and the
        two prediction lists hold the (img, pred) pairs of each task's last
        evaluated epoch (all test samples / only the not fully correct ones).
    """
    total_loss_train = []
    total_loss_val = []
    total_accuracy_val = []
    total_accuracy_train = []
    total_accuracy_val_pix = []
    total_accuracy_train_pix = []
    total_predictions = []
    total_wrong_predictions = []

    criterion = criterion()

    def _evaluate(net, samples, keep_predictions=False):
        # One evaluation pass over `samples`; returns the raw counters so the
        # caller can derive averages and percentages.
        n_correct = 0
        n_correct_pix = 0
        n_samples = 0
        n_pixels = 0
        loss_sum = 0
        kept = []
        wrong = []
        for sample in samples:
            img = FloatTensor(expand(sample['input'])).to(device)
            labels = LongTensor(sample['output']).unsqueeze(dim=0).to(device)
            outputs = net(img)
            _, pred = torch.max(outputs.data, 1)
            if keep_predictions:
                kept.append((img, pred))
            n_pixels += pred.shape[1] * pred.shape[2]
            n_samples += labels.size(0)
            # 1 only when every element of the prediction matches the labels.
            exact = (torch.all(torch.eq(pred, labels))).sum().item()
            n_correct += exact
            if keep_predictions and exact == 0:
                wrong.append((img, pred))
            n_correct_pix += (torch.eq(pred, labels)).sum().item()
            loss_sum += criterion(outputs, labels).item()
        return n_correct, n_correct_pix, n_samples, n_pixels, loss_sum, kept, wrong

    for task in tasks:
        # Largest input extent along the first two axes over both splits.
        sh1_big = 0
        sh2_big = 0
        for split in ('train', 'test'):
            for sample in task[split]:
                sh1_big = max(sh1_big, sample['input'].shape[0])
                sh2_big = max(sh2_big, sample['input'].shape[1])

        net = model_name(task['train'], sh1_big, sh2_big, attention).to(device)
        optimizer = Adam(net.parameters(), lr=lr)

        loss_train = []
        loss_val = []
        accuracy_val = []
        accuracy_train = []
        accuracy_val_pix = []
        accuracy_train_pix = []

        # Defined up front so that n_epoch == 0 neither raises NameError nor
        # re-appends the previous task's predictions.
        predictions = []
        wrong_pred = []

        for epoch in tqdm(range(n_epoch)):
            net.train()
            for sample in task['train']:
                img = FloatTensor(expand(sample['input'])).to(device)
                labels = LongTensor(sample['output']).unsqueeze(dim=0).to(device)
                optimizer.zero_grad()
                outputs = net(img)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

            net.eval()
            with torch.no_grad():
                (correct_val, correct_val_pix, total_val, n_pixels_val,
                 loss_iter_val, predictions, wrong_pred) = _evaluate(
                    net, task['test'], keep_predictions=True)
                (correct_train, correct_train_pix, total_train, n_pixels_train,
                 loss_iter_train, _, _) = _evaluate(net, task['train'])

            loss_train.append(loss_iter_train / len(task['train']))
            loss_val.append(loss_iter_val / len(task['test']))

            accuracy_val.append(100 * correct_val / total_val)
            accuracy_val_pix.append(100 * correct_val_pix / n_pixels_val)
            accuracy_train.append(100 * correct_train / total_train)
            accuracy_train_pix.append(100 * correct_train_pix / n_pixels_train)

            if verbose:
                print('\nEpoch: ['+str(epoch+1)+'/'+str(n_epoch)+']')
                print('Train loss is: {}'.format(loss_train[-1]))
                print('Validation loss is: {}'.format(loss_val[-1]))
                print('Train accuracy is: {} %'.format(accuracy_train[-1]))
                print('Train accuracy for pixels is: {} %'.format(accuracy_train_pix[-1]))
                print('Validation accuracy is: {} %'.format(accuracy_val[-1]))
                print('Validation accuracy for pixels is: {} %'.format(accuracy_val_pix[-1]))

        total_loss_train += loss_train
        total_loss_val += loss_val
        total_accuracy_train += accuracy_train
        total_accuracy_train_pix += accuracy_train_pix
        total_accuracy_val += accuracy_val
        total_accuracy_val_pix += accuracy_val_pix
        total_predictions += predictions
        total_wrong_predictions += wrong_pred

    metrics = {'loss_train': total_loss_train, 'loss_val': total_loss_val,
               'accuracy_train': total_accuracy_train,
               'accuracy_train_pix': total_accuracy_train_pix,
               'accuracy_val': total_accuracy_val,
               'accuracy_val_pix': total_accuracy_val_pix}
    final_pred = total_predictions

    return metrics, final_pred, total_wrong_predictions
def main():
    """Train two autoencoders that share a latent space, adversarially.

    Reads the command-line arguments into the module-level ``args``, builds an
    encoder/decoder pair per domain plus three discriminators (latent space,
    domain A, domain B) and alternates discriminator and generator updates.
    Every ``args.log_interval`` iterations the losses and the test-set
    correlation / AUC scores are printed; every ``args.model_save_interval``
    iterations all seven networks are saved under ``args.model_path``.

    Raises:
        ValueError: if ``args.task_name`` is not one of the supported tasks.
    """
    global args
    args = parser.parse_args()

    # The flag arrives as a string; only the literal 'true' enables CUDA.
    cuda = args.cuda == 'true'
    device_id = args.device_id
    task_name = args.task_name

    if task_name in ('flickr8k', 'pascal_sentences'):
        input_size_A, input_size_B = 2048, 300
    else:
        # Previously this fell through and failed later with a NameError on
        # the undefined input sizes.
        raise ValueError('Unsupported task_name: {!r}'.format(task_name))

    latent_size = args.latent_size
    epoch_size = args.epoch_size
    batch_size = args.batch_size
    learning_rate = args.learning_rate

    model_path = args.model_path
    os.makedirs(model_path, exist_ok=True)

    train_A, train_B, test_A, test_B = get_data(task_name)
    n_batches = int(math.ceil(train_A.size / float(batch_size)))

    encoder_A = EncoderFC(input_size_A, [500, 100], latent_size)
    decoder_A = DecoderFC(latent_size, [100, 500], input_size_A)
    encoder_B = EncoderFC(input_size_B, [50, 10], latent_size)
    decoder_B = DecoderFC(latent_size, [10, 50], input_size_B)
    discriminator = DiscriminatorFC(latent_size, [50, 10], 1)
    discriminator_A = DiscriminatorFC(input_size_A, [500, 100], 1)
    discriminator_B = DiscriminatorFC(input_size_B, [500, 100], 1)

    # Fixed order: it is reused for .cuda(), .zero_grad() and saving.
    networks = (encoder_A, decoder_A, encoder_B, decoder_B,
                discriminator, discriminator_A, discriminator_B)
    save_names = ('model_encoder_A', 'model_decoder_A', 'model_encoder_B',
                  'model_decoder_B', 'model_dis', 'model_dis_A', 'model_dis_B')

    if cuda:
        for network in networks:
            network.cuda(device_id)

    recon_criterion = nn.MSELoss(reduction='sum')
    gan_criterion = nn.BCELoss()

    gen_params = list(encoder_A.parameters()) + list(encoder_B.parameters()) + \
        list(decoder_A.parameters()) + list(decoder_B.parameters())
    dis_params = list(discriminator.parameters()) + list(discriminator_A.parameters()) + \
        list(discriminator_B.parameters())

    optim_gen = optim.Adam(gen_params, lr=learning_rate, betas=(0.5, 0.999), weight_decay=0.00001)
    optim_dis = optim.Adam(dis_params, lr=learning_rate, betas=(0.5, 0.999), weight_decay=0.00001)

    iters = 0
    for epoch in range(epoch_size):
        for i in range(n_batches):
            for network in networks:
                network.zero_grad()

            A = Variable(FloatTensor(train_A.next_items(batch_size)))
            B = Variable(FloatTensor(train_B.next_items(batch_size)))
            if cuda:
                A = A.cuda(device_id)
                B = B.cuda(device_id)

            latent_A = encoder_A(A)
            latent_B = encoder_B(B)

            A_from_latent_A = decoder_A(latent_A)
            A_from_latent_B = decoder_A(latent_B)
            B_from_latent_B = decoder_B(latent_B)
            B_from_latent_A = decoder_B(latent_A)

            # Reconstruction Loss
            recon_loss_A = recon_criterion(A_from_latent_A, A)
            recon_loss_B = recon_criterion(B_from_latent_B, B)

            # Cycle Loss
            ABA = decoder_A(encoder_B(B_from_latent_A))
            BAB = decoder_B(encoder_A(A_from_latent_B))
            cycle_loss_A = recon_criterion(ABA, A)
            cycle_loss_B = recon_criterion(BAB, B)

            # Gan Loss
            dis_loss_latent, gen_loss_latent = get_gan_loss(
                discriminator, latent_A, latent_B, gan_criterion)
            dis_loss_A, gen_loss_A = get_gan_loss(
                discriminator_A, A_from_latent_A, A_from_latent_B, gan_criterion)
            dis_loss_B, gen_loss_B = get_gan_loss(
                discriminator_B, B_from_latent_A, B_from_latent_B, gan_criterion)

            dis_loss_total = dis_loss_latent + dis_loss_A + dis_loss_B
            gen_loss_total = 0.001 * (recon_loss_A + recon_loss_B) + \
                0.001 * (cycle_loss_A + cycle_loss_B) + \
                (gen_loss_latent + gen_loss_A + gen_loss_B)

            # Only one side is updated per iteration: the discriminators on
            # multiples of update_interval, the generators otherwise.
            if iters % args.update_interval == 0:
                dis_loss_total.backward()
                optim_dis.step()
            else:
                gen_loss_total.backward()
                optim_gen.step()

            # NOTE(review): the source formatting was lost; the evaluation
            # below is assumed to belong to the logging branch - confirm.
            if iters % args.log_interval == 0:
                print("---------------------")
                print("iters:", iters)
                print("GEN Total Loss:", as_np(gen_loss_total.mean()))
                print("DIS Total Loss:", as_np(dis_loss_total.mean()))
                print("RECON Loss:", as_np(recon_loss_A.mean()), as_np(recon_loss_B.mean()))
                print("CYCLE Loss:", as_np(cycle_loss_A.mean()), as_np(cycle_loss_B.mean()))

                # Score the latent codes of the whole test set in eval mode.
                encoder_A.eval()
                encoder_B.eval()

                domainA_data = Variable(FloatTensor(np.asarray(test_A.items)))
                domainB_data = Variable(FloatTensor(np.asarray(test_B.items)))
                if cuda:
                    domainA_data = domainA_data.cuda(device_id)
                    domainB_data = domainB_data.cuda(device_id)

                domainA_latents = as_np(encoder_A(domainA_data))
                domainB_latents = as_np(encoder_B(domainB_data))

                mean_correlation = calculate_mean_correlation(
                    domainA_latents, domainB_latents)
                auc_score = calculate_auc_score(domainA_latents, domainB_latents)
                print("Mean Correlation: {}".format(mean_correlation))
                print("AUC Score: {}".format(auc_score))

                encoder_A.train()
                encoder_B.train()
                sys.stdout.flush()

            if iters > 0 and iters % args.model_save_interval == 0:
                for network, save_name in zip(networks, save_names):
                    torch.save(network, os.path.join(model_path, save_name))

            iters += 1
def sync_between_gpu(val):
    """Return the sum of `val` over all processes of the distributed group.

    The scalar is wrapped in a one-element CUDA tensor, reduced in place with
    a SUM all-reduce and converted back to a Python number.
    """
    buffer = FloatTensor([val]).cuda(non_blocking=True)
    dist.all_reduce(buffer, op=dist.ReduceOp.SUM)
    return buffer.item()
def sample_genpareto(size):
    """Draw `size` samples by pushing uniform draws through `rv.ppf`.

    The uniform draws are scaled into [0, 0.95) before the inverse CDF is
    applied, and the module-level `threshold` is added to the result.
    """
    # presumably `rv` is a frozen scipy distribution - verify at its definition
    quantile_levels = 0.95 * torch.rand(size)
    excesses = FloatTensor(rv.ppf(quantile_levels))
    return excesses + threshold
# Generator network, loss and one Adam optimiser per network.
# `net_discr`, `input_shape`, `device_` and `envs` are defined outside this fragment.
net_gener = Generator(output_shape=input_shape).to(device_)

objective = BCELoss()
gen_optimizer = Adam(params=net_gener.parameters(), lr=LEARNING_RATE, betas=(0.5, 0.999))
dis_optimizer = Adam(params=net_discr.parameters(), lr=LEARNING_RATE, betas=(0.5, 0.999))

# Per-iteration loss history.
gen_losses = []
dis_losses = []
iter_no = 0

# Fixed target vectors: ones for real samples, zeros for generated ones.
true_labels_v = ones(BATCH_SIZE, dtype=torch.float32, device=device_)
fake_labels_v = zeros(BATCH_SIZE, dtype=torch.float32, device=device_)

for batch_v in iterate_batches(envs):
    # generate extra fake samples, input is 4D: batch, filters, x, y
    gen_input_v = FloatTensor(BATCH_SIZE, LATENT_VECTOR_SIZE, 1, 1).normal_(0, 1).to(device_)
    batch_v = batch_v.to(device_)
    gen_output_v = net_gener(gen_input_v)

    # train discriminator
    dis_optimizer.zero_grad()
    dis_output_true_v = net_discr(batch_v)
    # detach() keeps the discriminator loss from back-propagating into the generator
    dis_output_fake_v = net_discr(gen_output_v.detach())
    dis_loss = objective(dis_output_true_v, true_labels_v) + objective(dis_output_fake_v, fake_labels_v)
    dis_loss.backward()
    dis_optimizer.step()
    dis_losses.append(dis_loss.item())

    # train generator
    gen_optimizer.zero_grad()
    # not detached here, so gradients can reach the generator
    dis_output_v = net_discr(gen_output_v)
def sample_image(batches_done):
    """Generate 81 images from fresh noise and save them as a 9-wide grid.

    The file is written to ``DIRNAME`` with ``batches_done`` as its name.
    """
    noise = torch.randn((81, latentdim, 1, 1))
    static_z = Variable(FloatTensor(noise)).cuda(cudanum)
    generated = G(static_z, static_code).detach().cpu()
    # Shift and halve the generator output before writing it out.
    generated = (generated + 1) / 2.0
    out_path = DIRNAME + "%d.png" % batches_done
    save_image(generated, out_path, nrow=9)
def __init__(self, pattern_specs, mlp_hidden_dim, num_mlp_layers, num_classes, embeddings, vocab, semiring, bias_scale_param, gpu=False, rnn=None, pre_computed_patterns=None, no_sl=False, shared_sl=False, no_eps=False, eps_scale=None, self_loop_scale=None):
    """Create the pattern parameters and the MLP that sits on top of them.

    Args:
        pattern_specs: mapping from pattern length to the number of patterns
            of that length; the values are summed into `total_num_patterns`
            and the largest key becomes `max_pattern_length`.
        mlp_hidden_dim, num_mlp_layers, num_classes: forwarded to `MLP`.
        embeddings: stored as-is; `len(embeddings[0])` is the word dimension
            when no `rnn` is given.
        vocab: stored as-is.
        semiring: stored; its `one` and `from_float` are used for the fixed
            scale values.
        bias_scale_param: stored as-is.
        gpu: passed to `to_cuda(...)` to build the transfer function.
        rnn: optional module; when given, the word dimension is
            `rnn.num_directions * rnn.hidden_dim`.
        pre_computed_patterns: when not None, handed to
            `load_pre_computed_patterns` to replace the random initial values.
        no_sl: when true (and `shared_sl` is not positive) no self-loop scale
            is created and `num_diags` stays 1.
        shared_sl: selects the shared self-loop parameterisation (see the
            comment in the body).
        no_eps: when true, no epsilon parameters are created.
        eps_scale, self_loop_scale: optional fixed floats; when None the
            corresponding scale is `semiring.one(1)`.
    """
    super(SoftPatternClassifier, self).__init__()
    self.semiring = semiring
    self.vocab = vocab
    self.embeddings = embeddings

    self.to_cuda = to_cuda(gpu)

    self.total_num_patterns = sum(pattern_specs.values())
    print(self.total_num_patterns, pattern_specs)
    self.rnn = rnn
    self.mlp = MLP(self.total_num_patterns, mlp_hidden_dim, num_mlp_layers, num_classes)

    if self.rnn is None:
        self.word_dim = len(embeddings[0])
    else:
        self.word_dim = self.rnn.num_directions * self.rnn.hidden_dim
    self.num_diags = 1  # self-loops and single-forward-steps
    self.no_sl = no_sl
    self.shared_sl = shared_sl

    self.pattern_specs = pattern_specs
    self.max_pattern_length = max(list(pattern_specs.keys()))

    self.no_eps = no_eps
    self.bias_scale_param = bias_scale_param

    # Shared parameters between main path and self loop.
    # 1 -- one parameter per state per pattern
    # 2 -- a single global parameter
    if self.shared_sl > 0:
        # NOTE(review): any other positive value leaves shared_sl_data
        # unbound and fails on the Parameter(...) line below.
        if self.shared_sl == SHARED_SL_PARAM_PER_STATE_PER_PATTERN:
            shared_sl_data = randn(self.total_num_patterns, self.max_pattern_length)
        elif self.shared_sl == SHARED_SL_SINGLE_PARAM:
            shared_sl_data = randn(1)

        self.self_loop_scale = Parameter(shared_sl_data)
    elif not self.no_sl:
        if self_loop_scale is not None:
            self.self_loop_scale = self.semiring.from_float(
                self.to_cuda(fixed_var(FloatTensor([self_loop_scale]))))
        else:
            self.self_loop_scale = self.to_cuda(fixed_var(semiring.one(1)))
        # A second diagonal is allocated for the separate self-loop weights.
        self.num_diags = 2

    # end state index for each pattern
    end_states = [[
        end
    ] for pattern_len, num_patterns in self.pattern_specs.items()
        for end in num_patterns * [pattern_len - 1]]

    self.end_states = self.to_cuda(fixed_var(LongTensor(end_states)))

    # One row per (pattern, diagonal, state) combination.
    diag_data_size = self.total_num_patterns * self.num_diags * self.max_pattern_length
    diag_data = randn(diag_data_size, self.word_dim)
    bias_data = randn(diag_data_size, 1)

    # Return value is discarded - presumably normalizes in place; TODO confirm.
    normalize(diag_data)

    if pre_computed_patterns is not None:
        diag_data, bias_data = self.load_pre_computed_patterns(
            pre_computed_patterns, diag_data, bias_data, pattern_specs)

    self.diags = Parameter(diag_data)

    # Bias term
    self.bias = Parameter(bias_data)

    if not self.no_eps:
        self.epsilon = Parameter(
            randn(self.total_num_patterns, self.max_pattern_length - 1))

        # TODO: learned? hyperparameter?
        # since these are currently fixed to `semiring.one`, they are not doing anything.
        if eps_scale is not None:
            self.epsilon_scale = self.semiring.from_float(
                self.to_cuda(fixed_var(FloatTensor([eps_scale]))))
        else:
            self.epsilon_scale = self.to_cuda(fixed_var(semiring.one(1)))

    print("# params:", sum(p.nelement() for p in self.parameters()))
def __init__(self, input_size):
    """Record the layer size and allocate the three working buffers."""
    self.hidden_size = input_size
    # One separate FloatTensor(input_size) per buffer; nothing is written
    # into them here.
    self.input, self.output, self.grad_wrt_input = (
        FloatTensor(input_size) for _ in range(3)
    )
def _initialize_params_bias(self, init_dict):
    """Overwrite the output bias parameters with values from `init_dict`.

    `output_a` and `output_b` are each handled only when the attribute is not
    None and `init_dict` has an entry under the same name; the values are
    converted to float32 and copied into the existing parameter storage.
    """
    for name in ('output_a', 'output_b'):
        param = getattr(self, name)
        if param is None or name not in init_dict:
            continue
        values = np.asarray(init_dict[name], dtype=np.float32)
        param.data[...] = FloatTensor(values)
def __getitem__(self, index):
    """Build (or load from cache) one degraded burst and its clean target.

    Args:
        index: position in `self.file_list` of the image to load.

    Returns:
        dict with keys 'degraded_img' (the degraded frames, plus a noise
        estimate frame when `self.blind` is false), 'original_img' (the clean
        reference frame) and 'vis_exposure' (one-element FloatTensor).
    """
    if self.use_cache:
        try:
            data = self._load_tensor(index)
            return data
        except Exception:
            # Unsuccessful at loading: fall through and regenerate the item.
            # Narrowed from a bare `except:` so that KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            pass

    t0 = time()
    # original image
    target_path = os.path.join(self.data_root,
                               self.file_list[index] + '.pth')
    # Pixel values are divided by 255 before the float32 conversion.
    img = (np.array(Image.open(target_path)) / 255.0).astype(np.float32)
    # degradation pipeline, only one needing for N frame
    t1_load = time()
    degrade_param = self._randomize_parameter()
    degrade_pipeline, target_pipeline = self._create_pipeline(**{**self.pipeline_configs,
                                                                 **degrade_param})
    t2_create_pipeline = time()
    # Actually process image.
    img = FloatTensor(img).permute(2, 0, 1)

    # Crop first so that we don't waste computation on the whole image.
    # image with big jitter on original image
    img_big_jitter = bayer_crop_tensor(
        img, self.im_size_upscale, self.im_size_upscale, self.cropping
    )

    if len(img_big_jitter.size()) == 3:
        img_big_jitter = img_big_jitter.unsqueeze(0)

    # get N frames with big or small jitters
    burst_jitter = []
    for i in range(self.burst_length):
        # this is the ref. frame without shift
        if i == 0:
            burst_jitter.append(
                F.interpolate(
                    img_big_jitter[:, :, self.big_restore_upscale:-self.big_restore_upscale,
                                   self.big_restore_upscale:-self.big_restore_upscale],
                    scale_factor=1 / self.down_sample
                )
            )
        else:
            # whether to flip the coin
            big_jitter = np.random.binomial(1, np.random.poisson(lam=1.5) / self.burst_length)
            if big_jitter:
                burst_jitter.append(
                    F.interpolate(
                        bayer_crop_tensor(
                            img_big_jitter,
                            self.im_size_extra,
                            self.im_size_extra,
                            self.cropping
                        ),
                        scale_factor=1 / self.down_sample
                    )
                )
            else:
                img_small_jitter = img_big_jitter[:, :, self.big2small_upscale:-self.big2small_upscale,
                                                  self.big2small_upscale:-self.big2small_upscale]
                burst_jitter.append(
                    F.interpolate(
                        bayer_crop_tensor(
                            img_small_jitter,
                            self.im_size_extra,
                            self.im_size_extra,
                            self.cropping
                        ),
                        scale_factor=1 / self.down_sample
                    )
                )
    burst_jitter = torch.cat(burst_jitter, dim=0)

    # Degrade every frame of the burst independently.
    degraded = torch.zeros_like(burst_jitter)
    for i in range(self.burst_length):
        degraded[i, ...] = degrade_pipeline(burst_jitter[i, ...])
    target = burst_jitter[0, ...]

    # if not blind estimation, compute the estimated noise
    if not self.blind:
        read_sigma, poisson_k = degrade_param['read_noise_sigma'], degrade_param['poisson_k']
        noise = torch.sqrt(
            read_sigma ** 2 + poisson_k ** 2 * degraded[0, ...]
        ).unsqueeze(0)
        degraded = torch.cat([degraded, noise], dim=0)

    # If not exposure correction, also apply exposure adjustment to the image.
    if not self.pipeline_configs["exposure_correction"]:
        target = target_pipeline(target).squeeze()

    t3_degrade = time()
    exp_adjustment = degrade_param['exp_adjustment']
    # Bayer phase selection
    target = target.unsqueeze(0)

    # Stack degraded frames and target so both receive the same crop.
    im = torch.cat([degraded, target], 0)
    if self.pipeline_configs["bayer_crop_phase"] is None:
        # There are 4 phases of Bayer mosaick.
        phase = np.random.choice(4)
    else:
        phase = self.pipeline_configs["bayer_crop_phase"]
    x = phase % 2
    y = (phase // 2) % 2
    im = im[:, :, y:(y + self.im_size), x:(x + self.im_size)]
    degraded, target = torch.split(im, self.burst_length if self.blind else self.burst_length + 1, dim=0)
    t4_bayerphase = time()
    t5_resize = time()

    vis_exposure = 0 if self.pipeline_configs["exposure_correction"] else -exp_adjustment
    t6_bayermask = time()

    if DEBUG_TIME:
        # report
        print("--------------------------------------------")
        # Divided by 100 so that `t / t_total` below reads as a percentage.
        t_total = (t6_bayermask - t0) / 100.0
        t_load = t1_load - t0
        t_create_pipeline = t2_create_pipeline - t1_load
        t_process = t3_degrade - t2_create_pipeline
        t_bayercrop = t4_bayerphase - t3_degrade
        t_resize = t5_resize - t4_bayerphase
        t_bayermask = t6_bayermask - t5_resize
        print("load: {} ({}%)".format(t_load, t_load / t_total))
        print("create_pipeline: {} ({}%)".format(t_create_pipeline, t_create_pipeline / t_total))
        print("process: {} ({}%)".format(t_process, t_process / t_total))
        print("bayercrop: {} ({}%)".format(t_bayercrop, t_bayercrop / t_total))
        print("resize: {} ({}%)".format(t_resize, t_resize / t_total))
        print("bayermask: {} ({}%)".format(t_bayermask, t_bayermask / t_total))
        print("--------------------------------------------")

    data = {'degraded_img': degraded,
            'original_img': target.squeeze(),
            'vis_exposure': FloatTensor([vis_exposure]),
            }

    if self.use_cache:
        # TODO: Start a new thread to save.
        self._save_tensor(data, index)

    return data
def update_dict(self, embeddings, targets):
    """Seed `self.memory` from `embeddings` if it has not been set yet.

    `targets` is accepted but not used; once a memory exists the call is a
    no-op.
    """
    # caution: the targets probably need to stay fixed
    if self.memory is not None:
        return
    self.memory = FloatTensor(embeddings)
# Smoke test: build an Agent, feed it one preprocessed Pong frame and run
# repeated gradient steps towards a constant target.
from Estimator import Agent
import gym
from utils import preprocess
import torch.autograd as autograd
from torch.autograd import Variable
from torch import FloatTensor, LongTensor
import numpy as np
import torch

agent = Agent(2)
env = gym.make("Pong-v4")
# Two leading singleton axes are added in front of the preprocessed frame.
state = np.expand_dims(np.expand_dims(preprocess(env.reset()), 0), 0)
#inp = autograd.Variable(torch.FloatTensor([[state.tolist()]]))
inp = Variable(torch.from_numpy(state).float())
print(inp)
print(inp.size())
q_pred = agent.predict_q_values(inp)
print(q_pred)
# Constant target value and action index reused on every iteration.
targ = Variable(FloatTensor([[1]]))
actions = Variable(LongTensor([[1]]))
# NOTE(review): the original indentation was lost; both calls are assumed to
# run on every iteration - confirm.
for i in range(100):
    agent.accumulate_gradients(inp, targ, actions)
    agent.update_parameters()
def __getitem__(self, idx):
    """Load the wav file at position `idx` and return its two tensors.

    Returns:
        (FloatTensor of `VoiceData.mfcc()`, LongTensor of `VoiceData.phn()`).
    """
    sample = VoiceData(self.wav_files[idx], self.mode, init_all=self.init_all)
    features = FloatTensor(sample.mfcc())
    labels = LongTensor(sample.phn())
    return features, labels
def train(self):
    """Run 1000 epochs of alternating discriminator / generator updates.

    For every batch the discriminator `self.d` is updated on the real images
    and on detached generator output, then the generator `self.g` is updated
    against the real label. Every 10th batch a sample grid is written and
    both networks are saved.
    """
    # Reusable CUDA buffers; they are resized in place to each batch below.
    noise = Variable(FloatTensor(self.batch_size, 100, 1, 1)).cuda()
    real = Variable(
        FloatTensor(self.batch_size, self.nc, self.image_size, self.image_size)
    ).cuda()
    label = Variable(FloatTensor(self.batch_size)).cuda()

    nepoch = 1000
    real_label, fake_label = 1, 0
    bce = nn.BCELoss()

    for epoch in range(1, nepoch + 1):
        for i, batch in enumerate(self.data_loader):
            # --- Gradient of Discriminator ---
            self.d.zero_grad()
            images, _ = batch
            n_images = images.size(0)
            real.data.resize_(images.size()).copy_(images)
            label.data.resize_(n_images).fill_(real_label)

            # train Discriminator with real image
            loss_d_real = bce(self.d(real), label)
            loss_d_real.backward()

            # train Discriminator with fake image
            label.data.fill_(fake_label)
            noise.data.resize_(n_images, 100, 1, 1)
            noise.data.normal_(0, 1)

            # generate fake image
            fake = self.g(noise)

            # detach() keeps this backward pass out of the generator
            loss_d_fake = bce(self.d(fake.detach()), label)
            loss_d_fake.backward(retain_graph=True)

            errD = loss_d_real + loss_d_fake
            self.opt_d.step()

            # --- Gradient of Generator ---
            self.g.zero_grad()
            label.data.fill_(real_label)
            errG = bce(self.d(fake), label)
            errG.backward()
            self.opt_g.step()

            # --- Output Log ---
            sys.stdout.write('\r')
            sys.stdout.write(
                '| Epoch [%2d/%2d] Iter[%5d/%5d] Loss(D): %.4f Loss(G): %.4f'
                % (epoch, nepoch, i, len(self.data_loader), errD, errG))
            sys.stdout.flush()

            # --- Visualize ---
            if i % 10 != 0:
                continue
            f_noise = Variable(
                FloatTensor(self.batch_size, 100, 1, 1).normal_(0, 1)).cuda()
            f_fake = self.g(f_noise)
            out_path = 'Data_and_Results/DCGAN_Result/{0}_{1}.jpg'.format(
                epoch, i)
            print(' | Saving result')
            uts.save_image(tensor=f_fake.data,
                           filename=out_path,
                           nrow=int(math.sqrt(self.batch_size)),
                           normalize=True)

            # save the model
            # NOTE(review): the source indentation was lost; saving is assumed
            # to happen together with the visualisation - confirm.
            torch.save(self.g, 'Data_and_Results/DCGAN_model/net_g.pt')
            torch.save(self.d, 'Data_and_Results/DCGAN_model/net_d.pt')
def predict(self, s):
    """Run `self.actor_e` on state `s` and return its output as a NumPy array."""
    state = Variable(FloatTensor(s))
    return self.actor_e.forward(state).data.numpy()
def new_parameter(self, *size):
    """Create a trainable parameter of shape `size` with Xavier-normal values.

    Args:
        *size: dimensions of the parameter; `xavier_normal_` needs at least
            two of them to compute fan-in / fan-out.

    Returns:
        torch.nn.Parameter filled in place by `torch.nn.init.xavier_normal_`.
    """
    out = Parameter(FloatTensor(*size))
    # `xavier_normal` (no trailing underscore) is the deprecated spelling;
    # the in-place variant fills the tensor the same way.
    torch.nn.init.xavier_normal_(out)
    return out
def update(self, curr_states, next_states, actions, rewards, terminals, discount, curr_model, eval_model, optimizer):
    """Take one gradient step on the summed per-reward-type Q-value losses.

    Args:
        curr_states: batch of current states (a tensor/Variable with `.data`).
        next_states: batch of successor states, indexable by a boolean mask.
        actions: sequence with the action index taken in each transition.
        rewards: sequence of dicts, one per transition, mapping a reward type
            to the reward observed for it.
        terminals: sequence of flags, truthy where the transition ended the
            episode.
        discount: discount factor applied to the next-state value.
        curr_model: callable `curr_model(states, r_type)` returning Q-values;
            its parameters are the ones being clipped and optimised.
        eval_model: accepted for interface compatibility; not used here.
        optimizer: optimiser stepping `curr_model`'s parameters.

    The scalar loss of the step is appended to `self.looses`.
    """
    # Regroup the per-transition reward maps into one list per reward type.
    reward_major = {}
    for r_map in rewards:
        for r_type, obs in r_map.items():
            reward_major.setdefault(r_type, []).append(obs)

    # Boolean mask of transitions that have a successor state. (Indexing with
    # a uint8 mask is deprecated in PyTorch.)
    non_final_mask = ~torch.as_tensor(terminals, dtype=torch.bool)
    non_final_next_states = next_states[non_final_mask]

    # The greedy next action is chosen on the Q-values summed over all
    # reward types.
    policy_next_qs = None
    for r_type in self.reward_types:
        next_r_qs = curr_model(next_states, r_type)
        policy_next_qs = next_r_qs + (policy_next_qs if policy_next_qs is not None else 0)
    policy_next_actions = policy_next_qs.max(1)[1].unsqueeze(1)
    policy_next_actions = policy_next_actions[non_final_mask]

    actions = torch.LongTensor(actions).unsqueeze(1)

    loss = 0
    for r_type in self.reward_types:
        state_input = Variable(curr_states.data.clone(), requires_grad=True)
        predicted = curr_model(state_input, r_type).gather(1, actions)
        reward_batch = FloatTensor(reward_major[r_type]).unsqueeze(1)

        # Terminal transitions keep a next-state value of zero.
        target_q = Variable(torch.zeros(predicted.shape), requires_grad=False)
        # NOTE(review): the bootstrap value comes from curr_model and is not
        # detached, so gradients also flow through the target - confirm this
        # is intended.
        target_q[non_final_mask] = curr_model(non_final_next_states, r_type).gather(1, policy_next_actions)

        # One-step target: reward + discount * next value. The reward itself
        # must not be discounted.
        target_q = reward_batch + discount * target_q

        loss += MSELoss()(predicted, target_q)

    optimizer.zero_grad()
    loss.backward()
    torch.nn.utils.clip_grad_norm_(curr_model.parameters(), 100)
    optimizer.step()
    self.looses.append(loss.data.item())
def python_to_tensor(a):
    """Wrap a plain number in a one-element FloatTensor.

    Anything that is not a `numbers.Number` is returned unchanged.
    """
    return FloatTensor([a]) if isinstance(a, numbers.Number) else a