Example #1
    def forward(self, target):
        # Sample a batch from the target task and move it to the GPU (via `w`).
        inp, out = target.sample()
        inp = w(Variable(inp.view(inp.size()[0], 28 * 28)))
        out = w(Variable(out))

        # Apply each hidden layer in order until we run out of `mat_{i}` keys.
        cur_layer = 0
        while f'mat_{cur_layer}' in self.layers:
            inp = self.activation(self.layers[f'mat_{cur_layer}'](inp))
            cur_layer += 1

        inp = f.log_softmax(self.layers['final_mat'](inp), dim=1)
        return self.loss(inp, out)
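All of these snippets route tensors and modules through a `w` helper that is not shown. In this family of learning-to-learn implementations it is usually just a CUDA wrapper; a minimal sketch under that assumption:

import torch
from torch.autograd import Variable  # legacy API, kept to match the snippets

USE_CUDA = torch.cuda.is_available()

def w(v):
    # Move a tensor, Variable, or module to the GPU when one is available.
    return v.cuda() if USE_CUDA else v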
Example #2
def fit_normal(target_cls,
               target_to_opt,
               opt_class,
               n_tests=100,
               n_epochs=100,
               batch_size=128,
               tracker='default',
               **kwargs):
    # Baseline: fit the optimizee with a standard hand-tuned optimizer and
    # record the loss curve for each of `n_tests` independent runs.
    results = []
    for _ in tqdm(range(n_tests), 'tests'):
        target = target_cls(training=False, batch_size=batch_size)
        optimizee = w(target_to_opt())

        # Resolve the tracker once; passing tracker=None disables tracking.
        track = trackers[tracker] if tracker is not None else None
        if track is not None:
            optimizee.register_forward_hook(track.forward)

        optimizer = opt_class(optimizee.parameters(), **kwargs)
        total_loss = []
        for _ in range(n_epochs):
            if track is not None:
                track.start_timer('optimizee_predict')
            loss = optimizee(target)
            if track is not None:
                track.stop_timer('optimizee_predict')

            total_loss.append(loss.data.cpu().numpy())

            if track is not None:
                track.start_timer('train_optimizer')
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if track is not None:
                track.stop_timer('train_optimizer')
        results.append(total_loss)
    return results
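A plausible baseline call, with `MNISTLoss` (the sampling target) and `MNISTNet` (the optimizee) standing in for whatever classes the codebase actually defines; both names are hypothetical:

from torch import optim

# Extra kwargs (here `lr`) are forwarded to the optimizer class.
baseline = fit_normal(MNISTLoss, MNISTNet, optim.Adam,
                      n_tests=10, n_epochs=100, lr=0.01)
# `baseline` is a list of `n_tests` loss curves, each `n_epochs` long.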
Example #3
def get_fit_dict_test(n_tests,
                      opt_dict,
                      *args,
                      **kwargs):
    # Rebuild the learned optimizer from a saved state dict and evaluate it
    # over `n_tests` runs of `do_fit`; extra arguments are forwarded unchanged.
    opt = w(Optimizer(preproc=True))
    opt.load_state_dict(opt_dict)
    np.random.seed(0)  # fixed seed so test runs are comparable across calls
    tracker = kwargs.get('tracker', 'default')
    trackers[tracker].start_timer('optimize')
    res = [do_fit(opt, *args, **kwargs) for _ in tqdm(range(n_tests), 'optimizer')]
    trackers[tracker].stop_timer('optimize')
    return res
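Given `do_fit`'s signature in Example #6 below, the positional `*args` here line up as (meta_opt, target_cls, target_to_opt, unroll, optim_it, out_mul), and `meta_opt` can be `None` because nothing is trained at test time. A sketch, again with hypothetical class names and with `best_net` taken from `fit_optimizer` in Example #5:

curves = get_fit_dict_test(10, best_net,    # saved optimizer state dict
                           None,            # meta_opt is unused when not training
                           MNISTLoss, MNISTNet,
                           1, 100, 0.1,     # unroll, optim_it, out_mul
                           should_train=False)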
Example #4
    def forward(self, inp, hidden, cell):
        if self.preproc:
            # Gradient preprocessing from Appendix A of "Learning to learn by
            # gradient descent by gradient descent": encode each scalar gradient
            # as two values so that gradients of very different magnitudes land
            # in a comparable range for the LSTM.
            #
            # Note: this block operates on raw tensors (`inp.data`), so no
            # gradient flows back through `inp` itself. That is fine here,
            # because the algorithm already detaches the incoming gradients.
            inp = inp.data
            inp2 = w(torch.zeros(inp.size()[0], 2))
            # Large-magnitude gradients: (log|g| / p, sign(g)).
            keep_grads = (torch.abs(inp) >= self.preproc_threshold).squeeze()
            inp2[:, 0][keep_grads] = (
                torch.log(torch.abs(inp[keep_grads]) + 1e-8) /
                self.preproc_factor).squeeze()
            inp2[:, 1][keep_grads] = torch.sign(inp[keep_grads]).squeeze()

            # Small-magnitude gradients: (-1, e^p * g).
            inp2[:, 0][~keep_grads] = -1
            inp2[:, 1][~keep_grads] = (float(np.exp(self.preproc_factor)) *
                                       inp[~keep_grads]).squeeze()
            inp = w(Variable(inp2))
        # Two stacked LSTM cells, then a linear head producing the update.
        hidden0, cell0 = self.recurs(inp, (hidden[0], cell[0]))
        hidden1, cell1 = self.recurs2(hidden0, (hidden[1], cell[1]))
        return self.output(hidden1), (hidden0, hidden1), (cell0, cell1)
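Spelled out, the Appendix A rule maps each scalar gradient g to the pair (log|g| / p, sign(g)) when |g| >= e^(-p), and to (-1, e^p * g) otherwise, where p is `preproc_factor` (the paper uses p = 10, so `preproc_threshold` would be e^(-10)). A quick check of both branches:

import torch

p = 10.0
g = torch.tensor([[0.5], [1e-6]])                 # one large, one tiny gradient
large = torch.log(torch.abs(g[0])) / p            # ≈ -0.0693, paired with sign(g) = 1.0
small = float(torch.exp(torch.tensor(p))) * g[1]  # ≈  0.0220, paired with -1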
Example #5
def fit_optimizer(target_cls,
                  target_to_opt,
                  preproc=False,
                  unroll=20,
                  optim_it=100,
                  n_epochs=20,
                  n_tests=100,
                  lr=0.001,
                  batch_size=128,
                  iterations=20,
                  out_mul=1.0,
                  tracker='default'):
    # Meta-train the LSTM optimizer: `iterations` training unrolls per epoch,
    # then evaluate on `n_tests` fresh optimizees and keep the best weights.
    opt_net = w(Optimizer(preproc=preproc))
    meta_opt = optim.Adam(opt_net.parameters(), lr=lr)

    best_net = None
    best_loss = float('inf')

    for _ in tqdm(range(n_epochs), 'epochs'):
        for _ in tqdm(range(iterations), 'iterations'):
            trackers[tracker].start_timer('fit_optimizer')
            do_fit(opt_net, meta_opt, target_cls, target_to_opt, unroll, optim_it, out_mul,
                   batch_size=batch_size,
                   should_train=True,
                   tracker=tracker)
            trackers[tracker].stop_timer('fit_optimizer')

        trackers[tracker].start_timer('calculate_loss')
        loss = np.mean([
            np.sum(do_fit(opt_net, meta_opt, target_cls, target_to_opt, unroll, optim_it, out_mul,
                          batch_size=batch_size,
                          should_train=False,
                          tracker=tracker))
            for _ in tqdm(range(n_tests), 'tests')
        ])
        trackers[tracker].stop_timer('calculate_loss')

        print(loss)
        if loss < best_loss:
            print(best_loss, loss)  # previous best vs. new best
            best_loss = loss
            best_net = copy.deepcopy(opt_net.state_dict())

    return best_loss, best_net
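A plausible meta-training run (class names hypothetical, as above); the returned state dict is what `get_fit_dict_test` in Example #3 expects as `opt_dict`:

best_loss, best_net = fit_optimizer(MNISTLoss, MNISTNet,
                                    preproc=True, out_mul=0.1,
                                    n_epochs=5, n_tests=10)
torch.save(best_net, 'learned_optimizer.pth')  # `best_net` is a plain state dict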
Example #6
def do_fit(opt_net,
           meta_opt,
           target_cls,
           target_to_opt,
           unroll,
           optim_it,
           out_mul,
           batch_size=128,
           should_train=True,
           tracker='default'):
    if should_train:
        opt_net.train()
    else:
        # At test time we never backprop into the optimizer's weights, so the
        # graph can be cut after every single step (unroll of 1).
        opt_net.eval()
        unroll = 1

    target = target_cls(training=should_train, batch_size=batch_size)
    optimizee = w(target_to_opt())

    optimizee.register_forward_hook(trackers[tracker].forward)

    # One row of LSTM hidden/cell state per optimizee parameter (the learned
    # optimizer is applied coordinatewise).
    n_params = 0
    for _, p in optimizee.all_named_parameters():
        n_params += int(np.prod(p.size()))
    hidden_states = [w(Variable(torch.zeros(n_params, opt_net.hidden_sz))) for _ in range(2)]
    cell_states = [w(Variable(torch.zeros(n_params, opt_net.hidden_sz))) for _ in range(2)]
    all_losses_ever = []
    if should_train:
        meta_opt.zero_grad()
    all_losses = None
    for iteration in range(1, optim_it + 1):
        trackers[tracker].start_timer('optimizee_predict')
        loss = optimizee_forward(optimizee, target)
        trackers[tracker].stop_timer('optimizee_predict')

        # Accumulate a differentiable running sum of losses for the current
        # unroll segment; it is backpropagated every `unroll` iterations.
        if all_losses is None:
            all_losses = loss
        else:
            all_losses += loss

        all_losses_ever.append(loss.data.cpu().numpy())

        trackers[tracker].start_timer('calculate_gradients')
        calculate_gradients(loss)
        trackers[tracker].stop_timer('calculate_gradients')

        offset = 0
        result_params = {}
        hidden_states2 = [w(Variable(torch.zeros(n_params, opt_net.hidden_sz))) for _ in range(2)]
        cell_states2 = [w(Variable(torch.zeros(n_params, opt_net.hidden_sz))) for _ in range(2)]

        trackers[tracker].start_timer('update_hidden_states')
        for name, p in optimizee.all_named_parameters():
            cur_sz = int(np.prod(p.size()))
            # Detach the gradient values from the graph so the meta-gradient
            # does not flow through them, while the parameter update itself
            # stays differentiable with respect to the optimizer's output.
            gradients = detach_var(p.grad.view(cur_sz, 1))
            hs = [h[offset:offset + cur_sz] for h in hidden_states]
            cs = [c[offset:offset + cur_sz] for c in cell_states]
            trackers[tracker].start_timer('optimizer_predict')
            updates, new_hidden, new_cell = optimizer_forward(opt_net, gradients, hs, cs)
            trackers[tracker].stop_timer('optimizer_predict')
            for i in range(len(new_hidden)):
                hidden_states2[i][offset:offset + cur_sz] = new_hidden[i]
                cell_states2[i][offset:offset + cur_sz] = new_cell[i]
            result_params[name] = p + updates.view(*p.size()) * out_mul
            result_params[name].retain_grad()

            offset += cur_sz
        trackers[tracker].stop_timer('update_hidden_states')

        # Truncated BPTT: every `unroll` steps, backprop the accumulated loss
        # into the optimizer's weights and cut the graph.
        if iteration % unroll == 0:
            if should_train:
                trackers[tracker].start_timer('train_optimizer')
                meta_opt.zero_grad()
                all_losses.backward()
                meta_opt.step()
                trackers[tracker].stop_timer('train_optimizer')

            all_losses = None

            optimizee = w(target_to_opt())
            optimizee.load_state_dict(result_params)
            optimizee.zero_grad()
            hidden_states = [detach_var(v) for v in hidden_states2]
            cell_states = [detach_var(v) for v in cell_states2]

        else:
            trackers[tracker].start_timer('update_optimizee')
            update_optimizee(optimizee, result_params)
            trackers[tracker].stop_timer('update_optimizee')
            assert len(list(optimizee.all_named_parameters())), 'optimizee has no parameters'
            hidden_states = hidden_states2
            cell_states = cell_states2

    return all_losses_ever
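`do_fit` also depends on an unshown `detach_var` helper. For the truncated-BPTT scheme above it has to return a fresh leaf with `requires_grad=True`, so each unroll segment can backpropagate into the carried-over state without reaching into the previous segment; a plausible definition in the same legacy-PyTorch style:

def detach_var(v):
    # Copy the data into a new leaf Variable: gradient flow stops here, but
    # the next unroll segment can still differentiate through the new leaf.
    var = w(Variable(v.data, requires_grad=True))
    var.retain_grad()
    return var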