def test_MultiCriterion(self):
    """MultiCriterion must equal the weighted sum of its member criteria,
    for both forward (loss) and backward (gradInput), and must survive a
    float() type conversion."""
    x = torch.rand(2, 10)
    y = torch.LongTensor((1, 8))
    c_nll = nn.ClassNLLCriterion()
    c_xent = nn.CrossEntropyCriterion()
    combined = nn.MultiCriterion().add(c_nll, 0.5).add(c_xent)

    # forward: combined loss == 0.5 * NLL + 1.0 * CrossEntropy
    loss = combined.forward(x, y)
    expected_loss = c_nll.forward(x, y) / 2 + c_xent.forward(x, y)
    self.assertEqual(loss, expected_loss)

    # backward: gradients combine with the same weights
    grad = combined.backward(x, y)
    expected_grad = c_nll.backward(x, y).clone().div(2).add(
        c_xent.backward(x, y))
    self.assertEqual(grad, expected_grad)

    # test type
    combined.float()
    grad = grad.clone()
    x_f = x.float()
    y_f = y
    loss_f = combined.forward(x_f, y_f)
    grad_f = combined.backward(x_f, y_f)
    self.assertEqual(loss, loss_f)
    self.assertEqual(grad.float(), grad_f)

    # Check that these don't raise errors
    combined.__repr__()
    str(combined)
def ClassificationTrainValidate(model, dataset, p):
    """Train a classification model and report top-1/top-5/NLL statistics.

    Runs SGD with Nesterov momentum over ``dataset['train']()`` for
    ``p['nEpochs']`` epochs, evaluating on ``dataset['val']()`` after each
    epoch.  If ``epoch.pth``/``model.pth`` exist, training resumes from the
    saved checkpoint (and ``model`` is replaced by the loaded one).

    Args:
        model:   network exposing the legacy-nn interface
                 (``forward``/``backward``/``training``/``evaluate``/
                 ``flattenParameters``).
        dataset: dict with callables 'train' and 'val', each yielding
                 batches shaped like ``{'input': ..., 'target': ...}``.
        p:       hyper-parameter dict; missing keys are filled with
                 defaults IN PLACE (callers may rely on seeing them).

    Side effects: prints progress, and when ``p['checkPoint']`` is true
    writes ``model.pth`` and ``epoch.pth`` to the working directory.
    """
    t = model.type()
    # Fill in default hyper-parameters; setdefault keeps the original
    # behavior of mutating the caller's dict.
    p.setdefault('nEpochs', 100)
    p.setdefault('initial_LR', 1e-1)
    p.setdefault('LR_decay', 4e-2)
    p.setdefault('weightDecay', 1e-4)
    p.setdefault('momentum', 0.9)
    p.setdefault('checkPoint', False)
    optimState = {
        'learningRate': p['initial_LR'],
        'learningRateDecay': 0.0,
        'momentum': p['momentum'],
        'nesterov': True,
        'dampening': 0.0,
        'weightDecay': p['weightDecay'],
        'epoch': 1}
    if os.path.isfile('epoch.pth'):
        optimState['epoch'] = torch.load('epoch.pth') + 1
        # Message fixed: the checkpoint actually loaded is model.pth.
        print('Restarting at epoch ' +
              str(optimState['epoch']) +
              ' from model.pth ..')
        model = torch.load('model.pth')
    print(p)
    criterion = nn.CrossEntropyCriterion()
    criterion.type(model.type())
    params, gradParams = model.flattenParameters()
    print('#parameters', params.nelement())

    for epoch in range(optimState['epoch'], p['nEpochs'] + 1):
        model.training()
        stats = {'top1': 0, 'top5': 0, 'n': 0, 'nll': 0}
        # Exponential LR schedule: LR = initial_LR * exp((1-epoch)*decay).
        optimState['learningRate'] = p['initial_LR'] * \
            math.exp((1 - epoch) * p['LR_decay'])
        start = time.time()
        for batch in dataset['train']():
            # BUG FIX: .type() is NOT in-place; the converted tensor must
            # be assigned back (the original discarded the result for
            # 'input', so it was never converted to the model's type).
            batch['input'] = batch['input'].type(t)
            batch['target'] = batch['target'].type(t)
            model.forward(batch['input'])
            criterion.forward(model.output, batch['target'])
            updateStats(stats, model.output, batch['target'],
                        criterion.output)
            gradParams.zero_()  # model:zeroGradParameters()
            criterion.backward(model.output, batch['target'])
            model.backward(batch['input'], criterion.gradInput)

            # optim.sgd expects a closure; gradients were already
            # accumulated above, so it just hands them back.
            def feval(x):
                return criterion.output, gradParams
            optim.sgd(feval, params, optimState)
        print(epoch,
              'train: top1=%.2f%% top5=%.2f%% nll:%.2f time:%.1fs' %
              (100 * (1 - 1.0 * stats['top1'] / stats['n']),
               100 * (1 - 1.0 * stats['top5'] / stats['n']),
               stats['nll'] / stats['n'],
               time.time() - start))
        if p['checkPoint']:
            # clearState() drops cached activations so the checkpoint
            # stays small.
            model.modules[0].clearState()
            torch.save(model, 'model.pth')
            torch.save(epoch, 'epoch.pth')

        # ---- validation pass ----
        model.evaluate()
        # `s` is a module-level stats object (defined elsewhere in this
        # file); counters are reset before measuring the val pass.
        s.forward_pass_multiplyAdd_count = 0
        s.forward_pass_hidden_states = 0
        stats = {'top1': 0, 'top5': 0, 'n': 0, 'nll': 0}
        start = time.time()
        for batch in dataset['val']():
            # Same fix as the training loop: assign the converted tensor.
            batch['input'] = batch['input'].type(t)
            batch['target'] = batch['target'].type(t)
            model.forward(batch['input'])
            criterion.forward(model.output, batch['target'])
            updateStats(stats, model.output, batch['target'],
                        criterion.output)
        print(epoch,
              'test: top1=%.2f%% top5=%.2f%% nll:%.2f time:%.1fs' %
              (100 * (1 - 1.0 * stats['top1'] / stats['n']),
               100 * (1 - 1.0 * stats['top5'] / stats['n']),
               stats['nll'] / stats['n'],
               time.time() - start))
        print(
            '%.3e MultiplyAdds/sample %.3e HiddenStates/sample' %
            (s.forward_pass_multiplyAdd_count / stats['n'],
             s.forward_pass_hidden_states / stats['n']))