def add(self, *modules):
    """Append the given modules to this container.

    Only `df.Module` instances are accepted; anything else trips an assertion.
    """
    for candidate in modules:
        assert isinstance(candidate, df.Module), (
            "`{}`s can only contain objects subtyping `df.Module`. "
            "You tried to add the following `{}`: {}".format(
                df.typename(self), df.typename(candidate), candidate))
    self.modules += modules
def testIgnoreBorder(self):
    """Partial pooling windows over padding are dropped iff `ignore_border` is set."""
    padded = np.pad(self.X, ((0, 1), (0, 2), (0, 3)), mode='constant', constant_values=999)
    batch = padded[None, :, None, :, :]

    # With `ignore_border=True` the incomplete windows vanish ...
    expect_ignored = self.Z[None, :, None, :, :]
    # ... without it, the pad value leaks into extra output cells.
    kept = np.pad(self.Z, ((0, 1), (0, 1), (0, 1)), mode='constant', constant_values=999)
    expect_kept = kept[None, :, None, :, :]

    np.testing.assert_array_equal(
        df.SpatialMaxPooling3D(4, 3, 2, ignore_border=True).forward(batch),
        expect_ignored)
    np.testing.assert_array_equal(
        df.SpatialMaxPooling3D(4, 3, 2, ignore_border=False).forward(batch),
        expect_kept)
def main():
    """Train a Biternion head-pose regressor on TownCentre and report test MAAD."""
    X, y, n = pickle.load(gzip.open('data/TownCentre.pkl.gz', 'rb'))
    (Xtr, ytr, ntr), (Xte, yte, nte) = split_dataset(X, y, n, split=0.9)

    # Scale images into [0, 1] and cast everything to the backend float type.
    Xtr, ytr = Xtr.astype(df.floatX) / 255, ytr.astype(df.floatX)
    Xte, yte = Xte.astype(df.floatX) / 255, yte.astype(df.floatX)

    aug = AugmentationPipeline(Xtr, ytr, Cropper((46, 46)))
    print("Trainset: {}".format(len(Xtr)))
    print("Testset: {}".format(len(Xte)))

    # Alternative heads, kept for reference:
    # 1) Naive deep regression:
    #    net = mknet_gpu(df.Linear(512, 1, initW=df.init.const(0)))
    #    dotrain(net, df.MADCriterion(), aug, Xtr, ytr[:, None])
    #    dostats(net, aug, Xtr, batchsize=1000)
    #    y_preds = np.squeeze(dopred_deg(net, aug, Xte))
    # 2) Biternion deep regression with cosine criterion:
    #    net = mknet_gpu(df.Linear(512, 2, initW=df.init.normal(0.01)), Biternion())
    #    dotrain(net, CosineCriterion(), aug, Xtr, deg2bit(ytr))
    #    dostats(net, aug, Xtr, batchsize=1000)
    #    y_preds = bit2deg(np.squeeze(dopred_deg(net, aug, Xte)))

    # Biternion deep regression with Von-Mises criterion.
    net = mknet_gpu(df.Linear(512, 2, initW=df.init.normal(0.01)), Biternion())
    dotrain(net, VonMisesBiternionCriterion(1), aug, Xtr, deg2bit(ytr))
    dostats(net, aug, Xtr, batchsize=1000)
    y_preds = bit2deg(np.squeeze(dopred_deg(net, aug, Xte)))

    # Mean absolute angular deviation on the held-out split.
    loss = maad_from_deg(y_preds, yte)
    print("MAAD error (test) : %f ± %f" % (np.mean(loss), np.std(loss)))
    return
def dotrain(model, crit, aug, Xtr, ytr, nepochs=50, batchsize=100, title=None):
    """Train `model` on (Xtr, ytr) under criterion `crit` using AdaDelta.

    Returns the list of per-epoch mean batch costs.  `title` is unused here
    (kept for interface compatibility with plotting callers).
    """
    opt = df.AdaDelta(rho=.95, eps=1e-7, lr=1)
    model.training()

    costs = []
    for epoch in range(nepochs):
        epoch_costs = []
        for Xb, yb in batched(batchsize, Xtr, ytr, shuf=True):
            # Optionally push the batch through the augmentation pipeline first.
            if aug is not None:
                Xb, yb = aug.augbatch_train(Xb, yb)
            model.zero_grad_parameters()
            cost = model.accumulate_gradients(Xb, yb, crit)
            opt.update_parameters(model)
            epoch_costs.append(cost)
        costs.append(np.mean(epoch_costs))
        print("mean batch cost: %f" % costs[-1])
    return costs
def testDataChannels(self):
    """Pooling is applied independently per channel."""
    # Build a two-channel input: the original volume plus a +1 shifted copy.
    batch = np.concatenate((self.X[None, :, None, :, :],
                            self.X[None, :, None, :, :] + 1), axis=2)
    expected = np.concatenate((self.Z[None, :, None, :, :],
                               self.Z[None, :, None, :, :] + 1), axis=2)
    np.testing.assert_array_equal(
        df.SpatialMaxPooling3D(4, 3, 2).forward(batch), expected)
def net():
    """A small MLP MNIST classifier: 784 -> 3x(100 + ReLU) -> 10-way softmax."""
    model = df.Sequential()
    for layer in (df.Linear(28 * 28, 100), df.ReLU(),
                  df.Linear(100, 100), df.ReLU(),
                  df.Linear(100, 100), df.ReLU(),
                  df.Linear(100, 10), df.SoftMax()):
        model.add(layer)
    return model
def main(params):
    """Train LeNet on MNIST for 100 epochs.

    params: dict with at least 'lr' and 'batch_size'.
    """
    train_set, valid_set, test_set = df.datasets.mnist.data()
    train_set_x, train_set_y = train_set
    valid_set_x, valid_set_y = valid_set

    model = lenet()
    criterion = df.ClassNLLCriterion()
    optimiser = df.SGD(lr=params['lr'])

    for epoch in range(100):
        model.training()
        train(train_set_x, train_set_y, model, optimiser, criterion, epoch,
              params['batch_size'], 'train')
        # Second pass in 'stats' mode (presumably to collect aggregate
        # statistics such as batch-norm moments — same call shape as 'train').
        train(train_set_x, train_set_y, model, optimiser, criterion, epoch,
              params['batch_size'], 'stats')

        model.evaluate()
        # BUGFIX: per-epoch monitoring previously ran on the *test* set while
        # `valid_set` went unused, leaking test data into model development.
        # Validate on the validation split, as the sibling example does.
        validate(valid_set_x, valid_set_y, model, epoch, params['batch_size'])
def symb_forward(self, symb_input):
    """Forward through every contained module.

    A list/tuple input is matched element-wise onto the modules; any other
    input is broadcast to all of them.  Returns a tuple of outputs.
    TODO: Not sure if this polymorphism is any good!
    """
    if not isinstance(symb_input, (list, tuple)):
        # Single input: every module sees the same thing.
        return tuple(m.symb_forward(symb_input) for m in self.modules)

    assert len(symb_input) == len(self.modules), "If `{}` has multiple inputs, it should be the same amount as it has modules.".format(df.typename(self))
    return tuple(m.symb_forward(x) for m, x in zip(self.modules, symb_input))
def main(params):
    """Train the twin network on MNIST for 100 epochs, validating each epoch."""
    (Xtrain, ytrain), (Xval, yval), (Xtest, ytest) = df.datasets.mnist.data()

    model = twinnet()
    # criterion = df.ClassNLLCriterion()
    criterion = df.BCECriterion()
    optimiser = df.optimizers.AdaDelta(rho=0.95)

    for epoch in range(100):
        model.training()
        train(Xtrain, ytrain, model, optimiser, criterion, epoch,
              params['batch_size'], 'train')
        # The 'stats' pass only runs every third epoch (presumably to save time).
        if epoch % 3 == 0:
            train(Xtrain, ytrain, model, optimiser, criterion, epoch,
                  params['batch_size'], 'stats')

        model.evaluate()
        validate(Xval, yval, model, epoch, params['batch_size'])
def dotrain(model, crit, aug, Xtr, ytr, nepochs=3, batchsize=128, title=None):
    """Train `model` with AdaDelta and return the per-epoch mean batch costs.

    `title` is unused; kept for interface compatibility.
    """
    opt = df.AdaDelta(rho=.95, eps=1e-7, lr=1)
    model.training()
    costs = []
    print("Training in progress...")
    for epoch in range(nepochs):
        print("Current epoch: {0} out of {1}".format(epoch + 1, nepochs))
        per_batch = []
        for Xb, yb in batched(batchsize, Xtr, ytr, shuf=True):
            # Augment the batch when a pipeline is provided.
            if aug is not None:
                Xb, yb = aug.augbatch_train(Xb, yb)
            model.zero_grad_parameters()
            cost = model.accumulate_gradients(Xb, yb, crit)
            opt.update_parameters(model)
            per_batch.append(cost)
        costs.append(np.mean(per_batch))
    return costs
def symb_forward(self, symb_input, symb_targets):
    """Mean negative log-likelihood of `symb_targets` under `symb_input`.

    Targets are either a 1-D vector of class indices or a full distribution
    with the same dimensionality as the input.
    """
    def clipped(p):
        # Optionally keep probabilities away from 0/1 for numerical stability.
        return p if self.clip is None else df.T.clip(p, self.clip, 1 - self.clip)

    if symb_targets.ndim == 1:
        # Index targets: pick out the probability of each row's true class.
        idx = df.T.cast(symb_targets, 'int32')
        p_y = clipped(symb_input[df.T.arange(symb_targets.shape[0]), idx])
        return df.T.mean(-df.T.log(p_y))

    if symb_targets.ndim == symb_input.ndim:
        # Distribution targets: cross-entropy summed over the last axis.
        p_y = clipped(symb_input)
        return df.T.mean(-df.T.sum(symb_targets * df.T.log(p_y),
                                   axis=symb_input.ndim - 1))

    assert False, "Mismatch in dimensionalities of `{}` input and targets.".format(
        df.typename(self))
def testStride(self):
    """Strided pooling (4x3x2 window, stride 2x1x1) over a two-slice volume."""
    # Stack a second depth slice, offset by 100, onto the reference data.
    X = np.concatenate((self.X, self.X[1:2] + 100), axis=0)[None, :, None, :, :]
    expected = np.array([
        [
            [24, 70, 72],
            [48, 88, 88],
            [48, 92, 92],
            [48, 96, 96],
        ],
        [
            [124, 170, 172],
            [136, 176, 176],
            [136, 180, 180],
            [136, 184, 184],
        ],
    ], dtype=df.floatX)[None, :, None, :, :]
    result = df.SpatialMaxPooling3D(4, 3, 2, 2, 1, 1).forward(X)
    np.testing.assert_array_equal(result, expected)
def lenet():
    """LeNet-style MNIST classifier with batch-norm and dropout."""
    model = df.Sequential()
    for layer in (
            df.Reshape(-1, 1, 28, 28),
            # Conv block 1: 1 -> 32 channels, 5x5, then 2x2 max-pool.
            df.SpatialConvolution(1, 32, 5, 5, 1, 1, with_bias=False),
            df.BatchNormalization(32),
            df.ReLU(),
            df.SpatialMaxPooling(2, 2),
            # Conv block 2: 32 -> 64 channels.
            df.SpatialConvolution(32, 64, 5, 5, 1, 1, with_bias=False),
            df.BatchNormalization(64),
            df.ReLU(),
            df.SpatialMaxPooling(2, 2),
            # Classifier head.
            df.Reshape(-1, 4 * 4 * 64),
            df.Linear(4 * 4 * 64, 100, with_bias=False),
            df.BatchNormalization(100),
            df.ReLU(),
            df.Dropout(0.5),
            df.Linear(100, 10),
            df.SoftMax()):
        model.add(layer)
    return model
def symb_forward(self, symb_input, symb_targets):
    """Mean negative log-likelihood criterion.

    Accepts either integer class indices (1-D targets) or a target
    distribution matching the input's dimensionality.
    """
    if symb_targets.ndim == 1:
        # `symb_targets` holds class indices: gather p(true class) per row.
        rows = df.T.arange(symb_targets.shape[0])
        p_y = symb_input[rows, df.T.cast(symb_targets, 'int32')]
        if self.clip is not None:
            p_y = df.T.clip(p_y, self.clip, 1 - self.clip)
        return df.T.mean(-df.T.log(p_y))
    elif symb_targets.ndim == symb_input.ndim:
        # Both are full distributions: cross-entropy over the last axis.
        p_y = symb_input if self.clip is None else df.T.clip(symb_input, self.clip, 1 - self.clip)
        return df.T.mean(-df.T.sum(symb_targets * df.T.log(p_y), axis=symb_input.ndim - 1))
    else:
        assert False, "Mismatch in dimensionalities of `{}` input and targets.".format(df.typename(self))
def __repr__(self):
    """Render as `ClassName(hyper1=val1, hyper2=val2, ...)`."""
    kwargs = ", ".join(k + "=" + str(v) for k, v in self.hyperparams.items())
    return df.typename(self) + "(" + kwargs + ")"
def __repr__(self):
    """Class name followed by the hyperparameter dict, e.g. `SGD(lr=0.1)`."""
    parts = [key + "=" + str(val) for key, val in self.hyperparams.items()]
    return df.typename(self) + "(" + ", ".join(parts) + ")"
def testSISO(self):
    """Identity passes a single input through untouched."""
    data = np.array([[1, 2], [3, 4]], dtype=df2.floatX)
    out = df2.Identity().forward(data)
    np.testing.assert_array_equal(data, out)
from test import test
from model import net, lenet2

if __name__ == "__main__":
    # Example benchmark runner comparing optimizers on MNIST.
    print("THIS IS JUST AN EXAMPLE.")
    print("Please don't take these numbers as a benchmark.")
    print("While the optimizer's parameters have been grid-searched,")
    print(
        "a fair comparison would run all experiments multiple times AND RUN MORE THAN FIVE EPOCHS."
    )
    batch_size = 64
    # NOTE(review): `load_mnist`, `df`, `make_progressbar` and `train` are not
    # imported on this line — presumably they come from elsewhere in the file
    # or a star-import; confirm before running this standalone.
    (Xtrain, ytrain), (Xval, yval), (Xtest, ytest) = load_mnist()
    criterion = df.ClassNLLCriterion()

    def run(optim):
        # Train a fresh `net()` for five epochs with the given optimizer,
        # showing a progress bar; a 'stats' pass follows each training pass.
        progress = make_progressbar('Training with ' + str(optim), 5)
        progress.start()
        model = net()
        model.training()
        for epoch in range(5):
            train(Xtrain, ytrain, model, optim, criterion, batch_size, 'train')
            train(Xtrain, ytrain, model, optim, criterion, batch_size, 'stats')
            progress.update(epoch + 1)
        progress.finish()
        model.evaluate()
        # NOTE(review): the visible chunk ends here — `run` is defined but
        # never called in what we can see; the file likely continues.
def parameters(self):
    """Collect this layer's learnable tensors and their gradient buffers.

    Returns a pair `(params, grads)` of equally-long lists; a layer may have
    a `weight`, a `bias`, both, or neither.
    """
    params, grads = [], []
    # Each learnable attribute must come with its gradient counterpart.
    for attr, grad_attr in (('weight', 'grad_weight'), ('bias', 'grad_bias')):
        if hasattr(self, attr):
            assert hasattr(self, grad_attr), (
                "The layer {} has a `{}` variable but no `{}`, you probably "
                "forget to implement it.".format(df.classname(self), attr, grad_attr))
            params.append(getattr(self, attr))
            grads.append(getattr(self, grad_attr))
    return params, grads
def mknet(self, *outlayers):
    """Build the convolutional trunk (3->24->48->64 channels) plus `outlayers`.

    The trunk ends in a 512-unit fully-connected layer with dropout, onto
    which the caller's output layers are appended.
    """
    return df.Sequential(
        df.SpatialConvolution(3, 24, (3, 3)),
        df.BatchNormalization(24),
        df.ReLU(),
        df.SpatialConvolution(24, 24, (3, 3)),
        df.BatchNormalization(24),
        df.SpatialMaxPooling((2, 2)),
        df.ReLU(),
        df.SpatialConvolution(24, 48, (3, 3)),
        df.BatchNormalization(48),
        df.ReLU(),
        # BUGFIX: this was `df.SpatialMaxPoolingCUDNN(48, 48, 3, 3)`, which
        # passed channel counts as pooling arguments.  The commented-out line
        # that followed it and the sibling `mknet` show this position is meant
        # to be a 3x3 convolution (also needed for the 64*5*5 flatten below).
        df.SpatialConvolution(48, 48, (3, 3)),
        df.BatchNormalization(48),
        df.SpatialMaxPooling((2, 2)),
        df.ReLU(),
        df.SpatialConvolution(48, 64, (3, 3)),
        df.BatchNormalization(64),
        df.ReLU(),
        df.SpatialConvolution(64, 64, (3, 3)),
        df.BatchNormalization(64),
        df.ReLU(),
        df.Dropout(0.2),
        Flatten(),
        df.Linear(64 * 5 * 5, 512),
        df.ReLU(),
        df.Dropout(0.5),
        *outlayers)
def testBasic(self):
    """Default pooling of the reference volume matches the precomputed result."""
    result = df.SpatialMaxPooling3D(4, 3, 2).forward(self.X[None, :, None, :, :])
    np.testing.assert_array_equal(result, self.Z[None, :, None, :, :])
def symb_forward(self, symb_input):
    """Run every contained module; list/tuple inputs are matched one-to-one.

    Returns a tuple with one output per module.
    TODO: Not sure if this polymorphism is any good!
    """
    if isinstance(symb_input, (list, tuple)):
        assert len(symb_input) == len(self.modules), (
            "If `{}` has multiple inputs, it should be the same amount as it "
            "has modules.".format(df.typename(self)))
        pairs = zip(self.modules, symb_input)
        return tuple(mod.symb_forward(inp) for mod, inp in pairs)
    # A single input is fed to all modules alike.
    return tuple(mod.symb_forward(symb_input) for mod in self.modules)
def symb_forward(self, symb_inputs):
    """Concatenate the incoming tensors along this container's axis."""
    is_sequence = isinstance(symb_inputs, (list, tuple))
    assert is_sequence, "Input to `{}` container needs to be a tuple or a list.".format(
        df.typename(self))
    return df.T.concatenate(symb_inputs, self.axis)
def model(fully_conv=True):
    """VGG-16 trunk of 3x3 'same' CUDNN convolutions, plus a classifier head.

    `fully_conv` selects the fully-convolutional head over linear layers.
    """
    def conv3(nin, nout):
        # Every trunk convolution is 3x3 with 'same' padding.
        return df.SpatialConvolutionCUDNN(nin, nout, 3, 3, border='same')

    layers = []
    nin = 3
    # (output channels, number of convolutions) of the five VGG blocks,
    # each block ending in a 2x2 max-pool.
    for nout, repeats in ((64, 2), (128, 2), (256, 4), (512, 4), (512, 4)):
        for _ in range(repeats):
            layers += [conv3(nin, nout), df.ReLU()]
            nin = nout
        layers.append(df.SpatialMaxPoolingCUDNN(2, 2))

    layers.extend(_vgg.model_head(fully_conv))
    return df.Sequential(*layers)
def symb_forward(self, symb_inputs):
    """Join all inputs into one tensor along `self.axis`.

    Asserts that the input is a list or tuple of tensors.
    """
    assert isinstance(
        symb_inputs, (list, tuple)
    ), "Input to `{}` container needs to be a tuple or a list.".format(df.typename(self))
    return df.T.concatenate(symb_inputs, self.axis)
def model_head(fully_conv=True):
    """Return the VGG classifier head as a list of layers.

    The fully-convolutional variant swaps the linear layers for 7x7/1x1
    convolutions so the network can slide over larger inputs.
    """
    if not fully_conv:
        return [
            df.Reshape(-1, 512 * 7 * 7),
            df.Linear(512 * 7 * 7, 4096), df.ReLU(), df.Dropout(0.5),
            df.Linear(4096, 4096), df.ReLU(), df.Dropout(0.5),
            df.Linear(4096, 1000), df.SoftMax(),
        ]
    return [
        df.SpatialConvolutionCUDNN(512, 4096, 7, 7, border='valid'),
        df.ReLU(), df.Dropout(0.5),
        df.SpatialConvolutionCUDNN(4096, 4096, 1, 1, border='valid'),
        df.ReLU(), df.Dropout(0.5),
        df.SpatialConvolutionCUDNN(4096, 1000, 1, 1, border='valid'),
        df.SpatialSoftMaxCUDNN(),
    ]
def mknet():
    """Convolutional Biternion regression net for 48x70 (HxW) inputs."""
    layers = [
        df.SpatialConvolution(3, 24, (3, 3)),    # -> 46x68
        df.BatchNormalization(24),
        df.ReLU(),
        df.SpatialConvolution(24, 24, (3, 3)),   # -> 44x66
        df.BatchNormalization(24),
        df.SpatialMaxPooling((2, 3)),            # -> 22x22
        df.ReLU(),
        df.SpatialConvolution(24, 48, (3, 3)),   # -> 20x20
        df.BatchNormalization(48),
        df.ReLU(),
        df.SpatialConvolution(48, 48, (3, 3)),   # -> 18x18
        df.BatchNormalization(48),
        df.SpatialMaxPooling((2, 2)),            # -> 9x9
        df.ReLU(),
        df.SpatialConvolution(48, 64, (3, 3)),   # -> 7x7
        df.BatchNormalization(64),
        df.ReLU(),
        df.SpatialConvolution(64, 64, (3, 3)),   # -> 5x5
        df.BatchNormalization(64),
        df.ReLU(),
        df.Dropout(0.2),
        Flatten(),
        df.Linear(64 * 5 * 5, 512),
        df.ReLU(),
        df.Dropout(0.5),
        df.Linear(512, 2, init=df.init.normal(0.01)),
        Biternion(),
    ]
    return df.Sequential(*layers)
def __init__(self, weightsname, *unused, **unused_kw):
    """Load a trained 48x70 Biternion net from `weightsname` for inference.

    Extra positional/keyword arguments are accepted and ignored.
    """
    trunk = [
        df.SpatialConvolution(3, 24, (3, 3)),    # 48x70 -> 46x68
        df.BatchNormalization(24),
        df.ReLU(),
        df.SpatialConvolution(24, 24, (3, 3)),   # -> 44x66
        df.BatchNormalization(24),
        df.SpatialMaxPooling((2, 3)),            # -> 22x22
        df.ReLU(),
        df.SpatialConvolution(24, 48, (3, 3)),   # -> 20x20
        df.BatchNormalization(48),
        df.ReLU(),
        df.SpatialConvolution(48, 48, (3, 3)),   # -> 18x18
        df.BatchNormalization(48),
        df.SpatialMaxPooling((2, 2)),            # -> 9x9
        df.ReLU(),
        df.SpatialConvolution(48, 64, (3, 3)),   # -> 7x7
        df.BatchNormalization(64),
        df.ReLU(),
        df.SpatialConvolution(64, 64, (3, 3)),   # -> 5x5
        df.BatchNormalization(64),
        df.ReLU(),
        df.Dropout(0.2),
        Flatten(),
        df.Linear(64 * 5 * 5, 512),
        df.ReLU(),
        df.Dropout(0.5),
        df.Linear(512, 2, init=df.init.normal(0.01)),
        Biternion(),
    ]
    self._net = df.Sequential(*trunk)
    # Restore the trained weights and switch to inference mode.
    self._net.__setstate__(np.load(weightsname))
    self._net.evaluate()
    self._aug = AugmentationPipeline(None, None, Cropper((48, 70)))
def symb_forward(self, symb_input):
    """Abstract hook: subclasses build and return their symbolic output here."""
    message = "`{}` needs to implement `symb_forward` method.".format(df.typename(self))
    raise NotImplementedError(message)
def add(self, *modules):
    """Add one or more `df.Module` children to this container."""
    template = "`{}`s can only contain objects subtyping `df.Module`. You tried to add the following `{}`: {}"
    for m in modules:
        assert isinstance(m, df.Module), template.format(df.typename(self), df.typename(m), m)
    self.modules += modules
def testMIMO(self):
    """Identity maps a list of inputs to the same outputs, element-wise."""
    base = np.array([[1, 2], [3, 4]], dtype=df2.floatX)
    doubled = base * 2
    out_a, out_b = df2.Identity().forward([base, doubled])
    np.testing.assert_array_equal(base, out_a)
    np.testing.assert_array_equal(doubled, out_b)
def symb_forward(self, symb_input, symb_targets):
    """Batch-mean of the per-sample summed binary cross-entropy."""
    # A classic mistake, at least for myself.
    assert symb_targets.ndim == symb_input.ndim, "The targets of `{}` should have the same dimensionality as the net's output. You likely want to do something like `tgt[:,None]`.".format(df.typename(self))

    preds = symb_input
    if self.clip is not None:
        # Keep predictions away from exact 0/1 to avoid log(0).
        preds = df.T.clip(preds, self.clip, 1 - self.clip)
    per_sample = df.T.sum(df.T.nnet.binary_crossentropy(preds, symb_targets), axis=1)
    return df.T.mean(per_sample)
def get_updates(self, params, grads):
    """Abstract optimizer hook: produce the update rules for `params`."""
    template = "`{}` needs to implement `get_updates` method."
    raise NotImplementedError(template.format(df.typename(self)))
def mknet_gpu(*outlayers):
    """CUDNN trunk for 3-channel 46x46 crops ending in 512 features + `outlayers`."""
    trunk = [
        df.SpatialConvolutionCUDNN(3, 24, 3, 3),    # 3@46 -> 24@44
        df.BatchNormalization(24),
        df.ReLU(),
        df.SpatialConvolutionCUDNN(24, 24, 3, 3),   # -> 24@42
        df.BatchNormalization(24),
        df.SpatialMaxPoolingCUDNN(2, 2),            # -> 24@21
        df.ReLU(),
        df.SpatialConvolutionCUDNN(24, 48, 3, 3),   # -> 48@19
        df.BatchNormalization(48),
        df.ReLU(),
        df.SpatialConvolutionCUDNN(48, 48, 3, 3),   # -> 48@17
        df.BatchNormalization(48),
        df.SpatialMaxPooling(2, 2),                 # -> 48@9
        df.ReLU(),
        df.SpatialConvolutionCUDNN(48, 64, 3, 3),   # -> 64@7
        df.BatchNormalization(64),
        df.ReLU(),
        df.SpatialConvolutionCUDNN(64, 64, 3, 3),   # -> 64@5
        df.BatchNormalization(64),
        df.ReLU(),
        df.Dropout(0.2),
        Flatten(),
        df.Linear(64 * 5 * 5, 512),
        df.ReLU(),
        df.Dropout(0.5),
    ]
    return df.Sequential(*(trunk + list(outlayers)))
def __init__(self, weightsname, *unused, **unused_kw):
    """Load a trained 184x76 Biternion net from `weightsname` for inference.

    Extra positional/keyword arguments are accepted and ignored.
    """
    trunk = [
        df.SpatialConvolution(3, 24, (3, 3)),    # 184x76 -> 182x74
        df.BatchNormalization(24),
        df.ReLU(),
        df.SpatialConvolution(24, 24, (3, 3)),   # -> 180x72
        df.SpatialMaxPooling((3, 3)),            # -> 60x24
        df.BatchNormalization(24),
        df.ReLU(),
        df.SpatialConvolution(24, 48, (3, 3)),   # -> 58x22
        df.BatchNormalization(48),
        df.ReLU(),
        df.SpatialConvolution(48, 48, (3, 3)),   # -> 56x20
        df.SpatialMaxPooling((2, 2)),            # -> 28x10
        df.BatchNormalization(48),
        df.ReLU(),
        df.SpatialConvolution(48, 64, (3, 3)),   # -> 26x8
        df.BatchNormalization(64),
        df.ReLU(),
        df.SpatialConvolution(64, 64, (3, 3)),   # -> 24x6
        df.SpatialMaxPooling((2, 2)),            # -> 12x3
        df.BatchNormalization(64),
        df.ReLU(),
        df.SpatialConvolution(64, 64, (3, 2)),   # -> 10x2
        df.BatchNormalization(64),
        df.ReLU(),
        df.Dropout(0.2),
        Flatten(),
        df.Linear(64 * 10 * 2, 512),
        df.ReLU(),
        df.Dropout(0.5),
        df.Linear(512, 2, init=df.init.normal(0.01)),
        Biternion(),
    ]
    self._net = df.Sequential(*trunk)
    # Restore the trained weights and switch to inference mode.
    self._net.__setstate__(np.load(weightsname))
    self._net.evaluate()
    self._aug = AugmentationPipeline(None, None, Cropper((184, 76)))
def get_updates(self, params, grads):
    """Subclasses override this to compute the symbolic updates for `params`."""
    who = df.typename(self)
    raise NotImplementedError("`{}` needs to implement `get_updates` method.".format(who))
def lenet_cudnn():
    """CUDNN variant of the LeNet classifier ('same' borders keep 28x28 -> 7x7)."""
    model = df.Sequential()
    for layer in (
            df.Reshape(-1, 1, 28, 28),
            # Conv block 1: 1 -> 32 channels, 5x5 'same', then 2x2 max-pool.
            df.SpatialConvolutionCUDNN(1, 32, 5, 5, 1, 1, border='same', with_bias=False),
            df.BatchNormalization(32),
            df.ReLU(),
            df.SpatialMaxPoolingCUDNN(2, 2),
            # Conv block 2: 32 -> 64 channels.
            df.SpatialConvolutionCUDNN(32, 64, 5, 5, 1, 1, border='same', with_bias=False),
            df.BatchNormalization(64),
            df.ReLU(),
            df.SpatialMaxPoolingCUDNN(2, 2),
            # Classifier head.
            df.Reshape(-1, 7 * 7 * 64),
            df.Linear(7 * 7 * 64, 100, with_bias=False),
            df.BatchNormalization(100),
            df.ReLU(),
            df.Dropout(0.5),
            df.Linear(100, 10),
            df.SoftMax()):
        model.add(layer)
    return model
def mknet():
    """Convolutional Biternion regression net for 184x76 inputs."""
    layers = [
        df.SpatialConvolution(3, 24, (3, 3)),    # 184x76 -> 182x74
        df.BatchNormalization(24),
        df.ReLU(),
        df.SpatialConvolution(24, 24, (3, 3)),   # -> 180x72
        df.SpatialMaxPooling((3, 3)),            # -> 60x24
        df.BatchNormalization(24),
        df.ReLU(),
        df.SpatialConvolution(24, 48, (3, 3)),   # -> 58x22
        df.BatchNormalization(48),
        df.ReLU(),
        df.SpatialConvolution(48, 48, (3, 3)),   # -> 56x20
        df.SpatialMaxPooling((2, 2)),            # -> 28x10
        df.BatchNormalization(48),
        df.ReLU(),
        df.SpatialConvolution(48, 64, (3, 3)),   # -> 26x8
        df.BatchNormalization(64),
        df.ReLU(),
        df.SpatialConvolution(64, 64, (3, 3)),   # -> 24x6
        df.SpatialMaxPooling((2, 2)),            # -> 12x3
        df.BatchNormalization(64),
        df.ReLU(),
        df.SpatialConvolution(64, 64, (3, 2)),   # -> 10x2
        df.BatchNormalization(64),
        df.ReLU(),
        df.Dropout(0.2),
        Flatten(),
        df.Linear(64 * 10 * 2, 512),
        df.ReLU(),
        df.Dropout(0.5),
        df.Linear(512, 2, init=df.init.normal(0.01)),
        Biternion(),
    ]
    return df.Sequential(*layers)