def create_frcn_model(frcn_fine_tune=False):
    b1 = BranchNode(name="b1")
    imagenet_layers = add_vgg_layers()
    HW = (7, 7)

    frcn_layers = [
        RoiPooling(layers=imagenet_layers, HW=HW, bprop_enabled=frcn_fine_tune),
        Affine(nout=4096, init=Gaussian(scale=0.005),
               bias=Constant(.1), activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(nout=4096, init=Gaussian(scale=0.005),
               bias=Constant(.1), activation=Rectlin()),
        Dropout(keep=0.5),
        b1,
        Affine(nout=21, init=Gaussian(scale=0.01),
               bias=Constant(0), activation=Softmax())
    ]
    bb_layers = [
        b1,
        Affine(nout=84, init=Gaussian(scale=0.001),
               bias=Constant(0), activation=Identity())
    ]

    return Model(layers=Tree([frcn_layers, bb_layers]))
def create_network():
    # weight initialization
    g1 = Gaussian(scale=0.01)
    g5 = Gaussian(scale=0.005)
    c0 = Constant(0)
    c1 = Constant(1)

    # model initialization
    padding = {'pad_d': 1, 'pad_h': 1, 'pad_w': 1}
    strides = {'str_d': 2, 'str_h': 2, 'str_w': 2}

    layers = [
        Conv((3, 3, 3, 64), padding=padding, init=g1, bias=c0, activation=Rectlin()),
        Pooling((1, 2, 2), strides={'str_d': 1, 'str_h': 2, 'str_w': 2}),
        Conv((3, 3, 3, 128), padding=padding, init=g1, bias=c1, activation=Rectlin()),
        Pooling((2, 2, 2), strides=strides),
        Conv((3, 3, 3, 256), padding=padding, init=g1, bias=c1, activation=Rectlin()),
        Pooling((2, 2, 2), strides=strides),
        Conv((3, 3, 3, 256), padding=padding, init=g1, bias=c1, activation=Rectlin()),
        Pooling((2, 2, 2), strides=strides),
        Conv((3, 3, 3, 256), padding=padding, init=g1, bias=c1, activation=Rectlin()),
        Pooling((2, 2, 2), strides=strides),
        Affine(nout=2048, init=g5, bias=c1, activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(nout=2048, init=g5, bias=c1, activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(nout=101, init=g1, bias=c0, activation=Softmax())
    ]
    return Model(layers=layers)
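# Illustrative usage sketch (not from the source): wiring the 3D conv network
# above into a training run. The video-clip iterator is assumed to exist and
# is left out, so the fit call stays commented.
from neon.backends import gen_backend
from neon.layers import GeneralizedCost
from neon.optimizers import GradientDescentMomentum
from neon.transforms import CrossEntropyMulti

be = gen_backend(backend='cpu', batch_size=32)  # backend must exist before layers
model = create_network()
cost = GeneralizedCost(costfunc=CrossEntropyMulti())
opt = GradientDescentMomentum(0.01, momentum_coef=0.9, wdecay=0.0005)
# model.fit(train_set, optimizer=opt, num_epochs=10, cost=cost,
#           callbacks=Callbacks(model))  # train_set/Callbacks assumed elsewhere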
def gen_model(num_channels, height, width):
    assert NervanaObject.be is not None, 'need to generate a backend before using this function'

    init_uni = Kaiming()

    # note: the reference model has bias layers, but neon does not allow
    # batch norm and biases together, so biases are omitted here
    conv_common = dict(padding=1, init=init_uni, activation=Rectlin(), batch_norm=True)

    # set up the layers
    layers = []

    # need to store a ref to the pooling layers to pass
    # to the upsampling layers to get the argmax indices
    # for upsampling, this stack holds the pooling layer refs
    pool_layers = []

    # first loop generates the encoder layers
    nchan = [64, 128, 256, 512, 512]
    for ind in range(len(nchan)):
        nchanu = nchan[ind]
        lrng = 2 if ind <= 1 else 3
        for lind in range(lrng):
            nm = 'conv%d_%d' % (ind + 1, lind + 1)
            layers.append(Conv((3, 3, nchanu), strides=1, name=nm, **conv_common))
        layers.append(Pooling(2, strides=2, name='conv%d_pool' % ind))
        pool_layers.append(layers[-1])
        if ind >= 2:
            layers.append(Dropout(keep=0.5, name='drop%d' % (ind + 1)))

    # this loop generates the decoder layers
    for ind in range(len(nchan) - 1, -1, -1):
        nchanu = nchan[ind]
        lrng = 2 if ind <= 1 else 3
        # upsampling layers need a ref to the corresponding pooling layer
        # to access the argmax indices for upsampling
        layers.append(Upsampling(2, pool_layers.pop(), strides=2, padding=0,
                                 name='conv%d_unpool' % ind))
        for lind in range(lrng):
            nm = 'deconv%d_%d' % (ind + 1, lind + 1)
            if ind < 4 and lind == lrng - 1:
                nchanu = nchan[ind] // 2  # integer division so fshape stays an int
            layers.append(Conv((3, 3, nchanu), strides=1, name=nm, **conv_common))
        if ind == 0:
            break
        if ind >= 2:
            layers.append(Dropout(keep=0.5, name='drop%d' % (ind + 1)))

    # last conv layer outputs num_channels channels, 1 for each output class,
    # with a pixelwise softmax over the channels
    act_last = PixelwiseSoftmax(num_channels, height, width, name="PixelwiseSoftmax")
    conv_last = dict(padding=1, init=init_uni, activation=act_last, batch_norm=False)
    layers.append(Conv((3, 3, num_channels), strides=1, name='deconv_out', **conv_last))
    return layers
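# Self-contained sketch of the pooling/unpooling pairing gen_model relies on:
# pool_layers acts as a stack, so each decoder stage pops the pooling ref
# pushed by its mirror encoder stage (LIFO order). Names are illustrative.
pool_stack = ['conv%d_pool' % ind for ind in range(5)]
pairs = [('conv%d_unpool' % ind, pool_stack.pop()) for ind in range(4, -1, -1)]
assert pairs == [('conv4_unpool', 'conv4_pool'), ('conv3_unpool', 'conv3_pool'),
                 ('conv2_unpool', 'conv2_pool'), ('conv1_unpool', 'conv1_pool'),
                 ('conv0_unpool', 'conv0_pool')]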
def layers(self):
    return [
        Conv((7, 7, 96), init=Gaussian(scale=0.0001), bias=Constant(0),
             activation=Rectlin(), padding=3, strides=1),
        LRN(31, ascale=0.001, bpower=0.75),
        Pooling(3, strides=2, padding=1),
        Conv((5, 5, 256), init=Gaussian(scale=0.01), bias=Constant(0),
             activation=Rectlin(), padding=2, strides=1),
        LRN(31, ascale=0.001, bpower=0.75),
        Pooling(3, strides=2, padding=1),
        Conv((3, 3, 384), init=Gaussian(scale=0.01), bias=Constant(0),
             activation=Rectlin(), padding=1, strides=1),
        Conv((3, 3, 384), init=Gaussian(scale=0.01), bias=Constant(0),
             activation=Rectlin(), padding=1, strides=1),
        Conv((3, 3, 256), init=Gaussian(scale=0.01), bias=Constant(0),
             activation=Rectlin(), padding=1, strides=1),
        Pooling(3, strides=2, padding=1),
        Affine(nout=4096, init=Gaussian(scale=0.01), bias=Constant(0),
               activation=Identity()),
        Dropout(keep=0.5),
        Affine(nout=4096, init=Gaussian(scale=0.01), bias=Constant(0),
               activation=Identity()),
        Dropout(keep=0.5),
        Affine(nout=self.noutputs, init=Gaussian(scale=0.01), bias=Constant(0),
               activation=Softmax() if self.use_softmax else Logistic(shortcut=True))
    ]
def fit_model(train_set, val_set, num_epochs=50):
    relu = Rectlin()
    conv_params = {
        'strides': 1,
        'padding': 1,
        'init': Xavier(local=True),  # Xavier: uniform in +/- sqrt(3 / num_inputs)
        'bias': Constant(0),
        'activation': relu
    }

    layers = []
    layers.append(Conv((3, 3, 128), **conv_params))  # 128 feature maps of 3x3 kernels
    layers.append(Pooling(2))
    layers.append(Conv((3, 3, 128), **conv_params))
    layers.append(Pooling(2))  # highest value from each 2x2 window
    layers.append(Conv((3, 3, 128), **conv_params))
    layers.append(Dropout(keep=0.5))  # with probability 0.5, zero out activations
    layers.append(Affine(nout=128, init=GlorotUniform(), bias=Constant(0),
                         activation=relu))  # fully connected layer over the flattened conv output
    layers.append(Dropout(keep=0.5))
    layers.append(Affine(nout=2, init=GlorotUniform(), bias=Constant(0),
                         activation=Softmax(), name="class_layer"))

    # initialize model object
    cnn = Model(layers=layers)
    cost = GeneralizedCost(costfunc=CrossEntropyMulti())
    optimizer = Adam()

    callbacks = Callbacks(cnn, eval_set=val_set, eval_freq=1)

    cnn.fit(train_set, optimizer=optimizer, num_epochs=num_epochs, cost=cost,
            callbacks=callbacks)
    return cnn
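# Hedged follow-up sketch: scoring the model returned by fit_model with neon's
# stock Misclassification metric. The calls are commented out because building
# train_set/val_set is outside this snippet.
from neon.transforms import Misclassification
# cnn = fit_model(train_set, val_set, num_epochs=50)
# error_pct = 100. * cnn.eval(val_set, metric=Misclassification())[0]
# print('Misclassification error = %.1f%%' % error_pct)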
def create_network():
    layers = [
        Conv((11, 11, 64), init=Gaussian(scale=0.01), bias=Constant(0),
             activation=Rectlin(), padding=3, strides=4),
        Pooling(3, strides=2),
        Conv((5, 5, 192), init=Gaussian(scale=0.01), bias=Constant(1),
             activation=Rectlin(), padding=2),
        Pooling(3, strides=2),
        Conv((3, 3, 384), init=Gaussian(scale=0.03), bias=Constant(0),
             activation=Rectlin(), padding=1),
        Conv((3, 3, 256), init=Gaussian(scale=0.03), bias=Constant(1),
             activation=Rectlin(), padding=1),
        Conv((3, 3, 256), init=Gaussian(scale=0.03), bias=Constant(1),
             activation=Rectlin(), padding=1),
        Pooling(3, strides=2),
        Affine(nout=4096, init=Gaussian(scale=0.01), bias=Constant(1),
               activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(nout=4096, init=Gaussian(scale=0.01), bias=Constant(1),
               activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(nout=1000, init=Gaussian(scale=0.01), bias=Constant(-7),
               activation=Softmax()),
    ]
    return Model(layers=layers), GeneralizedCost(costfunc=CrossEntropyMulti())
def create_network():
    init = Kaiming()
    padding = dict(pad_d=1, pad_h=1, pad_w=1)
    strides = dict(str_d=2, str_h=2, str_w=2)
    dilation = dict(dil_d=2, dil_h=2, dil_w=2)
    common = dict(init=init, batch_norm=True, activation=Rectlin())

    layers = [
        Conv((9, 9, 9, 16), padding=padding, strides=strides, init=init,
             activation=Rectlin()),
        Conv((5, 5, 5, 32), dilation=dilation, **common),
        Conv((3, 3, 3, 64), dilation=dilation, **common),
        Pooling((2, 2, 2), padding=padding, strides=strides),
        Conv((2, 2, 2, 128), **common),
        Conv((2, 2, 2, 128), **common),
        Conv((2, 2, 2, 128), **common),
        Conv((2, 2, 2, 256), **common),
        Conv((2, 2, 2, 1024), **common),
        Conv((2, 2, 2, 4096), **common),
        Conv((2, 2, 2, 2048), **common),
        Conv((2, 2, 2, 1024), **common),
        Dropout(),
        Affine(2, init=Kaiming(local=False), batch_norm=True, activation=Softmax())
    ]
    return Model(layers=layers)
def __init__(self):
    self.in_shape = [1024, (2538, 38)]
    init = Constant(0)
    image_path = Sequential([Affine(20, init, bias=init),
                             Affine(10, init, bias=init)])
    sent_path = Sequential([Affine(30, init, bias=init),
                            Affine(10, init)])

    layers = [
        MergeMultistream(layers=[image_path, sent_path], merge="recurrent"),
        Dropout(keep=0.5),
        LSTM(4, init, activation=Logistic(), gate_activation=Tanh(),
             reset_cells=True),
        Affine(20, init, bias=init, activation=Softmax())
    ]
    self.layers = layers
    self.cost = GeneralizedCostMask(CrossEntropyMulti())

    self.model = Model(layers=layers)
    self.model.initialize(self.in_shape, cost=self.cost)
def __init__(self, overlapping_classes=None, exclusive_classes=None,
             analytics_input=True, network_type='conv_net', num_words=60,
             width=100, lookup_size=0, lookup_dim=0, optimizer=Adam()):
    assert (overlapping_classes is not None) or (exclusive_classes is not None)

    self.width = width
    self.num_words = num_words
    self.overlapping_classes = overlapping_classes
    self.exclusive_classes = exclusive_classes
    self.analytics_input = analytics_input
    self.recurrent = network_type == 'lstm'
    self.lookup_size = lookup_size
    self.lookup_dim = lookup_dim

    init = GlorotUniform()
    activation = Rectlin(slope=1E-05)
    gate = Logistic()

    input_layers = self.input_layers(analytics_input, init, activation, gate)

    if self.overlapping_classes is None:
        output_layers = [Affine(len(self.exclusive_classes), init,
                                activation=Softmax())]
    elif self.exclusive_classes is None:
        output_layers = [Affine(len(self.overlapping_classes), init,
                                activation=Logistic())]
    else:
        output_branch = BranchNode(name='exclusive_overlapping')
        output_layers = Tree([[SkipNode(),
                               output_branch,
                               Affine(len(self.exclusive_classes), init,
                                      activation=Softmax())],
                              [output_branch,
                               Affine(len(self.overlapping_classes), init,
                                      activation=Logistic())]])

    layers = [input_layers,
              # this is where inputs meet, and where we may want to add depth or
              # additional functionality
              Dropout(keep=0.8),
              output_layers]

    super(ClassifierNetwork, self).__init__(layers, optimizer=optimizer)
def aux_branch(bnode):
    return [
        bnode,
        Pooling(fshape=5, strides=3, op="avg"),
        Conv((1, 1, 128), **common),
        Affine(nout=1024, init=init1, activation=relu, bias=bias),
        Dropout(keep=0.3),
        Affine(nout=1000, init=init1, activation=Softmax(), bias=Constant(0))
    ]
def main():
    parser = get_parser()
    args = parser.parse_args()
    print('Args:', args)

    logging_level = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=logging_level, format='')

    ext = extension_from_parameters(args)

    loader = p1b3.DataLoader(feature_subsample=args.feature_subsample,
                             scaling=args.scaling,
                             drug_features=args.drug_features,
                             scramble=args.scramble,
                             min_logconc=args.min_logconc,
                             max_logconc=args.max_logconc,
                             subsample=args.subsample,
                             category_cutoffs=args.category_cutoffs)

    # initializer = Gaussian(loc=0.0, scale=0.01)
    initializer = GlorotUniform()
    activation = get_function(args.activation)()

    layers = []
    reshape = None

    if args.convolution and args.convolution[0]:
        reshape = (1, loader.input_dim, 1)
        layer_list = list(range(0, len(args.convolution), 3))
        for l, i in enumerate(layer_list):
            nb_filter = args.convolution[i]
            filter_len = args.convolution[i + 1]
            stride = args.convolution[i + 2]
            # fshape: (height, width, num_filters)
            layers.append(Conv((1, filter_len, nb_filter),
                               strides={'str_h': 1, 'str_w': stride},
                               init=initializer, activation=activation))
            if args.pool:
                layers.append(Pooling((1, args.pool)))

    for layer in args.dense:
        if layer:
            layers.append(Affine(nout=layer, init=initializer, activation=activation))
        if args.drop:
            layers.append(Dropout(keep=(1 - args.drop)))
    layers.append(Affine(nout=1, init=initializer, activation=neon.transforms.Identity()))

    model = Model(layers=layers)

    train_iter = ConcatDataIter(loader, ndata=args.train_samples,
                                lshape=reshape, datatype=args.datatype)
    val_iter = ConcatDataIter(loader, partition='val', ndata=args.val_samples,
                              lshape=reshape, datatype=args.datatype)

    cost = GeneralizedCost(get_function(args.loss)())
    optimizer = get_function(args.optimizer)()
    callbacks = Callbacks(model, eval_set=val_iter, **args.callback_args)

    model.fit(train_iter, optimizer=optimizer, num_epochs=args.epochs,
              cost=cost, callbacks=callbacks)
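# Stand-alone sketch of the flat --convolution layout main() expects: the list
# is read in (nb_filter, filter_len, stride) triples. Example values are made
# up for illustration.
spec = [32, 7, 2, 64, 5, 1]
triples = [tuple(spec[i:i + 3]) for i in range(0, len(spec), 3)]
assert triples == [(32, 7, 2), (64, 5, 1)]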
def aux_branch(bnode, ind):
    # TODO: put dropout back in (keep=1.0 currently makes it a no-op)
    nm = 'loss%d/' % ind
    return [bnode,
            Pooling(fshape=5, strides=3, op="avg", name=nm + 'ave_pool'),
            Conv((1, 1, 128), name=nm + 'conv', **common),
            Affine(nout=1024, init=init1, activation=relu, bias=bias, name=nm + 'fc'),
            Dropout(keep=1.0, name=nm + 'drop_fc'),
            Affine(nout=1000, init=init1, activation=Softmax(), bias=Constant(0),
                   name=nm + 'classifier')]
def layers(self):
    bn = True
    if self.bn_first_layer:
        first_layer = Conv((7, 7, 96), init=Kaiming(), activation=Explin(),
                           batch_norm=bn, padding=3, strides=1)
    else:
        first_layer = Conv((7, 7, 96), init=Kaiming(), bias=Constant(0),
                           activation=Explin(), padding=3, strides=1)
    return [
        first_layer,
        Pooling(3, strides=2, padding=1),
        Conv((5, 5, 256), init=Kaiming(), activation=Explin(), batch_norm=bn,
             padding=2, strides=1),
        Pooling(3, strides=2, padding=1),
        Conv((3, 3, 384), init=Kaiming(), activation=Explin(), batch_norm=bn,
             padding=1, strides=1),
        Conv((3, 3, 384), init=Kaiming(), activation=Explin(), batch_norm=bn,
             padding=1, strides=1),
        Conv((3, 3, 384), init=Kaiming(), activation=Explin(), batch_norm=bn,
             padding=1, strides=1),
        Pooling(3, strides=2, padding=1),
        Affine(nout=4096, init=Kaiming(), activation=Explin(), batch_norm=bn),
        Dropout(keep=0.5),
        Affine(nout=4096, init=Kaiming(), activation=Explin(), batch_norm=bn),
        Dropout(keep=0.5),
        Affine(nout=self.noutputs, init=Kaiming(), bias=Constant(0),
               activation=Softmax() if self.use_softmax else Logistic(shortcut=True))
    ]
def __init__(self):
    self.in_shape = (3, 32, 32)
    relu = Rectlin()
    init_use = Constant(0)
    conv = dict(init=init_use, batch_norm=False, activation=relu)
    convp1 = dict(init=init_use, batch_norm=False, bias=init_use,
                  activation=relu, padding=1)
    convp1s2 = dict(init=init_use, batch_norm=False, bias=init_use,
                    padding=1, strides=2)

    layers = [
        Dropout(keep=.8),
        Conv((3, 3, 96), **convp1),
        Conv((3, 3, 96), **convp1),
        Conv((3, 3, 96), **convp1s2),
        Dropout(keep=.5),
        Conv((3, 3, 192), **convp1),
        Conv((3, 3, 192), **convp1),
        Conv((3, 3, 192), **convp1s2),
        Dropout(keep=.5),
        Conv((3, 3, 192), **convp1),
        Conv((1, 1, 192), **conv),
        Conv((1, 1, 16), **conv),
        Pooling(8, op="avg"),
        Activation(Softmax())
    ]
    self.layers = layers
    model = Model(layers=layers)
    cost = GeneralizedCost(costfunc=CrossEntropyMulti())
    model.initialize(self.in_shape, cost=cost)
    self.model = model
def main_branch(self, nout=100):
    return [
        Conv(**self.conv_params(3, 32, strides=2, padding=0)),
        Pooling(**self.pool3s2p0),
        Conv(**self.conv_params(1, 80, strides=1, padding=0)),
        Pooling(**self.pool3s2p0),
        self.inception([(64, ), (48, 64), (64, 96), (32, )], b2fsz=5),
        self.inception([(64, ), (48, 64), (64, 96), (64, )], b2fsz=5),
        self.inception_inception([(64, ), (64, 64), (64, 64, 64), (192, )]),
        self.inception_inception([(64, ), (64, 64), (64, 64, 64), (192, )]),
        Pooling(fshape='all', strides=1, op="avg"),
        Dropout(keep=0.8),
        Conv(**self.conv_params(1, nout, activation=Softmax()))
    ]
def create_network():
    layers = [
        DataTransform(transform=Normalizer(divisor=128.)),
        Conv((11, 11, 96), init=Kaiming(), activation=Rectlin(), strides=4, padding=1),
        Conv((1, 1, 96), init=Kaiming(), activation=Rectlin(), strides=1),
        Conv((3, 3, 96), init=Kaiming(), activation=Rectlin(), strides=2, padding=1),  # 54->27
        Conv((5, 5, 256), init=Kaiming(), activation=Rectlin(), strides=1),  # 27->23
        Conv((1, 1, 256), init=Kaiming(), activation=Rectlin(), strides=1),
        Conv((3, 3, 256), init=Kaiming(), activation=Rectlin(), strides=2, padding=1),  # 23->12
        Conv((3, 3, 384), init=Kaiming(), activation=Rectlin(), strides=1, padding=1),
        Conv((1, 1, 384), init=Kaiming(), activation=Rectlin(), strides=1),
        Conv((3, 3, 384), init=Kaiming(), activation=Rectlin(), strides=2, padding=1),  # 12->6
        Dropout(keep=0.5),
        Conv((3, 3, 1024), init=Kaiming(), activation=Rectlin(), strides=1, padding=1),
        Conv((1, 1, 1024), init=Kaiming(), activation=Rectlin(), strides=1),
        Conv((1, 1, 1000), init=Kaiming(), activation=Rectlin(), strides=1),
        Pooling(6, op='avg'),
        Activation(Softmax())
    ]
    return Model(layers=layers), GeneralizedCost(costfunc=CrossEntropyMulti())
def create_network(stage_depth):
    if stage_depth in (18, 18):
        stages = (2, 2, 2, 2)
    elif stage_depth in (34, 50):
        stages = (3, 4, 6, 3)
    elif stage_depth in (68, 101):
        stages = (3, 4, 23, 3)
    elif stage_depth in (102, 152):
        stages = (3, 8, 36, 3)
    else:
        raise ValueError('Invalid stage_depth value {}'.format(stage_depth))

    bottleneck = False
    if stage_depth in (50, 101, 152):
        bottleneck = True

    layers = [Conv(name='Input Layer', **conv_params(7, 64, strides=2)),
              Pooling(3, strides=2)]

    # Structure of the deep residual part of the network:
    # stage_depth modules of 2 convolutional layers each at feature map depths
    # of 64, 128, 256, 512
    nfms = list(itt.chain.from_iterable(
        [itt.repeat(2**(x + 6), r) for x, r in enumerate(stages)]))
    strides = [-1] + [1 if cur == prev else 2
                      for cur, prev in zip(nfms[1:], nfms[:-1])]

    for nfm, stride in zip(nfms, strides):
        layers.append(module_factory(nfm, bottleneck, stride))

    layers.append(Pooling('all', op='avg', name='end_resnet'))
    layers.append(Conv(name='Custom Head 1', **conv_params(1, 1000, relu=True)))
    layers.append(Dropout(0.5))
    layers.append(Conv(name='Custom Head 2', **conv_params(1, 2, relu=False)))
    layers.append(Activation(Softmax()))

    return Model(layers=layers)
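# Pure-Python check of the feature-map/stride bookkeeping above, shown here
# for stage_depth=50 (stages = (3, 4, 6, 3)): depth doubles per stage, and a
# stride-2 module appears exactly at each depth transition.
import itertools as itt

stages = (3, 4, 6, 3)
nfms = list(itt.chain.from_iterable(
    [itt.repeat(2**(x + 6), r) for x, r in enumerate(stages)]))
strides = [-1] + [1 if cur == prev else 2
                  for cur, prev in zip(nfms[1:], nfms[:-1])]
assert nfms == [64] * 3 + [128] * 4 + [256] * 6 + [512] * 3
assert strides.count(2) == 3  # one downsampling module per stage transition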
def test_model_serialize(backend):
    (X_train, y_train), (X_test, y_test), nclass = load_mnist()
    train_set = DataIterator([X_train, X_train], y_train, nclass=nclass,
                             lshape=(1, 28, 28))

    init_norm = Gaussian(loc=0.0, scale=0.01)

    # initialize model
    path1 = [Conv((5, 5, 16), init=init_norm, bias=Constant(0), activation=Rectlin()),
             Pooling(2),
             Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin())]
    path2 = [Dropout(keep=0.5),
             Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin())]
    layers = [MergeConcat([path1, path2]),
              Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin()),
              BatchNorm(),
              Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))]

    tmp_save = 'test_model_serialize_tmp_save.pickle'
    mlp = Model(layers=layers)
    mlp.optimizer = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9)
    mlp.cost = GeneralizedCost(costfunc=CrossEntropyBinary())

    n_test = 3
    num_epochs = 3

    # Train model for num_epochs and n_test batches
    for epoch in range(num_epochs):
        for i, (x, t) in enumerate(train_set):
            x = mlp.fprop(x)
            delta = mlp.cost.get_errors(x, t)
            mlp.bprop(delta)
            mlp.optimizer.optimize(mlp.layers_to_optimize, epoch=epoch)
            if i > n_test:
                break

    # Get expected outputs of n_test batches and states of all layers
    outputs_exp = []
    pdicts_exp = [l.get_params_serialize() for l in mlp.layers_to_optimize]
    for i, (x, t) in enumerate(train_set):
        outputs_exp.append(mlp.fprop(x, inference=True))
        if i > n_test:
            break

    # Serialize model
    save_obj(mlp.serialize(keep_states=True), tmp_save)

    # Load model
    mlp = Model(layers=layers)
    mlp.load_weights(tmp_save)

    outputs = []
    pdicts = [l.get_params_serialize() for l in mlp.layers_to_optimize]
    for i, (x, t) in enumerate(train_set):
        outputs.append(mlp.fprop(x, inference=True))
        if i > n_test:
            break

    # Check outputs, states, and params are the same
    for output, output_exp in zip(outputs, outputs_exp):
        assert np.allclose(output.get(), output_exp.get())

    for pd, pd_exp in zip(pdicts, pdicts_exp):
        for s, s_e in zip(pd['states'], pd_exp['states']):
            if isinstance(s, list):  # this is the batch norm case
                for _s, _s_e in zip(s, s_e):
                    assert np.allclose(_s, _s_e)
            else:
                assert np.allclose(s, s_e)
        for p, p_e in zip(pd['params'], pd_exp['params']):
            if isinstance(p, list):  # this is the batch norm case
                for _p, _p_e in zip(p, p_e):
                    assert np.allclose(_p, _p_e)
            else:
                assert np.allclose(p, p_e)

    os.remove(tmp_save)
         Conv((3, 3, 256), padding=1, init=init_g1, bias=Constant(1.0),
              activation=relu, name='conv5'))
layers.append(Pooling(3, strides=2, name='pool5'))
layers.append(
    Affine(nout=4096, init=init_g2, bias=Constant(1.0), activation=relu,
           name='fc6'))
layers.append(Dropout(keep=0.5, name='drop6'))
layers.append(
    Affine(nout=4096, init=init_g2, bias=Constant(1.0), activation=relu,
           name='fc7'))
layers.append(Dropout(keep=0.5, name='drop7'))
layers.append(
    Affine(nout=1000, init=init_g1, bias=Constant(0.0), activation=Softmax(),
           name='fc8'))
def build_model(dataset, frcn_rois_per_img, train_pre_nms_N=12000,
                train_post_nms_N=2000, test_pre_nms_N=6000,
                test_post_nms_N=300, inference=False):
    """
    Returns the Faster-RCNN model. For inference, also returns a reference to
    the proposal layer.

    Faster-RCNN contains three modules: VGG, the Region Proposal Network (RPN),
    and the Classification Network (ROI-pooling + Fully Connected layers),
    organized as a tree. The tree has 4 branches:

    VGG -> b1 -> Conv (3x3) -> b2 -> Conv (1x1) -> CrossEntropyMulti (objectness label)
                               b2 -> Conv (1x1) -> SmoothL1Loss (bounding box targets)
           b1 -> PropLayer -> ROI -> Affine -> Affine -> b3 -> Affine -> CrossEntropyMulti
                                                         b3 -> Affine -> SmoothL1Loss

    When the model is constructed for inference, several elements are different:
    - The number of regions to keep before and after non-max suppression is
      (12000, 2000) for training and (6000, 300) for inference.
    - The out_shape of the proposalLayer of the network is equal to post_nms_N
      (number of rois to keep after performing nms). This is configured by
      passing the inference flag to the proposalLayer constructor.

    Arguments:
        dataset (objectlocalization): Dataset object.
        frcn_rois_per_img (int): Number of ROIs per image considered by the
                                 classification network.
        inference (bool): Construct the model for inference. Default is False.

    Returns:
        model (Model): Faster-RCNN model.
        proposalLayer (proposalLayer): Reference to proposalLayer in the model.
                                       Returned only for inference=True.
    """
    num_classes = dataset.num_classes

    # define the branch points
    b1 = BranchNode(name="conv_branch")
    b2 = BranchNode(name="rpn_branch")
    b3 = BranchNode(name="roi_branch")

    # define VGG
    VGG = util.add_vgg_layers()

    # define RPN
    rpn_init = dict(strides=1, init=Gaussian(scale=0.01), bias=Constant(0))
    # these references are passed to the ProposalLayer
    RPN_3x3 = Conv((3, 3, 512), activation=Rectlin(), padding=1, **rpn_init)
    RPN_1x1_obj = Conv((1, 1, 18), activation=PixelwiseSoftmax(c=2), padding=0, **rpn_init)
    RPN_1x1_bbox = Conv((1, 1, 36), activation=Identity(), padding=0, **rpn_init)

    # inference uses different network settings
    if not inference:
        pre_nms_N = train_pre_nms_N    # default 12000
        post_nms_N = train_post_nms_N  # default 2000
    else:
        pre_nms_N = test_pre_nms_N     # default 6000
        post_nms_N = test_post_nms_N   # default 300

    proposalLayer = ProposalLayer([RPN_1x1_obj, RPN_1x1_bbox],
                                  dataset, pre_nms_N=pre_nms_N,
                                  post_nms_N=post_nms_N,
                                  num_rois=frcn_rois_per_img,
                                  inference=inference)

    # define ROI classification network
    ROI = [proposalLayer,
           RoiPooling(HW=(7, 7)),
           Affine(nout=4096, init=Gaussian(scale=0.005),
                  bias=Constant(.1), activation=Rectlin()),
           Dropout(keep=0.5),
           Affine(nout=4096, init=Gaussian(scale=0.005),
                  bias=Constant(.1), activation=Rectlin()),
           Dropout(keep=0.5)]

    ROI_category = Affine(nout=num_classes, init=Gaussian(scale=0.01),
                          bias=Constant(0), activation=Softmax())
    ROI_bbox = Affine(nout=4 * num_classes, init=Gaussian(scale=0.001),
                      bias=Constant(0), activation=Identity())

    # build the model
    # the four branches of the tree mirror the branches listed above
    frcn_tree = Tree([ROI + [b3, ROI_category],
                      [b3, ROI_bbox]])

    model = Model(layers=Tree([VGG + [b1, RPN_3x3, b2, RPN_1x1_obj],
                               [b2, RPN_1x1_bbox],
                               [b1] + [frcn_tree],
                               ]))

    if inference:
        return (model, proposalLayer)
    else:
        return model
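# Hedged construction sketch for the inference path described in the docstring;
# the dataset object and .prm weights path are placeholders, not from the source.
train_cfg = dict(pre_nms_N=12000, post_nms_N=2000)  # training defaults above
test_cfg = dict(pre_nms_N=6000, post_nms_N=300)     # inference defaults above
# model, proposalLayer = build_model(dataset, frcn_rois_per_img=300,
#                                    inference=True)
# model.load_params('frcn_weights.prm')  # hypothetical weights file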
                     init=Gaussian(scale=0.01), bias=Constant(0),
                     activation=Softmax())
bbox_pred = Affine(nout=84, init=Gaussian(scale=0.001), bias=Constant(0),
                   activation=Identity())

frcn_layers = [
    RoiPooling(layers=imagenet_layers, HW=(6, 6), bprop_enabled=frcn_fine_tune),
    Affine(nout=4096, init=Gaussian(scale=0.005), bias=Constant(.1),
           activation=Rectlin()),
    Dropout(keep=0.5),
    Affine(nout=4096, init=Gaussian(scale=0.005), bias=Constant(.1),
           activation=Rectlin()),
    Dropout(keep=0.5),
    b1,
    class_score
]
bb_layers = [
    b1,
    bbox_pred,
]

# setup optimizer
opt_w = GradientDescentMomentum(0.001 * learning_rate_scale, 0.9, wdecay=0.0005)
# really 10 classes, pad to nearest power of 2 to match conv output
train_set = ArrayIterator(X_train, y_train, nclass=16, lshape=(3, 32, 32))
valid_set = ArrayIterator(X_test, y_test, nclass=16, lshape=(3, 32, 32))

init_uni = Gaussian(scale=0.05)
opt_gdm = GradientDescentMomentum(learning_rate=float(args.learning_rate),
                                  momentum_coef=0.9,
                                  wdecay=float(args.weight_decay),
                                  schedule=Schedule(step_config=[200, 250, 300],
                                                    change=0.1))

relu = Rectlin()
conv = dict(init=init_uni, batch_norm=False, activation=relu)
convp1 = dict(init=init_uni, batch_norm=False, activation=relu, padding=1)
convp1s2 = dict(init=init_uni, batch_norm=False, activation=relu,
                padding=1, strides=2)

layers = [Dropout(keep=.8),
          Conv((3, 3, 96), **convp1),
          Conv((3, 3, 96), **convp1),
          Conv((3, 3, 96), **convp1s2),
          Dropout(keep=.5),
          Conv((3, 3, 192), **convp1),
          Conv((3, 3, 192), **convp1),
          Conv((3, 3, 192), **convp1s2),
          Dropout(keep=.5),
          Conv((3, 3, 192), **convp1),
          Conv((1, 1, 192), **conv),
          Conv((1, 1, 16), **conv),
          Pooling(8, op="avg"),
          Activation(Softmax())]

cost = GeneralizedCost(costfunc=CrossEntropyMulti())
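# Stand-alone illustration of the class-padding note above: the final Conv
# emits 16 feature maps, so one-hot targets are padded from 10 real classes
# to 16 and the extra columns stay zero (ArrayIterator performs a comparable
# one-hot expansion internally when nclass is given).
import numpy as np

y = np.array([3, 9])
onehot = np.zeros((y.size, 16))
onehot[np.arange(y.size), y] = 1
assert onehot.shape == (2, 16) and onehot[:, 10:].sum() == 0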
# load up the data set

# setup weight initialization function
init = Gaussian()

# discriminator using convolution layers
lrelu = Rectlin(slope=0.1)  # leaky relu for discriminator
# sigmoid = Logistic()  # sigmoid activation function
conv1 = dict(init=init, batch_norm=False,
             activation=lrelu)  # what about a BatchNorm layer vs. the batch_norm parameter?
conv2 = dict(init=init, batch_norm=True, activation=lrelu, padding=2)
conv3 = dict(init=init, batch_norm=True, activation=lrelu, padding=1)
D_layers = [
    Conv((5, 5, 5, 32), **conv1),
    Dropout(keep=0.8),
    Conv((5, 5, 5, 8), **conv2),
    Dropout(keep=0.8),
    Conv((5, 5, 5, 8), **conv2),
    Dropout(keep=0.8),
    Conv((5, 5, 5, 8), **conv3),
    Dropout(keep=0.8),
    Pooling((2, 2, 2)),  # what about a Flatten layer?
    Linear(1, init=init)  # what about the activation function?
]

# generator using convolution layers
latent_size = 200
relu = Rectlin(slope=0)  # relu for generator
conv4 = dict(init=init,
def test_model_serialize(backend_default, data):
    dataset = MNIST(path=data)
    (X_train, y_train), (X_test, y_test), nclass = dataset.load_data()
    train_set = ArrayIterator([X_train, X_train], y_train, nclass=nclass,
                              lshape=(1, 28, 28))

    init_norm = Gaussian(loc=0.0, scale=0.01)

    # initialize model
    path1 = Sequential([Conv((5, 5, 16), init=init_norm, bias=Constant(0),
                             activation=Rectlin()),
                        Pooling(2),
                        Affine(nout=20, init=init_norm, bias=init_norm,
                               activation=Rectlin())])
    path2 = Sequential([Affine(nout=100, init=init_norm, bias=Constant(0),
                               activation=Rectlin()),
                        Dropout(keep=0.5),
                        Affine(nout=20, init=init_norm, bias=init_norm,
                               activation=Rectlin())])
    layers = [MergeMultistream(layers=[path1, path2], merge="stack"),
              Affine(nout=20, init=init_norm, batch_norm=True, activation=Rectlin()),
              Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))]

    tmp_save = 'test_model_serialize_tmp_save.pickle'
    mlp = Model(layers=layers)
    mlp.optimizer = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9)
    mlp.cost = GeneralizedCost(costfunc=CrossEntropyBinary())
    mlp.initialize(train_set, cost=mlp.cost)

    n_test = 3
    num_epochs = 3

    # Train model for num_epochs and n_test batches
    for epoch in range(num_epochs):
        for i, (x, t) in enumerate(train_set):
            x = mlp.fprop(x)
            delta = mlp.cost.get_errors(x, t)
            mlp.bprop(delta)
            mlp.optimizer.optimize(mlp.layers_to_optimize, epoch=epoch)
            if i > n_test:
                break

    # Get expected outputs of n_test batches and states of all layers
    outputs_exp = []
    pdicts_exp = [l.get_params_serialize() for l in mlp.layers_to_optimize]
    for i, (x, t) in enumerate(train_set):
        outputs_exp.append(mlp.fprop(x, inference=True))
        if i > n_test:
            break

    # Serialize model
    mlp.save_params(tmp_save, keep_states=True)

    # Load model
    mlp = Model(tmp_save)
    mlp.initialize(train_set)

    outputs = []
    pdicts = [l.get_params_serialize() for l in mlp.layers_to_optimize]
    for i, (x, t) in enumerate(train_set):
        outputs.append(mlp.fprop(x, inference=True))
        if i > n_test:
            break

    # Check outputs, states, and params are the same
    for output, output_exp in zip(outputs, outputs_exp):
        assert allclose_with_out(output.get(), output_exp.get())

    for pd, pd_exp in zip(pdicts, pdicts_exp):
        for s, s_e in zip(pd['states'], pd_exp['states']):
            if isinstance(s, list):  # this is the batch norm case
                for _s, _s_e in zip(s, s_e):
                    assert allclose_with_out(_s, _s_e)
            else:
                assert allclose_with_out(s, s_e)
        for p, p_e in zip(pd['params'], pd_exp['params']):
            assert type(p) == type(p_e)
            if isinstance(p, list):  # this is the batch norm case
                for _p, _p_e in zip(p, p_e):
                    assert allclose_with_out(_p, _p_e)
            elif isinstance(p, np.ndarray):
                assert allclose_with_out(p, p_e)
            else:
                assert p == p_e

    os.remove(tmp_save)
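# The round trip this test exercises, reduced to its three essential calls
# (paths are placeholders; keep_states=True also persists optimizer state):
# mlp.save_params('weights.prm', keep_states=True)
# restored = Model('weights.prm')
# restored.initialize(train_set)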
                       reset_cells=True, batch_norm=False, bi_sum=False)
elif args.rlayer_type == 'bibnrnn':
    rlayer = DeepBiRNN(hidden_size, g_uni, activation=Tanh(),
                       depth=1, reset_cells=True, batch_norm=True)

layers = [
    LookupTable(vocab_size=vocab_size, embedding_dim=embedding_dim, init=uni),
    rlayer,
    RecurrentSum(),
    Dropout(keep=0.5),
    Affine(2, g_uni, bias=g_uni, activation=Softmax())
]

model = Model(layers=layers)

cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))
optimizer = Adagrad(learning_rate=0.01,
                    gradient_clip_value=gradient_clip_value)

# configure callbacks
callbacks = Callbacks(model, eval_set=valid_set, **args.callback_args)

# train model
model.fit(train_set, optimizer=optimizer,
    schedule=Schedule(step_config=step_config,
                      change=float(assignments.get("momentum_step_change"))),
)

relu = Rectlin()
conv = dict(init=init_uni, batch_norm=False, activation=relu)
convp1 = dict(init=init_uni, batch_norm=False, activation=relu, padding=1)
convp1s2 = dict(init=init_uni, batch_norm=False, activation=relu,
                padding=1, strides=2)

layers = [
    Dropout(keep=.8),
    Conv((3, 3, 96), **convp1),
    Conv((3, 3, 96), **convp1),
    Conv((3, 3, 96), **convp1s2),
    Dropout(keep=.5),
    Conv((3, 3, 192), **convp1),
    Conv((3, 3, 192), **convp1),
    Conv((3, 3, 192), **convp1s2),
    Dropout(keep=.5),
    Conv((3, 3, 192), **convp1),
    Conv((1, 1, 192), **conv),
    Conv((1, 1, 16), **conv),
    Pooling(8, op="avg"),
    Activation(Softmax())
]
1: [
    Conv((3, 5, 64), init=gauss, strides=big, **common),
    Pooling(2, strides=2),
    Conv((3, 3, 128), init=gauss, strides=small, **common),
    Pooling(2, strides=2),
    Conv((3, 3, 256), init=gauss, strides=small, **common),
    Conv((2, 2, 512), init=gauss, strides=tiny, **common),
    Conv((2, 2, 128), init=gauss, strides=tiny, **common),
    DeepBiRNN(64, init=glorot, reset_cells=True, depth=3, **common),
    RecurrentMean(),
    Affine(nout=2, init=gauss, activation=Softmax())
],
2: [
    Conv((3, 5, 64), init=gauss, strides=big, **common),
    Pooling(2, strides=2),
    Dropout(0.8),
    Conv((3, 3, 128), init=gauss, strides=small, **common),
    Pooling(2, strides=2),
    Dropout(0.4),
    Conv((3, 3, 256), init=gauss, strides=small, **common),
    Dropout(0.2),
    Conv((2, 2, 512), init=gauss, strides=tiny, **common),
    Conv((2, 2, 128), init=gauss, strides=tiny, **common),
    DeepBiRNN(64, init=glorot, reset_cells=True, depth=5, **common),
    RecurrentMean(),
    Affine(nout=2, init=gauss, activation=Softmax())
],
3: [
    Conv((3, 5, 64), init=gauss, strides=big, **common),
    Pooling(2, strides=2),
    Dropout(0.8),
layers = [Conv((11, 11, 64), init=Gaussian(scale=0.01), bias=Constant(0),
               activation=Rectlin(), padding=3, strides=4),
          Pooling(3, strides=2),
          Conv((5, 5, 192), init=Gaussian(scale=0.01), bias=Constant(1),
               activation=Rectlin(), padding=2),
          Pooling(3, strides=2),
          Conv((3, 3, 384), init=Gaussian(scale=0.03), bias=Constant(0),
               activation=Rectlin(), padding=1),
          Conv((3, 3, 256), init=Gaussian(scale=0.03), bias=Constant(1),
               activation=Rectlin(), padding=1),
          Conv((3, 3, 256), init=Gaussian(scale=0.03), bias=Constant(1),
               activation=Rectlin(), padding=1),
          Pooling(3, strides=2),
          Affine(nout=4096, init=Gaussian(scale=0.01), bias=Constant(1),
                 activation=Rectlin()),
          Dropout(keep=1.0),
          Affine(nout=4096, init=Gaussian(scale=0.01), bias=Constant(1),
                 activation=Rectlin()),
          Dropout(keep=1.0),
          Affine(nout=1000, init=Gaussian(scale=0.01), bias=Constant(-7),
                 activation=Softmax())]

if args.direct:
    assert args.model_file is not None
    model = Model(args.model_file)
    args.model_file = None
else:
    model = Model(layers=layers)

# drop weights LR by 1/250**(1/3) at epochs (23, 45, 66), drop bias LR by 1/10 at epoch 45
weight_sched = Schedule([22, 44, 65], (1 / 250.)**(1 / 3.))
opt_gdm = GradientDescentMomentum(0.01 / 10, 0.9, wdecay=0.0005,
                                  schedule=weight_sched,
                                  stochastic_round=args.rounding)
opt_biases = GradientDescentMomentum(0.02 / 10, 0.9, schedule=Schedule([44], 0.1),
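# Quick check of the schedule arithmetic in the comment above: three
# multiplicative drops of (1/250)**(1/3) compound to an overall 1/250
# reduction of the starting weight learning rate.
change = (1 / 250.) ** (1 / 3.)
lr = 0.01 / 10
for _ in (22, 44, 65):
    lr *= change
assert abs(lr - (0.01 / 10) / 250.) < 1e-12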
layers = []
layers.append(DataTransform(transform=Normalizer(divisor=128.)))

layers.append(Conv((11, 11, 96), init=init_uni, activation=relu, strides=4, padding=1))
layers.append(Conv((1, 1, 96), init=init_uni, activation=relu, strides=1))
layers.append(Conv((3, 3, 96), init=init_uni, activation=relu, strides=2, padding=1))  # 54->27
layers.append(Conv((5, 5, 256), init=init_uni, activation=relu, strides=1))  # 27->23
layers.append(Conv((1, 1, 256), init=init_uni, activation=relu, strides=1))
layers.append(Conv((3, 3, 256), init=init_uni, activation=relu, strides=2, padding=1))  # 23->12
layers.append(Conv((3, 3, 384), init=init_uni, activation=relu, strides=1, padding=1))
layers.append(Conv((1, 1, 384), init=init_uni, activation=relu, strides=1))
layers.append(Conv((3, 3, 384), init=init_uni, activation=relu, strides=2, padding=1))  # 12->6
layers.append(Dropout(keep=0.5))
layers.append(Conv((3, 3, 1024), init=init_uni, activation=relu, strides=1, padding=1))
layers.append(Conv((1, 1, 1024), init=init_uni, activation=relu, strides=1))
layers.append(Conv((1, 1, 1000), init=init_uni, activation=relu, strides=1))
layers.append(Pooling(6, op='avg'))
layers.append(Activation(Softmax()))

cost = GeneralizedCost(costfunc=CrossEntropyMulti())

mlp = Model(layers=layers)

if args.model_file:
    import os
    assert os.path.exists(args.model_file), '%s not found' % args.model_file
    mlp.load_weights(args.model_file)
                     minibatch_size=args.batch_size,
                     subset_fraction=1)
test_set = DataLoader(config, be)
test_set = TypeCast(test_set, index=0, dtype=np.float32)  # cast image to float

relu = Rectlin()
init_uni = GlorotUniform()
convp1 = dict(init=init_uni, batch_norm=True, activation=relu, padding=1)

layers = [
    Conv((5, 5, 24), **convp1),
    Pooling(2, op='max'),
    Conv((3, 3, 32), **convp1),
    Pooling(2, op='max'),
    Conv((3, 3, 48), **convp1),
    Dropout(keep=.6),
    Pooling(2, op='max'),
    Affine(nout=64, init=init_uni, activation=relu),
    Dropout(keep=.4),
    Affine(nout=2, init=init_uni, activation=Softmax())
]

lunaModel = Model(layers)
lunaModel.load_params('LUNA16_simpleCNN2_model.prm')

# neon_logger.display('Calculating metrics on the test set. This could take a while...')
# neon_logger.display('Misclassification error (test) = {:.2f}%'.format(
#     lunaModel.eval(test_set, metric=Misclassification())[0] * 100))
# neon_logger.display('Precision/recall (test) = {}'.format(
#     lunaModel.eval(test_set, metric=PrecisionRecall(num_classes=2))))
# neon_logger.display('Misclassification (test) = {}'.format(
#     lunaModel.eval(test_set, metric=Misclassification())))
# neon_logger.display('Accuracy (test) = {}'.format(
#     lunaModel.eval(test_set, metric=Accuracy())))