def layers(self):
    """Assemble the AlexNet-like topology for this model.

    Returns:
        list: conv/LRN/pool trunk followed by two dropout-regularized
        4096-unit affine layers and a ``self.noutputs``-way classifier.
        The classifier activation is Softmax when ``self.use_softmax``
        is set, otherwise a shortcut Logistic.
    """
    def conv(fshape, scale, pad):
        # every conv layer shares zero bias, ReLU, and unit stride
        return Conv(fshape, init=Gaussian(scale=scale), bias=Constant(0),
                    activation=Rectlin(), padding=pad, strides=1)

    def fc(nout, act):
        # fully-connected layers share the 0.01-scale Gaussian init
        return Affine(nout=nout, init=Gaussian(scale=0.01),
                      bias=Constant(0), activation=act)

    final_act = Softmax() if self.use_softmax else Logistic(shortcut=True)
    return [
        conv((7, 7, 96), 0.0001, 3),
        LRN(31, ascale=0.001, bpower=0.75),
        Pooling(3, strides=2, padding=1),
        conv((5, 5, 256), 0.01, 2),
        LRN(31, ascale=0.001, bpower=0.75),
        Pooling(3, strides=2, padding=1),
        conv((3, 3, 384), 0.01, 1),
        conv((3, 3, 384), 0.01, 1),
        conv((3, 3, 256), 0.01, 1),
        Pooling(3, strides=2, padding=1),
        fc(4096, Identity()),
        Dropout(keep=0.5),
        fc(4096, Identity()),
        Dropout(keep=0.5),
        fc(self.noutputs, final_act),
    ]
def create_frcn_model(frcn_fine_tune=False):
    """Build the Fast R-CNN model as a two-branch tree.

    Shared trunk: VGG conv layers -> ROI pooling (7x7) -> two
    FC-4096/dropout stages. After branch node ``b1`` the tree splits
    into a 21-output softmax classification head and an 84-output
    linear bounding-box regression head.

    Arguments:
        frcn_fine_tune (bool): enable backprop through ROI pooling into
            the VGG trunk. Defaults to False.

    Returns:
        Model: tree-structured Fast R-CNN model.
    """
    b1 = BranchNode(name="b1")
    imagenet_layers = add_vgg_layers()

    def fc4096():
        # FC layers use the Fast R-CNN init: small Gaussian, 0.1 bias
        return Affine(nout=4096, init=Gaussian(scale=0.005),
                      bias=Constant(.1), activation=Rectlin())

    frcn_layers = [
        RoiPooling(layers=imagenet_layers, HW=(7, 7),
                   bprop_enabled=frcn_fine_tune),
        fc4096(),
        Dropout(keep=0.5),
        fc4096(),
        Dropout(keep=0.5),
        b1,
        Affine(nout=21, init=Gaussian(scale=0.01), bias=Constant(0),
               activation=Softmax()),
    ]
    bb_layers = [
        b1,
        Affine(nout=84, init=Gaussian(scale=0.001), bias=Constant(0),
               activation=Identity()),
    ]
    return Model(layers=Tree([frcn_layers, bb_layers]))
valid_set = DataIteratorSequence(time_series.test, seq_len,
                                 return_sequences=return_sequences)

# define weights initialization (an alternative: Uniform(low=-0.08, high=0.08))
init = GlorotUniform()

# define model: topology differs between the two strategies.
# Sequence targets use reset_cells=False and predict at every step;
# single targets use reset_cells=True and read only the last output.
lstm_kwargs = dict(activation=Logistic(), gate_activation=Tanh())
seq_targets = return_sequences is True

layers = [LSTM(hidden, init, reset_cells=not seq_targets, **lstm_kwargs)]
if not seq_targets:
    layers.append(RecurrentLast())
layers.append(Affine(train_set.nfeatures, init, bias=init,
                     activation=Identity()))

model = Model(layers=layers)
cost = GeneralizedCost(MeanSquared())
optimizer = RMSProp(stochastic_round=args.rounding)
def build_model(dataset, frcn_rois_per_img, train_pre_nms_N=12000,
                train_post_nms_N=2000, test_pre_nms_N=6000,
                test_post_nms_N=300, inference=False):
    """
    Returns the Faster-RCNN model. For inference, also returns a reference to the
    proposal layer.

    Faster-RCNN contains three modules: VGG, the Region Proposal Network (RPN),
    and the Classification Network (ROI-pooling + Fully Connected layers),
    organized as a tree. Tree has 4 branches:

    VGG -> b1 -> Conv (3x3) -> b2 -> Conv (1x1) -> CrossEntropyMulti (objectness label)
                               b2 -> Conv (1x1) -> SmoothL1Loss (bounding box targets)
           b1 -> PropLayer -> ROI -> Affine -> Affine -> b3 -> Affine -> CrossEntropyMulti
                                                         b3 -> Affine -> SmoothL1Loss

    When the model is constructed for inference, several elements are different:
    - The number of regions to keep before and after non-max suppression is
      (12000, 2000) for training and (6000, 300) for inference.
      (NOTE: the original docstring had these swapped, contradicting the
      defaults and the if/else below.)
    - The out_shape of the proposalLayer of the network is equal to post_nms_N
      (number of rois to keep after performing nms). This is configured by
      passing the inference flag to the proposalLayer constructor.

    Arguments:
        dataset (objectlocalization): Dataset object.
        frcn_rois_per_img (int): Number of ROIs per image considered by the
            classification network.
        train_pre_nms_N (int): Regions kept before NMS when training.
        train_post_nms_N (int): Regions kept after NMS when training.
        test_pre_nms_N (int): Regions kept before NMS for inference.
        test_post_nms_N (int): Regions kept after NMS for inference.
        inference (bool): Construct the model for inference. Default is False.

    Returns:
        model (Model): Faster-RCNN model.
        proposalLayer (proposalLayer): Reference to proposalLayer in the model.
                                       Returned only for inference=True.
    """
    num_classes = dataset.num_classes

    # define the branch points
    b1 = BranchNode(name="conv_branch")
    b2 = BranchNode(name="rpn_branch")
    b3 = BranchNode(name="roi_branch")

    # define VGG
    VGG = util.add_vgg_layers()

    # define RPN
    rpn_init = dict(strides=1, init=Gaussian(scale=0.01), bias=Constant(0))
    # these references are passed to the ProposalLayer.
    RPN_3x3 = Conv((3, 3, 512), activation=Rectlin(), padding=1, **rpn_init)
    RPN_1x1_obj = Conv((1, 1, 18), activation=PixelwiseSoftmax(c=2),
                       padding=0, **rpn_init)
    RPN_1x1_bbox = Conv((1, 1, 36), activation=Identity(), padding=0,
                        **rpn_init)

    # inference uses different network settings
    if not inference:
        pre_nms_N = train_pre_nms_N    # default 12000
        post_nms_N = train_post_nms_N  # default 2000
    else:
        pre_nms_N = test_pre_nms_N     # default 6000
        post_nms_N = test_post_nms_N   # default 300

    proposalLayer = ProposalLayer([RPN_1x1_obj, RPN_1x1_bbox], dataset,
                                  pre_nms_N=pre_nms_N, post_nms_N=post_nms_N,
                                  num_rois=frcn_rois_per_img,
                                  inference=inference)

    # define ROI classification network
    ROI = [
        proposalLayer,
        RoiPooling(HW=(7, 7)),
        Affine(nout=4096, init=Gaussian(scale=0.005), bias=Constant(.1),
               activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(nout=4096, init=Gaussian(scale=0.005), bias=Constant(.1),
               activation=Rectlin()),
        Dropout(keep=0.5),
    ]

    ROI_category = Affine(nout=num_classes, init=Gaussian(scale=0.01),
                          bias=Constant(0), activation=Softmax())
    ROI_bbox = Affine(nout=4 * num_classes, init=Gaussian(scale=0.001),
                      bias=Constant(0), activation=Identity())

    # build the model
    # the four branches of the tree mirror the branches listed above
    frcn_tree = Tree([ROI + [b3, ROI_category],
                      [b3, ROI_bbox]])

    model = Model(layers=Tree([
        VGG + [b1, RPN_3x3, b2, RPN_1x1_obj],
        [b2, RPN_1x1_bbox],
        [b1] + [frcn_tree],
    ]))

    if inference:
        return (model, proposalLayer)
    else:
        return model
args = parser.parse_args() # load up the mnist data set dataset = MNIST(path=args.data_dir) train_set = dataset.train_iter valid_set = dataset.valid_iter # setup weight initialization function init = Uniform(-1, 1) # setup layers layers = [ BinaryAffine(nout=4096, init=init, batch_norm=True, activation=Sign()), BinaryAffine(nout=4096, init=init, batch_norm=True, activation=Sign()), BinaryAffine(nout=4096, init=init, batch_norm=True, activation=Sign()), BinaryAffine(nout=10, init=init, batch_norm=True, activation=Identity()) ] # setup cost function as Square Hinge Loss cost = GeneralizedCost(costfunc=SquareHingeLoss()) # setup optimizer LR_start = 1.65e-2 def ShiftAdaMax_with_Scale(LR=1): return ShiftAdaMax(learning_rate=LR_start * LR, schedule=ShiftSchedule(2, shift_size=1)) optimizer = MultiOptimizer({
def create_model(dis_model='dc', gen_model='dc', cost_type='wasserstein',
                 noise_type='normal', im_size=64, n_chan=3, n_noise=100,
                 n_gen_ftr=64, n_dis_ftr=64, depth=4, n_extra_layers=0,
                 batch_norm=True, gen_squash=None, dis_squash=None,
                 dis_iters=5, wgan_param_clamp=None, wgan_train_sched=False):
    """
    Create a GAN model and associated GAN cost function for image generation

    Arguments:
        dis_model (str): Discriminator type, 'mlp' for a simple MLP or 'dc'
                         for a DC-GAN style model (defaults to 'dc')
        gen_model (str): Generator type, 'mlp' or 'dc' (defaults to 'dc')
        cost_type (str): Cost type: 'original', 'modified' following
                         Goodfellow2014, or 'wasserstein' following
                         Arjovsky2017 (defaults to 'wasserstein')
        noise_type (str): Noise distribution, 'uniform' or 'normal'
                          (defaults to 'normal')
        im_size (int): Image size (defaults to 64)
        n_chan (int): Number of image channels (defaults to 3)
        n_noise (int): Number of noise dimensions (defaults to 100)
        n_gen_ftr (int): Number of generator feature maps (defaults to 64)
        n_dis_ftr (int): Number of discriminator feature maps (defaults to 64)
        depth (int): Depth of layers in case of MLP (defaults to 4)
        n_extra_layers (int): Number of extra conv layers in case of DC
                              (defaults to 0)
        batch_norm (bool): Enable batch normalization (defaults to True)
        gen_squash (str or None): Squashing function at the end of generator
                                  (defaults to None)
        dis_squash (str or None): Squashing function at the end of
                                  discriminator (defaults to None)
        dis_iters (int): Number of critics for discriminator (defaults to 5)
        wgan_param_clamp (float or None): In case of WGAN weight clamp value,
                                          None for others
        wgan_train_sched (bool): Enable training schedule of number of critics
                                 (defaults to False)

    Returns:
        tuple: (GAN model, GeneralizedGANCost) pair.
    """
    assert dis_model in ['mlp', 'dc'], \
        "Unsupported model type for discriminator net, supported: 'mlp' and 'dc'"
    assert gen_model in ['mlp', 'dc'], \
        "Unsupported model type for generator net, supported: 'mlp' and 'dc'"
    assert cost_type in ['original', 'modified', 'wasserstein'], \
        "Unsupported GAN cost function type, supported: 'original', 'modified' and 'wasserstein'"

    # types of final squashing functions
    squash_func = dict(nosquash=Identity(), sym=Tanh(), asym=Logistic())

    # Fill in squashing defaults; they depend on the cost/model combination.
    if cost_type == 'wasserstein':
        if gen_model == 'mlp':
            gen_squash = gen_squash or 'nosquash'
        elif gen_model == 'dc':
            gen_squash = gen_squash or 'sym'
        dis_squash = dis_squash or 'nosquash'
    else:
        # for all GAN costs other than Wasserstein
        gen_squash = gen_squash or 'sym'
        dis_squash = dis_squash or 'asym'

    assert gen_squash in ['nosquash', 'sym', 'asym'], \
        "Unsupported final squashing function for generator," \
        " supported: 'nosquash', 'sym' and 'asym'"
    assert dis_squash in ['nosquash', 'sym', 'asym'], \
        "Unsupported final squashing function for discriminator," \
        " supported: 'nosquash', 'sym' and 'asym'"

    gen_finact = squash_func[gen_squash]
    dis_finact = squash_func[dis_squash]

    # create model layers
    if gen_model == 'mlp':
        # MLP generator never uses batch norm; noise is a flat vector
        gen = create_mlp_generator(im_size, n_chan, n_gen_ftr, depth,
                                   batch_norm=False, finact=gen_finact)
        noise_dim = (n_noise,)
    else:
        # 'dc': DC-GAN generator; noise is shaped as a 1x1 feature map
        gen = create_dc_generator(im_size, n_chan, n_noise, n_gen_ftr,
                                  n_extra_layers, batch_norm,
                                  finact=gen_finact)
        noise_dim = (n_noise, 1, 1)

    if dis_model == 'mlp':
        dis = create_mlp_discriminator(im_size, n_dis_ftr, depth,
                                       batch_norm=False, finact=dis_finact)
    else:
        dis = create_dc_discriminator(im_size, n_chan, n_dis_ftr,
                                      n_extra_layers, batch_norm,
                                      finact=dis_finact)

    layers = GenerativeAdversarial(
        generator=Sequential(gen, name="Generator"),
        discriminator=Sequential(dis, name="Discriminator"))

    gan = GAN(layers=layers, noise_dim=noise_dim, noise_type=noise_type,
              k=dis_iters, wgan_param_clamp=wgan_param_clamp,
              wgan_train_sched=wgan_train_sched)
    return gan, GeneralizedGANCost(costfunc=GANCost(func=cost_type))
# Pooling(3, strides=2), # Affine(nout=4096, init=Gaussian(scale=0.01), bias=Constant(1), activation=Rectlin()), # Dropout(keep=0.5), # Affine(nout=4096, init=Gaussian(scale=0.01), bias=Constant(1), activation=Rectlin()), # Dropout(keep=0.5), # Affine(nout=1000, init=Gaussian(scale=0.01), bias=Constant(-7), activation=Softmax()) ] class_score = Affine(nout=21, init=Gaussian(scale=0.01), bias=Constant(0), activation=Softmax()) bbox_pred = Affine(nout=84, init=Gaussian(scale=0.001), bias=Constant(0), activation=Identity()) frcn_layers = [ RoiPooling(layers=imagenet_layers, HW=(6, 6), bprop_enabled=frcn_fine_tune), Affine(nout=4096, init=Gaussian(scale=0.005), bias=Constant(.1), activation=Rectlin()), Dropout(keep=0.5), Affine(nout=4096, init=Gaussian(scale=0.005), bias=Constant(.1), activation=Rectlin()), Dropout(keep=0.5), b1, class_score ]
def test_identity_derivative(backend):
    """Identity backprop: derivative is 1 for every input element."""
    inputs = np.array([0, 1, -2]).reshape((3, 1))
    # The identity derivative is elementwise ones matching the input
    # shape. Was np.ones((1, 1)), which only passed via broadcasting and
    # was inconsistent with test_identity's (3, 1) expectation.
    outputs = np.ones((3, 1))
    compare_tensors(Identity(), inputs, outputs, deriv=True)
def test_identity(backend):
    """Identity forward pass returns its input unchanged."""
    data = np.array([0, 1, -2]).reshape((3, 1))
    expected = data.copy()
    compare_tensors(Identity(), data, expected)
glorot = GlorotUniform()

# Network: conv front end (nbands x filter_width filters), deep
# bidirectional RNN body, an affine layer, and a linear nout-way output
# layer for the CTC cost.
layers = [
    Conv((nbands, filter_width, nfilters), init=gauss, bias=Constant(0),
         activation=Rectlin(),
         padding=dict(pad_h=0, pad_w=5),
         strides=dict(str_h=1, str_w=str_w)),
    DeepBiRNN(hidden_size, init=glorot, activation=Rectlinclip(),
              batch_norm=True, reset_cells=True, depth=depth),
    Affine(hidden_size, init=glorot, activation=Rectlinclip()),
    Affine(nout=nout, init=glorot, activation=Identity()),
]

model = Model(layers=layers)

opt = GradientDescentMomentumNesterov(learning_rate, momentum,
                                      gradient_clip_norm=gradient_clip_norm,
                                      stochastic_round=False)

callbacks = Callbacks(model, eval_set=dev, **args.callback_args)
# Print validation set word error rate at the end of every epoch
pcb = WordErrorRateCallback(dev, argmax_decoder, max_tscrpt_len, epoch_freq=1)
callbacks.add_callback(pcb)

cost = GeneralizedCost(costfunc=CTC(max_tscrpt_len, nout=nout))
padding=1),
    Conv((3, 3, 256), init=Gaussian(scale=0.03), bias=Constant(1),
         activation=Rectlin(), padding=1),
    # The following layers are used in Alexnet, but not being used for Fast-RCNN
    # Pooling(3, strides=2),
    # Affine(nout=4096, init=Gaussian(scale=0.01), bias=Constant(1), activation=Rectlin()),
    # Dropout(keep=0.5),
    # Affine(nout=4096, init=Gaussian(scale=0.01), bias=Constant(1), activation=Rectlin()),
    # Dropout(keep=0.5),
    # Affine(nout=1000, init=Gaussian(scale=0.01), bias=Constant(-7), activation=Softmax())
]

# classification head: 21 softmax outputs
class_score = Affine(
    nout=21, init=Gaussian(scale=0.01), bias=Constant(0),
    activation=Softmax())
# bounding-box regression head: 84 linear outputs (4 coords x 21 classes)
bbox_pred = Affine(
    nout=84, init=Gaussian(scale=0.001), bias=Constant(0),
    activation=Identity())

# ROI head: pooled 6x6 features -> two FC-4096/dropout stages -> branch
# node b1 -> classification head
frcn_layers = [
    RoiPooling(layers=imagenet_layers, HW=(6, 6),
               bprop_enabled=frcn_fine_tune),
    Affine(nout=4096, init=Gaussian(scale=0.005), bias=Constant(.1),
           activation=Rectlin()),
    Dropout(keep=0.5),
    Affine(nout=4096, init=Gaussian(scale=0.005), bias=Constant(.1),
           activation=Rectlin()),
    Dropout(keep=0.5),
    b1,
    class_score
]

# NOTE(review): bb_layers list continues beyond this chunk.
bb_layers = [
    b1,
    bbox_pred,