def __init__(self, branch_units, activation=Rectlin(), bias_init=UniformInit(low=-0.08, high=0.08), filter_init=XavierInit()):
    """Build the four parallel branches of a GoogLeNet-style Inception module.

    Arguments:
        branch_units: four tuples of output depths, one per branch:
            (p1,), (p2_reduce, p2), (p3_reduce, p3), (p4,).
        activation: activation applied by every convolution.
        bias_init: bias initializer for every convolution.
        filter_init: filter/weight initializer for every convolution.

    NOTE(review): the default argument objects (Rectlin(), UniformInit(...),
    XavierInit()) are created once at definition time and shared across all
    instances that rely on the defaults — confirm these initializers are
    stateless before reusing them.
    """
    (p1, p2, p3, p4) = branch_units
    # Branch 1: a single 1x1 convolution.
    self.branch_1 = Convolution((1, 1, p1[0]), activation=activation,
                                bias_init=bias_init,
                                filter_init=filter_init)
    # Branch 2: 1x1 channel reduction followed by a 3x3 convolution
    # (padding=1 preserves the spatial size).
    self.branch_2 = [Convolution((1, 1, p2[0]), activation=activation,
                                 bias_init=bias_init,
                                 filter_init=filter_init),
                     Convolution((3, 3, p2[1]), activation=activation,
                                 bias_init=bias_init,
                                 filter_init=filter_init,
                                 padding=1)]
    # Branch 3: 1x1 channel reduction followed by a 5x5 convolution
    # (padding=2 preserves the spatial size).
    self.branch_3 = [Convolution((1, 1, p3[0]), activation=activation,
                                 bias_init=bias_init,
                                 filter_init=filter_init),
                     Convolution((5, 5, p3[1]), activation=activation,
                                 bias_init=bias_init,
                                 filter_init=filter_init,
                                 padding=2)]
    # Branch 4: 3x3 max-pool followed by a 1x1 convolution.
    # BUG FIX: this convolution previously used p3[0], leaving p4 unused;
    # in the Inception architecture the pool branch has its own output
    # depth, p4[0].
    self.branch_4 = [Pool2D(fshape=3, padding=1, strides=1, op="max"),
                     Convolution((1, 1, p4[0]), activation=activation,
                                 bias_init=bias_init,
                                 filter_init=filter_init)]
def __init__(self, number_embeddings_features, tokens_in_embeddings, deep_parameters, deep_activation_fn, drop_out_rate=0.0):
    """Construct the wide (linear) and deep (embeddings + MLP) sub-models.

    Arguments:
        number_embeddings_features: embedding dimension per categorical input.
        tokens_in_embeddings: vocabulary size per categorical input.
        deep_parameters: hidden-layer widths of the deep MLP.
        deep_activation_fn: activation used by every hidden Affine layer.
        drop_out_rate: if > 0.0, a Dropout layer is inserted after each
            hidden layer. NOTE(review): this value is passed as Dropout's
            ``keep`` probability — confirm the caller supplies a keep rate,
            not a drop rate.
    """
    super(WideDeepClassifier, self).__init__(name="WideAndDeep")

    # One lookup table per categorical feature; each gets its own
    # uniform [0, 1) initializer. pad_idx must be initialized to 0
    # explicitly.
    self.luts = [
        LookupTable(vocab_size, embed_dim, UniformInit(0, 1),
                    pad_idx=0, update=True)
        for vocab_size, embed_dim in zip(tokens_in_embeddings,
                                         number_embeddings_features)
    ]

    # Deep tower: a stack of Affine layers (optionally interleaved with
    # Dropout), capped by a scalar-output Affine.
    init_xavier = XavierInit()
    deep_stack = []
    for width in deep_parameters:
        deep_stack.append(Affine(nout=width,
                                 weight_init=init_xavier,
                                 activation=deep_activation_fn))
        if drop_out_rate > 0.0:
            deep_stack.append(Dropout(keep=drop_out_rate))
    deep_stack.append(Affine(axes=tuple(), weight_init=init_xavier))
    self.deep_layers = Sequential(deep_stack)

    # Wide tower: a single linear (scalar-output) layer.
    self.linear_layer = Affine(axes=tuple(), weight_init=init_xavier)
np.random.seed(args.rng_seed) # Create the dataloader train_data, valid_data = MNIST(args.data_dir).load_data() train_set = ArrayIterator(train_data, args.batch_size, total_iterations=args.num_iterations) valid_set = ArrayIterator(valid_data, args.batch_size) inputs = train_set.make_placeholders() ax.Y.length = 10 ###################### # Model specification init_xav = XavierInit() seq1 = Sequential([ Preprocess(functor=lambda x: x / 255.), Convolution((5, 5, 16), filter_init=init_xav, activation=Rectlin()), Pooling((2, 2), strides=2), Convolution((5, 5, 32), filter_init=init_xav, activation=Rectlin()), Pooling((2, 2), strides=2), Affine(nout=500, weight_init=init_xav, activation=Rectlin()), Affine(axes=ax.Y, weight_init=init_xav, activation=Softmax()) ]) optimizer = GradientDescentMomentum(0.01, 0.9) train_prob = seq1(inputs['image']) train_loss = ng.cross_entropy_binary(train_prob, ng.one_hot(inputs['label'], axis=ax.Y))
outputs = [ branch_1_output, branch_2_output, branch_3_output, branch_4_output ] # This does the equivalent of neon's merge-broadcast return ng.concat_along_axis(outputs, branch_1_output.axes.channel_axis()) seq1 = Sequential([ Convolution((7, 7, 64), padding=3, strides=2, activation=Rectlin(), bias_init=bias_init, filter_init=XavierInit()), Pooling(pool_shape=(3, 3), padding=1, strides=2, pool_type='max'), Convolution((1, 1, 64), activation=Rectlin(), bias_init=bias_init, filter_init=XavierInit()), Convolution((3, 3, 192), activation=Rectlin(), bias_init=bias_init, filter_init=XavierInit(), padding=1), Pooling(pool_shape=(3, 3), padding=1, strides=2, pool_type='max'), Inception([(64, ), (96, 128), (16, 32), (32, )]), Inception([(128, ), (128, 192), (32, 96), (64, )]), Pooling(pool_shape=(3, 3), padding=1, strides=2, pool_type='max'), Inception([(192, ), (96, 208), (16, 48), (64, )]),
# NOTE(review): fragment — this span is the body of a method whose `def`
# line is outside this view, and the Sequential list below continues past
# this view.
# Apply each Inception branch to the same input; branches 2-4 are
# two-stage pipelines, so chain their layers.
branch_1_output = self.branch_1(in_obj)
branch_2_output = self.branch_2[0](in_obj)
branch_2_output = self.branch_2[1](branch_2_output)
branch_3_output = self.branch_3[0](in_obj)
branch_3_output = self.branch_3[1](branch_3_output)
branch_4_output = self.branch_4[0](in_obj)
branch_4_output = self.branch_4[1](branch_4_output)
outputs = [branch_1_output, branch_2_output, branch_3_output,
           branch_4_output]
# This does the equivalent of neon's merge-broadcast: concatenate the
# four branch outputs along the channel axis.
return ng.concat_along_axis(outputs, branch_1_output.axes.channel_axis())

# GoogLeNet stem followed by the stack of Inception modules; each
# Inception takes four branch-depth tuples.
seq1 = Sequential([Convolution((7, 7, 64), padding=3, strides=2,
                               activation=Rectlin(), bias_init=bias_init,
                               filter_init=XavierInit()),
                   Pool2D(fshape=3, padding=1, strides=2, op='max'),
                   Convolution((1, 1, 64), activation=Rectlin(),
                               bias_init=bias_init,
                               filter_init=XavierInit()),
                   Convolution((3, 3, 192), activation=Rectlin(),
                               bias_init=bias_init,
                               filter_init=XavierInit(), padding=1),
                   Pool2D(fshape=3, padding=1, strides=2, op='max'),
                   Inception([(64,), (96, 128), (16, 32), (32,)]),
                   Inception([(128,), (128, 192), (32, 96), (64,)]),
                   Pool2D(fshape=3, padding=1, strides=2, op='max'),
                   Inception([(192,), (96, 208), (16, 48), (64,)]),
                   Inception([(160,), (112, 224), (24, 64), (64,)]),
                   Inception([(128,), (128, 256), (24, 64), (64,)]),
                   Inception([(112,), (144, 288), (32, 64), (64,)]),
                   Inception([(256,), (160, 320), (32, 128), (128,)]),