Example #1
seq1 = Sequential([
    # ... (stem layers of this GoogLeNet-style stack precede the excerpt)
    Inception([(64, ), (96, 128), (16, 32), (32, )]),
    Inception([(128, ), (128, 192), (32, 96), (64, )]),
    Pooling(pool_shape=(3, 3), padding=1, strides=2, pool_type='max'),
    Inception([(192, ), (96, 208), (16, 48), (64, )]),
    Inception([(160, ), (112, 224), (24, 64), (64, )]),
    Inception([(128, ), (128, 256), (24, 64), (64, )]),
    Inception([(112, ), (144, 288), (32, 64), (64, )]),
    Inception([(256, ), (160, 320), (32, 128), (128, )]),
    Pooling(pool_shape=(3, 3), padding=1, strides=2, pool_type='max'),
    Inception([(256, ), (160, 320), (32, 128), (128, )]),
    Inception([(384, ), (192, 384), (48, 128), (128, )]),
    Pooling(pool_shape=(7, 7), strides=1, pool_type='avg'),
    Affine(axes=ax.Y,
           weight_init=XavierInit(),
           bias_init=bias_init,  # bias_init is defined earlier in the full script
           activation=Softmax())
])

lr_schedule = {
    'name': 'schedule',
    'base_lr': 0.01,
    'gamma': (1 / 250.)**(1 / 3.),
    'schedule': [22, 44, 65]
}
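# Sanity check (a sketch, not part of the original script): gamma is chosen
# so that the three scheduled drops shrink the rate by a factor of 250 total:
# 0.01 * ((1 / 250.) ** (1 / 3.)) ** 3 == 0.01 / 250 == 4e-05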

optimizer = GradientDescentMomentum(lr_schedule,
                                    0.0,
                                    wdecay=0.0005,
                                    iteration=inputs['iteration'])
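# The second positional argument above is the momentum coefficient (0.0 here,
# i.e. plain SGD); wdecay applies L2 weight decay to the parameters.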
train_prob = seq1(inputs['image'])
train_loss = ng.cross_entropy_multi(train_prob,
                                    ng.one_hot(inputs['label'], axis=ax.Y))
Example #2
out_axis = ng.make_axis(length=len(shakes.vocab) + 1, name="out_feature_axis")
in_axes = ng.make_axes([batch_axis, time_axis])
out_axes = ng.make_axes([batch_axis, time_axis])

# Build placeholders for the created axes
inputs = {'X': ng.placeholder(in_axes), 'y': ng.placeholder(out_axes),
          'iteration': ng.placeholder(axes=())}

# Network Definition
if not use_embedding:
    seq1 = Sequential([Preprocess(functor=expand_onehot),  # see sketch below
                       LSTM(nout=recurrent_units, init=init_uni, backward=False, reset_cells=True,
                            activation=Logistic(), gate_activation=Tanh(),
                            return_sequence=True),
                       Affine(weight_init=init_uni, bias_init=init_uni,
                              activation=Softmax(), axes=out_axis)])
else:
    embedding_dim = 8
    seq1 = Sequential([LookupTable(len(shakes.vocab) + 1, embedding_dim, init_uni, update=True),
                       LSTM(nout=recurrent_units, init=init_uni, backward=False, reset_cells=True,
                            activation=Logistic(), gate_activation=Tanh(),
                            return_sequence=True),
                       Affine(weight_init=init_uni, bias_init=init_uni,
                              activation=Softmax(), axes=out_axis)])
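# expand_onehot is defined earlier in the full script; a plausible sketch
# (hypothetical, mirroring the one-hot lambda used in Example #4) would be:
# def expand_onehot(x):
#     return ng.one_hot(x, axis=out_axis)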

# Optimizer
# Initial learning rate is 0.01 (base_lr).
# At iteration (num_iterations // 75), lr is multiplied by gamma (new lr = .95 * .01).
# At iteration (num_iterations * 2 // 75), it is scaled by gamma again, and so on.
no_steps = 75
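# A sketch (an assumption, not the original code) of how the comments above
# translate into a schedule dict shaped like the one in Example #1, assuming
# num_iterations is defined earlier in the script:
# lr_schedule = {'name': 'schedule', 'base_lr': 0.01, 'gamma': 0.95,
#                'schedule': [k * num_iterations // no_steps
#                             for k in range(1, no_steps)]}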
Example #3
ax.Y.length = 10

######################
# Model specification


def cifar_mean_subtract(x):
    # Subtract the per-channel BGR mean, then scale to roughly [-0.5, 0.5]
    bgr_mean = ng.persistent_tensor(
        axes=x.axes.find_by_name('C'),
        initial_value=np.array([104., 119., 127.]))
    return (x - bgr_mean) / 255.
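# Equivalent NumPy sketch of the preprocessing above (illustration only;
# the shape is hypothetical):
# x = np.random.randint(0, 256, (3, 32, 32)).astype(np.float32)   # C, H, W
# x_norm = (x - np.array([104., 119., 127.]).reshape(3, 1, 1)) / 255.
# x_norm is now roughly zero-centered and scaled to about [-0.5, 0.5]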


seq1 = Sequential([Preprocess(functor=cifar_mean_subtract),
                   Affine(nout=200, weight_init=UniformInit(-0.1, 0.1), activation=Rectlin()),
                   Affine(axes=ax.Y, weight_init=UniformInit(-0.1, 0.1), activation=Softmax())])

optimizer = GradientDescentMomentum(0.1, 0.9)
train_prob = seq1(inputs['image'])
train_loss = ng.cross_entropy_multi(train_prob, ng.one_hot(inputs['label'], axis=ax.Y))
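# ng.sequential orders the graph: run the optimizer's update ops first, then
# emit the mean loss over the batch as the reported cost.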
batch_cost = ng.sequential([optimizer(train_loss), ng.mean(train_loss, out_axes=())])
train_outputs = dict(batch_cost=batch_cost)

with Layer.inference_mode_on():
    inference_prob = seq1(inputs['image'])
eval_loss = ng.cross_entropy_multi(inference_prob, ng.one_hot(inputs['label'], axis=ax.Y))
eval_outputs = dict(results=inference_prob, cross_ent_loss=eval_loss)

# Now bind the computations we are interested in
with closing(ngt.make_transformer()) as transformer:
    train_computation = make_bound_computation(transformer, train_outputs, inputs)
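# Hypothetical training step with the bound computation (the feed dict's keys
# must match the placeholders in `inputs`):
# outputs = train_computation(minibatch_data)  # -> {'batch_cost': ...}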
Example #4
# (the matching "if" branch precedes this excerpt)
else:
    layer_0 = Preprocess(functor=lambda x: ng.one_hot(x, axis=ax.Y))

if args.layer_type == "rnn":
    rlayer = Recurrent(hidden_size, init, activation=Tanh())
elif args.layer_type == "birnn":
    # sum_out=True sums the forward and backward outputs instead of
    # concatenating them, keeping the hidden size unchanged
    rlayer = BiRNN(hidden_size,
                   init,
                   activation=Tanh(),
                   return_sequence=True,
                   sum_out=True)

# model initialization
seq1 = Sequential([
    layer_0, rlayer,
    Affine(init, activation=Softmax(), bias_init=init, axes=(ax.Y, ))
])

optimizer = RMSProp()

train_prob = seq1(inputs['inp_txt'])
train_loss = ng.cross_entropy_multi(train_prob,
                                    ng.one_hot(inputs['tgt_txt'], axis=ax.Y),
                                    usebits=True)  # loss in bits (base-2 log)
batch_cost = ng.sequential(
    [optimizer(train_loss),
     ng.mean(train_loss, out_axes=())])
train_outputs = dict(batch_cost=batch_cost)

with Layer.inference_mode_on():
    inference_prob = seq1(inputs['inp_txt'])
Example #5
    return (x - bgr_mean) / 255.  # tail of cifar_mean_subtract (see Example #3)


init_uni = UniformInit(-0.1, 0.1)

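# The network below: two conv/pool stages, a 500-unit hidden layer, then a
# softmax classifier over ax.Y.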
seq1 = Sequential([Preprocess(functor=cifar_mean_subtract),
                   Convolution((5, 5, 16), filter_init=init_uni, activation=Rectlin(),
                               batch_norm=args.use_batch_norm),
                   Pooling((2, 2), strides=2),
                   Convolution((5, 5, 32), filter_init=init_uni, activation=Rectlin(),
                               batch_norm=args.use_batch_norm),
                   Pooling((2, 2), strides=2),
                   Affine(nout=500, weight_init=init_uni, activation=Rectlin(),
                          batch_norm=args.use_batch_norm),
                   Affine(axes=ax.Y, weight_init=init_uni, activation=Softmax())])

optimizer = GradientDescentMomentum(0.01, 0.9)
train_prob = seq1(inputs['image'])
train_loss = ng.cross_entropy_multi(train_prob, ng.one_hot(inputs['label'], axis=ax.Y))
batch_cost = ng.sequential([optimizer(train_loss), ng.mean(train_loss, out_axes=())])
train_outputs = dict(batch_cost=batch_cost)

with Layer.inference_mode_on():
    inference_prob = seq1(inputs['image'])
# errors = ng.not_equal(ng.argmax(inference_prob, out_axes=[ax.N]), inputs['label'])
eval_loss = ng.cross_entropy_multi(inference_prob, ng.one_hot(inputs['label'], axis=ax.Y))
eval_outputs = dict(results=inference_prob, cross_ent_loss=eval_loss)


# Now bind the computations we are interested in
Example #6
def __init__(self, net_type, resnet_size, bottleneck, num_resnet_mods, batch_norm=True):
    # For the CIFAR10/CIFAR100 datasets
    if net_type in ('cifar10', 'cifar100'):
        # Number of filters
        num_fils = [16, 32, 64]
        # Network layers
        layers = [
            # Subtract the mean, as suggested in the paper
            Preprocess(functor=cifar10_mean_subtract),
            # First conv with 3x3 filters and strides=1
            Convolution(**conv_params(3, 16, batch_norm=batch_norm))]
        first_resmod = True  # Indicates the first residual module
        # Loop once per filter size.
        for fil in range(3):
            # Lay out n residual modules so that we have 2n layers.
            for resmods in range(num_resnet_mods):
                if resmods == 0:
                    if first_resmod:
                        # Strides=1 and convolution side path
                        main_path, side_path = self.get_mp_sp(num_fils[fil],
                                                              net_type, direct=False,
                                                              batch_norm=batch_norm)
                        layers.append(ResidualModule(main_path, side_path))
                        layers.append(Activation(Rectlin()))
                        first_resmod = False
                    else:
                        # Strides=2 and convolution side path
                        main_path, side_path = self.get_mp_sp(num_fils[fil], net_type,
                                                              direct=False, strides=2,
                                                              batch_norm=batch_norm)
                        layers.append(ResidualModule(main_path, side_path))
                        layers.append(Activation(Rectlin()))
                else:
                    # Strides=1 and direct connection
                    main_path, side_path = self.get_mp_sp(num_fils[fil], net_type,
                                                          batch_norm=batch_norm)
                    layers.append(ResidualModule(main_path, side_path))
                    layers.append(Activation(Rectlin()))
        # Average pooling --> fully connected --> softmax.
        layers.append(Pooling((8, 8), pool_type='avg'))
        layers.append(Affine(axes=ax.Y, weight_init=KaimingInit(), batch_norm=batch_norm))
        layers.append(Activation(Softmax()))
    # For the I1K (ImageNet-1K) dataset
    elif net_type in ('i1k', 'i1k100'):
        # Number of filters
        num_fils = [64, 128, 256, 512]
        # Number of residual modules to instantiate at each level
        num_resnet_mods = num_i1k_resmods(resnet_size)
        # Network layers
        layers = [
            # Subtract the mean
            Preprocess(functor=i1k_mean_subtract),
            # First conv layer
            Convolution((7, 7, 64), strides=2, padding=3,
                        batch_norm=batch_norm, activation=Rectlin(),
                        filter_init=KaimingInit()),
            # Max pooling
            Pooling((3, 3), strides=2, pool_type='max', padding=1)]
        first_resmod = True  # Indicates the first residual module, which uses strides=1
        # Loop once per filter size
        for fil in range(4):
            # Lay out residual modules as given by the num_resnet_mods list
            for resmods in range(num_resnet_mods[fil]):
                if resmods == 0:
                    if first_resmod:
                        # Strides=1 and convolution side path
                        main_path, side_path = self.get_mp_sp(num_fils[fil],
                                                              net_type,
                                                              direct=False,
                                                              bottleneck=bottleneck,
                                                              batch_norm=batch_norm)
                        layers.append(ResidualModule(main_path, side_path))
                        layers.append(Activation(Rectlin()))
                        first_resmod = False
                    else:
                        # Strides=2 and convolution side path
                        main_path, side_path = self.get_mp_sp(num_fils[fil],
                                                              net_type,
                                                              direct=False,
                                                              bottleneck=bottleneck,
                                                              strides=2,
                                                              batch_norm=batch_norm)
                        layers.append(ResidualModule(main_path, side_path))
                        layers.append(Activation(Rectlin()))
                else:
                    # Strides=1 and direct connection
                    main_path, side_path = self.get_mp_sp(num_fils[fil],
                                                          net_type,
                                                          bottleneck=bottleneck,
                                                          batch_norm=batch_norm)
                    layers.append(ResidualModule(main_path, side_path))
                    layers.append(Activation(Rectlin()))
        # Average pooling --> fully connected --> softmax.
        layers.append(Pooling((7, 7), pool_type='avg'))
        layers.append(Affine(axes=ax.Y, weight_init=KaimingInit(),
                             batch_norm=batch_norm))
        layers.append(Activation(Softmax()))
    else:
        raise NameError("Incorrect dataset. Should be --dataset cifar10 or --dataset i1k")
    super(BuildResnet, self).__init__(layers=layers)
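# Hypothetical usage (assuming BuildResnet subclasses Sequential, as the
# super() call suggests): a CIFAR-10 ResNet-20 has depth 6n + 2 with
# n = num_resnet_mods = 3.
# model = BuildResnet('cifar10', resnet_size=20, bottleneck=False,
#                     num_resnet_mods=3, batch_norm=True)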