Example #1
    def __init__(self, inputs, dataset, stage_depth,
                 batch_norm=False, activation=False, preprocess=False):
        nfms = [2**(stage + 4) for stage in sorted(list(range(3)) * stage_depth)]
        strides = [1 if cur == prev else 2 for cur, prev in zip(nfms[1:], nfms[:-1])]
        layers = []
        if preprocess and dataset == 'cifar10':
            layers.append(Preprocess(functor=cifar_mean_subtract))
        layers.append(Convolution(**conv_params(3, 16, batch_norm=batch_norm)))
        layers.append(f_module(nfms[0], first=True, batch_norm=batch_norm))

        for nfm, stride in zip(nfms[1:], strides):
            layers.append(f_module(nfm, strides=stride, batch_norm=batch_norm))

        if batch_norm:
            layers.append(BatchNorm())
        if activation:
            layers.append(Activation(Rectlin()))
        layers.append(Pool2D(8, strides=2, op='avg'))
        if dataset == 'cifar10':
            ax.Y.length = 10
            layers.append(Affine(axes=ax.Y, weight_init=KaimingInit(),
                                 batch_norm=batch_norm, activation=Softmax()))
        elif dataset == 'i1k':
            ax.Y.length = 1000
            layers.append(Affine(axes=ax.Y, weight_init=KaimingInit(),
                                 batch_norm=batch_norm, activation=Softmax()))
        else:
            raise ValueError("Incorrect dataset provided")
        super(mini_residual_network, self).__init__(layers=layers)
Example #2
def create_network():
    '''
    Define 3D convolutional network
    '''

    # Define for weight initialization
    g1 = GaussianInit(mean=0., var=0.01)
    g5 = GaussianInit(mean=0., var=0.005)
    c0 = ConstantInit(val=0.)
    c1 = ConstantInit(val=1.)
    ax.Y.length = 101

    padding = {'D': 1, 'H': 1, 'W': 1, 'C': 0}
    strides = {'D': 2, 'H': 2, 'W': 2, 'C': 1}

    layers = [
        Convolution((3, 3, 3, 64),
                    padding=padding,
                    filter_init=g1,
                    bias_init=c0,
                    activation=Rectlin()),
        Pooling((1, 2, 2), strides={
            'D': 1,
            'H': 2,
            'W': 2,
            'C': 1
        }),
        Convolution((3, 3, 3, 128),
                    padding=padding,
                    filter_init=g1,
                    bias_init=c1,
                    activation=Rectlin()),
        Pooling((2, 2, 2), strides=strides),
        Convolution((3, 3, 3, 256),
                    padding=padding,
                    filter_init=g1,
                    bias_init=c1,
                    activation=Rectlin()),
        Pooling((2, 2, 2), strides=strides),
        Convolution((3, 3, 3, 256),
                    padding=padding,
                    filter_init=g1,
                    bias_init=c1,
                    activation=Rectlin()),
        Pooling((2, 2, 2), strides=strides),
        Convolution((3, 3, 3, 256),
                    padding=padding,
                    filter_init=g1,
                    bias_init=c1,
                    activation=Rectlin()),
        Pooling((2, 2, 2), strides=strides),
        Affine(nout=2048, weight_init=g5, bias_init=c1, activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(nout=2048, weight_init=g5, bias_init=c1, activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(axes=ax.Y, weight_init=g1, bias_init=c0, activation=Softmax())
    ]

    return Sequential(layers)
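create_network returns a Sequential that would be applied to a placeholder the same way later examples apply seq1 (e.g. seq1(inputs['image']) in Example #15). A minimal, hypothetical usage sketch; the placeholder key is an assumption, not from the original script:

model = create_network()
train_prob = model(inputs['video'])  # 'video' is a hypothetical placeholder key for 3D clips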
Example #3
def make_generator(out_axis):

    generator = [Affine(nout=dim, weight_init=w_init, bias_init=b_init, activation=Rectlin()),
                 Affine(nout=dim, weight_init=w_init, bias_init=b_init, activation=Rectlin()),
                 Affine(nout=dim, weight_init=w_init, bias_init=b_init, activation=Rectlin()),
                 Affine(axes=out_axis, weight_init=w_init, bias_init=b_init, activation=None)]

    return Sequential(generator, name="Generator")
Example #4
def make_discriminator():

    discriminator = [Affine(nout=dim, weight_init=w_init, bias_init=b_init, activation=Rectlin()),
                     Affine(nout=dim, weight_init=w_init, bias_init=b_init, activation=Rectlin()),
                     Affine(nout=dim, weight_init=w_init, bias_init=b_init, activation=Rectlin()),
                     Affine(nout=1, weight_init=w_init, bias_init=b_init, activation=None)]

    return Sequential(discriminator, name="Discriminator")
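Examples #3 and #4 read dim, w_init, and b_init from their enclosing module scope. A minimal sketch of plausible definitions, reusing the initializers from Example #2; the concrete values and the out_axis are assumptions, not the original script's:

# Assumed module-level names used by make_generator / make_discriminator
dim = 128                                  # hidden width of each Affine layer (assumption)
w_init = GaussianInit(mean=0., var=0.01)   # weight initializer, as in Example #2
b_init = ConstantInit(val=0.)              # bias initializer, as in Example #2

out_axis = ng.make_axis(length=2, name='F')  # hypothetical data axis
generator = make_generator(out_axis)
discriminator = make_discriminator()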
Example #5
    def __init__(self,
                 nfilters,
                 filter_width,
                 str_w,
                 nbands,
                 depth,
                 hidden_size,
                 batch_norm=False,
                 batch_norm_affine=False,
                 batch_norm_conv=False,
                 to_ctc=True):

        self.to_ctc = to_ctc

        # Initializers
        gauss = GaussianInit(0.01)
        glorot = GlorotInit()

        # 1D Convolution layer
        padding = dict(pad_h=0, pad_w=filter_width // 2, pad_d=0)
        strides = dict(str_h=1, str_w=str_w, str_d=1)
        dilation = dict(dil_d=1, dil_h=1, dil_w=1)

        conv_layer = Convolution((nbands, filter_width, nfilters),
                                 gauss,
                                 bias_init=ConstantInit(0),
                                 padding=padding,
                                 strides=strides,
                                 dilation=dilation,
                                 activation=Rectlin(),
                                 batch_norm=batch_norm_conv)

        # Add BiRNN layers
        deep_birnn = DeepBiRNN(depth,
                               hidden_size,
                               glorot,
                               Rectlinclip(),
                               batch_norm=batch_norm)

        # Add a single affine layer
        fc = Affine(nout=hidden_size,
                    weight_init=glorot,
                    activation=Rectlinclip(),
                    batch_norm=batch_norm_affine)

        # Add the final affine layer
        # Softmax output is computed within the CTC cost function, so no activation is needed here.
        if self.to_ctc is False:
            activation = Softmax()
        else:
            activation = None
        final = Affine(axes=ax.Y, weight_init=glorot, activation=activation)

        layers = [conv_layer, deep_birnn, fc, final]

        super(Deepspeech, self).__init__(layers=layers)
Example #6
def define_recurrent_layers(out_axes=None,
                            celltype='RNN',
                            recurrent_units=[32],
                            init=GlorotInit(),
                            return_sequence=True):
    layers = []
    for e, i in enumerate(recurrent_units):
        layer_return_sequence = e < len(recurrent_units) - 1 or return_sequence
        if celltype == 'RNN':
            layers.append(
                Recurrent(nout=i,
                          init=init,
                          backward=False,
                          activation=Tanh(),
                          return_sequence=layer_return_sequence))
        elif celltype == 'LSTM':
            layers.append(
                LSTM(nout=i,
                     init=init,
                     backward=False,
                     activation=Tanh(),
                     gate_activation=Logistic(),
                     return_sequence=layer_return_sequence))
    if out_axes is not None:
        affine_layer = Affine(weight_init=init,
                              bias_init=init,
                              activation=Identity(),
                              axes=out_axes)
        layers.append(affine_layer)
    return layers
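A hedged usage sketch for define_recurrent_layers, following the Sequential pattern of Example #10; the axis name and layer sizes here are assumptions:

# Two stacked LSTMs followed by the optional output Affine
out_axes = ng.make_axes([ng.make_axis(length=10, name='Fo')])
layers = define_recurrent_layers(out_axes=out_axes,
                                 celltype='LSTM',
                                 recurrent_units=[64, 32],
                                 return_sequence=False)
model = Sequential(layers)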
Example #7
    def __init__(self,
                 inputs,
                 stage_depth,
                 batch_norm=True,
                 activation=True,
                 preprocess=True):
        nfms = [
            2**(stage + 4) for stage in sorted(list(range(3)) * stage_depth)
        ]
        strides = [
            1 if cur == prev else 2 for cur, prev in zip(nfms[1:], nfms[:-1])
        ]
        layers = []
        if preprocess:
            layers.append(Preprocess(functor=cifar_mean_subtract))
        parallel_axis = inputs['image'].axes.batch_axes()
        with ng.metadata(device_id=('1', '2'), parallel=parallel_axis[0]):
            layers.append(
                Convolution(**conv_params(3, 16, batch_norm=batch_norm)))
            layers.append(f_module(nfms[0], first=True))

            for nfm, stride in zip(nfms[1:], strides):
                layers.append(f_module(nfm, strides=stride))

        if batch_norm:
            layers.append(BatchNorm())
        if activation:
            layers.append(Activation(Rectlin()))
        layers.append(Pool2D(8, strides=2, op='avg'))
        layers.append(
            Affine(axes=ax.Y,
                   weight_init=KaimingInit(),
                   batch_norm=batch_norm,
                   activation=Softmax()))
        self.layers = layers
Example #8
    def __init__(self,
                 number_embeddings_features,
                 tokens_in_embeddings,
                 deep_parameters,
                 deep_activation_fn,
                 drop_out_rate=0.0):

        super(WideDeepClassifier, self).__init__(name="WideAndDeep")

        # Embeddings
        # Make the axes
        self.luts = []

        for e in range(len(number_embeddings_features)):
            init_uniform = UniformInit(0, 1)

            # pad_idx has to be initialized to 0 explicitly.

            lut = LookupTable(tokens_in_embeddings[e],
                              number_embeddings_features[e],
                              init_uniform,
                              pad_idx=0,
                              update=True)

            self.luts.append(lut)

        # Model specification

        init_xavier = XavierInit()

        layers = []
        for i in range(len(deep_parameters)):
            layers.append(
                Affine(nout=deep_parameters[i],
                       weight_init=init_xavier,
                       activation=deep_activation_fn))
            if drop_out_rate > 0.0:
                layers.append(Dropout(keep=drop_out_rate))

        layers.append(Affine(axes=tuple(), weight_init=init_xavier))

        self.deep_layers = Sequential(layers)

        self.linear_layer = Affine(axes=tuple(), weight_init=init_xavier)
Example #9
def make_layers(use_large, vocab_size):

    if use_large:
        init = GaussianInit(0., 0.02)
    else:
        init = GaussianInit(0., 0.05)

    layers = []
    layers.append(make_embedding_layer(vocab_size))
    layers.append(lambda op: ng.map_roles(op, {'REC': 'W', 'F': 'C'}))

    kernel_sizes = [7, 7, 3, 3, 3, 3]
    pool_layer_idxs = [0, 1, 5]
    conv_nout = 1024 if use_large else 256
    fc_nout = 2048 if use_large else 1024
    for i in range(6):
        conv_layer = Convolution(
            **conv_params(kernel_sizes[i], conv_nout, init))
        layers.append(conv_layer)
        if i in pool_layer_idxs:
            pool_layer = Pooling(pool_shape=(3, ), strides=3)
            layers.append(pool_layer)
    layers.append(
        Affine(nout=fc_nout,
               weight_init=init,
               bias_init=ConstantInit(0.),
               activation=Rectlin()))
    layers.append(Dropout(keep=0.5))
    layers.append(
        Affine(nout=fc_nout,
               weight_init=init,
               bias_init=ConstantInit(0.),
               activation=Rectlin()))
    layers.append(Dropout(keep=0.5))
    layers.append(
        Affine(axes=(ax.Y, ),
               weight_init=init,
               bias_init=ConstantInit(0.),
               activation=Softmax()))

    return layers
Example #10
def define_model(out_axis, filter_shapes=[5], n_filters=[32], init=KaimingInit()):
    assert len(filter_shapes) == len(n_filters)

    layers = []
    for e, (f, n) in enumerate(zip(filter_shapes, n_filters)):
        layers.append(Convolution(filter_shape=(f, n), filter_init=init, strides=1, padding="valid", dilation=1, activation=Rectlin(), batch_norm=True))

    affine_layer = Affine(weight_init=init, bias_init=init,
                          activation=Identity(), axes=out_axis)

    model = Sequential(layers + [affine_layer])

    return model
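For context, a call into define_model might look like the following; the output axis is an assumption (the original script builds its own out_axis):

out_axis = ng.make_axis(length=10, name='Fo')  # hypothetical output axis
model = define_model(out_axis, filter_shapes=[5, 3], n_filters=[32, 64])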
Example #11
def make_discriminator(bn=True, disc_activation=None, bias_init=None):
    conv_layers = [
        Convolution((4, 4, 128),
                    filter_init,
                    strides=2,
                    padding=1,
                    activation=lrelu,
                    batch_norm=False,
                    bias_init=bias_init)
    ]

    conv_layers.append(
        Convolution((4, 4, 256),
                    filter_init,
                    strides=2,
                    padding=1,
                    activation=lrelu,
                    batch_norm=bn,
                    bias_init=bias_init))
    conv_layers.append(
        Convolution((4, 4, 512),
                    filter_init,
                    strides=2,
                    padding=1,
                    activation=lrelu,
                    batch_norm=bn,
                    bias_init=bias_init))
    conv_layers.append(
        Convolution((4, 4, 1024),
                    filter_init,
                    strides=2,
                    padding=1,
                    activation=lrelu,
                    batch_norm=bn,
                    bias_init=bias_init))
    conv_layers.append(
        Affine(weight_init=filter_init,
               activation=None,
               batch_norm=False,
               axes=ng.make_axes({
                   "C": 1,
                   "H": 1,
                   "W": 1
               })))
    return Sequential(conv_layers, name="Discriminator")
Example #12
    def __init__(self, stage_depth):
        nfms = [2**(stage + 4) for stage in sorted(list(range(3)) * stage_depth)]
        print(nfms)
        strides = [1 if cur == prev else 2 for cur, prev in zip(nfms[1:], nfms[:-1])]

        layers = [Preprocess(functor=cifar_mean_subtract),
                  Convolution(**conv_params(3, 16)),
                  f_module(nfms[0], first=True)]

        for nfm, stride in zip(nfms[1:], strides):
            layers.append(f_module(nfm, strides=stride))

        layers.append(BatchNorm())
        layers.append(Activation(Rectlin()))
        layers.append(Pooling((8, 8), pool_type='avg'))
        layers.append(Affine(axes=ax.Y,
                             weight_init=KaimingInit(),
                             activation=Softmax()))
        super(residual_network, self).__init__(layers=layers)
Example #13
def make_generator(bn=True, bias_init=None):
    deconv_layers = [
        Affine(weight_init=filter_init,
               activation=None,
               batch_norm=False,
               axes=ng.make_axes({
                   "C": 1024,
                   "H": 4,
                   "W": 4
               })),
        Deconvolution((4, 4, 512),
                      filter_init,
                      strides=2,
                      padding=1,
                      activation=relu,
                      batch_norm=bn,
                      bias_init=bias_init),
        Deconvolution((4, 4, 256),
                      filter_init,
                      strides=2,
                      padding=1,
                      activation=relu,
                      batch_norm=bn,
                      bias_init=bias_init),
        Deconvolution((4, 4, 128),
                      filter_init,
                      strides=2,
                      padding=1,
                      activation=relu,
                      batch_norm=bn,
                      bias_init=bias_init)
    ]

    deconv_layers.append(
        Deconvolution((4, 4, 3),
                      filter_init,
                      strides=2,
                      padding=1,
                      activation=Tanh(),
                      batch_norm=False,
                      bias_init=bias_init))
    return Sequential(deconv_layers, name="Generator")
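Examples #11 and #13 form a DCGAN-style generator/discriminator pair and pull filter_init, relu, and lrelu from the enclosing script. A sketch of plausible definitions, assuming Rectlin accepts a leaky slope argument as in neon (the values are illustrative):

# Assumed shared names for the GAN examples
filter_init = GaussianInit(var=0.05)  # small-variance init (assumption)
relu = Rectlin()                      # rectifier used by the generator
lrelu = Rectlin(slope=0.1)            # leaky rectifier used by the discriminator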
Example #14
    def __init__(self, net_type, resnet_size, bottleneck, num_resnet_mods):
        # For CIFAR10 dataset
        if net_type == 'cifar10':
            # Number of Filters
            num_fils = [16, 32, 64]
            # Network Layers
            layers = [
                # Subtracting mean as suggested in paper
                Preprocess(functor=cifar10_mean_subtract),
                # First Conv with 3x3 and stride=1
                Convolution(**conv_params(3, 16))
            ]
            first_resmod = True  # Indicates the first residual module
            # Loop 3 times for each filter.
            for fil in range(3):
                # Lay out n residual modules so that we have 2n layers.
                for resmods in range(num_resnet_mods):
                    if resmods == 0:
                        if first_resmod:
                            # Strides=1 and Convolution side path
                            main_path, side_path = self.get_mp_sp(
                                num_fils[fil], net_type, direct=False)
                            layers.append(ResidualModule(main_path, side_path))
                            layers.append(Activation(Rectlin()))
                            first_resmod = False
                        else:
                            # Strides=2 and Convolution side path
                            main_path, side_path = self.get_mp_sp(
                                num_fils[fil],
                                net_type,
                                direct=False,
                                strides=2)
                            layers.append(ResidualModule(main_path, side_path))
                            layers.append(Activation(Rectlin()))
                    else:
                        # Strides=1 and direct connection
                        main_path, side_path = self.get_mp_sp(
                            num_fils[fil], net_type)
                        layers.append(ResidualModule(main_path, side_path))
                        layers.append(Activation(Rectlin()))
            # Do average pooling --> fully connected --> softmax.
            layers.append(Pooling([8, 8], pool_type='avg'))
            layers.append(
                Affine(axes=ax.Y, weight_init=KaimingInit(), batch_norm=True))
            layers.append(Activation(Softmax()))
        # For I1K dataset
        elif net_type == "i1k":
            # Number of Filters
            num_fils = [64, 128, 256, 512]
            # Number of residual modules we need to instantiate at each level
            num_resnet_mods = num_i1k_resmods(resnet_size)
            # Network layers
            layers = [
                # Subtracting mean
                Preprocess(functor=i1k_mean_subtract),
                # First Conv layer
                Convolution((7, 7, 64),
                            strides=2,
                            padding=3,
                            batch_norm=True,
                            activation=Rectlin(),
                            filter_init=KaimingInit()),
                # Max Pooling
                Pooling([3, 3], strides=2, pool_type='max', padding=1)
            ]
            first_resmod = True  # Indicates the first residual module for which strides are 1
            # Loop 4 times for each filter
            for fil in range(4):
                # Lay out residual modules as in num_resnet_mods list
                for resmods in range(num_resnet_mods[fil]):
                    if resmods == 0:
                        if first_resmod:
                            # Strides=1 and Convolution side path
                            main_path, side_path = self.get_mp_sp(
                                num_fils[fil],
                                net_type,
                                direct=False,
                                bottleneck=bottleneck)
                            layers.append(ResidualModule(main_path, side_path))
                            layers.append(Activation(Rectlin()))
                            first_resmod = False
                        else:
                            # Strides=2 and Convolution side path
                            main_path, side_path = self.get_mp_sp(
                                num_fils[fil],
                                net_type,
                                direct=False,
                                bottleneck=bottleneck,
                                strides=2)
                            layers.append(ResidualModule(main_path, side_path))
                            layers.append(Activation(Rectlin()))
                    else:
                        # Strides=1 and direct connection
                        main_path, side_path = self.get_mp_sp(
                            num_fils[fil], net_type, bottleneck=bottleneck)
                        layers.append(ResidualModule(main_path, side_path))
                        layers.append(Activation(Rectlin()))
            # Do average pooling --> fully connected --> softmax.
            layers.append(Pooling([7, 7], pool_type='avg'))
            layers.append(
                Affine(axes=ax.Y, weight_init=KaimingInit(), batch_norm=True))
            layers.append(Activation(Softmax()))
        else:
            raise ValueError(
                "Incorrect dataset. Should be --dataset cifar10 or --dataset i1k"
            )
        super(BuildResnet, self).__init__(layers=layers)
Example #15
init_uni = UniformInit(-0.1, 0.1)

seq1 = Sequential([
    Preprocess(functor=cifar_mean_subtract),
    Convolution((5, 5, 16),
                filter_init=init_uni,
                activation=Rectlin(),
                batch_norm=args.use_batch_norm),
    Pool2D(2, strides=2),
    Convolution((5, 5, 32),
                filter_init=init_uni,
                activation=Rectlin(),
                batch_norm=args.use_batch_norm),
    Pool2D(2, strides=2),
    Affine(nout=500,
           weight_init=init_uni,
           activation=Rectlin(),
           batch_norm=args.use_batch_norm),
    Affine(axes=ax.Y, weight_init=init_uni, activation=Softmax())
])

optimizer = GradientDescentMomentum(0.01, 0.9)
train_prob = seq1(inputs['image'])
train_loss = ng.cross_entropy_multi(train_prob,
                                    ng.one_hot(inputs['label'], axis=ax.Y))
batch_cost = ng.sequential(
    [optimizer(train_loss),
     ng.mean(train_loss, out_axes=())])
train_outputs = dict(batch_cost=batch_cost)

with Layer.inference_mode_on():
    inference_prob = seq1(inputs['image'])
Example #16
inputs = train_set.make_placeholders()
ax.Y.length = 10

######################
# Model specification


def cifar_mean_subtract(x):
    bgr_mean = ng.persistent_tensor(
        axes=x.axes.find_by_name('C'),
        initial_value=np.array([104., 119., 127.]))
    return (x - bgr_mean) / 255.


seq1 = Sequential([Preprocess(functor=cifar_mean_subtract),
                   Affine(nout=200, weight_init=UniformInit(-0.1, 0.1), activation=Rectlin()),
                   Affine(axes=ax.Y, weight_init=UniformInit(-0.1, 0.1), activation=Softmax())])

optimizer = GradientDescentMomentum(0.1, 0.9)
train_prob = seq1(inputs['image'])
train_loss = ng.cross_entropy_multi(train_prob, ng.one_hot(inputs['label'], axis=ax.Y))
batch_cost = ng.sequential([optimizer(train_loss), ng.mean(train_loss, out_axes=())])
train_outputs = dict(batch_cost=batch_cost)

with Layer.inference_mode_on():
    inference_prob = seq1(inputs['image'])
errors = ng.not_equal(ng.argmax(inference_prob, out_axes=[ax.N]), inputs['label'])
eval_loss = ng.cross_entropy_multi(inference_prob, ng.one_hot(inputs['label'], axis=ax.Y))
eval_outputs = dict(cross_ent_loss=eval_loss, misclass_pct=errors)

# Now bind the computations we are interested in
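The snippet stops at the binding step; Example #28 picks the same point up with ngt.make_transformer(). A hedged continuation, assuming the make_bound_computation helper from ngraph.frontends.neon is imported:

transformer = ngt.make_transformer()
train_computation = make_bound_computation(transformer, train_outputs, inputs)
eval_computation = make_bound_computation(transformer, eval_outputs, inputs)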
Example #17
    def __init__(self,
                 params_dict,
                 nout,
                 init,
                 init_h2h=None,
                 bias_init=None,
                 activation=None,
                 gate_activation=None,
                 batch_norm=False,
                 reset_cells=True,
                 **kwargs):
        super(MatchLSTMCell_withAttention, self).__init__(**kwargs)

        self.init = params_dict['init']
        max_question = params_dict['max_question']
        max_para = params_dict['max_para']
        hidden_size = nout

        # Axes
        # Axis for length of the hidden units
        self.hidden_rows = ng.make_axis(length=hidden_size, name='hidden_rows')
        # Axis for length of the hidden units
        self.F = ng.make_axis(length=hidden_size, name='F')
        # Axis for length of max question length
        self.hidden_cols_ques = ng.make_axis(length=max_question,
                                             name='hidden_cols_ques')
        # Axis with length of embedding sizes
        self.embed_axis = ng.make_axis(length=params_dict['embed_size'],
                                       name='embed_axis')
        # Recurrent axis for max question length
        self.REC = ng.make_axis(length=max_question, name='REC')
        # axis with size 1
        self.dummy_axis = ng.make_axis(length=1, name='dummy_axis')
        # Axis for batch size
        self.N = ng.make_axis(length=params_dict['batch_size'], name='N')
        # Axis for the output of match lstm cell
        self.lstm_feature = ng.make_axis(length=2 * hidden_size,
                                         name='lstm_feature')
        # Length of final classification layer (maximum length of the
        # paragraph)
        self.ax = params_dict['ax']
        self.ax.Y.length = max_para

        # Variables to be learnt during training (part of the attention network)
        # Naming convention taken from the paper
        self.W_p = ng.variable(axes=[self.hidden_rows, self.F],
                               initial_value=self.init)
        self.W_q = ng.variable(axes=[self.hidden_rows, self.F],
                               initial_value=self.init)
        self.W_r = ng.variable(axes=[self.hidden_rows, self.F],
                               initial_value=self.init)
        self.b_p = ng.variable(axes=self.hidden_rows, initial_value=self.init)
        self.w_lr = ng.variable(axes=[self.hidden_rows],
                                initial_value=self.init)

        # Constants for creating masks and initial hidden states
        self.e_q = ng.constant(axes=[self.dummy_axis, self.hidden_cols_ques],
                               const=np.ones([1, max_question]))
        self.e_q2 = ng.constant(axes=[self.F, self.dummy_axis], const=1)
        self.h_r_old = ng.constant(axes=[self.F, self.N], const=0)

        # Define variables for implementing the stacking operation; the
        # default stack op seems to be slow.
        L1 = np.vstack(
            (np.eye(hidden_size), np.zeros([hidden_size, hidden_size])))
        L2 = np.vstack((np.zeros([hidden_size,
                                  hidden_size]), np.eye(hidden_size)))
        self.ZX = ng.constant(const=L1, axes=[self.lstm_feature, self.F])
        self.ZY = ng.constant(const=L2, axes=[self.lstm_feature, self.F])

        # LSTM Cell Initialization (Code from the standard LSTM Cell in ngraph)
        self.nout = nout
        self.init = init
        self.init_h2h = init_h2h if init_h2h is not None else init
        self.bias_init = bias_init
        self.activation = activation
        if gate_activation is not None:
            self.gate_activation = gate_activation
        else:
            self.gate_activation = self.activation
        self.batch_norm = batch_norm
        self.reset_cells = reset_cells
        self.i2h = {}
        self.h2h = {}
        self.gate_transform = {}
        self.gate_output = {}
        for gate in self._gate_names:
            self.h2h[gate] = Linear(nout=self.nout, init=self.init_h2h[gate])
            self.i2h[gate] = Affine(axes=self.h2h[gate].axes,
                                    weight_init=self.init[gate],
                                    bias_init=self.bias_init[gate],
                                    batch_norm=self.batch_norm)
            if gate == 'g':
                self.gate_transform[gate] = self.activation
            else:
                self.gate_transform[gate] = self.gate_activation
        self.out_axes = None
Example #18
    def __init__(self,
                 params_dict,
                 nout,
                 init,
                 init_h2h=None,
                 bias_init=None,
                 activation=None,
                 gate_activation=None,
                 batch_norm=False,
                 reset_cells=True,
                 **kwargs):
        super(AnswerPointer_withAttention, self).__init__(**kwargs)

        self.init_axes = params_dict['init']
        max_question = params_dict['max_question']
        max_para = params_dict['max_para']
        hidden_size = nout

        # Axes
        # Axis for length of the hidden units
        self.hidden_rows = ng.make_axis(length=hidden_size, name='hidden_rows')
        # Axis for length of max_para
        self.hidden_cols_para = ng.make_axis(length=max_para,
                                             name='hidden_cols_para')
        # Axis for length of hidden unit size
        self.F = ng.make_axis(length=hidden_size, name='F')
        # Axis for length of max_question
        self.REC = ng.make_axis(length=max_question, name='REC')
        # Axis with length 1
        self.dummy_axis = ng.make_axis(length=1, name='dummy_axis')
        # Axis with length of batch_size
        self.N = ng.make_axis(length=params_dict['batch_size'], name='N')
        # Axis with twice the length of hidden sizes
        self.lstm_feature_new = ng.make_axis(length=2 * hidden_size,
                                             name='lstm_feature')
        self.ax = params_dict['ax']
        # Length of final classification layer (maximum length of the
        # paragraph)
        self.ax.Y.length = max_para

        # Variables
        self.V_answer = ng.variable(
            axes=[self.hidden_rows, self.lstm_feature_new],
            initial_value=self.init_axes)
        self.W_a = ng.variable(axes=[self.hidden_rows, self.F],
                               initial_value=self.init_axes)
        self.b_a = ng.variable(axes=self.hidden_rows,
                               initial_value=self.init_axes)
        self.e_q = ng.constant(axes=[self.dummy_axis, self.hidden_cols_para],
                               const=np.ones([1, max_para]))
        self.e_q2 = ng.constant(axes=[self.lstm_feature_new, self.dummy_axis],
                                const=1)
        self.v_lr = ng.variable(axes=[self.hidden_rows],
                                initial_value=self.init_axes)
        self.W_RNNx = ng.variable(axes=[self.hidden_rows, self.F],
                                  initial_value=self.init_axes)
        self.W_RNNh = ng.variable(axes=[self.hidden_rows, self.F],
                                  initial_value=self.init_axes)

        # LSTM Cell Initialization
        self.nout = nout
        self.init = init
        self.init_h2h = init_h2h if init_h2h is not None else init
        self.bias_init = bias_init
        self.activation = activation
        if gate_activation is not None:
            self.gate_activation = gate_activation
        else:
            self.gate_activation = self.activation
        self.batch_norm = batch_norm
        self.reset_cells = reset_cells
        self.i2h = {}
        self.h2h = {}
        self.gate_transform = {}
        self.gate_output = {}
        for gate in self._gate_names:
            self.h2h[gate] = Linear(nout=self.nout, init=self.init_h2h[gate])
            self.i2h[gate] = Affine(axes=self.h2h[gate].axes,
                                    weight_init=self.init[gate],
                                    bias_init=self.bias_init[gate],
                                    batch_norm=self.batch_norm)
            if gate == 'g':
                self.gate_transform[gate] = self.activation
            else:
                self.gate_transform[gate] = self.gate_activation
        self.out_axes = None
Example #19
def cifar_mean_subtract(x):
    bgr_mean = ng.persistent_tensor(axes=x.axes[0],
                                    initial_value=np.array([[104., 119.,
                                                             127.]]))
    y = ng.expand_dims((x - bgr_mean) / 255., ax.D, 1)
    return y


init_uni = UniformInit(-0.1, 0.1)

seq1 = Sequential([
    Preprocess(functor=cifar_mean_subtract),
    Convolution((5, 5, 16), filter_init=init_uni, activation=Rectlin()),
    Pool2D(2, strides=2),
    Convolution((5, 5, 32), filter_init=init_uni, activation=Rectlin()),
    Pool2D(2, strides=2),
    Affine(nout=500, weight_init=init_uni, activation=Rectlin()),
    Affine(axes=ax.Y, weight_init=init_uni, activation=Softmax())
])

######################
# Input specification
ax.C.length, ax.H.length, ax.W.length = train_set.shapes['image']
ax.D.length = 1
ax.N.length = args.batch_size
ax.Y.length = 10

# placeholders with descriptive names
inputs = dict(image=ng.placeholder([ax.C, ax.H, ax.W, ax.N]),
              label=ng.placeholder([ax.N]))

optimizer = GradientDescentMomentum(0.01, 0.9)
Example #20
init = UniformInit(low=-0.08, high=0.08)

# model initialization
one_hot_enc = Preprocess(functor=expand_onehot)
enc = Recurrent(hidden_size,
                init,
                activation=Tanh(),
                reset_cells=True,
                return_sequence=False)
one_hot_dec = Preprocess(functor=expand_onehot)
dec = Recurrent(hidden_size,
                init,
                activation=Tanh(),
                reset_cells=True,
                return_sequence=True)
linear = Affine(init, activation=Softmax(), bias_init=init, axes=(ax.Y,))

optimizer = RMSProp(decay_rate=0.95,
                    learning_rate=2e-3,
                    epsilon=1e-6,
                    gradient_clip_value=gradient_clip_value)

# build network graph
one_hot_enc_out = one_hot_enc.train_outputs(inputs['inp_txt'])
one_hot_dec_out = one_hot_dec.train_outputs(inputs['prev_tgt'])
enc_out = enc.train_outputs(one_hot_enc_out)
dec_out = dec.train_outputs(one_hot_dec_out, init_state=enc_out)
output_prob = linear.train_outputs(dec_out)

loss = ng.cross_entropy_multi(output_prob,
                              ng.one_hot(inputs['tgt_txt'], axis=ax.Y),
Example #21
time_axis = ng.make_axis(length=seq_len, name="REC")
feature_axis = ng.make_axis(length=n_features, name="F")
out_axis = ng.make_axis(length=n_features, name="Fo")

in_axes = ng.make_axes([batch_axis, time_axis, feature_axis])
out_axes = ng.make_axes([batch_axis, time_axis, out_axis])

# Build placeholders for the created axes
inputs = dict(X=ng.placeholder(in_axes),
              y=ng.placeholder(out_axes),
              iteration=ng.placeholder(axes=()))

# define model
if args.modeltype == "TCN":
    affine_layer = Affine(axes=out_axis,
                          weight_init=GaussianInit(0, 0.01),
                          activation=Logistic())
    model = Sequential(
        [lambda op: ng.map_roles(op, {
            'F': 'C',
            'REC': 'W'
        })] +
        tcn(n_features, hidden_sizes, kernel_size=kernel_size,
            dropout=dropout).layers +
        [lambda op: ng.map_roles(op, {
            'C': 'F',
            'W': 'REC'
        })] + [affine_layer])
elif args.modeltype == "LSTM":
    model = Sequential(
        recurrent_model.define_model(out_axis,
Example #22
                                    time_steps=time_steps,
                                    total_iterations=args.num_iterations)

valid_set = SequentialArrayIterator(ptb_data['valid'],
                                    batch_size=args.batch_size,
                                    time_steps=time_steps)

# weight initialization
init = UniformInit(low=-0.08, high=0.08)

# model initialization
seq1 = Sequential([
    Preprocess(functor=lambda x: ng.one_hot(x, axis=ax.Y)),
    Recurrent(hidden_size, init, activation=Tanh()),
    Affine(weight_init=init,
           activation=Softmax(),
           bias_init=init,
           axes=(ax.Y, ax.REC))
])

# Bind axes lengths:
ax.Y.length = len(tree_bank_data.vocab)
ax.REC.length = time_steps
ax.N.length = args.batch_size

# placeholders with descriptive names
inputs = dict(inp_txt=ng.placeholder([ax.REC, ax.N]),
              tgt_txt=ng.placeholder([ax.REC, ax.N]))

optimizer = RMSProp(decay_rate=0.95, learning_rate=2e-3, epsilon=1e-6)
output_prob = seq1.train_outputs(inputs['inp_txt'])
loss = ng.cross_entropy_multi(output_prob,
Example #23
    'iteration': ng.placeholder(axes=())
}

# Network Definition
if (use_embedding is False):
    seq1 = Sequential([
        Preprocess(functor=expand_onehot),
        LSTM(nout=recurrent_units,
             init=init_uni,
             backward=False,
             reset_cells=True,
             activation=Logistic(),
             gate_activation=Tanh(),
             return_sequence=True),
        Affine(weight_init=init_uni,
               bias_init=init_uni,
               activation=Softmax(),
               axes=out_axis)
    ])
else:
    embedding_dim = 8
    seq1 = Sequential([
        LookupTable(len(shakes.vocab) + 1,
                    embedding_dim,
                    init_uni,
                    update=True),
        LSTM(nout=recurrent_units,
             init=init_uni,
             backward=False,
             reset_cells=True,
             activation=Logistic(),
             gate_activation=Tanh(),
Example #24
def affine_layer(h_dim, activation, name):
    return Affine(nout=h_dim,
                  activation=activation,
                  weight_init=GaussianInit(std=1.0),
                  bias_init=ConstantInit(val=0.0),
                  name=name)
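A one-line usage sketch for this helper; the size and name are illustrative:

hidden = affine_layer(64, Rectlin(), name='hidden_0')  # 64-unit rectified layer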
Example #25
                activation=Rectlin(),
                padding=1),
    Pool2D(2, strides=2),
    Convolution((3, 3, 512),
                filter_init=GaussianInit(var=0.01),
                bias_init=init,
                activation=Rectlin(),
                padding=1),
    Convolution((3, 3, 512),
                filter_init=GaussianInit(var=0.01),
                bias_init=init,
                activation=Rectlin(),
                padding=1),
    Pool2D(2, strides=2),
    Affine(nout=4096,
           weight_init=GaussianInit(var=0.01),
           bias_init=init,
           activation=Rectlin()),
    Affine(nout=4096,
           weight_init=GaussianInit(var=0.01),
           bias_init=init,
           activation=Rectlin()),
    Affine(axes=ax.Y,
           weight_init=GaussianInit(var=0.01),
           bias_init=init,
           activation=Softmax())
])

# Learning rate change based on schedule from learning_rate_policies.py
lr_schedule = {
    'name': 'schedule',
    'base_lr': 0.01,
Example #26
previous_steps = [ng.constant(0., [batch_axis, feature_axis])] + [target_steps[i] for i in range(seq_len - 1)]
previous = ng.stack(previous_steps, time_axis)

# define model
encoder_recurrent_units = list(map(int, args.n_hidden.split(",")))
if args.bottleneck:
    decoder_recurrent_units = encoder_recurrent_units[::-1]
else:
    decoder_recurrent_units = encoder_recurrent_units
encoder = recurrent_model.RecurrentEncoder(celltype=args.modeltype,
                                           recurrent_units=encoder_recurrent_units,
                                           bottleneck=args.bottleneck)
decoder = recurrent_model.RecurrentDecoder(out_axes=(feature_axis,), celltype=args.modeltype,
                                           recurrent_units=decoder_recurrent_units)

affine_layer = Affine(weight_init=init_uni, bias_init=init_uni, activation=Identity(),
                      axes=[out_axis])

# Optimizer
optimizer = RMSProp(gradient_clip_value=args.grad_clip_value, learning_rate=args.lr)


def predictions(encoder, affine_layer, inputs):
    encoded = encoder(inputs, combine=True)
    preds = affine_layer(encoded)
    preds = ng.axes_with_order(preds, rul_axes)
    return preds


def build_seq2seq_computations():
    # Training loss, optimizer
    train_decoded = recurrent_model.encode_and_decode(encoder, decoder,
Example #27
else:
    layer_0 = Preprocess(functor=lambda x: ng.one_hot(x, axis=ax.Y))

if args.layer_type == "rnn":
    rlayer = Recurrent(hidden_size, init, activation=Tanh())
elif args.layer_type == "birnn":
    rlayer = BiRNN(hidden_size,
                   init,
                   activation=Tanh(),
                   return_sequence=True,
                   sum_out=True)

# model initialization
seq1 = Sequential([
    layer_0, rlayer,
    Affine(init, activation=Softmax(), bias_init=init, axes=(ax.Y, ))
])

optimizer = RMSProp()

train_prob = seq1(inputs['inp_txt'])
train_loss = ng.cross_entropy_multi(train_prob,
                                    ng.one_hot(inputs['tgt_txt'], axis=ax.Y),
                                    usebits=True)
batch_cost = ng.sequential(
    [optimizer(train_loss),
     ng.mean(train_loss, out_axes=())])
train_outputs = dict(batch_cost=batch_cost)

with Layer.inference_mode_on():
    inference_prob = seq1(inputs['inp_txt'])
Example #28
args = parser.parse_args()

np.random.seed(args.rng_seed)

# Create the dataloader
train_data, valid_data = MNIST(args.data_dir).load_data()
train_set = ArrayIterator(train_data, args.batch_size, total_iterations=args.num_iterations)
valid_set = ArrayIterator(valid_data, args.batch_size)

inputs = train_set.make_placeholders()
ax.Y.length = 10

######################
# Model specification
seq1 = Sequential([Preprocess(functor=lambda x: x / 255.),
                   Affine(nout=100, weight_init=GaussianInit(), activation=Rectlin()),
                   Affine(axes=ax.Y, weight_init=GaussianInit(), activation=Logistic())])

optimizer = GradientDescentMomentum(0.1, 0.9)

output_prob = seq1.train_outputs(inputs['image'])
errors = ng.not_equal(ng.argmax(output_prob, out_axes=[ax.N]), inputs['label'])
loss = ng.cross_entropy_binary(output_prob, ng.one_hot(inputs['label'], axis=ax.Y))
mean_cost = ng.mean(loss, out_axes=())
updates = optimizer(loss)

train_outputs = dict(batch_cost=mean_cost, updates=updates)
loss_outputs = dict(cross_ent_loss=loss, misclass_pct=errors)

# Now bind the computations we are interested in
transformer = ngt.make_transformer()
Example #29
                padding=1),
    Pooling(pool_shape=(3, 3), padding=1, strides=2, pool_type='max'),
    Inception([(64, ), (96, 128), (16, 32), (32, )]),
    Inception([(128, ), (128, 192), (32, 96), (64, )]),
    Pooling(pool_shape=(3, 3), padding=1, strides=2, pool_type='max'),
    Inception([(192, ), (96, 208), (16, 48), (64, )]),
    Inception([(160, ), (112, 224), (24, 64), (64, )]),
    Inception([(128, ), (128, 256), (24, 64), (64, )]),
    Inception([(112, ), (144, 288), (32, 64), (64, )]),
    Inception([(256, ), (160, 320), (32, 128), (128, )]),
    Pooling(pool_shape=(3, 3), padding=1, strides=2, pool_type='max'),
    Inception([(256, ), (160, 320), (32, 128), (128, )]),
    Inception([(384, ), (192, 384), (48, 128), (128, )]),
    Pooling(pool_shape=(7, 7), strides=1, pool_type="avg"),
    Affine(axes=ax.Y,
           weight_init=XavierInit(),
           bias_init=bias_init,
           activation=Softmax())
])

lr_schedule = {
    'name': 'schedule',
    'base_lr': 0.01,
    'gamma': (1 / 250.)**(1 / 3.),
    'schedule': [22, 44, 65]
}

optimizer = GradientDescentMomentum(lr_schedule,
                                    0.0,
                                    wdecay=0.0005,
                                    iteration=inputs['iteration'])
train_prob = seq1(inputs['image'])
Example #30
inputs = {
    'X': ng.placeholder(in_axes),
    'y': ng.placeholder(out_axes),
    'iteration': ng.placeholder(axes=())
}

# Network Definition
seq1 = Sequential([
    LSTM(nout=recurrent_units,
         init=init_uni,
         backward=False,
         activation=Logistic(),
         gate_activation=Tanh(),
         return_sequence=predict_seq),
    Affine(weight_init=init_uni,
           bias_init=init_uni,
           activation=Identity(),
           axes=out_axis)
])

# Optimizer
# Following policy will set the initial learning rate to 0.05 (base_lr)
# At iteration (num_iterations // 5), learning rate is multiplied by gamma (new lr = .005)
# At iteration (num_iterations // 2), it will be reduced by gamma again (new lr = .0005)
schedule = [num_iterations // 5, num_iterations // 2]
learning_rate_policy = {
    'name': 'schedule',
    'schedule': schedule,
    'gamma': 0.1,
    'base_lr': 0.05
}
optimizer = Adam(learning_rate=learning_rate_policy,