def build_model(inputs, mask, units, depth, n_labels, feat_dim, init_lr, direction, dropout, init_filters, optim, lstm=False, vgg=False):
    """Build and compile a (bi)directional acoustic model.

    VGG-style conv front-end followed by the recurrent stack from
    ``network.network`` and a framewise softmax over ``n_labels + 1``
    classes (+1 presumably for a blank/extra label — TODO confirm).

    Args:
        inputs: Keras Input tensor of acoustic features.
        mask: Keras Input tensor; part of the model inputs (not otherwise
            consumed here — NOTE(review): confirm callers rely on this).
        units, depth: RNN width and number of layers.
        n_labels: number of output labels (softmax has n_labels + 1 units).
        feat_dim: feature dimension passed to the conv front-end.
        init_lr: learning rate for Adam (Adadelta uses its default here).
        direction, dropout, lstm: forwarded to ``network.network``.
        init_filters: conv front-end filter count.
        optim: 'adam' selects Adam; anything else selects Adadelta.
        vgg: False -> vgg2l.VGG2L, True -> vgg1l.VGG.

    Returns:
        A compiled keras ``Model`` taking ``[inputs, mask]``.
    """
    # Zero-valued time steps are padding; mask them before the front-end.
    outputs = Masking(mask_value=0.0)(inputs)
    # BUG FIX: the original passed the raw `inputs` to the VGG front-end,
    # silently discarding the Masking layer above (every sibling builder in
    # this file feeds the masked tensor). Also dropped the dead local
    # `filters = init_filters`, which was never used.
    if vgg is False:
        outputs = vgg2l.VGG2L(outputs, init_filters, feat_dim)
    else:
        outputs = vgg1l.VGG(outputs, init_filters, feat_dim)
    outputs = network.network(outputs, units, depth, n_labels, direction, dropout, lstm)
    outputs = TimeDistributed(Dense(n_labels + 1))(outputs)
    outputs = Activation('softmax')(outputs)
    model = Model([inputs, mask], outputs)
    # we can get accuracy from data along with batch/temporal axes.
    if optim == 'adam':
        model.compile(keras.optimizers.Adam(lr=init_lr, clipnorm=50.),
                      loss=['categorical_crossentropy'],
                      metrics=['categorical_accuracy'])
    else:
        model.compile(keras.optimizers.Adadelta(),
                      loss=['categorical_crossentropy'],
                      metrics=['categorical_accuracy'])
    return model
def build_model(inputs, mask, units, depth, n_labels, feat_dim, dropout, init_filters, lstm=False, vgg=False):
    """Assemble an uncompiled latency-controlled model.

    Masked features go through a VGG conv front-end, are multiplied by the
    explicit ``mask`` tensor, re-masked, and fed to ``network.lc_network``;
    a framewise softmax over ``n_labels + 1`` classes tops it off.

    Returns an uncompiled keras ``Model`` taking ``[inputs, mask]``.
    """
    masked_feats = Masking(mask_value=0.0)(inputs)
    # Select the conv front-end (vgg flag False -> 2-layer VGG).
    front_end = vgg2l.VGG2L if vgg is False else vgg1l.VGG
    hidden = front_end(masked_feats, init_filters, feat_dim)
    # Re-apply the padding mask elementwise, then restore Keras masking.
    hidden = Lambda(lambda x: tf.multiply(x[0], x[1]))([hidden, mask])
    hidden = Masking(mask_value=0.0)(hidden)
    hidden = network.lc_network(hidden, units, depth, n_labels, dropout, init_filters, lstm)
    logits = TimeDistributed(Dense(n_labels + 1))(hidden)
    probs = Activation('softmax')(logits)
    return Model([inputs, mask], probs)
def build_model(inputs, mask, units, depth, n_labels, feat_dim, init_lr, dropout, init_filters, optim, lstm=False, vgg=False):
    """Build and compile a multi-resolution (dilated/strided) model.

    Three parallel branches over the masked features are summed:
      1. full-rate: VGG front-end + lc_network (uses the caller's depth/filters);
      2. half-rate: strided VGG down, lc_network, transposed-conv back up;
      3. quarter-rate: quad-strided VGG down, lc_network, quad-transpose up.
    Branches 2 and 3 use a reduced depth (2) and filter count (16).

    Returns a compiled keras ``Model`` taking ``[inputs, mask]``.
    """
    masked = Masking(mask_value=0.0)(inputs)
    front_end = vgg2l.VGG2L if vgg is False else vgg1l.VGG
    full_rate = front_end(masked, init_filters, feat_dim)
    full_rate = Lambda(lambda x: tf.multiply(x[0], x[1]))([full_rate, mask])
    full_rate = Masking(mask_value=0.0)(full_rate)
    branch1 = network.lc_network(full_rate, units, depth, n_labels, dropout, init_filters, lstm)

    # The reduced-rate branches use their own, smaller hyper-parameters.
    depth = 2
    init_filters = 16

    # 1/2 time resolution.
    branch2 = dilation.VGG2L_Strides(masked, init_filters, feat_dim)
    branch2 = network.lc_network(branch2, units, depth, n_labels, dropout, init_filters, lstm)
    branch2 = dilation.VGG2L_Transpose(branch2, init_filters, units * 2)

    # 1/4 time resolution.
    branch3 = dilation.VGG2L_QuadStrides(masked, init_filters, feat_dim)       # time/2, feat_dim*filters*2
    branch3 = network.lc_network(branch3, units, depth, n_labels, dropout, init_filters, lstm)  # time/2, units*2
    branch3 = dilation.VGG2L_QuadTranspose(branch3, init_filters, units * 2)   # time, units*2

    merged = Add()([branch1, branch2, branch3])
    merged = TimeDistributed(Dense(n_labels + 1))(merged)
    merged = Activation('softmax')(merged)
    model = Model([inputs, mask], merged)

    if optim == 'adam':
        optimizer = keras.optimizers.Adam(lr=init_lr)
    else:
        optimizer = keras.optimizers.Adadelta(lr=init_lr)
    model.compile(optimizer,
                  loss=['categorical_crossentropy'],
                  metrics=['categorical_accuracy'])
    return model
def build_model(inputs, mask, units, depth, n_labels, feat_dim, init_lr, dropout, init_filters, optim, lstm=False, vgg=False):
    """Build and compile a stacked bidirectional model with stateful forward RNNs.

    Each of the ``depth`` layers runs a stateful forward GRU/LSTM (states kept
    across batches) and a stateless backward GRU/LSTM, concatenates them, and
    layer-normalizes the result. Ends in a framewise softmax over
    ``n_labels + 1`` classes. Compiled with categorical cross-entropy and a
    clipnorm of 50 for both optimizers.

    Returns a compiled keras ``Model`` taking ``[inputs, mask]``.
    """
    def _recurrent(stateful, backwards):
        # One direction of one layer. The forward pass is stateful; the
        # backward pass deliberately does not preserve state values.
        if lstm is False:
            return GRU(units, kernel_initializer='glorot_uniform',
                       return_sequences=True, stateful=stateful,
                       dropout=dropout, unroll=False, go_backwards=backwards)
        return LSTM(units, kernel_initializer='glorot_uniform',
                    return_sequences=True, unit_forget_bias=True,
                    stateful=stateful, dropout=dropout,
                    unroll=False, go_backwards=backwards)

    hidden = Masking(mask_value=0.0)(inputs)
    front_end = vgg2l.VGG2L if vgg is False else vgg1l.VGG
    hidden = front_end(hidden, init_filters, feat_dim)

    for _ in range(depth):
        fwd = _recurrent(stateful=True, backwards=False)(hidden)
        bwd = _recurrent(stateful=False, backwards=True)(hidden)
        hidden = Concatenate(axis=-1)([fwd, bwd])
        hidden = layer_normalization.LayerNormalization()(hidden)

    logits = TimeDistributed(Dense(n_labels + 1))(hidden)
    probs = Activation('softmax')(logits)
    model = Model([inputs, mask], probs)

    if optim == 'adam':
        optimizer = keras.optimizers.Adam(lr=init_lr, clipnorm=50.)
    else:
        optimizer = keras.optimizers.Adadelta(lr=init_lr, clipnorm=50.)
    model.compile(optimizer,
                  loss=['categorical_crossentropy'],
                  metrics=['categorical_accuracy'])
    return model
def build_model(file, inputs, mask, units, depth, n_labels, feat_dim, init_lr, dropout, init_filters, optim, proc_frames, lstm=False, vgg=False):
    """Build a training model with a frozen VGG front-end plus a truncated
    validation model.

    The conv front-end is wrapped in its own (frozen) sub-model; its output is
    multiplied by ``mask``, re-masked, and fed to ``network.lc_network``, which
    returns a full model and a truncated one (used for validation).

    Args:
        file: unused here — NOTE(review): kept for interface compatibility.
        proc_frames: forwarded to ``network.lc_network``.
        (remaining args as in the sibling builders in this file)

    Returns:
        ``(model, valid_model)`` — the compiled training model and the
        uncompiled truncated validation model, both taking ``[inputs, mask]``.
    """
    outputs = Masking(mask_value=0.0)(inputs)
    if vgg is False:
        outputs = vgg2l.VGG2L(outputs, init_filters, feat_dim)
    else:
        outputs = vgg1l.VGG(outputs, init_filters, feat_dim)
    vgg_model = Model(inputs, outputs)
    vgg_model.trainable = False  # freeze the conv front-end

    # BUG FIX: the original rebound the `inputs` parameter to the list
    # `vgg_model.outputs` and then used it as a tensor; take the single
    # output tensor and keep `inputs` intact.
    vgg_out = vgg_model.outputs[0]
    masked_out = Lambda(lambda x: tf.multiply(x[0], x[1]))([vgg_out, mask])
    rnn_inputs = Masking(mask_value=0.0)(masked_out)
    rnn_model, rnn_trunc_model = network.lc_network(rnn_inputs, units, depth, n_labels,
                                                    dropout, init_filters, proc_frames, lstm)
    outputs = rnn_model.outputs[0]
    outputs = TimeDistributed(Dense(n_labels + 1))(outputs)
    outputs = Activation('softmax')(outputs)

    # BUG FIX: `Model(inputs=[...], outputs)` was a SyntaxError (positional
    # argument after keyword); pass outputs by keyword too.
    model = Model(inputs=[inputs, mask], outputs=outputs)
    valid_model = Model(inputs=[inputs, mask], outputs=rnn_trunc_model.outputs)

    if optim == 'adam':
        model.compile(keras.optimizers.Adam(lr=init_lr),
                      loss=['categorical_crossentropy'],
                      metrics=['categorical_accuracy'])
    else:
        model.compile(keras.optimizers.Adadelta(lr=init_lr),
                      loss=['categorical_crossentropy'],
                      metrics=['categorical_accuracy'])
    return model, valid_model
def build_model(inputs, units, depth, n_labels, feat_dim, init_lr, dropout, init_filters, optim):
    """Build a VGG2L + LSTM model.

    When ``optim`` is given, wraps the network in a compiled greedy
    ``CTCModel``; when ``optim`` is None, returns an uncompiled plain
    ``Model`` instead (e.g. for inference).
    """
    net = vgg2l.VGG2L(inputs, init_filters, feat_dim)
    net = lstm(net, units, depth, n_labels, dropout)
    net = TimeDistributed(Dense(n_labels + 1))(net)
    net = Activation('softmax')(net)

    if optim is None:
        return Model(inputs, net)

    model = CTCModel.CTCModel([inputs], [net], greedy=True)
    if optim == 'adam':
        model.compile(keras.optimizers.Adam(lr=init_lr))
    elif optim == 'sgd':
        model.compile(keras.optimizers.SGD(lr=init_lr, momentum=0.9))
    else:
        model.compile(keras.optimizers.Adadelta(lr=init_lr))
    return model
def build_model(inputs, mask, units, depth, n_labels, feat_dim, init_lr, dropout, init_filters, optim, lstm=False, vgg=False):
    """Build and compile a latency-controlled model trained with a soft-label loss.

    Same topology as the sibling lc_network builder, but compiled with
    ``multi_utils.soft_loss`` instead of categorical cross-entropy.

    Returns a compiled keras ``Model`` taking ``[inputs, mask]``.
    """
    hidden = Masking(mask_value=0.0)(inputs)
    front_end = vgg2l.VGG2L if vgg is False else vgg1l.VGG
    hidden = front_end(hidden, init_filters, feat_dim)
    hidden = Lambda(lambda t: tf.multiply(t[0], t[1]))([hidden, mask])
    hidden = Masking(mask_value=0.0)(hidden)
    hidden = network.lc_network(hidden, units, depth, n_labels, dropout, init_filters, lstm)
    logits = TimeDistributed(Dense(n_labels + 1))(hidden)
    probs = Activation('softmax')(logits)
    model = Model([inputs, mask], probs)

    if optim == 'adam':
        optimizer = keras.optimizers.Adam(lr=init_lr)
    else:
        optimizer = keras.optimizers.Adadelta(lr=init_lr)
    model.compile(optimizer,
                  loss=[multi_utils.soft_loss],
                  metrics=['categorical_accuracy'])
    return model
def build_model(inputs, units, depth, n_labels, feat_dim, init_lr, direction, dropout, init_filters, optim, lstm=False, vgg=False):
    """Build and compile a greedy CTC model.

    VGG conv front-end, ``network.network`` recurrent stack, framewise softmax
    over ``n_labels + 1`` classes, wrapped in ``CTCModel`` with greedy decoding.
    ``optim`` selects 'adam', 'sgd' (momentum 0.9), or Adadelta otherwise.
    """
    front_end = vgg2l.VGG2L if vgg is False else vgg1l.VGG
    features = front_end(inputs, init_filters, feat_dim)
    features = network.network(features, units, depth, n_labels, direction, dropout, lstm)
    logits = TimeDistributed(Dense(n_labels + 1))(features)
    probs = Activation('softmax')(logits)

    model = CTCModel.CTCModel([inputs], [probs], greedy=True)
    if optim == 'adam':
        optimizer = keras.optimizers.Adam(lr=init_lr)
    elif optim == 'sgd':
        optimizer = keras.optimizers.SGD(lr=init_lr, momentum=0.9)
    else:
        optimizer = keras.optimizers.Adadelta(lr=init_lr)
    model.compile(optimizer)
    return model
def build_model(inputs, units, depth, n_labels, feat_dim, direction, init_filters, lstm=False, vgg=False):
    """Build an uncompiled model with dropout disabled (evaluation-style).

    VGG conv front-end plus the ``network.network`` recurrent stack with a
    dropout rate fixed at 0.0, ending in a framewise softmax over
    ``n_labels + 1`` classes.
    """
    front_end = vgg2l.VGG2L if vgg is False else vgg1l.VGG
    hidden = front_end(inputs, init_filters, feat_dim)
    # Dropout is hard-wired to 0.0 here: this builder is for inference.
    hidden = network.network(hidden, units, depth, n_labels, direction, 0.0, lstm)
    logits = TimeDistributed(Dense(n_labels + 1))(hidden)
    probs = Activation('softmax')(logits)
    return Model(inputs, probs)
def main():
    """Parse evaluation CLI arguments and build the evaluation model.

    Returns the constructed (uncompiled) keras ``Model``.
    NOTE(review): args like --weights, --prior and --data are parsed but not
    consumed in the visible body — presumably used further along in the
    original file; verify before relying on this entry point.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--data', type=str, required=True, help='training data')
    parser.add_argument('--eval', type=str, help='evaluation data')
    parser.add_argument('--feat-dim', default=40, type=int, help='feats dim')
    parser.add_argument('--n-labels', default=1024, type=int, required=True,
                        help='number of output labels')
    parser.add_argument('--batch-size', default=1, type=int, help='mini-batch size')
    parser.add_argument('--snapshot', type=str, default='./', help='snapshot directory')
    parser.add_argument('--snapshot-prefix', type=str, default='snapshot',
                        help='snapshot file prefix')
    parser.add_argument('--eval-output-prefix', type=str, default='eval_out')
    parser.add_argument('--units', type=int, default=16, help='number of LSTM cells')
    parser.add_argument('--lstm-depth', type=int, default=2, help='number of LSTM layers')
    parser.add_argument('--process-frames', type=int, default=10, help='process frames')
    parser.add_argument('--extra-frames1', type=int, default=10, help='1st extra frames')
    parser.add_argument('--extra-frames2', type=int, default=10, help='2nd extra frames')
    parser.add_argument('--num-extra-frames1', type=int, default=1,
                        help='number of extra frames 1')
    parser.add_argument('--filters', type=int, default=16, help='number of filters')
    parser.add_argument('--lstm', action='store_true')
    parser.add_argument('--vgg', action='store_true')
    parser.add_argument('--prior', type=str, default=None, help='prior weights')
    parser.add_argument('--prior-scale', type=float, default=1.0, help='prior scaler')
    parser.add_argument('--weights', type=str, required=True, help='model weights')
    args = parser.parse_args()

    eval_in = Input(batch_shape=(args.batch_size, None, args.feat_dim))
    eval_mask = Input(batch_shape=(args.batch_size, None,
                                   args.feat_dim * args.filters * 2))

    # BUG FIX: the original body referenced undefined names (`inputs`, `vgg`,
    # `init_filters`, `feat_dim`, `mask`, `units`, `depth`, `n_labels`,
    # `dropout`); wire the parsed arguments and the Input tensors in instead.
    # Dropout is set to 0.0 since this builds the evaluation model —
    # TODO confirm against the training-side builder.
    outputs = Masking(mask_value=0.0)(eval_in)
    if args.vgg is False:
        outputs = vgg2l.VGG2L(outputs, args.filters, args.feat_dim)
    else:
        outputs = vgg1l.VGG(outputs, args.filters, args.feat_dim)
    outputs = Lambda(lambda x: tf.multiply(x[0], x[1]))([outputs, eval_mask])
    outputs = Masking(mask_value=0.0)(outputs)
    outputs = network.lc_network(outputs, args.units, args.lstm_depth,
                                 args.n_labels, 0.0, args.lstm)
    outputs = TimeDistributed(Dense(args.n_labels + 1))(outputs)
    outputs = Activation('softmax')(outputs)
    model = Model([eval_in, eval_mask], outputs)
    return model