def create_model(vocab_size, args):
    """Build a word-level RNN language model.

    vocab_size -- embedding input dimension and softmax output width.
    args       -- parsed options: rnn ('GRU'|'LSTM'), bidirectional, layers,
                  hidden_size, embed_size, dropout, batch_size.
    Returns an uncompiled Keras model: a Graph when args.bidirectional is set,
    otherwise a Sequential stack.
    """
    # Map the CLI choice onto a recurrent layer class; anything else is a
    # usage error (same AssertionError/"Invalid RNN" as before).
    rnn_classes = {'GRU': recurrent.GRU, 'LSTM': recurrent.LSTM}
    assert args.rnn in rnn_classes, "Invalid RNN"
    RNN = rnn_classes[args.rnn]

    if args.bidirectional:
        # Legacy Graph API: a forward and a backward RNN chain are built side
        # by side, merged between layers either through a Dropout node or
        # directly as a pair of inputs.
        model = Graph()
        # NOTE(review): dtype "uint" and batch_input_shape (batch, 1) kept
        # verbatim -- confirm against the Keras version this targets.
        model.add_input(name="input",
                        batch_input_shape=(args.batch_size, 1),
                        dtype="uint")
        model.add_node(Embedding(vocab_size, args.embed_size, mask_zero=True),
                       name="embed", input='input')
        for layer in xrange(args.layers):
            # Where this layer reads from: the embedding for the first layer,
            # the previous Dropout merge when dropout is enabled, otherwise
            # the two previous RNN nodes merged directly.
            if layer == 0:
                feed, merged = 'embed', []
            elif args.dropout > 0:
                feed, merged = 'dropout' + str(layer), []
            else:
                feed, merged = None, ['forward' + str(layer), 'backward' + str(layer)]
            model.add_node(RNN(args.hidden_size, return_sequences=True),
                           name='forward' + str(layer + 1),
                           input=feed, inputs=merged)
            model.add_node(RNN(args.hidden_size, return_sequences=True, go_backwards=True),
                           name='backward' + str(layer + 1),
                           input=feed, inputs=merged)
            if args.dropout > 0:
                # Merge both directions through a shared Dropout node.
                model.add_node(Dropout(args.dropout),
                               name='dropout' + str(layer + 1),
                               inputs=['forward' + str(layer + 1),
                                       'backward' + str(layer + 1)])
        # Per-timestep softmax over the vocabulary, fed from the last merge.
        if args.dropout > 0:
            final_feed = 'dropout' + str(args.layers)
            final_merged = []
        else:
            final_feed = None
            final_merged = ['forward' + str(args.layers), 'backward' + str(args.layers)]
        model.add_node(TimeDistributedDense(vocab_size, activation="softmax"),
                       name="softmax", input=final_feed, inputs=final_merged)
        model.add_output(name='output', input="softmax")
    else:
        # Unidirectional variant: plain Sequential stack with optional
        # inter-layer dropout.
        model = Sequential()
        model.add(Embedding(vocab_size, args.embed_size, mask_zero=True))
        for _ in xrange(args.layers):
            model.add(RNN(args.hidden_size, return_sequences=True))
            if args.dropout > 0:
                model.add(Dropout(args.dropout))
        model.add(TimeDistributedDense(vocab_size, activation="softmax"))
    return model
def create_model(vocab_size, args):
    """Assemble the RNN language model described by the command-line options.

    vocab_size -- embedding rows / softmax width.
    args       -- options: rnn, bidirectional, layers, hidden_size,
                  embed_size, dropout, batch_size.
    Returns an uncompiled Keras model (Graph if bidirectional, else Sequential).
    """
    if args.rnn == 'GRU':
        RNN = recurrent.GRU
    elif args.rnn == 'LSTM':
        RNN = recurrent.LSTM
    else:
        assert False, "Invalid RNN"

    if not args.bidirectional:
        # Simple unidirectional stack; optional dropout after each RNN layer.
        net = Sequential()
        net.add(Embedding(vocab_size, args.embed_size, mask_zero=True))
        for _unused in xrange(args.layers):
            net.add(RNN(args.hidden_size, return_sequences=True))
            if args.dropout > 0:
                net.add(Dropout(args.dropout))
        net.add(TimeDistributedDense(vocab_size, activation="softmax"))
        return net

    # Bidirectional: legacy Graph API with parallel forward/backward chains.
    net = Graph()
    # NOTE(review): dtype "uint" / batch_input_shape (batch, 1) kept verbatim.
    net.add_input(name="input",
                  batch_input_shape=(args.batch_size, 1),
                  dtype="uint")
    net.add_node(Embedding(vocab_size, args.embed_size, mask_zero=True),
                 name="embed", input='input')

    def source_for(idx):
        # Returns (single input name, merged input names) feeding layer idx
        # (0-based): embedding for the first layer, the previous Dropout
        # merge when dropout is on, else both previous RNN directions.
        if idx == 0:
            return 'embed', []
        if args.dropout > 0:
            return 'dropout' + str(idx), []
        return None, ['forward' + str(idx), 'backward' + str(idx)]

    for idx in xrange(args.layers):
        feed, merged = source_for(idx)
        net.add_node(RNN(args.hidden_size, return_sequences=True),
                     name='forward' + str(idx + 1), input=feed, inputs=merged)
        net.add_node(RNN(args.hidden_size, return_sequences=True, go_backwards=True),
                     name='backward' + str(idx + 1), input=feed, inputs=merged)
        if args.dropout > 0:
            net.add_node(Dropout(args.dropout),
                         name='dropout' + str(idx + 1),
                         inputs=['forward' + str(idx + 1), 'backward' + str(idx + 1)])

    # Per-timestep softmax reads from the final merge point.
    if args.dropout > 0:
        net.add_node(TimeDistributedDense(vocab_size, activation="softmax"),
                     name="softmax",
                     input='dropout' + str(args.layers), inputs=[])
    else:
        net.add_node(TimeDistributedDense(vocab_size, activation="softmax"),
                     name="softmax",
                     input=None,
                     inputs=['forward' + str(args.layers), 'backward' + str(args.layers)])
    net.add_output(name='output', input="softmax")
    return net
ballnum=BALLNUM, sigma=SIGMA, batchsize=BATCHSIZE), name='gaussian', input='data96') """ graph.add_node(BallModel(batchsize=BATCHSIZE),name='ballxyz',input='denseall5') graph.add_node(ConvertToXY(batchsize=BATCHSIZE,height=HEIGHT,width=WIDTH,ballnum=BALLNUM),name='ballxy',input='ballxyz') graph.add_node(GaussianModel(height=HEIGHT,width=WIDTH,ballnum=BALLNUM,sigma=SIGMA,batchsize=BATCHSIZE),name='gaussian',input='ballxy') """ #main.add_output(name='heatmap',input='denseall2') main.add_output(name='final', input='gaussian') #graph.add_output(name='gaussianmap',input='gaussian') ##To 26 Degree """ ##48*3 ball=Sequential() ball.add(BallModel(prenet=graph,batchsize=BATCHSIZE)) ##48*2 ( (xk/zk+0.5)*width (yk/zk+0.5)*height) xy=Sequential() xy.add(ConvertToXY(prenet=ball,batchsize=BATCHSIZE,height=HEIGHT,width=WIDTH,ballnum=BALLNUM)) ##Gaussian Sphere Model main=Sequential() main.add(GaussianModel(prenet=ball,height=HEIGHT,width=WIDTH,ballnum=BALLNUM,sigma=SIGMA,batchsize=BATCHSIZE)) """ sgd = SGD(lr=0.5, momentum=0.9, nesterov=True)
str(i) if args.dropout > 0 else None, inputs=['forward' + str(i), 'backward' + str(i)] if i > 0 and args.dropout == 0 else []) if args.dropout > 0: model.add_node( Dropout(args.dropout), name='dropout' + str(i + 1), inputs=['forward' + str(i + 1), 'backward' + str(i + 1)]) model.add_output( name='output', input='dropout' + str(args.layers) if args.dropout > 0 else None, inputs=['forward' + str(args.layers), 'backward' + str(args.layers)] if args.dropout == 0 else []) else: model = Sequential() model.add( Embedding(vocab_size, args.embed_size, mask_zero=not args.convolution)) if args.convolution: model.add( Convolution1D(nb_filter=args.conv_filters, filter_length=args.conv_filter_length, border_mode=args.conv_border_mode, activation=args.conv_activation, subsample_length=args.conv_subsample_length)) if args.pooling: model.add(MaxPooling1D(pool_length=args.pool_length)) for i in xrange(args.layers): model.add( RNN(args.hidden_size, return_sequences=False if i + 1 == args.layers else True)) if args.dropout > 0: model.add(Dropout(args.dropout))
main.add_input(name='data96',ndim=4) main.add_node(GaussianModel(prenet=xy,height=HEIGHT,width=WIDTH,ballnum=BALLNUM,sigma=SIGMA,batchsize=BATCHSIZE),name='gaussian',input='data96') """ graph.add_node(BallModel(batchsize=BATCHSIZE),name='ballxyz',input='denseall5') graph.add_node(ConvertToXY(batchsize=BATCHSIZE,height=HEIGHT,width=WIDTH,ballnum=BALLNUM),name='ballxy',input='ballxyz') graph.add_node(GaussianModel(height=HEIGHT,width=WIDTH,ballnum=BALLNUM,sigma=SIGMA,batchsize=BATCHSIZE),name='gaussian',input='ballxy') """ #main.add_output(name='heatmap',input='denseall2') main.add_output(name='final',input='gaussian') #graph.add_output(name='gaussianmap',input='gaussian') ##To 26 Degree """ ##48*3 ball=Sequential() ball.add(BallModel(prenet=graph,batchsize=BATCHSIZE)) ##48*2 ( (xk/zk+0.5)*width (yk/zk+0.5)*height) xy=Sequential() xy.add(ConvertToXY(prenet=ball,batchsize=BATCHSIZE,height=HEIGHT,width=WIDTH,ballnum=BALLNUM)) ##Gaussian Sphere Model main=Sequential() main.add(GaussianModel(prenet=ball,height=HEIGHT,width=WIDTH,ballnum=BALLNUM,sigma=SIGMA,batchsize=BATCHSIZE)) """ sgd = SGD(lr=0.5, momentum=0.9, nesterov=True)
def fit_model(self, X, Y, use_attention, att_context, bidirectional):
    """Train a sequence tagger on (X, Y) and return the fitted model.

    X, Y          -- input tensor and one-hot label tensor (numpy arrays).
    use_attention -- insert a TensorAttention layer over the input.
    att_context   -- context argument forwarded to TensorAttention.
    bidirectional -- build a Graph with forward+backward LSTMs instead of a
                     Sequential projection + LSTM stack.
    """
    print >> sys.stderr, "Input shape:", X.shape, Y.shape
    # Stop once validation loss fails to improve for two consecutive epochs.
    early_stopping = EarlyStopping(patience=2)
    num_classes = len(self.label_ind)

    if bidirectional:
        net = Graph()
        net.add_input(name='input', input_shape=X.shape[1:])
        lstm_input_node = 'input'
        if use_attention:
            net.add_node(TensorAttention(X.shape[1:], context=att_context),
                         name='attention', input='input')
            lstm_input_node = 'attention'
        # One LSTM per direction, each half the input width; their outputs
        # are concatenated before the per-timestep softmax.
        half_dim = X.shape[-1] / 2
        net.add_node(LSTM(half_dim, return_sequences=True),
                     name='forward', input=lstm_input_node)
        net.add_node(LSTM(half_dim, return_sequences=True, go_backwards=True),
                     name='backward', input=lstm_input_node)
        net.add_node(TimeDistributedDense(num_classes, activation='softmax'),
                     name='softmax', inputs=['forward', 'backward'],
                     merge_mode='concat', concat_axis=-1)
        net.add_output(name='output', input='softmax')
        net.summary()
        net.compile('adam', {'output': 'categorical_crossentropy'})
        net.fit({'input': X, 'output': Y}, validation_split=0.1,
                callbacks=[early_stopping], nb_epoch=100, batch_size=10)
        return net

    net = Sequential()
    word_proj_dim = 50
    if use_attention:
        # 4-D input: project each word, then attend over the projections.
        _, input_len, timesteps, input_dim = X.shape
        net.add(HigherOrderTimeDistributedDense(input_dim=input_dim,
                                                output_dim=word_proj_dim))
        att_input_shape = (input_len, timesteps, word_proj_dim)
        print >> sys.stderr, "Attention input shape:", att_input_shape
        net.add(Dropout(0.5))
        net.add(TensorAttention(att_input_shape, context=att_context))
    else:
        # 3-D input: a plain per-timestep projection.
        _, input_len, input_dim = X.shape
        net.add(TimeDistributedDense(input_dim=input_dim,
                                     input_length=input_len,
                                     output_dim=word_proj_dim))
    net.add(LSTM(input_dim=word_proj_dim, output_dim=word_proj_dim,
                 input_length=input_len, return_sequences=True))
    net.add(TimeDistributedDense(num_classes, activation='softmax'))
    net.summary()
    net.compile(loss='categorical_crossentropy', optimizer='adam')
    net.fit(X, Y, validation_split=0.1, callbacks=[early_stopping],
            show_accuracy=True, batch_size=10)
    return net
model.add_node(RNN(args.hidden_size, return_sequences=False if i + 1 == args.layers else True), name='forward'+str(i+1), input='embed' if i == 0 else 'dropout'+str(i) if args.dropout > 0 else None, inputs=['forward'+str(i), 'backward'+str(i)] if i > 0 and args.dropout == 0 else []) model.add_node(RNN(args.hidden_size, return_sequences=False if i + 1 == args.layers else True, go_backwards=True), name='backward'+str(i+1), input='embed' if i == 0 else 'dropout'+str(i) if args.dropout > 0 else None, inputs=['forward'+str(i), 'backward'+str(i)] if i > 0 and args.dropout == 0 else []) if args.dropout > 0: model.add_node(Dropout(args.dropout), name='dropout'+str(i+1), inputs=['forward'+str(i+1), 'backward'+str(i+1)]) model.add_output(name='output', input='dropout'+str(args.layers) if args.dropout > 0 else None, inputs=['forward'+str(args.layers), 'backward'+str(args.layers)] if args.dropout == 0 else []) else: model = Sequential() model.add(Embedding(vocab_size, args.embed_size, mask_zero=not args.convolution)) if args.convolution: model.add(Convolution1D(nb_filter=args.conv_filters, filter_length=args.conv_filter_length, border_mode=args.conv_border_mode, activation=args.conv_activation, subsample_length=args.conv_subsample_length)) if args.pooling: model.add(MaxPooling1D(pool_length=args.pool_length)) for i in xrange(args.layers): model.add(RNN(args.hidden_size, return_sequences=False if i + 1 == args.layers else True)) if args.dropout > 0: model.add(Dropout(args.dropout)) for i in xrange(args.dense_layers): if i + 1 == args.dense_layers: model.add(Dense(args.hidden_size, activation='linear'))
def create_model(vocab_size, args):
    """Build the RNN model selected by the command-line options.

    vocab_size -- embedding input dimension.
    args       -- options: rnn ('GRU'|'LSTM'), bidirectional, layers,
                  hidden_size, embed_size, dropout, batch_size, convolution,
                  conv_filters, conv_filter_length, conv_border_mode,
                  conv_activation, conv_subsample_length, pooling,
                  pool_length, dense_layers, dense_activation.
    Returns an uncompiled Keras model (Graph when bidirectional, else
    Sequential).  Raises AssertionError on unsupported option combinations.
    """
    assert args.batch_size % 3 == 0, "Batch size must be multiple of 3"
    if args.rnn == 'GRU':
        RNN = recurrent.GRU
    elif args.rnn == 'LSTM':
        RNN = recurrent.LSTM
    else:
        assert False, "Invalid RNN"

    if args.bidirectional:
        # These options are only implemented in the Sequential branch.
        assert not args.convolution, "Convolutional layer is not supported with bidirectional RNN"
        assert not args.pooling, "Pooling layer is not supported with bidirectional RNN"
        # FIX: this condition was asserted twice (again after add_output);
        # the duplicate has been removed -- once here is sufficient.
        assert args.dense_layers == 0, "Dense layers are not supported with bidirectional RNN"

        model = Graph()
        # NOTE(review): dtype "uint" and batch_input_shape (batch, 1) kept
        # verbatim -- confirm against the targeted Keras version.
        model.add_input(name="input",
                        batch_input_shape=(args.batch_size, 1),
                        dtype="uint")
        model.add_node(Embedding(vocab_size, args.embed_size, mask_zero=True),
                       name="embed", input='input')
        for i in xrange(args.layers):
            # Only the last RNN layer collapses the sequence to one vector.
            is_last = i + 1 == args.layers
            # Input routing: embedding for the first layer, previous Dropout
            # merge when dropout is on, else both previous RNN directions.
            if i == 0:
                feed, merged = 'embed', []
            elif args.dropout > 0:
                feed, merged = 'dropout' + str(i), []
            else:
                feed, merged = None, ['forward' + str(i), 'backward' + str(i)]
            model.add_node(RNN(args.hidden_size, return_sequences=not is_last),
                           name='forward' + str(i + 1), input=feed, inputs=merged)
            model.add_node(RNN(args.hidden_size, return_sequences=not is_last,
                               go_backwards=True),
                           name='backward' + str(i + 1), input=feed, inputs=merged)
            if args.dropout > 0:
                model.add_node(Dropout(args.dropout),
                               name='dropout' + str(i + 1),
                               inputs=['forward' + str(i + 1), 'backward' + str(i + 1)])
        if args.dropout > 0:
            model.add_output(name='output',
                             input='dropout' + str(args.layers), inputs=[])
        else:
            model.add_output(name='output', input=None,
                             inputs=['forward' + str(args.layers),
                                     'backward' + str(args.layers)])
    else:
        model = Sequential()
        # Masking is incompatible with the convolution layer, so the padding
        # token is only masked when no convolution is used.
        model.add(Embedding(vocab_size, args.embed_size,
                            mask_zero=not args.convolution))
        if args.convolution:
            model.add(Convolution1D(nb_filter=args.conv_filters,
                                    filter_length=args.conv_filter_length,
                                    border_mode=args.conv_border_mode,
                                    activation=args.conv_activation,
                                    subsample_length=args.conv_subsample_length))
        # NOTE(review): pooling is treated as an independent option (the
        # bidirectional asserts above also treat it separately) -- confirm it
        # is never enabled without convolution, since MaxPooling1D does not
        # support the masked input produced otherwise.
        if args.pooling:
            model.add(MaxPooling1D(pool_length=args.pool_length))
        for i in xrange(args.layers):
            model.add(RNN(args.hidden_size,
                          return_sequences=i + 1 != args.layers))
            if args.dropout > 0:
                model.add(Dropout(args.dropout))
        for i in xrange(args.dense_layers):
            if i + 1 == args.dense_layers:
                # Final dense layer is linear; hidden ones use the configured
                # activation.
                model.add(Dense(args.hidden_size, activation='linear'))
            else:
                model.add(Dense(args.hidden_size, activation=args.dense_activation))
    return model
input_name = dropout_name for i in range(num_dense): dense_name = 'dense' + str(i) dense = Dense(128, activation='relu', init='he_normal') model.add_node(dense, name=dense_name, input=input_name) input_name = dense_name model.add_node(Dense(1, activation='sigmoid', init='he_normal'), name='dense', input=input_name) model.add_output(name='output', input='dense') model.compile(loss={'output': 'binary_crossentropy'}, optimizer=optimizer) else: for i in range(num_conv): model.add(Convolution1D(64, 3, input_dim=40, activation='relu', init='he_normal')) model.add(MaxPooling1D()) for i in range(num_gru): return_sequences = i < num_gru - 1 go_backwards = sys.argv[1] != 'normal' if sys.argv[1] == 'lstm': model.add(LSTM(128, input_dim=40, activation='relu', inner_activation='sigmoid', init='he_normal', return_sequences=True)) model.add(GaussianDropout(0.4)) model.add(LSTM(128, input_dim=40, activation='relu', inner_activation='sigmoid', init='he_normal', go_backwards=go_backwards, return_sequences=return_sequences)) model.add(GaussianDropout(0.4)) else: if go_backwards: model.add(GRU(128, input_dim=40, activation='relu', inner_activation='sigmoid', init='he_normal', go_backwards=True, return_sequences=True)) model.add(GaussianDropout(0.4)) model.add(GRU(128, input_dim=40, activation='relu', inner_activation='sigmoid', init='he_normal', go_backwards=True, return_sequences=return_sequences))
def _addConvBlock(model, nbFilter, poolStrides):
    """Append three 3x3 valid relu conv layers and one 2x2 max-pool to model."""
    for _ in range(3):
        model.add(Convolution2D(nbFilter, 3, 3,
                                border_mode='valid',
                                trainable=True,
                                init=initialization,
                                W_regularizer=l2(regularizer),
                                subsample=(1, 1),
                                activation="relu"))
    # poolStrides None means the Keras default (strides == pool_size).
    model.add(MaxPooling2D(pool_size=(2, 2), strides=poolStrides))


def _buildPatchQualityCNN(blockFilters):
    """Build, weight-round-trip-test and compile the VGG-style patch regressor.

    blockFilters -- one conv-filter count per block of three Convolution2D
                    layers; every block except the last pools with strides
                    (1, 1), the last with the default stride.
    Returns the compiled Sequential model.
    """
    model = Sequential()
    # Identity layer, present only to fix the input shape of the stack.
    model.add(Activation('linear',
                         input_shape=(channels, patchHeight, patchWidth)))
    lastIndex = len(blockFilters) - 1
    for blockIndex, nbFilter in enumerate(blockFilters):
        # All but the final block use overlapping (stride-1) pooling.
        _addConvBlock(model, nbFilter,
                      None if blockIndex == lastIndex else (1, 1))
    model.add(Flatten())
    for _ in range(2):
        model.add(Dense(800, trainable=True, init=initialization,
                        W_regularizer=l2(regularizer), activation="relu"))
        model.add(Dropout(0.5))
    model.add(Dense(nb_output, trainable=True, init=initialization,
                    W_regularizer=l2(regularizer), activation="linear"))
    print("Built the model")
    if doWeightLoadSaveTest:
        # Round-trip the weights to disk to catch serialization problems early.
        model.save_weights(weightSavePath + 'weightsLoadSaveTest.h5',
                           overwrite=True)
        model.load_weights(weightSavePath + 'weightsLoadSaveTest.h5')
        print("Weight load/save test passed...")
    sgd = SGD(lr=learningRate, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(loss=linear_correlation_loss, optimizer=sgd)
    print("Compilation Finished")
    return model


def constructDNNModel(modelIndex):
    """Construct one of three patch-quality DNNs.

    modelIndex -- 1: CVPR'14-style Graph model (max+min pooling over a single
                     conv layer), compiled with MAE loss;
                  2: VGG-style CNN with conv blocks of 48/48/64/64 filters;
                  3: VGG-style CNN with conv blocks of 48/64/64/128 filters.
    Models 2 and 3 are compiled with linear_correlation_loss and SGD.
    Returns the compiled model (an empty list for an unknown index, as before).
    """
    model = []
    if modelIndex == 1:  # CVPR'14 CNN
        model = Graph()
        model.add_input(name='input',
                        input_shape=(channels, patchHeight, patchWidth))
        # NOTE(review): input_shape=(1, 32, 32) conflicts with the Graph input
        # shape above; kept verbatim -- confirm the intended patch size.
        model.add_node(Convolution2D(50, 7, 7, init=initialization,
                                     activation='linear', border_mode='valid',
                                     input_shape=(1, 32, 32)),
                       name='conv1', input='input')
        # Max-pool branch.
        model.add_node(MaxPooling2D(pool_size=(26, 26)),
                       name='max_pool', input='conv1')
        model.add_node(Flatten(), name='flat_max', input='max_pool')
        # Min-pool branch: invert the activations, then max-pool.
        model.add_node(layer=Lambda(min_pool_inp, output_shape=(50, 26, 26)),
                       name='invert_val', input='conv1')
        model.add_node(MaxPooling2D(pool_size=(26, 26)),
                       name='min_pool', input='invert_val')
        model.add_node(Flatten(), name='flat_min', input='min_pool')
        model.add_node(Dense(800, init=initialization, activation='relu'),
                       name='dense1', inputs=['flat_max', 'flat_min'],
                       merge_mode='concat')
        model.add_node(Dense(800, init=initialization, activation='relu'),
                       name='dense2', input='dense1')
        model.add_node(Dropout(0.5), name='dropout2', input='dense2')
        model.add_node(Dense(1, activation='linear'), name='output',
                       input='dropout2', create_output=True)
        print("Model params = " + str(model.count_params()))
        # BUG FIX: the keyword was 'Nesterov=True'; Keras' SGD only accepts
        # 'nesterov' and rejects unknown keyword arguments at construction.
        sgd = SGD(lr=learningRate, momentum=0.9, decay=1e-6, nesterov=True)
        model.compile(loss={'output': 'mae'}, optimizer=sgd)
        # FIX: corrected the misspelled status message ('Finsihed').
        print('Finished compiling the model. No error in model construction')
    elif modelIndex == 2:
        model = _buildPatchQualityCNN([48, 48, 64, 64])
    elif modelIndex == 3:
        model = _buildPatchQualityCNN([48, 64, 64, 128])
    return model
def run(model_name, dataset_name): """ run the baseline model :param model_name: name of baseline models, CNN or LSTM :param dataset_name: name of datasets, candidate values [bbc, digg, MySpace, rw, Twitter, YouTube] """ print "model: %s" % model_name print "process dataset %s..." % dataset_name data = cPickle.load(open('./pkl/%s.pkl' % dataset_name, 'rb')) records, glove_embeddings, vocab, word_to_df = data dim_emb = len(glove_embeddings[1]) # index of word starts from 1 # initial weights of embedding layer embeddings = np.zeros((len(vocab) + 1, dim_emb), dtype='float32') for w in vocab: wid = vocab[w] embeddings[wid, :] = glove_embeddings[wid] train_x, train_y, val_x, val_y, test_x, test_y, test_strength = [], [], [], [], [], [], [] max_len = 0 for r in records: text = r['text'] y = r['label'] wids = [vocab[w] for w in text.split(' ')] if len(wids) > max_len: max_len = len(wids) if r['type'] == 'train': train_x.append(wids) train_y.append(y) elif r['type'] == 'val': val_x.append(wids) val_y.append(y) elif r['type'] == 'test': strength = r['strength'] test_x.append(wids) test_y.append(y) test_strength.append(strength) train_x, val_x, test_x = ToArray(train=train_x, val=val_x, test=test_x) train_y, val_y, test_y = ToArray(train=train_y, val=val_y, test=test_y) _, _, test_strength = ToArray(train=[], val=[], test=test_strength) #print train_x.shape, val_x.shape, test_x.shape train_x, val_x, test_x = Padding(train=train_x, val=val_x, test=test_x, max_len=max_len) batch_size = 50 if model_name == 'CNN' else 32 if train_x.shape[0] % batch_size: n_extra = batch_size - train_x.shape[0] % batch_size x_extra = train_x[:n_extra, :] y_extra = train_y[:n_extra] train_x = np.append(train_x, x_extra, axis=0) train_y = np.append(train_y, y_extra, axis=0) np.random.seed(38438) # shuffle the training set train_set = np.random.permutation(zip(train_x, train_y)) train_x, train_y = [], [] for (x, y) in train_set: train_x.append(x) train_y.append(y) n_labels = 2 train_x = 
np.array(train_x) train_y = np.array(train_y) train_y = to_categorical(train_y) val_y = to_categorical(val_y) print "n_train: %s, n_val: %s, n_test: %s" % ( train_x.shape[0], val_x.shape[0], test_x.shape[0]) if model_name == 'CNN': model = Graph() model.add_input(name='input', input_shape=(max_len, ), dtype='int') model.add_node(Embedding(input_dim=len(vocab) + 1, output_dim=dim_emb, input_length=max_len, weights=[embeddings]), name="emb", input="input") filter_hs = [3, 4, 5] n_filter = 100 dropout_rate = 0.5 n_epoch = 20 for i in xrange(len(filter_hs)): win_size = filter_hs[i] conv_name = 'conv%s' % i pool_name = "pool%s" % i flatten_name = "flatten%s" % i pool_size = max_len - win_size + 1 model.add_node( layer=Convolution1D(nb_filter=n_filter, filter_length=win_size, activation='relu', W_constraint=maxnorm(m=3), b_constraint=maxnorm(m=3)), name=conv_name, input='emb', ) model.add_node(layer=MaxPooling1D(pool_length=pool_size), name=pool_name, input=conv_name) model.add_node(layer=Flatten(), name=flatten_name, input=pool_name) model.add_node(layer=Dropout(p=dropout_rate), name="dropout", inputs=["flatten0", "flatten1", "flatten2"]) model.add_node(layer=Dense(output_dim=n_labels, activation='softmax'), name='softmax', input='dropout') model.add_output(input='softmax', name="output") model.compile(loss={'output': 'categorical_crossentropy'}, optimizer='adadelta', metrics=['accuracy']) model_path = './model/%s_%s.hdf5' % (model_name, dataset_name) best_model = ModelCheckpoint(filepath=model_path, monitor='val_acc', save_best_only=True, mode='max') print "training..." 
model.fit(data={ 'input': train_x, 'output': train_y }, batch_size=batch_size, nb_epoch=n_epoch, validation_data={ 'input': val_x, 'output': val_y }, callbacks=[best_model], verbose=0) else: model = Sequential() model.add( Embedding(input_dim=len(vocab) + 1, output_dim=dim_emb, mask_zero=True, input_length=max_len, weights=[embeddings])) model.add(LSTM(output_dim=128, dropout_W=0.2, dropout_U=0.2)) model.add(Dense(n_labels, activation='softmax')) model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) model_path = './model/%s_%s.hdf5' % (model_name, dataset_name) best_model = ModelCheckpoint(filepath=model_path, monitor='val_acc', save_best_only=True, mode='max') n_epoch = 20 print "training..." model.fit(x=train_x, y=train_y, batch_size=batch_size, nb_epoch=n_epoch, validation_data=(val_x, val_y), callbacks=[best_model], verbose=0) pred_strength = [] print "load the best model from disk..." model.load_weights(model_path) if model_name == 'LSTM': pred_strength = model.predict(x=test_x, batch_size=batch_size) else: for i in xrange(len(test_x)): res = model.predict(data={'input': test_x[i:i + 1]}, batch_size=1) pred_strength.append(res['output']) pred_strength = np.array(pred_strength) pred_strength = pred_strength.reshape( (pred_strength.shape[0], pred_strength.shape[2])) assert pred_strength.shape == test_strength.shape print "evaluate performance of the system..." accu, mae, rmse = evaluate(strength_gold=test_strength, strength_pred=pred_strength) print "%s over %s--->accuracy: %s, mae: %s, rmse: %s\n\n" % ( model_name, dataset_name, accu, mae, rmse) pred_strengths_lines = [] for strength in pred_strength: pred_strengths_lines.append('%s\n' % ' '.join([str(ele) for ele in strength]))
model.add_node(RNN(args.hidden_size, return_sequences=False if i + 1 == args.layers else True), name='forward'+str(i+1), input='embed' if i == 0 else 'dropout'+str(i) if args.dropout > 0 else None, inputs=['forward'+str(i), 'backward'+str(i)] if i > 0 and args.dropout == 0 else []) model.add_node(RNN(args.hidden_size, return_sequences=False if i + 1 == args.layers else True, go_backwards=True), name='backward'+str(i+1), input='embed' if i == 0 else 'dropout'+str(i) if args.dropout > 0 else None, inputs=['forward'+str(i), 'backward'+str(i)] if i > 0 and args.dropout == 0 else []) if args.dropout > 0: model.add_node(Dropout(args.dropout), name='dropout'+str(i+1), inputs=['forward'+str(i+1), 'backward'+str(i+1)]) model.add_output(name='output', input='dropout'+str(args.layers) if args.dropout > 0 else None, inputs=['forward'+str(args.layers), 'backward'+str(args.layers)] if args.dropout == 0 else []) else: model = Sequential() model.add(Embedding(vocab_size, args.embed_size, mask_zero=True)) for i in xrange(args.layers): model.add(RNN(args.hidden_size, return_sequences=False if i + 1 == args.layers else True)) if args.dropout > 0: model.add(Dropout(args.dropout)) for i in xrange(args.dense_layers): if i + 1 == args.dense_layers: model.add(Dense(args.hidden_size, activation='linear')) else: model.add(Dense(args.hidden_size, activation=args.dense_activation)) model.summary() print "Loading weights from %s" % args.model_path model.load_weights(args.model_path)
def fit_model(self, X, Y, use_attention, att_context, bidirectional):
    """Build, train and return a sequence tagger on (X, Y).

    X: input tensor; 3D (samples, words, features) without attention,
       4D (samples, words, timesteps, features) with attention -- inferred
       from the tuple unpacking below, TODO confirm against callers.
    Y: one-hot label tensor; number of classes comes from self.label_ind.
    use_attention: if true, insert a TensorAttention layer before the LSTM(s).
    att_context: context argument forwarded to TensorAttention.
    bidirectional: if true, build a Graph with forward+backward LSTMs;
       otherwise a Sequential model with a single forward LSTM.
    """
    print >>sys.stderr, "Input shape:", X.shape, Y.shape
    num_classes = len(self.label_ind)
    if bidirectional:
        # Graph API: two LSTMs read the same node, one with go_backwards;
        # their per-timestep outputs are concatenated for the softmax layer.
        tagger = Graph()
        tagger.add_input(name='input', input_shape=X.shape[1:])
        if use_attention:
            tagger.add_node(TensorAttention(X.shape[1:], context=att_context), name='attention', input='input')
            lstm_input_node = 'attention'
        else:
            lstm_input_node = 'input'
        # Half-width LSTMs: the forward+backward concat restores X.shape[-1].
        tagger.add_node(LSTM(X.shape[-1]/2, return_sequences=True), name='forward', input=lstm_input_node)
        tagger.add_node(LSTM(X.shape[-1]/2, return_sequences=True, go_backwards=True), name='backward', input=lstm_input_node)
        tagger.add_node(TimeDistributedDense(num_classes, activation='softmax'), name='softmax', inputs=['forward', 'backward'], merge_mode='concat', concat_axis=-1)
        tagger.add_output(name='output', input='softmax')
        print >>sys.stderr, tagger.summary()
        tagger.compile('adam', {'output':'categorical_crossentropy'})
        tagger.fit({'input':X, 'output':Y})
    else:
        tagger = Sequential()
        word_proj_dim = 50  # width of the per-word projection fed to the LSTM
        if use_attention:
            _, input_len, timesteps, input_dim = X.shape
            tagger.add(HigherOrderTimeDistributedDense(input_dim=input_dim, output_dim=word_proj_dim))
            att_input_shape = (input_len, timesteps, word_proj_dim)
            print >>sys.stderr, "Attention input shape:", att_input_shape
            tagger.add(Dropout(0.5))
            tagger.add(TensorAttention(att_input_shape, context=att_context))
            #tagger.add(Dropout(0.5))
        else:
            _, input_len, input_dim = X.shape
            tagger.add(TimeDistributedDense(input_dim=input_dim, output_dim=word_proj_dim))
        # Shared tail for both paths: LSTM over word projections, then a
        # per-timestep softmax over the label set.
        tagger.add(LSTM(input_dim=word_proj_dim, output_dim=word_proj_dim, input_length=input_len, return_sequences=True))
        tagger.add(TimeDistributedDense(num_classes, activation='softmax'))
        print >>sys.stderr, tagger.summary()
        tagger.compile(loss='categorical_crossentropy', optimizer='adam')
        tagger.fit(X, Y, batch_size=10)
    return tagger
str(i) if args.dropout > 0 else None, inputs=['forward' + str(i), 'backward' + str(i)] if i > 0 and args.dropout == 0 else []) if args.dropout > 0: model.add_node( Dropout(args.dropout), name='dropout' + str(i + 1), inputs=['forward' + str(i + 1), 'backward' + str(i + 1)]) model.add_output( name='output', input='dropout' + str(args.layers) if args.dropout > 0 else None, inputs=['forward' + str(args.layers), 'backward' + str(args.layers)] if args.dropout == 0 else []) else: model = Sequential() model.add(Embedding(vocab_size, args.embed_size, mask_zero=True)) for i in xrange(args.layers): model.add( RNN(args.hidden_size, return_sequences=False if i + 1 == args.layers else True)) if args.dropout > 0: model.add(Dropout(args.dropout)) for i in xrange(args.dense_layers): if i + 1 == args.dense_layers: model.add(Dense(args.hidden_size, activation='linear')) else: model.add(Dense(args.hidden_size, activation=args.dense_activation)) model.summary()
def create_model(vocab_size, args):
    """Build an (uncompiled) RNN model over token ids.

    vocab_size: size of the embedding vocabulary.
    args: parsed options; uses rnn, bidirectional, batch_size, embed_size,
        layers, hidden_size, dropout, convolution, pooling, conv_*,
        pool_length, dense_layers and dense_activation.

    Returns a Graph when args.bidirectional, otherwise a Sequential model.
    """
    # Batches are presumably consumed as (question, correct, incorrect)
    # triples elsewhere -- hence the divisibility requirement. TODO confirm.
    assert args.batch_size % 3 == 0, "Batch size must be multiple of 3"

    if args.rnn == 'GRU':
        RNN = recurrent.GRU
    elif args.rnn == 'LSTM':
        RNN = recurrent.LSTM
    else:
        assert False, "Invalid RNN"

    if args.bidirectional:
        assert not args.convolution, "Convolutional layer is not supported with bidirectional RNN"
        assert not args.pooling, "Pooling layer is not supported with bidirectional RNN"
        # NOTE: this assertion previously appeared a second time after
        # add_output; that duplicate could never fire and has been removed.
        assert args.dense_layers == 0, "Dense layers are not supported with bidirectional RNN"

        model = Graph()
        model.add_input(name="input", batch_input_shape=(args.batch_size, 1), dtype="uint")
        model.add_node(Embedding(vocab_size, args.embed_size, mask_zero=True),
                       name="embed", input='input')
        for i in xrange(args.layers):
            # Only the topmost layer collapses the sequence into a vector.
            returns_seq = (i + 1 != args.layers)
            # Wire this layer's source: the embedding for the first layer,
            # otherwise the previous dropout node (when dropout is used) or
            # both directions of the previous RNN layer.  Exactly one of
            # input=/inputs= is passed (the old code passed None/[] fillers).
            if i == 0:
                feed = dict(input='embed')
            elif args.dropout > 0:
                feed = dict(input='dropout' + str(i))
            else:
                feed = dict(inputs=['forward' + str(i), 'backward' + str(i)])
            model.add_node(RNN(args.hidden_size, return_sequences=returns_seq),
                           name='forward' + str(i + 1), **feed)
            model.add_node(RNN(args.hidden_size, return_sequences=returns_seq,
                               go_backwards=True),
                           name='backward' + str(i + 1), **feed)
            if args.dropout > 0:
                model.add_node(Dropout(args.dropout), name='dropout' + str(i + 1),
                               inputs=['forward' + str(i + 1), 'backward' + str(i + 1)])
        if args.dropout > 0:
            model.add_output(name='output', input='dropout' + str(args.layers))
        else:
            model.add_output(name='output',
                             inputs=['forward' + str(args.layers),
                                     'backward' + str(args.layers)])
    else:
        model = Sequential()
        # Mask padding only when there is no convolution (Conv1D layers in
        # this Keras version do not consume a mask).
        model.add(Embedding(vocab_size, args.embed_size, mask_zero=not args.convolution))
        if args.convolution:
            model.add(Convolution1D(nb_filter=args.conv_filters,
                                    filter_length=args.conv_filter_length,
                                    border_mode=args.conv_border_mode,
                                    activation=args.conv_activation,
                                    subsample_length=args.conv_subsample_length))
            if args.pooling:
                model.add(MaxPooling1D(pool_length=args.pool_length))
        for i in xrange(args.layers):
            model.add(RNN(args.hidden_size, return_sequences=(i + 1 != args.layers)))
            if args.dropout > 0:
                model.add(Dropout(args.dropout))
        for i in xrange(args.dense_layers):
            # Hidden dense layers use the configured activation; the final
            # dense layer is linear.
            if i + 1 == args.dense_layers:
                model.add(Dense(args.hidden_size, activation='linear'))
            else:
                model.add(Dense(args.hidden_size, activation=args.dense_activation))
    return model
while count < X_train.shape[0]: yield X_train[count:count + number], Y_train[count:count + number].reshape( (-1, 1)) count += number print("Constructing model...", end=" ") dropout = 0.5 model = Graph() model = Sequential() model.add( LSTM(output_dim=2 * n_d, input_shape=(batch_size, 2 * n_d), activation="tanh", return_sequences=False)) model.add(Dropout(dropout)) model.add(Dense(1, activation="sigmoid")) model.compile(loss="binary_crossentropy", optimizer="adam", class_mode="binary") print("done.") ts = [] ds = [] data_file = "performance/data.tsv"
def constructDNNModel(modelIndex): model = [] if modelIndex == 1: model = Graph() model.add_input(name='input', input_shape=(imgChannels, patchSize, patchSize)) model.add_node(Convolution2D(50, 7, 7, init=initialization, activation='linear', border_mode='valid', input_shape=(1, 32, 32)), name='conv1', input='input') model.add_node(MaxPooling2D(pool_size=(26, 26)), name='max_pool', input='conv1') model.add_node(Flatten(), name='flat_max', input='max_pool') model.add_node(layer=Lambda(min_pool_inp, output_shape=(50, 26, 26)), name='invert_val', input='conv1') model.add_node(MaxPooling2D(pool_size=(26, 26)), name='min_pool', input='invert_val') model.add_node(Flatten(), name='flat_min', input='min_pool') model.add_node(Dense(800, init=initialization, activation='relu'), name='dense1', inputs=['flat_max', 'flat_min'], merge_mode='concat') model.add_node(Dense(800, init=initialization, activation='relu'), name='dense2', input='dense1') model.add_node(Dropout(0.5), name='dropout2', input='dense2') model.add_node(Dense(1, activation='linear'), name='output', input='dropout2', create_output=True) # print model.get_config() print model.count_params() print("Built the model") # ------------------------------------------------------------------------------------------------------------------------------------------------ # if doWeightLoadSaveTest: # pdb.set_trace() model.save_weights(weightSavePath + 'weightsLoadSaveTest.h5', overwrite=True) model.load_weights(weightSavePath + 'weightsLoadSaveTest.h5') print("Weight load/save test passed...") # ------------------------------------------------------------------------------------------------------------------------------------------------ # sgd = SGD(lr=learningRate, decay=1e-6, momentum=0.9, nesterov=True) model.load_weights(weightSavePath + 'bestWeights_referenceCNN_valLoss.h5') print "Best val loss weights loaded." 
# adam = Adam(lr=learningRate, beta_1=0.9, beta_2=0.999, epsilon=1e-08) model.compile(loss={'output':'mae'}, optimizer=sgd) print("Compilation Finished") return model elif modelIndex == 2: # train_imageQuality_regressMOS_loweKernels.py model = Sequential() model.add(Activation('linear',input_shape=(imgChannels,patchSize,patchSize))) # 32 model.add(Convolution2D(48, 3, 3, border_mode='valid', trainable=True, init=initialization, W_regularizer=l2(regularizer), subsample=(1, 1), activation = "relu")) # 30 model.add(Convolution2D(48, 3, 3, border_mode='valid', trainable=True, init=initialization, W_regularizer=l2(regularizer), subsample=(1, 1), activation = "relu")) # 28 model.add(Convolution2D(48, 3, 3, border_mode='valid', trainable=True, init=initialization, W_regularizer=l2(regularizer), subsample=(1, 1), activation = "relu")) # 26 model.add(MaxPooling2D(pool_size=(2,2),strides=(1,1))) # 25 # ------------------------------------------------------------------------------------------------------------------------------------------------ # model.add(Convolution2D(48, 3, 3, border_mode='valid', trainable=True, init=initialization, W_regularizer=l2(regularizer), subsample=(1, 1), activation = "relu")) # 23 model.add(Convolution2D(48, 3, 3, border_mode='valid', trainable=True, init=initialization, W_regularizer=l2(regularizer), subsample=(1, 1), activation = "relu")) # 21 model.add(Convolution2D(48, 3, 3, border_mode='valid', trainable=True, init=initialization, W_regularizer=l2(regularizer), subsample=(1, 1), activation = "relu")) # 19 model.add(MaxPooling2D(pool_size=(2,2),strides=(1,1))) # 18 # ------------------------------------------------------------------------------------------------------------------------------------------------ # model.add(Convolution2D(64, 3, 3, border_mode='valid', trainable=True, init=initialization, W_regularizer=l2(regularizer), subsample=(1, 1), activation = "relu")) # 16 model.add(Convolution2D(64, 3, 3, border_mode='valid', 
trainable=True, init=initialization, W_regularizer=l2(regularizer), subsample=(1, 1), activation = "relu")) # 14 model.add(Convolution2D(64, 3, 3, border_mode='valid', trainable=True, init=initialization, W_regularizer=l2(regularizer), subsample=(1, 1), activation = "relu")) # 12 model.add(MaxPooling2D(pool_size=(2,2),strides=(1,1))) # 11 # ------------------------------------------------------------------------------------------------------------------------------------------------ # model.add(Convolution2D(64, 3, 3, border_mode='valid', trainable=True, init=initialization, W_regularizer=l2(regularizer), subsample=(1, 1), activation = "relu")) # 9 model.add(Convolution2D(64, 3, 3, border_mode='valid', trainable=True, init=initialization, W_regularizer=l2(regularizer), subsample=(1, 1), activation = "relu")) # 7 model.add(Convolution2D(64, 3, 3, border_mode='valid', trainable=True, init=initialization, W_regularizer=l2(regularizer), subsample=(1, 1), activation = "relu")) # 5 model.add(MaxPooling2D(pool_size=(2,2))) # 2 # ------------------------------------------------------------------------------------------------------------------------------------------------ # model.add(Flatten()) # model.add(Dropout(0.25)) model.add(Dense(800, trainable=True, init=initialization, W_regularizer=l2(regularizer), activation = "relu")) model.add(Dropout(0.5)) model.add(Dense(800, trainable=True, init=initialization, W_regularizer=l2(regularizer), activation = "relu")) model.add(Dropout(0.5)) model.add(Dense(nb_output, trainable=True, init=initialization, W_regularizer=l2(regularizer), activation = "linear")) print("Built the model") # ------------------------------------------------------------------------------------------------------------------------------------------------ # if doWeightLoadSaveTest: # pdb.set_trace() model.save_weights(weightSavePath + 'weightsLoadSaveTest.h5', overwrite=True) model.load_weights(weightSavePath + 'weightsLoadSaveTest.h5') print("Weight 
load/save test passed...") # model.load_weights('/media/AccessParag/Code/weights/bestWeightsAtEpoch_000.h5') # print("Weights at Epoch 0 loaded") # ------------------------------------------------------------------------------------------------------------------------------------------------ # sgd = SGD(lr=learningRate, decay=1e-6, momentum=0.9, nesterov=True) model.load_weights(weightSavePath + 'bestWeights_referenceCNN_valLoss.h5') print "Best val loss weights loaded." model.compile(loss=linear_correlation_loss, optimizer=sgd) print("Compilation Finished") elif modelIndex == 3: # train_imageQuality_regressMOS_loweKernels.py model = Sequential() model.add(Activation('linear',input_shape=(imgChannels,patchSize,patchSize))) # 32 model.add(Convolution2D(48, 3, 3, border_mode='valid', trainable=True, init=initialization, W_regularizer=l2(regularizer), subsample=(1, 1), activation = "relu")) # 30 model.add(Convolution2D(48, 3, 3, border_mode='valid', trainable=True, init=initialization, W_regularizer=l2(regularizer), subsample=(1, 1), activation = "relu")) # 28 model.add(Convolution2D(48, 3, 3, border_mode='valid', trainable=True, init=initialization, W_regularizer=l2(regularizer), subsample=(1, 1), activation = "relu")) # 26 model.add(MaxPooling2D(pool_size=(2,2),strides=(1,1))) # 25 # ------------------------------------------------------------------------------------------------------------------------------------------------ # model.add(Convolution2D(64, 3, 3, border_mode='valid', trainable=True, init=initialization, W_regularizer=l2(regularizer), subsample=(1, 1), activation = "relu")) # 23 model.add(Convolution2D(64, 3, 3, border_mode='valid', trainable=True, init=initialization, W_regularizer=l2(regularizer), subsample=(1, 1), activation = "relu")) # 21 model.add(Convolution2D(64, 3, 3, border_mode='valid', trainable=True, init=initialization, W_regularizer=l2(regularizer), subsample=(1, 1), activation = "relu")) # 19 
model.add(MaxPooling2D(pool_size=(2,2),strides=(1,1))) # 18 # ------------------------------------------------------------------------------------------------------------------------------------------------ # model.add(Convolution2D(64, 3, 3, border_mode='valid', trainable=True, init=initialization, W_regularizer=l2(regularizer), subsample=(1, 1), activation = "relu")) # 16 model.add(Convolution2D(64, 3, 3, border_mode='valid', trainable=True, init=initialization, W_regularizer=l2(regularizer), subsample=(1, 1), activation = "relu")) # 14 model.add(Convolution2D(64, 3, 3, border_mode='valid', trainable=True, init=initialization, W_regularizer=l2(regularizer), subsample=(1, 1), activation = "relu")) # 12 model.add(MaxPooling2D(pool_size=(2,2),strides=(1,1))) # 11 # ------------------------------------------------------------------------------------------------------------------------------------------------ # model.add(Convolution2D(128, 3, 3, border_mode='valid', trainable=True, init=initialization, W_regularizer=l2(regularizer), subsample=(1, 1), activation = "relu")) # 9 model.add(Convolution2D(128, 3, 3, border_mode='valid', trainable=True, init=initialization, W_regularizer=l2(regularizer), subsample=(1, 1), activation = "relu")) # 7 model.add(Convolution2D(128, 3, 3, border_mode='valid', trainable=True, init=initialization, W_regularizer=l2(regularizer), subsample=(1, 1), activation = "relu")) # 5 model.add(MaxPooling2D(pool_size=(2,2))) # 2 # ------------------------------------------------------------------------------------------------------------------------------------------------ # model.add(Flatten()) # model.add(Dropout(0.25)) model.add(Dense(800, trainable=True, init=initialization, W_regularizer=l2(regularizer), activation = "relu")) model.add(Dropout(0.5)) model.add(Dense(800, trainable=True, init=initialization, W_regularizer=l2(regularizer), activation = "relu")) model.add(Dropout(0.5)) model.add(Dense(nb_output, trainable=True, 
init=initialization, W_regularizer=l2(regularizer), activation = "linear")) print("Built the model") # ------------------------------------------------------------------------------------------------------------------------------------------------ # if doWeightLoadSaveTest: # pdb.set_trace() model.save_weights(weightSavePath + 'weightsLoadSaveTest.h5', overwrite=True) model.load_weights(weightSavePath + 'weightsLoadSaveTest.h5') print("Weight load/save test passed...") # ------------------------------------------------------------------------------------------------------------------------------------------------ # sgd = SGD(lr=learningRate, decay=1e-6, momentum=0.9, nesterov=True) model.load_weights(weightSavePath + 'bestWeights_referenceCNN_valLoss.h5') print "Best val loss weights loaded." model.compile(loss=linear_correlation_loss, optimizer=sgd) print("Compilation Finished") return model
str(i) if args.dropout > 0 else None, inputs=['forward' + str(i), 'backward' + str(i)] if i > 0 and args.dropout == 0 else []) if args.dropout > 0: shared.add_node( Dropout(args.dropout), name='dropout' + str(i + 1), inputs=['forward' + str(i + 1), 'backward' + str(i + 1)]) shared.add_output( name='output', input='dropout' + str(args.layers) if args.dropout > 0 else None, inputs=['forward' + str(args.layers), 'backward' + str(args.layers)] if args.dropout == 0 else []) else: shared = Sequential() shared.add(Embedding(vocab_size, args.embed_size, mask_zero=True)) for i in xrange(args.layers): shared.add( RNN(args.hidden_size, return_sequences=False if i + 1 == args.layers else True)) if args.dropout > 0: shared.add(Dropout(args.dropout)) model.add_shared_node(shared, name="shared", inputs=['question', 'correct', 'incorrect'], merge_mode='concat', create_output=True) shared.summary() model.summary()
shared.add_node(RNN(args.hidden_size, return_sequences=False if i + 1 == args.layers else True), name='forward'+str(i+1), input='embed' if i == 0 else 'dropout'+str(i) if args.dropout > 0 else None, inputs=['forward'+str(i), 'backward'+str(i)] if i > 0 and args.dropout == 0 else []) shared.add_node(RNN(args.hidden_size, return_sequences=False if i + 1 == args.layers else True, go_backwards=True), name='backward'+str(i+1), input='embed' if i == 0 else 'dropout'+str(i) if args.dropout > 0 else None, inputs=['forward'+str(i), 'backward'+str(i)] if i > 0 and args.dropout == 0 else []) if args.dropout > 0: shared.add_node(Dropout(args.dropout), name='dropout'+str(i+1), inputs=['forward'+str(i+1), 'backward'+str(i+1)]) shared.add_output(name='output', input='dropout'+str(args.layers) if args.dropout > 0 else None, inputs=['forward'+str(args.layers), 'backward'+str(args.layers)] if args.dropout == 0 else []) else: shared = Sequential() shared.add(Embedding(vocab_size, args.embed_size, mask_zero=True)) for i in xrange(args.layers): shared.add(RNN(args.hidden_size, return_sequences=False if i + 1 == args.layers else True)) if args.dropout > 0: shared.add(Dropout(args.dropout)) model.add_shared_node(shared, name="shared", inputs=['question', 'correct', 'incorrect'], merge_mode='concat', create_output=True) shared.summary() model.summary() print "Compiling model..." model.compile(optimizer=args.optimizer, loss={'shared': cosine_ranking_loss}) callbacks=[ModelCheckpoint(filepath=args.save_path, verbose=1, save_best_only=False), EarlyStopping(patience=args.patience, verbose=1)]
def build_cnn_lstm_rnns(model_type, maxlen, max_features, emb_size=128, emb_matrix=None, recur_type='lstm', nb_filter=64, filter_length=3, pool_length=2, nb_classes = 2, recur_size=128, dropout_ratio=0.5, tune_emb=True):
    '''Build a CNN + recurrent text classifier.

    model_type: 'bidirectional' builds a Graph with forward and backward
        recurrent layers; anything else builds a Sequential forward model.
    maxlen/max_features/emb_size: sequence length and embedding geometry;
        overridden by emb_matrix.shape when a pre-trained matrix is given.
    recur_type: 'lstm', 'gru' or 'simplernn' (case-insensitive); anything
        else raises ValueError (previously it surfaced later as NameError).
    nb_classes: 2 -> single sigmoid output; >2 -> softmax; else exit(1).
    Returns the uncompiled model.
    '''
    print('Building model:', model_type, 'cnn-lstm')

    # Embedding layer, optionally initialized from a pre-trained matrix.
    if emb_matrix is not None:
        max_features, emb_size = emb_matrix.shape
        emb_layer = Embedding(max_features, emb_size, weights=[emb_matrix],
                              input_length=maxlen, trainable=tune_emb)
    else:
        emb_layer = Embedding(max_features, emb_size, input_length=maxlen,
                              trainable=tune_emb)

    # Dispatch table replaces the duplicated if/elif chains that previously
    # built the forward and backward layers separately.
    recur_classes = {'lstm': LSTM, 'gru': GRU, 'simplernn': SimpleRNN}
    try:
        recur_cls = recur_classes[recur_type.lower()]
    except KeyError:
        raise ValueError("Unknown recur_type: %s" % recur_type)
    recur_layer = recur_cls(recur_size)

    if model_type.lower() == 'bidirectional':
        model = Graph()
        # NOTE(review): other Graph code passes dtype as a string ('int');
        # confirm the builtin `int` is accepted by this Keras version.
        model.add_input(name='input', input_shape=(maxlen,), dtype=int)
        model.add_node(emb_layer, name='embedding', input='input')
        model.add_node(Dropout(dropout_ratio), name='emb_dropout', input='embedding')
        # Convolution + max-pooling over the embedded sequence.
        model.add_node(Convolution1D(nb_filter=nb_filter,
                                     filter_length=filter_length,
                                     border_mode='valid',
                                     activation='relu',
                                     subsample_length=1),
                       name='conv', input='emb_dropout')
        model.add_node(MaxPooling1D(pool_length=pool_length), name='pool', input='conv')
        # Forward and backward recurrent layers over the pooled features
        # (the original comment mislabeled the backward layer as "fwd").
        model.add_node(recur_layer, name='forward', input='pool')
        model.add_node(recur_cls(recur_size, go_backwards=True), name='backward', input='pool')
        model.add_node(Dropout(dropout_ratio), name='dropout', inputs=['forward', 'backward'])
        if nb_classes == 2:
            print('Doing binary classification...')
            model.add_node(Dense(1, activation='sigmoid'), name='sigmoid', input='dropout')
            model.add_output(name='output', input='sigmoid')
        elif nb_classes > 2:
            print('Doing classification with class #', nb_classes)
            model.add_node(Dense(nb_classes, activation='softmax'), name='softmax', input='dropout')
            model.add_output(name='output', input='softmax')
        else:
            print("Wrong argument nb_classes: ", nb_classes)
            exit(1)
    else:
        model = Sequential()
        model.add(emb_layer)
        model.add(Dropout(dropout_ratio))
        # Convolution1D learns nb_filter word-group filters of size
        # filter_length; max-pooling then downsamples the sequence.
        model.add(Convolution1D(nb_filter=nb_filter,
                                filter_length=filter_length,
                                border_mode='valid',
                                activation='relu',
                                subsample_length=1))
        model.add(MaxPooling1D(pool_length=pool_length))
        model.add(recur_layer)
        model.add(Dropout(dropout_ratio))
        if nb_classes == 2:
            print('Doing binary classification...')
            model.add(Dense(1))
            model.add(Activation('sigmoid'))
        elif nb_classes > 2:
            print('Doing classification with class #', nb_classes)
            model.add(Dense(nb_classes))
            model.add(Activation('softmax'))
        else:
            print("Wrong argument nb_classes: ", nb_classes)
            exit(1)
    return model