def prep_model(glove, vocab, module_prep_model, c, oact):
    # Input embedding and encoding
    model = Graph()
    N = B.embedding(model, glove, vocab, s0pad, s1pad, c['inp_e_dropout'])

    # Sentence-aggregate embeddings
    final_outputs = module_prep_model(model, N, s0pad, s1pad, c)

    # Measurement
    if c['ptscorer'] == '1':
        # special scoring mode just based on the answer
        # (assuming that the question match is carried over to the answer
        # via attention or another mechanism)
        ptscorer = B.cat_ptscorer
        final_outputs = final_outputs[1]
    else:
        ptscorer = c['ptscorer']

    kwargs = dict()
    if ptscorer == B.mlp_ptscorer:
        kwargs['sum_mode'] = c['mlpsum']
    model.add_node(name='scoreS',
                   input=ptscorer(model, final_outputs, c['Ddim'], N,
                                  c['l2reg'], **kwargs),
                   layer=Activation(oact))
    model.add_output(name='score', input='scoreS')
    return model
def fit(self, X, y, batch_size=128, nb_epoch=100, verbose=1, callbacks=[],
        validation_split=0., validation_data=None, shuffle=True,
        show_accuracy=False, class_weight=None, sample_weight=None):
    # Delegate to Graph.fit with dict-keyed input/output. Note that verbose,
    # show_accuracy, class_weight and sample_weight are accepted for API
    # compatibility but are not forwarded to Graph.fit.
    Graph.fit(self, {self._input_name: X, self._output_name: y},
              batch_size=batch_size, nb_epoch=nb_epoch,
              validation_split=validation_split,
              validation_data=validation_data,
              shuffle=shuffle, callbacks=callbacks)
def __init__(self, input_dim=4000):
    graph = Graph()
    half_input_dim = input_dim / 2
    hidden_dim = half_input_dim / 2
    graph.add_input(name='in1', input_shape=(half_input_dim,))
    graph.add_input(name='in2', input_shape=(half_input_dim,))
    #graph.add_node(Dense(hidden_dim, activation='sigmoid'), name='pre_hidden1', input='in1')
    #graph.add_node(Dense(hidden_dim, activation='sigmoid'), name='hidden1', input='pre_hidden1')
    #graph.add_node(Dense(hidden_dim, activation='sigmoid'), name='pre_hidden2', input='in2')
    #graph.add_node(Dense(hidden_dim, activation='sigmoid'), name='hidden2', input='pre_hidden2')
    graph.add_node(Dense(hidden_dim, activation='sigmoid'), name='hidden1', input='in1')
    graph.add_node(Dense(hidden_dim, activation='sigmoid'), name='hidden2', input='in2')
    graph.add_node(Dense(1, activation='sigmoid'), name='pre-out',
                   inputs=['hidden1', 'hidden2'], merge_mode='concat')
    # for training
    graph.add_output(name='out', input='pre-out')
    self.graph = graph
def prep_model(self, module_prep_model):
    # Input embedding and encoding
    model = Graph()
    N = B.embedding(model, self.emb, self.vocab, self.s0pad, self.s1pad,
                    self.c['inp_e_dropout'], self.c['inp_w_dropout'],
                    add_flags=self.c['e_add_flags'])

    # Sentence-aggregate embeddings
    final_outputs = module_prep_model(model, N, self.s0pad, self.s1pad, self.c)

    # Measurement
    if self.c['ptscorer'] == '1':
        # special scoring mode just based on the answer
        # (assuming that the question match is carried over to the answer
        # via attention or another mechanism)
        ptscorer = B.cat_ptscorer
        final_outputs = [final_outputs[1]]
    else:
        ptscorer = self.c['ptscorer']

    kwargs = dict()
    if ptscorer == B.mlp_ptscorer:
        kwargs['sum_mode'] = self.c['mlpsum']
        kwargs['Dinit'] = self.c['Dinit']
    model.add_node(name='scoreS',
                   input=ptscorer(model, final_outputs, self.c['Ddim'], N,
                                  self.c['l2reg'], **kwargs),
                   layer=Activation('linear'))
    model.add_node(name='out', input='scoreS',
                   layer=Dense(6, W_regularizer=l2(self.c['l2reg'])))
    model.add_node(name='outS', input='out', layer=Activation('softmax'))
    model.add_output(name='classes', input='outS')
    return model
def create_graph_model():
    model = Graph()
    model.add_input(name='input', input_shape=(input_dim,))
    model.add_node(Dense(32, activation='relu'), name='d1', input='input')
    model.add_node(Dense(nb_classes, activation='softmax'), name='d2', input='d1')
    model.add_output(name='output', input='d2')
    return model
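# Usage sketch (not part of the original snippet): how a Keras 0.x Graph model
# like create_graph_model() above is typically compiled and trained, with
# inputs/outputs keyed by name. `input_dim` and `nb_classes` are the globals
# the builder reads; the values here are illustrative.
import numpy as np

input_dim, nb_classes = 32, 10
model = create_graph_model()
model.compile(optimizer='rmsprop', loss={'output': 'categorical_crossentropy'})
X = np.random.random((128, input_dim))
y = np.random.random((128, nb_classes))
model.fit({'input': X, 'output': y}, batch_size=16, nb_epoch=2)
preds = model.predict({'input': X})['output']  # predict() returns a dict keyed by output name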
def prep_model(self, module_prep_model, oact='sigmoid'):
    # Input embedding and encoding
    model = Graph()
    N = B.embedding(model, self.emb, self.vocab, self.s0pad, self.s1pad,
                    self.c['inp_e_dropout'], self.c['inp_w_dropout'],
                    add_flags=self.c['e_add_flags'])

    # Sentence-aggregate embeddings
    final_outputs = module_prep_model(model, N, self.s0pad, self.s1pad, self.c)

    # Measurement
    if self.c['ptscorer'] == '1':
        # special scoring mode just based on the answer
        # (assuming that the question match is carried over to the answer
        # via attention or another mechanism)
        ptscorer = B.cat_ptscorer
        final_outputs = final_outputs[1]
    else:
        ptscorer = self.c['ptscorer']

    kwargs = dict()
    if ptscorer == B.mlp_ptscorer:
        kwargs['sum_mode'] = self.c['mlpsum']
    if self.c['f_add_kw']:
        model.add_input('kw', input_shape=(1,))
        model.add_input('akw', input_shape=(1,))
        kwargs['extra_inp'] = ['kw', 'akw']
    model.add_node(name='scoreS',
                   input=ptscorer(model, final_outputs, self.c['Ddim'], N,
                                  self.c['l2reg'], **kwargs),
                   layer=Activation(oact))
    model.add_output(name='score', input='scoreS')
    return model
def dcgan_pyramid_mse_generator(generator, input_dim=40):
    g = Graph()
    g.add_input("input", input_shape=(input_dim,))
    g.add_node(generator, "generator", input="input")
    g.add_node(Split(-2, None, axis=1), "mean", input="input")
    g.add_output("output", input="generator")
    return g
def test_recursive():
    # test layer-like API
    graph = Graph()
    graph.add_input(name='input1', input_shape=(32,))
    graph.add_node(Dense(16), name='dense1', input='input1')
    graph.add_node(Dense(4), name='dense2', input='input1')
    graph.add_node(Dense(4), name='dense3', input='dense1')
    graph.add_output(name='output1', inputs=['dense2', 'dense3'],
                     merge_mode='sum')

    seq = Sequential()
    seq.add(Dense(32, input_shape=(32,)))
    seq.add(graph)
    seq.add(Dense(4))
    seq.compile('rmsprop', 'mse')

    seq.fit(X_train_graph, y_train_graph, batch_size=10, nb_epoch=10)
    loss = seq.evaluate(X_test_graph, y_test_graph)

    # test serialization
    config = seq.get_config()
    new_graph = Sequential.from_config(config)

    seq.summary()
    json_str = seq.to_json()
    new_graph = model_from_json(json_str)

    yaml_str = seq.to_yaml()
    new_graph = model_from_yaml(yaml_str)
def create_temporal_graph_model():
    model = Graph()
    model.add_input(name="input", input_shape=(timesteps, input_dim))
    model.add_node(GRU(32, return_sequences=True), name="d1", input="input")
    model.add_node(TimeDistributedDense(nb_classes, activation="softmax"),
                   name="d2", input="d1")
    model.add_output(name="output", input="d2")
    return model
def create_graph_model():
    model = Graph()
    model.add_input(name="input", input_shape=(input_dim,))
    model.add_node(Dense(32, activation="relu"), name="d1", input="input")
    model.add_node(Dense(nb_classes, activation="softmax"), name="d2", input="d1")
    model.add_output(name="output", input="d2")
    return model
def convo1(input_shape):
    g = Graph()
    g.add_input("input", input_shape)
    g.add_node(ConvB(input_shape, 64, 7, 7, subsample=(2, 2)), "conv1", "input")
    g.add_node(MaxPooling2D((3, 3), strides=(2, 2)), "maxpool", "conv1")
    g.add_output("output", "maxpool")
    return g
def create_graph_model():
    model = Graph()
    model.add_input(name='input')
    model.add_node(Dense(784, 50, activation='relu'), name='d1', input='input')
    model.add_node(Dense(50, 10, activation='softmax'), name='d2', input='d1')
    model.add_output(name='output', input='d2')
    return model
def create_temporal_graph_model():
    model = Graph()
    model.add_input(name='input', input_shape=(timesteps, input_dim))
    model.add_node(GRU(32, return_sequences=True), name='d1', input='input')
    model.add_node(TimeDistributedDense(nb_classes, activation='softmax'),
                   name='d2', input='d1')
    model.add_output(name='output', input='d2')
    return model
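# Usage sketch (illustrative, not from the source): the temporal graph above
# emits one softmax per timestep (return_sequences=True + TimeDistributedDense),
# so targets must be shaped (samples, timesteps, nb_classes). The globals get
# illustrative values here.
import numpy as np

timesteps, input_dim, nb_classes = 8, 16, 4
model = create_temporal_graph_model()
model.compile(optimizer='rmsprop', loss={'output': 'categorical_crossentropy'})
X = np.random.random((64, timesteps, input_dim))
y = np.random.random((64, timesteps, nb_classes))
model.fit({'input': X, 'output': y}, batch_size=16, nb_epoch=2)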
def gan_grid_idx(generator, discriminator, batch_size=128, nb_z=20,
                 reconstruct_fn=None):
    nb_grid_params = nb_normalized_params()
    z_shape = (batch_size, nb_z)
    grid_params_shape = (nb_grid_params,)
    g_graph = Graph()
    g_graph.add_input('z', input_shape=z_shape[1:])
    g_graph.add_input('grid_params', input_shape=grid_params_shape)
    g_graph.add_node(generator, 'generator', inputs=['grid_params', 'z'])
    g_graph.add_output('output', input='generator')
    d_graph = asgraph(discriminator, input_name=GAN.d_input)
    return GAN(g_graph, d_graph, z_shape, reconstruct_fn=reconstruct_fn)
def build_model(self):
    # again, credit to Cheng Guo
    self.model = Graph()
    self.model.add_input(name='input',
                         input_shape=(self.preprocessor.maxlen,), dtype=int)
    #pdb.set_trace()
    self.model.add_node(Embedding(self.preprocessor.max_features,
                                  self.preprocessor.embedding_dims,
                                  input_length=self.preprocessor.maxlen,
                                  weights=self.preprocessor.init_vectors),
                        name='embedding', input='input')
    self.model.add_node(Dropout(0.), name='dropout_embedding', input='embedding')
    for n_gram in self.ngram_filters:
        self.model.add_node(
            Convolution1D(nb_filter=self.nb_filter,
                          filter_length=n_gram,
                          border_mode='valid',
                          activation='relu',
                          subsample_length=1,
                          input_dim=self.preprocessor.embedding_dims,
                          input_length=self.preprocessor.maxlen),
            name='conv_' + str(n_gram), input='dropout_embedding')
        self.model.add_node(
            MaxPooling1D(pool_length=self.preprocessor.maxlen - n_gram + 1),
            name='maxpool_' + str(n_gram), input='conv_' + str(n_gram))
        self.model.add_node(Flatten(), name='flat_' + str(n_gram),
                            input='maxpool_' + str(n_gram))
    self.model.add_node(Dropout(self.dropout), name='dropout',
                        inputs=['flat_' + str(n) for n in self.ngram_filters])
    self.model.add_node(Dense(1, input_dim=self.nb_filter * len(self.ngram_filters)),
                        name='dense', input='dropout')
    self.model.add_node(Activation('sigmoid'), name='sigmoid', input='dense')
    self.model.add_output(name='output', input='sigmoid')
    print("model built")
    print(self.model.summary())
    self.model.compile(loss={'output': 'binary_crossentropy'},
                       optimizer="adam")  # optimizer
def prep_model(glove, vocab, module_prep_model, c, spad=spad):
    # Input embedding and encoding
    model = Graph()
    N = B.embedding(model, glove, vocab, spad, spad, c['inp_e_dropout'],
                    c['inp_w_dropout'], add_flags=c['e_add_flags'])

    # Sentence-aggregate embeddings
    final_outputs = module_prep_model(model, N, spad, spad, c)

    # Measurement
    kwargs = dict()
    if c['ptscorer'] == B.mlp_ptscorer:
        kwargs['sum_mode'] = c['mlpsum']
    model.add_node(name='scoreS',
                   input=c['ptscorer'](model, final_outputs, c['Ddim'], N,
                                       c['l2reg'], **kwargs),
                   layer=Activation('sigmoid'))
    model.add_output(name='score', input='scoreS')
    return model
def prep_model(glove, vocab, dropout=1/2, dropout_w=0, dropout_in=4/5,
               l2reg=1e-4,
               cnnact='tanh', cnninit='glorot_uniform',
               cdim={1: 1, 2: 1/2, 3: 1/2, 4: 1/2, 5: 1/2},
               project=True, pdim=2.5,
               ptscorer=B.mlp_ptscorer, mlpsum='sum', Ddim=1,
               oact='sigmoid'):
    model = Graph()
    N = B.embedding(model, glove, vocab, s0pad, s1pad, dropout, dropout_w)

    if dropout_in is None:
        dropout_in = dropout

    Nc = B.cnnsum_input(model, N, s0pad, dropout=dropout_in, l2reg=l2reg,
                        cnninit=cnninit, cnnact=cnnact, cdim=cdim)

    # Projection
    if project:
        model.add_shared_node(name='proj', inputs=['e0s_', 'e1s_'],
                              outputs=['e0p', 'e1p'],
                              layer=Dense(input_dim=Nc, output_dim=int(N*pdim),
                                          W_regularizer=l2(l2reg)))
        # This dropout is controversial; it might be harmful to apply,
        # or at least isn't a clear win.
        # model.add_shared_node(name='projdrop', inputs=['e0p', 'e1p'],
        #                       outputs=['e0p_', 'e1p_'],
        #                       layer=Dropout(dropout_in, input_shape=(N,)))
        # final_outputs = ['e0p_', 'e1p_']
        final_outputs = ['e0p', 'e1p']
    else:
        final_outputs = ['e0s_', 'e1s_']

    # Measurement
    kwargs = dict()
    if ptscorer == B.mlp_ptscorer:
        kwargs['sum_mode'] = mlpsum
    model.add_node(name='scoreS',
                   input=ptscorer(model, final_outputs, Ddim, N, l2reg, **kwargs),
                   layer=Activation(oact))
    model.add_output(name='score', input='scoreS')
    return model
def __init__(self, keywords, winSize=100, wdim=32, zdim=1024, zdim2=1024,
             reg=0., lstm_activation='tanh',
             lstm_inner_activation='hard_sigmoid',
             output_activation='softmax', loss_optimizer='adagrad',
             load_from_file=True, filename=""):
    """
    Put other relevant arguments here such as dim of hidden units etc.
    """
    super(RnnAttentionDense2, self).__init__(keywords, winSize)

    # save parameters
    vocab_size = len(keywords) + winSize + 1
    self.params = {}
    self.params["wdim"] = wdim
    self.params["vocab_size"] = vocab_size

    # initialize keras model
    self.model = Graph()

    # convert words to dense vectors
    self.model.add_input(name='word', input_shape=(winSize,), dtype='int')
    self.model.add_node(Embedding(vocab_size, wdim, input_length=winSize),
                        name='wvec', input='word')

    self.model.add_node(Flatten(), name='wvecf', input='wvec')
    self.model.add_node(Dense(winSize, activation='sigmoid',
                              W_regularizer=l2(reg)),
                        name='attn', input='wvecf')
    self.model.add_node(RepeatVector(wdim), name='attnr', input='attn')
    self.model.add_node(Permute(dims=(2, 1)), name='attnp', input='attnr')

    # multiply word vector by attention and flatten output
    self.model.add_node(Flatten(), name='awvecf', inputs=['wvec', 'attnp'],
                        merge_mode='mul')

    # fully connected layers
    self.model.add_node(Dense(zdim, activation='relu', W_regularizer=l2(reg)),
                        name='d1', input='awvecf')
    self.model.add_node(Dense(zdim2, activation='relu', W_regularizer=l2(reg)),
                        name='d2', input='d1')

    # final layer
    self.model.add_node(Dense(vocab_size, activation=output_activation,
                              W_regularizer=l2(reg)),
                        name='d3', input='d2')
    self.model.add_output(name='probs', input='d3')

    # compile with optimizer, loss function
    self.model.compile(loss_optimizer, {'probs': 'categorical_crossentropy'})

    # also compile a function for getting the attention vector
    self.get_attn = theano.function(
        [self.model.inputs[i].input for i in self.model.input_order],
        self.model.nodes['attn'].get_output(train=False),
        on_unused_input='ignore')
def __init__(self, input_dim=4000):
    graph = Graph()
    #hidden_dim = input_dim / 2
    hidden_dim = 100
    graph.add_input(name='X', input_shape=(input_dim,))
    graph.add_node(Dense(hidden_dim, activation='sigmoid'), name='hidden', input='X')
    graph.add_node(Dense(1, activation='sigmoid'), name='pre-out', input='hidden')
    # for training
    graph.add_output(name='out', input='pre-out')
    self.graph = graph
def small_vgglike_net(shapes, solver, init):
    # Little net following VGG-like architecture
    model = Sequential()
    rgb_shape = shapes['images']
    poselet_classes, = shapes['poselet']
    # Will leave out BN for now
    # model.add(BatchNormalization(mode=0, axis=1))
    model.add(Convolution2D(64, 3, 3, border_mode='same', init=init,
                            activation='relu', input_shape=rgb_shape))
    model.add(Convolution2D(64, 3, 3, border_mode='same', init=init, activation='relu'))
    model.add(Convolution2D(64, 3, 3, border_mode='same', init=init, activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Convolution2D(128, 3, 3, border_mode='same', init=init, activation='relu'))
    model.add(Convolution2D(128, 3, 3, border_mode='same', init=init, activation='relu'))
    model.add(Convolution2D(128, 3, 3, border_mode='same', init=init, activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Convolution2D(128, 3, 3, border_mode='same', init=init, activation='relu'))
    model.add(Convolution2D(128, 3, 3, border_mode='same', init=init, activation='relu'))
    model.add(Convolution2D(128, 3, 3, border_mode='same', init=init, activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Convolution2D(256, 3, 3, border_mode='same', init=init, activation='relu'))
    model.add(Convolution2D(256, 3, 3, border_mode='same', init=init, activation='relu'))
    model.add(Convolution2D(256, 3, 3, border_mode='same', init=init, activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Flatten())
    model.add(Dense(2048, init=init, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(2048, init=init, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(poselet_classes, init=init, activation='softmax'))

    # Use a graph container for the sake of the supporting code
    container = Graph()
    container.add_input(input_shape=rgb_shape, name='images')
    container.add_node(model, input='images', name='sequential')
    container.add_output(input='sequential', name='poselet')
    losses = {'poselet': 'categorical_crossentropy'}
    container.compile(optimizer=solver, loss=losses)

    return container
def setup(self):
    self.logger.info("Setting up layers.")
    self.model = Graph()  # Graph model with two outputs.
    concatenated_size = self.vocabulary_size * self.window_size
    self.model.add_input(name='input', input_shape=(self.window_size,))
    # Add embedding layer. (Single-input nodes use `input=`, not `inputs=`.)
    self.model.add_node(Embedding(output_dim=self.embedding_size,
                                  input_dim=self.vocabulary_size,
                                  weights=[self.embeddings],
                                  input_length=self.window_size,
                                  trainable=not self.fix_embedding),
                        name='embedding', input='input')
    self.logger.info("Embedded sequence output is %s" % str(self.model.output_shape))
    self.model.add_node(Flatten(), name='flatten', input='embedding')
    self.logger.info("Flattened output is %s" % str(self.model.output_shape))
    # Adding deep layers.
    for i in range(HParams.num_middle_layers):
        input_layer_name = 'flatten' if i == 0 else "inner%d" % (i - 1)
        self.model.add_node(
            Dense(output_dim=HParams.num_hidden_units, init='uniform',
                  W_regularizer=HParams.regularizer,
                  activation=HParams.hidden_activation),
            name='inner%d' % i, input=input_layer_name)
    # Add output layer for the tagging.
    self.model.add_node(
        Dense(output_dim=self.pos_dim, init='uniform',
              W_regularizer=HParams.regularizer,
              activation=HParams.label_output_layer_type),
        name='pos_layer', input="inner%d" % (HParams.num_middle_layers - 1))
    self.model.add_output(name='pos_output', input='pos_layer')
    # Add output layer for the auto encoder.
    self.model.add_node(Dense(output_dim=concatenated_size, init='uniform',
                              W_regularizer=HParams.regularizer,
                              activation=HParams.auto_output_layer_type),
                        name='auto_layer', input='flatten')
    self.model.add_output(name='auto_output', input='auto_layer')
    # Loss keys must match the declared output names.
    self.model.compile(
        loss={'pos_output': HParams.layer_output_loss,
              'auto_output': HParams.auto_output_loss},
        optimizer=HParams.optimizer)
    self.logger.info("Done setting layers.")
def build_model(self, window_size, n_feature_maps, word_vector_size,
                activation_function, filter_sizes, dense_layer_sizes,
                input_dropout_rate, hidden_dropout_rate, dropout, k_output):
    print "Window size: {}".format(window_size)
    print "N Feature Maps: {}".format(n_feature_maps)
    print "Word vector size: {}".format(word_vector_size)
    model = Graph()
    model.add_input('data', input_shape=(1, window_size * 2 + 1, word_vector_size))
    for filter_size in filter_sizes:
        conv_layer = containers.Sequential()
        conv_layer.add(Convolution2D(n_feature_maps, filter_size, word_vector_size,
                                     input_shape=(1, window_size * 2 + 1,
                                                  word_vector_size)))
        conv_layer.add(Activation(activation_function))
        conv_layer.add(MaxPooling2D(pool_size=(window_size * 2 + 1 - filter_size + 1, 1)))
        conv_layer.add(Flatten())
        model.add_node(conv_layer, name='filter_unit_' + str(filter_size),
                       input='data')
    fully_connected_nn = containers.Sequential()
    fully_connected_nn.add(Dense(n_feature_maps * len(filter_sizes),
                                 input_dim=n_feature_maps * len(filter_sizes)))
    fully_connected_nn.add(Activation(activation_function))
    if dropout:
        fully_connected_nn.add(Dropout(hidden_dropout_rate))
    fully_connected_nn.add(Dense(k_output))
    fully_connected_nn.add(Activation('softmax'))
    model.add_node(fully_connected_nn, name='fully_connected_nn',
                   inputs=['filter_unit_' + str(n) for n in filter_sizes])
    model.add_output(name='nn_output', input='fully_connected_nn')
    return model
def __init__(self, rnn_type, connection, dimensions):
    self.rnn_type = rnn_type
    self.connection = connection
    self.nT = dimensions['nT']  # 10 # 400
    self.nF = dimensions['nF']  # 39 # 39
    self.nH = dimensions['nH']  # 7 # 100
    self.restriction = []
    self.speed_limit = {}
    self.speed_fine = {}
    self.rnn_layer = []
    self.layer_type = {}
    self.layer_con = {}
    self.graph = Graph()
def __init__(self, network, input_size=2, batch_size=20):
    """Initiate the neural network and compile it.

    Input:
        network: string in ['Corrector', 'Symetric', 'Siamese']
    """
    self.batch_size = batch_size
    self.network = network
    self.input_size = input_size
    # Instantiate the NN
    self.model = Graph()
    if self.network == 'Symetric':
        self.buildSymetricNN()
    elif self.network == 'Corrector':
        self.buildCorrectorNN()
    elif self.network == 'Siamese':
        self.buildSymetricNN()
    self.compile()
def time_block3(input_shape, nb_filter1, nb_filter2, nb_blocks, has_edge):
    convs = []
    last_shape = input_shape
    for i in range(nb_blocks):
        is_edge = i == 0 and has_edge
        c = block3(last_shape, nb_filter1, nb_filter2, is_edge)
        last_shape = cinput_shape(c)
        convs.append(c)

    g = Graph()
    g.add_input("input", input_shape)
    last_name = "input"
    for i in range(len(convs)):
        name = "conv" + str(i)
        g.add_node(convs[i], name, last_name)
        last_name = name
    g.add_output("output", last_name)
    return g
def prep_model(self, module_prep_model, oact="sigmoid"): # Input embedding and encoding model = Graph() N = B.embedding( model, self.emb, self.vocab, self.s0pad, self.s1pad, self.c["inp_e_dropout"], self.c["inp_w_dropout"], add_flags=self.c["e_add_flags"], ) # Sentence-aggregate embeddings final_outputs = module_prep_model(model, N, self.s0pad, self.s1pad, self.c) # Measurement if self.c["ptscorer"] == "1": # special scoring mode just based on the answer # (assuming that the question match is carried over to the answer # via attention or another mechanism) ptscorer = B.cat_ptscorer final_outputs = [final_outputs[1]] else: ptscorer = self.c["ptscorer"] kwargs = dict() if ptscorer == B.mlp_ptscorer: kwargs["sum_mode"] = self.c["mlpsum"] kwargs["Dinit"] = self.c["Dinit"] if "f_add" in self.c: for inp in self.c["f_add"]: model.add_input(inp, input_shape=(1,)) # assumed scalar kwargs["extra_inp"] = self.c["f_add"] model.add_node( name="scoreS", input=ptscorer(model, final_outputs, self.c["Ddim"], N, self.c["l2reg"], **kwargs), layer=Activation(oact), ) model.add_output(name="score", input="scoreS") return model
def construct(self, structure):
    '''
    structure - a list of (is_shared, layer_fn) tuples detailing the
                structure of the Siamese part of the network
    is_shared - boolean, whether or not the layer is shared
    layer_fn - a generator function for a layer
    '''
    self.graph = Graph()
    input_left = self.INPUT_LEFT
    input_right = self.INPUT_RIGHT
    self.graph.add_input(name=input_left, input_shape=self.input_shape)
    self.graph.add_input(name=input_right, input_shape=self.input_shape)
    unique_name = 'name'
    for is_shared, layer_fn in structure:
        if is_shared:
            self.graph.add_shared_node(
                layer_fn(), name=unique_name,
                inputs=[input_left, input_right],
                outputs=[input_left + '\'', input_right + '\''])
        else:
            self.graph.add_node(layer_fn(), input=input_left,
                                name=input_left + '\'')
            self.graph.add_node(layer_fn(), input=input_right,
                                name=input_right + '\'')
        input_left += '\''
        input_right += '\''
        unique_name += '0'
    self.graph.add_node(Lambda(l2dist), inputs=[input_left, input_right],
                        merge_mode='join', name='dist')
    self.graph.add_output(name=self.OUTPUT, input='dist')
    if self.verbose:
        print 'Constructed a SiameseNet.'
def __init__(self):
    self.graph = Graph()
    reg = l2(0.001)
    CONV3_1 = TimeDistributedConvolution2D(nb_filter=8, nb_row=3, nb_col=3,
                                           init='he_uniform', activation='relu',
                                           border_mode='same', W_regularizer=reg)
    CONV3_2 = TimeDistributedConvolution2D(nb_filter=16, nb_row=3, nb_col=3,
                                           init='he_uniform', activation='relu',
                                           border_mode='same', W_regularizer=reg)
    RNN3 = SimpleRNN(output_dim=512, input_shape=(8, 16386), input_length=8)
    FC3 = Dense(1024, activation='relu', init='he_uniform')
    FC4 = Dense(1024)

    self.graph.add_input(name='screen_in', input_shape=(8, 1, 32, 32))
    self.graph.add_node(CONV3_1, name='CONV3_1', input='screen_in')
    self.graph.add_node(CONV3_2, name='CONV3_2', input='CONV3_1')
    #self.graph.add_node(Flatten(), name='FLATTEN', input='CONV3_2')
    self.graph.add_input(name='control_in', input_shape=(8, 2,))
    #self.graph.add_node(Reshape((-1, 2)), name='RESHAPE3', input='control_in')
    #self.graph.add_node(Reshape((-1, 16384)), name='RESHAPE', input='FLATTEN')
    #self.graph.add_node(Reshape((8, 16386), batch_input_shape=(128, 16386)), name='RESHAPE', inputs=['FLATTEN', 'control_in'])
    self.graph.add_node(TimeDistributedFlatten(), input='CONV3_2', name='FLATTEN')
    self.graph.add_node(RNN3, name='RNN3', inputs=['FLATTEN', 'control_in'])
    #self.graph.add_node(Reshape((-1, 256)), name='RESHAPE2', input='RNN3')
    self.graph.add_node(FC3, name='FC3', input='RNN3')
    self.graph.add_node(FC4, name='FC4', input='FC3')
    self.graph.add_node(Activation('softmax'), name='SOFTMAX', input='FC4')
    self.graph.add_output(name='screen_out', input='SOFTMAX')
    self.graph.compile(optimizer='adam',
                       loss={'screen_out': 'binary_crossentropy'})
def main(argv):
    max_features = 100000
    max_len = 100
    lines = read_lines(argv[0])
    labels, questions = [], []
    for line in lines:
        tokens = line.split()
        labels += [tokens[0]]
        questions += [' '.join(tokens[1:])]
    # Train the tokenizer on both training and validation sets
    tokenizer = text.Tokenizer(nb_words=max_features)
    tokenizer.fit_on_texts(questions)
    sequences = [seq for seq in tokenizer.texts_to_sequences_generator(questions)]
    X = sequence.pad_sequences(sequences, maxlen=max_len)
    label2idx = {label: idx for idx, label in enumerate(sorted(set(labels)), 0)}
    nb_classes = len(label2idx)
    labels_idx = [label2idx[label] for label in labels]
    y = np_utils.to_categorical(labels_idx, nb_classes)
    logging.info('X is: %s' % str(X.shape))
    logging.info('y is: %s' % str(y.shape))
    graph = Graph()
    graph = build_graph(graph, l2=1e-4)
    graph.add_output(name='output', input='softmax')
    graph.compile(optimizer='adadelta',
                  loss={'output': 'categorical_crossentropy'})
    # Graph.fit in this Keras version takes nb_epoch, not epochs
    graph.fit({'input_query': X, 'output': y}, nb_epoch=100)
def test_2o_1i_sample_weights():
    # test a non-sequential graph with 1 input and 2 outputs with sample weights
    graph = Graph()
    graph.add_input(name='input1', input_shape=(32,))
    graph.add_node(Dense(16), name='dense1', input='input1')
    graph.add_node(Dense(4), name='dense2', input='input1')
    graph.add_node(Dense(1), name='dense3', input='dense1')
    graph.add_output(name='output1', input='dense2')
    graph.add_output(name='output2', input='dense3')

    weights1 = np.random.uniform(size=y_train_graph.shape[0])
    weights2 = np.random.uniform(size=y2_train_graph.shape[0])
    weights1_test = np.random.uniform(size=y_test_graph.shape[0])
    weights2_test = np.random.uniform(size=y2_test_graph.shape[0])

    graph.compile('rmsprop', {'output1': 'mse', 'output2': 'mse'})
    graph.fit({'input1': X_train_graph,
               'output1': y_train_graph,
               'output2': y2_train_graph},
              nb_epoch=10,
              sample_weight={'output1': weights1, 'output2': weights2})
    out = graph.predict({'input1': X_test_graph})
    assert type(out) == dict
    assert len(out) == 2
    loss = graph.test_on_batch({'input1': X_test_graph,
                                'output1': y_test_graph,
                                'output2': y2_test_graph},
                               sample_weight={'output1': weights1_test,
                                              'output2': weights2_test})
    loss = graph.train_on_batch({'input1': X_train_graph,
                                 'output1': y_train_graph,
                                 'output2': y2_train_graph},
                                sample_weight={'output1': weights1,
                                               'output2': weights2})
    loss = graph.evaluate({'input1': X_train_graph,
                           'output1': y_train_graph,
                           'output2': y2_train_graph},
                          sample_weight={'output1': weights1,
                                         'output2': weights2})
def dcmogan(generator_fn, discriminator_fn, batch_size=128):
    nb_g_z = 20
    nb_grid_config = NUM_CONFIGS + NUM_MIDDLE_CELLS + len(CONFIG_ROTS)
    ff_generator = generator_fn(input_dim=2 * nb_g_z, nb_output_channels=2)

    g = Graph()
    g.add_input("z", (nb_g_z,))
    g.add_input("grid_config", (nb_grid_config,))
    g.add_node(Dense(nb_g_z, activation='relu'), "dense1", input="grid_config")
    g.add_node(ff_generator, "dcgan", inputs=["z", "dense1"], merge_mode='concat')
    g.add_output("output", input="dcgan")
    g.add_node(Dense(1, activation='sigmoid'), "alpha", input="dense1",
               create_output=True)

    def reconstruct(g_outmap):
        g_out = g_outmap["output"]
        alpha = g_outmap["alpha"]
        alpha = 0.5 * alpha + 0.5
        alpha = alpha.reshape((batch_size, 1, 1, 1))
        m = g_out[:, :1]
        v = g_out[:, 1:]
        return (alpha * m + (1 - alpha) * v).reshape(
            (batch_size, 1, TAG_SIZE, TAG_SIZE))

    grid_loss_weight = theano.shared(np.cast[np.float32](1))

    def grid_loss(grid_idx, g_outmap):
        g_out = g_outmap['output']
        m = g_out[:, :1]
        b = binary_mask(grid_idx, ignore=0.0, white=1.)
        return grid_loss_weight * mse(b, m)

    gan = GAN(g, asgraph(discriminator_fn(), input_name=GAN.d_input),
              z_shape=(batch_size, nb_g_z), reconstruct_fn=reconstruct)
    mogan = MOGAN(gan, grid_loss, lambda: Adam(lr=0.0002, beta_1=0.5),
                  gan_regulizer=GAN.L2Regularizer())
    return mogan, grid_loss_weight
# Imports for Graph and the convolutional layers were missing from the
# original excerpt; these are the standard Keras 0.x module paths.
from keras.models import Graph
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.layers.core import Dense, Activation, Flatten, Merge, Reshape
from keras.optimizers import SGD
from keras.utils import np_utils
from keras.utils.io_utils import HDF5Matrix
from seya.layers.attention import GaussianModel, BallModel, ConvertToXY
from PIL import Image

BATCHSIZE = 1
NB_EPOCH = 100
HEIGHT = 96
WIDTH = 96
BALLNUM = 48
SIGMA = 30

### 96*96
graph = Graph()
graph.add_input(name='data96', ndim=4)
graph.add_node(Convolution2D(16, 1, 5, 5, activation='relu'),
               name='conv196', input='data96')
graph.add_node(MaxPooling2D(poolsize=(4, 4)), name='pool196', input='conv196')
graph.add_node(Convolution2D(32, 16, 2, 2, activation='relu'),
               name='conv296', input='pool196')
graph.add_node(MaxPooling2D(poolsize=(2, 2)), name='pool296', input='conv296')
graph.add_node(Flatten(), name='flatten96', input='pool296')

### 48*48
graph.add_node(MaxPooling2D(poolsize=(2, 2)), name='data48', input='data96')
graph.add_node(Convolution2D(16, 1, 3, 3, activation='relu'),
               name='conv148', input='data48')
def get_model(inputdim, outputdim, regularization_strength=0.01, lr=0.000,
              cosine=False, **kwargs):
    transformation = Dense(inputdim, init='identity',
                           W_constraint=Orthogonal())

    model = Graph()
    model.add_input(name='embeddings1', input_shape=(inputdim,))
    model.add_input(name='embeddings2', input_shape=(inputdim,))
    model.add_shared_node(transformation, name='transformation',
                          inputs=['embeddings1', 'embeddings2'],
                          outputs=['transformed1', 'transformed2'])
    model.add_node(Lambda(lambda x: x[:, :outputdim]),
                   input='transformed1', name='projected1')
    model.add_node(Lambda(lambda x: -x[:, :outputdim]),
                   input='transformed2', name='negprojected2')

    if cosine:
        model.add_node(
            Lambda(lambda x: x / K.reshape(K.sqrt(K.sum(x * x, axis=1)),
                                           (x.shape[0], 1))),
            name='normalized1', input='projected1')
        model.add_node(
            Lambda(lambda x: x / K.reshape(K.sqrt(K.sum(x * x, axis=1)),
                                           (x.shape[0], 1))),
            name='negnormalized2', input='negprojected2')
        model.add_node(
            Lambda(lambda x: K.reshape(K.sum(x, axis=1), (x.shape[0], 1))),
            name='distances', inputs=['normalized1', 'negnormalized2'],
            merge_mode='mul')
    else:
        model.add_node(
            Lambda(lambda x: K.reshape(K.sqrt(K.sum(x * x, axis=1)),
                                       (x.shape[0], 1))),
            name='distances', inputs=['projected1', 'negprojected2'],
            merge_mode='sum')

    model.add_output(name='y', input='distances')
    model.compile(loss={'y': lambda y, d: K.mean(y * d)},
                  optimizer=SimpleSGD())
    return model
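# Usage sketch (assumptions flagged, not from the source): training the metric
# model above. The custom loss K.mean(y * d) suggests labels in {-1, +1}, so
# positive pairs shrink the distance and negative pairs grow it; the
# dimensions and batch below are illustrative.
import numpy as np

inputdim, outputdim = 300, 100
model = get_model(inputdim, outputdim)
E1 = np.random.random((64, inputdim)).astype('float32')  # first embedding batch
E2 = np.random.random((64, inputdim)).astype('float32')  # second embedding batch
labels = np.random.choice([-1.0, 1.0], size=(64, 1))     # +1 = similar, -1 = dissimilar (assumed convention)
model.fit({'embeddings1': E1, 'embeddings2': E2, 'y': labels},
          batch_size=32, nb_epoch=1)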
def test_1o_1i(self):
    print('test a non-sequential graph with 1 input and 1 output')
    graph = Graph()
    graph.add_input(name='input1', input_shape=(32,))
    graph.add_node(Dense(16), name='dense1', input='input1')
    graph.add_node(Dense(4), name='dense2', input='input1')
    graph.add_node(Dense(4), name='dense3', input='dense1')
    graph.add_output(name='output1', inputs=['dense2', 'dense3'],
                     merge_mode='sum')
    graph.compile('rmsprop', {'output1': 'mse'})

    history = graph.fit({'input1': X_train, 'output1': y_train}, nb_epoch=10)
    out = graph.predict({'input1': X_test})
    assert type(out) == dict
    assert len(out) == 1

    loss = graph.test_on_batch({'input1': X_test, 'output1': y_test})
    loss = graph.train_on_batch({'input1': X_test, 'output1': y_test})
    loss = graph.evaluate({'input1': X_test, 'output1': y_test})
    print(loss)
    assert (loss < 2.5)
X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
X_test = sequence.pad_sequences(X_test, maxlen=maxlen)
print('X_train shape:', X_train.shape)
print('X_test shape:', X_test.shape)

print('Convert class vector to binary class matrix (for use with categorical_crossentropy)')
Y_train = np_utils.to_categorical(y_train, nb_classes)
Y_test = np_utils.to_categorical(y_test, nb_classes)

# ===== Beginning of model definition =====
print('Build model...')

# Since we have multiple convolutional layers in parallel,
# Graph is needed instead of Sequential.
model = Graph()
model.add_input(name='input', input_shape=(maxlen,), dtype='int')

# Start off with an efficient embedding layer which maps
# our vocab indices into embedding_dims dimensions
embedding_layer = Embedding(max_features, embedding_dims,
                            input_length=maxlen, weights=[w2v_word_vec],
                            trainable=True)
model.add_node(embedding_layer, name='embedding', input='input')
model.add_node(Dropout(0.5), name='embedding_dropout', input='embedding')

# Add several Convolution1D layers, learning nb_filter word group filters
# with different filter_lengths
if args.vocab_size:
    print "Overriding original vocabulary size", vocab_size
    vocab_size = args.vocab_size
print "Vocabulary size:", vocab_size, "Texts: ", texts.shape

if args.rnn == 'GRU':
    RNN = recurrent.GRU
elif args.rnn == 'LSTM':
    RNN = recurrent.LSTM
else:
    assert False, "Invalid RNN"

print "Creating model..."

if args.bidirectional:
    model = Graph()
    model.add_input(name="input",
                    batch_input_shape=(args.batch_size,) + texts.shape[1:],
                    dtype="uint")
    model.add_node(Embedding(vocab_size, args.embed_size, mask_zero=True),
                   name="embed", input='input')
    for i in xrange(args.layers):
        model.add_node(RNN(args.hidden_size,
                           return_sequences=False if i + 1 == args.layers else True),
                       name='forward' + str(i + 1),
                       input='embed' if i == 0 else 'dropout' + str(i) if args.dropout > 0 else None,
                       inputs=['forward' + str(i), 'backward' + str(i)] if i > 0 and args.dropout == 0 else [])
class Aesthetics():

    def __init__(self):
        self.layer_list = []
        self.config_file = None
        self.io = EmbeddingIO(None)
        self.init = False
        self.cfgs = None
    #def

    def configure(self, config_file):
        self.config_file = config_file
        self.init = False
        if not os.path.exists(self.config_file):
            self.io.print_error('Could not find config file for InceptionV3 {0}'.format(self.config_file))
            self.init = False
            return
        #if
        pfile = open(self.config_file, 'r')
        self.cfgs = yaml.load(pfile)
        pfile.close()
        self.init = True
    #def

    def add_to_graph(self, *args, **kwargs):
        self.model.add_node(*args, **kwargs)
        self.last_added_node = kwargs['name']
        self.layer_list.append(kwargs['name'])
        return kwargs['name']
    #def

    def add_bn_conv_layer(self, *args, **kwargs):
        layer_name = kwargs['name']
        input_layer = kwargs['input']
        del kwargs['name']
        del kwargs['input']
        if 'padding' in kwargs:
            layer_name = layer_name + '_pad'
            self.add_to_graph(ZeroPadding2D(padding=kwargs['padding']),
                              name=layer_name, input=input_layer)
            input_layer = layer_name
            del kwargs['padding']
        #if
        # CONV with linear activation by default
        layer_name = layer_name + '_conv'
        self.add_to_graph(Convolution2D(*args, **kwargs),
                          name=layer_name, input=input_layer)
        # Batch normalization added directly on output of a linear layer
        input_layer = layer_name
        layer_name = layer_name + '_bn'
        _ = self.add_to_graph(BatchNormalization(mode=0, epsilon=0.0001, axis=1),
                              name=layer_name, input=input_layer)
        # Standard normalization
        input_layer = layer_name
        layer_name = layer_name + '_nonlin'
        _ = self.add_to_graph(Activation('relu'),
                              name=layer_name, input=input_layer)
        return layer_name

    def add_inceptionF(self, input_layer, list_nb_filter, base_name):
        tower_1_1 = self.add_bn_conv_layer(name=base_name + 'tower_1_1',
                                           input=input_layer,
                                           nb_filter=list_nb_filter[0],
                                           nb_row=1, nb_col=1)
        tower_2_1 = self.add_bn_conv_layer(name=base_name + 'tower_2_1',
                                           input=input_layer,
                                           nb_filter=list_nb_filter[1],
                                           nb_row=1, nb_col=1)
        tower_2_2 = self.add_bn_conv_layer(name=base_name + 'tower_2_2',
                                           input=tower_2_1,
                                           nb_filter=list_nb_filter[2],
                                           nb_row=3, nb_col=3)
        tower_3_1 = self.add_bn_conv_layer(name=base_name + 'tower_3_1',
                                           input=input_layer,
                                           nb_filter=list_nb_filter[3],
                                           nb_row=1, nb_col=1)
        tower_3_2 = self.add_bn_conv_layer(name=base_name + 'tower_3_2',
                                           input=tower_3_1,
                                           nb_filter=list_nb_filter[4],
                                           nb_row=5, nb_col=5)
        tower_4_1 = self.add_to_graph(MaxPooling2D((3, 3), strides=(1, 1),
                                                   border_mode='same'),
                                      name=base_name + 'tower_4_1',
                                      input=input_layer)
        tower_4_2 = self.add_bn_conv_layer(name=base_name + 'tower_4_2',
                                           input=tower_4_1,
                                           nb_filter=list_nb_filter[5],
                                           nb_row=1, nb_col=1)
        self.add_to_graph(Activation("linear"), name=base_name,
                          inputs=[tower_1_1, tower_2_2, tower_3_2, tower_4_2],
                          merge_mode="concat", concat_axis=1)
        self.io.print_info('Added Inception {0}'.format(base_name))

    def add_inceptionA(self, input_layer, list_nb_filter, base_name):
        l1_1 = self.add_bn_conv_layer(name=base_name + '_l1_1', input=input_layer,
                                      nb_filter=list_nb_filter[0][0], nb_row=1, nb_col=1)
        l2_1 = self.add_bn_conv_layer(name=base_name + '_l2_1', input=input_layer,
                                      nb_filter=list_nb_filter[1][0], nb_row=1, nb_col=1)
        l2_2 = self.add_bn_conv_layer(name=base_name + '_l2_2', input=l2_1,
                                      nb_filter=list_nb_filter[1][1], nb_row=5, nb_col=5,
                                      padding=(2, 2))
        l3_1 = self.add_bn_conv_layer(name=base_name + '_l3_1', input=input_layer,
                                      nb_filter=list_nb_filter[2][0], nb_row=1, nb_col=1)
        l3_2 = self.add_bn_conv_layer(name=base_name + '_l3_2', input=l3_1,
                                      nb_filter=list_nb_filter[2][1], nb_row=3, nb_col=3,
                                      padding=(1, 1))
        l3_3 = self.add_bn_conv_layer(name=base_name + '_l3_3', input=l3_2,
                                      nb_filter=list_nb_filter[2][2], nb_row=3, nb_col=3,
                                      padding=(1, 1))
        l4_1 = self.add_to_graph(ZeroPadding2D(padding=(1, 1)),
                                 name=base_name + '_14_1', input=input_layer)
        l4_2 = self.add_to_graph(AveragePooling2D(pool_size=(3, 3), strides=(1, 1)),
                                 name=base_name + '_14_2', input=l4_1)
        l4_3 = self.add_bn_conv_layer(name=base_name + '_l4_3', input=l4_2,
                                      nb_filter=list_nb_filter[3][0], nb_row=1, nb_col=1)
        self.add_to_graph(Activation("linear"), name=base_name,
                          inputs=[l1_1, l2_2, l3_3, l4_3],
                          merge_mode="concat", concat_axis=1)
        self.io.print_info('Added Inception-A {0}'.format(base_name))

    # https://github.com/fchollet/keras/issues/391
    def add_inceptionB(self, input_layer, list_nb_filter, base_name):
        l1_1 = self.add_bn_conv_layer(name=base_name + '_l1_1', input=input_layer,
                                      nb_filter=list_nb_filter[0][0], nb_row=3, nb_col=3,
                                      subsample=(2, 2))
        l2_1 = self.add_bn_conv_layer(name=base_name + '_l2_1', input=input_layer,
                                      nb_filter=list_nb_filter[1][0], nb_row=1, nb_col=1)
        l2_2 = self.add_bn_conv_layer(name=base_name + '_l2_2', input=l2_1,
                                      nb_filter=list_nb_filter[1][1], nb_row=3, nb_col=3,
                                      padding=(1, 1))
        l2_3 = self.add_bn_conv_layer(name=base_name + '_l2_3', input=l2_2,
                                      nb_filter=list_nb_filter[1][2], nb_row=3, nb_col=3,
                                      subsample=(2, 2))
        l3_1 = self.add_to_graph(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)),
                                 name=base_name + '_13_1', input=input_layer)
        self.add_to_graph(Activation("linear"), name=base_name,
                          inputs=[l1_1, l2_3, l3_1],
                          merge_mode="concat", concat_axis=1)
        self.io.print_info('Added Inception-B {0}'.format(base_name))

    # https://github.com/fchollet/keras/issues/391
    def add_inceptionC(self, input_layer, list_nb_filter, base_name):
        l1_1 = self.add_bn_conv_layer(name=base_name + '_l1_1', input=input_layer,
                                      nb_filter=list_nb_filter[0][0], nb_row=1, nb_col=1)
        l2_1 = self.add_bn_conv_layer(name=base_name + '_l2_1', input=input_layer,
                                      nb_filter=list_nb_filter[1][0], nb_row=1, nb_col=1)
        l2_2 = self.add_bn_conv_layer(name=base_name + '_l2_2', input=l2_1,
                                      nb_filter=list_nb_filter[1][1], nb_row=1, nb_col=7,
                                      padding=(0, 3))
        l2_3 = self.add_bn_conv_layer(name=base_name + '_l2_3', input=l2_2,
                                      nb_filter=list_nb_filter[1][2], nb_row=7, nb_col=1,
                                      padding=(3, 0))
        ## padding and nb_row might not match with the lasagne weights
        l3_1 = self.add_bn_conv_layer(name=base_name + '_l3_1', input=input_layer,
                                      nb_filter=list_nb_filter[2][0], nb_row=1, nb_col=1)
        l3_2 = self.add_bn_conv_layer(name=base_name + '_l3_2', input=l3_1,
                                      nb_filter=list_nb_filter[2][1], nb_row=7, nb_col=1,
                                      padding=(3, 0))
        l3_3 = self.add_bn_conv_layer(name=base_name + '_l3_3', input=l3_2,
                                      nb_filter=list_nb_filter[2][2], nb_row=1, nb_col=7,
                                      padding=(0, 3))
        l3_4 = self.add_bn_conv_layer(name=base_name + '_l3_4', input=l3_3,
                                      nb_filter=list_nb_filter[2][3], nb_row=7, nb_col=1,
                                      padding=(3, 0))
        l3_5 = self.add_bn_conv_layer(name=base_name + '_l3_5', input=l3_4,
                                      nb_filter=list_nb_filter[2][4], nb_row=1, nb_col=7,
                                      padding=(0, 3))
        l4_1 = self.add_to_graph(ZeroPadding2D(padding=(1, 1)),
                                 name=base_name + '_14_1', input=input_layer)
        l4_2 = self.add_to_graph(AveragePooling2D(pool_size=(3, 3), strides=(1, 1)),
                                 name=base_name + '_14_2', input=l4_1)
        l4_3 = self.add_bn_conv_layer(name=base_name + '_l4_3', input=l4_2,
                                      nb_filter=list_nb_filter[3][0], nb_row=1, nb_col=1)
        self.add_to_graph(Activation("linear"), name=base_name,
                          inputs=[l1_1, l2_3, l3_5, l4_3],
                          merge_mode="concat", concat_axis=1)
        self.io.print_info('Added Inception-C {0}'.format(base_name))

    # https://github.com/fchollet/keras/issues/391
    def add_inceptionD(self, input_layer, list_nb_filter, base_name):
        l1_1 = self.add_bn_conv_layer(name=base_name + '_l1_1', input=input_layer,
                                      nb_filter=list_nb_filter[0][0], nb_row=1, nb_col=1)
        l1_2 = self.add_bn_conv_layer(name=base_name + '_l1_2', input=l1_1,
                                      nb_filter=list_nb_filter[0][1], nb_row=3, nb_col=3,
                                      subsample=(2, 2))
        l2_1 = self.add_bn_conv_layer(name=base_name + '_l2_1', input=input_layer,
                                      nb_filter=list_nb_filter[1][0], nb_row=1, nb_col=1)
        l2_2 = self.add_bn_conv_layer(name=base_name + '_l2_2', input=l2_1,
                                      nb_filter=list_nb_filter[1][1], nb_row=1, nb_col=7,
                                      padding=(0, 3))
        l2_3 = self.add_bn_conv_layer(name=base_name + '_l2_3', input=l2_2,
                                      nb_filter=list_nb_filter[1][2], nb_row=7, nb_col=1,
                                      padding=(3, 0))
        l2_4 = self.add_bn_conv_layer(name=base_name + '_l2_4', input=l2_3,
                                      nb_filter=list_nb_filter[1][2], nb_row=3, nb_col=3,
                                      subsample=(2, 2))
        l3_1 = self.add_to_graph(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)),
                                 name=base_name + '_13_1', input=input_layer)
        self.add_to_graph(Activation("linear"), name=base_name,
                          inputs=[l1_2, l2_4, l3_1],
                          merge_mode="concat", concat_axis=1)
        self.io.print_info('Added Inception-D {0}'.format(base_name))

    # https://github.com/fchollet/keras/issues/391
    def add_inceptionE(self, input_layer, list_nb_filter, base_name, pool_mode):
        l1_1 = self.add_bn_conv_layer(name=base_name + '_l1_1', input=input_layer,
                                      nb_filter=list_nb_filter[0][0], nb_row=1, nb_col=1)
        l2_1 = self.add_bn_conv_layer(name=base_name + '_l2_1', input=input_layer,
                                      nb_filter=list_nb_filter[1][0], nb_row=1, nb_col=1)
        l2_2a = self.add_bn_conv_layer(name=base_name + '_l2_2a', input=l2_1,
                                       nb_filter=list_nb_filter[1][1], nb_row=1, nb_col=3,
                                       padding=(0, 1))
        l2_2b = self.add_bn_conv_layer(name=base_name + '_l2_2b', input=l2_1,
                                       nb_filter=list_nb_filter[1][1], nb_row=3, nb_col=1,
                                       padding=(1, 0))
        l3_1 = self.add_bn_conv_layer(name=base_name + '_l3_1', input=input_layer,
                                      nb_filter=list_nb_filter[2][0], nb_row=1, nb_col=1)
        l3_2 = self.add_bn_conv_layer(name=base_name + '_l3_2', input=l3_1,
                                      nb_filter=list_nb_filter[2][1], nb_row=3, nb_col=3,
                                      padding=(1, 1))
        l3_3a = self.add_bn_conv_layer(name=base_name + '_l3_3a', input=l3_2,
                                       nb_filter=list_nb_filter[2][2], nb_row=1, nb_col=3,
                                       padding=(0, 1))
        l3_3b = self.add_bn_conv_layer(name=base_name + '_l3_3b', input=l3_2,
                                       nb_filter=list_nb_filter[2][3], nb_row=3, nb_col=1,
                                       padding=(1, 0))
        l4_1 = self.add_to_graph(ZeroPadding2D(padding=(1, 1)),
                                 name=base_name + '_14_1', input=input_layer)
        l4_2 = self.add_to_graph(pooling_dict[pool_mode](pool_size=(3, 3),
                                                         strides=(1, 1)),
                                 name=base_name + '_14_2', input=l4_1)
        l4_3 = self.add_bn_conv_layer(name=base_name + '_l4_3', input=l4_2,
                                      nb_filter=list_nb_filter[3][0], nb_row=1, nb_col=1)
        self.add_to_graph(Activation("linear"), name=base_name,
                          inputs=[l1_1, l2_2a, l2_2b, l3_3a, l3_3b, l4_3],
                          merge_mode="concat", concat_axis=1)
        self.io.print_info('Added Inception-E {0}'.format(base_name))

    # https://github.com/fchollet/keras/issues/391
    def define(self):
        try:
            self.model = Graph()
            self.model.add_input(name='input',
                                 input_shape=(self.cfgs['n_channels'],
                                              self.cfgs['image_height'],
                                              self.cfgs['image_width']))
            #
            # Part of the network which is defined in the config file should move here
            #
            cgfs_nodes = self.cfgs['nodes']
            for node in cgfs_nodes:
                if not node['type'] == 'Activation':
                    self.add_to_graph(layer_dict[node['type']](**node['parameter']),
                                      name=node['name'], input=node['input'])
                else:
                    self.add_to_graph(layer_dict[node['type']](node['parameter']['mode']),
                                      name=node['name'], input=node['input'])
                #if
                self.io.print_info('Added {1}:{0}'.format(node['type'], node['name']))
            #for
            self.add_inceptionA(input_layer=self.last_added_node,
                                list_nb_filter=((64,), (48, 64), (64, 96, 96), (32,)),
                                base_name='mixed_0')
            self.add_inceptionA(input_layer=self.last_added_node,
                                list_nb_filter=((64,), (48, 64), (64, 96, 96), (64,)),
                                base_name='mixed_1')
            self.add_inceptionA(input_layer=self.last_added_node,
                                list_nb_filter=((64,), (48, 64), (64, 96, 96), (64,)),
                                base_name='mixed_2')
            self.add_to_graph(layer_dict[self.cfgs['pooling_type']](
                pool_size=(self.cfgs['pooling_kernel_size'],
                           self.cfgs['pooling_kernel_size'])),
                name='mixed_pool_0', input='mixed_0')
            self.io.print_info('Added {1}:{0}'.format(self.cfgs['pooling_type'],
                                                      self.last_added_node))
            self.add_to_graph(layer_dict[self.cfgs['pooling_type']](
                pool_size=(self.cfgs['pooling_kernel_size'],
                           self.cfgs['pooling_kernel_size'])),
                name='mixed_pool_1', input='mixed_1')
            self.io.print_info('Added {1}:{0}'.format(self.cfgs['pooling_type'],
                                                      self.last_added_node))
            self.add_to_graph(layer_dict[self.cfgs['pooling_type']](
                pool_size=(self.cfgs['pooling_kernel_size'],
                           self.cfgs['pooling_kernel_size'])),
                name='mixed_pool_2', input='mixed_2')
            self.io.print_info('Added {1}:{0}'.format(self.cfgs['pooling_type'],
                                                      self.last_added_node))
            self.add_to_graph(Flatten(), name='mixed_flatten_0', input='mixed_pool_0')
            self.add_to_graph(Dense(self.cfgs['nb_dense'],
                                    activation=self.cfgs['fc_non_linearity']),
                              name='mixed_dense_0', input='mixed_flatten_0')
            self.io.print_info('Added {1}:{0}'.format('Dense', self.last_added_node))
            self.add_to_graph(Flatten(), name='mixed_flatten_1', input='mixed_pool_1')
            self.add_to_graph(Dense(self.cfgs['nb_dense'],
                                    activation=self.cfgs['fc_non_linearity']),
                              name='mixed_dense_1', input='mixed_flatten_1')
            self.io.print_info('Added {1}:{0}'.format('Dense', self.last_added_node))
            self.add_to_graph(Flatten(), name='mixed_flatten_2', input='mixed_pool_2')
            self.add_to_graph(Dense(self.cfgs['nb_dense'],
                                    activation=self.cfgs['fc_non_linearity']),
                              name='mixed_dense_2', input='mixed_flatten_2')
            self.io.print_info('Added {1}:{0}'.format('Dense', self.last_added_node))
            self.add_to_graph(Activation("linear"), name='concat_0',
                              inputs=['mixed_dense_0', 'mixed_dense_1', 'mixed_dense_2'],
                              merge_mode="concat", concat_axis=1)
            _ = self.add_to_graph(BatchNormalization(mode=0, epsilon=0.0001, axis=1),
                                  name='bn_norm_concat_0', input='concat_0')
            self.add_to_graph(Activation(self.cfgs['concat_non_linearity']),
                              name='bn_concat_0_nonlin', input='bn_norm_concat_0',
                              merge_mode="concat", concat_axis=1)
            self.add_to_graph(Dense(self.cfgs['nb_classes'],
                                    activation=self.cfgs['activation']),
                              name='aesthetics', input='bn_concat_0_nonlin')
            self.model.add_output(name='output', input='aesthetics')
            if not self.cfgs['model_weights_file'] == None:
                self.init_from_this()
            #if
        except Exception as err:
            self.io.print_error('Error configuring the model, {0}'.format(err))
            self.init = False
            return
        #try
        self.init = True
    #def

    def init_from_this(self):
        weights_file = self.cfgs['model_weights_file']
        if not weights_file == 'None':
            self.load_weights(weights_file)
            self.io.print_info('Weights Initalized from {0}'.format(weights_file))
        #if
    #def

    def load_weights(self, filepath):
        if filepath.endswith('.npz'):
            pfile = open(filepath, 'r')
            graph = np.load(pfile)['graph'].item()
            for node_name, weights in graph.items():
                if node_name in self.cfgs['ignore_while_loading']:
                    self.io.print_warning('Ignoring weights from {0}'.format(node_name))
                    continue
                #if
                self.io.print_info('Transfering parameters from {0}'.format(node_name))
                self.model.nodes[node_name].set_weights(weights)
            #for
            pfile.close()
        elif filepath.endswith('.hdf5'):
            self.model.load_weights(filepath)
            self.io.print_info('Ported weights from : {}'.format(filepath))
        else:
            self.io.print_error('Unknown model weights file {}'.format(filepath))
        #if

    def compile(self, compile_cfgs):
        try:
            opt = optimizer_dict[compile_cfgs['optimizer']](
                lr=compile_cfgs['lr'], epsilon=compile_cfgs['epsilon'])
            self.model.compile(
                loss={'output': loss_functions_dict[compile_cfgs['loss']]},
                optimizer=opt)
        except Exception as e:
            self.io.print_error('Error configuring the model, {0}'.format(e))
            self.init = False
            return
        #try
        self.init = True
def test_TensorBoard():
    import shutil
    import tensorflow as tf
    import keras.backend.tensorflow_backend as KTF
    old_session = KTF.get_session()
    filepath = './logs'
    (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=train_samples,
                                                         nb_test=test_samples,
                                                         input_shape=(input_dim,),
                                                         classification=True,
                                                         nb_class=nb_class)
    y_test = np_utils.to_categorical(y_test)
    y_train = np_utils.to_categorical(y_train)

    def data_generator(train):
        if train:
            max_batch_index = len(X_train) // batch_size
        else:
            max_batch_index = len(X_test) // batch_size
        i = 0
        while 1:
            if train:
                yield (X_train[i * batch_size:(i + 1) * batch_size],
                       y_train[i * batch_size:(i + 1) * batch_size])
            else:
                yield (X_test[i * batch_size:(i + 1) * batch_size],
                       y_test[i * batch_size:(i + 1) * batch_size])
            i += 1
            i = i % max_batch_index

    def data_generator_graph(train):
        while 1:
            if train:
                yield {'X_vars': X_train, 'output': y_train}
            else:
                yield {'X_vars': X_test, 'output': y_test}

    # case 1 Sequential
    with tf.Graph().as_default():
        session = tf.Session('')
        KTF.set_session(session)
        model = Sequential()
        model.add(Dense(nb_hidden, input_dim=input_dim, activation='relu'))
        model.add(Dense(nb_class, activation='softmax'))
        model.compile(loss='categorical_crossentropy', optimizer='sgd',
                      metrics=['accuracy'])

        tsb = callbacks.TensorBoard(log_dir=filepath, histogram_freq=1)
        cbks = [tsb]

        # fit with validation data
        model.fit(X_train, y_train, batch_size=batch_size,
                  validation_data=(X_test, y_test), callbacks=cbks, nb_epoch=2)
        # fit with validation data and accuracy
        model.fit(X_train, y_train, batch_size=batch_size,
                  validation_data=(X_test, y_test), callbacks=cbks, nb_epoch=2)
        # fit generator with validation data
        model.fit_generator(data_generator(True), len(X_train), nb_epoch=2,
                            validation_data=(X_test, y_test), callbacks=cbks)
        # fit generator without validation data
        model.fit_generator(data_generator(True), len(X_train), nb_epoch=2,
                            callbacks=cbks)
        # fit generator with validation data and accuracy
        model.fit_generator(data_generator(True), len(X_train), nb_epoch=2,
                            validation_data=(X_test, y_test), callbacks=cbks)
        # fit generator without validation data and accuracy
        model.fit_generator(data_generator(True), len(X_train), nb_epoch=2,
                            callbacks=cbks)
        assert os.path.exists(filepath)
        shutil.rmtree(filepath)

    # case 2 Graph
    with tf.Graph().as_default():
        session = tf.Session('')
        KTF.set_session(session)
        model = Graph()
        model.add_input(name='X_vars', input_shape=(input_dim,))
        model.add_node(Dense(nb_hidden, activation="sigmoid"),
                       name='Dense1', input='X_vars')
        model.add_node(Dense(nb_class, activation="softmax"),
                       name='last_dense', input='Dense1')
        model.add_output(name='output', input='last_dense')
        model.compile(optimizer='sgd', loss={'output': 'mse'})

        tsb = callbacks.TensorBoard(log_dir=filepath, histogram_freq=1)
        cbks = [tsb]

        # fit with validation
        model.fit({'X_vars': X_train, 'output': y_train},
                  batch_size=batch_size,
                  validation_data={'X_vars': X_test, 'output': y_test},
                  callbacks=cbks, nb_epoch=2)
        # fit wo validation
        model.fit({'X_vars': X_train, 'output': y_train},
                  batch_size=batch_size, callbacks=cbks, nb_epoch=2)
        # fit generator with validation
        model.fit_generator(data_generator_graph(True), 1000, nb_epoch=2,
                            validation_data={'X_vars': X_test, 'output': y_test},
                            callbacks=cbks)
        # fit generator wo validation
        model.fit_generator(data_generator_graph(True), 1000, nb_epoch=2,
                            callbacks=cbks)
        assert os.path.exists(filepath)
        shutil.rmtree(filepath)

    KTF.set_session(old_session)
def test_create_output():
    # test create_output argument
    graph = Graph()
    graph.add_input(name='input1', input_shape=(32,))
    graph.add_node(Dense(16), name='dense1', input='input1')
    graph.add_node(Dense(4), name='dense2', input='input1')
    graph.add_node(Dense(4), name='dense3', input='dense1')
    graph.add_node(Dense(4), name='output1', inputs=['dense2', 'dense3'],
                   merge_mode='sum', create_output=True)
    graph.compile('rmsprop', {'output1': 'mse'})

    history = graph.fit({'input1': X_train_graph, 'output1': y_train_graph},
                        nb_epoch=10)
    out = graph.predict({'input1': X_test_graph})
    assert type(out) == dict
    assert len(out) == 1

    loss = graph.test_on_batch({'input1': X_test_graph, 'output1': y_test_graph})
    loss = graph.train_on_batch({'input1': X_test_graph, 'output1': y_test_graph})
    loss = graph.evaluate({'input1': X_test_graph, 'output1': y_test_graph})
    assert (loss < 2.5)
def test_count_params():
    # test count params
    nb_units = 100
    nb_classes = 2
    graph = Graph()
    graph.add_input(name='input1', input_shape=(32,))
    graph.add_input(name='input2', input_shape=(32,))
    graph.add_node(Dense(nb_units), name='dense1', input='input1')
    graph.add_node(Dense(nb_classes), name='dense2', input='input2')
    graph.add_node(Dense(nb_classes), name='dense3', input='dense1')
    graph.add_output(name='output', inputs=['dense2', 'dense3'],
                     merge_mode='sum')

    # expected parameter count: weights + biases for each of the three
    # Dense layers
    n = 32 * nb_units + nb_units
    n += 32 * nb_classes + nb_classes
    n += nb_units * nb_classes + nb_classes

    assert (n == graph.count_params())
    graph.compile('rmsprop', {'output': 'binary_crossentropy'})
    assert (n == graph.count_params())
def test_lambda():
    (X_train, y_train), (X_test, y_test) = _get_test_data()

    def func(X):
        s = X[0]
        for i in range(1, len(X)):
            s += X[i]
        return s

    def activation(X):
        return K.softmax(X)

    def output_shape(input_shapes):
        return input_shapes[0]

    left = Sequential()
    left.add(Dense(nb_hidden, input_shape=(input_dim,)))
    left.add(Activation('relu'))

    right = Sequential()
    right.add(Dense(nb_hidden, input_shape=(input_dim,)))
    right.add(Activation('relu'))

    model = Sequential()
    model.add(LambdaMerge([left, right], function=func,
                          output_shape=output_shape))
    model.add(Dense(nb_class))
    model.add(Lambda(activation))
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

    model.fit([X_train, X_train], y_train, batch_size=batch_size,
              nb_epoch=nb_epoch, show_accuracy=True, verbose=0,
              validation_data=([X_test, X_test], y_test))
    model.fit([X_train, X_train], y_train, batch_size=batch_size,
              nb_epoch=nb_epoch, show_accuracy=False, verbose=0,
              validation_data=([X_test, X_test], y_test))
    model.fit([X_train, X_train], y_train, batch_size=batch_size,
              nb_epoch=nb_epoch, show_accuracy=True, verbose=0,
              validation_split=0.1)
    model.fit([X_train, X_train], y_train, batch_size=batch_size,
              nb_epoch=nb_epoch, show_accuracy=False, verbose=0,
              validation_split=0.1)
    model.fit([X_train, X_train], y_train, batch_size=batch_size,
              nb_epoch=nb_epoch, verbose=0)
    model.fit([X_train, X_train], y_train, batch_size=batch_size,
              nb_epoch=nb_epoch, verbose=0, shuffle=False)

    loss = model.evaluate([X_test, X_test], y_test, verbose=0)
    assert (loss < 0.8)

    model.predict([X_test, X_test], verbose=0)
    model.predict_classes([X_test, X_test], verbose=0)
    model.predict_proba([X_test, X_test], verbose=0)
    model.get_config(verbose=0)

    # test weight saving
    fname = 'test_lambda_temp.h5'
    model.save_weights(fname, overwrite=True)
    left = Sequential()
    left.add(Dense(nb_hidden, input_shape=(input_dim,)))
    left.add(Activation('relu'))
    right = Sequential()
    right.add(Dense(nb_hidden, input_shape=(input_dim,)))
    right.add(Activation('relu'))
    model = Sequential()
    model.add(LambdaMerge([left, right], function=func,
                          output_shape=output_shape))
    model.add(Dense(nb_class))
    model.add(Lambda(activation))
    model.load_weights(fname)
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    os.remove(fname)

    nloss = model.evaluate([X_test, X_test], y_test, verbose=0)
    assert (loss == nloss)

    # test "join" mode in Lambda
    def difference(input_dict):
        assert (len(input_dict) == 2)
        keys = list(input_dict.keys())
        return input_dict[keys[0]] - input_dict[keys[1]]

    g = Graph()
    g.add_input(name='input_a', input_shape=(2,))
    g.add_input(name='input_b', input_shape=(2,))
    g.add_node(Lambda(difference), inputs=['input_a', 'input_b'],
               merge_mode='join', name='d')
    g.add_output(name='output', input='d')
    g.compile(loss={'output': 'categorical_crossentropy'}, optimizer='rmsprop')
def build_graph_noCNN(n_variables):
    '''
    Creates the Graph component of the model: a bidirectional GRU
    (forward + backward legs) with no CNN front-end.
    '''
    graph = Graph()
    graph.add_input(name='data', input_shape=(N_TRACKS, n_variables))
    n_gram = 1
    graph.add_node(GRU(25), name='gru_fwd_%s' % n_gram, input='data')
    graph.add_node(GRU(25, go_backwards=True),
                   name='gru_bwd_%s' % n_gram, input='data')
    # identity layer used purely to concatenate the two GRU directions
    pass_thru = Lambda(lambda x: x)
    graph.add_node(pass_thru, name='unit_%s' % n_gram,
                   inputs=['gru_fwd_%s' % n_gram, 'gru_bwd_%s' % n_gram])
    graph.add_node(Dropout(0.4), name='dropout',
                   input='unit_%s' % n_gram, create_output=True)
    return graph
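# The identity Lambda above exists only as a merge point: add_node merges
# its `inputs` (default merge_mode='concat') before applying the layer, so
# a pass-through layer turns the call into a pure concatenation of the
# forward and backward GRU outputs. The trick in isolation, as a sketch
# against the legacy Graph API (names illustrative):
from keras.models import Graph
from keras.layers.core import Dense, Lambda

g = Graph()
g.add_input(name='x', input_shape=(8,))
g.add_node(Dense(4), name='a', input='x')
g.add_node(Dense(4), name='b', input='x')
g.add_node(Lambda(lambda t: t), name='ab', inputs=['a', 'b'])  # = concat(a, b)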
####################
# save graph model #
####################
X = np.random.random((100, 32))
X2 = np.random.random((100, 32))
y = np.random.random((100, 4))
y2 = np.random.random((100,))

(X_train, y_train), (X_test, y_test) = get_test_data(nb_train=1000,
                                                     nb_test=200,
                                                     input_shape=(32,),
                                                     classification=False,
                                                     output_shape=(4,))

graph = Graph()
graph.add_input(name='input1', input_shape=(32,))
graph.add_node(Dense(16), name='dense1', input='input1')
graph.add_node(Dense(4), name='dense2', input='input1')
graph.add_node(Dense(4), name='dense3', input='dense1')
graph.add_output(name='output1', inputs=['dense2', 'dense3'],
                 merge_mode='sum')
graph.compile('rmsprop', {'output1': 'mse'})
graph.get_config(verbose=1)

history = graph.fit({'input1': X_train, 'output1': y_train}, nb_epoch=10)
original_pred = graph.predict({'input1': X_test})
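# A sketch of the save/load round-trip this block sets up (file name is
# illustrative; uses the legacy save_weights/load_weights methods):
fname = 'graph_temp.h5'
graph.save_weights(fname, overwrite=True)
graph.load_weights(fname)
resumed_pred = graph.predict({'input1': X_test})
# weights were restored exactly, so predictions should match
assert np.allclose(original_pred['output1'], resumed_pred['output1'])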
def __init__(self, task_types, model_params, initialize_raw_model=True):
    super(MultiTaskDNN, self).__init__(task_types, model_params,
                                       initialize_raw_model=initialize_raw_model)
    if initialize_raw_model:
        sorted_tasks = sorted(task_types.keys())
        (n_inputs,) = model_params["data_shape"]
        model = Graph()
        model.add_input(name="input", input_shape=(n_inputs,))
        prev_layer = "input"
        # shared fully-connected trunk
        for ind in range(model_params["nb_layers"]):
            dense_layer_name = "dense%d" % ind
            activation_layer_name = "activation%d" % ind
            batchnorm_layer_name = "batchnorm%d" % ind
            dropout_layer_name = "dropout%d" % ind
            model.add_node(Dense(model_params["nb_hidden"],
                                 init=model_params["init"]),
                           name=dense_layer_name, input=prev_layer)
            prev_layer = dense_layer_name
            if model_params["batchnorm"]:
                model.add_node(BatchNormalization(), input=prev_layer,
                               name=batchnorm_layer_name)
                prev_layer = batchnorm_layer_name
            model.add_node(Activation(model_params["activation"]),
                           name=activation_layer_name, input=prev_layer)
            prev_layer = activation_layer_name
            if model_params["dropout"] > 0:
                model.add_node(Dropout(model_params["dropout"]),
                               name=dropout_layer_name, input=prev_layer)
                prev_layer = dropout_layer_name
        # one output head per task
        for ind, task in enumerate(sorted_tasks):
            task_type = task_types[task]
            if task_type == "classification":
                model.add_node(Dense(2, init=model_params["init"],
                                     activation="softmax"),
                               name="dense_head%d" % ind, input=prev_layer)
            elif task_type == "regression":
                model.add_node(Dense(1, init=model_params["init"]),
                               name="dense_head%d" % ind, input=prev_layer)
            model.add_output(name="task%d" % ind, input="dense_head%d" % ind)
        loss_dict = {}
        for ind, task in enumerate(sorted_tasks):
            task_type, taskname = task_types[task], "task%d" % ind
            if task_type == "classification":
                loss_dict[taskname] = "binary_crossentropy"
            elif task_type == "regression":
                loss_dict[taskname] = "mean_squared_error"
        sgd = SGD(lr=model_params["learning_rate"],
                  decay=model_params["decay"],
                  momentum=model_params["momentum"],
                  nesterov=model_params["nesterov"])
        model.compile(optimizer=sgd, loss=loss_dict)
        self.raw_model = model
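# A hypothetical model_params dict for the builder above (key names taken
# from the constructor; the values are illustrative only):
model_params = {
    "data_shape": (1024,),       # (n_inputs,)
    "nb_layers": 2,
    "nb_hidden": 500,
    "init": "glorot_uniform",
    "activation": "relu",
    "batchnorm": True,
    "dropout": 0.25,
    "learning_rate": 0.01,
    "decay": 1e-6,
    "momentum": 0.9,
    "nesterov": True,
}
task_types = {"solubility": "regression", "toxicity": "classification"}
dnn = MultiTaskDNN(task_types, model_params)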
def test_2o_1i_weights():
    # test a non-sequential graph with 1 input and 2 outputs
    graph = Graph()
    graph.add_input(name='input1', input_shape=(32,))
    graph.add_node(Dense(16), name='dense1', input='input1')
    graph.add_node(Dense(4), name='dense2', input='input1')
    graph.add_node(Dense(1), name='dense3', input='dense1')
    graph.add_output(name='output1', input='dense2')
    graph.add_output(name='output2', input='dense3')
    graph.compile('rmsprop', {'output1': 'mse', 'output2': 'mse'})

    graph.fit({'input1': X_train_graph,
               'output1': y_train_graph,
               'output2': y2_train_graph}, nb_epoch=10)
    out = graph.predict({'input1': X_test_graph})
    assert isinstance(out, dict)
    assert len(out) == 2

    loss = graph.test_on_batch({'input1': X_test_graph,
                                'output1': y_test_graph,
                                'output2': y2_test_graph})
    loss = graph.train_on_batch({'input1': X_test_graph,
                                 'output1': y_test_graph,
                                 'output2': y2_test_graph})
    loss = graph.evaluate({'input1': X_test_graph,
                           'output1': y_test_graph,
                           'output2': y2_test_graph})
    assert loss < 4.

    # test weight saving
    fname = 'test_2o_1i_weights_temp.h5'
    graph.save_weights(fname, overwrite=True)
    graph = Graph()
    graph.add_input(name='input1', input_shape=(32,))
    graph.add_node(Dense(16), name='dense1', input='input1')
    graph.add_node(Dense(4), name='dense2', input='input1')
    graph.add_node(Dense(1), name='dense3', input='dense1')
    graph.add_output(name='output1', input='dense2')
    graph.add_output(name='output2', input='dense3')
    graph.compile('rmsprop', {'output1': 'mse', 'output2': 'mse'})
    graph.load_weights(fname)
    os.remove(fname)

    nloss = graph.evaluate({'input1': X_test_graph,
                            'output1': y_test_graph,
                            'output2': y2_test_graph})
    assert loss == nloss
# Model learning and prediction # TODO: test different learning algorithms y_train = np.array(labels_train) y_test = np.array(labels_test) y_train = (y_train == 1).astype('float32') y_test = (y_test == 1).astype('float32') print("Pad sequences (samples x time)") X_train = sequence.pad_sequences(data_train, maxlen=maxlen) X_test = sequence.pad_sequences(data_test, maxlen=maxlen) print('X_train shape:', X_train.shape) print('X_test shape:', X_test.shape) print('Build model...') model = Graph() model.add_input(name='input', input_shape=(maxlen, ), dtype=int) model.add_node(Embedding(max_features, 128, input_length=maxlen), name='embedding', input='input') model.add_node(LSTM(64), name='forward', input='embedding') model.add_node(LSTM(64, go_backwards=True), name='backward', input='embedding') model.add_node(Dropout(0.5), name='dropout', inputs=['forward', 'backward']) model.add_node(Dense(1, activation='sigmoid'), name='sigmoid', input='dropout') model.add_output(name='output', input='sigmoid') # try using different optimizers and different optimizer configs model.compile('adam', {'output': 'binary_crossentropy'}) print('Train...') model.fit({
def test_siamese_5():
    graph = Graph()
    graph.add_input(name='input1', input_shape=(32,))
    graph.add_input(name='input2', input_shape=(32,))
    graph.add_shared_node(Dense(16), name='shared1',
                          inputs=['input1', 'input2'])
    graph.add_shared_node(Dense(4), name='shared2', inputs=['shared1'])
    graph.add_shared_node(Dense(4), name='shared3', inputs=['shared2'],
                          outputs=['shared_output1', 'shared_output2'])
    graph.add_node(Dense(4), name='dense1', input='shared_output1')
    graph.add_node(Dense(4), name='dense2', input='shared_output2')
    graph.add_output(name='output1', inputs=['dense1', 'dense2'],
                     merge_mode='sum')
    graph.compile('rmsprop', {'output1': 'mse'})

    graph.fit({'input1': X_train_graph,
               'input2': X2_train_graph,
               'output1': y_train_graph}, nb_epoch=10)
    out = graph.predict({'input1': X_test_graph, 'input2': X2_test_graph})
    assert isinstance(out, dict)
    assert len(out) == 1

    loss = graph.test_on_batch({'input1': X_test_graph,
                                'input2': X2_test_graph,
                                'output1': y_test_graph})
    loss = graph.train_on_batch({'input1': X_test_graph,
                                 'input2': X2_test_graph,
                                 'output1': y_test_graph})
    loss = graph.evaluate({'input1': X_test_graph,
                           'input2': X2_test_graph,
                           'output1': y_test_graph})
    assert loss < 3.0
    graph.get_config(verbose=1)
number_of_training_data, row_count, column_count, max_features, DEBUG)
dt.saveImage(y_train[0], "gt.png", row_count, column_count, 3, True)
print(len(X_train), 'train sequences')
print(len(X_test), 'test sequences')
print("Pad sequences (samples x time)")
#X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
#X_test = sequence.pad_sequences(X_test, maxlen=maxlen)
print('X_train shape:', X_train.shape)
print('X_test shape:', X_test.shape)
y_train = np.array(y_train)
y_test = np.array(y_test)
print('Build model...')
model = Graph()
model.add_input(name='input', input_shape=(None, max_features),
                dtype='float')
#model.add_node(Embedding(3, 128, input_length=(maxlen, 3)), name='embedding', input='input')
#model.add(TimeDistributedDense(2, init='uniform', input_dim=3), name='input')
model.add_node(gl.GridLSTM(N_HIDDEN, return_sequences=True),
               name='forward', input='input')
#model.add_node(LSTM(N_HIDDEN, return_sequences=True), name='forward_2', input='forward')
#model.add_node(LSTM(N_HIDDEN, return_sequences=True), name='forward_3', input='forward_2')
#model.add_node(LSTM(N_HIDDEN, return_sequences=True), name='forward_4', input='forward_3')
#model.add_node(LSTM(N_HIDDEN, return_sequences=True), name='forward_5', input='forward_4')
#model.add_node(LSTM(N_HIDDEN, return_sequences=True, go_backwards=True), name='backward', input='input')
model.add_node(Dropout(0.5), name='dropout', input='forward')
#model.add_node(Merge(layers=['forward', 'backward'], mode='sum'))
def test_1o_2i():
    # test a non-sequential graph with 2 inputs and 1 output
    graph = Graph()
    graph.add_input(name='input1', input_shape=(32,))
    graph.add_input(name='input2', input_shape=(32,))
    graph.add_node(Dense(16), name='dense1', input='input1')
    graph.add_node(Dense(4), name='dense2', input='input2')
    graph.add_node(Dense(4), name='dense3', input='dense1')
    graph.add_output(name='output1', inputs=['dense2', 'dense3'],
                     merge_mode='sum')
    graph.compile('rmsprop', {'output1': 'mse'})

    graph.fit({'input1': X_train_graph,
               'input2': X2_train_graph,
               'output1': y_train_graph}, nb_epoch=10)
    out = graph.predict({'input1': X_test_graph, 'input2': X2_test_graph})
    assert isinstance(out, dict)
    assert len(out) == 1

    loss = graph.test_on_batch({'input1': X_test_graph,
                                'input2': X2_test_graph,
                                'output1': y_test_graph})
    loss = graph.train_on_batch({'input1': X_test_graph,
                                 'input2': X2_test_graph,
                                 'output1': y_test_graph})
    loss = graph.evaluate({'input1': X_test_graph,
                           'input2': X2_test_graph,
                           'output1': y_test_graph})
    assert loss < 3.0
    graph.get_config(verbose=1)
geodistance(results[i], test_labels[i]) for i in range(results.shape[0]) ]))) print("Median error = " + repr( np.median([ geodistance(results[i], test_labels[i]) for i in range(results.shape[0]) ]))) print("") print( "Method = CNN from the paper 'Convolutional Neural Networks for Sentence Classification'" ) np.random.seed(0) nb_filter = embeddings_dim model = Graph() model.add_input(name='input', input_shape=(max_sent_len, ), dtype=int) model.add_node(Embedding(max_features, embeddings_dim, input_length=max_sent_len, mask_zero=False, weights=[embedding_weights]), name='embedding', input='input') model.add_node(Dropout(0.25), name='dropout_embedding', input='embedding') for n_gram in [3, 5, 7]: model.add_node(Convolution1D(nb_filter=nb_filter, filter_length=n_gram, border_mode='valid', activation='relu', subsample_length=1,
def test_1o_1i_2():
    # test a more complex non-sequential graph with 1 input and 1 output
    graph = Graph()
    graph.add_input(name='input1', input_shape=(32,))
    graph.add_node(Dense(16), name='dense1', input='input1')
    graph.add_node(Dense(4), name='dense2-0', input='input1')
    graph.add_node(Activation('relu'), name='dense2', input='dense2-0')
    graph.add_node(Dense(16), name='dense3', input='dense2')
    graph.add_node(Dense(4), name='dense4', inputs=['dense1', 'dense3'],
                   merge_mode='sum')
    graph.add_output(name='output1', inputs=['dense2', 'dense4'],
                     merge_mode='sum')
    graph.compile('rmsprop', {'output1': 'mse'})

    graph.fit({'input1': X_train_graph, 'output1': y_train_graph},
              nb_epoch=10)
    out = graph.predict({'input1': X_train_graph})
    assert isinstance(out, dict)
    assert len(out) == 1

    loss = graph.test_on_batch({'input1': X_test_graph,
                                'output1': y_test_graph})
    loss = graph.train_on_batch({'input1': X_test_graph,
                                 'output1': y_test_graph})
    loss = graph.evaluate({'input1': X_test_graph, 'output1': y_test_graph})
    assert loss < 2.5
    graph.get_config(verbose=1)
    graph.summary()
def test_1o_1i():
    # test a non-sequential graph with 1 input and 1 output
    np.random.seed(1337)
    graph = Graph()
    graph.add_input(name='input1', input_shape=(32,))
    graph.add_node(Dense(16), name='dense1', input='input1')
    graph.add_node(Dense(4), name='dense2', input='input1')
    graph.add_node(Dense(4), name='dense3', input='dense1')
    graph.add_output(name='output1', inputs=['dense2', 'dense3'],
                     merge_mode='sum')
    graph.compile('rmsprop', {'output1': 'mse'})

    graph.fit({'input1': X_train_graph, 'output1': y_train_graph},
              nb_epoch=10)
    out = graph.predict({'input1': X_test_graph})
    assert isinstance(out, dict)
    assert len(out) == 1

    loss = graph.test_on_batch({'input1': X_test_graph,
                                'output1': y_test_graph})
    loss = graph.train_on_batch({'input1': X_test_graph,
                                 'output1': y_test_graph})
    loss = graph.evaluate({'input1': X_test_graph,
                           'output1': y_test_graph}, verbose=0)
    assert loss < 2.5

    # test validation split
    graph.fit({'input1': X_train_graph, 'output1': y_train_graph},
              validation_split=0.2, nb_epoch=1)
    # test validation data
    graph.fit({'input1': X_train_graph, 'output1': y_train_graph},
              validation_data={'input1': X_train_graph,
                               'output1': y_train_graph},
              nb_epoch=1)
def test_graph_fit_generator():
    def data_generator_graph(train):
        # endless generator of full-batch input/output dicts
        while 1:
            if train:
                yield {'input1': X_train_graph, 'output1': y_train_graph}
            else:
                yield {'input1': X_test_graph, 'output1': y_test_graph}

    graph = Graph()
    graph.add_input(name='input1', input_shape=(32,))
    graph.add_node(Dense(16), name='dense1', input='input1')
    graph.add_node(Dense(4), name='dense2', input='input1')
    graph.add_node(Dense(4), name='dense3', input='dense1')
    graph.add_output(name='output1', inputs=['dense2', 'dense3'],
                     merge_mode='sum')
    graph.compile('rmsprop', {'output1': 'mse'})

    graph.fit_generator(data_generator_graph(True), 1000, nb_epoch=4)
    graph.fit_generator(data_generator_graph(True), 1000, nb_epoch=4,
                        validation_data={'input1': X_test_graph,
                                         'output1': y_test_graph})
    graph.fit_generator(data_generator_graph(True), 1000, nb_epoch=4,
                        validation_data=data_generator_graph(False),
                        nb_val_samples=batch_size * 3)

    gen_loss = graph.evaluate_generator(data_generator_graph(True), 128,
                                        verbose=0)
    assert gen_loss < 3.

    loss = graph.evaluate({'input1': X_test_graph,
                           'output1': y_test_graph}, verbose=0)
    assert loss < 3.
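# In this legacy API, fit_generator(generator, samples_per_epoch, nb_epoch)
# consumes an endless generator of input/output dicts; the generator above
# yields the full arrays, so every "batch" is one whole pass. A sketch of
# a real batching generator under the same assumptions:
def batch_generator(X, y, batch_size=32):
    while 1:  # loop forever; fit_generator stops after samples_per_epoch
        for i in range(0, len(X), batch_size):
            yield {'input1': X[i:i + batch_size],
                   'output1': y[i:i + batch_size]}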
def run_model_varyembed(dataset, numhidden, hiddendim, idx2word, idx2label, w2v, basedir, embedding_dim=400, validate=True, num_epochs=30): train_toks, valid_toks, test_toks, \ train_lex, valid_lex, test_lex, \ train_y, valid_y, test_y = dataset maxlen = max([len(l) for l in train_lex]) if len(valid_lex) > 0: maxlen = max(maxlen, max([len(l) for l in valid_lex])) maxlen = max(maxlen, max([len(l) for l in test_lex])) vocsize = max(idx2word.keys()) + 1 nclasses = max(idx2label.keys()) + 1 # Pad inputs to max sequence length and turn into one-hot vectors train_lex = sequence.pad_sequences(train_lex, maxlen=maxlen) valid_lex = sequence.pad_sequences(valid_lex, maxlen=maxlen) test_lex = sequence.pad_sequences(test_lex, maxlen=maxlen) train_y = sequence.pad_sequences(train_y, maxlen=maxlen) valid_y = sequence.pad_sequences(valid_y, maxlen=maxlen) test_y = sequence.pad_sequences(test_y, maxlen=maxlen) train_y = vectorize_set(train_y, maxlen, nclasses) valid_y = vectorize_set(valid_y, maxlen, nclasses) test_y = vectorize_set(test_y, maxlen, nclasses) # Build the model ## BI-DIRECTIONAL print('Building the model...') H = numhidden model = Graph() model.add_input(name='input', input_shape=[maxlen], dtype='int') # Add embedding layer if w2v is None: model.add_node(Embedding(vocsize, embedding_dim, init='lecun_uniform', input_length=maxlen), name='embed', input='input') else: embeds = init_embedding_weights(idx2word, w2v) embed_dim = w2v.wv.syn0.shape[1] model.add_node(Embedding(vocsize, embed_dim, input_length=maxlen, weights=[embeds], mask_zero=True), name='embed', input='input') # Build first hidden layer model.add_node(LSTM(hiddendim, return_sequences=True, activation='tanh'), name='forward0', input='embed') model.add_node(Dropout(0.1), name='dropout0f', input='forward0') model.add_node(LSTM(hiddendim, return_sequences=True, go_backwards=True, activation='tanh'), name='backwards0', input='embed') model.add_node(Dropout(0.1), name='dropout0b', input='backwards0') # Build subsequent hidden layers if H > 1: for i in range(1, H): model.add_node(LSTM(hiddendim, return_sequences=True, activation='tanh'), name='forward%d' % i, input='dropout%df' % (i - 1)) model.add_node(Dropout(0.1), name='dropout%df' % i, input='forward%d' % i) model.add_node(LSTM(hiddendim, return_sequences=True, go_backwards=True, activation='tanh'), name='backwards%d' % i, input='dropout%db' % (i - 1)) model.add_node(Dropout(0.1), name='dropout%db' % i, input='backwards%d' % i) # Finish up the network model.add_node(TimeDistributedDense(nclasses), name='tdd', inputs=['dropout%df' % (H - 1), 'dropout%db' % (H - 1)], merge_mode='ave') model.add_node(Activation('softmax'), name='softmax', input='tdd') model.add_output(name='output', input='softmax') model.compile(optimizer='rmsprop', loss={'output': 'categorical_crossentropy'}) # Set up callbacks fileprefix = 'embed_varied_' am = approximateMatch.ApproximateMatch_SEQ(valid_toks, valid_y, valid_lex, idx2label, pred_dir=os.path.join( basedir, 'predictions'), fileprefix=fileprefix) mc = callbacks.ModelCheckpoint( os.path.join(basedir, 'models', 'embedding.model.weights.{epoch:02d}.hdf5')) cbs = [am, mc] if validate: early_stopping = callbacks.EarlyStopping(monitor='val_loss', patience=3) cbs.append(early_stopping) # Train the model print('Training...') hist = model.fit({ 'input': train_lex, 'output': train_y }, nb_epoch=num_epochs, batch_size=1, validation_data={ 'input': valid_lex, 'output': valid_y }, callbacks=cbs) if validate: val_f1, best_model = learning_curve( hist, 
preddir=os.path.join(basedir, 'predictions'), pltname=os.path.join( basedir, 'charts', 'hist_varyembed%d_nhidden%d.pdf' % (hiddendim, numhidden)), fileprefix=fileprefix) else: best_model = num_epochs - 1 val_f1 = 0.0 # Save model json_string = model.to_json() open(os.path.join(basedir, 'models', 'embedding_model_architecture.json'), 'w').write(json_string) # Test bestmodelfile = os.path.join( basedir, 'models', 'embedding.model.weights.%02d.hdf5' % best_model) shutil.copyfile(bestmodelfile, bestmodelfile.replace('.hdf5', '.best.hdf5')) if validate: model = model_from_json( open( os.path.join(basedir, 'models', 'embedding_model_architecture.json')).read()) model.load_weights(bestmodelfile) scores = predict_score(model, test_lex, test_toks, test_y, os.path.join(basedir, 'predictions'), idx2label, maxlen, fileprefix=fileprefix) scores['val_f1'] = val_f1 return scores, hist.history, best_model
class SiameseNet:
    '''
    A rough implementation of Chopra et al. 2005's Siamese network.

    Essentially a wrapper around a Graph that builds two identical legs
    (optionally with shared weights) and adds one more layer that merges
    them with a custom distance function.
    '''
    # Defaults
    TRAINING_BATCH_SIZE = 64
    TRAINING_NB_EPOCHS = 2
    VALIDATION_BATCH_SIZE = 1
    PREDICT_BATCH_SIZE = 1
    INPUT_LEFT = 'left'
    INPUT_RIGHT = 'right'
    OUTPUT = 'output'

    def __init__(self, structure, input_shape, verbose=True):
        self.input_shape = input_shape
        self.verbose = verbose
        self.construct(structure)

    def construct(self, structure):
        '''
        structure - a list of (is_shared, layer_fn) tuples detailing the
            structure of the Siamese part of the network
        is_shared - boolean, whether or not the layer is shared
        layer_fn - a zero-argument factory function for a layer
        '''
        self.graph = Graph()
        input_left = self.INPUT_LEFT
        input_right = self.INPUT_RIGHT
        self.graph.add_input(name=input_left, input_shape=self.input_shape)
        self.graph.add_input(name=input_right, input_shape=self.input_shape)
        unique_name = 'name'
        for is_shared, layer_fn in structure:
            if is_shared:
                self.graph.add_shared_node(
                    layer_fn(),
                    name=unique_name,
                    inputs=[input_left, input_right],
                    outputs=[input_left + "'", input_right + "'"])
            else:
                self.graph.add_node(layer_fn(), input=input_left,
                                    name=input_left + "'")
                self.graph.add_node(layer_fn(), input=input_right,
                                    name=input_right + "'")
            input_left += "'"
            input_right += "'"
            unique_name += '0'
        self.graph.add_node(Lambda(l2dist),
                            inputs=[input_left, input_right],
                            merge_mode='join', name='dist')
        self.graph.add_output(name=self.OUTPUT, input='dist')
        if self.verbose:
            print('Constructed a SiameseNet.')

    def compile(self):
        self.graph.compile(loss={'output': chopra_loss}, optimizer='adam')
        if self.verbose:
            print('Successfully compiled the SiameseNet.')

    def _transform_data(self, x, y=None):
        data = {self.INPUT_LEFT: x[0], self.INPUT_RIGHT: x[1]}
        if y is not None:
            data[self.OUTPUT] = y
        return data

    def fit(self, x, y, validation_data=None,
            nb_epoch=TRAINING_NB_EPOCHS,
            batch_size=TRAINING_BATCH_SIZE, shuffle=True):
        '''Train it.'''
        if validation_data is not None:
            # validation_data is assumed to be an (x_val, y_val) pair
            validation_data = self._transform_data(*validation_data)
        self.graph.fit(self._transform_data(x, y),
                       validation_data=validation_data,
                       nb_epoch=nb_epoch, batch_size=batch_size,
                       shuffle=shuffle)
        if self.verbose:
            print('Done training the SiameseNet.')

    def evaluate(self, x, y, batch_size=VALIDATION_BATCH_SIZE):
        '''Validate it.'''
        validation_loss = self.graph.evaluate(self._transform_data(x, y),
                                              batch_size=batch_size)
        if self.verbose:
            print('Validation loss is', validation_loss)
        return validation_loss

    def predict(self, x, batch_size=PREDICT_BATCH_SIZE):
        '''Predict it. (Not sure if this is helpful.)'''
        prediction = self.graph.predict(self._transform_data(x),
                                        batch_size=batch_size)
        if self.verbose:
            print('Predicted probabilities are', prediction)
        return prediction

    def save(self, filepath):
        self.graph.save_weights(filepath)

    def load(self, filepath):
        self.graph.load_weights(filepath)

    def similarity(self, x1, x2):
        pass  # The crux of this project
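# Hypothetical usage of SiameseNet (layer factories and shapes are
# illustrative; assumes l2dist and chopra_loss from this module are in
# scope). `structure` pairs a shared/unshared flag with a zero-argument
# layer factory so each leg can instantiate its own copy:
from keras.layers.core import Dense, Activation

structure = [
    (True, lambda: Dense(128)),           # one Dense shared by both legs
    (False, lambda: Activation('relu')),  # two separate (identical) layers
    (True, lambda: Dense(32)),
]
net = SiameseNet(structure, input_shape=(784,))
net.compile()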
early_stopping = EarlyStopping(monitor='loss', mode='min', patience=5) # print lr class lr_printer(Callback): def __init__(self): super(lr_printer, self).__init__() def on_epoch_begin(self, epoch, logs={}): print('lr:', self.model.optimizer.lr.get_value()) lr_print = lr_printer() ############## build model ############ model = Graph() model.add_input(input_shape=(seq_length, train_char), name='input') model.add_node(LSTM(units, return_sequences=True, init='orthogonal'), input='input', name='lstm1') model.add_node(Dropout(0.5), input='lstm1', name='drop1') model.add_node(LSTM(units, return_sequences=True, init='orthogonal'), input='drop1', name='lstm2') model.add_node(Dropout(0.5), input='lstm2', name='drop2') model.add_node(TimeDistributed(Dense(train_char, init='orthogonal')), input='drop2', name='fc1') model.add_node(Dropout(0.5), input='fc1', name='drop3') model.add_node(TimeDistributed( Dense(train_char, activation='softmax', init='orthogonal')),
def test_nested_call(): """Test nested Sequential and Graph models""" nb_samples, input_dim, output_dim = 3, 10, 5 X = K.placeholder(ndim=2) x = np.ones((nb_samples, input_dim)).astype(K.floatx()) # test Graph model nested inside Sequential model model = Graph() model.add_input('input', input_shape=(input_dim, )) model.add_node(Dense(output_dim=output_dim, input_dim=input_dim), input='input', name='output', create_output=True) model2 = Sequential() model2.add(model) model2.compile('sgd', 'mse') Y2 = model2(X) f = K.function([X], [Y2]) y1 = f([x])[0].astype(K.floatx()) y2 = model2.predict(x) # results of __call__ should match model.predict assert_allclose(y1, y2) # test Sequential model inside Graph model model3 = Sequential() model3.add(Dense(output_dim=output_dim, input_dim=input_dim)) model4 = Graph() model4.add_input('input', input_shape=(input_dim, )) model4.add_node(model3, input='input', name='output', create_output=True) model4.compile('sgd', {'output': 'mse'}) Y2 = model4(X) f = K.function([X], [Y2]) y1 = f([x])[0].astype(K.floatx()) y2 = model4.predict({'input': x})['output'] # results of __call__ should match model.predict assert_allclose(y1, y2)
def predict(self, X, batch_size=128, verbose=0): return Graph.predict(self, {self._input_name:X}, batch_size=batch_size, verbose=verbose)[self._output_name]
def build_graph(n_variables):
    '''
    Creates the Graph component of the model: one Conv1D front-end per
    n-gram width, each feeding a bidirectional GRU, with all branches
    concatenated at the end.
    '''
    nb_feature_maps = 64
    ngram_filters = [1, 2, 3, 4, 5]  # (6, 7, 8 were also tried)
    graph = Graph()
    graph.add_input(name='data', input_shape=(N_TRACKS, n_variables))
    for n_gram in ngram_filters:
        graph.add_node(Convolution1D(nb_feature_maps, n_gram,
                                     activation='relu',
                                     input_shape=(N_TRACKS, n_variables)),
                       name='conv_%s' % n_gram, input='data')
        graph.add_node(GRU(25), name='gru_fwd_%s' % n_gram,
                       input='conv_%s' % n_gram)
        graph.add_node(GRU(25, go_backwards=True),
                       name='gru_bwd_%s' % n_gram,
                       input='conv_%s' % n_gram)
        # identity layer used purely to concatenate the two GRU directions
        pass_thru = Lambda(lambda x: x)
        graph.add_node(pass_thru, name='unit_%s' % n_gram,
                       inputs=['gru_fwd_%s' % n_gram,
                               'gru_bwd_%s' % n_gram])
    graph.add_node(Dropout(0.4), name='dropout',
                   inputs=['unit_%s' % n for n in ngram_filters],
                   create_output=True)
    return graph
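# Because the final Dropout node is added with create_output=True, the
# returned Graph can be nested inside a Sequential to attach a
# classification head (the same nesting pattern test_nested_call
# exercises above). A hypothetical usage; n_variables=9 is illustrative,
# and Sequential/Dense are assumed imported as elsewhere in this module:
graph = build_graph(n_variables=9)
model = Sequential()
model.add(graph)
model.add(Dense(1, activation='sigmoid'))
model.compile('adam', 'binary_crossentropy')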