def design_network(model_name):
    input1 = Input(shape=(input_length, 768))
    if model_name == 'CNN':
        lcov1 = Conv1D(filters=units, kernel_size=1, activation=relu)(input1)
        out = MaxPooling1D(pool_size=1)(lcov1)
    if model_name == 'BiLSTM':
        out = Bidirectional(LSTM(units))(input1)
    if 'Bert' in model_name or 'Xlnet' in model_name:
        convs = []
        for fsz in kernel_size:
            l_conv = Conv1D(filters=units, kernel_size=fsz, activation=relu)(input1)
            lpool = MaxPooling1D(input_length - fsz + 1)(l_conv)
            convs.append(lpool)
        merge = concatenate(convs, axis=1)
        # reshape = Reshape((units, 3))(merge)
        permute = Permute((2, 1))(merge)
        if 'Att' in model_name:
            out = Bidirectional(LSTM(units, return_sequences=True))(permute)
            out = AttentionLayer(step_dim=units)(out)
        else:
            out = Bidirectional(LSTM(units))(permute)
    out = Dropout(keep_prob)(out)
    output = Dense(class_nums, activation=softmax)(out)
    model = Model(input1, output)
    model.compile(loss=losses.categorical_crossentropy,
                  optimizer=optimizers.Adam(lr=learning_rate),
                  metrics=['accuracy'])
    model.summary()
    return model
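# --- Hedged sketch, not from the original source ---
# `design_network` (and `xlblc3a` below) rely on module-level globals and a
# custom `AttentionLayer` defined elsewhere in the project. The values and the
# layer implementation below are illustrative assumptions, not the authors' code.
from keras.activations import relu, softmax  # the bare `relu`/`softmax` names above assume this import
from keras import backend as K
from keras.layers import Layer

input_length = 128       # assumed max token sequence length
units = 128              # assumed hidden size
kernel_size = [2, 3, 4]  # assumed conv kernel sizes
keep_prob = 0.5          # note: passed to Dropout, so it acts as a drop *rate*
class_nums = 10          # assumed number of classes
learning_rate = 1e-3     # assumed learning rate

class AttentionLayer(Layer):
    """Minimal additive attention over time steps (illustrative only)."""
    def __init__(self, step_dim, **kwargs):
        self.step_dim = step_dim
        super(AttentionLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        self.W = self.add_weight(name='att_W',
                                 shape=(input_shape[-1],),
                                 initializer='glorot_uniform',
                                 trainable=True)
        super(AttentionLayer, self).build(input_shape)

    def call(self, x):
        # score each time step, softmax over time, weighted sum of features
        e = K.tanh(K.dot(x, K.expand_dims(self.W)))  # (batch, time, 1)
        a = K.exp(e)
        a = a / K.sum(a, axis=1, keepdims=True)
        return K.sum(x * a, axis=1)                  # (batch, features)

    def compute_output_shape(self, input_shape):
        return (input_shape[0], input_shape[-1])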
def k_1pipeline_mlp(yao_indices_dim, image_input, base_model, with_compile=True):
    '''
    parameters image_input & base_model are produced by function k_base_model()
    '''
    # output layer parameters
    _output_units = yao_indices_dim
    _output_kernel_regularizer = None
    _output_activation = 'sigmoid'

    print('Build 1 deeper pipeline + MLP model...')
    # base_model.summary()
    pipeline_1 = base_model.output
    gen_output = Dense(units=_output_units,
                       kernel_regularizer=_output_kernel_regularizer,
                       activation=_output_activation,
                       name='gen_output')(pipeline_1)

    pipeline_mlp_model = Model(inputs=image_input, outputs=gen_output)
    print('deeper_pipeline_model structure...')
    pipeline_mlp_model.summary()

    if with_compile:
        return compiler(pipeline_mlp_model, scaling_activation='binary')
    else:
        # ready to join other frameworks such as TensorFlow
        return pipeline_mlp_model
def k_2pipeline_mlp(yao_indices_dim, image_input, base_model, with_compile=True):
    # output layer parameters
    _output_units = yao_indices_dim
    _output_kernel_regularizer = None
    _output_activation = 'sigmoid'

    print('Build 2 deeper pipeline + MLP model...')
    # base_model.summary()
    # note: both pipelines tap the same base_model output tensor
    pipeline_1 = base_model.output
    pipeline_2 = base_model.output
    concatenated = concatenate([pipeline_1, pipeline_2], axis=-1)

    gen_output = Dense(units=_output_units,
                       kernel_regularizer=_output_kernel_regularizer,
                       activation=_output_activation,
                       name='gen_output')(concatenated)

    pipeline_2_mlp_model = Model(inputs=image_input, outputs=gen_output)
    print('deeper_pipeline_model structure...')
    pipeline_2_mlp_model.summary()

    if with_compile:
        return compiler(pipeline_2_mlp_model, scaling_activation='binary')
    else:
        # ready to join other frameworks such as TensorFlow
        return pipeline_2_mlp_model
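# --- Hedged sketch, not from the original source ---
# The pipeline builders above assume a `k_base_model()` that returns an input
# tensor plus a feature-extraction model, and a `compiler()` helper. These
# illustrative stand-ins only show the expected shape of both contracts.
from keras.layers import Input, Conv2D, GlobalAveragePooling2D
from keras.models import Model

def k_base_model(input_shape=(64, 64, 1)):  # input shape is assumed
    image_input = Input(shape=input_shape)
    x = Conv2D(32, (3, 3), activation='relu')(image_input)
    x = GlobalAveragePooling2D()(x)
    base_model = Model(inputs=image_input, outputs=x)
    return image_input, base_model

def compiler(model, scaling_activation='binary'):
    # assumed: 'binary' scaling maps to a binary cross-entropy objective
    loss = 'binary_crossentropy' if scaling_activation == 'binary' else 'categorical_crossentropy'
    model.compile(optimizer='adam', loss=loss, metrics=['accuracy'])
    return model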
def create_model(gpu):
    with tf.device(gpu):
        input = Input((1280, 1918, len(dirs)))
        x = Lambda(lambda x: K.mean(x, axis=-1, keepdims=True))(input)
        model = Model(input, x)
        model.summary()
        return model
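# --- Hedged usage sketch, not from the original source ---
# The Lambda model above just averages its len(dirs) input channels, i.e. it
# ensembles stacked per-source masks by taking their mean. `dirs` is assumed
# to be a module-level list of prediction sources.
import numpy as np

dirs = ['fold0', 'fold1', 'fold2']  # assumed value
model = create_model('/gpu:0')
stacked = np.random.rand(1, 1280, 1918, len(dirs)).astype('float32')
mean_mask = model.predict(stacked)  # shape: (1, 1280, 1918, 1)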
def Alexnet():
    inputs = Input(shape=(227, 227, 3))
    x = conv_block(inputs, 96, (11, 11), (4, 4), 'valid', name='conv1', fist_layer=True)
    x = Activation('relu')(x)
    x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2))(x)
    x = conv_block(x, 256, (5, 5), (1, 1), name='conv2', padding='same')
    x = Activation('relu')(x)
    x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2))(x)
    x = conv_block(x, 384, (3, 3), strides=(1, 1), name='conv3', padding='same')
    x = Activation('relu')(x)
    x = conv_block(x, 384, (3, 3), strides=(1, 1), name='conv4', padding='same')
    x = Activation('relu')(x)
    x = conv_block(x, 256, (3, 3), strides=(1, 1), name='conv5', padding='same')
    x = Activation('relu')(x)
    x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2))(x)
    x = Flatten()(x)
    # x = Dense(4096, kernel_regularizer=l2(0.0005), bias_initializer='ones', name='fc6')(x)
    x = dense_block(x, 4096, name='fc6')
    x = Activation('relu')(x)
    x = Dropout(0.5)(x)
    # x = Dense(4096, kernel_regularizer=l2(0.0005), bias_initializer='ones', name='fc7')(x)
    x = dense_block(x, 4096, name='fc7')
    x = Activation('relu')(x)
    x = Dropout(0.5)(x)
    # x = Dense(1000, kernel_regularizer=l2(0.0005), bias_initializer='ones', name='fc8')(x)
    x = dense_block(x, 1000, name='fc8')
    predictions = Activation('softmax')(x)
    model = Model(inputs=inputs, outputs=predictions)
    # Optional weight loading from a pre-trained HDF5 file was disabled:
    # for i in ['fc7', 'fc8']:  # ['conv1','conv2','conv3','conv4','conv5','fc8']:  # ,'fc6','fc7']:
    #     layer = model.get_layer(name=i)
    #     # with h5.File('model_0.54.h5', mode='r') as f:
    #     with h5.File('model.h5', mode='r') as f:
    #         x1 = f['model_14/' + i + '_9/kernel:0'].value
    #         x2 = f['model_14/' + i + '_9/bias:0'].value
    #     K.set_value(layer.weights[0], x1)  # set the weight values
    #     K.set_value(layer.weights[1], x2)
    model.summary()
    return model
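# --- Hedged sketch, not from the original source ---
# `conv_block` and `dense_block` (also used by `VGG_16` below) are project
# helpers whose definitions are not shown. Plausible stand-ins, inferred from
# the commented-out Dense calls, might look like this; the `fist_layer` flag
# (sic, presumably "first_layer") is kept only for signature compatibility.
from keras.layers import Conv2D, Dense
from keras.initializers import RandomNormal
from keras.regularizers import l2

def conv_block(x, filters, kernel_size, strides=(1, 1), padding='valid',
               name=None, fist_layer=False):
    # AlexNet-style init: zero biases for the first layer, ones elsewhere (assumed)
    return Conv2D(filters, kernel_size, strides=strides, padding=padding,
                  kernel_initializer=RandomNormal(0.0, 0.01),
                  bias_initializer='zeros' if fist_layer else 'ones',
                  name=name)(x)

def dense_block(x, units, name=None):
    return Dense(units, kernel_regularizer=l2(0.0005),
                 bias_initializer='ones', name=name)(x)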
def create_model(self, cfg_file, net_type, **kwargs):
    with open(cfg_file) as json_file:
        arch_specs = json.load(json_file)
    arch = arch_specs[net_type]

    l2_reg = regularizers.l2(0.0)
    l2_bias_reg = regularizers.l2(0.0)
    dropout_rate = [0.0, 0.0]
    batch_norm = False
    activation = 'elu'
    if 'activation' in kwargs:
        activation = kwargs['activation']
    if 'reg_factor' in kwargs:
        l2_reg = regularizers.l2(kwargs['reg_factor'])
    if 'bias_reg_factor' in kwargs:
        l2_bias_reg = regularizers.l2(kwargs['bias_reg_factor'])
    if 'dropout_rate' in kwargs:
        dropout_rate = kwargs['dropout_rate']
    if 'batch_norm' in kwargs:
        batch_norm = kwargs['batch_norm']

    # build model; input image dimensions come from self.input_shape
    x = input_1 = Input(shape=self.input_shape)
    for layer in range(arch['num_block_layers']):
        x = Conv2D(filters=arch['filters'][layer],
                   kernel_size=arch['kernel_size'][layer],
                   padding='same',
                   kernel_regularizer=l2_reg,
                   bias_regularizer=l2_bias_reg)(x)
        if batch_norm:
            x = BatchNormalization()(x)
        x = Activation(activation=activation)(x)
        x = Conv2D(filters=arch['filters'][layer],
                   kernel_size=arch['kernel_size'][layer],
                   padding='same',
                   kernel_regularizer=l2_reg,
                   bias_regularizer=l2_bias_reg)(x)
        if batch_norm:
            x = BatchNormalization()(x)
        x = Activation(activation=activation)(x)
        x = MaxPooling2D(pool_size=(2, 2))(x)
        x = Dropout(rate=dropout_rate[0])(x)

    x = Flatten()(x)
    for layer in range(arch['num_dense_layers']):
        x = Dense(units=arch['units'][layer],
                  kernel_regularizer=l2_reg,
                  bias_regularizer=l2_bias_reg)(x)
        if batch_norm:
            x = BatchNormalization()(x)
        x = Activation(activation=activation)(x)
        x = Dropout(rate=dropout_rate[1])(x)

    x = Dense(units=self.num_classes,
              kernel_regularizer=l2_reg,
              bias_regularizer=l2_bias_reg)(x)
    if batch_norm:
        x = BatchNormalization()(x)
    x = Activation(activation='softmax')(x)

    model = Model(inputs=[input_1], outputs=[x])
    model.summary()
    return model
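# --- Hedged example, not from the original source ---
# `create_model` expects a JSON config keyed by network type. Based on the
# fields the code reads, a plausible cfg file could look like this:
#
# {
#   "small_cnn": {
#     "num_block_layers": 2,
#     "filters": [32, 64],
#     "kernel_size": [3, 3],
#     "num_dense_layers": 1,
#     "units": [128]
#   }
# }
#
# model = self.create_model('arch.json', 'small_cnn',
#                           activation='relu', reg_factor=1e-4,
#                           dropout_rate=[0.25, 0.5], batch_norm=True)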
def build_model():
    """
    builds full keras model and returns it
    """
    in_x = x = Input((1, 8, 8))  # (batch, channels, height, width)
    x = Conv2D(filters=cnn_filter_num,
               kernel_size=cnn_first_filter_size,
               padding="same",
               data_format="channels_first",
               use_bias=False,
               kernel_regularizer=l2(l2_reg),
               name="input_conv-" + str(cnn_first_filter_size) + "-" + str(cnn_filter_num))(x)
    x = BatchNormalization(axis=1, name="input_batchnorm")(x)
    x = Activation("relu", name="input_relu")(x)

    for i in range(res_layer_num):
        x = _build_residual_block(x, i + 1)
    res_out = x

    # for policy output
    x = Conv2D(filters=2, kernel_size=1, data_format="channels_first",
               use_bias=False, kernel_regularizer=l2(l2_reg),
               name="policy_conv-1-2")(res_out)
    x = BatchNormalization(axis=1, name="policy_batchnorm")(x)
    x = Activation("relu", name="policy_relu")(x)
    x = Flatten(name="policy_flatten")(x)
    # no output for 'pass'
    policy_out = Dense(n_labels, kernel_regularizer=l2(l2_reg),
                       activation="softmax", name="policy_out")(x)

    # for value output
    x = Conv2D(filters=4, kernel_size=1, data_format="channels_first",
               use_bias=False, kernel_regularizer=l2(l2_reg),
               name="value_conv-1-4")(res_out)
    x = BatchNormalization(axis=1, name="value_batchnorm")(x)
    x = Activation("relu", name="value_relu")(x)
    x = Flatten(name="value_flatten")(x)
    x = Dense(value_fc_size, kernel_regularizer=l2(l2_reg),
              activation="relu", name="value_dense")(x)
    value_out = Dense(1, kernel_regularizer=l2(l2_reg),
                      activation="tanh", name="value_out")(x)

    model = Model(in_x, [policy_out, value_out], name="hex_model")
    # note: `sgd` is constructed but never used; compile() below uses 'adam'
    sgd = optimizers.SGD(lr=learning_rate, momentum=momentum)
    losses = ['categorical_crossentropy', 'mean_squared_error']
    model.compile(loss=losses, optimizer='adam', metrics=['accuracy', 'mae'])
    model.summary()
    return model
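# --- Hedged sketch, not from the original source ---
# `_build_residual_block` is referenced above but not defined here. A standard
# AlphaZero-style residual block, consistent with the surrounding globals
# (`cnn_filter_num`, `l2_reg`), would be:
from keras.layers import Conv2D, BatchNormalization, Activation, Add
from keras.regularizers import l2

def _build_residual_block(x, index):
    in_x = x
    x = Conv2D(filters=cnn_filter_num, kernel_size=3, padding="same",
               data_format="channels_first", use_bias=False,
               kernel_regularizer=l2(l2_reg),
               name="res%d_conv1" % index)(x)
    x = BatchNormalization(axis=1, name="res%d_batchnorm1" % index)(x)
    x = Activation("relu", name="res%d_relu1" % index)(x)
    x = Conv2D(filters=cnn_filter_num, kernel_size=3, padding="same",
               data_format="channels_first", use_bias=False,
               kernel_regularizer=l2(l2_reg),
               name="res%d_conv2" % index)(x)
    x = BatchNormalization(axis=1, name="res%d_batchnorm2" % index)(x)
    x = Add(name="res%d_add" % index)([in_x, x])
    x = Activation("relu", name="res%d_relu2" % index)(x)
    return x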
def xlblc3a():
    input1 = Input(shape=(input_length, 768))
    biout = Bidirectional(LSTM(units, return_sequences=True))(input1)
    convs = []
    for fsz in kernel_size:
        l_conv = Conv1D(filters=units, kernel_size=fsz, activation=relu)(biout)
        lpool = MaxPooling1D(input_length - fsz + 1)(l_conv)
        convs.append(lpool)
    merge = concatenate(convs, axis=1)
    out = AttentionLayer(step_dim=units)(merge)
    out = Dropout(keep_prob)(out)
    output = Dense(class_nums, activation=softmax)(out)
    model = Model(input1, output)
    model.compile(loss=losses.categorical_crossentropy,
                  optimizer=optimizers.Adam(lr=learning_rate),
                  metrics=['accuracy'])
    model.summary()
    return model
def build(self):
    """
    Builds the full Keras model and returns it.
    """
    in_x = x = Input((1, 19, 19))
    x = self.GoRes_blocks(x, 1)
    x = self.GoRes_blocks(x, 2)
    x = self.GoRes_blocks(x, 3)
    x = Flatten()(x)
    x = Dense(34, activation='softmax')(x)

    model = Model(inputs=in_x, outputs=x)
    model.summary()
    # x_test = np.array([[[1, 2], [2, 3], [3, 4], [4, 5]]])
    # print(model.predict(x_test))
    plot_model(model, to_file='lambda1.png', show_shapes=True)
    return model
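# --- Hedged sketch, not from the original source ---
# `GoRes_blocks` is referenced above but not defined here. An assumed
# channels-first residual block for 19x19 Go boards; it projects the shortcut
# with a 1x1 convolution when the channel count changes:
from keras.layers import Conv2D, BatchNormalization, Activation, Add

def GoRes_blocks(self, x, index):
    filters = 64  # assumed width
    shortcut = x
    y = Conv2D(filters, (3, 3), padding='same', data_format='channels_first')(x)
    y = BatchNormalization(axis=1)(y)
    y = Activation('relu')(y)
    y = Conv2D(filters, (3, 3), padding='same', data_format='channels_first')(y)
    y = BatchNormalization(axis=1)(y)
    if int(shortcut.shape[1]) != filters:
        shortcut = Conv2D(filters, (1, 1), data_format='channels_first')(shortcut)
    y = Add()([shortcut, y])
    return Activation('relu')(y)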
class VGGNet:
    def __init__(self):
        self.input_shape = (224, 224, 3)
        self.weight = 'imagenet'
        self.pooling = 'max'
        self.model = VGG16(weights=self.weight,
                           input_shape=(self.input_shape[0], self.input_shape[1], self.input_shape[2]),
                           pooling=self.pooling,
                           include_top=False)
        self.X = self.model.layers[-2].output
        self.X = Flatten()(self.X)
        self.X = Dense(units=4096, activation="relu")(self.X)
        self.X = Dense(units=4096, activation="relu")(self.X)
        self.X = Dense(units=3, activation="softmax")(self.X)
        for layer in self.model.layers[:19]:
            layer.trainable = False
        self.model.predict(np.zeros((1, 224, 224, 3)))  # warm-up pass
        self.model_final = Model(inputs=self.model.input, outputs=self.X)
        self.model_final.compile(loss="categorical_crossentropy",
                                 optimizer=optimizers.SGD(lr=0.0001, momentum=0.9),
                                 metrics=["accuracy"])
        self.model_final.summary()

    def extract_feat(self, img_path):
        '''
        Use the VGG16 model to extract features.
        Returns a normalized feature vector.
        '''
        img = image.load_img(img_path, target_size=(self.input_shape[0], self.input_shape[1]))
        img = image.img_to_array(img)
        img = np.expand_dims(img, axis=0)
        img = preprocess_input(img)
        feat = self.model.predict(img)
        norm_feat = feat[0] / LA.norm(feat[0])
        return norm_feat
def build_model(X_train, row, cell):
    data_shape = (X_train.shape[1], X_train.shape[2], 1)
    input_main = Input((X_train.shape[1], X_train.shape[2], 1))

    block1 = Conv2D(25, (1, 1), data_format='channels_last', input_shape=data_shape)(input_main)
    block1 = Conv2D(25, (X_train.shape[2], 1))(block1)
    block1 = BatchNormalization(axis=1)(block1)
    block1 = Activation('elu')(block1)
    block1 = MaxPooling2D(pool_size=(2, 1), strides=(1, 2))(block1)
    block1 = Dropout(0.5)(block1)

    block2 = Conv2D(50, (5, 1))(block1)
    block2 = BatchNormalization(axis=1)(block2)
    block2 = Activation('elu')(block2)
    block2 = MaxPooling2D(pool_size=(2, 1), strides=(1, 2))(block2)
    block2 = Dropout(0.5)(block2)

    block3 = Conv2D(100, (5, 1))(block2)
    block3 = BatchNormalization(axis=1)(block3)
    block3 = Activation('elu')(block3)
    block3 = MaxPooling2D(pool_size=(2, 1), strides=(1, 2))(block3)
    block3 = Dropout(0.5)(block3)

    block4 = Conv2D(200, (5, 1), data_format='channels_last')(block3)
    block4 = BatchNormalization(axis=1)(block4)
    block4 = Activation('elu')(block4)
    block4 = MaxPooling2D(pool_size=(2, 1), strides=(1, 2))(block4)
    block4 = Dropout(0.5)(block4)

    flatten = Flatten()(block4)
    dense = Dense(3, kernel_constraint=max_norm(0.5))(flatten)
    softmax = Activation('softmax')(dense)

    model = Model(inputs=input_main, outputs=softmax)
    opt = optimizers.Adam(lr=0.001)
    model.summary()
    model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=['accuracy'])
    plot_model(model, to_file='model.png', show_shapes=True)
    return model
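# --- Hedged usage sketch, not from the original source ---
# The builder only reads X_train.shape, so a synthetic array with an assumed
# EEG-like shape is enough to instantiate the network (the `row`/`cell`
# arguments are accepted but unused by the function body):
import numpy as np

X_train_demo = np.zeros((16, 240, 64, 1), dtype='float32')  # assumed shape
model = build_model(X_train_demo, row=240, cell=64)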
def Summary(self, model: training.Model):
    """
    This function prints the summary of a model.
    :param model: training.Model: a built model
    """
    try:
        AssertNotNone(model, 'summary model')  # raises if the model is None
        print(model.summary(line_length=200))
    except Exception as ex:
        template = "An exception of type {0} occurred in [ModelBuilder.Summary]. Arguments:\n{1!r}"
        message = template.format(type(ex).__name__, ex.args)
        print(message)
def test_trainable_weights_count_consistency():
    """Tests the trainable weights consistency check of Model.

    This verifies that a warning is shown if model.trainable is modified
    and the model is summarized/run without a new call to .compile()

    Reproduce issue #8121
    """
    a = Input(shape=(3,), name='input_a')
    model1 = Model(inputs=a, outputs=Dense(1)(a))

    model1.trainable = False
    b = Input(shape=(3,), name='input_b')
    y = model1(b)
    model2 = Model(inputs=b, outputs=Dense(1)(y))

    model2.compile(optimizer='adam', loss='mse')

    model1.trainable = True

    # Should warn on .summary()
    with pytest.warns(UserWarning) as w:
        model2.summary()
    warning_raised = any(['Discrepancy' in str(w_.message) for w_ in w])
    assert warning_raised, 'No warning raised when trainable is modified without .compile.'

    # And on .fit()
    with pytest.warns(UserWarning) as w:
        model2.fit(x=np.zeros((5, 3)), y=np.zeros((5, 1)))
    warning_raised = any(['Discrepancy' in str(w_.message) for w_ in w])
    assert warning_raised, 'No warning raised when trainable is modified without .compile.'

    # And shouldn't warn if we recompile
    model2.compile(optimizer='adam', loss='mse')
    with pytest.warns(None) as w:
        model2.summary()
    assert len(w) == 0, "Warning raised even when .compile() is called after modifying .trainable"
def k_2pipeline_mlp_2outputs(yao_indices_dim, topics_dim, image_input, base_model, with_compile=True):
    # aux_mlp layer parameters follow cnn3_mlp_channel_2
    _aux_mlp_units_1 = 80
    _aux_mlp_activation_1 = 'relu'
    _aux_mlp_dropout_1 = 0.5

    # output layer parameters
    _output_units = yao_indices_dim
    _output_kernel_regularizer = None
    _output_activation = 'sigmoid'
    _aux_output_units = topics_dim
    _aux_output_activation = 'softmax'

    print('Build 2 deeper pipeline + MLP model...')
    # base_model.summary()
    # note: both pipelines tap the same base_model output tensor
    pipeline_1 = base_model.output
    pipeline_2 = base_model.output
    concatenated = concatenate([pipeline_1, pipeline_2], axis=-1)

    gen_output = Dense(units=_output_units,
                       kernel_regularizer=_output_kernel_regularizer,
                       activation=_output_activation,
                       name='gen_output')(concatenated)
    # aux_output only gets features from the cnn2_mlp channel_2
    aux_output = Dense(units=_aux_output_units,
                       activation=_aux_output_activation,
                       name='aux_output')(concatenated)

    pipeline2_mlp_2output_model = Model(inputs=image_input,
                                        outputs=[gen_output, aux_output])
    print('deeper_pipeline_model structure...')
    pipeline2_mlp_2output_model.summary()

    if with_compile:
        return double_output_compiler(pipeline2_mlp_2output_model, scaling_activation='binary')
    else:
        # ready to join other frameworks such as TensorFlow
        return pipeline2_mlp_2output_model
def get_model_with_classification_head(self, base_model):
    base_model.summary()
    x = base_model.output
    x = GlobalAveragePooling2D(name='flatten_1')(x)
    # x = Flatten(name='flatten_1')(x)
    x = Dense(1024, activation='relu', name='fc6_1')(x)
    x = Dense(1024, activation='relu', name='fc7_2')(x)
    x = Dropout(0.5)(x)
    predictions = Dense(self.nb_classes, activation='softmax', name='fc8_3')(x)

    # this is the model we will train
    model = Model(inputs=base_model.input, outputs=predictions)
    for layer in base_model.layers:
        layer.trainable = False
    model.compile(optimizer="rmsprop", loss=LOSS, metrics=['accuracy'])
    model.summary()
    return model
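# --- Hedged usage sketch, not from the original source ---
# Any convolutional Keras application can serve as `base_model` here; for
# example an ImageNet-pretrained trunk (the names below are illustrative):
#
# from keras.applications.resnet50 import ResNet50
# base = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
# classifier = builder.get_model_with_classification_head(base)
# # `builder` stands for an instance of the surrounding class with
# # `nb_classes` set; `LOSS` must also be defined at module level.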
def VGG_16():
    inputs = Input(shape=(224, 224, 3))

    # Block 1
    x = conv_block(inputs, 64, (3, 3), padding='same', name='block1_conv1', fist_layer=True)
    # x = BatchNormalization(axis=-1, name='BN1')(x)
    x = Activation('relu')(x)
    x = conv_block(x, 64, (3, 3), padding='same', name='block1_conv2')
    # x = BatchNormalization(axis=-1, name='BN2')(x)
    x = Activation('relu')(x)
    x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name='block1_pool')(x)

    # Block 2
    x = conv_block(x, 128, (3, 3), padding='same', name='block2_conv1')
    # x = BatchNormalization(axis=-1, name='BN3')(x)
    x = Activation('relu')(x)
    x = conv_block(x, 128, (3, 3), padding='same', name='block2_conv2')
    # x = BatchNormalization(axis=-1, name='BN4')(x)
    x = Activation('relu')(x)
    x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name='block2_pool')(x)

    # Block 3
    x = conv_block(x, 256, (3, 3), padding='same', name='block3_conv1')
    # x = BatchNormalization(axis=-1, name='BN5')(x)
    x = Activation('relu')(x)
    x = conv_block(x, 256, (3, 3), padding='same', name='block3_conv2')
    # x = BatchNormalization(axis=-1, name='BN6')(x)
    x = Activation('relu')(x)
    x = conv_block(x, 256, (3, 3), padding='same', name='block3_conv3')
    # x = BatchNormalization(axis=-1, name='BN7')(x)
    x = Activation('relu')(x)
    x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name='block3_pool')(x)

    # Block 4
    x = conv_block(x, 512, (3, 3), padding='same', name='block4_conv1')
    # x = BatchNormalization(axis=-1, name='BN8')(x)
    x = Activation('relu')(x)
    x = conv_block(x, 512, (3, 3), padding='same', name='block4_conv2')
    # x = BatchNormalization(axis=-1, name='BN9')(x)
    x = Activation('relu')(x)
    x = conv_block(x, 512, (3, 3), padding='same', name='block4_conv3')
    # x = BatchNormalization(axis=-1, name='BN10')(x)
    x = Activation('relu')(x)
    x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name='block4_pool')(x)

    # Block 5
    x = conv_block(x, 512, (3, 3), padding='same', name='block5_conv1')
    # x = BatchNormalization(axis=-1, name='BN11')(x)
    x = Activation('relu')(x)
    x = conv_block(x, 512, (3, 3), padding='same', name='block5_conv2')
    # x = BatchNormalization(axis=-1, name='BN12')(x)
    x = Activation('relu')(x)
    x = conv_block(x, 512, (3, 3), padding='same', name='block5_conv3')
    # x = BatchNormalization(axis=-1, name='BN13')(x)
    x = Activation('relu')(x)
    x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name='block5_pool')(x)

    x = Flatten()(x)

    # Classification block: three fully connected layers
    x = dense_block(x, 4096, name='fc1')
    # x = BatchNormalization(axis=-1, name='BN14')(x)
    x = Activation('relu')(x)
    x = Dropout(0.5)(x)
    x = dense_block(x, 4096, name='fc2')
    x = Activation('relu')(x)
    # x = BatchNormalization(axis=-1, name='BN15')(x)
    x = Dropout(0.5)(x)
    x = dense_block(x, 1000, name='predictions')
    predictions = Activation('softmax')(x)

    model = Model(inputs=inputs, outputs=predictions)
    # model.load_weights('F:/weight_point/vgg16_weights.h5', by_name=True)
    model.summary()
    # layer = model.get_layer(name='fc3')
    # print(K.eval(layer.weights[1]))
    return model
def test_model_with_input_feed_tensor():
    """We test building a model with a TF variable as input.
    We should be able to call fit, evaluate, predict,
    by only passing them data for the placeholder inputs in the model.
    """
    import tensorflow as tf

    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32))
    b = Input(shape=(3,), name='input_b')

    a_2 = Dense(4, name='dense_1')(a)
    dp = Dropout(0.5, name='dropout')
    b_2 = dp(b)

    model = Model([a, b], [a_2, b_2])
    model.summary()

    optimizer = 'rmsprop'
    loss = 'mse'
    loss_weights = [1., 0.5]
    model.compile(optimizer, loss, metrics=['mean_squared_error'],
                  loss_weights=loss_weights,
                  sample_weight_mode=None)

    # test train_on_batch
    out = model.train_on_batch(input_b_np, [output_a_np, output_b_np])
    out = model.train_on_batch({'input_b': input_b_np}, [output_a_np, output_b_np])
    out = model.test_on_batch({'input_b': input_b_np}, [output_a_np, output_b_np])
    out = model.predict_on_batch({'input_b': input_b_np})

    # test fit
    out = model.fit({'input_b': input_b_np}, [output_a_np, output_b_np],
                    epochs=1, batch_size=10)
    out = model.fit(input_b_np, [output_a_np, output_b_np],
                    epochs=1, batch_size=10)

    # test evaluate
    out = model.evaluate({'input_b': input_b_np}, [output_a_np, output_b_np],
                         batch_size=10)
    out = model.evaluate(input_b_np, [output_a_np, output_b_np],
                         batch_size=10)

    # test predict
    out = model.predict({'input_b': input_b_np}, batch_size=10)
    out = model.predict(input_b_np, batch_size=10)
    assert len(out) == 2

    # Now test a model with a single input
    # i.e. we don't pass any data to fit the model.
    a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32))
    a_2 = Dense(4, name='dense_1')(a)
    a_2 = Dropout(0.5, name='dropout')(a_2)
    model = Model(a, a_2)
    model.summary()

    optimizer = 'rmsprop'
    loss = 'mse'
    model.compile(optimizer, loss, metrics=['mean_squared_error'])

    # test train_on_batch
    out = model.train_on_batch(None, output_a_np)
    out = model.train_on_batch(None, output_a_np)
    out = model.test_on_batch(None, output_a_np)
    out = model.predict_on_batch(None)
    out = model.train_on_batch([], output_a_np)
    out = model.train_on_batch({}, output_a_np)

    # test fit
    out = model.fit(None, output_a_np, epochs=1, batch_size=10)
    out = model.fit(None, output_a_np, epochs=1, batch_size=10)

    # test evaluate
    out = model.evaluate(None, output_a_np, batch_size=10)
    out = model.evaluate(None, output_a_np, batch_size=10)

    # test predict
    out = model.predict(None, steps=3)
    out = model.predict(None, steps=3)
    assert out.shape == (10 * 3, 4)

    # Same, without learning phase
    # i.e. we don't pass any data to fit the model.
    a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32))
    a_2 = Dense(4, name='dense_1')(a)
    model = Model(a, a_2)
    model.summary()

    optimizer = 'rmsprop'
    loss = 'mse'
    model.compile(optimizer, loss, metrics=['mean_squared_error'])

    # test train_on_batch
    out = model.train_on_batch(None, output_a_np)
    out = model.train_on_batch(None, output_a_np)
    out = model.test_on_batch(None, output_a_np)
    out = model.predict_on_batch(None)
    out = model.train_on_batch([], output_a_np)
    out = model.train_on_batch({}, output_a_np)

    # test fit
    out = model.fit(None, output_a_np, epochs=1, batch_size=10)
    out = model.fit(None, output_a_np, epochs=1, batch_size=10)

    # test evaluate
    out = model.evaluate(None, output_a_np, batch_size=10)
    out = model.evaluate(None, output_a_np, batch_size=10)

    # test predict
    out = model.predict(None, steps=3)
    out = model.predict(None, steps=3)
    assert out.shape == (10 * 3, 4)
def build_CNN_model(inputType, do_training=False, model_inputs=None,
                    loss_func='binary_crossentropy', optimize_proc='adam',
                    is_IntermediateModel=False, load_weight_path=None, **kwargs):
    """
    :param inputType:
    :param do_training:
    :param model_inputs:
    :param loss_func:
    :param optimize_proc:
    :param is_IntermediateModel:
    :param load_weight_path:
    :param kwargs:
    :return:
    """

    # assert not do_training and model_inputs, "if do_training then must pass in model_inputs dictionary"

    EMBEDDING_TYPE = 'embeddingMatrix'
    ONEHOT_TYPE = '1hotVector'

    defined_input_types = {EMBEDDING_TYPE, ONEHOT_TYPE}

    assert inputType in defined_input_types, "unknown input type {0}".format(inputType)

    if inputType == ONEHOT_TYPE:
        review_input = Input(shape=(modelParameters.MaxLen_w,),
                             dtype='float32', name="ONEHOT_INPUT")
        layer = Embedding(modelParameters.VocabSize_w + modelParameters.INDEX_FROM,
                          embedding_dims,
                          embeddings_initializer=embedding_init,
                          embeddings_regularizer=embedding_reg,
                          input_length=modelParameters.MaxLen_w,
                          name='1hot_embeddingLayer')(review_input)
        layer = SpatialDropout1D(0.50)(layer)
    elif inputType == EMBEDDING_TYPE:
        review_input = Input(shape=(modelParameters.MaxLen_w, embedding_dims),
                             dtype="float32", name="EMBEDDING_INPUT")
        layer = review_input
    else:
        raise ValueError("Bad inputType arg to build_CNN_model")

    layer = Convolution1D(filters=num_filters1,
                          kernel_size=filter_length1,
                          padding=region,
                          strides=1,
                          activation=conv_activation1,
                          kernel_initializer='glorot_uniform',
                          bias_initializer='zeros',
                          kernel_regularizer=conv_reg1,
                          dilation_rate=1,
                          name='ConvLayer1')(layer)
    layer = SpatialDropout1D(0.50)(layer)
    layer = MaxPooling1D(pool_size=pool_len1)(layer)

    # The deeper conv stacks were disabled in the original source:
    # layer = Convolution1D(filters=num_filters2, kernel_size=filter_length2,
    #                       padding=region, strides=1, activation=conv_activation2,
    #                       kernel_initializer=conv_init2, kernel_regularizer=conv_reg2,
    #                       dilation_rate=1, name='ConvLayer2')(layer)
    # layer = SpatialDropout1D(0.50)(layer)
    # layer = MaxPooling1D(pool_size=pool_len2)(layer)
    #
    # layer = Convolution1D(filters=num_filters3, kernel_size=filter_length3,
    #                       padding=region, activation=conv_activation3,
    #                       kernel_initializer=conv_init3, kernel_regularizer=conv_reg3,
    #                       dilation_rate=1, name='ConvLayer3')(layer)
    # layer = SpatialDropout1D(0.50)(layer)
    # layer = MaxPooling1D(pool_size=pool_len3)(layer)
    # layer = GlobalMaxPool1D()(layer)
    #
    # layer = Convolution1D(filters=num_filters4, kernel_size=filter_length4,
    #                       padding=region, activation=conv_activation4,
    #                       kernel_initializer=conv_init4, kernel_regularizer=conv_reg4,
    #                       dilation_rate=1, name='ConvLayer4')(layer)
    # layer = leaky_relu(layer)
    # layer = SpatialDropout1D(0.50)(layer)
    # layer = MaxPooling1D(pool_size=pool_len4)(layer)
    # layer = GlobalMaxPool1D()(layer)
    # layer = BatchNormalization()(layer)

    layer = Flatten()(layer)

    layer = Dense(dense_dims0, activation=dense_activation0,
                  kernel_regularizer=dense_reg0,
                  kernel_initializer='glorot_normal',
                  bias_initializer='zeros',
                  name='dense0')(layer)
    layer = Dropout(0.50)(layer)

    layer = Dense(dense_dims1, activation=dense_activation1,
                  kernel_regularizer=dense_reg1,
                  kernel_initializer='glorot_normal',
                  bias_initializer='zeros',
                  name='dense1')(layer)
    layer = Dropout(0.50)(layer)

    # layer = Dense(dense_dims2, activation=dense_activation2,
    #               kernel_regularizer=dense_reg2, kernel_initializer=dense_init2,
    #               name='dense2')(layer)
    # layer = Dropout(0.50)(layer)
    #
    # layer = Dense(dense_dims3, activation=dense_activation3,
    #               kernel_regularizer=dense_reg3, kernel_initializer=dense_init3,
    #               name='dense3_outA')(layer)
    # layer = leaky_relu(layer)

    if is_IntermediateModel:
        return Model(inputs=[review_input], outputs=[layer], name="CNN_model")

    # layer = Dropout(0.5)(layer)
    layer = Dense(dense_dims_final, activation=dense_activation_final,
                  kernel_initializer=dense_init_final,
                  kernel_regularizer=dense_reg0,
                  name='output_Full')(layer)

    CNN_model = Model(inputs=[review_input], outputs=[layer], name="CNN_model")
    CNN_model.compile(optimizer=Adam(lr=0.001, decay=0.0),
                      loss=loss_func, metrics=[binary_accuracy])

    if load_weight_path is not None:
        CNN_model.load_weights(load_weight_path)

    hist = ""
    if do_training:
        weightPath = os.path.join(modelParameters.WEIGHT_PATH, filename)
        configPath = os.path.join(modelParameters.WEIGHT_PATH, filename_config)

        with open(configPath + ".json", 'w') as f:  # text mode; to_json() returns str
            f.write(CNN_model.to_json())

        checkpoint = ModelCheckpoint(weightPath + '_W.{epoch:02d}-{val_loss:.4f}.hdf5',
                                     verbose=1, save_best_only=True,
                                     save_weights_only=False, monitor='val_loss')
        earlyStop = EarlyStopping(patience=3, verbose=1, monitor='val_loss')
        LRadjuster = ReduceLROnPlateau(monitor='val_loss', factor=0.30, patience=0,
                                       verbose=1, cooldown=1, min_lr=0.00001,
                                       epsilon=1e-2)
        call_backs = [checkpoint, earlyStop, LRadjuster]

        CNN_model.summary()
        hist = CNN_model.fit(*model_inputs['training'],
                             batch_size=batch_size,
                             epochs=nb_epoch, verbose=1,
                             validation_data=model_inputs['dev'],
                             callbacks=call_backs)

    return {"model": CNN_model, "hist": hist}
class PolicyValueNet():
    def __init__(self, n=15, filename=None):
        self.n = n
        self.l2_const = 1e-4
        self.pvnet_fn_lock = Lock()
        if filename is not None and os.path.exists(filename):
            self.model = load_model(filename)
        else:
            self.build_model()
        self.model._make_predict_function()
        self.graph = tf.get_default_graph()
        print(self.model.summary())

    def build_model(self):
        print("build_model")
        x = net = Input((self.n, self.n, 4))
        net = conv_block(net, (3, 3), 128, self.l2_const)
        for i in range(block_sz):
            net = residual_block(net, (3, 3), 128, self.l2_const)

        policy_net = Conv2D(filters=2, kernel_size=(1, 1),
                            kernel_regularizer=l2(self.l2_const))(net)
        policy_net = BatchNormalization()(policy_net)
        policy_net = Activation('relu')(policy_net)
        policy_net = Flatten()(policy_net)
        self.policy_net = Dense(self.n * self.n, activation="softmax",
                                kernel_regularizer=l2(self.l2_const))(policy_net)

        value_net = Conv2D(filters=1, kernel_size=(1, 1),
                           kernel_regularizer=l2(self.l2_const))(net)
        value_net = BatchNormalization()(value_net)
        value_net = Activation('relu')(value_net)
        value_net = Flatten()(value_net)
        value_net = Dense(256, activation='relu',
                          kernel_regularizer=l2(self.l2_const))(value_net)
        self.value_net = Dense(1, activation="tanh",
                               kernel_regularizer=l2(self.l2_const))(value_net)

        self.model = Model(x, [self.policy_net, self.value_net])
        print(self.model.summary())

    def get_train_fn(self):
        losses = ['categorical_crossentropy', 'mean_squared_error']
        self.model.compile(optimizer=Adam(lr=0.002), loss=losses)
        batch_size = config.pvn_config['batch_size']
        epochs = config.pvn_config['epochs']

        def train_fn(board, policy, value):
            with self.graph.as_default():
                history = self.model.fit(
                    np.asarray(board),
                    [np.asarray(policy), np.asarray(value)],
                    batch_size=batch_size, epochs=epochs, verbose=0)
            print("train history:", history.history)

        return train_fn

    def get_pvnet_fn(self, single=True):
        def pvnet_fn(board):
            nparr_board = board.get_board()
            self.pvnet_fn_lock.acquire()
            with self.graph.as_default():
                probs, value = self.model.predict(
                    nparr_board.reshape(1, self.n, self.n, 4))
            self.pvnet_fn_lock.release()
            # policy_move = nparr_board[:, :, 0].reshape(self.n * self.n).nonzero()[0]
            policy_move = board.get_available().nonzero()[0]
            policy_probs = probs[0][policy_move]
            return (policy_move, policy_probs), value[0][0]

        def pvnet_fn_m(boards):
            nparr_boards = np.asarray(
                [b.get_board().reshape(self.n, self.n, 4) for b in boards])
            with self.graph.as_default():
                probs, value = self.model.predict(nparr_boards)
            policy_moves = [b.get_available().nonzero()[0] for b in boards]
            # if len(policy_move) == 0:
            #     policy_moves = [b[:, :, 0].reshape(self.n * self.n).nonzero()[0]
            #                     for b in nparr_boards]
            policy_probs = [p[policy_moves[i]] for i, p in enumerate(probs)]
            return zip(policy_moves, policy_probs, value.ravel())

        return pvnet_fn if single else pvnet_fn_m

    # def get_policy_param(self):
    #     net_params = self.model.get_weights()
    #     return net_params

    def save_model(self, model_file):
        if os.path.exists(model_file):
            os.remove(model_file)
        self.model.save(model_file)
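# --- Hedged sketch, not from the original source ---
# `conv_block`, `residual_block`, and `block_sz` are defined elsewhere in this
# project; the stand-ins below are consistent with how they are called above.
from keras.layers import Conv2D, BatchNormalization, Activation, Add
from keras.regularizers import l2

block_sz = 5  # assumed number of residual blocks

def conv_block(x, kernel_size, filters, l2_const):
    x = Conv2D(filters=filters, kernel_size=kernel_size, padding='same',
               kernel_regularizer=l2(l2_const))(x)
    x = BatchNormalization()(x)
    return Activation('relu')(x)

def residual_block(x, kernel_size, filters, l2_const):
    shortcut = x
    x = conv_block(x, kernel_size, filters, l2_const)
    x = Conv2D(filters=filters, kernel_size=kernel_size, padding='same',
               kernel_regularizer=l2(l2_const))(x)
    x = BatchNormalization()(x)
    x = Add()([shortcut, x])
    return Activation('relu')(x)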
class GAN(Model):
    """
    Generative Adversarial Network (GAN).
    """

    def __init__(self, generator, discriminator):
        super(GAN, self).__init__()

        assert generator is not None
        assert discriminator is not None
        assert discriminator.optimizer is not None, "Discriminator must be compiled!"

        self.generator = generator
        self.discriminator = discriminator

        # Create the GAN.
        z_shape = generator.inputs[0].shape[1:]
        gan_input = layers.Input(shape=z_shape)
        gan_output = gan_input
        gan_output = self.generator(gan_output)
        self.discriminator.trainable = False
        gan_output = self.discriminator(gan_output)
        self.gan = Model(gan_input, gan_output)

    def compile(self, optimizer, loss=None, metrics=None, loss_weights=None,
                sample_weight_mode=None, weighted_metrics=None,
                target_tensors=None, **kwargs):
        """
        Compiles the model. Same as vanilla Keras.
        """
        self.gan.compile(optimizer, loss, metrics, loss_weights,
                         sample_weight_mode, weighted_metrics, **kwargs)

    def fit(self, x=None, y=None, batch_size=None, epochs=1,
            sample_interval=None,  # TODO document!
            verbose=1, callbacks=None, validation_split=0.,
            validation_data=None, shuffle=True, class_weight=None,
            sample_weight=None, initial_epoch=0, steps_per_epoch=None,
            validation_steps=None, **kwargs):
        """
        Trains the GAN. This is almost the same as in vanilla Keras.
        """
        # Adversarial ground truths
        valid = np.ones((batch_size, 1))
        fake = np.zeros((batch_size, 1))

        for epoch in range(epochs):
            # Select a random batch of images
            idx = np.random.randint(0, x.shape[0], batch_size)
            imgs = x[idx]

            # Create some noise.
            noise = np.random.normal(0, 1, (batch_size, 100))

            # Generate a batch of new images.
            gen_imgs = self.generator.predict(noise)

            # Train the discriminator
            d_loss_real = self.discriminator.train_on_batch(imgs, valid)
            d_loss_fake = self.discriminator.train_on_batch(gen_imgs, fake)
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

            # Create some noise.
            noise = np.random.normal(0, 1, (batch_size, 100))

            # Train the generator (to have the discriminator label samples as valid).
            g_loss = self.gan.train_on_batch(noise, valid)
            if type(g_loss) == list:
                g_loss = g_loss[0]

            # Plot the progress.
            print("%d [D loss: %f, acc.: %.2f%%] [G loss: %f]" %
                  (epoch, d_loss[0], 100 * d_loss[1], g_loss), end="\r")

            # If at save interval => save generated image samples
            if sample_interval is not None and epoch % sample_interval == 0:
                self.sample_images(epoch)

    def sample_images(self, epoch):
        """
        Samples images.
        """
        r, c = 5, 5
        noise = np.random.normal(0, 1, (r * c, 100))
        gen_imgs = self.generator.predict(noise)

        # Rescale images 0 - 1
        gen_imgs = 0.5 * gen_imgs + 0.5

        fig, axs = plt.subplots(r, c)
        cnt = 0
        for i in range(r):
            for j in range(c):
                axs[i, j].imshow(gen_imgs[cnt, :, :, 0], cmap='gray')
                axs[i, j].axis('off')
                cnt += 1
        # fig.savefig("images/%d.png" % epoch)
        plt.show()
        plt.close()

    def summary(self):
        """
        Provides a summary.
        """
        print("Generator:")
        self.generator.summary()
        print("Discriminator:")
        self.discriminator.summary()
        print("GAN:")
        self.gan.summary()

    def save(self, path):
        """
        Saves the GAN. This includes the whole GAN plus the generator and the
        discriminator, which are saved under the path plus a respective
        annotation. This code

        >>> gan.save("mygan.h5")

        will create the files *mygan.h5*, *mygan-generator.h5*, and
        *mygan-discriminator.h5*.
        """
        self.gan.save(path)
        self.generator.save(append_to_filepath(path, "-generator"))
        self.discriminator.save(append_to_filepath(path, "-discriminator"))
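# --- Hedged usage sketch, not from the original source ---
# A minimal pairing for the GAN wrapper above, assuming 28x28x1 images and the
# 100-dimensional noise prior hard-coded in fit() (the network shapes here are
# assumptions, not the authors' setup). The discriminator is compiled with an
# accuracy metric because fit() reads d_loss[1].
from keras import layers, optimizers
from keras.models import Model
import numpy as np

z = layers.Input(shape=(100,))
h = layers.Dense(28 * 28, activation='tanh')(z)
g_out = layers.Reshape((28, 28, 1))(h)
generator = Model(z, g_out)

img = layers.Input(shape=(28, 28, 1))
d = layers.Flatten()(img)
d = layers.Dense(1, activation='sigmoid')(d)
discriminator = Model(img, d)
discriminator.compile(optimizer=optimizers.Adam(0.0002),
                      loss='binary_crossentropy', metrics=['accuracy'])

gan = GAN(generator=generator, discriminator=discriminator)
gan.compile(optimizer=optimizers.Adam(0.0002), loss='binary_crossentropy')
# gan.fit(x=np.random.rand(256, 28, 28, 1), batch_size=32, epochs=10)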
class PolicyValueNet():
    """policy-value network """

    def __init__(self, board_width, board_height, model_file=None):
        self.board_width = board_width
        self.board_height = board_height
        self.l2_const = 1e-4  # coef of l2 penalty
        self.create_policy_value_net()
        self._loss_train_op()

        if model_file:
            net_params = pickle.load(open(model_file, 'rb'))
            self.model.set_weights(net_params)

    def create_policy_value_net(self):
        """create the policy value network """
        in_x = network = Input((4, self.board_width, self.board_height))

        # conv layers
        network = Conv2D(filters=32, kernel_size=(3, 3), padding="same",
                         data_format="channels_first", activation="relu",
                         kernel_regularizer=l2(self.l2_const))(network)
        network = Conv2D(filters=64, kernel_size=(3, 3), padding="same",
                         data_format="channels_first", activation="relu",
                         kernel_regularizer=l2(self.l2_const))(network)
        network = Conv2D(filters=128, kernel_size=(3, 3), padding="same",
                         data_format="channels_first", activation="relu",
                         kernel_regularizer=l2(self.l2_const))(network)
        # action policy layers
        policy_net = Conv2D(filters=4, kernel_size=(1, 1),
                            data_format="channels_first", activation="relu",
                            kernel_regularizer=l2(self.l2_const))(network)
        policy_net = Flatten()(policy_net)
        self.policy_net = Dense(self.board_width * self.board_height,
                                activation="softmax",
                                kernel_regularizer=l2(self.l2_const))(policy_net)
        # state value layers
        value_net = Conv2D(filters=2, kernel_size=(1, 1),
                           data_format="channels_first", activation="relu",
                           kernel_regularizer=l2(self.l2_const))(network)
        value_net = Flatten()(value_net)
        value_net = Dense(64, kernel_regularizer=l2(self.l2_const))(value_net)
        self.value_net = Dense(1, activation="tanh",
                               kernel_regularizer=l2(self.l2_const))(value_net)

        self.model = Model(in_x, [self.policy_net, self.value_net])
        self.model.summary()

        def policy_value(state_input):
            state_input_union = np.array(state_input)
            results = self.model.predict_on_batch(state_input_union)
            return results

        self.policy_value = policy_value

    def policy_value_fn(self, board):
        """
        input: board
        output: a list of (action, probability) tuples for each available
        action and the score of the board state
        """
        legal_positions = board.availables
        current_state = board.current_state()
        act_probs, value = self.policy_value(
            current_state.reshape(-1, 4, self.board_width, self.board_height))
        act_probs = zip(legal_positions, act_probs.flatten()[legal_positions])
        return act_probs, value[0][0]

    def _loss_train_op(self):
        """
        Three loss terms:
        loss = (z - v)^2 - pi^T * log(p) + c||theta||^2
        """
        # get the train op
        opt = Adam()
        losses = ['categorical_crossentropy', 'mean_squared_error']
        self.model.compile(optimizer=opt, loss=losses)

        def self_entropy(probs):
            return -np.mean(np.sum(probs * np.log(probs + 1e-10), axis=1))

        def train_step(state_input, mcts_probs, winner, learning_rate):
            state_input_union = np.array(state_input)
            mcts_probs_union = np.array(mcts_probs)
            winner_union = np.array(winner)
            loss = self.model.evaluate(state_input_union,
                                       [mcts_probs_union, winner_union],
                                       batch_size=len(state_input), verbose=0)
            action_probs, _ = self.model.predict_on_batch(state_input_union)
            entropy = self_entropy(action_probs)
            K.set_value(self.model.optimizer.lr, learning_rate)
            self.model.fit(state_input_union, [mcts_probs_union, winner_union],
                           batch_size=len(state_input), verbose=0)
            return loss[0], entropy

        self.train_step = train_step

    def get_policy_param(self):
        net_params = self.model.get_weights()
        return net_params

    def save_model(self, model_file):
        """ save model params to file """
        net_params = self.get_policy_param()
        pickle.dump(net_params, open(model_file, 'wb'), protocol=2)
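# --- Hedged usage sketch, not from the original source ---
# Typical round trip for the policy-value net above (the board size is assumed):
net = PolicyValueNet(board_width=8, board_height=8)
# probs, values = net.policy_value(state_batch)  # state_batch: (N, 4, 8, 8)
# loss, entropy = net.train_step(states, mcts_probs, winners, learning_rate=2e-3)
net.save_model('current_policy.model')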
def Alexnet():
    inputs = Input(shape=(227, 227, 3))
    x = Conv2D(96, (11, 11), strides=(4, 4), input_shape=(227, 227, 3),
               padding='valid', name='conv1',
               kernel_initializer=RandomNormal(0.0, 0.01))(inputs)
    # x = BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001, center=True, scale=True)(x)
    x = Activation('relu')(x)
    x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2))(x)

    x = Conv2D(256, (5, 5), strides=(1, 1), name='conv2', padding='same',
               bias_initializer='ones',
               kernel_initializer=RandomNormal(0.0, 0.01))(x)
    # x = BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001, center=True, scale=True)(x)
    x = Activation('relu')(x)
    x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2))(x)

    x = Conv2D(384, (3, 3), strides=(1, 1), name='conv3', padding='same',
               kernel_initializer=RandomNormal(0, 0.01))(x)
    # x = BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001, center=True, scale=True)(x)
    x = Activation('relu')(x)

    x = Conv2D(384, (3, 3), strides=(1, 1), name='conv4', padding='same',
               bias_initializer='ones',
               kernel_initializer=RandomNormal(0, 0.01))(x)
    # x = BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001, center=True, scale=True)(x)
    x = Activation('relu')(x)

    x = Conv2D(256, (3, 3), strides=(1, 1), name='conv5', padding='same',
               bias_initializer='ones',
               kernel_initializer=RandomNormal(0, 0.01))(x)
    # x = BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001, center=True, scale=True)(x)
    x = Activation('relu')(x)
    x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2))(x)

    x = Flatten()(x)
    x = Dense(4096, kernel_initializer=RandomNormal(0.0, 0.01),
              bias_initializer='ones', name='fc6')(x)
    # x = BatchNormalization(axis=1, momentum=0.99, epsilon=0.001, center=True, scale=True)(x)
    x = Activation('relu')(x)
    x = Dropout(0.5)(x)

    x = Dense(4096, kernel_initializer=RandomNormal(0.0, 0.01),
              bias_initializer='ones', name='fc7')(x)
    # x = BatchNormalization(axis=1, momentum=0.99, epsilon=0.001, center=True, scale=True)(x)
    x = Activation('relu')(x)
    x = Dropout(0.5)(x)

    x = Dense(1000, kernel_initializer=RandomNormal(0.0, 0.01),
              bias_initializer='ones', name='fc8')(x)
    predictions = Activation('softmax')(x)

    model = Model(inputs=inputs, outputs=predictions)
    model.summary()
    # model.load_weights('again_model_10.hdf5', by_name=True)

    for i in ['conv1', 'conv2', 'conv3', 'conv4', 'conv5', 'fc8', 'fc6', 'fc7']:
        layer = model.get_layer(name=i)
        # with h5.File('model_0.54.h5', mode='r') as f:
        with h5.File('model.h5', mode='r') as f:
            x1 = f['model_14/' + i + '_9/kernel:0'].value
            x2 = f['model_14/' + i + '_9/bias:0'].value
        K.set_value(layer.weights[0], x1)  # set the weight values
        K.set_value(layer.weights[1], x2)
    # model.load_weights('F:/weight_point/epoch_10.h5py', by_name=True)
    return model
def build_generator(self):
    input_channel = 82
    output_channel = 66
    input_shape = (input_channel, 32, 32)
    img_input = Input(shape=input_shape, name='input')
    depth = input_channel

    # note: every Conv2D below already applies relu inline, so the explicit
    # Activation('relu') layers apply it a second time (relu is idempotent).
    # note: data_format is 'channels_first' but BatchNormalization keeps the
    # default axis=-1; axis=1 may have been intended. All BatchNormalization
    # layers used only default parameters, written here compactly.
    c1 = Conv2D(depth * 2, (3, 3), strides=(1, 1), activation='relu',
                input_shape=input_shape, padding='same',
                data_format='channels_first')(img_input)
    b1 = BatchNormalization(axis=-1)(c1)
    act1 = Activation('relu')(b1)

    c2 = Conv2D(depth * 2, (3, 3), strides=(2, 2), activation='relu',
                padding='same', data_format='channels_first')(act1)
    b2 = BatchNormalization(axis=-1)(c2)
    act2 = Activation('relu')(b2)

    c3 = Conv2D(depth * 4, (3, 3), strides=(1, 1), activation='relu',
                padding='same', data_format='channels_first')(act2)
    b3 = BatchNormalization(axis=-1)(c3)
    act3 = Activation('relu')(b3)

    c4 = Conv2D(depth * 4, (3, 3), strides=(2, 2), activation='relu',
                padding='same', data_format='channels_first')(act3)
    b4 = BatchNormalization(axis=-1)(c4)
    act4 = Activation('relu')(b4)

    c5 = Conv2D(depth * 8, (3, 3), strides=(1, 1), activation='relu',
                padding='same', data_format='channels_first')(act4)
    b5 = BatchNormalization(axis=-1)(c5)
    act5 = Activation('relu')(b5)

    c6 = Conv2D(depth * 8, (3, 3), strides=(1, 1), activation='relu',
                padding='same', data_format='channels_first')(act5)
    b6 = BatchNormalization(axis=-1)(c6)
    act6 = Activation('relu')(b6)

    c7 = Conv2D(depth * 8, (3, 3), strides=(2, 2), activation='relu',
                padding='same', data_format='channels_first')(act6)
    b7 = BatchNormalization(axis=-1)(c7)
    act7 = Activation('relu')(b7)

    # decoder with additive skip connections back to the encoder activations
    ct1 = Conv2DTranspose(depth * 8, (3, 3), strides=(2, 2), activation='relu',
                          padding='same', data_format='channels_first')(act7)
    act8 = Activation('relu')(ct1)
    act8_output = Lambda(lambda x: x, name='act8_output')(act8)
    act8_output = keras.layers.Add()([act6, act8_output])

    ct2 = Conv2DTranspose(depth * 8, (3, 3), strides=(1, 1), activation='relu',
                          padding='same', data_format='channels_first')(act8_output)
    act9 = Activation('relu')(ct2)
    act9_output = Lambda(lambda x: x, name='act9_output')(act9)
    act9_output = keras.layers.Add()([act5, act9_output])

    ct3 = Conv2DTranspose(depth * 4, (3, 3), strides=(1, 1), activation='relu',
                          padding='same', data_format='channels_first')(act9_output)
    act10 = Activation('relu')(ct3)
    act10_output = Lambda(lambda x: x, name='act10_output')(act10)
    act10_output = keras.layers.Add()([act4, act10_output])

    ct4 = Conv2DTranspose(depth * 4, (3, 3), strides=(2, 2), activation='relu',
                          padding='same', data_format='channels_first')(act10_output)
    act11 = Activation('relu')(ct4)
    act11_output = Lambda(lambda x: x, name='act11_output')(act11)
    act11_output = keras.layers.Add()([act3, act11_output])

    ct5 = Conv2DTranspose(depth * 2, (3, 3), strides=(1, 1), activation='relu',
                          padding='same', data_format='channels_first')(act11_output)
    act12 = Activation('relu')(ct5)
    act12_output = Lambda(lambda x: x, name='act12_output')(act12)
    act12_output = keras.layers.Add()([act2, act12_output])

    ct6 = Conv2DTranspose(depth * 2, (3, 3), strides=(2, 2), activation='relu',
                          padding='same', data_format='channels_first')(act12_output)
    act13 = Activation('relu')(ct6)
    act13_output = Lambda(lambda x: x, name='act13_output')(act13)
    act13_output = keras.layers.Add()([act1, act13_output])

    ct7 = Conv2DTranspose(depth, (3, 3), strides=(1, 1), activation='relu',
                          padding='same', data_format='channels_first')(act13_output)
    act14 = Activation('relu')(ct7)
    act14_output = Lambda(lambda x: x, name='output')(act14)
    act14_output = keras.layers.Add()([img_input, act14_output])

    ct8 = Conv2DTranspose(output_channel, (3, 3), strides=(1, 1), activation='relu',
                          padding='same', data_format='channels_first')(act14_output)
    act15 = Activation('relu')(ct8)
    img_output = act15

    model = Model(inputs=[img_input], outputs=[img_output])
    model.summary()
    return model
def __build_model(game_size):
    in_x = x = Input((1, game_size, game_size))
    # cnn_filter_num = game_size*game_size*2
    cnn_filter_num = 128
    n_labels = game_size * game_size
    value_fc_size = game_size * game_size
    cnn_first_filter_size = 2
    cnn_filter_size = 2
    l2_reg = 0.0001
    res_layer_num = 4
    learning_rate = 1
    momentum = 0.9

    # begin layers
    x = Conv2D(filters=cnn_filter_num, kernel_size=cnn_first_filter_size,
               padding="same", data_format="channels_first", use_bias=False,
               kernel_regularizer=l2(l2_reg))(x)
    x = BatchNormalization(axis=1)(x)
    x = Activation("relu")(x)

    # centre residual layers
    for i in range(res_layer_num):
        temp_x = x
        index = i + 1  # note: unused
        x = Conv2D(filters=cnn_filter_num, kernel_size=cnn_filter_size,
                   padding="same", data_format="channels_first", use_bias=False,
                   kernel_regularizer=l2(l2_reg))(x)
        x = BatchNormalization(axis=1)(x)
        x = Activation("relu")(x)
        x = Conv2D(filters=cnn_filter_num, kernel_size=cnn_filter_size,
                   padding="same", data_format="channels_first", use_bias=False,
                   kernel_regularizer=l2(l2_reg))(x)
        x = BatchNormalization(axis=1)(x)
        x = Add()([temp_x, x])
        x = Activation("relu")(x)

    # end layers
    res_out = x

    # policy_out
    x = Conv2D(filters=2, kernel_size=1, data_format="channels_first",
               use_bias=False, kernel_regularizer=l2(l2_reg))(res_out)
    x = BatchNormalization(axis=1)(x)
    x = Activation("relu")(x)
    x = Flatten()(x)
    policy_out = Dense(n_labels, kernel_regularizer=l2(l2_reg),
                       activation="softmax", name="policy_out")(x)

    # value_out
    x = Conv2D(filters=4, kernel_size=1, data_format="channels_first",
               use_bias=False, kernel_regularizer=l2(l2_reg))(res_out)
    x = BatchNormalization(axis=1)(x)
    x = Activation("relu")(x)
    x = Flatten()(x)
    x = Dense(value_fc_size, kernel_regularizer=l2(l2_reg),
              activation="relu")(x)
    value_out = Dense(1, kernel_regularizer=l2(l2_reg),
                      activation="tanh", name="value_out")(x)

    # compile model
    model = Model(in_x, [policy_out, value_out])
    # note: `sgd` is constructed but never used; compile() below uses 'adam'
    sgd = optimizers.SGD(lr=learning_rate, momentum=momentum)
    losses = ['categorical_crossentropy', 'mean_squared_error']
    model.compile(loss=losses, optimizer='adam', metrics=['accuracy', 'mae'])
    model.summary()
    return model
class QNetwork:
    def __init__(self, config: Config) -> None:
        self.config = config
        self.digest = None

    def build(self) -> None:
        mc = self.config.model
        in_x = x = Input((4, 5, 5))
        x = Conv2D(filters=mc.cnn_filter_num, kernel_size=mc.cnn_filter_size,
                   padding="same", data_format="channels_first",
                   kernel_regularizer=l2(mc.l2_reg))(x)
        x = BatchNormalization(axis=1)(x)
        x = Activation("relu")(x)
        for _ in range(mc.res_layer_num):
            x = self._build_residual_block(x)
        res_out = x

        # for policy output
        x = Conv2D(filters=2, kernel_size=1, data_format="channels_first",
                   kernel_regularizer=l2(mc.l2_reg))(res_out)
        x = BatchNormalization(axis=1)(x)
        x = Activation("relu")(x)
        x = Flatten()(x)
        # no output for 'pass'
        out = Dense(100, kernel_regularizer=l2(mc.l2_reg), activation="softmax",
                    name="out")(x)
        # x = Dense(mc.value_fc_size, kernel_regularizer=l2(mc.l2_reg),
        #           activation="relu")(x)
        # value_out = Dense(1, kernel_regularizer=l2(mc.l2_reg),
        #                   activation="tanh", name="value_out")(x)
        self.model = Model(in_x, out, name="slipe_model")
        self.model.compile(loss='mse', optimizer=Adam(lr=mc.learning_rate))
        self.model.summary()

    def _build_residual_block(self, x):
        mc = self.config.model
        in_x = x
        x = Conv2D(filters=mc.cnn_filter_num, kernel_size=mc.cnn_filter_size,
                   padding="same", data_format="channels_first",
                   kernel_regularizer=l2(mc.l2_reg))(x)
        x = BatchNormalization(axis=1)(x)
        x = Activation("relu")(x)
        x = Conv2D(filters=mc.cnn_filter_num, kernel_size=mc.cnn_filter_size,
                   padding="same", data_format="channels_first",
                   kernel_regularizer=l2(mc.l2_reg))(x)
        x = BatchNormalization(axis=1)(x)
        x = Add()([in_x, x])
        x = Activation("relu")(x)
        return x

    # Train the weights from replayed experience.
    def replay(self, memory: Memory, batch_size: int, gamma: float,
               targetQN: 'QNetwork') -> None:
        inputs = np.zeros((batch_size, 4, 5, 5))
        targets = np.zeros((batch_size, 100))
        mini_batch = memory.sample(batch_size)

        for i, (state_b, action_b, reward_b, next_state_b) in enumerate(mini_batch):
            inputs[i] = state_b  # shape=(4, 5, 5)
            target = reward_b  # type: int

            # if not (next_state_b == 0).all():
            # Value estimation: the Q-network that selects the action and the
            # Q-network that evaluates it are kept separate so DDQN also works.
            # Note that predict() expects a batch dimension on next_state_b.
            retmainQs = self.model.predict(next_state_b)
            next_action = np.argmax(retmainQs)  # choose the action with the highest predicted value
            target = reward_b + gamma * \
                targetQN.model.predict(next_state_b)[0][next_action]

            # Copy the whole Q-vector as the base target (the original indexed
            # [0][0], which broadcast a single scalar into all 100 targets).
            targets[i] = self.model.predict(state_b)[0]
            targets[i, action_b] = target  # teacher signal; action_b: int < 100

        # epochs is the number of passes over this batch; verbose=0 disables logging
        self.model.fit(inputs, targets, epochs=1, verbose=0)

    @staticmethod
    def fetch_digest(weight_path: str):
        if os.path.exists(weight_path):
            m = hashlib.sha256()
            with open(weight_path, "rb") as f:
                m.update(f.read())
            return m.hexdigest()

    def load(self, config_path: str, weight_path: str) -> bool:
        if os.path.exists(weight_path):  # os.path.exists(config_path) and
            logger.debug(f"loading model from {config_path}")
            with open(config_path, "rt") as f:
                self.model = Model.from_config(json.load(f))
            self.model.load_weights(weight_path)
            self.model.compile(
                loss='mse',
                optimizer=Adam(lr=self.config.model.learning_rate))
            self.model.summary()
            self.digest = self.fetch_digest(weight_path)
            logger.debug(f"loaded model digest = {self.digest}")
            return True
        else:
            logger.debug(
                f"model files do not exist at {config_path} and {weight_path}")
            return False

    def save(self, config_path: str, weight_path: str) -> None:
        logger.debug(f"save model to {config_path}")
        with open(config_path, "wt") as f:
            json.dump(self.model.get_config(), f)
        self.model.save_weights(weight_path)
        self.digest = self.fetch_digest(weight_path)
        logger.debug(f"saved model digest = {self.digest}")
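# A minimal sketch of how QNetwork above is typically driven: one online
# network, one target network, and a periodic weight sync (the standard
# DQN/DDQN pattern its replay() implements). The _Config and _Memory classes
# here are illustrative stand-ins, not the project's real Config/Memory.
import random
from collections import deque

import numpy as np


class _ModelConfig:
    cnn_filter_num = 32
    cnn_filter_size = 3
    res_layer_num = 2
    l2_reg = 1e-4
    learning_rate = 1e-3


class _Config:
    model = _ModelConfig()


class _Memory:
    """Illustrative replay buffer of (state, action, reward, next_state) tuples."""

    def __init__(self, maxlen=10000):
        self.buffer = deque(maxlen=maxlen)

    def add(self, experience):
        self.buffer.append(experience)

    def sample(self, batch_size):
        return random.sample(self.buffer, batch_size)

    def __len__(self):
        return len(self.buffer)


main_qn = QNetwork(_Config())
main_qn.build()
target_qn = QNetwork(_Config())
target_qn.build()

memory = _Memory()
# ... play episodes and memory.add((state, action, reward, next_state)) ...
# Then, once enough experience has accumulated:
# main_qn.replay(memory, batch_size=32, gamma=0.99, targetQN=target_qn)
# target_qn.model.set_weights(main_qn.model.get_weights())  # periodic target sync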
x = MaxPooling2D((2, 2), strides=(2, 2))(x) x = Dropout(0.3)(x) x = Flatten()(x) x = Dense(512, activation=None)(x) x = BatchNormalization()(x) x = advanced_activations.LeakyReLU(alpha=0.1)(x) logits = Dense(num_classes, activation=None)(x) output = Activation('softmax')(logits) opt = keras.optimizers.Adam(lr=0.003, beta_1=0.9, beta_2=0.999, epsilon=1e-08) model = Model(input_layer, output) model.summary() plot_model(model, show_shapes=True, to_file='teacher_model.png') model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy']) print('Using real-time data augmentation.') datagen = ImageDataGenerator( featurewise_center=False, # set input mean to 0 over the dataset samplewise_center=False, # set each sample mean to 0 featurewise_std_normalization=False, # divide inputs by std of the dataset samplewise_std_normalization=False, # divide each input by its std zca_whitening=False, # apply ZCA whitening zca_epsilon=1e-06, # epsilon for ZCA whitening rotation_range=20, # randomly rotate images in the range (degrees, 0 to 180)
class AE(Model):
    """
    Autoencoder. This is a simple autoencoder consisting of an encoder and a decoder.

    You can use the class like this:
    >>> encoder = ...
    >>> decoder = ...
    >>> ae = AE(encoder=encoder, decoder=decoder)
    >>> ae.compile(...)
    >>> ae.fit(...)
    """

    def __init__(self, encoder=None, decoder=None, autoencoder=None):
        super(AE, self).__init__()

        # For calling this as a super-constructor.
        parameters = [encoder, decoder]
        if all(v is None for v in parameters):
            return

        # From loading.
        if encoder is not None and decoder is not None and autoencoder is not None:
            self.encoder = encoder
            self.decoder = decoder
            self.autoencoder = autoencoder
            return

        # Check preconditions.
        assert len(encoder.outputs) == 1
        assert len(decoder.inputs) == 1
        assert encoder.outputs[0].shape[1:] == decoder.inputs[0].shape[1:], \
            str(encoder.outputs[0].shape) + " " + str(decoder.inputs[0].shape)
        self.latent_dim = encoder.outputs[0].shape[1]

        self.encoder = encoder
        self.decoder = decoder

        # Creating the AE.
        inputs = self.encoder.inputs[0]
        outputs = self.decoder(self.encoder(inputs))
        self.autoencoder = Model(inputs, outputs, name='ae')

    def compile(self, optimizer, loss=None, metrics=None, loss_weights=None,
                sample_weight_mode=None, weighted_metrics=None,
                target_tensors=None, **kwargs):
        """
        Compiles the model. This is the same as compilation in Keras.
        """
        assert "reconstruction_loss" not in kwargs, "Not expected to use reconstruction_loss in AE."
        # target_tensors is now forwarded too (it was silently dropped before).
        self.autoencoder.compile(optimizer, loss, metrics, loss_weights,
                                 sample_weight_mode, weighted_metrics,
                                 target_tensors, **kwargs)

    def fit(self, x=None, y=None, batch_size=None, epochs=1, verbose=1,
            callbacks=None, validation_split=0., validation_data=None,
            shuffle=True, class_weight=None, sample_weight=None,
            initial_epoch=0, steps_per_epoch=None, validation_steps=None,
            **kwargs):
        """
        Trains the autoencoder.
        """
        return self.autoencoder.fit(x, y, batch_size, epochs, verbose,
                                    callbacks, validation_split,
                                    validation_data, shuffle, class_weight,
                                    sample_weight, initial_epoch,
                                    steps_per_epoch, validation_steps,
                                    **kwargs)

    def fit_generator(self, generator, steps_per_epoch=None, epochs=1,
                      verbose=1, callbacks=None, validation_data=None,
                      validation_steps=None, class_weight=None,
                      max_queue_size=10, workers=1, use_multiprocessing=False,
                      shuffle=True, initial_epoch=0):
        """
        Trains the autoencoder with a generator.
        """
        return self.autoencoder.fit_generator(
            generator, steps_per_epoch, epochs,
            verbose=verbose,
            callbacks=callbacks,
            validation_data=validation_data,
            validation_steps=validation_steps,
            class_weight=class_weight,
            max_queue_size=max_queue_size,
            workers=workers,
            use_multiprocessing=use_multiprocessing,
            shuffle=shuffle,
            initial_epoch=initial_epoch)

    def evaluate(self, x=None, y=None, batch_size=None, verbose=1,
                 sample_weight=None, steps=None):
        """
        Evaluates the autoencoder.
        """
        # was steps=None, which silently ignored the caller's argument
        return self.autoencoder.evaluate(x, y, batch_size, verbose,
                                         sample_weight, steps=steps)

    def predict(self, x, batch_size=None, verbose=0, steps=None):
        """
        Does a prediction. This is the same as
        :func:`~ngdlm.models.AE.predict_reconstruct_from_samples`.
        """
        return self.predict_reconstruct_from_samples(x, batch_size, verbose, steps)

    def predict_reconstruct_from_samples(self, x, batch_size=None, verbose=0, steps=None):
        """
        Reconstructs samples.

        Samples are first mapped to latent space using the encoder.
        The resulting latent vectors are then mapped to reconstruction space
        via the decoder.
        """
        return self.autoencoder.predict(x, batch_size, verbose, steps)

    def predict_embed_samples_into_latent(self, x, batch_size=None, verbose=0, steps=None):
        """
        Embeds samples into latent space using the encoder.
        """
        return self.encoder.predict(x, batch_size, verbose, steps)

    def predict_reconstruct_from_latent(self, x, batch_size=None, verbose=0, steps=None):
        """
        Maps latent vectors to reconstruction space using the decoder.
        """
        return self.decoder.predict(x, batch_size, verbose, steps)

    def summary(self):
        """
        Provides a summary.
        """
        print("Encoder:")
        self.encoder.summary()
        print("Decoder:")
        self.decoder.summary()
        print("Autoencoder:")
        self.autoencoder.summary()

    def save(self, path):
        """
        Saves the autoencoder.

        This includes the whole autoencoder plus the encoder and the decoder.
        The encoder and decoder use the path plus a respective annotation.

        This code

        >>> ae.save("myae.h5")

        will create the files *myae.h5*, *myae-encoder.h5*, and *myae-decoder.h5*.
        """
        self.autoencoder.save(path)
        self.encoder.save(append_to_filepath(path, "-encoder"))
        self.decoder.save(append_to_filepath(path, "-decoder"))
def test_model_with_input_feed_tensor():
    """We test building a model with a TF variable as input.
    We should be able to call fit, evaluate, and predict
    by passing only data for the placeholder inputs in the model.
    """
    import tensorflow as tf

    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32))
    b = Input(shape=(3,), name='input_b')

    a_2 = Dense(4, name='dense_1')(a)
    dp = Dropout(0.5, name='dropout')
    b_2 = dp(b)

    model = Model([a, b], [a_2, b_2])
    model.summary()

    optimizer = 'rmsprop'
    loss = 'mse'
    loss_weights = [1., 0.5]
    model.compile(optimizer, loss, metrics=['mean_squared_error'],
                  loss_weights=loss_weights,
                  sample_weight_mode=None)

    # test train_on_batch (calls are repeated to check that repeated
    # invocations also work)
    out = model.train_on_batch(input_b_np, [output_a_np, output_b_np])
    out = model.train_on_batch({'input_b': input_b_np},
                               [output_a_np, output_b_np])
    out = model.test_on_batch({'input_b': input_b_np},
                              [output_a_np, output_b_np])
    out = model.predict_on_batch({'input_b': input_b_np})

    # test fit
    out = model.fit({'input_b': input_b_np},
                    [output_a_np, output_b_np], epochs=1, batch_size=10)
    out = model.fit(input_b_np,
                    [output_a_np, output_b_np], epochs=1, batch_size=10)

    # test evaluate
    out = model.evaluate({'input_b': input_b_np},
                         [output_a_np, output_b_np], batch_size=10)
    out = model.evaluate(input_b_np,
                         [output_a_np, output_b_np], batch_size=10)

    # test predict
    out = model.predict({'input_b': input_b_np}, batch_size=10)
    out = model.predict(input_b_np, batch_size=10)
    assert len(out) == 2

    # Now test a model with a single input,
    # i.e. we don't pass any data to fit the model.
    a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32))
    a_2 = Dense(4, name='dense_1')(a)
    a_2 = Dropout(0.5, name='dropout')(a_2)
    model = Model(a, a_2)
    model.summary()

    optimizer = 'rmsprop'
    loss = 'mse'
    model.compile(optimizer, loss, metrics=['mean_squared_error'])

    # test train_on_batch
    out = model.train_on_batch(None, output_a_np)
    out = model.train_on_batch(None, output_a_np)
    out = model.test_on_batch(None, output_a_np)
    out = model.predict_on_batch(None)
    out = model.train_on_batch([], output_a_np)
    out = model.train_on_batch({}, output_a_np)

    # test fit
    out = model.fit(None, output_a_np, epochs=1, batch_size=10)
    out = model.fit(None, output_a_np, epochs=1, batch_size=10)

    # test evaluate
    out = model.evaluate(None, output_a_np, batch_size=10)
    out = model.evaluate(None, output_a_np, batch_size=10)

    # test predict: each of the 3 steps re-reads the 10-row variable,
    # so the outputs concatenate to shape (30, 4)
    out = model.predict(None, steps=3)
    out = model.predict(None, steps=3)
    assert out.shape == (10 * 3, 4)

    # Same, without learning phase,
    # i.e. we don't pass any data to fit the model.
a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32)) a_2 = Dense(4, name='dense_1')(a) model = Model(a, a_2) model.summary() optimizer = 'rmsprop' loss = 'mse' model.compile(optimizer, loss, metrics=['mean_squared_error']) # test train_on_batch out = model.train_on_batch(None, output_a_np) out = model.train_on_batch(None, output_a_np) out = model.test_on_batch(None, output_a_np) out = model.predict_on_batch(None) out = model.train_on_batch([], output_a_np) out = model.train_on_batch({}, output_a_np) # test fit out = model.fit(None, output_a_np, epochs=1, batch_size=10) out = model.fit(None, output_a_np, epochs=1, batch_size=10) # test evaluate out = model.evaluate(None, output_a_np, batch_size=10) out = model.evaluate(None, output_a_np, batch_size=10) # test predict out = model.predict(None, steps=3) out = model.predict(None, steps=3) assert out.shape == (10 * 3, 4)
class TL(Model):
    """
    Triplet-loss trained neural network.
    https://arxiv.org/abs/1503.03832
    """

    def __init__(self, base=None, siamese=None):
        super(TL, self).__init__()

        # Store the base model.
        assert base is not None
        self.base = base

        # For loading.
        if siamese is not None:
            self.base = base
            self.siamese = siamese
            self.latent_dim = self.base.outputs[0].shape[1]
            return

        # Get the latent dimension.
        assert len(self.base.outputs) == 1
        assert len(self.base.outputs[0].shape) == 2
        self.latent_dim = self.base.outputs[0].shape[1]

        # Get the input shape.
        input_shape = self.base.inputs[0].shape.as_list()[1:]

        # Create the anchor.
        input_anchor = layers.Input(shape=input_shape)
        output_anchor = self.base(input_anchor)

        # Create the positive.
        input_positive = layers.Input(shape=input_shape)
        output_positive = self.base(input_positive)

        # Create the negative.
        input_negative = layers.Input(shape=input_shape)
        output_negative = self.base(input_negative)

        # Create a dummy output.
        output = layers.concatenate(
            [output_anchor, output_positive, output_negative])

        # Create the model.
        self.siamese = Model([input_anchor, input_positive, input_negative],
                             output, name="triplet_model")

    def compile(self, optimizer, loss=None, metrics=None, loss_weights=None,
                sample_weight_mode=None, weighted_metrics=None,
                target_tensors=None, triplet_loss="euclidean", **kwargs):
        """
        Compiles the TL.

        In addition to the default functionality of *compile*, this adds the
        triplet loss, which you provide via the parameter *triplet_loss*.
        The triplet loss is similar to

        >>> triplet_loss = max(0.0, pos_dist - neg_dist + alpha)

        See the literature for details.

        Additional args:
            triplet_loss (string): The base loss for the triplet loss. Either
                *euclidean* for the euclidean norm or *cosine* for cosine
                similarity. (The previous default "mse" was never handled
                below and would always raise.)
        """
        assert loss is None, "Not expected to provide an explicit loss for TL. Use 'triplet_loss'."

        self.triplet_loss = triplet_loss

        def triplet_loss_function(y_true, y_pred, alpha=0.4):
            anchor = y_pred[:, 0:self.latent_dim]
            positive = y_pred[:, self.latent_dim:self.latent_dim * 2]
            negative = y_pred[:, self.latent_dim * 2:self.latent_dim * 3]
            if triplet_loss == "euclidean":
                pos_dist = euclidean_loss(positive, anchor)
                neg_dist = euclidean_loss(negative, anchor)
            elif triplet_loss == "cosine":
                pos_dist = cosine_loss(positive, anchor)
                neg_dist = cosine_loss(negative, anchor)
            else:
                raise Exception("Unexpected triplet_loss: " + triplet_loss)
            basic_loss = pos_dist - neg_dist + alpha
            return K.maximum(basic_loss, 0.0)

        loss = triplet_loss_function
        self.siamese.compile(optimizer, loss, metrics, loss_weights,
                             sample_weight_mode, weighted_metrics, **kwargs)

    def fit(self, x=None, y=None, batch_size=None, minibatch_size=None,
            epochs=1, verbose=1, callbacks=None, validation_split=0.,
            validation_data=None, shuffle=True, class_weight=None,
            sample_weight=None, initial_epoch=0, steps_per_epoch=None,
            validation_steps=None, **kwargs):
        """
        This is basically the same as in vanilla Keras.

        Additional args:
            minibatch_size (int): The model internally does some sampling.
                *minibatch_size* specifies how many candidates to consider
                when assembling a triplet for training.
        """
        assert minibatch_size is not None, "ERROR! Must provide 'minibatch_size'."
        assert steps_per_epoch is not None, "ERROR! Must provide 'steps_per_epoch'."
        assert validation_steps is not None, "ERROR! Must provide 'validation_steps'."

        y_dummy = np.zeros((batch_size, self.latent_dim * 3))

        # Template generator. (The unused 'model' and 'sampling' parameters of
        # the original signature are dropped; the closure uses self directly,
        # which also fixes the mismatched call sites below.)
        def triplet_loss_generator(x_generator, y_generator):
            # Get the classes.
            classes = sorted(list(set(y_generator)))

            # Sort by classes for easy indexing.
            class_indices = {c: [] for c in classes}
            for index, c in enumerate(y_generator):
                class_indices[c].append(index)

            # Compute the complements.
            class_complements = {c: [c2 for c2 in classes if c2 != c]
                                 for c in classes}

            # Generator loop.
            while True:
                x_input_anchors = []
                x_input_positives = []
                x_input_negatives = []

                # Generate a whole batch.
                for _ in range(batch_size):
                    anchor_class = random.choice(classes)
                    anchor_index = random.choice(class_indices[anchor_class])
                    anchor_input = x_generator[anchor_index]
                    anchor_latent = self.base.predict(
                        np.expand_dims(anchor_input, axis=0))[0]

                    # Generate some positive candidates.
                    positive_candidates = []
                    while len(positive_candidates) < minibatch_size:
                        positive_index = random.choice(class_indices[anchor_class])
                        positive_input = x_generator[positive_index]
                        assert anchor_class == y_generator[positive_index]
                        positive_candidates.append(positive_input)

                    # Find the farthest positive candidate.
                    positive_candidates = np.array(positive_candidates)
                    positive_latents = self.base.predict(positive_candidates)
                    positive_extremum = compute_latent_extremum(
                        anchor_latent, positive_latents, "argmax", self.triplet_loss)
                    positive_input = positive_candidates[positive_extremum]

                    # Generate some negative candidates.
                    negative_candidates = []
                    while len(negative_candidates) < minibatch_size:
                        negative_class = random.choice(class_complements[anchor_class])
                        negative_index = random.choice(class_indices[negative_class])
                        negative_input = x_generator[negative_index]
                        assert negative_class == y_generator[negative_index]
                        negative_candidates.append(negative_input)

                    # Find the closest negative candidate.
                    negative_candidates = np.array(negative_candidates)
                    negative_latents = self.base.predict(negative_candidates)
                    negative_extremum = compute_latent_extremum(
                        anchor_latent, negative_latents, "argmin", self.triplet_loss)
                    negative_input = negative_candidates[negative_extremum]

                    # Done.
                    x_input_anchors.append(anchor_input)
                    x_input_positives.append(positive_input)
                    x_input_negatives.append(negative_input)

                x_input = [np.array(x_input_anchors),
                           np.array(x_input_positives),
                           np.array(x_input_negatives)]
                yield x_input, y_dummy

        # Create the generators.
        training_generator = triplet_loss_generator(x, y)
        if validation_data is not None:
            validation_generator = triplet_loss_generator(
                validation_data[0], validation_data[1])
        else:
            validation_generator = None

        # Create the history.
        history_keys = ["loss", "val_loss"]
        history = {key: [] for key in history_keys}

        # Train the model.
        for epoch in range(epochs):
            print("Epoch " + str(epoch + 1) + "/" + str(epochs) + "...")

            # Generate data for training (and validation, if available;
            # previously validation_input was referenced even when no
            # validation data was provided).
            training_input, training_output = next(training_generator)
            fit_kwargs = {}
            if validation_generator is not None:
                validation_input, validation_output = next(validation_generator)
                fit_kwargs["validation_data"] = (validation_input, validation_output)
                fit_kwargs["validation_steps"] = validation_steps

            model_history = self.siamese.fit(
                training_input, training_output,
                epochs=1,
                steps_per_epoch=steps_per_epoch,
                verbose=0,
                **fit_kwargs)

            # Update the history.
            for history_key in history_keys:
                if history_key not in model_history.history:
                    continue
                history_value = model_history.history[history_key]
                history[history_key].append(history_value)
                print(history_key, history_value)

        return history

    def fit_generator(self, generator, steps_per_epoch=None, epochs=1,
                      verbose=1, callbacks=None, validation_data=None,
                      validation_steps=None, class_weight=None,
                      max_queue_size=10, workers=1, use_multiprocessing=False,
                      shuffle=True, initial_epoch=0):
        """
        Coming soon...
        """
        raise NotImplementedError("TODO: implement fit_generator!")

    def evaluate(self, x=None, y=None, batch_size=None, verbose=1,
                 sample_weight=None, steps=None):
        """
        Evaluates the model. Same as vanilla Keras.
        """
        # was steps=None, which silently ignored the caller's argument
        return self.siamese.evaluate(x, y, batch_size, verbose, sample_weight,
                                     steps=steps)

    def predict(self, x, batch_size=None, verbose=0, steps=None):
        """
        Does a prediction. Same as vanilla Keras.
        """
        return self.siamese.predict(x, batch_size, verbose, steps)

    def summary(self):
        """
        Provides a summary.
        """
        print("Base model:")
        self.base.summary()
        print("Siamese model:")
        self.siamese.summary()

    def save(self, path):
        """
        Saves the TL. This includes the whole Siamese net plus the base model.
        The base model uses the path plus a respective annotation.

        This code

        >>> tl.save("mytl.h5")

        will create the files *mytl.h5* and *mytl-base.h5*.
        """
        self.siamese.save(path)
        self.base.save(append_to_filepath(path, "-base"))
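# A minimal usage sketch for the TL class above. The input dimension, layer
# sizes, and random data are illustrative; euclidean_loss, cosine_loss, and
# compute_latent_extremum are helpers assumed to come from the same module
# as TL itself.
import numpy as np
from keras import layers, models

base = models.Sequential([
    layers.Dense(64, activation="relu", input_shape=(784,)),
    layers.Dense(16),  # 16-dim embedding space
])

tl = TL(base=base)
tl.compile(optimizer="adam", triplet_loss="euclidean")

x = np.random.random((256, 784))
y = np.random.randint(0, 10, size=(256,))  # 10 illustrative classes
history = tl.fit(x, y, batch_size=32, minibatch_size=8, epochs=1,
                 steps_per_epoch=1, validation_steps=1)
embeddings = tl.base.predict(x)  # shape (256, 16)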
loss = "mse" metrics = ["mae"] labels_train = y_train labels_test = y_test resulting_layer = permute_apply else: loss = "categorical_crossentropy" metrics = ["accuracy"] labels_train = enc_p_train.astype(int) labels_test = enc_p_test.astype(int) resulting_layer = sinkhorn model = Model(input, resulting_layer) model.compile(loss=loss, optimizer="adam", metrics=metrics) print(model.summary()) print("Fitting...") history = model.fit(x_train, labels_train, batch_size=args.batch_size, epochs=args.epochs, verbose=1, validation_split=0.1, callbacks=[callbacks.EarlyStopping(min_delta=0.00001, verbose=1), callbacks.ReduceLROnPlateau(verbose=1)]) K.set_learning_phase(0) print("********************************************************************") N = 5
class RLPlayer:
    """
    Implements a player that uses neural networks to play chess.

    Attributes:
    """

    def __init__(self, config=None):
        """
        Class constructor for the RL player.
        """
        self.board = None
        self.my_color = None
        self.model = None
        if config is None:
            self.config = neural_network_config.Config()
            self.config_model = neural_network_config.ModelConfig()
        else:
            # Previously a passed-in config was silently ignored; assume it
            # bundles its model config the same way the default Config does.
            self.config = config
            self.config_model = config.model
        self.node_lock = defaultdict(Lock)
        self.game_tree = {}

        # Set up multiprocessing for speed
        self.feed_input, self.return_policy_value = self.create_pipes()

        # Build a model
        self.build_model()
        losses = ['categorical_crossentropy', 'mean_squared_error']
        # loss weights help avoid overfitting during supervised training
        self.model.compile(optimizer=Adam(), loss=losses,
                           loss_weights=self.config.trainer.loss_weights)

        # Dictionaries to convert between network outputs and moves
        uci_labels = neural_network_config.create_uci_labels()
        self.move_code = {i: chess.Move.from_uci(move)
                          for i, move in enumerate(uci_labels)}
        self.move_lookup = {chess.Move.from_uci(move): i
                            for i, move in enumerate(uci_labels)}

        # Start a thread to listen on the pipes and make predictions
        self.prediction_worker = Thread(target=self._predict_batch_worker,
                                        name="prediction_worker")
        self.prediction_worker.daemon = True
        self.prediction_worker.start()

    def create_pipes(self):
        self.feed_input, self.return_policy_value = [], []
        for _ in range(30):
            me, you = Pipe()
            self.feed_input.append(me)
            self.return_policy_value.append(you)
        return self.feed_input, self.return_policy_value

    def get_move(self):
        # Perform Monte-Carlo tree search (updating internal variables)
        self.MCTS()

        # Choose the most visited node (highest exponentiated visit count)
        state = state_key(self.board)
        candidate_moves = self.game_tree[state]['action']

        # Temperature controls exploration depending on the stage of the game
        board_input, move_counts = self.save_move(candidate_moves)
        if self.board.fullmove_number < 45:
            if self.board.fullmove_number < 30:
                temperature = 0.95 ** self.board.fullmove_number
            else:
                temperature = 0.1
            exp_move_counts = np.power(move_counts, 1. / temperature)
            exp_move_counts /= np.sum(exp_move_counts)
        else:
            exp_move_counts = np.zeros_like(move_counts)
            exp_move_counts[np.argmax(move_counts)] = 1

        move = np.random.choice([move for move in candidate_moves],
                                p=exp_move_counts)
        self.board.push(move)

        # Return data (useful only for self-play generation)
        normalized_move_counts = move_counts / np.sum(move_counts)
        return board_input, normalized_move_counts

    def build_model(self):
        """
        Builds the full Keras model and stores it in self.model.
""" mc = self.config_model in_x = x = Input((18, 8, 8)) # (batch, channels, height, width) x = Conv2D(filters=mc.cnn_filter_num, kernel_size=mc.cnn_first_filter_size, padding="same", data_format="channels_first", use_bias=False, kernel_regularizer=l2(mc.l2_reg), name="input_conv-"+str(mc.cnn_first_filter_size)+"-"+str(mc.cnn_filter_num))(x) x = BatchNormalization(axis=1, name="input_batchnorm")(x) x = Activation("relu", name="input_relu")(x) for i in range(mc.res_layer_num): x = self._build_residual_block(x, i + 1) res_out = x # for policy output x = Conv2D(filters=2, kernel_size=1, data_format="channels_first", use_bias=False, kernel_regularizer=l2(mc.l2_reg), name="policy_conv-1-2")(res_out) x = BatchNormalization(axis=1, name="policy_batchnorm")(x) x = Activation("relu", name="policy_relu")(x) x = Flatten(name="policy_flatten")(x) # no output for 'pass' policy_out = Dense(self.config.n_labels, kernel_regularizer=l2(mc.l2_reg), activation="softmax", name="policy_out")(x) # for value output x = Conv2D(filters=4, kernel_size=1, data_format="channels_first", use_bias=False, kernel_regularizer=l2(mc.l2_reg), name="value_conv-1-4")(res_out) x = BatchNormalization(axis=1, name="value_batchnorm")(x) x = Activation("relu",name="value_relu")(x) x = Flatten(name="value_flatten")(x) x = Dense(mc.value_fc_size, kernel_regularizer=l2(mc.l2_reg), activation="relu", name="value_dense")(x) value_out = Dense(1, kernel_regularizer=l2(mc.l2_reg), activation="tanh", name="value_out")(x) self.model = Model(in_x, [policy_out, value_out], name="chess_model") def _build_residual_block(self, x, index): mc = self.config_model in_x = x res_name = "res"+str(index) x = Conv2D(filters=mc.cnn_filter_num, kernel_size=mc.cnn_filter_size, padding="same", data_format="channels_first", use_bias=False, kernel_regularizer=l2(mc.l2_reg), name=res_name+"_conv1-"+str(mc.cnn_filter_size)+"-"+str(mc.cnn_filter_num))(x) x = BatchNormalization(axis=1, name=res_name+"_batchnorm1")(x) x = Activation("relu",name=res_name+"_relu1")(x) x = Conv2D(filters=mc.cnn_filter_num, kernel_size=mc.cnn_filter_size, padding="same", data_format="channels_first", use_bias=False, kernel_regularizer=l2(mc.l2_reg), name=res_name+"_conv2-"+str(mc.cnn_filter_size)+"-"+str(mc.cnn_filter_num))(x) x = BatchNormalization(axis=1, name="res"+str(index)+"_batchnorm2")(x) x = Add(name=res_name+"_add")([in_x, x]) x = Activation("relu", name=res_name+"_relu2")(x) return x def visualize_model(self): """ Print out model summary (contains layer names, shape of input, number of parameters, and connection to) """ self.model.summary() def MCTS(self): """ Using 30 workers (max_workers=self.play_config.search_threads) self.play_config.simulation_num_per_move = 800 """ futures = [] with ThreadPoolExecutor(max_workers = 30) as executor: for _ in range(800): # self.select_move(board=self.board.copy(),is_root_node=True) future = executor.submit(self.select_move,board=self.board.copy(),is_root_node=True) # if future.exception(): # raise ValueError # The board is copied so I don't need to pop the move # vals = [f.result() for f in futures] def select_move(self, board, is_root_node=False): """ They use virtual_loss """ # print (self.node_lock) state = state_key(board) with self.node_lock[state]: if state not in self.game_tree: # print(state) policy, value = self.forward_pass(board) # print(policy, value) # if state not in self.game_tree: # print ("I'm evaluating leaf", board.move_stack) # self.game_tree[state] = {} self.game_tree[state]['policy'] = policy 
self.game_tree[state]['action'] = defaultdict(NodeStatistics) self.game_tree[state]['total_visits'] = 1 # print (self.game_tree) # # I must have visited once before to call best_q_move method return value action = self.best_q_move(board, is_root_node) # print (action) board.push(action) # Simulate enemy_move enemy_value = self.select_move(board) value = -enemy_value actions = self.game_tree[state]['action'] with self.node_lock[state]: self.game_tree[state]['total_visits'] += 1 actions[action].n += 1 actions[action].w += value actions[action].q = actions[action].w / actions[action].n return value def best_q_move(self, board, is_root_node): """ c_puct = 1.5 """ # print ("Hi") state = state_key(board) policy = self.game_tree[state]['policy'] actions = self.game_tree[state]['action'] unnormalized_prior = [policy[self.move_lookup[move]] for move in board.legal_moves] # print (unnormalized_prior) prior = unnormalized_prior / sum(unnormalized_prior) sqrt_total_visits = np.sqrt(self.game_tree[state]['total_visits']) c_puct = 1.5 dirichlet_alpha = 0.3 noise_eps = 0.25 best_q = -np.inf best_move = None num_legal_moves = len(list(board.legal_moves)) if is_root_node: dirichlet_noise = np.random.dirichlet([dirichlet_alpha] * num_legal_moves) for index, move in enumerate(board.legal_moves): candidate_q = (actions[move].q + c_puct * prior[index] * sqrt_total_visits / (1 + actions[move].n)) if is_root_node: #add noise for exploration candidate_q = ((1 - noise_eps) * candidate_q + noise_eps * dirichlet_noise[index]) if (best_q < candidate_q): best_q = candidate_q best_move = move return best_move def forward_pass(self, board): input_planes = self.board_to_input(board, board.turn) # print (input_planes) input_pipe = self.feed_input.pop() input_pipe.send(input_planes) policy, value = input_pipe.recv() self.feed_input.append(input_pipe) return policy, value def _predict_batch_worker(self): """ Thread worker which listens on each pipe in self.pipes for an observation, and then outputs the predictions for the policy and value networks when the observations come in. Repeats. 
        ## CITE
        """
        while True:
            ready = connection.wait(self.return_policy_value, timeout=0.001)
            if not ready:
                continue
            data, result_pipes = [], []
            for pipe in ready:
                while pipe.poll():
                    data.append(pipe.recv())
                    result_pipes.append(pipe)
            data = np.asarray(data, dtype=np.float32)
            policy_array, value_array = self.model.predict_on_batch(data)
            for pipe, policy, value in zip(result_pipes, policy_array, value_array):
                pipe.send((policy, float(value)))

    def board_to_input(self, board, my_color=None):
        """
        TODO: fix the color handling; for now, assume the neural network
        always receives the input from White's perspective.

        Input: 18 planes of size (8, 8) representing the entire board.
        Boolean values: the first 6 planes represent my pawns, knights,
        bishops, rooks, queens, and king. The next 6 planes represent the
        opponent's pieces (in the same order). The next 4 planes represent my
        kingside/queenside castling rights and the opponent's kingside/
        queenside castling rights. The next plane represents the half-move
        clock (50 moves without a pawn advance or piece capture is a draw).
        The last plane represents the en passant square (if available).
        """
        if my_color is None:
            my_color = self.my_color
        pieces_planes = np.zeros(shape=(12, 8, 8), dtype=np.float32)
        board_colors = [not my_color, my_color]
        en_passant = np.zeros((8, 8), dtype=np.float32)
        if my_color == 0:
            for my_board, color in enumerate(board_colors):
                for piece in range(1, 7):
                    my_piece_position = board.pieces(piece, color)
                    rank, file = np.array([[int(i / 8) for i in list(my_piece_position)],
                                           [7 - (i % 8) for i in list(my_piece_position)]])
                    pieces_planes[(piece - 1) + (my_board + 1) % 2 * 6, rank, file] = 1
            if board.ep_square is not None:
                en_passant[int(board.ep_square / 8), 7 - (board.ep_square % 8)] = 1
        else:
            for my_board, color in enumerate(board_colors):
                for piece in range(1, 7):
                    my_piece_position = board.pieces(piece, color)
                    rank, file = np.array([[7 - int(i / 8) for i in list(my_piece_position)],
                                           [i % 8 for i in list(my_piece_position)]])
                    pieces_planes[(piece - 1) + (my_board + 1) % 2 * 6, rank, file] = 1
            if board.ep_square is not None:
                en_passant[7 - int(board.ep_square / 8), board.ep_square % 8] = 1

        auxiliary_planes = np.array([
            np.full((8, 8), board.has_kingside_castling_rights(my_color), dtype=np.float32),
            np.full((8, 8), board.has_queenside_castling_rights(my_color), dtype=np.float32),
            # was has_kingside_castling_rights(not self.my_color); use the
            # local my_color for consistency with the other three planes
            np.full((8, 8), board.has_kingside_castling_rights(not my_color), dtype=np.float32),
            np.full((8, 8), board.has_queenside_castling_rights(not my_color), dtype=np.float32),
            np.full((8, 8), board.halfmove_clock, dtype=np.float32),
            en_passant])

        return np.vstack((pieces_planes, auxiliary_planes))

    def save_move(self, candidate_moves):
        """
        Used by the self-play generator to generate move-policy data.
        """
        board_input = self.board_to_input(self.board.copy())
        move_counts = np.array([candidate_moves[move].n for move in candidate_moves])
        return board_input, move_counts
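# A small, self-contained illustration of the temperature scheme used in
# RLPlayer.get_move above: MCTS visit counts are raised to the power
# 1/temperature and normalized, so early in the game (high temperature) the
# move distribution stays exploratory, while later (low temperature) the
# most-visited move is chosen almost deterministically. The visit counts
# below are made up for illustration.
import numpy as np

move_counts = np.array([400., 250., 100., 50.])  # visit counts per candidate move

for fullmove_number in (1, 15, 35):
    if fullmove_number < 30:
        temperature = 0.95 ** fullmove_number  # decays slowly from ~1
    else:
        temperature = 0.1                      # near-greedy in the late midgame
    probs = np.power(move_counts, 1. / temperature)
    probs /= probs.sum()
    print(fullmove_number, round(temperature, 3), np.round(probs, 3))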