def do_train_noweights(model, dataset_batches, num_batches=1000):
    """Train ``model`` with unweighted sparse softmax cross-entropy.

    Args:
        model: Callable model invoked as ``model(invec, training=True)`` and
            exposing ``trainable_variables``.
        dataset_batches: Dataset yielding ``(invec, labels)`` pairs; only the
            first ``num_batches`` batches are consumed.
        num_batches: Maximum number of batches to train on. Defaults to 1000,
            the value that used to be hard-coded.

    Returns:
        List with one loss value (``loss_value.numpy()``) per processed batch.
    """
    optimizer = AdamOptimizer()
    loss_history = []
    # get_or_create returns the same step variable every time, so look it up
    # once instead of once per batch.
    global_step = tf.compat.v1.train.get_or_create_global_step()

    for invec, labels in dataset_batches.take(num_batches):
        # Only the forward pass and the loss need to be recorded on the tape.
        with tf.GradientTape() as tape:
            logits = model(invec, training=True)
            loss_value = sparse_softmax_cross_entropy(labels, logits)

        loss_history.append(loss_value.numpy())

        # Read the variable list after the forward pass so that variables
        # created lazily on the first call are included.
        variables = model.trainable_variables
        grads = tape.gradient(loss_value, variables)
        optimizer.apply_gradients(zip(grads, variables),
                                  global_step=global_step)

    return loss_history
def __init__(self, dimensions, batch_size, initialize_loss=True, lr=0.0001,
             lr_stair_width=10, lr_decay=0.95):
    """Build the three-layer convolutional upscaling graph.

    The input batch is divided by 256, bicubically resized by
    ``self.scale_factor`` and passed through three ``self.conv_layer``
    stages; the last stage is created with ``linear=True``.

    Args:
        dimensions: Indexable with width at ``[0]`` and height at ``[1]``
            (the placeholder is laid out as ``[batch, dimensions[1],
            dimensions[0], 3]``).
        batch_size: Fixed batch dimension of the placeholders.
        initialize_loss: When true, also create the target placeholder, the
            loss, its scalar summary, the decayed learning rate and the Adam
            training op.
        lr: Initial learning rate passed to ``exponential_decay``.
        lr_stair_width: Decay-steps argument passed to ``exponential_decay``.
        lr_decay: Decay-rate argument passed to ``exponential_decay``.
    """
    self.batch_size = batch_size
    self.dimensions = dimensions
    self.scale_factor = 2
    self.layer_params = []

    width = dimensions[0]
    height = dimensions[1]

    self.inputs = placeholder(tf.float32, [batch_size, height, width, 3],
                              name='input_images')
    normalized = self.inputs / 256.0
    print("inputs shape: " + str(self.inputs.get_shape()))

    upscaled_size = [height * self.scale_factor, width * self.scale_factor]
    upscaled = resize_bicubic(normalized, upscaled_size, name="scale_bicubic")

    # Stage 1: 9x9 filters.
    extraction_spec = {'filter_count': 64 * 3, 'filter_shape': [9, 9]}
    self.layer_params.append(extraction_spec)
    extracted = self.conv_layer("patch_extraction", extraction_spec, upscaled)

    # Stage 2: 1x1 filters.
    mapping_spec = {'filter_count': 32 * 3, 'filter_shape': [1, 1]}
    self.layer_params.append(mapping_spec)
    mapped = self.conv_layer("non_linear_mapping_layer", mapping_spec,
                             extracted)

    # Stage 3: 5x5 filters producing the 3-channel output.
    reconstruction_spec = {'filter_count': 3, 'filter_shape': [5, 5]}
    self.layer_params.append(reconstruction_spec)
    self.output = self.conv_layer("reconstruction_layer", reconstruction_spec,
                                  mapped, linear=True)

    if initialize_loss:
        target_shape = [self.batch_size] + [
            height * self.scale_factor, width * self.scale_factor
        ] + [3]
        self.real_images = placeholder(tf.float32, target_shape,
                                       name='real_images')
        self.loss = self.get_loss()
        self.summary = tf.summary.scalar("loss", self.loss)
        self.epoch = placeholder(tf.int32, name='epoch')
        # Staircase decay keyed on the fed epoch number.
        self.learning_rate = exponential_decay(lr, self.epoch, lr_stair_width,
                                               lr_decay, staircase=True)
        adam = AdamOptimizer(self.learning_rate, beta1=0.9, beta2=0.999,
                             epsilon=1e-08)
        self.optimized = adam.minimize(self.loss)

    self.sess = Session()
    self.saver = Saver()
def __init__(self, action_size, env_name):
    """Create the global actor-critic network, its optimizer and log/save paths.

    Args:
        action_size: Number of actions, forwarded to ``ActorCritic``.
        env_name: Environment name, stored on the instance.
    """
    self.env_name = env_name

    # State and action sizes
    self.state_size = (84, 84, 4)
    self.action_size = action_size

    # A3C hyperparameters
    self.discount_factor = 0.99
    self.no_op_steps = 30
    self.lr = 1e-4

    # Number of threads
    self.threads = 16

    # Create the global network and build its weights for a batch of states
    self.global_model = ActorCritic(self.action_size, self.state_size)
    batched_state_shape = tf.TensorShape((None,) + tuple(self.state_size))
    self.global_model.build(batched_state_shape)

    # Optimizer used to update the network
    self.optimizer = AdamOptimizer(self.lr, use_locking=True)

    # TensorBoard writer
    self.writer = tf.summary.create_file_writer('summary/breakout_a3c')

    # Path where the trained global model is saved
    save_dir = os.path.join(os.getcwd(), 'save_model')
    self.model_path = os.path.join(save_dir, 'model')
# -- imports --
import tensorflow as tf
from tensorflow.compat.v1 import placeholder
from tensorflow.compat.v1.train import AdamOptimizer

# -- placeholders --
# x holds the input rows, y the target value for each row.
x = placeholder(dtype=tf.float32, shape=[None, None])
y = placeholder(dtype=tf.float32, shape=[None])

# -- variables --
# Linear model f(x) = x * m + b with a 2x1 weight matrix and a scalar bias.
m = tf.Variable([[0.1], [0.2]])
b = tf.Variable(0.3)

# -- induction --
f1 = tf.matmul(x, m) + b
# Flatten the column of predictions so it lines up with y.
fx = tf.reshape(f1, [-1])

# -- loss --
# Root-mean-square error between predictions and targets.
rms_error = tf.sqrt(tf.reduce_mean(tf.square(fx - y)))
learn = AdamOptimizer(0.01).minimize(rms_error)


def printEquation(sess):
    """Print the model form and the current values of ``m`` and ``b``.

    Args:
        sess: Session used to evaluate the two variables.
    """
    print("f(x) = x * m + b")
    for label, variable in (("m =", m), ("b =", b)):
        print(label, sess.run(variable))
def AlexNet(input_shape, num_classes, learning_rate, graph):
    """
    Construct a small convolutional classifier for 28x28 single-channel input.

    Despite the name, the network built here is two 5x5 convolutions, one
    2x2 max-pool and two fully connected layers.

    input_shape: The shape of input (`list` like); the input is reshaped to
        [-1, 28, 28, 1] before the first convolution
    num_classes: The number of output classes (`int`); sets the width of both
        the label placeholder and the logits
    learning_rate: learning rate for optimizer (`float`)
    graph: The tf computation graph (`tf.Graph`)

    Returns the tuple (X, Y, DROP_RATE, train_op, loss_op, accuracy).
    NOTE: the returned X is the reshaped tensor, not the raw placeholder.
    """
    with graph.as_default():
        X = tf.placeholder(tf.float32, input_shape, name='X')
        # The label width must follow num_classes; a fixed 10 only matched
        # the logits when num_classes happened to be 10.
        Y = tf.placeholder(tf.float32, [None, num_classes], name='Y')
        print("net___input_shape=", input_shape)
        DROP_RATE = tf.placeholder(tf.float32, name='drop_rate')
        X = tf.reshape(X, [-1, 28, 28, 1])

        # 1st layer: 5x5 conv, 20 filters, stride 1, VALID padding.
        conv1 = conv(X, 5, 5, 20, 1, 1, padding='VALID', name='conv1')

        # 2nd layer: 5x5 conv, 50 filters, stride 1, VALID padding.
        conv2 = conv(conv1, 5, 5, 50, 1, 1, padding='VALID', name='conv2')

        # 2x2 max-pool with stride 2 (an original note says the paper uses
        # average pooling here).
        pool3 = max_pool(conv2, 2, 2, 2, 2, padding='VALID', name='pool3')

        # Flatten -> FC (w ReLu) -> Dropout.
        # 10 * 10 * 50 presumably follows from 28 -> 24 -> 20 across the two
        # VALID 5x5 convolutions and -> 10 after pooling, with 50 channels
        # (assumes the conv/max_pool helpers behave as their arguments
        # suggest - confirm against their definitions).
        flat_size = 10 * 10 * 50
        flattened = tf.reshape(pool3, [-1, flat_size])
        fc4 = fc_layer(flattened, flat_size, 256, name='fc4')
        dropout4 = dropout(fc4, DROP_RATE)

        # Output layer: FC returning unscaled activations.
        logits = fc_layer(dropout4, 256, num_classes, relu=False, name='fc5')

        # loss and optimizer
        loss_op = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits,
                                                       labels=Y))
        optimizer = AdamOptimizer(learning_rate=learning_rate)
        train_op = optimizer.minimize(loss_op)

        # Evaluate model
        prediction = tf.nn.softmax(logits)
        pred = tf.argmax(prediction, 1)

        # accuracy
        correct_pred = tf.equal(pred, tf.argmax(Y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    return X, Y, DROP_RATE, train_op, loss_op, accuracy
def AlexNet(input_shape, num_classes, learning_rate, graph):
    """
    Assemble an AlexNet-style classifier inside ``graph``.

    input_shape: The shape of input (`list` like)
    num_classes: The number of output classes (`int`)
    learning_rate: learning rate for optimizer (`float`)
    graph: The tf computation graph (`tf.Graph`)

    Returns the tuple (X, Y, DROP_RATE, train_op, loss_op, accuracy), i.e.
    the three placeholders followed by the training op, the mean
    cross-entropy loss and the accuracy tensor.
    """
    with graph.as_default():
        images = tf.placeholder(tf.float32, input_shape, name='X')
        targets = tf.placeholder(tf.float32, [None, num_classes], name='Y')
        drop_rate = tf.placeholder(tf.float32, name='drop_rate')

        # Stage 1: 11x11 conv (stride 2) -> LRN -> 3x3 max-pool (stride 2).
        net = conv(images, 11, 11, 96, 2, 2, name='conv1')
        net = lrn(net, 2, 2e-05, 0.75, name='norm1')
        net = max_pool(net, 3, 3, 2, 2, padding='VALID', name='pool1')

        # Stage 2: 5x5 conv in two groups -> LRN -> 3x3 max-pool (stride 2).
        net = conv(net, 5, 5, 256, 1, 1, groups=2, name='conv2')
        net = lrn(net, 2, 2e-05, 0.75, name='norm2')
        net = max_pool(net, 3, 3, 2, 2, padding='VALID', name='pool2')

        # Stage 3: 3x3 conv.
        net = conv(net, 3, 3, 384, 1, 1, name='conv3')

        # Stage 4: 3x3 conv in two groups.
        net = conv(net, 3, 3, 384, 1, 1, groups=2, name='conv4')

        # Stage 5: 3x3 conv in two groups -> 3x3 max-pool (stride 2).
        net = conv(net, 3, 3, 256, 1, 1, groups=2, name='conv5')
        net = max_pool(net, 3, 3, 2, 2, padding='VALID', name='pool5')

        # Stage 6: flatten to 256 features -> FC -> dropout.
        flat_width = 1 * 1 * 256
        net = tf.reshape(net, [-1, flat_width])
        net = fc_layer(net, flat_width, 1024, name='fc6')
        net = dropout(net, drop_rate)

        # Stage 7: FC -> dropout.
        net = fc_layer(net, 1024, 2048, name='fc7')
        net = dropout(net, drop_rate)

        # Stage 8: FC without ReLU, giving unscaled class scores.
        logits = fc_layer(net, 2048, num_classes, relu=False, name='fc8')

        # Loss and optimizer.
        per_example_loss = tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=logits, labels=targets)
        loss_op = tf.reduce_mean(per_example_loss)
        adam = AdamOptimizer(learning_rate=learning_rate)
        train_op = adam.minimize(loss_op)

        # Accuracy: share of rows whose arg-max class matches the label.
        probabilities = tf.nn.softmax(logits)
        predicted_class = tf.argmax(probabilities, 1)
        hits = tf.equal(predicted_class, tf.argmax(targets, 1))
        accuracy = tf.reduce_mean(tf.cast(hits, tf.float32))

    return images, targets, drop_rate, train_op, loss_op, accuracy
def __init__(self, observation_size, net_arch, initializer, activation, clip_range, value_coef, entropy_coef, learning_rate, pre_training_learning_rate, action_bounds, policy):
    """
    Build the policy/value graph, its losses, training ops and summaries in a
    private ``Graph``/``Session`` and initialise all variables.

    :param observation_size: width enforced on the decoded observation tensor
        (``ensure_shape(..., shape=(None, observation_size))``)
    :param net_arch: forwarded to ``net_core`` for both the policy and the
        value network
    :param initializer: forwarded to ``net_core`` and used as kernel
        initializer of the value head
    :param activation: forwarded to ``net_core``
    :param clip_range: the probability ratio is clipped to
        ``[1 - clip_range, 1 + clip_range]``
    :param value_coef: weight of the value loss in the total loss
    :param entropy_coef: weight of the entropy loss in the total loss
    :param learning_rate: default value of the learning-rate placeholder fed
        to the Adam optimizer
    :param pre_training_learning_rate: default value of the learning-rate
        placeholder fed to the pre-training gradient-descent optimizer
    :param action_bounds: ``[low, high]`` used to clip the sampled action;
        ``None`` selects ``[0.0, 1.5]``
    :param policy: policy object providing ``construct``, ``action``,
        ``det_action``, ``neg_logits_from_actions``, ``entropy`` and
        ``dist_params``; ``None`` selects ``GaussFull()``
    """
    """Set class constants"""
    self.observation_size = observation_size
    self.net_arch = net_arch
    self.initializer = initializer
    self.activation = activation
    self.clip_range = clip_range
    self.value_coef = value_coef
    self.entropy_coef = entropy_coef
    # Fall back to the default action range when none is supplied.
    if action_bounds is None:
        action_bounds = [0.0, 1.5]
    self.action_bounds = action_bounds
    self.learning_rate = learning_rate
    self.pre_training_learning_rate = pre_training_learning_rate
    # Fall back to the default policy when none is supplied.
    if policy is None:
        policy = GaussFull()
    self.policy = policy
    """Set up the tensorflow graph"""
    self.graph = Graph()
    with self.graph.as_default():
        self.sess = Session(graph=self.graph)
        """ core """
        # placeholders
        self.observation_string_ph = placeholder(shape=(None, 1), dtype=string, name="observation_string_ph")
        self.action_ph = placeholder(dtype=float32, shape=(None, 1), name="action_ph")
        self.old_neg_logits = placeholder(dtype=float32, shape=(None, 1), name="old_neg_logits")
        self.advantage_ph = placeholder(dtype=float32, shape=(None, 1), name="advantage_ph")
        self.value_target_ph = placeholder(dtype=float32, shape=(None, 1), name="value_target_ph")
        # learning rate tensors: default to the constructor values but can be
        # overridden by feeding the placeholders
        self.learning_rate_ph = placeholder_with_default(input=self.learning_rate, shape=())
        self.pre_training_learning_rate_ph = placeholder_with_default(input=self.pre_training_learning_rate, shape=())
        # observation tensor: observations arrive as base64 strings.
        # "/" -> "_" and "+" -> "-" rewrites the string before decoding,
        # presumably because decode_base64 expects the URL-safe alphabet
        # (TODO confirm). The decoded bytes are reinterpreted as float32.
        replaced1 = regex_replace(self.observation_string_ph, "/", "_")
        replaced2 = regex_replace(replaced1, r"\+", "-")
        byte_tensor = decode_base64(replaced2)
        decoded = decode_raw(byte_tensor, out_type=float32)
        # Drop the size-1 axis inherited from the (None, 1) string placeholder.
        squeezed = squeeze(decoded, axis=1)
        self.observation_input = ensure_shape(squeezed, shape=(None, self.observation_size), name="observation_input")
        # policy net
        latent_policy = net_core(self.observation_input, self.net_arch, self.initializer, self.activation)
        self.policy.construct(latent_policy=latent_policy)
        self.clipped_action = clip_by_value(cast(self.policy.action, float32), self.action_bounds[0], self.action_bounds[1], "clipped_action")
        # value net: a second net_core call on the same input, followed by a
        # single linear unit
        latent_value = net_core(self.observation_input, self.net_arch, self.initializer, self.activation)
        self.value = identity(input=Dense(units=1, activation=None, kernel_initializer=self.initializer)(latent_value), name="value")
        """loss calculation"""
        # policy loss: clipped surrogate objective.
        self.neg_logits = self.policy.neg_logits_from_actions(self.action_ph)
        # Probability ratio new/old, computed from the two negative
        # log-likelihood tensors.
        ratio = exp(self.old_neg_logits - self.neg_logits)
        # Standardise advantages within the batch; 1e-8 guards against a
        # zero standard deviation.
        standardized_adv = (self.advantage_ph - reduce_mean(self.advantage_ph)) / (reduce_std(self.advantage_ph) + 1e-8)
        raw_policy_loss = -standardized_adv * ratio
        clipped_policy_loss = -standardized_adv * clip_by_value(ratio, 1 - self.clip_range, 1 + self.clip_range)
        # Element-wise maximum of the two losses, then batch mean.
        self.policy_loss = reduce_mean(maximum(raw_policy_loss, clipped_policy_loss))
        self.value_loss = mean_squared_error(self.value_target_ph, self.value)
        # entropy loss: negated mean entropy, so minimising it raises entropy
        self.entropy_loss = -reduce_mean(self.policy.entropy)
        # total loss
        self.total_loss = self.policy_loss + self.value_coef * self.value_loss + self.entropy_coef * self.entropy_loss
        # optimizer
        optimizer = AdamOptimizer(learning_rate=self.learning_rate_ph)
        # training ops
        self.training_op = optimizer.minimize(self.total_loss)
        # pre training: regress the policy's distribution parameters onto
        # fed targets with plain gradient descent
        self.dist_param_target_ph = placeholder(dtype=float32, shape=(None, self.policy.dist_params.shape[1]), name="dist_param_label_ph")
        self.pre_training_loss = mean_squared_error(self.dist_param_target_ph, self.policy.dist_params)
        pre_training_optimizer = GradientDescentOptimizer(learning_rate=self.pre_training_learning_rate_ph)
        self.pre_training_op = pre_training_optimizer.minimize(self.pre_training_loss)
        """utility nodes"""
        # inspect model weights
        self.trainable_variables = trainable_variables()
        # saver
        self.saver = Saver()
        # tensorboard summaries for regular training
        self.summary = merge([
            histogram("values", self.value),
            histogram("advantages", standardized_adv),
            histogram("actions", self.clipped_action),
            histogram("det_actions", replace_nan(self.policy.det_action, 0.0)),
            histogram("value_targets", self.value_target_ph),
            scalar("policy_loss", self.policy_loss),
            scalar("value_loss", self.value_loss),
            scalar("entropy_loss", self.entropy_loss)
        ])
        # tensorboard summaries for pre-training
        self.pre_summary = merge([
            histogram("pretraining_actions", self.clipped_action),
            scalar("pretraining_loss", self.pre_training_loss)
        ])
        # initialization
        init = global_variables_initializer()
        self.sess.run(init)
def build(self, word_length, num_labels, num_intent_labels, word_vocab_size,
          char_vocab_size, word_emb_dims=100, char_emb_dims=30,
          char_lstm_dims=30, tagger_lstm_dims=100, dropout=0.2):
    """Build and compile the joint intent / slot tagging model.

    A first BiLSTM over word embeddings feeds the intent classifier (from its
    final hidden states) and, concatenated with character-level BiLSTM
    features, a second BiLSTM whose projections go into a CRF layer for
    slot tagging. The compiled model is stored in ``self.model``.

    Args:
        word_length: Number of characters per word in the character input.
        num_labels: Number of slot labels (width of the CRF input).
        num_intent_labels: Number of intent classes.
        word_vocab_size: Size of the word embedding vocabulary.
        char_vocab_size: Size of the character embedding vocabulary.
        word_emb_dims: Word embedding size.
        char_emb_dims: Character embedding size.
        char_lstm_dims: Units of the character-level LSTM.
        tagger_lstm_dims: Units of each tagger LSTM.
        dropout: Dropout rate applied after the embeddings and BiLSTMs.
    """
    self.word_length = word_length
    self.num_labels = num_labels
    self.num_intent_labels = num_intent_labels
    self.word_vocab_size = word_vocab_size
    self.char_vocab_size = char_vocab_size

    # Word-level input and embeddings.
    words_input = Input(shape=(None, ), name='words_input')
    embedding_layer = Embedding(word_vocab_size, word_emb_dims,
                                name='word_embedding')
    word_embeddings = embedding_layer(words_input)
    word_embeddings = Dropout(dropout)(word_embeddings)

    # Character-level input: one BiLSTM summary vector per word.
    word_chars_input = Input(shape=(None, word_length),
                             name='word_chars_input')
    char_embedding_layer = Embedding(char_vocab_size, char_emb_dims,
                                     input_length=word_length,
                                     name='char_embedding')
    char_embeddings = char_embedding_layer(word_chars_input)
    char_embeddings = TimeDistributed(
        Bidirectional(LSTM(char_lstm_dims)))(char_embeddings)
    char_embeddings = Dropout(dropout)(char_embeddings)

    # first BiLSTM layer (used for intent classification)
    first_bilstm_layer = Bidirectional(
        LSTM(tagger_lstm_dims, return_sequences=True, return_state=True))
    first_lstm_out = first_bilstm_layer(word_embeddings)
    # Element 0 is the per-step output sequence; the rest are the states.
    lstm_y_sequence = first_lstm_out[0]
    states = first_lstm_out[1:]
    hf, _, hb, _ = states  # extract last hidden states
    h_state = concatenate([hf, hb], axis=-1)
    intents = Dense(num_intent_labels, activation='softmax',
                    name='intent_classifier_output')(h_state)

    # create the 2nd feature vectors
    combined_features = concatenate([lstm_y_sequence, char_embeddings],
                                    axis=-1)

    # 2nd BiLSTM layer (used for entity/slots classification)
    second_bilstm_layer = Bidirectional(
        LSTM(tagger_lstm_dims, return_sequences=True))(combined_features)
    second_bilstm_layer = Dropout(dropout)(second_bilstm_layer)
    bilstm_out = Dense(num_labels)(second_bilstm_layer)

    # Feed BiLSTM vectors into the CRF layer. Keep the layer instance in a
    # local variable: assigning it to ``crf_layer.CRF`` replaced the class on
    # the module with an instance, which broke any later use of the class,
    # including a second call to build().
    crf = crf_layer.CRF(num_labels, name='intent_slot_crf_layer.CRF')
    entities = crf(bilstm_out)

    model = Model(inputs=[words_input, word_chars_input],
                  outputs=[intents, entities])

    # Loss and metric per output, keyed by output layer name.
    loss_f = {
        'intent_classifier_output': 'categorical_crossentropy',
        'intent_slot_crf_layer.CRF': crf.loss
    }
    metrics = {
        'intent_classifier_output': 'categorical_accuracy',
        'intent_slot_crf_layer.CRF': crf.viterbi_accuracy
    }
    model.compile(loss=loss_f, optimizer=AdamOptimizer(), metrics=metrics)
    self.model = model
def optimize_graph(y, y_conv):
    """Return an Adam training op for the mean sigmoid cross-entropy.

    Args:
        y: Target labels.
        y_conv: Logits produced by the model.

    Returns:
        The op returned by ``AdamOptimizer(1e-3).minimize`` on the mean loss.
    """
    adam = AdamOptimizer(1e-3)
    per_element_loss = sigmoid_cross_entropy_with_logits(labels=y,
                                                         logits=y_conv)
    mean_loss = reduce_mean(per_element_loss)
    return adam.minimize(mean_loss)
def do_train(model, dataset_batches, k, epochs=None, num_epochs=None,
             optimizer_params=None, saveloc=None, batch_size=32):
    """Train ``model`` with per-label loss re-weighting.

    After every batch the loss is also computed separately for each of the
    labels 0-3, and ``weights_for_losses`` turns those per-label losses into
    the example weights used for the next batch.

    Args:
        model: Callable model invoked as ``model(invec, training=True)`` and
            exposing ``trainable_variables``.
        dataset_batches: Dataset yielding ``(invec, labels, _)`` triples.
        k: Number of batches taken from ``dataset_batches`` per epoch.
        epochs: Iterable of epoch identifiers; ignored when ``num_epochs``
            is truthy.
        num_epochs: When truthy, epochs are ``range(num_epochs)``.
        optimizer_params: Keyword arguments for ``AdamOptimizer``; ``None``
            means the optimizer defaults.
        saveloc: Directory prefix for checkpoints and loss histories. When
            ``None`` nothing is written.
        batch_size: Expected batch size; smaller batches are skipped.
            Defaults to 32, the value that used to be hard-coded.

    Returns:
        List with the weighted loss of every processed batch.

    Raises:
        ValueError: If neither ``epochs`` nor ``num_epochs`` is given.
    """
    # ``**None`` raises a TypeError, so treat a missing dict as "no options".
    optimizer = AdamOptimizer(**(optimizer_params or {}))
    loss_history = []
    label_losses_history = []

    if num_epochs:
        epochs = range(num_epochs)
    if epochs is None:
        raise ValueError("either `epochs` or `num_epochs` must be provided")

    label_ids = [0, 1, 2, 3]
    weights_dict = {0: 1., 1: 1., 2: 1., 3: 1.}
    global_step = tf.compat.v1.train.get_or_create_global_step()

    for epoch in epochs:
        batches = enumerate(tqdm(dataset_batches.take(k)))
        for batch, (invec, labels, _) in batches:
            # Skip short batches: the weighting below indexes exactly
            # ``batch_size`` examples.
            if labels.shape[0] < batch_size:
                continue

            # Convert once instead of once per element.
            labels_np = labels.numpy()
            weights = np.array(
                [weights_dict[labels_np[i]] for i in range(batch_size)])

            # Only the forward pass and the weighted loss are differentiated.
            with tf.GradientTape() as tape:
                logits = model(invec, training=True)
                loss_value = sparse_softmax_cross_entropy(labels,
                                                          logits,
                                                          weights=weights)

            # Per-label diagnostics work on plain arrays and drive the
            # weights of the next batch.
            logits_np = logits.numpy()
            indices_vec = [[
                i for i in range(batch_size) if labels_np[i] == label
            ] for label in label_ids]
            losses = [
                sparse_softmax_cross_entropy(labels_np[indices],
                                             logits_np[indices],
                                             weights=weights[indices])
                for indices in indices_vec
            ]
            weights_dict = weights_for_losses(losses)

            loss_history.append(loss_value.numpy())
            label_losses_history.append([x.numpy() for x in losses])

            variables = model.trainable_variables
            grads = tape.gradient(loss_value, variables)
            optimizer.apply_gradients(zip(grads, variables),
                                      global_step=global_step)

            # Checkpoint every 10th batch, but only when a target location
            # was given (otherwise the path would start with "None/").
            if saveloc is not None and batch % 10 == 0:
                prefix = (f'{saveloc}/epoch_{str(epoch).zfill(3)}'
                          f'_batch_{str(batch).zfill(5)}')
                save_model(model, (f'{prefix}_model.h5'))
                to_json_local(loss_history,
                              f'{prefix}_train_loss_history.json')
                to_json_local(label_losses_history,
                              f'{prefix}_train_label_losses_history.json')

    return loss_history
import matplotlib.pyplot as plt #Импортируем модуль для визуализации logging.set_verbosity(logging.ERROR) #Убираем лишние предупреждения model = Sequential([ #Создаём модель последовательных слоёв для обучения #Функция активации этого слоя для одномерных данных - выпрямитель Dense( 64, activation='relu', input_dim=2 ), #Второй слой принимает двухмерного инпут (вход+выход) и передаёт результаты 64 нейронам (спрятанный слой) #Функция активации этого слоя для одномерных данных - сигмоида Dense(1, activation='sigmoid') ]) #Третий слой принимает инфу из 64 нейронов спрятанного слоя и выдаёт результат на 1 нейрон #Добавляем конфигурацию к модели model.compile( optimizer=AdamOptimizer( 0.01), #Используем в нашей модели оптимизатор Адама с шагом в 0.01 loss='mse', #Функция потери - среднее квадратичное ошибки metrics=['binary_accuracy'] ) #Собираем метрику - бинарная точность, вероятность, что модель выберет "правильный" результат data = array([[0, 0], [0, 1], [1, 0], [1, 1]], "float32") #4 разных входных состояний в виде двухмерного массива labels = array([[0], [1], [1], [0]], "float32") #4 выхода, соответствующим входным данным hist = model.fit( data, labels, epochs=10, verbose=2 ) #Обучение модели, 10 эпох, размер набора по дефолту 32, печатаем 1 линию для каждой эпохи fig, axes = plt.subplots(2, sharex=True, figsize=(12, 8)) #Создаём 2 графика
from tensorflow.compat.v2.keras.optimizers import SGD
from tensorflow.compat.v1.math import softmax
from tensorflow.compat.v1.math import argmax

# Train the model
# --------------------------------------
# Load the training set from the CSV file
training_data = IrisTrainingData('iris_training.csv')
# Build the model; feature and class counts are taken from the loaded data
model = IrisModel(hidden_size=10, feature_size=training_data.feature_size, class_count=training_data.class_count)
# Sparse categorical cross-entropy is used with the labels as stored in
# training_data.labels (presumably integer class ids - confirm against
# IrisTrainingData)
model.compile(optimizer=AdamOptimizer(0.01), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(training_data.data, training_data.labels, epochs=500, batch_size=32)
# Evaluate model
# --------------------------------------
# NOTE: evaluation is run on the same data the model was trained on
evaluate_model(model, training_data)
# Evaluate a few single iris samples
# --------------------------------------
# This is from test data. It should be an iris virginica (2)
print('\n\nTest iris 1:')
b_fc1 = bias_variable([256])
h_pool_flat = tf.reshape(h_pool, [-1, 10 * 10 * 50])
print("h_pool_flat_shape=", h_pool_flat.shape)
h_fc1 = tf.nn.relu(tf.matmul(h_pool_flat, w_fc1) + b_fc1)
keep_prob = tf.placeholder("float32")
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

# Layer 5: output layer
w_fc2 = weight_variable([256, 10])
b_fc2 = bias_variable([10])
# Keep the unscaled scores separate from the probabilities: the
# cross-entropy op applies softmax itself, so it must receive raw logits.
y_logits = tf.matmul(h_fc1_drop, w_fc2) + b_fc2
y_conv = tf.nn.softmax(y_logits)

# Minimise the cross-entropy loss
loss_op = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(logits=y_logits, labels=y_))
optimizer = AdamOptimizer(learning_rate=1e-4)
train_op = optimizer.minimize(loss_op)

# Evaluate model: class probabilities come from the network output
# (previously the softmax was taken over the labels y_).
prediction = y_conv
pred = tf.argmax(prediction, 1)

# Accuracy: True where the predicted class equals the label, else False
correct_pred = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

sess = tf.Session()
writer = tf.summary.FileWriter('./mylogs', sess.graph)
# tf.initialize_all_variables is deprecated in favour of
# tf.global_variables_initializer.
sess.run(tf.global_variables_initializer())

# Download the MNIST handwritten-digit dataset