def _eager_train_step(self):
    """Runs one training step in eager mode.

    Recomputes the online/target network outputs on the sampled replay
    transitions, forms the per-example Huber TD loss, optionally logs it,
    and applies one optimizer step.
    """
    def loss_f():
        # Forward pass on the sampled replay transitions.
        self._replay_net_outputs = self.online_convnet(
            tf.convert_to_tensor(self._replay.states, dtype=tf.uint8))
        self._replay_next_target_net_outputs = self.target_convnet(
            self._replay.next_states)
        replay_action_one_hot = tf.one_hot(
            self._replay.actions, self.num_actions, 1., 0.,
            name='action_one_hot')
        # BUG FIX: reduce over the action axis only (axis=1) so we keep one
        # chosen-Q value per transition; reducing over all axes collapsed
        # the whole batch into a single scalar.
        replay_chosen_q = tf.reduce_sum(
            self._replay_net_outputs.q_values * replay_action_one_hot,
            axis=1, name='replay_chosen_q')
        # Targets are constants w.r.t. the gradient.
        target = tf.stop_gradient(self._build_target_q_op())
        return tf.losses.Huber(reduction=tf.losses.Reduction.NONE)(
            target, replay_chosen_q)

    # Log only at the configured frequency. The original bare
    # `summary.record_if(...)` call had no effect because the returned
    # context manager was never entered; gate explicitly instead.
    if (self.summary_writer is not None
            and self.training_steps > 0
            and self.training_steps % self.summary_writing_frequency == 0):
        summary.scalar('HuberLoss', loss_f(), step=self.training_steps)
    # TF1-style optimizers accept a loss callable in eager mode and record
    # gradients themselves, so no explicit GradientTape is needed.
    self.optimizer.minimize(loss_f)
def step(self):
    """Advance the environment one action; do episode bookkeeping on done."""
    self.steps += 1
    state_key = self.box_to_string(self.ob)
    action = self.act(state_key)
    next_ob, raw_reward, done, _ = self.env.step(action)
    if self.visualise:
        self.env.render()
    next_state_key = self.box_to_string(next_ob)
    shaped_reward = self.modify_reward(raw_reward, done)
    self.update_Q(state_key, action, shaped_reward, next_state_key, done)
    self.reward += raw_reward
    if not done:
        # Mid-episode: just carry the observation forward.
        self.ob = next_ob
        return
    # Episode finished: log, record, decay exploration, reset.
    self.episodes += 1
    if self.tf_writer:
        with self.tf_writer.as_default():
            tf_summary.scalar('episode reward', self.reward, step=self.episodes)
    if self.verbose:
        print(
            f"steps: {self.steps} - episode: {self.episodes} - r: {self.reward} - epsilon: {self.epsilon.value: .3f}"
        )
    self.rewards.append(self.reward)
    self.epsilon.update_epsilon()
    self.reward = 0.0
    self.ob = self.env.reset()
def train(X_train, log_dir, writer):
    """GAN training loop: D updates per G update, TensorBoard logging per epoch."""
    fixed_seed = make_noise(BATCH_SIZE)
    for epoch in range(NB_EPOCH):
        d_meter = metrics.Mean()
        g_meter = metrics.Mean()
        # Train the discriminator TRAINING_RATIO times per generator step.
        for _ in range(TRAINING_RATIO):
            d_meter(train_discriminator(X_train))
        g_meter(train_generator())
        # Negate: the losses are reported as critic scores.
        da_loss = -np.mean(np.array(d_meter.result()))
        ga_loss = -np.mean(np.array(g_meter.result()))
        g_meter.reset_states()
        d_meter.reset_states()
        with writer.as_default():
            scalar("da_loss", da_loss, step=epoch)
            scalar("ga_loss", ga_loss, step=epoch)
        save_images(generator, fixed_seed, writer, epoch)
        writer.flush()
        print(
            f"Epoch: {epoch:05d}/{NB_EPOCH} = da_loss {da_loss:.5f}, ga_loss {ga_loss:.5f}"
        )
def host_call_fn(**kwargs):
    """Record the mean of every named tensor as a scalar summary (TPU host call)."""
    writer = contrib_summary.create_file_writer(summary_dir, max_queue=1000)
    with writer.as_default(), contrib_summary.always_record_summaries():
        for tag, tensor in kwargs.items():
            contrib_summary.scalar(tag, tf.reduce_mean(input_tensor=tensor))
        return contrib_summary.all_summary_ops()
def Train(self):
    """Sample a replay batch, build Q-learning targets and fit the online net.

    No-op until the replay buffer reaches `self.expreplaycap` transitions.
    Periodically logs loss/accuracy/epsilon and per-action Q averages.
    """
    if self.exp_replay.Length() < self.expreplaycap:
        return
    batch = self.exp_replay.RandomBatch(self.batchsize)
    batchLen = len(batch)
    # Transpose [(s, a, r, s', done), ...] into per-field tuples.
    batch = list(zip(*batch))
    current_states = np.array(batch[0])
    next_states = np.array(batch[3])
    net_shape = (-1, self.input_shape[0], self.input_shape[1],
                 self.input_shape[2])
    Qs = self.Q.predict(current_states.reshape(net_shape))
    targetQs = self.target_Q.predict(next_states.reshape(net_shape))
    x = np.zeros((batchLen, self.input_shape[0], self.input_shape[1]))
    y = np.zeros((batchLen, self.num_actions))
    for i in range(batchLen):
        a = batch[1][i]
        reward = batch[2][i]
        targetQ = Qs[i]
        if batch[4][i] >= 1:
            # Terminal transition: no bootstrapping.
            targetQ[a] = reward
        else:
            targetQ[a] = reward + self.gamma * np.max(targetQs[i])
        x[i] = current_states[i]
        y[i] = targetQ
    hist = self.TrainOnBatch(x.reshape(net_shape), y)
    # BUG FIX: was `self.steps is not 0` — identity comparison against an
    # int literal; use value comparison.
    if self.steps % 500 == 0 and self.steps != 0:
        # NOTE(review): `writer` is not defined in this scope — presumably a
        # module-level global; confirm.
        with writer.as_default():
            summary.scalar("Loss", hist[0], step=self.steps)
            summary.scalar("Accuracy", hist[1], step=self.steps)
            summary.scalar("Epsilon", self.epsilon, step=self.steps)
            summary.scalar("Last episode score", self.lastscore, step=self.steps)
            img = np.reshape(x[0:10], (-1, self.input_shape[0],
                                       self.input_shape[1],
                                       self.input_shape[2]))
            summary.image("State", img, max_outputs=1, step=self.steps)
            for act_i in range(self.num_actions):
                # BUG FIX: `Qs[:][act_i]` is just `Qs[act_i]` (one sample's
                # Q-vector); use a column slice to average action act_i's
                # Q-value over the batch.
                summary.scalar("Average Q for action {0}".format(act_i),
                               np.average(Qs[:, act_i]), step=self.steps)
        writer.flush()
def _build_model(self): """ Build the TF graph :return: """ # Placeholder for input # Input are 4 RGB frames of shape 84, 84 each self.X_pl = tf.placeholder(shape=[None, 84, 84, 4], dtype=tf.uint8, name='X') # Placeholder for TD Target Value self.y_pl = tf.placeholder(shape=[None], dtype=tf.float32, name='y') # Integer id of which action was selected self.actions_pl = tf.placeholder(shape=[None], dtype=tf.int32, name='actions') X = tf.to_float(self.X_pl) / 255.0 batch_size = tf.shape(self.X_pl)[0] # Three convolutional layers conv1 = layers.conv2d(X, 32, 8, 4, activation_fn=nn.relu) conv2 = layers.conv2d(conv1, 64, 4, 2, activation_fn=nn.relu) conv3 = layers.conv2d(conv2, 64, 3, 1, activation_fn=nn.relu) # FC layers flattened = layers.flatten(conv3) fc1 = layers.fully_connected(flattened, 512) self.predictions = layers.fully_connected(fc1, len(VALID_ACTIONS)) # Get the prediction for the chosen action only gather_indices = tf.range(batch_size) * tf.shape( self.predictions)[1] + self.actions_pl self.actions_preds = tf.gather(tf.reshape(self.predictions, [-1]), gather_indices) # Calculate loss self.losses = tf.squared_difference(self.y_pl, self.actions_preds) self.loss = tf.reduce_mean(self.losses) # Optimizer params # From original papers self.optimizer = tf.train.RMSPropOptimizer(0.00025, 0.99, 0.0, 1e-6) self.train_op = self.optimizer.minimize( self.loss, global_step=tf.train.get_global_step()) # Summaries for Tensorboard self.summaries = summary.merge([ summary.scalar('loss', self.loss), summary.histogram('loss_hist', self.losses), summary.histogram('q_values_hist', self.predictions), summary.scalar('max_q_value', tf.reduce_max(self.predictions)) ])
def train(self, NB_EPOCH, save_checkpoint, resume_from):
    """Train the GAN for NB_EPOCH epochs, optionally resuming.

    Args:
        NB_EPOCH: total number of epochs to run.
        save_checkpoint: checkpoint (and CHK-print) every this many epochs.
        resume_from: epoch to resume from; 0 starts fresh with a new seed.
    """
    if resume_from != 0:
        print(f"RESUMED_FROM: {resume_from}/{NB_EPOCH}")
        # Reuse the fixed noise seed saved by the original run so sample
        # images stay comparable across resumes.
        fixed_seed = np.load(os.path.join(self.log_dir, 'seed.npy'))
        fixed_seed = tf.constant(fixed_seed)
        resume_from += 1
    else:
        fixed_seed = make_noise(self.sample_shape[0] * self.sample_shape[1],
                                self.Z_SIZE)
        np.save(os.path.join(self.log_dir, 'seed'), fixed_seed.numpy())
    print(f"Training for {NB_EPOCH} epochs, NB_BATCHES: {self.NB_BATCHES}")
    time_left = "is be determined"
    for epoch in range(resume_from, NB_EPOCH):
        ga_loss, da_loss = [], []
        g_train_loss, d_train_loss = metrics.Mean(), metrics.Mean()
        start_time = time.time()
        for i, image_batch in enumerate(self.X_train.take(self.NB_BATCHES)):
            print(f"Epoch: {epoch:05d}/{NB_EPOCH} in progress {i}/{self.NB_BATCHES} ending {time_left}", end='\r')
            if self.tensor_to_img is True:
                image_batch = self.extract_images_tensor(self.data_dir,
                                                         image_batch)
            # Several discriminator updates per generator update.
            for _ in range(self.TRAINING_RATIO):
                d_loss = self.train_discriminator(image_batch)
                d_train_loss(d_loss)
            g_loss = self.train_generator()
            g_train_loss(g_loss)
            da_loss.append(d_train_loss.result())
            ga_loss.append(g_train_loss.result())
            g_train_loss.reset_states()
            d_train_loss.reset_states()
        da_loss = -np.mean(da_loss)
        ga_loss = -np.mean(ga_loss)
        with self.writer.as_default():
            scalar("da_loss", da_loss, step=epoch)
            scalar("ga_loss", ga_loss, step=epoch)
        self.save_images(self.generator, fixed_seed, self.writer, epoch)
        self.writer.flush()
        if epoch % save_checkpoint == 0:
            self.checkpoint.save(file_prefix=self.checkpoint_prefix)
        date = time.strftime('%d/%m/%Y %H:%M:%S')
        time_spent = time.time() - start_time
        time_left = "in " + format_time(time_spent * (NB_EPOCH - epoch))
        time_spent = format_time(time_spent)
        if epoch % save_checkpoint == 0:
            print(f"Epoch CHK: {epoch:05d}/{NB_EPOCH} {date} = da_loss {da_loss:.5f}, ga_loss {ga_loss:.5f}, time_spent {time_spent}")
        else:
            # BUG FIX: this print was a broken f-string split across two
            # physical lines (a syntax error); rejoined into one literal.
            print(f"Epoch : {epoch:05d}/{NB_EPOCH} {date} = da_loss {da_loss:.5f}, ga_loss {ga_loss:.5f}, time_spent {time_spent}")
def __init__(self, input_, initialLearningRate, loggerFactory_=None):
    """Base model constructor: builds the graph, prediction/metric/optimizer ops.

    :type input_: dict  (must contain 'x', 'y', 'numSeqs' tensors)
    :type initialLearningRate: float
    :param loggerFactory_: optional logger factory; falls back to print().
    """
    assert 'x' in input_ and 'y' in input_ and 'numSeqs' in input_
    assert initialLearningRate > 0
    self.initialLearningRate = initialLearningRate
    self._lr = tf.Variable(self.initialLearningRate, name='learningRate')
    self.loggerFactory = loggerFactory_
    # Route self.print through the logger when one is supplied.
    self.print = print if loggerFactory_ is None else loggerFactory_.getLogger(
        'Model').info
    self.print('Model name: ' + self.__class__.__name__)
    self.print('initial learning rate: %0.7f' % initialLearningRate)
    self.input = input_
    self.x = input_['x']
    self.y = input_['y']
    self.numSeqs = input_['numSeqs']
    # Static last-dimension sizes (TF1 Dimension.value).
    self.vecDim = self.x.get_shape()[-1].value
    self.numClasses = self.y.get_shape()[-1].value
    self.outputs = []
    self.layers = []
    # Subclass hook: expected to populate self.output (and presumably
    # self.l2Loss -- see note below).
    self.make_graph()
    with name_scope('predictions'):
        self.pred = tf.argmax(self.output, 1)
        self.trueY = tf.argmax(self.y, 1)
    with name_scope('metrics'):
        # self.l2Loss = self.l2RegLambda * sum(tf.nn.l2_loss(tf_var) for tf_var in tf.trainable_variables())
        # NOTE(review): self.l2Loss is used below but its definition here is
        # commented out -- presumably assigned in make_graph(); confirm.
        self.cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=self.output, labels=self.y)) \
            + self.l2Loss
        self.accuracy = tf.reduce_mean(
            tf.cast(tf.equal(self.pred, self.trueY), tf.float32))
        summary.scalar('cost', self.cost)
        summary.scalar('accuracy', self.accuracy)
    with name_scope('optimizer'):
        self.optimizer = tf.train.AdamOptimizer(
            learning_rate=self._lr).minimize(self.cost)
    self.merged_summaries = summary.merge_all()
def _save_tensorboard_summaries(self, iteration, num_episodes_train,
                                average_reward_train, num_episodes_eval,
                                average_reward_eval):
    """Save statistics as tensorboard summaries.

    Args:
        iteration: int, The current iteration number.
        num_episodes_train: int, number of training episodes run.
        average_reward_train: float, The average training reward.
        num_episodes_eval: int, number of evaluation episodes run.
        average_reward_eval: float, The average evaluation reward.
    """
    if tf.executing_eagerly():
        with self._summary_writer.as_default():
            tf_summary.scalar('Train/NumEpisodes', num_episodes_train,
                              step=iteration)
            # BUG FIX: previously logged num_episodes_train under this tag,
            # so the training return curve duplicated the episode count.
            tf_summary.scalar('Train/AverageReturns', average_reward_train,
                              step=iteration)
            tf_summary.scalar('Eval/NumEpisodes', num_episodes_eval,
                              step=iteration)
            tf_summary.scalar('Eval/AverageReturns', average_reward_eval,
                              step=iteration)
    self._summary_writer.flush()
def on_epoch_end(self, epoch, logs=None):
    """Keras callback hook: log losses to TensorBoard and track save cadence."""
    iter_no = tf_core.get_value(self.model.optimizer.iterations)
    train_loss = logs["loss"]
    val_loss = logs["val_loss"]
    scalar("train_loss", data=train_loss, step=iter_no)
    scalar("val_loss", data=val_loss, step=iter_no)
    self.iters_since_last_model_save += 1
    # least_loss < 0 means "not yet initialized".
    improved = self.least_loss < 0 or train_loss < self.least_loss
    if improved:
        self.least_loss = train_loss
        print("\nLoss decreased in iter {}".format(iter_no))
    if self.iters_since_last_model_save > self.save_every:
        print("\nSaving model at iteration {} with loss {}".format(
            iter_no, train_loss))
        # Actual model saving is currently disabled.
        self.iters_since_last_model_save = 0
def add_summary(self, tensor, name, family, scalar=False, histogram=False):
    """Append exactly one summary op (scalar or histogram) for `tensor`."""
    import tensorflow.summary as su
    # Exactly one of the two flags must be set.
    if scalar == histogram:
        raise ValueError('must specify one of scalar or histogram')
    if scalar:
        op = su.scalar(name=name, tensor=tensor, family=family)
    else:
        op = su.histogram(name=name, values=tensor, family=family)
    self.summaries.append(op)
def _build_train_op(self):
    """Builds a training op.

    Returns:
      train_op: An op performing one step of training from replay data.
    """
    replay_action_one_hot = tf.one_hot(
        self._replay.actions, self.num_actions, 1., 0.,
        name='action_one_hot')
    # BUG FIX: reduce over the action axis only (axis=1) so one chosen-Q
    # value per transition survives; reducing over all axes collapsed the
    # batch into a single scalar and broke the per-example Huber loss.
    replay_chosen_q = tf.reduce_sum(
        self._replay_net_outputs.q_values * replay_action_one_hot,
        axis=1, name='replay_chosen_q')
    # The TD target is a constant for gradient purposes.
    target = tf.stop_gradient(self._build_target_q_op())
    loss = tf.keras.losses.Huber(
        reduction=tf.losses.Reduction.NONE)(target, replay_chosen_q)
    if self.summary_writer is not None:
        summary.scalar('HuberLoss', tf.reduce_mean(loss))
    return self.optimizer.minimize(tf.reduce_mean(loss))
def _save_tensorboard_summaries(self, iteration, num_episodes, average_reward):
    """Save statistics as tensorboard summaries (eager or graph mode)."""
    if tf.executing_eagerly():
        with self._summary_writer.as_default():
            tf_summary.scalar('Train/NumEpisodes', num_episodes,
                              step=iteration)
            tf_summary.scalar('Train/AverageReturns', average_reward,
                              step=iteration)
    else:
        # Graph mode: build a v1 Summary proto and write it directly.
        values = [
            tf.Summary.Value(tag='Train/NumEpisodes',
                             simple_value=num_episodes),
            tf.Summary.Value(tag='Train/AverageReturns',
                             simple_value=average_reward),
        ]
        self._summary_writer.add_summary(tf.Summary(value=values), iteration)
    self._summary_writer.flush()
def assemble(self):
    """Define summaries, visualizations, and the per-phase run bundles."""
    with tf.name_scope('summary'):
        summ.scalar('loss', self.loss_)
    # Graph mode merges all registered summaries; eager mode has none to merge.
    self.summary = tf.summary.merge_all() if not const.eager else None
    self.evaluator = Munch(loss=self.loss_)
    self.build_vis()
    # Tensor bundles run for a single test / train / val step.
    self.test_run = Munch(evaluator=self.evaluator, vis=self.vis,
                          summary=self.summary)
    self.train_run = Munch(opt=self.opt, summary=self.summary)
    self.val_run = Munch(loss=self.loss_, summary=self.summary, vis=self.vis)
def boss(env, nb_AP, nb_Users, action_queues, matrix_queues, logger_folder,
         max_epsiode_steps):
    """Aggregate per-AP actions, broadcast the normalized matrix, log reward.

    Runs forever: collects one row per access point from its queue, row-
    normalizes W, pushes it back to every worker, and logs the sum-rate.
    """
    step = 0
    writer = summary.create_file_writer('logs/' + logger_folder + '/boss')
    writer.set_as_default()
    summary.experimental.set_step(step)
    while True:
        step += max_epsiode_steps
        W = np.zeros((nb_AP, nb_Users)).astype('float32')
        for ap in range(nb_AP):
            # NOTE(review): `W[ap:]` assigns rows ap..end by broadcast —
            # looks like it was meant to be `W[ap]`; confirm.
            W[ap:] = action_queues[ap].get()
        # Row-normalize so each AP's beam vector has unit norm.
        W = W / np.linalg.norm(W, axis=1).reshape(W.shape[0], 1)
        for q in matrix_queues:
            q.put(W)
        env.set_W(W)
        r = np.sum(np.log2(1 + env.sinr()))
        summary.scalar(name='Episode/Reward', data=r, step=step)
        print(
            "********* \nReward {0:5.6f} Step {1: 6} norm {2: 4.5f}\n************"
            .format(np.sum(np.log2(1 + env.sinr())), step, np.linalg.norm(W)))
def train(self, FLAGS):
    """Train on MNIST with SGD, logging loss summaries and console progress.

    Args:
        FLAGS: parsed flags providing lr, bs, epochs, data_dir, output_dir.
    """
    # Set up optimizer with fallback
    train_step = tf.train.GradientDescentOptimizer(
        learning_rate=FLAGS.lr).minimize(self.loss)
    tf.global_variables_initializer().run()
    # Set up logging
    loss_summary = summary.scalar('loss', self.loss)
    writer = summary.FileWriter(FLAGS.output_dir + '/logs', self.session.graph)
    # Load MNIST dataset
    dataset = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)
    n_batches = int(dataset.train.num_examples / FLAGS.bs)
    # Learn
    for epoch in range(FLAGS.epochs):
        train_loss = None
        val_loss = None
        # Iterate over batches
        for i in range(n_batches):
            inputs, labels = dataset.train.next_batch(FLAGS.bs)
            _, loss = self.session.run([train_step, self.loss],
                                       feed_dict={
                                           self.inputs: inputs,
                                           self.labels: labels
                                       })
            train_loss = loss
            # Log validation loss at every 100th minibatch or end of epoch
            # NOTE(review): the comment says "every 100th" but the code
            # checks i % 150 — confirm which is intended. Also note this
            # runs train_step on the validation set, which updates weights.
            if (i % 150 == 0) or (i == n_batches - 1):
                _, summary_str, val_loss = self.session.run(
                    [train_step, loss_summary, self.loss],
                    feed_dict={
                        self.inputs: dataset.validation.images,
                        self.labels: dataset.validation.labels
                    })
                # Log progress
                writer.add_summary(summary_str, epoch * n_batches + i)
            frac = (i + 1) / n_batches
            sys.stdout.write('\r')
            sys.stdout.write((
                col(CYAN, 'Epoch (%d/%d):') + col(BOLD, '\t[%-10s] %d%% \t') +
                col(YELLOW, 'Train Loss:') + col(None, ' %.8f ') + '\t' +
                col(YELLOW, 'Val Loss:') + col(None, ' %.8f')) %
                (epoch + 1, FLAGS.epochs, '='*int(frac*10), int(frac*100),
                 train_loss, val_loss))
            sys.stdout.flush()
        sys.stdout.write('\n')
def get_writer(self, sess):
    """Build (summary_details, summary_scores) closures over one FileWriter."""
    import tensorflow.summary as su
    writer = su.FileWriter(self.writer_path, sess.graph)
    model_merge = su.merge_all()
    # One placeholder per evaluation score name.
    s2p = {t: tf.placeholder(dtype=tf.float32) for t in au.eval_scores}
    score_merge = su.merge(
        [su.scalar(s, p, family='eval') for s, p in s2p.items()])

    def summary_details(fd):
        # Write merged model summaries at a monotonically increasing step.
        writer.add_summary(sess.run(model_merge, feed_dict=fd),
                           self.writer_step)
        self.writer_step += 1

    def summary_scores(s2v):
        # Map score-name -> value onto placeholder -> value and write.
        p2v = {s2p[s]: s2v[s] for s in s2p.keys()}
        writer.add_summary(sess.run(score_merge, feed_dict=p2v),
                           len(self.history))

    return summary_details, summary_scores
def generate_labels(training_data, lookahead, range_cl, range_aapl):
    """Label rows whose price moves at least the symbol's range in the window.

    Args:
        training_data: DataFrame with Open/Close/High/Low columns and one-hot
            Symbol_AAPL / Symbol_CLZ16 indicator columns.
        lookahead: number of future rows to scan per row.
        range_cl: minimum |close - max| move required for CLZ16 rows.
        range_aapl: minimum |close - max| move required for AAPL rows.

    Returns:
        np.ndarray of shape (len(training_data), 1) with 0/1 labels.
    """
    end = training_data.shape[0]
    labels = np.zeros((end, 1))
    for begin in np.arange(end):
        close = training_data.iloc[begin, 3]
        look_ahead_begin = begin + 1
        # NOTE(review): the modulo wraps the window to the start of the
        # frame near the end of the data, and when it wraps the range()
        # below becomes empty — confirm this tail behavior is intended.
        look_ahead_end = (look_ahead_begin + lookahead) % end
        look_ahead_data = training_data.loc[
            look_ahead_begin:look_ahead_end,
            ['Open', 'Close', 'High', 'Low', 'Symbol_AAPL', 'Symbol_CLZ16']]
        for rownum in range(look_ahead_begin, look_ahead_end):
            row = look_ahead_data.loc[rownum, :]
            # NOTE(review): the max runs over every selected column,
            # including the one-hot symbol flags — presumably the price
            # columns dominate; verify.
            max_price = np.max(row)
            if row['Symbol_CLZ16'] == 1:
                if np.abs(close - max_price) >= range_cl:
                    labels[begin] = 1
            elif row['Symbol_AAPL'] == 1:
                if np.abs(close - max_price) >= range_aapl:
                    labels[begin] = 1
    # Record the labeling hyperparameters and the trade count.
    with tf.name_scope("labels"):
        summary.scalar("LookAheadPeriod", lookahead)
        summary.scalar("RangeCL", range_cl)
        summary.scalar("RangeAAPL", range_aapl)
        summary.scalar("PotentialTrades", np.sum(labels))
    return labels
# NOTE(review): fragment of a larger self-play training loop — the matching
# win/loss branches and the surrounding per-episode loop are outside this view.
else:
    # Neither side won: score the game as a draw.
    reward = 0
    draws += 1
rewards.append(reward)
episode_reward = np.sum(rewards)
# Final Q-learning update against an empty board (terminal state).
q_loss = engine_white.q_learn(reward, white_positions[-1], white_moves[-1],
                              chess.Board('8/8/8/8/8/8/8/8 w - - 0 1'))
q_losses.append(q_loss)
# log diagnostic info
with writer.as_default():
    summary.scalar('Reward', episode_reward, step)
    summary.scalar('Result', reward / 100, step)
    summary.scalar('Average Loss', np.mean(q_losses), step)
    # Individual material weights (piece values) of the white engine.
    summary.scalar('P', engine_white.weights[0], step)
    summary.scalar('N', engine_white.weights[1], step)
    summary.scalar('B', engine_white.weights[2], step)
    summary.scalar('R', engine_white.weights[3], step)
    summary.scalar('Q', engine_white.weights[4], step)
    summary.scalar('M', engine_white.weights[6], step)
if step % 10 == 0:
    # plot weights
    pieces = ['P', 'N', 'B', 'R', 'Q', 'K']
    # Per-square piece weights start at offset 11 (64 squares x 6 pieces).
    piece_weights = engine_white.weights[11:(11 + 64 * 6)].reshape((64, 6))
    max_weight = piece_weights.max()
optimizer_G.step() # counting metrics and output to tensorboard if phase == 'train': SR = (sr_img[0] + 1) / 2 SR = SR.cpu().detach().numpy() SR = SR.transpose(1, 2, 0) HR = (hr_img[0] + 1) / 2 HR = HR.cpu().detach().numpy() HR = HR.transpose(1, 2, 0) PSNR = compare_psnr(HR, SR, data_range=255) PIXEL_MAX = 255 (score, diff) = compare_ssim(SR, HR, full=True, multichannel=True) with train_summary_writer.as_default(): summary.scalar('train_loss_d', loss_d.item(), step=k_t) summary.scalar('train_loss_g', loss_g.item(), step=k_t) summary.scalar('train_advers_loss', adversarial_loss.item(), step=k_t) summary.scalar('train_content_loss', content_loss.item(), step=k_t) summary.scalar('train_psnr', PSNR, step=k_t) summary.scalar('train_ssim', score, step=k_t) k_t += 1 z += 1 if i % 50 == 0: print('\nPhase[{}], Epoch [{}/{}], Step [{}], Loss_D: {:.5f}, Loss_G: {:.5f}'.format(phase, epoch + 1, num_epoch, i + 1, loss_d.item(),
loss_g.backward()#retain_graph=True) optimizer_G.step() if (epoch+1) == 100: lrD = 0.000001 lrG = 0.00001 optimizer_G = optim.Adam(net_G.parameters(), lr=lrG) optimizer_D = optim.Adam(net_D.parameters(), lr=lrD) if (i+1) % 50 ==0: out_loss.write('{0} {1}\n'.format(loss_d.item(), loss_g.item())) # TENSORBOARD with train_summary_writer.as_default(): summary.scalar('loss_d', loss_d.item(), step=k) summary.scalar('loss_g', loss_g.item(), step=k) k += 1 print('Epoch [{}/{}], Step [{}], Loss_D: {:.5f}, Loss_G: {:.5f}'.format(epoch + 1, num_epoch, i + 1, loss_d.item(), loss_g.item())) # сохранение картинок if (i + 1) % 20 == 0: # % 2 torch.save(net_D.state_dict(), '/content/drive/MyDrive/data_download/dataset_images/modelD_3.pth') torch.save(net_G.state_dict(), '/content/drive/MyDrive/data_download/dataset_images/modelG_3.pth') if (i + 1) % 103 == 0: torch.save(net_D.state_dict(), '/content/drive/MyDrive/data_download/dataset_images/modelD_4.pth') torch.save(net_G.state_dict(), '/content/drive/MyDrive/data_download/dataset_images/modelG_4.pth') # сохранение картинок
def _write_logs(self, logs, index):
    """Write every metric in `logs` as a scalar summary at step `index`."""
    with self.writer.as_default():
        for tag, val in logs.items():
            summary.scalar(tag, val, step=index)
            self.step += 1
    self.writer.flush()
# NOTE(review): fragment of a training script — NUM_EPOCHS, net, the data
# tensors and the remainder of the epoch loop live outside this view.
loss_fn = torch.nn.MSELoss()
true_fl = None
for i in range(NUM_EPOCHS):
    if i % EVAL_EPOCH == 0:
        # Periodic evaluation under several loss flavors.
        mse_loss = eval_loss(torch.nn.MSELoss(reduce=None), net, trainX,
                             trainY, None)
        fl = eval_loss(flow_loss, net, trainX, trainY, train_samples)
        spl = eval_loss(sp_loss, net, trainX, trainY, train_samples)
        print("{}, mse: {}, flow loss: {}, spl: {}".format(
            i, mse_loss, fl, spl))
        # print("{}, mse: {}, flow loss: {}, true_fl: {}, spl: {}".format(
        #     i, mse_loss, fl, true_fl, spl))
        with tf_summary_writer.as_default():
            tf_summary.scalar("mse_loss", mse_loss, step=i)
            tf_summary.scalar("flow_loss", fl, step=i)
            tf_summary.scalar("sp_loss", spl, step=i)
    if USE_FLOW is None:
        continue
    # Train on one randomly chosen batch per epoch.
    tidx = random.randint(0, len(trainX) - 1)
    xbatch = trainX[tidx]
    ybatch = trainY[tidx]
    sample = train_samples[tidx]
    pred = net(xbatch).squeeze(1)
    assert pred.shape == ybatch.shape
    if USE_FLOW == 3:
        loss = loss_fn1(sample, pred, ybatch)
        # loss = loss + 0.01*loss_fn2(pred, ybatch)
        loss = loss + 0.1 * loss_fn2(pred[1:2], ybatch[1:2])
def saver(val):
    """Write `val` as a scalar summary, advancing the enclosing step counter."""
    # Closure state: step_counter lives in the enclosing function's scope.
    nonlocal step_counter
    with self.summary_writer.as_default():
        tf_summary.scalar(name, val, step=step_counter)
        step_counter += 1
def create_graph(n,K,dlambda,ns_hidden):
    """Create the neural-network graph.  (docstring translated from Chinese)

    @para n        number of input features
    @para K        output vector dimension
    @para dlambda  regularization factor, e.g. 0.00001
    @para ns_hidden [units in 1st hidden layer, ..., units in last hidden layer]
    @return the graph; structures can be retrieved via get_tensor_by_name etc.
        Main structures:
                    meaning                      type       name
        1. input vector (placeholder)            tensor  network/input:0
        2. lambda regularization factor          tensor  network/regular:0
        3. linear transform matrices             tensor  network/W1:0 ~ Wm:0 (m layers)
        4. linear transform biases               tensor  network/b1:0 ~ bm:0 (m layers)
        5. linear results z = aW + b             tensor  network/z1:0 ~ zm:0 (m layers)
        6. hidden-layer outputs a = f(z)         tensor  network/a1:0 ~ am:0 (m layers)
        7. output vector                         tensor  network/output:0
        8. training reference (placeholder)      tensor  loss/tr_out:0
        9. network cost (without regularization) tensor  loss/loss:0
        10. trainer                              op      train/train
    """
    ns_array = ns_hidden[:]
    # Output is the last layer, append to last
    ns_array.append(K)
    hidden_layer_size = len(ns_array)
    #--------------------------------------------------------------
    #create graph
    graph = tf.Graph()
    with graph.as_default():
        with tf.name_scope('network'):
            s = [n]
            a = [tf.placeholder(tf.float32,[None,s[0]],name="input")]
            W = []
            b = []
            z = []
            punish = tf.constant(0.0, name='regular')
            for idx in range(0,hidden_layer_size) :
                s.append(int(ns_array[idx]))
                W.append(tf.Variable(tf.random_uniform([s[idx],s[idx+1]],0,1),\
                    name='W'+str(idx+1)))
                b.append(tf.Variable(tf.random_uniform([1,s[idx+1]],0,1),\
                    name='b'+str(idx+1)))
                z.append(tf.add(tf.matmul(a[idx],W[idx]) , b[idx],\
                    name='z'+str(idx+1)))
                # The very last layer is named 'output' instead of 'a<m>'.
                if (idx < hidden_layer_size - 1):
                    a_name = 'a'+str(idx+1)
                else:
                    a_name = 'output'
                a.append(tf.nn.tanh(z[idx],name=a_name))
                # Accumulate L2 regularization over all weight matrices.
                punish = punish + tf.reduce_sum(W[idx]**2) * dlambda
                tfs.histogram('W'+str(idx+1),W[idx])
                tfs.histogram('b'+str(idx+1),b[idx])
                tfs.histogram('a'+str(idx+1),a[idx+1])
        #--------------------------------------------------------------
        with tf.name_scope('loss'):
            y_ = tf.placeholder(tf.float32,[None,K],name="tr_out")
            pure_loss = tf.reduce_mean(tf.square(a[hidden_layer_size]-y_),\
                name="pure_loss")
            loss = tf.add(pure_loss, punish, name="loss")
            tfs.scalar('loss',loss)
            tfs.scalar('punish',punish)
            tfs.scalar('pure_loss',pure_loss)
        with tf.name_scope('train'):
            optimizer = tf.train.AdamOptimizer(name="optimizer")
            optimizer.minimize(loss,name="train")
            # Record how many samples the network has been trained on.
            train_times = tf.Variable(tf.zeros([1,1]), name='train_times')
            train_trunk = tf.placeholder(tf.float32,[None,1],\
                name="train_trunk")
            tf.assign(train_times , train_trunk + train_times,\
                name="train_times_add")
        merged_summary = tfs.merge_all()
    return {"graph":graph,"merged_summary":merged_summary}
def scalar_summary(self, tag, value, step):
    """Add scalar summary.

    Args:
        tag: summary name shown in TensorBoard.
        value: scalar value to record.
        step: global step to associate with this data point.
    """
    with self.writer.as_default():
        tf_summary.scalar(tag, value, step=step)
def log_scalar(name, value, step):
    """Log a scalar value to both MLflow and TensorBoard.

    NOTE(review): `scalar(...).eval()` is TF1 graph style and requires an
    active default session — confirm callers provide one. `writer` and
    `mlflow` come from the enclosing module scope.
    """
    writer.add_summary(scalar(name, value).eval(), step)
    mlflow.log_metric(name, value)
def train_fit(data_set):
    """
    function: start the model training process  (translated from Chinese)
    :return: Model, Log
    """
    summary_writer = summary.create_file_writer(log_dir)
    data_params = {
        'batch_size': BATCH_SIZE,
        'shuffle': SHUFFLE,
        'num_workers': NUM_WORKS
    }
    devices = 'cuda' if cuda.is_available() else 'cpu'
    device(devices)
    optimizer = Adam(lr=0.001, beta_1=0.9, beta_2=0.99)
    # Torch DataLoader feeds batches that are converted to TF tensors below.
    train_set = CustomDataset(data_set, token, TEXT_LEN)
    train_set_pt = DataLoader(train_set, **data_params)
    model = BertClass()
    # NOTE(review): Checkpoint receives `model.trainable_variables` (a list),
    # not the model object — confirm this checkpoints/restores correctly.
    ckpt = train.Checkpoint(transformer=model.trainable_variables,
                            optimizer=optimizer)
    ckpt_manager = train.CheckpointManager(ckpt, checkpoint_path,
                                           max_to_keep=MAX_TO_KEEP)

    def train_step(model_, id_, mk_, type_ids_, optimizer_, target_):
        # One optimization step: forward pass, loss, accuracy, backprop.
        with GradientTape() as tp:
            y_pred = model_(id_, mk_, type_ids_)
            loss_value = loss_fn(target=target_, output=y_pred)
            # y_pred = [round(y_p) for y_p in y_pred]
            acc = accuracy_fn(target_, y_pred)
        gradient = tp.gradient(loss_value, model.trainable_variables)
        optimizer_.apply_gradients(zip(gradient, model.trainable_variables))
        return loss_value, np.array(acc).mean(), y_pred

    for epoch in range(1, EPOCHS + 1):
        for _, batch_data in enumerate(train_set_pt):
            # Torch tensors -> TF tensors for the TF model.
            ids = convert_to_tensor(batch_data['ids'].detach().numpy())
            mask = convert_to_tensor(batch_data['mask'].detach().numpy())
            token_type_ids = convert_to_tensor(
                batch_data['token_type_ids'].detach().numpy())
            targets = convert_to_tensor(
                batch_data['targets'].detach().numpy())
            loss, accuracy, pred = train_step(model_=model,
                                              id_=ids,
                                              mk_=mask,
                                              type_ids_=token_type_ids,
                                              optimizer_=optimizer,
                                              target_=targets)
            if _ % 20 == 0 and _ > 0:
                # Write loss and accuracy to the log; the log is saved
                # periodically during training. (translated from Chinese)
                print("epoch: {}, fit step: {}, loss: {}, accuracy: {}".
                      format(epoch, _, loss, accuracy))
        print("epoch is {}, predict: {}".format(epoch, pred))
        if epoch % 2 == 0:
            # Save the model every two training epochs. (translated)
            ckpt_manager.save(check_interval=True)
        with summary_writer.as_default():
            summary.scalar(name="loss_value_step:{}".format(epoch),
                           data=loss, step=epoch)
        with summary_writer.as_default():
            summary.scalar(name='accuracy_value_step:{}'.format(epoch),
                           data=accuracy, step=epoch)
def _write_aggregate_summaries(model_dir, global_step, eval_tag,
                               aggregates_dict):
    """Writes text metrics as summaries."""
    eval_dir = os.path.join(model_dir, eval_tag)
    summary_writer = contrib_summary.create_file_writer(eval_dir)
    with summary_writer.as_default(), \
        contrib_summary.always_record_summaries():

        def _scalar(tag, val):
            # All metrics share the same global step.
            contrib_summary.scalar(tag, val, step=global_step)

        for key, agg in sorted(aggregates_dict[_ROUGE_METRIC].items()):
            _scalar("text_eval/%s-R" % key, agg.mid.recall)
            _scalar("text_eval/%s-P" % key, agg.mid.precision)
            _scalar("text_eval/%s-F" % key, agg.mid.fmeasure)
        for key, agg in sorted(aggregates_dict[_BLEU_METRIC].items()):
            _scalar("text_eval/%s" % key, agg.mid.bleu)
        for key, agg in sorted(aggregates_dict[_REPETITION_METRIC].items()):
            _scalar("text_eval/%s-T" % key, agg.mid.target_ratio)
            _scalar("text_eval/%s-P" % key, agg.mid.prediction_ratio)
        for key, agg in sorted(aggregates_dict[_LENGTH_METRIC].items()):
            _scalar("text_eval/%s-T" % key, agg.mid.target_length)
            _scalar("text_eval/%s-P" % key, agg.mid.prediction_length)
            _scalar("text_eval/%s-R" % key, agg.mid.relative_length)
def build_and_optimize(
        train_data: Data, val_data: Data, step_size: int, options: Options,
        options_dict: Dict[str, Union[str, float]]) -> Dict[str, Any]:
    """Builds an DeepGRP model with updated options, trains it and validates it.

    Used for hyperopt optimization.

    Args:
        train_data (deepgrp.preprocessing.Data): Training data.
        val_data (deepgrp.preprocessing.Data): Validation data.
        step_size (int): Window size for the final evaluation.
        options (Options): General hyperparameter.
        options_dict (Dict[str, Union[str, float]]): Varying hyperparameter.

    Returns:
        Dict[str, Any]: Dictionary with results (Hyperopt compatible).
    """
    options = _update_options(options, options_dict)
    logdir = create_logdir(options)

    def _train_test(model):  # pragma: no cover
        # Train, predict on validation data, and compute metrics.
        extra_callback = [hp.KerasCallback(logdir, options_dict)]
        training((train_data, val_data), options, model, logdir,
                 extra_callback)
        K.clear_session()
        predictions = dgpred.predict_complete(step_size, options, logdir,
                                              val_data, use_mss=True)
        K.clear_session()
        # Drop windows where prediction failed (NaN rows).
        is_not_na = np.logical_not(np.isnan(predictions[:, 0]))
        predictions_class = predictions[is_not_na].argmax(axis=1)
        dgpred.filter_segments(predictions_class, options.min_mss_len)
        _, metrics = dgpred.calculate_metrics(
            predictions_class, val_data.truelbl[:, is_not_na].argmax(axis=0))
        return metrics

    results = {
        'loss': np.inf,
        'Metrics': None,
        'options': options.todict(),
        'logdir': None,
        'status': STATUS_FAIL,
        'error': "",
    }
    # BUG FIX: file_writer was only assigned inside the try block, so the
    # finally clause raised NameError (masking the real exception) whenever
    # create_model/clear_session failed before the writer existed.
    file_writer = None
    try:
        K.clear_session()
        model = create_model(options)
        file_writer = tf.summary.create_file_writer(logdir)
        file_writer.set_as_default()
        metrics = _train_test(model)
        tfsum.scalar('MCC', metrics['MCC'], step=0,
                     description="Matthews correlation coefficient")
    except Exception as err:  # pylint: disable=broad-except
        _LOGGER.exception("Error occurred while training")
        results["error"] = str(err)
        results["status"] = STATUS_FAIL
    else:
        results["logdir"] = logdir
        # Hyperopt minimizes, so negate MCC.
        results["loss"] = -1 * metrics['MCC']
        results["status"] = STATUS_OK
        results["Metrics"] = metrics
        if np.isnan(results["loss"]):
            results["status"] = STATUS_FAIL
            results["loss"] = np.inf
    finally:
        if file_writer is not None:
            file_writer.close()
    # Failed runs leave no log directory behind.
    if results["status"] == STATUS_FAIL and results["logdir"]:
        shutil.rmtree(results["logdir"])
    return results
# NOTE(review): tail fragment of a test-results script — the definitions of
# our_stats, train_stats, test_stats, net, X_OURS, logger, storage_loc and
# ROUNDING live outside this view.
our_stats = [
    "{0} {1}".format(k, round(float(v), ROUNDING))
    for k, v in our_stats.items()
]
guess = net.guess(X_OURS)
# One predicted class index per sample.
guess_argmax = [np.argmax(a) for a in guess]
# Output
logger.info(
    "----------------------- TEST RESULTS ------------------------")
logger.info("Network name: " + net.name)
logger.info("Train stats: " + " ".join(train_stats))
logger.info("Test stats: " + " ".join(test_stats))
logger.info("Our stats: " + " ".join(our_stats))
logger.info("Guesses: " + str(guess_argmax))
logger.info(
    "-------------------------------------------------------------")
# Reset
keras.backend.clear_session()
# Write guesses to log
writer = tfs.create_file_writer(storage_loc + "our_data")
with writer.as_default():
    # NOTE(review): hard-coded 10x10 assumes 10 samples with 10 class
    # probabilities each — confirm against the dataset.
    for i in range(10):
        for j in range(10):
            tfs.scalar("our_data_" + str(i), guess[i][j], step=j)
writer.flush()
logger.info("All training finished. Quitting")