def __init__(self, config: Config, shape):
    """
    Build the convolutional classification graph and publish its tensors.

    The graph consists of three convolution blocks (2x32, 2x64 and 4x96 feature
    maps; 3x3 kernels, stride 1), each closed by a 2x2 stride-2 max pooling,
    followed by two 128-unit fully connected layers with dropout and a final
    fully connected layer with ``n_classes`` units and identity activation.

    :param config: Config
        Read for "n_classes" (default 13) and "n_channels" (default 2)
    :param shape:
        Indexable; shape[0] is used as input height, shape[1] as input width
    """
    height, width = shape[0], shape[1]
    num_classes = config.get_value("n_classes", 13)
    num_channels = config.get_value("n_channels", 2)
    activation = selu
    initializer = scaled_elu_initialization

    # Graph inputs
    features = tf.placeholder(tf.float32, [None, height, width, num_channels], name="Features")
    labels = tf.placeholder(tf.float32, [None, num_classes], name="Labels")
    dropout_ph = tf.placeholder(tf.float32)
    print(features.get_shape())

    # (number of stacked convolutions, feature maps) for each block;
    # convolution ids run consecutively, pooling ids count the blocks
    conv_blocks = ((2, 32), (2, 64), (4, 96))

    stack = []
    incoming = features
    conv_id = 0
    for pool_id, (depth, n_maps) in enumerate(conv_blocks, start=1):
        for _ in range(depth):
            conv_id += 1
            stack.append(conv(incoming, initializer, activation, k=3, s=1, out=n_maps, id=conv_id))
            incoming = stack[-1]
        stack.append(maxpool(incoming, k=2, s=2, id=pool_id))
        incoming = stack[-1]

    # Dense layers: only the first one flattens its input
    for dense_id in (1, 2):
        stack.append(fc(stack[-1], initializer, activation, units=128, flatten=(dense_id == 1), id=dense_id))
        stack.append(dropout(stack[-1], dropout_ph, activation, id=dense_id))
    stack.append(fc(stack[-1], initializer, tf.identity, units=num_classes, flatten=False, id=3))

    # Publish
    self.X = features
    self.y_ = labels
    self.dropout = dropout_ph
    self.output = stack[-1].get_output()
    self.layers = stack
def main():
    """
    Run a training job inside a fresh, timestamped working directory.

    Creates ``<config.working_dir>/<config.specs>/<timestamp>``, installs a
    ``Tee`` built from ``sys.stdout`` and the opened ``log.txt`` as
    ``sys.stdout``, configures ``logger`` to write into the ``baselines``
    sub-directory and calls ``train`` with the settings stored under the
    "bl_config" and "policy" config entries.

    The original ``sys.stdout`` is restored before the log file is closed, so
    output written after this function returns does not hit a closed file.
    """
    config = Config()
    timestamp = dt.datetime.now().strftime("%Y-%m-%dT%H-%M-%S")
    working_dir = os.path.join(config.working_dir, config.specs, timestamp)
    make_sure_path_exists(working_dir)

    with open(os.path.join(working_dir, 'log.txt'), 'a') as logfile:
        original_stdout = sys.stdout
        sys.stdout = Tee(sys.stdout, logfile, sys.stdout)
        try:
            bl_config = config.get_value('bl_config')
            logger.configure(os.path.join(working_dir, 'baselines'), ['tensorboard', 'log', 'stdout'])
            train(env_id=bl_config['env'],
                  num_timesteps=bl_config['num_timesteps'],
                  policy=config.get_value('policy'),
                  working_dir=working_dir,
                  config=config)
        finally:
            # Flush while the log file is still open, then undo the redirection;
            # otherwise sys.stdout would keep pointing at the closed log file.
            sys.stdout.flush()
            sys.stdout = original_stdout
def main(_):
    """
    Extract features from a trained checkpoint and dump them to text files.

    Restores the graph stored in ``./model/checkpoint-752500.ckpt``, evaluates
    the tensor "FC-2_2/selu/mul_1:0" for every minibatch of the training reader
    and for every sample of the validation reader, and writes the results to
    ``traindata.txt`` and ``validationdata.txt``. For the validation set the
    rows returned for one sample are averaged into a single row.

    :param _:
        Unused positional argument
    :returns:
        Features of the last training minibatch, or None if the training
        loader yielded no minibatch
    """
    config = Config()
    np.random.seed(config.get_value("random_seed", 12345))

    # PARAMETERS
    batchsize = config.get_value("batchsize", 8)
    # NOTE(review): this value is fed to the restored "Placeholder:0" during
    # extraction; other scripts in this code base feed 0 at evaluation time --
    # confirm that a non-zero value is intended here.
    dropout = config.get_value("dropout", 0.25)  # TODO
    num_threads = config.get_value("num_threads", 0)

    # READER, LOADER
    readers = invoke_dataset_from_config(config)
    reader_train = readers["train"]
    reader_val = readers["val"]
    train_loader = torch.utils.data.DataLoader(reader_train, batch_size=batchsize, shuffle=True,
                                               num_workers=num_threads)
    val_loader = torch.utils.data.DataLoader(reader_val, batch_size=1, shuffle=False,
                                             num_workers=num_threads)

    # Output buffers; the sizes are fixed to the dataset this script was written for
    feats = np.zeros([24080, 256])
    vafeats = np.zeros([2625, 256])
    feature = None

    with tf.Session() as sess:
        # Path of the saved model
        saver = tf.train.import_meta_graph("./model/checkpoint-752500.ckpt.meta")
        saver.restore(sess, "./model/checkpoint-752500.ckpt")
        graph = tf.get_default_graph()

        # Input placeholder
        x_holder = graph.get_tensor_by_name("Features:0")
        # Tensor to extract, addressed by name
        fc3_features = graph.get_tensor_by_name("FC-2_2/selu/mul_1:0")
        keep_prob = graph.get_tensor_by_name("Placeholder:0")

        # Write each minibatch behind the previous one; a running offset works
        # for any batch size and for a shorter last minibatch.
        offset = 0
        for mb in train_loader:
            feature = sess.run(fc3_features,
                               feed_dict={x_holder: mb['input'].numpy(), keep_prob: dropout})
            n_rows = feature.shape[0]
            feats[offset:offset + n_rows, :] = feature
            offset += n_rows

        for vbi, vmb in enumerate(val_loader):
            valfeature = sess.run(fc3_features,
                                  feed_dict={x_holder: vmb['input'].squeeze().numpy(), keep_prob: dropout})
            # Average all rows returned for this sample into one feature row
            vafeats[vbi, :] = np.mean(np.atleast_2d(valfeature), axis=0)

    np.savetxt("traindata.txt", feats)
    np.savetxt("validationdata.txt", vafeats)
    return feature
def __init__(self, config: Config, shape):
    """
    Build a three-block convolutional classifier with multi-scale global pooling.

    The outputs of the three convolution blocks are each reduced by global
    average pooling, concatenated, and passed through two fully connected
    layers with dropout and a final fully connected layer with identity
    activation.

    :param config: Config
        Read for "n_classes" (default 3), "n_channels" (default 2), "n_full1"
        (default 256) and "n_full2" (defaults to the value of "n_full1")
    :param shape:
        Indexable; shape[0] is used as input height, shape[1] as input width
    """
    width = shape[1]
    height = shape[0]
    output_units = config.get_value("n_classes", 3)
    n_channels = config.get_value("n_channels", 2)
    activation = selu
    weight_init = scaled_elu_initialization
    n_full1 = config.get_value("n_full1", 256)
    # The second layer has its own key; falling back to n_full1 keeps configs
    # that only set "n_full1" behaving as before.
    n_full2 = config.get_value("n_full2", n_full1)

    # tf Graph input, compatible to DenseNet
    X = tf.placeholder(tf.float32, [None, height, width, n_channels], name="Features")
    y_ = tf.placeholder(tf.float32, [None, output_units], name="Labels")
    d = tf.placeholder(tf.float32)

    layers = list()

    # Block 1
    layers.append(conv(X, weight_init, activation, k=3, s=2, out=32, id=1))
    layers.append(maxpool(layers[-1], k=2, s=2, id=1))
    blk1 = layers[-1]
    blk1_bmp = layers[-2]  # layer feeding the max pooling of block 1
    print("Block 1: {}".format(blk1.get_output_shape()))

    # Block 2
    layers.append(conv(layers[-1], weight_init, activation, k=3, s=2, out=64, id=2))
    layers.append(conv(layers[-1], weight_init, activation, k=3, s=1, out=64, id=3))
    layers.append(conv(layers[-1], weight_init, activation, k=3, s=1, out=64, id=4))
    layers.append(conv(layers[-1], weight_init, activation, k=3, s=1, out=64, id=5))
    layers.append(maxpool(layers[-1], k=2, s=2, id=2))
    blk2 = layers[-1]
    blk2_bmp = layers[-2]  # layer feeding the max pooling of block 2
    print("Block 2: {}".format(blk2.get_output_shape()))

    # Block 3
    layers.append(conv(layers[-1], weight_init, activation, k=3, s=1, out=128, id=6))
    layers.append(conv(layers[-1], weight_init, activation, k=3, s=1, out=128, id=7))
    layers.append(conv(layers[-1], weight_init, activation, k=3, s=1, out=128, id=8))
    blk3 = layers[-1]
    print("Block 3: {}".format(blk3.get_output_shape()))

    # global average pooling
    layers.append(global_average(blk1, id=1))
    layers.append(global_average(blk2, id=2))
    layers.append(global_average(blk3, id=3))

    # concat
    layers.append(ConcatLayer(layers[-3:], name="ConcatAverage"))
    print("Concat: {}".format(layers[-1].get_output_shape()))

    # FC
    layers.append(fc(layers[-1], weight_init, activation, n_full1, flatten=True, id=1))
    layers.append(dropout(layers[-1], d, activation, 2))
    print("FC 1: {}".format(layers[-1].get_output_shape()))
    layers.append(fc(layers[-1], weight_init, activation, n_full2, flatten=False, id=2))
    layers.append(dropout(layers[-1], d, activation, 3))
    print("FC 2: {}".format(layers[-1].get_output_shape()))
    layers.append(fc(layers[-1], weight_init, tf.identity, output_units, flatten=False, id=3))

    # publish
    self.X = X
    self.y_ = y_
    self.dropout = d
    self.blk1_bmp = blk1_bmp
    self.blk2_bmp = blk2_bmp
    self.blk3 = blk3
    # The tail of `layers` is [fc1, dropout, fc2, dropout, out]
    self.fc1 = layers[-5]
    self.fc2 = layers[-3]
    self.out = layers[-1]
    self.layers = layers
    self.output = layers[-1].get_output()
def __init__(self, config: Config, shape):
    """
    Build a convolutional network with an additional noisy-AND pooled output.

    The input is average pooled and passed through a stack of convolution,
    max pooling and dropout layers ending in a 1x1 convolution with
    ``n_classes`` feature maps. From that layer two outputs are derived: the
    noisy-AND pooling of its sigmoid (published as ``output_nand``) and a
    fully connected layer (published as ``output``).

    The dropout placeholder acts as a switch: when it is fed 0, the rates
    handed to the dropout layers are 0; otherwise they are 0.2 for the
    convolutional stages and 0.5 before the intermediate output layer.

    :param config: Config
        Read for "n_classes" (default 13) and "a" (default 5, initial value of
        the non-trainable noisy-AND variable ``a``)
    :param shape:
        Indexable; shape[0] is used as input height, shape[1] as input width
    """
    height, width = shape[0], shape[1]
    n_classes = config.get_value("n_classes", 13)
    a_init = config.get_value('a', 5)
    activation = tf.nn.relu
    initializer = weight_xavier_conv2d

    # Graph inputs
    X = tf.placeholder(tf.float32, [None, height, width, 2], name="Features")
    y_ = tf.placeholder(tf.float32, [None, n_classes], name="Labels")
    d = tf.placeholder(tf.float32)

    def switched_rate(rate):
        # 0 when the dropout placeholder is fed 0, `rate` otherwise
        is_off = tf.equal(d, tf.constant(0, dtype=tf.float32))
        return tf.cond(is_off,
                       lambda: tf.constant(0, dtype=tf.float32),
                       lambda: tf.constant(rate, dtype=tf.float32))

    rate_conv = switched_rate(0.2)
    rate_top = switched_rate(0.5)
    print(X.get_shape())

    net = [avgpool(X, k=3, s=2, id=1)]
    net.append(conv(net[-1], initializer, activation, k=3, s=1, out=32, id=1))

    # Four conv -> maxpool -> dropout stages; pooling and dropout ids trail
    # the convolution id by one
    for conv_id, n_maps in ((2, 64), (3, 64), (4, 128), (5, 128)):
        stage_id = conv_id - 1
        net.append(conv(net[-1], initializer, activation, k=3, s=1, out=n_maps, id=conv_id))
        net.append(maxpool(net[-1], k=3, s=2, id=stage_id))
        net.append(dropout(net[-1], rate_conv, activation, id=stage_id))

    net.append(conv(net[-1], initializer, activation, k=1, s=1, out=1000, id=6))
    net.append(dropout(net[-1], rate_top, activation, id=5))

    # Intermediate output layer
    net.append(conv(net[-1], initializer, tf.identity, k=1, s=1, out=n_classes, id=7))

    # Noisy-AND pooling
    # NOTE(review): the extent of this scope is not recoverable from the source
    # layout; it is assumed to cover everything up to the pooled output.
    with tf.variable_scope('NoisyAND'):
        a = tf.get_variable(name='a', shape=[1],
                            initializer=tf.constant_initializer(a_init), trainable=False)
        b = tf.get_variable(name='b', shape=[1, n_classes],
                            initializer=tf.constant_initializer(0.0))
        b = tf.clip_by_value(b, 0.0, 1.0)
        mean = tf.reduce_mean(tf.nn.sigmoid(net[-1].get_output()), axis=[1, 2])
        numerator = tf.nn.sigmoid(a * (mean - b)) - tf.nn.sigmoid(-a * b)
        denominator = tf.sigmoid(a * (1 - b)) - tf.sigmoid(-a * b)
        noisy_and = numerator / denominator

    # Output layer
    fc_init = tf.contrib.layers.xavier_initializer(uniform=False, seed=None, dtype=tf.float32)
    net.append(fc(net[-1], fc_init, tf.identity, units=n_classes, flatten=False, id=1))

    # Publish
    self.X = X
    self.y_ = y_
    self.dropout = d
    self.output_nand = noisy_and
    self.output = net[-1].get_output(prev_layers=[net[-2]])
    self.layers = net
def main(_):
    """
    Train the configured model on bouncing MNIST sequences.

    Sets up the data handlers and a TeLL session, defines the segmentation
    loss (blurred cross entropy if "blur_filter_size" is configured, image
    cross entropy otherwise), an Adam update and a streaming mean IoU metric,
    then trains for "iterations" steps. Every "display_step" steps the loss is
    averaged over 10 test batches and, if configured, ConvLSTM states are
    plotted. After training, the loss is averaged over 100 test batches and
    the predictions for one more batch are written as PNG images into the
    result directory.

    :param _:
        Unused positional argument
    """
    np.random.seed(0)
    rng = np.random.RandomState(seed=0)
    config = Config()

    #
    # Load Data
    #
    with Timer(name="Load data"):
        training_data = BouncingMNISTDataHandler(
            config, config.mnist_train_images, config.mnist_train_labels, rng)
        test_data = BouncingMNISTDataHandler(
            config, config.mnist_test_images, config.mnist_test_labels, rng)

    dataset = DataSet((config.batch_size, config.num_frames, config.image_size, config.image_size, 1),
                      (config.batch_size, config.num_frames, config.image_size, config.image_size))

    # Create new TeLL session with two summary writers
    tell = TeLLSession(config=config, summaries=["train", "validation"], model_params={"dataset": dataset})

    # Get some members from the session for easier usage
    session = tell.tf_session
    summary_writer_train, summary_writer_validation = tell.tf_summaries["train"], tell.tf_summaries["validation"]
    model = tell.model
    workspace, config = tell.workspace, tell.config

    # Parameters
    learning_rate = config.get_value("learning_rate", 1e-3)
    iterations = config.get_value("iterations", 1000)
    batch_size = config.get_value("batch_size", 256)
    display_step = config.get_value("display_step", 10)
    calc_statistics = config.get_value("calc_statistics", False)
    blur_filter_size = config.get_value("blur_filter_size", None)

    # Number of test batches averaged for the periodic and the final evaluation
    n_val_batches = 10
    n_final_batches = 100

    # Define loss, optimizer and evaluation metric
    # NOTE(review): the extent of this name scope is not recoverable from the
    # source layout; it is assumed to cover everything up to the scalar summaries.
    with tf.name_scope('Cost'):
        blur_sampling_range = tf.placeholder(tf.float32)

        if blur_filter_size is not None:
            sem_seg_loss = blurred_cross_entropy(output=model.output, target=model.y_,
                                                 filter_size=blur_filter_size,
                                                 sampling_range=blur_sampling_range)
        else:
            sem_seg_loss, _ = image_crossentropy(pred=model.output, target=model.y_,
                                                 reduce_by="mean", calc_statistics=calc_statistics)
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(sem_seg_loss)

        # tf.argmax replaces the deprecated alias tf.arg_max
        iou, iou_op = tf.contrib.metrics.streaming_mean_iou(
            predictions=tf.squeeze(tf.argmax(model.output, 4)),
            labels=tf.squeeze(model.y_),
            num_classes=model.output.get_shape()[-1])
        loss_prot = tf.summary.scalar("Loss", sem_seg_loss)
        iou_prot = tf.summary.scalar("IoU", iou)

    train_summaries = tf.summary.merge([loss_prot])
    valid_summaries = tf.summary.merge([loss_prot, iou_prot])

    # Initialize tensorflow variables (either initializes them from scratch or restores from checkpoint)
    step = tell.initialize_tf_variables().global_step

    # -------------------------------------------------------------------------
    # Start training
    # -------------------------------------------------------------------------

    plot_elements_sym = list(model.get_plot_dict().values())
    plot_ranges = model.get_plot_range_dict()  # currently not used below

    try:
        while step < iterations:
            check_kill_file(workspace=workspace)
            batch_x, batch_y = training_data.GetBatch()

            # Summaries are indexed by the number of samples seen
            i = step * batch_size
            if step % display_step == 0:
                mean_loss = 0.
                for j in range(n_val_batches):
                    test_x, test_y = test_data.GetBatch()
                    summary, loss, _, *plot_elements = session.run(
                        [valid_summaries, sem_seg_loss, iou_op, *plot_elements_sym],
                        feed_dict={model.X: test_x, model.y_feed: test_y, blur_sampling_range: 3.5})
                    summary_writer_validation.add_summary(summary, i)
                    mean_loss += loss

                # Re-associate returned tensorflow values to plotting keys.
                # Plotting uses the values of the last validation batch; the
                # file names only depend on the step.
                plot_dict = OrderedDict(zip(list(model.get_plot_dict().keys()), plot_elements))

                # Plot outputs and cell states over frames if specified
                if config.store_states and 'ConvLSTMLayer_h' in plot_dict and step % config.plot_at == 0:
                    convh = plot_dict['ConvLSTMLayer_h']
                    convrh = [c[0, :, :, 0] for c in convh]
                    # Arrange the frames in rows of six
                    convrh = [convrh[:6], convrh[6:12], convrh[12:18], convrh[18:24], convrh[24:]]
                    plot_args = dict(images=convrh,
                                     filename=os.path.join(workspace.get_result_dir(),
                                                           "step{}_h.png".format(step)))
                    plotter.set_plot_kwargs(plot_args)
                    plotter.plot()

                if config.store_states and 'ConvLSTMLayer_c' in plot_dict and step % config.plot_at == 0:
                    convc = plot_dict['ConvLSTMLayer_c']
                    convrc = [c[0, :, :, 0] for c in convc]
                    convrc = [convrc[:6], convrc[6:12], convrc[12:18], convrc[18:24], convrc[24:]]
                    plot_args = dict(images=convrc,
                                     filename=os.path.join(workspace.get_result_dir(),
                                                           "step{}_c.png".format(step)))
                    plotter.set_plot_kwargs(plot_args)
                    plotter.plot()
                print('Validation Loss at step {}: {}'.format(i, mean_loss / n_val_batches))

            summary, loss, _ = session.run([train_summaries, sem_seg_loss, optimizer],
                                           feed_dict={model.X: batch_x, model.y_feed: batch_y,
                                                      blur_sampling_range: 3.5})
            summary_writer_train.add_summary(summary, i)

            step += 1

        print("Training Finished!")

        # Final Eval
        mean_loss = 0.
        for j in range(n_final_batches):
            test_x, test_y = test_data.GetBatch()
            summary, loss, _ = session.run([valid_summaries, sem_seg_loss, iou_op],
                                           feed_dict={model.X: test_x, model.y_feed: test_y,
                                                      blur_sampling_range: 3.5})
            mean_loss += loss

        # Store predictions and targets of one more test batch as images
        test_x, test_y = test_data.GetBatch()
        pred = session.run(tf.argmax(model.output, 4), feed_dict={model.X: test_x})

        pred = to_color(pred)
        true = to_color(test_y)
        out = to_image(pred, true)

        for sample_idx in range(pred.shape[0]):
            imsave(os.path.join(tell.workspace.get_result_dir(), 'sample_{:02d}.png'.format(sample_idx)),
                   out[sample_idx,])

        print("Validation Loss {}".format(mean_loss / n_final_batches))
    except AbortRun:
        print("Aborting...")
    finally:
        tell.close(global_step=step)
        plotter.close()
def main(_):
    """
    Train the configured model and periodically plot inputs, targets and states.

    Loads the "train" and "val" data readers, wraps them in normalisation and
    minibatch loaders, creates a TeLL session, and defines the image cross
    entropy loss (computed on frames 10 and later), a regularization penalty
    and the weight update. The graph is finalized before training. During
    training the validation set is scored every ``config.score_at`` steps and
    after each epoch; every ``config.plot_at`` minibatches (and only if the
    workspace plot file exists) the inputs, targets, network outputs and
    ConvLSTM hidden/cell states are written as subplot images.

    :param _:
        Unused positional argument
    """
    # ------------------------------------------------------------------------------------------------------------------
    # Setup training
    # ------------------------------------------------------------------------------------------------------------------

    # Initialize config, parses command line and reads specified file; also supports overriding of values from cmd
    config = Config()

    #
    # Prepare input data
    #

    # Make sure datareader is reproducible
    random_seed = config.get_value('random_seed', 12345)
    np.random.seed(random_seed)  # not threadsafe, use rnd_gen object where possible
    rnd_gen = np.random.RandomState(seed=random_seed)

    # Number of frames plotted per sample
    n_timesteps = config.get_value('mnist_n_timesteps', 20)

    # Load datasets for trainingset
    with Timer(name="Loading Data"):
        readers = initialize_datareaders(config, required=("train", "val"))

    # Set Preprocessing
    trainingset = Normalize(readers["train"], apply_to=['X', 'y'])
    validationset = Normalize(readers["val"], apply_to=['X', 'y'])

    # Set minibatch loaders
    trainingset = DataLoader(trainingset, batchsize=2, batchsize_method='zeropad', verbose=False)
    validationset = DataLoader(validationset, batchsize=2, batchsize_method='zeropad', verbose=False)

    #
    # Initialize TeLL session
    #
    tell = TeLLSession(config=config, summaries=["train", "validation"], model_params={"dataset": trainingset})

    # Get some members from the session for easier usage
    sess = tell.tf_session
    summary_writer_train, summary_writer_validation = tell.tf_summaries["train"], tell.tf_summaries["validation"]
    model = tell.model
    workspace, config = tell.workspace, tell.config

    #
    # Define loss functions and update steps
    #
    print("Initializing loss calculation...")
    # Only frames from index 10 on contribute to the loss
    loss, _ = image_crossentropy(target=model.y_[:, 10:, :, :], pred=model.output[:, 10:, :, :, :],
                                 pixel_weights=model.pixel_weights[:, 10:, :, :], reduce_by='mean')
    train_summary = tf.summary.scalar("Training Loss", loss)  # create summary to add to tensorboard

    # Loss function for validationset
    val_loss = loss
    val_loss_summary = tf.summary.scalar("Validation Loss", val_loss)  # create summary to add to tensorboard

    # Regularization
    reg_penalty = regularize(layers=model.get_layers(), l1=config.l1, l2=config.l2,
                             regularize_weights=True, regularize_biases=True)
    regpen_summary = tf.summary.scalar("Regularization Penalty", reg_penalty)  # create summary to add to tensorboard

    # Update step for weights
    update = update_step(loss + reg_penalty, config)

    #
    # Initialize tensorflow variables (either initializes them from scratch or restores from checkpoint)
    #
    global_step = tell.initialize_tf_variables().global_step

    #
    # Set up plotting
    #  (store tensors we want to plot in a dictionary for easier tensor-evaluation)
    #
    # Input, target and network output of the 1st sample for every frame (subplots 1 to 3)
    tensors_subplot1 = OrderedDict()
    tensors_subplot2 = OrderedDict()
    tensors_subplot3 = OrderedDict()
    for frame in range(n_timesteps):
        tensors_subplot1['input_{}'.format(frame)] = model.X[0, frame, :, :]
        tensors_subplot2['target_{}'.format(frame)] = model.y_[0, frame, :, :] - 1
        tensors_subplot3['network_output_{}'.format(frame)] = tf.argmax(model.output[0, frame, :, :, :], axis=-1) - 1

    # Hidden states and cell states of the 1st sample and 1st lstm unit for every frame (subplots 4 and 5)
    tensors_subplot4 = OrderedDict()
    tensors_subplot5 = OrderedDict()
    for frame in range(len(model.lstm_layer.c)):
        tensors_subplot4['hiddenstate_{}'.format(frame)] = model.lstm_layer.h[frame][0, :, :, 0]
        tensors_subplot5['cellstate_{}'.format(frame)] = model.lstm_layer.c[frame][0, :, :, 0]

    # Create a list to store all symbolic tensors for plotting
    plotting_tensors = list(tensors_subplot1.values()) + list(tensors_subplot2.values()) + \
                       list(tensors_subplot3.values()) + list(tensors_subplot4.values()) + \
                       list(tensors_subplot5.values())

    #
    # Finalize graph
    #  This makes our tensorflow graph read-only and prevents further additions to the graph
    #
    sess.graph.finalize()
    if sess.graph.finalized:
        print("Graph is finalized!")
    else:
        raise ValueError("Could not finalize graph!")

    sys.stdout.flush()

    # ------------------------------------------------------------------------------------------------------------------
    # Start training
    # ------------------------------------------------------------------------------------------------------------------

    try:
        # Resume at the epoch implied by the restored global step
        epoch = int(global_step / trainingset.n_mbs)
        epochs = range(epoch, config.n_epochs)

        # Loop through epochs
        print("Starting training")

        for ep in epochs:
            print("Starting training epoch: {}".format(ep))
            # Initialize variables for over-all loss per epoch
            train_loss = 0

            # Load one minibatch at a time and perform a training step
            t_mb = Timer(verbose=True, name="Load Minibatch")
            mb_training = trainingset.batch_loader(rnd_gen=rnd_gen)

            #
            # Loop through minibatches
            #
            for mb_i, mb in enumerate(mb_training):
                sys.stdout.flush()
                # Print minibatch load time
                t_mb.print()

                # Abort if indicated by file
                check_kill_file(workspace)

                #
                # Calculate scores on validation set
                #
                if global_step % config.score_at == 0:
                    print("Starting scoring on validation set...")
                    evaluate_on_validation_set(validationset, global_step, sess, model, summary_writer_validation,
                                               val_loss_summary, val_loss, workspace)

                #
                # Perform weight updates and do plotting
                #
                if (mb_i % config.plot_at) == 0 and os.path.isfile(workspace.get_plot_file()):
                    # Perform weight update, return summary values and values for plotting
                    with Timer(verbose=True, name="Weight Update"):
                        train_summ, regpen_summ, _, cur_loss, *plotting_values = sess.run(
                            [train_summary, regpen_summary, update, loss, *plotting_tensors],
                            feed_dict={model.X: mb['X'], model.y_: mb['y']})

                    # Add current summary values to tensorboard
                    summary_writer_train.add_summary(train_summ, global_step=global_step)
                    summary_writer_train.add_summary(regpen_summ, global_step=global_step)

                    # plotting_values holds the values of all subplots back to back;
                    # each section is consumed from the front after it was plotted.

                    # Create and save subplot 1 (input)
                    save_subplots(images=plotting_values[:len(tensors_subplot1)],
                                  subfigtitles=list(tensors_subplot1.keys()),
                                  subplotranges=[(0, 1)] * n_timesteps, colorbar=True, automatic_positioning=True,
                                  tight_layout=True,
                                  filename=os.path.join(workspace.get_result_dir(),
                                                        "input_ep{}_mb{}.png".format(ep, mb_i)))
                    del plotting_values[:len(tensors_subplot1)]

                    # Create and save subplot 2 (target)
                    # One (0, 10) range per frame, matching the form used for subplot 1
                    save_subplots(images=plotting_values[:len(tensors_subplot2)],
                                  subfigtitles=list(tensors_subplot2.keys()),
                                  subplotranges=[(0, 10)] * n_timesteps, colorbar=True, automatic_positioning=True,
                                  tight_layout=True,
                                  filename=os.path.join(workspace.get_result_dir(),
                                                        "target_ep{}_mb{}.png".format(ep, mb_i)))
                    del plotting_values[:len(tensors_subplot2)]

                    # Create and save subplot 3 (output)
                    save_subplots(images=plotting_values[:len(tensors_subplot3)],
                                  subfigtitles=list(tensors_subplot3.keys()),
                                  # subplotranges=[(0, 10)] * n_timesteps,
                                  colorbar=True, automatic_positioning=True, tight_layout=True,
                                  filename=os.path.join(workspace.get_result_dir(),
                                                        "output_ep{}_mb{}.png".format(ep, mb_i)))
                    del plotting_values[:len(tensors_subplot3)]

                    # Create and save subplot 4 (hidden states, i.e. ConvLSTM outputs)
                    save_subplots(images=plotting_values[:len(tensors_subplot4)],
                                  subfigtitles=list(tensors_subplot4.keys()),
                                  title='ConvLSTM hidden states (outputs)', colorbar=True,
                                  automatic_positioning=True, tight_layout=True,
                                  filename=os.path.join(workspace.get_result_dir(),
                                                        "hidden_ep{}_mb{}.png".format(ep, mb_i)))
                    del plotting_values[:len(tensors_subplot4)]

                    # Create and save subplot 5 (cell states)
                    save_subplots(images=plotting_values[:len(tensors_subplot5)],
                                  subfigtitles=list(tensors_subplot5.keys()),
                                  title='ConvLSTM cell states', colorbar=True, automatic_positioning=True,
                                  tight_layout=True,
                                  filename=os.path.join(workspace.get_result_dir(),
                                                        "cell_ep{}_mb{}.png".format(ep, mb_i)))
                    del plotting_values[:len(tensors_subplot5)]

                else:
                    #
                    # Perform weight update without plotting
                    #
                    with Timer(verbose=True, name="Weight Update"):
                        train_summ, regpen_summ, _, cur_loss = sess.run(
                            [train_summary, regpen_summary, update, loss],
                            feed_dict={model.X: mb['X'], model.y_: mb['y']})

                    # Add current summary values to tensorboard
                    summary_writer_train.add_summary(train_summ, global_step=global_step)
                    summary_writer_train.add_summary(regpen_summ, global_step=global_step)

                # Add current loss to running average loss
                train_loss += cur_loss

                # Print some status info
                print("ep {} mb {} loss {} (avg. loss {})".format(ep, mb_i, cur_loss, train_loss / (mb_i + 1)))

                # Reset timer
                t_mb = Timer(name="Load Minibatch")

                # Free the memory allocated for the minibatch data
                mb.clear()
                del mb

                global_step += 1

            #
            # Calculate scores on validation set
            #

            # Perform scoring on validation set
            print("Starting scoring on validation set...")
            evaluate_on_validation_set(validationset, global_step, sess, model, summary_writer_validation,
                                       val_loss_summary, val_loss, workspace)

            # Save the model
            tell.save_checkpoint(global_step=global_step)

            # Abort if indicated by file
            check_kill_file(workspace)

    except AbortRun:
        print("Detected kill file, aborting...")

    finally:
        #
        # If the program executed correctly or an error was raised, close the data readers and save the model and exit
        #
        trainingset.close()
        validationset.close()
        tell.close(save_checkpoint=True, global_step=global_step)
def main(_):
    """
    Train the configured model on MNIST and report accuracies.

    Creates a TeLL session with a training and a validation summary writer,
    defines the softmax cross entropy cost (minus a configurable multiple of
    the prediction entropy) and the accuracy, and runs "iterations" steps. On
    every "display_step"-th step the first 2048 validation samples are
    evaluated instead of performing a weight update; on all other steps one
    minibatch update is performed. Finally the accuracy on the first 2048 test
    samples is printed.

    :param _:
        Unused positional argument
    """
    config = Config()

    # TeLL session with two summary writers
    tell = TeLLSession(config=config, summaries=["train", "validation"])
    session = tell.tf_session
    summary_writer_train = tell.tf_summaries["train"]
    summary_writer_validation = tell.tf_summaries["validation"]
    model = tell.model
    workspace, config = tell.workspace, tell.config

    # Parameters
    learning_rate = config.get_value("learning_rate", 1e-3)  # read here, not used in this function
    iterations = config.get_value("iterations", 1000)
    batchsize = config.get_value("batchsize", 250)
    display_step = config.get_value("display_step", 10)
    dropout = config.get_value("dropout_prob", 0.25)

    # Data
    with Timer(name="Load data"):
        mnist = input_data.read_data_sets("../MNIST_data", one_hot=True)

    # Loss and optimizer
    with tf.name_scope("Cost"):
        per_sample_xent = tf.nn.softmax_cross_entropy_with_logits(logits=model.output, labels=model.y_)
        cost = tf.reduce_mean(per_sample_xent)

        # Entropy of the predicted distribution; probabilities are floored
        # at 1e-15 before taking the logarithm
        probs = tf.nn.softmax(logits=model.output)
        log_probs = tf.log(tf.maximum(probs, 1e-15))
        entropy = tf.reduce_mean(-tf.reduce_sum(log_probs * probs, 1))

        session_optimizer = tell.tf_optimizer
        entropy_weight = config.get_value("entropy_w", 0.)
        optimizer = session_optimizer.minimize(cost - entropy_weight * entropy)

        tf.summary.scalar("Loss", cost)
        # Mean probability assigned to the labelled class
        label_prob = tf.reduce_sum(tf.nn.softmax(logits=model.output) * model.y_, 1)
        tf.summary.scalar("O-Prob", tf.reduce_mean(label_prob))

    # Accuracy
    with tf.name_scope("Accuracy"):
        predicted_class = tf.argmax(model.output, 1)
        labelled_class = tf.argmax(model.y_, 1)
        correct_pred = tf.equal(predicted_class, labelled_class)
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
        tf.summary.scalar("Accuracy", accuracy)

    merged_summaries = tf.summary.merge_all()

    # Initialize tensorflow variables (either initializes them from scratch or restores from checkpoint)
    step = tell.initialize_tf_variables(reset_optimizer_on_restore=True).global_step

    # -------------------------------------------------------------------------
    # Start training
    # -------------------------------------------------------------------------
    acc_train = 0.
    val_acc_best = 0.

    try:
        while step < iterations:
            check_kill_file(workspace=workspace)
            batch_x, batch_y = mnist.train.next_batch(batchsize)

            # Summaries are indexed by the number of samples seen
            i = step * batchsize
            if step % display_step != 0:
                # Regular step: weight update on the current minibatch
                train_feed = {model.X: batch_x, model.y_: batch_y, model.dropout: dropout}
                summary, acc_train, _ = session.run([merged_summaries, accuracy, optimizer],
                                                    feed_dict=train_feed)
                summary_writer_train.add_summary(summary, i)
            else:
                # Display step: evaluate on validation data, no weight update
                valid_feed = {model.X: mnist.validation.images[:2048],
                              model.y_: mnist.validation.labels[:2048],
                              model.dropout: 0}
                summary, acc = session.run([merged_summaries, accuracy], feed_dict=valid_feed)
                summary_writer_validation.add_summary(summary, i)
                print('step {}: train acc {}, valid acc {}'.format(i, acc_train, acc))
                val_acc_best = max(val_acc_best, acc)

            step += 1

        print("Training Finished! best valid acc {}".format(val_acc_best))

        # Final Eval
        test_feed = {model.X: mnist.test.images[:2048],
                     model.y_: mnist.test.labels[:2048],
                     model.dropout: 0}
        print("Test Accuracy:", session.run(accuracy, feed_dict=test_feed))
    except AbortRun:
        print("Aborting...")
    finally:
        tell.close(global_step=step)
working_dir=working_dir, config=config, plotting=dict( save_subplots=save_subplots, save_movie=save_movie, save_subplots_line_plots=save_subplots_line_plots), rnd_gen=rnd_gen) if __name__ == '__main__': config = Config() working_dir = os.path.join(config.working_dir, config.specs) working_dir = os.path.join(working_dir, dt.datetime.now().strftime("%Y-%m-%dT%H-%M-%S")) make_sure_path_exists(working_dir) with open(os.path.join(working_dir, 'log.txt'), 'a') as logfile: sys.stdout = Tee(sys.stdout, logfile, sys.stdout) bl_config = config.get_value('bl_config') logger.configure(os.path.join(working_dir, 'baselines'), ['tensorboard', 'log', 'stdout']) train(env_id=bl_config['env'], num_timesteps=bl_config['num_timesteps'], policy=config.get_value('policy'), working_dir=working_dir, config=config) sys.stdout.flush()
def main(_):
    """
    Run the autoencoder update loop of the configured model.

    Loads the data readers, creates a TeLL session for a model with input
    shape [300], and loops over epochs and training minibatches. For every
    minibatch a constant all-ones sample of random length (20 to 99 time
    steps, 300 features) is fed to the model and the "ae_update" operation is
    run together with the listed data and summary tensors. A checkpoint is
    saved after every epoch and when the run ends.

    :param _:
        Unused positional argument
    """
    # ------------------------------------------------------------------------------------------------------------------
    # Setup training
    # ------------------------------------------------------------------------------------------------------------------

    # Initialize config, parses command line and reads specified file; also supports overriding of values from cmd
    config = Config()

    random_seed = config.get_value('random_seed', 12345)
    np.random.seed(random_seed)  # not threadsafe, use rnd_gen object where possible
    rnd_gen = np.random.RandomState(seed=random_seed)

    # Load datasets for trainingset
    with Timer(name="Loading Data"):
        readers = initialize_datareaders(config, required=("train", "val"))
        trainingset = DataLoader(readers["train"], batchsize=config.batchsize)

    # Initialize TeLL session
    tell = TeLLSession(config=config, model_params={"input_shape": [300]})

    # Get some members from the session for easier usage
    session = tell.tf_session
    model = tell.model
    workspace, config = tell.workspace, tell.config

    # Initialize Tensorflow variables
    global_step = tell.initialize_tf_variables().global_step

    sys.stdout.flush()

    # ------------------------------------------------------------------------------------------------------------------
    # Start training
    # ------------------------------------------------------------------------------------------------------------------

    try:
        # Resume at the epoch implied by the restored global step
        epoch = int(global_step / trainingset.n_mbs)
        epochs = range(epoch, config.n_epochs)

        #
        # Loop through epochs
        #
        print("Starting training")

        for ep in epochs:
            print("Starting training epoch: {}".format(ep))

            # Load one minibatch at a time and perform a training step
            t_mb = Timer(name="Load Minibatch")
            mb_training = trainingset.batch_loader(rnd_gen=rnd_gen)

            #
            # Loop through minibatches
            #
            # NOTE(review): the loaded minibatch only drives the iteration; the
            # sample fed to the model is generated below.
            for _mb in mb_training:
                sys.stdout.flush()
                # Print minibatch load time
                t_mb.print()

                # Abort if indicated by file
                check_kill_file(workspace)

                #
                # Calculate scores on validation set
                #
                if global_step % config.score_at == 0:
                    print("Starting scoring on validation set...")

                # Get new sample
                training_sample = np.ones(shape=(1, np.random.randint(low=20, high=100), 300))

                #
                # Perform weight update
                #
                with Timer(name="Weight Update"):
                    #
                    # Set placeholder values
                    #
                    placeholder_values = OrderedDict(
                        input_placeholder=training_sample,
                        sequence_length_placeholder=training_sample.shape[1]
                    )
                    feed_dict = dict(((model.placeholders[k], placeholder_values[k])
                                      for k in placeholder_values.keys()))

                    #
                    # Decide which tensors to compute
                    #
                    # NOTE(review): 'loss_last_time_prediction' is listed twice -- confirm whether a
                    # different key was intended for the second entry.
                    data_keys = ['lstm_internals_enc', 'lstm_internals_dec', 'lstm_h_enc', 'lstm_h_dec', 'loss',
                                 'loss_last_time_prediction', 'loss_last_time_prediction', 'reg_loss']
                    data_tensors = [model.data_tensors[k] for k in data_keys]

                    operation_keys = ['ae_update']
                    operation_tensors = [model.operation_tensors[k] for k in operation_keys]

                    summary_keys = ['all_summaries']
                    summary_tensors = [model.summaries[k] for k in summary_keys]

                    #
                    # Run graph and re-associate return values with keys in dictionary
                    #
                    ret = session.run(data_tensors + summary_tensors + operation_tensors, feed_dict)

                    # NOTE(review): the results of these two runs are discarded; they look like
                    # debugging leftovers and cost two extra graph evaluations per minibatch.
                    session.run(model.data_tensors['loss'], feed_dict)
                    session.run(model.data_tensors['latent_space'], feed_dict)

                    # Values come back in fetch order: data tensors first, then summaries, then
                    # operations. Pair them with the key lists that were actually fetched.
                    ret_dict = OrderedDict(zip(data_keys + summary_keys, ret))

                global_step += 1

            #
            # Calculate scores on validation set after training is done
            #

            # Perform scoring on validation set
            print("Starting scoring on validation set...")

            tell.save_checkpoint(global_step=global_step)

            # Abort if indicated by file
            check_kill_file(workspace)

    except AbortRun:
        print("Detected kill file, aborting...")

    finally:
        tell.close(save_checkpoint=True, global_step=global_step)
def __init__(self, config: Config = None, summaries: list = None, model_params=None):
    """
    Take care of initializing a TeLL environment.

    Creates the workspace, instantiates the configured network architecture, configures tensorflow and
    opens the tensorboard summary writers. `config.restore` is handed to the workspace.

    :param config: Config
        config object or None; if None config will be initialized from command line parameter
    :param summaries: list
        List of names for summary writers; if None (default) one writer named "training" is opened
    :param model_params:
        Optional dictionary of parameters unpacked and passed to model upon initialization if not None

    Members exposed on the instance after construction:
        tf_session: Tensorflow session
        tf_saver: Tensorflow checkpoint saver
        tf_summaries: dictionary containing tensorflow summary writers, accessible via the names passed
            upon creation
        model: TeLL model
        workspace: TeLL workspace instance
        config: TeLL config object
        global_step: python-side global step counter, set to 0 by this constructor
    """
    if config is None:
        config = Config()
    # A list literal as default argument would be shared between all calls, so the default is built here
    if summaries is None:
        summaries = ["training"]

    # Setup working dir
    workspace = Workspace(config.working_dir, config.specs, config.restore)
    print("TeLL workspace: {}".format(workspace.working_dir))

    # Import configured architecture
    architecture = config.import_architecture()

    # Set GPU
    os.environ["CUDA_VISIBLE_DEVICES"] = str(config.get_value("cuda_gpu", "0"))

    # Some Tensorflow configuration
    tf_config = tf.ConfigProto(
        inter_op_parallelism_threads=config.get_value("inter_op_parallelism_threads", 1),
        intra_op_parallelism_threads=config.get_value("intra_op_parallelism_threads", 1),
        log_device_placement=config.get_value("log_device_placement", False))
    tf_config.gpu_options.allow_growth = config.get_value("tf_allow_growth", True)

    # Start Tensorflow session
    print("Starting session...")
    tf_session = tf.Session(config=tf_config)

    # Set tensorflow random seed
    set_seed(config.get_value("random_seed", 12345))

    #
    # Init Tensorboard: one FileWriter per requested summary name, each in its own sub-directory
    #
    print("Initializing summaries...")
    summary_instances = {}
    for summary in summaries:
        summary_instances[summary] = tf.summary.FileWriter(
            os.path.join(workspace.get_tensorboard_dir(), summary), tf_session.graph)

    # Initialize Model
    if model_params is None:
        model = architecture(config=config)
    else:
        model = architecture(config=config, **model_params)

    # Print number of trainable parameters
    trainable_vars = np.sum([np.prod(t.get_shape()) for t in tf.trainable_variables()])
    print("Number of trainable variables: {}".format(trainable_vars))

    with tf.name_scope("TeLL") as tell_namescope:
        # Store global step in checkpoint
        tf_global_step = tf.Variable(initial_value=tf.constant(0, dtype=tf.int64),
                                     name="tell_global_step", dtype=tf.int64, trainable=False)
        # Define placeholder and operation to dynamically update tf_global_step with a python integer
        global_step_placeholder = tf.placeholder_with_default(tf_global_step,
                                                              shape=tf_global_step.get_shape())
        set_global_step = tf_global_step.assign(global_step_placeholder)

    #
    # Add ops to save and restore all the variables
    #
    tf_saver = tf.train.Saver(max_to_keep=config.get_value("max_checkpoints", 10), sharded=False)

    # Expose members
    self.tf_session = tf_session
    self.tf_saver = tf_saver
    self.tf_summaries = summary_instances
    self.model = model
    self.workspace = workspace
    self.config = config
    self.global_step = 0
    self.__global_step_placeholder = global_step_placeholder
    self.__global_step_update = set_global_step
    self.__tell_namescope = tell_namescope
def main(_):
    """
    Train the configured architecture on a ShortLongDataset.

    Creates the training and validation datasets, opens a TeLL session with a single "train" summary
    writer, builds the weighted cross-entropy losses, the regularization penalty and the update step and
    then runs the epoch/minibatch loop. The validation set is scored whenever the global step is a
    multiple of `config.score_at` and once more after every epoch; a checkpoint is saved after every
    epoch and when the session is closed.

    :param _: unused positional argument
    """
    # Config parses the command line and reads the specified file; values may be overridden from cmd
    config = Config()

    with Timer(name="Loading Training Data"):
        # Seed numpy and a dedicated generator so that the data readers are reproducible
        seed = config.get_value('random_seed', 12345)
        np.random.seed(seed)  # not threadsafe, use rnd_gen object where possible
        rnd_gen = np.random.RandomState(seed=seed)

        print("Loading training data...")
        trainingset = ShortLongDataset(n_timesteps=250, n_samples=3000,
                                       batchsize=config.batchsize, rnd_gen=rnd_gen)
        validationset = ShortLongDataset(n_timesteps=250, n_samples=300,
                                         batchsize=config.batchsize, rnd_gen=rnd_gen)

    # TeLL session and shortcuts to the members used below
    tell = TeLLSession(config=config, summaries=["train"],
                       model_params={"dataset": trainingset})
    session = tell.tf_session
    summary_writer = tell.tf_summaries["train"]
    model = tell.model
    workspace, config = tell.workspace, tell.config

    def build_loss():
        """Add one weighted cross-entropy loss (mean over axis 1, then over the rest) to the graph."""
        # For 0/1 labels this is (number of zeros) / (number of ones) in the fed labels
        pos_weight = -tf.reduce_sum(model.y_ - 1) / tf.reduce_sum(model.y_)
        per_sample = tf.reduce_mean(
            tf.nn.weighted_cross_entropy_with_logits(model.y_, model.output, pos_weight),
            axis=[1])
        return tf.reduce_mean(per_sample)

    def score_validation(step):
        """Score the validation set at the given global step."""
        print("Starting scoring on validation set...")
        evaluate_on_validation_set(validationset, step, session, model, summary_writer,
                                   val_loss_summary, val_loss, workspace)

    print("Initializing loss calculation...")
    # Training and validation use separate (identically built) loss ops with their own summaries
    loss = build_loss()
    train_summary = tf.summary.scalar("Training Loss", loss)
    val_loss = build_loss()
    val_loss_summary = tf.summary.scalar("Validation Loss", val_loss)

    reg_penalty = regularize(layers=model.get_layers(), l1=config.l1, l2=config.l2,
                             regularize_weights=True, regularize_biases=True)
    regpen_summary = tf.summary.scalar("Regularization Penalty", reg_penalty)

    # Weight update minimizes loss plus regularization penalty
    update = update_step(loss + reg_penalty, config)

    global_step = tell.initialize_tf_variables().global_step
    sys.stdout.flush()

    train_fetches = [train_summary, regpen_summary, update, loss]
    try:
        # Continue at the epoch implied by the current global step
        first_epoch = int(global_step / trainingset.n_mbs)
        remaining_epochs = range(first_epoch, config.n_epochs)

        print("Starting training")
        for ep in remaining_epochs:
            print("Starting training epoch: {}".format(ep))
            train_loss = 0  # summed minibatch loss of this epoch

            t_mb = Timer(name="Load Minibatch")
            for mb_i, mb in enumerate(trainingset.batch_loader(rnd_gen=rnd_gen)):
                sys.stdout.flush()
                t_mb.print()  # minibatch load time

                check_kill_file(workspace)

                if global_step % config.score_at == 0:
                    score_validation(global_step)

                with Timer(name="Weight Update"):
                    train_summ, regpen_summ, _, cur_loss = session.run(
                        train_fetches,
                        feed_dict={model.X: mb['X'], model.y_: mb['y']})

                for summ in (train_summ, regpen_summ):
                    summary_writer.add_summary(summ, global_step=global_step)

                train_loss += cur_loss
                print("ep {} mb {} loss {} (avg. loss {})".format(
                    ep, mb_i, cur_loss, train_loss / (mb_i + 1)))

                # Restart the load timer and release the minibatch memory
                t_mb = Timer(name="Load Minibatch")
                mb.clear()
                del mb

                global_step += 1

            # End of epoch: score, checkpoint and look for the kill file once more
            score_validation(global_step)
            tell.save_checkpoint(global_step=global_step)
            check_kill_file(workspace)

    except AbortRun:
        print("Detected kill file, aborting...")

    finally:
        tell.close(save_checkpoint=True, global_step=global_step)
def __init__(self, config: Config, dataset):
    """
    Multi-scale convolutional architecture with a semantic segmentation output layer.

    Six dilated ELU convolutions (dilation rates 11, 9, 7, 5, 3 and 1) are applied in parallel to the
    input layer. Their feature maps are concatenated, reduced with a 1x1 convolution, passed through
    three further convolutions and finally mapped to 11 feature maps by a linear convolution.

    :param config: Config
        read for "enc_dec_conv_maps_base", "include_org_label", "conv_W_initializer" (via get_value)
        and for the attributes kernel_conv and kernel_conv_out
    :param dataset:
        object providing X_shape and y_shape (and y_org_shape if "include_org_label" is set);
        X_shape is sliced and concatenated with a tuple, so it is expected to be a tuple

    Published members: X, y_, output and, if "include_org_label" is set, y_org.
    """
    basenr_convs = config.get_value("enc_dec_conv_maps_base", 32)
    include_org_label = config.get_value("include_org_label", False)
    init_name = config.get_value("conv_W_initializer", "weight_xavier_conv2d")
    conv_W_initializer = getattr(TeLL.initializations, init_name)
    # If True all scale levels use one common kernel, otherwise each level gets its own
    shared = False
    dilation_rates = (11, 9, 7, 5, 3, 1)

    #
    # Layer list
    #
    layers = list()

    #
    # Create placeholders for feeding an input frame and a label at the first timestep
    # (dataset.X_shape is [sample, seq_pos, x, y, features])
    #
    X = tf.placeholder(tf.float32, shape=dataset.X_shape)
    y_ = tf.placeholder(tf.int32, shape=dataset.y_shape)
    if include_org_label:
        y_org = tf.placeholder(tf.int32, shape=dataset.y_org_shape)

    #
    # Input Layer: zero tensor with the shape of a single sequence position
    #
    # NOTE(review): X is never passed to the input layer in this constructor, so as written the
    # output below is built on the zero tensor - confirm whether the input layer is supposed to be
    # updated with X here.
    input_shape = dataset.X_shape[:1] + (1, ) + dataset.X_shape[2:]
    layers.append(RNNInputLayer(tf.zeros(input_shape, dtype=tf.float32)))

    #
    # Scaler Structure: one dilated convolution per scale level, all reading the input layer
    #
    conv_weights_shape = [
        config.kernel_conv, config.kernel_conv,
        layers[-1].get_output_shape()[-1], basenr_convs
    ]
    shared_input_conv_weights = conv_W_initializer(conv_weights_shape) if shared else None
    for rate in dilation_rates:
        if shared:
            scale_weights = shared_input_conv_weights
        else:
            scale_weights = conv_W_initializer(conv_weights_shape)
        layers.append(
            ConvLayer(incoming=layers[0], W=scale_weights, a=tf.nn.elu,
                      dilation_rate=[rate, rate]))

    # concat feature maps of all scale levels and reduce the number of features with a 1x1 conv
    layers.append(ConcatLayer(incomings=layers[1:]))
    conv_weights_shape = [1, 1, layers[-1].get_output_shape()[-1], basenr_convs]
    layers.append(ConvLayer(incoming=layers[-1], W=conv_W_initializer(conv_weights_shape)))

    # add 3 more conv layers to have some depth; the kernel shape is derived once from the 1x1 conv
    conv_weights_shape = [
        config.kernel_conv, config.kernel_conv,
        layers[-1].get_output_shape()[-1], basenr_convs
    ]
    for _ in range(3):
        layers.append(ConvLayer(incoming=layers[-1], W=conv_W_initializer(conv_weights_shape)))

    #
    # Output Layer: linear convolution producing 11 feature maps
    #
    layers.append(
        ConvLayer(incoming=layers[-1],
                  W=conv_W_initializer([
                      config.kernel_conv_out, config.kernel_conv_out,
                      layers[-1].get_output_shape()[-1], 11
                  ]),
                  padding='SAME',
                  name='ConvLayerSemanticSegmentation',
                  a=tf.identity))
    sem_seg_layer = layers[-1]

    #
    # Publish
    #
    # NOTE(review): the layer list itself is not stored on the instance - verify whether callers
    # need it (e.g. for regularization).
    self.X = X
    self.y_ = y_
    self.output = sem_seg_layer.get_output()
    # Expose the optional original-label placeholder so that it can actually be fed
    if include_org_label:
        self.y_org = y_org
def __init__(self, config: Config, dataset):
    """
    Encoder / ConvLSTM / decoder architecture for semantic segmentation, unrolled with a python for loop.

    The network is defined once on a zero-initialized input layer holding a single sequence position:
    `depth` encoder convolutions with max-pooling, a ConvLSTM layer (or a substitute convolution if
    config.n_lstm is falsy), `depth` upscaling/concat/convolution decoder stages and a final linear
    convolution with 11 feature maps. Afterwards the graph is built by looping over the sequence
    positions of X, updating the input layer with one position at a time.

    :param config: Config
        read via get_value for "enc_dec_depth", "enc_dec_conv_maps_base", "include_org_label" and
        "conv_W_initializer"; further attributes (kernel_conv, kernel_conv_out, n_lstm, ...) are
        accessed directly
    :param dataset:
        object providing X_shape and y_shape (and y_org_shape if "include_org_label" is set)

    Published members: X, y_feed (full label placeholder), y_ and output (both without the first 10
    sequence positions), lstm_layer and, if "include_org_label" is set, y_org.
    """
    depth = config.get_value("enc_dec_depth", 2)
    basenr_convs = config.get_value("enc_dec_conv_maps_base", 16)
    include_org_label = config.get_value("include_org_label", False)
    init_name = config.get_value("conv_W_initializer", "weight_xavier_conv2d")
    # The kernel initializer is looked up by name in TeLL.initializations
    conv_W_initializer = getattr(TeLL.initializations, init_name)

    #
    # Layer list
    #
    layers = list()

    #
    # Create placeholders for feeding an input frame and a label at the first timestep
    #
    n_seq_pos = dataset.X_shape[1]  # dataset.X_shape is [sample, seq_pos, x, y, features]
    X = tf.placeholder(tf.float32, shape=dataset.X_shape)
    y_ = tf.placeholder(tf.int32, shape=dataset.y_shape)
    if include_org_label:
        y_org = tf.placeholder(tf.int32, shape=dataset.y_org_shape)

    # ----------------------------------------------------------------------------------------------
    # Define network architecture
    # ----------------------------------------------------------------------------------------------

    #
    # Initialize input to network of shape [sample, 1, x, y, features] with zero tensor of size of a frame
    #
    input_shape = dataset.X_shape[:1] + (1, ) + dataset.X_shape[2:]
    layers.append(RNNInputLayer(tf.zeros(input_shape, dtype=tf.float32)))
    rnn_input_layer = layers[-1]  # not used again in this constructor

    #
    # Encoder and maxpooling layers
    # (encoder d has basenr_convs * 2**d feature maps and is followed by a stride-2 max-pooling)
    #
    encoders = list()
    for d in range(1, depth + 1):
        print("\tConvLayerEncoder{}...".format(d))
        layers.append(
            ConvLayer(incoming=layers[-1],
                      W=conv_W_initializer([
                          config.kernel_conv, config.kernel_conv,
                          layers[-1].get_output_shape()[-1], basenr_convs * (2**d)
                      ]),
                      padding='SAME',
                      name='ConvLayerEncoder{}'.format(d),
                      a=tf.nn.elu))
        # Encoder outputs are kept for the concat connections of the decoder
        encoders.append(layers[-1])

        print("\tMaxpoolingLayer{}...".format(d))
        layers.append(
            MaxPoolingLayer(incoming=layers[-1],
                            ksize=(1, 3, 3, 1),
                            strides=(1, 2, 2, 1),
                            padding='SAME',
                            name='MaxpoolingLayer{}'.format(d)))

    #
    # ConvLSTM Layer
    #
    if config.n_lstm:
        n_lstm = config.n_lstm
        lstm_x_fwd = config.kernel_lstm_fwd
        lstm_y_fwd = config.kernel_lstm_fwd
        lstm_x_bwd = config.kernel_lstm_bwd
        lstm_y_bwd = config.kernel_lstm_bwd

        lstm_input_channels_fwd = layers[-1].get_output_shape()[-1]
        # If the recurrent features are squashed (see below), the second kernel of each pair sees
        # config.reduced_rec_lstm channels instead of n_lstm
        if config.reduced_rec_lstm:
            lstm_input_channels_bwd = config.reduced_rec_lstm
        else:
            lstm_input_channels_bwd = n_lstm

        # Every W_* entry is a pair [kernel built from the *_fwd sizes, kernel built from the *_bwd sizes]
        # NOTE(review): the last bias gets a second argument of 1 - presumably its initial value, verify
        # against `constant`
        lstm_init = dict(W_ci=[
            conv_W_initializer(
                [lstm_x_fwd, lstm_y_fwd, lstm_input_channels_fwd, n_lstm]),
            conv_W_initializer(
                [lstm_x_bwd, lstm_y_bwd, lstm_input_channels_bwd, n_lstm])
        ],
                         W_ig=[
                             conv_W_initializer([
                                 lstm_x_fwd, lstm_y_fwd, lstm_input_channels_fwd, n_lstm
                             ]),
                             conv_W_initializer([
                                 lstm_x_bwd, lstm_y_bwd, lstm_input_channels_bwd, n_lstm
                             ])
                         ],
                         W_og=[
                             conv_W_initializer([
                                 lstm_x_fwd, lstm_y_fwd, lstm_input_channels_fwd, n_lstm
                             ]),
                             conv_W_initializer([
                                 lstm_x_bwd, lstm_y_bwd, lstm_input_channels_bwd, n_lstm
                             ])
                         ],
                         W_fg=[
                             conv_W_initializer([
                                 lstm_x_fwd, lstm_y_fwd, lstm_input_channels_fwd, n_lstm
                             ]),
                             conv_W_initializer([
                                 lstm_x_bwd, lstm_y_bwd, lstm_input_channels_bwd, n_lstm
                             ])
                         ],
                         b_ci=constant([n_lstm]),
                         b_ig=constant([n_lstm]),
                         b_og=constant([n_lstm]),
                         b_fg=constant([n_lstm], 1))

        print("\tConvLSTM...")
        layers.append(
            ConvLSTMLayer(incoming=layers[-1],
                          n_units=n_lstm,
                          **lstm_init,
                          a_out=get_rec_attr(tf, config.lstm_act),
                          forgetgate=config.forgetgate,
                          comb=config.lstm_comb,
                          store_states=config.store_states,
                          tickerstep_biases=tf.zeros,
                          output_dropout=config.lstm_output_dropout,
                          precomp_fwds=False))
        lstm_layer = layers[-1]

        #
        # Optional maxpooling and upscaling of rec LSTM connections combined with/or optional feature squashing
        #
        ext_lstm_recurrence = None

        if config.lstm_rec_maxpooling:
            print("\tMaxpoolingDeconv...")
            layers.append(
                MaxPoolingLayer(incoming=layers[-1],
                                ksize=(1, 3, 3, 1),
                                strides=(1, 2, 2, 1),
                                padding='SAME',
                                name='MaxPoolingLayer'))
            layers.append(
                DeConvLayer(incoming=layers[-1],
                            a=tf.nn.elu,
                            W=conv_W_initializer([
                                3, 3,
                                layers[-1].get_output_shape()[-1],
                                layers[-1].get_output_shape()[-1]
                            ]),
                            strides=(1, 2, 2, 1),
                            padding='SAME',
                            name='DeConvLayer'))
            print("\tConvLSTMRecurrence...")
            ext_lstm_recurrence = layers[-1]

        if config.reduced_rec_lstm:
            print("\tFeatureSquashing...")
            layers.append(
                ConvLayer(incoming=layers[-1],
                          W=conv_W_initializer([
                              config.kernel_conv_out, config.kernel_conv_out,
                              layers[-1].get_output_shape()[-1], config.reduced_rec_lstm
                          ]),
                          padding='SAME',
                          name='ConvLayerFeatureSquashing',
                          a=tf.nn.elu))
            print("\tConvLSTMRecurrence...")
            ext_lstm_recurrence = layers[-1]

        # Register the last layer of the optional path above as external recurrence of the ConvLSTM
        # NOTE(review): when one of the optional paths is active, layers[-1] is that path's last layer,
        # so the decoder below is stacked on it rather than on lstm_layer - confirm this is intended
        if ext_lstm_recurrence is not None:
            lstm_layer.add_external_recurrence(ext_lstm_recurrence)
    else:
        # No ConvLSTM configured: a plain convolution takes its place
        print("\tSubstituteConvLayer...")

        # NOTE(review): n_lstm is set to basenr_convs * 2**depth * 4 while the layer below is created
        # with int(basenr_convs * 2**depth * 4.5) feature maps - confirm which value is meant
        n_lstm = basenr_convs * (2**depth) * 4
        layers.append(
            ConvLayer(incoming=layers[-1],
                      W=conv_W_initializer([
                          config.kernel_conv, config.kernel_conv,
                          layers[-1].get_output_shape()[-1],
                          int(basenr_convs * (2**depth) * 4.5)
                      ]),
                      padding='SAME',
                      name='SubstituteConvLayer',
                      a=tf.nn.elu))
        lstm_layer = layers[-1]

    #
    # Decoder and upscaling layers (deepest level first)
    #
    for d in list(range(1, depth + 1))[::-1]:
        print("\tUpscalingLayer{}...".format(d))
        # Scale to the spatial size of the encoder output of the same level
        # NOTE(review): this assignment REPLACES the last list entry instead of appending, so the
        # wrapped layer is no longer contained in `layers` (it stays reachable as `incoming`) - check
        # whether consumers of the layer list are meant to miss it
        layers[-1] = ScalingLayer(
            incoming=layers[-1],
            size=encoders[d - 1].get_output_shape()[-3:-1],
            name='UpscalingLayergLayer{}'.format(d))

        print("\tConcatLayer{}...".format(d))
        layers.append(
            ConcatLayer([encoders[d - 1], layers[-1]],
                        name='ConcatLayer{}'.format(d)))

        print("\tConvLayerDecoder{}...".format(d))
        layers.append(
            ConvLayer(incoming=layers[-1],
                      W=conv_W_initializer([
                          config.kernel_conv, config.kernel_conv,
                          layers[-1].get_output_shape()[-1], basenr_convs * (2**d)
                      ]),
                      padding='SAME',
                      name='ConvLayerDecoder{}'.format(d),
                      a=tf.nn.elu))

    #
    # ConvLayer for semantic segmentation (linear activation, 11 feature maps)
    #
    print("\tConvLayerSemanticSegmentation...")
    layers.append(
        ConvLayer(incoming=layers[-1],
                  W=conv_W_initializer([
                      config.kernel_conv_out, config.kernel_conv_out,
                      layers[-1].get_output_shape()[-1], 11
                  ]),
                  padding='SAME',
                  name='ConvLayerSemanticSegmentation',
                  a=tf.identity))
    sem_seg_layer = layers[-1]

    # ----------------------------------------------------------------------------------------------
    # Loop through sequence positions and create graph
    # ----------------------------------------------------------------------------------------------

    #
    # Loop through sequence positions
    #
    print("\tRNN Loop...")
    sem_seg_out = list()
    for seq_pos in range(n_seq_pos):
        with tf.name_scope("Sequence_pos_{}".format(seq_pos)):
            print("\t seq. pos. {}...".format(seq_pos))
            # Set input layer to X at frame (t) and outputs of upper layers at (t-1)
            layers[0].update(X[:, seq_pos:seq_pos + 1, :])

            # Calculate new network output at (t), including new hidden states
            _ = lstm_layer.get_output()
            sem_seg_out.append(
                sem_seg_layer.get_output(prev_layers=encoders + [lstm_layer]))

    #
    # Tickersteps are disabled. The removed code fed the last frame X[:, -1:, :] to the input layer
    # and called lstm_layer.get_output(tickerstep_nodes=True) config.tickersteps times; according to
    # the original comment, using an empty frame as input during ticker steps did not work well.
    #

    print("\tDone!")

    #
    # Publish
    #
    self.X = X
    self.y_feed = y_  # complete label placeholder, this is the one to feed
    # Labels and outputs of the first 10 sequence positions are cut off
    self.y_ = y_[:, 10:]
    self.output = tf.concat(sem_seg_out[10:], 1)
    self.__layers = layers
    self.__n_lstm = n_lstm
    self.__lstm_layer = lstm_layer
    self.lstm_layer = lstm_layer
    self.__plot_dict, self.__plot_range_dict, self.__plotsink = self.__setup_plotting(
        config)
    if include_org_label:
        self.y_org = y_org
def main(_):
    """
    Train the configured model on MNIST and report training, validation and test accuracy.

    Opens a TeLL session, sets up readers/loaders for the train, validation and test splits, minimizes
    softmax cross-entropy plus two `decor_penalty` terms with Adam and, every `display_step` steps,
    prints the last training accuracy together with loss and accuracy averaged over all validation
    minibatches. After training the accuracy of every test minibatch is printed.

    :param _: unused positional argument
    """
    config = Config()

    # Create new TeLL session with two summary writers
    tell = TeLLSession(config=config, summaries=["train", "validation"])

    # Get some members from the session for easier usage
    session = tell.tf_session
    summary_writer_train = tell.tf_summaries["train"]
    model = tell.model
    workspace, config = tell.workspace, tell.config

    # Parameters
    learning_rate = config.get_value("learning_rate", 1e-3)
    iterations = config.get_value("iterations", 1000)
    batchsize = config.get_value("batchsize", 250)
    display_step = config.get_value("display_step", 10)
    dropout = config.get_value("dropout_prob", 0.25)

    #
    # Prepare input data
    #

    # Set datareaders
    training_reader = MNISTReader(dset='train')
    validation_reader = MNISTReader(dset='validation')
    test_reader = MNISTReader(dset='test')

    # Set Preprocessing
    # NOTE(review): 'X' is normalized by both Normalize stages - confirm this is intended
    training_data_preprocessed = DataProcessing(training_reader, apply_to='X')
    training_data_preprocessed = Normalize(training_data_preprocessed, apply_to='X')
    training_data_preprocessed = Normalize(training_data_preprocessed, apply_to=['X', 'Y'])

    # Set minibatch loaders
    # NOTE(review): the loaders use a fixed batchsize of 50; the configured `batchsize` only enters
    # the summary step index below - confirm
    training_loader = DataLoader(training_data_preprocessed, batchsize=50,
                                 batchsize_method='zeropad')
    validation_loader = DataLoader(validation_reader, batchsize=50, batchsize_method='zeropad')
    test_loader = DataLoader(test_reader, batchsize=50, batchsize_method='zeropad')

    #
    # Define loss and optimizer
    #
    with tf.name_scope("Cost"):
        cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=model.output, labels=model.y_))
        decor1 = decor_penalty(model.hidden1, model.y_, 10, [1], 0.)
        decor2 = decor_penalty(model.hidden2, model.y_, 10, [1], 6e-5)
        optimizer = tf.train.AdamOptimizer(
            learning_rate=learning_rate).minimize(cost + decor1 + decor2)
        tf.summary.scalar("Loss", cost)
        tf.summary.scalar("Decor", decor1 + decor2)

    # Evaluate model
    with tf.name_scope("Accuracy"):
        correct_pred = tf.equal(tf.argmax(model.output, 1), tf.argmax(model.y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
        tf.summary.scalar("Accuracy", accuracy)

    merged_summaries = tf.summary.merge_all()

    # Initialize tensorflow variables (either initializes them from scratch or restores from checkpoint)
    step = tell.initialize_tf_variables().global_step

    # -------------------------------------------------------------------------
    # Start training
    # -------------------------------------------------------------------------
    acc_train = 0.

    try:
        while step < iterations:
            # Loop through training set
            for mb_i, mb in enumerate(
                    training_loader.batch_loader(num_cached=5, num_threads=3)):
                check_kill_file(workspace=workspace)

                # Perform weight update
                summary, acc_train, _ = session.run(
                    [merged_summaries, accuracy, optimizer],
                    feed_dict={
                        model.X: mb['X'],
                        model.y_: mb['y'],
                        model.dropout: dropout
                    })
                summary_writer_train.add_summary(summary, mb_i + step * batchsize)

                if step % display_step == 0:
                    # Loop through validation set (dropout switched off) and average over the
                    # number of minibatches actually seen. Dividing by the last enumerate index
                    # would be off by one and fail for a single validation minibatch.
                    cos_sum, acc_sum, n_val_mbs = 0, 0, 0
                    for vmb in validation_loader.batch_loader(num_cached=5, num_threads=3):
                        cos, acc = session.run(
                            [cost, accuracy],
                            feed_dict={
                                model.X: vmb['X'],
                                model.y_: vmb['y'],
                                model.dropout: 0
                            })
                        cos_sum += cos
                        acc_sum += acc
                        n_val_mbs += 1

                    if n_val_mbs:
                        print('step {}: train acc {}, valid loss {}, valid acc {}'.format(
                            mb_i + step * batchsize, acc_train,
                            cos_sum / n_val_mbs, acc_sum / n_val_mbs))
                    else:
                        print('step {}: train acc {}, no validation minibatches available'.format(
                            mb_i + step * batchsize, acc_train))

                step += 1
                if step >= iterations:
                    break

        print("Training Finished!")

        # Final Eval
        for tmb_i, tmb in enumerate(
                test_loader.batch_loader(num_cached=len(test_reader.get_sample_keys()),
                                         num_threads=1)):
            print(
                "Test Accuracy:",
                session.run(accuracy,
                            feed_dict={
                                model.X: tmb['X'],
                                model.y_: tmb['y'],
                                model.dropout: 0
                            }))
    except AbortRun:
        print("Aborting...")
    finally:
        tell.close(global_step=step)
def main(_):
    """
    Train the configured ConvLSTM model on the MovingDotDataset, with optional plotting.

    Creates training/validation datasets, opens a TeLL session with "train" and "validation" summary
    writers, builds the weighted cross-entropy losses, the regularization penalty and the update step,
    finalizes the graph and runs the epoch/minibatch loop. Every `config.plot_at` minibatches, if the
    workspace plot file exists, the tensors of the model's plot dictionary are fetched together with
    the weight update and handed to `plotter`. The validation set is scored whenever the global step
    is a multiple of `config.score_at` and after every epoch; a checkpoint is saved after every epoch
    and when the session is closed.

    :param _: unused positional argument
    :raises ValueError: if the tensorflow graph could not be finalized
    """
    # ----------------------------------------------------------------------------------------------
    # Setup training
    # ----------------------------------------------------------------------------------------------

    # Initialize config, parses command line and reads specified file; also supports overriding of
    # values from cmd
    config = Config()

    #
    # Load datasets for training and validation
    #
    with Timer(name="Loading Data", verbose=True):
        # Make sure datareader is reproducible
        random_seed = config.get_value('random_seed', 12345)
        np.random.seed(random_seed)  # not threadsafe, use rnd_gen object where possible
        rnd_gen = np.random.RandomState(seed=random_seed)

        print("Loading training data...")
        trainingset = MovingDotDataset(n_timesteps=5, n_samples=50,
                                       batchsize=config.batchsize, rnd_gen=rnd_gen)
        print("Loading validation data...")
        validationset = MovingDotDataset(n_timesteps=5, n_samples=25,
                                         batchsize=config.batchsize, rnd_gen=rnd_gen)

    #
    # Initialize TeLL session
    #
    tell = TeLLSession(config=config, summaries=["train", "validation"],
                       model_params={"dataset": trainingset})

    # Get some members from the session for easier usage
    sess = tell.tf_session
    summary_writer_train, summary_writer_validation = tell.tf_summaries[
        "train"], tell.tf_summaries["validation"]
    model = tell.model
    workspace, config = tell.workspace, tell.config

    #
    # Define loss functions and update steps
    #
    print("Initializing loss calculation...")
    # only 1 pixel per sample is of positive class -> up-weight!
    pos_target_weight = np.prod(trainingset.y_shape[2:]) - 1
    loss = tf.reduce_mean(
        tf.nn.weighted_cross_entropy_with_logits(targets=model.y_, logits=model.output,
                                                 pos_weight=pos_target_weight))
    train_summary = tf.summary.scalar("Training Loss", loss)  # create summary to add to tensorboard

    # Loss function for validationset
    val_loss = tf.reduce_mean(
        tf.nn.weighted_cross_entropy_with_logits(targets=model.y_, logits=model.output,
                                                 pos_weight=pos_target_weight))
    val_loss_summary = tf.summary.scalar("Validation Loss", val_loss)  # create summary to add to tensorboard

    # Regularization
    reg_penalty = regularize(layers=model.get_layers(), l1=config.l1, l2=config.l2,
                             regularize_weights=True, regularize_biases=True)
    regpen_summary = tf.summary.scalar("Regularization Penalty", reg_penalty)  # create summary to add to tensorboard

    # Update step for weights
    update = update_step(loss + reg_penalty, config)

    #
    # Prepare plotting: symbolic tensors to fetch and the value ranges to plot them with
    #
    plot_elements_sym = list(model.get_plot_dict().values())
    plot_ranges = model.get_plot_range_dict()

    #
    # Initialize tensorflow variables (either initializes them from scratch or restores from checkpoint)
    #
    global_step = tell.initialize_tf_variables().global_step

    #
    # Finalize graph
    #  This makes our tensorflow graph read-only and prevents further additions to the graph
    #
    sess.graph.finalize()
    if sess.graph.finalized:
        print("Graph is finalized!")
    else:
        raise ValueError("Could not finalize graph!")

    sys.stdout.flush()

    # ----------------------------------------------------------------------------------------------
    # Start training
    # ----------------------------------------------------------------------------------------------
    try:
        epoch = int(global_step / trainingset.n_mbs)
        epochs = range(epoch, config.n_epochs)

        # Loop through epochs
        print("Starting training")

        for ep in epochs:
            print("Starting training epoch: {}".format(ep))
            # Initialize variables for over-all loss per epoch
            train_loss = 0

            # Load one minibatch at a time and perform a training step
            t_mb = Timer(verbose=True, name="Load Minibatch")
            mb_training = trainingset.batch_loader(rnd_gen=rnd_gen)

            #
            # Loop through minibatches
            #
            for mb_i, mb in enumerate(mb_training):
                sys.stdout.flush()
                # Print minibatch load time
                t_mb.print()

                # Abort if indicated by file
                check_kill_file(workspace)

                #
                # Calculate scores on validation set
                #
                if global_step % config.score_at == 0:
                    print("Starting scoring on validation set...")
                    evaluate_on_validation_set(validationset, global_step, sess, model,
                                               summary_writer_validation, val_loss_summary,
                                               val_loss, workspace)

                #
                # Perform weight updates and do plotting
                #
                if (mb_i % config.plot_at) == 0 and os.path.isfile(workspace.get_plot_file()):
                    # Perform weight update, return summary_str and values for plotting
                    with Timer(verbose=True, name="Weight Update"):
                        train_summ, regpen_summ, _, cur_loss, cur_output, *plot_elements = sess.run(
                            [
                                train_summary, regpen_summary, update, loss, model.output,
                                *plot_elements_sym
                            ],
                            feed_dict={
                                model.X: mb['X'],
                                model.y_: mb['y']
                            })

                    # Add current summary values to tensorboard
                    summary_writer_train.add_summary(train_summ, global_step=global_step)
                    summary_writer_train.add_summary(regpen_summ, global_step=global_step)

                    # Re-associate returned tensorflow values to plotting keys
                    plot_dict = OrderedDict(
                        zip(list(model.get_plot_dict().keys()), plot_elements))

                    #
                    # Plot subplots in plot_dict
                    # Loop through each element in plotlist and pass it to the plotter
                    # (adapt this to your needs for plotting)
                    #
                    with Timer(verbose=True, name="Plotting", precision="msec"):
                        for plotlist_i, plotlist in enumerate(model.get_plotsink()):
                            for frame in range(len(plot_dict[plotlist[0]])):
                                subplotlist = []
                                subfigtitles = []
                                subplotranges = []
                                # Arrange the keys of this plot list in rows of n_cols entries
                                n_cols = int(np.ceil(np.sqrt(len(plotlist))))

                                for row_i in range(n_cols):
                                    row_keys = plotlist[n_cols * row_i:n_cols * row_i + n_cols]
                                    subfigtitles.append(row_keys)
                                    # Elements with fewer frames than the first one fall back to
                                    # frame 0 (the comparison yields 0 for too-large indices)
                                    subplotlist.append([
                                        plot_dict[p][frame * (frame < len(plot_dict[p])), :]
                                        for p in row_keys
                                    ])
                                    subplotranges.append(
                                        [plot_ranges.get(p, False) for p in row_keys])

                                # remove rows/columns without images
                                subplotlist = [p for p in subplotlist if p != []]

                                plot_args = dict(
                                    images=subplotlist,
                                    filename=os.path.join(
                                        workspace.get_result_dir(),
                                        "plot{}_ep{}_mb{}_fr{}.png".format(
                                            plotlist_i, ep, mb_i, frame)),
                                    subfigtitles=subfigtitles,
                                    subplotranges=subplotranges)
                                plotter.set_plot_kwargs(plot_args)
                                plotter.plot()

                    # Plot outputs (h) and cell states (c) over frames if specified: first sample and
                    # first feature map of every entry, arranged in rows of 6 images.
                    # NOTE(review): the nesting of this part could not be recovered from the source
                    # with certainty; it is placed after the plot-sink loop, so `plotlist_i` is the
                    # index of the last plot list - confirm against the original layout.
                    for state_key, suffix in (('ConvLSTMLayer_h', 'h'), ('ConvLSTMLayer_c', 'c')):
                        if config.store_states and state_key in plot_dict:
                            state_imgs = [s[0, :, :, 0] for s in plot_dict[state_key]]
                            state_rows = [
                                state_imgs[:6], state_imgs[6:12], state_imgs[12:18],
                                state_imgs[18:24], state_imgs[24:]
                            ]
                            plot_args = dict(
                                images=state_rows,
                                filename=os.path.join(
                                    workspace.get_result_dir(),
                                    "plot{}_ep{}_mb{}_{}.png".format(
                                        plotlist_i, ep, mb_i, suffix)))
                            plotter.set_plot_kwargs(plot_args)
                            plotter.plot()

                else:
                    #
                    # Perform weight update without plotting
                    #
                    with Timer(verbose=True, name="Weight Update"):
                        train_summ, regpen_summ, _, cur_loss = sess.run(
                            [train_summary, regpen_summary, update, loss],
                            feed_dict={
                                model.X: mb['X'],
                                model.y_: mb['y']
                            })

                    # Add current summary values to tensorboard
                    summary_writer_train.add_summary(train_summ, global_step=global_step)
                    summary_writer_train.add_summary(regpen_summ, global_step=global_step)

                # Add current loss to running average loss
                train_loss += cur_loss

                # Print some status info
                print("ep {} mb {} loss {} (avg. loss {})".format(
                    ep, mb_i, cur_loss, train_loss / (mb_i + 1)))

                # Reset timer
                t_mb = Timer(name="Load Minibatch")

                # Free the memory allocated for the minibatch data
                mb.clear()
                del mb

                global_step += 1

            #
            # Calculate scores on validation set
            #

            # Perform scoring on validation set
            print("Starting scoring on validation set...")
            evaluate_on_validation_set(validationset, global_step, sess, model,
                                       summary_writer_validation, val_loss_summary, val_loss,
                                       workspace)

            # Save the model
            tell.save_checkpoint(global_step=global_step)

            # Abort if indicated by file
            check_kill_file(workspace)

    except AbortRun:
        print("Detected kill file, aborting...")

    finally:
        #
        # If the program executed correctly or an error was raised, close the data readers and save
        # the model and exit
        #
        trainingset.close()
        validationset.close()
        tell.close(save_checkpoint=True, global_step=global_step)
        plotter.close()
def main(_):
    """Train the configured model and validate it after every epoch.

    Builds the data loaders, the TeLL session, the per-class sigmoid
    cross-entropy losses, the regularization penalty and the update step,
    then runs the epoch loop.  If no trainable variables remain (e.g.
    everything is frozen), a single validation pass is run instead.  The
    TeLL session is always closed with a checkpoint, also on errors.

    Args:
        _: Unused positional argument (presumably the argv list forwarded
            by the launcher -- the call site is not visible here).
    """
    config = Config()
    np.random.seed(config.get_value("random_seed", 12345))

    # PARAMETERS
    n_epochs = config.get_value("epochs", 100)
    batchsize = config.get_value("batchsize", 8)
    n_classes = config.get_value("n_classes", 13)
    dropout = config.get_value("dropout", 0.25)  # TODO
    num_threads = config.get_value("num_threads", 5)
    initial_val = config.get_value("initial_val", True)

    # READER, LOADER
    readers = invoke_dataset_from_config(config)
    reader_train = readers["train"]
    reader_val = readers["val"]
    # Use the same (defaulted) batch size that the epoch bookkeeping below
    # uses, instead of reading config.batchsize a second time.
    train_loader = torch.utils.data.DataLoader(reader_train,
                                               batch_size=batchsize,
                                               shuffle=True,
                                               num_workers=num_threads)
    val_loader = torch.utils.data.DataLoader(reader_val,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=num_threads)

    # CONFIG
    tell = TeLLSession(config=config,
                       model_params={"shape": reader_train.shape})
    # Get some members from the session for easier usage
    session = tell.tf_session
    model = tell.model
    workspace, config = tell.workspace, tell.config

    prediction = tf.sigmoid(model.output)
    prediction_val = tf.reduce_mean(tf.sigmoid(model.output),
                                    axis=0,
                                    keepdims=True)

    # LOSS
    if hasattr(model, "loss"):
        loss = model.loss()
    else:
        # Sum of the per-class mean sigmoid cross-entropies
        with tf.name_scope("Loss_per_Class"):
            loss = 0
            for i in range(n_classes):
                loss_batch = tf.nn.sigmoid_cross_entropy_with_logits(
                    logits=model.output[:, i], labels=model.y_[:, i])
                loss_mean = tf.reduce_mean(loss_batch)
                loss += loss_mean

    # Validation loss after patching
    # NOTE(review): model.loss() is invoked a second time here, exactly as
    # before -- confirm that building it twice is intended.
    if hasattr(model, "loss"):
        loss_val = model.loss()
    else:
        # Same as above, but the logits are averaged over the first axis
        # before the loss is computed.
        with tf.name_scope("Loss_per_Class_Patching"):
            loss_val = 0
            for i in range(n_classes):
                loss_batch = tf.nn.sigmoid_cross_entropy_with_logits(
                    logits=tf.reduce_mean(model.output[:, i],
                                          axis=0,
                                          keepdims=True),
                    labels=model.y_[:, i])
                loss_mean = tf.reduce_mean(loss_batch)
                loss_val += loss_mean

    # REGULARIZATION
    reg_penalty = regularize(layers=model.layers,
                             l1=config.l1,
                             l2=config.l2,
                             regularize_weights=True,
                             regularize_biases=True)

    # LEARNING RATE (SCHEDULE)
    # if a LRS is defined always use MomentumOptimizer and pass learning rate to optimizer
    # Default: no LRS (or an unsupported LRS type) -> the default optimizer
    # is used with its own learning rate.  Setting this up front avoids an
    # unbound `learning_rate` when "lrs" exists but is not "plateau".
    learning_rate = None
    use_plateau_lrs = False
    if config.get_value("lrs", None) is not None:
        lr_sched_type = config.lrs["type"]
        if lr_sched_type == "plateau":
            use_plateau_lrs = True
            learning_rate = tf.placeholder(tf.float32, [],
                                           name='learning_rate')
            lrs_learning_rate = config.get_value(
                "optimizer_params")["learning_rate"]
            lrs_n_bad_epochs = 0  # counter for plateau LRS
            lrs_patience = config.lrs["patience"]
            lrs_factor = config.lrs["factor"]
            lrs_threshold = config.lrs["threshold"]
            lrs_mode = config.lrs["mode"]
            lrs_best = -np.inf if lrs_mode == "max" else np.inf

            def lrs_is_better(old, new):
                """Relative-threshold comparison of a new score to the best."""
                if lrs_mode == "max":
                    return new > old * (1 + lrs_threshold)
                return new < old * (1 - lrs_threshold)

    # LOAD WEIGHTS and get list of trainables if specified
    assign_loaded_variables = None
    trainables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
    if config.get_value("checkpoint", None) is not None:
        with Timer(name="Loading Checkpoint", verbose=True):
            assign_loaded_variables, trainables = tell.load_weights(
                config.get_value("checkpoint", None),
                config.get_value("freeze", False),
                config.get_value("exclude_weights", None),
                config.get_value("exclude_freeze", None))

    # Update step (only needed when something is trainable; otherwise the
    # training loop below is never entered)
    if len(trainables) > 0:
        update, gradients, gradient_name_dict = update_step(
            loss + reg_penalty,
            config,
            tell,
            lr=learning_rate,
            trainables=trainables)

    # INITIALIZE Tensorflow VARIABLES
    step = tell.initialize_tf_variables().global_step

    # ASSIGN LOADED WEIGHTS (overriding initializations) if available
    if assign_loaded_variables is not None:
        session.run(assign_loaded_variables)

    # -------------------------------------------------------------------------
    # Start training
    # -------------------------------------------------------------------------
    try:
        n_mbs = len(train_loader)
        # Epoch to (re)start from, derived from the global step
        epoch = int((step * batchsize) / (n_mbs * batchsize))
        epochs = range(epoch, n_epochs)

        if len(trainables) == 0:
            # Nothing to train: validate once and leave (finally still runs)
            validate(val_loader, n_classes, session, loss_val, prediction_val,
                     model, workspace, step, batchsize, tell)
            return

        print("Epoch: {}/{} (step: {}, nmbs: {}, batchsize: {})".format(
            epoch + 1, n_epochs, step, n_mbs, batchsize))

        # Score that drives the plateau scheduler.  It is seeded once before
        # the first epoch of this run and afterwards refreshed from the
        # validation pass at the end of every epoch.  Previously it was reset
        # at the top of each epoch and the validation result was discarded,
        # so the scheduler never saw a real metric.
        f1 = 0
        for ep in epochs:
            if ep == epoch:
                if ep == 0 and initial_val:
                    f1 = validate(val_loader, n_classes, session, loss_val,
                                  prediction_val, model, workspace, step,
                                  batchsize, tell)
                elif (config.has_value("lrs_best")
                      and config.has_value("lrs_learning_rate")
                      and config.has_value("lrs_n_bad_epochs")):
                    # Restore the scheduler state stored in the config;
                    # "lrs_f1" is not part of the check above, so default it.
                    f1 = config.get_value("lrs_f1", 0)
                    lrs_best = config.get_value("lrs_best")
                    lrs_learning_rate = config.get_value("lrs_learning_rate")
                    lrs_n_bad_epochs = config.get_value("lrs_n_bad_epochs")

            # LRS "Plateau"
            if use_plateau_lrs:
                # update scheduler
                if lrs_is_better(lrs_best, f1):
                    lrs_best = f1
                    lrs_n_bad_epochs = 0
                else:
                    lrs_n_bad_epochs += 1
                # update learning rate
                if lrs_n_bad_epochs > lrs_patience:
                    lrs_learning_rate = max(lrs_learning_rate * lrs_factor, 0)
                    lrs_n_bad_epochs = 0

            # `ep` is an absolute epoch index, so the total shown next to it
            # is n_epochs (len(epochs) is too small when resuming).
            with tqdm(total=len(train_loader),
                      desc="Training [{}/{}]".format(ep + 1,
                                                     n_epochs)) as pbar:
                for mb in train_loader:
                    feed_dict = {
                        model.X: mb['input'].numpy(),
                        model.y_: mb['target'].numpy(),
                        model.dropout: dropout
                    }
                    if use_plateau_lrs:
                        # Feed the currently scheduled learning rate
                        feed_dict[learning_rate] = lrs_learning_rate

                    # TRAINING
                    _, loss_train, _ = session.run(
                        [prediction, loss, update], feed_dict=feed_dict)

                    # Update status
                    pbar.set_description_str(
                        "Training [{}/{}] Loss: {:.4f}".format(
                            ep + 1, n_epochs, loss_train))
                    pbar.update()
                    step += 1

            # Validate after every epoch and keep the score for the scheduler
            f1 = validate(val_loader, n_classes, session, loss_val,
                          prediction_val, model, workspace, step, batchsize,
                          tell)
    except AbortRun:
        print("Aborting...")
    finally:
        # Always store a checkpoint and close the session
        tell.close(global_step=step, save_checkpoint=True)
def __init__(self, config: Config, dataset):
    """Build an encoder -> ConvLSTM (or substitute conv) -> conv-output network over a sequence.

    The graph is created once per sequence position of the input and once
    per configured ticker step; the outputs of all steps are concatenated
    along axis 1 and published as ``self.output``.

    Args:
        config: Run configuration; read via ``get_value`` and attribute
            access (kernel sizes, n_lstm, reduced_rec_lstm, tickersteps, ...).
        dataset: Object whose ``mb_info['X'][0]`` and ``mb_info['y'][0]``
            give the minibatch shapes of the inputs and labels.
    """
    import TeLL

    # Collected layers (used later for regularization)
    layers = []

    depth = config.get_value("enc_dec_depth", 2)
    basenr_convs = config.get_value("enc_dec_conv_maps_base", 16)
    initializer_name = config.get_value("conv_W_initializer",
                                        "weight_xavier_conv2d")
    conv_W_initializer = getattr(TeLL.initializations, initializer_name)

    def conv_weights(kernel, n_in, n_out):
        """Initialize square-kernel weights of shape [kernel, kernel, n_in, n_out]."""
        return conv_W_initializer([kernel, kernel, n_in, n_out])

    #
    # Placeholders for a full input sequence and its labels
    #
    n_classes = 11
    X_shape = dataset.mb_info['X'][0]  # [sample, seq_pos, x, y, features]
    y_shape = dataset.mb_info['y'][0]  # [sample, seq_pos, features]
    n_seq_pos = X_shape[1]
    # Per-frame shapes: sequence axis reduced to length 1
    frame_input_shape = X_shape[:1] + (1, ) + X_shape[2:]
    frame_output_shape = y_shape[:1] + (1, ) + y_shape[2:] + (n_classes, )

    X = tf.placeholder(tf.float32, shape=X_shape)
    y_ = tf.placeholder(tf.int32, shape=y_shape)

    # ----------------------------------------------------------------------
    # Network architecture
    # ----------------------------------------------------------------------

    # The RNN input starts out as an all-zero frame and is updated per step
    rnn_input_layer = RNNInputLayer(
        tf.zeros(frame_input_shape, dtype=tf.float32))
    layers.append(rnn_input_layer)

    #
    # Encoder: conv + max-pooling per depth level
    #
    for level in range(1, depth + 1):
        print("\tConvLayerEncoder{}...".format(level))
        previous = layers[-1]
        encoder_weights = conv_weights(config.kernel_conv,
                                       previous.get_output_shape()[-1],
                                       basenr_convs * (2**level))
        layers.append(
            ConvLayer(incoming=previous,
                      W=encoder_weights,
                      padding='SAME',
                      name='ConvLayerEncoder{}'.format(level),
                      a=tf.nn.elu))

        print("\tMaxpoolingLayer{}...".format(level))
        layers.append(
            MaxPoolingLayer(incoming=layers[-1],
                            ksize=(1, 3, 3, 1),
                            strides=(1, 2, 2, 1),
                            padding='SAME',
                            name='MaxpoolingLayer{}'.format(level)))

    if config.n_lstm:
        #
        # ConvLSTM layer
        #
        n_lstm = config.n_lstm
        kernel_fwd = config.kernel_lstm_fwd
        kernel_bwd = config.kernel_lstm_bwd
        channels_fwd = layers[-1].get_output_shape()[-1]
        # Recurrent input comes from the squashing layer if one is configured
        channels_bwd = config.reduced_rec_lstm or n_lstm

        # One [forward, recurrent] weight pair per gate, then the biases
        lstm_init = {}
        for gate in ("W_ci", "W_ig", "W_og", "W_fg"):
            lstm_init[gate] = [
                conv_weights(kernel_fwd, channels_fwd, n_lstm),
                conv_weights(kernel_bwd, channels_bwd, n_lstm),
            ]
        for bias in ("b_ci", "b_ig", "b_og"):
            lstm_init[bias] = constant([n_lstm])
        lstm_init["b_fg"] = constant([n_lstm], 1)

        print("\tConvLSTM...")
        lstm_layer = ConvLSTMLayer(incoming=layers[-1],
                                   n_units=n_lstm,
                                   a_out=get_rec_attr(tf, config.lstm_act),
                                   forgetgate=config.forgetgate,
                                   store_states=config.store_states,
                                   tickerstep_biases=tf.zeros,
                                   output_dropout=config.lstm_output_dropout,
                                   precomp_fwds=False,
                                   **lstm_init)
        layers.append(lstm_layer)

        #
        # Optional feature squashing, fed back as external recurrence
        #
        if config.reduced_rec_lstm:
            print("\tFeatureSquashing...")
            squash_weights = conv_weights(config.kernel_conv_out,
                                          layers[-1].get_output_shape()[-1],
                                          config.reduced_rec_lstm)
            squash_layer = ConvLayer(incoming=layers[-1],
                                     W=squash_weights,
                                     padding='SAME',
                                     name='ConvLayerFeatureSquashing',
                                     a=tf.nn.elu)
            layers.append(squash_layer)
            print("\tConvLSTMRecurrence...")
            lstm_layer.add_external_recurrence(squash_layer)
    else:
        #
        # No LSTM units configured: plain conv layer in its place
        #
        print("\tSubstituteConvLayer...")
        substitute_weights = conv_weights(
            config.kernel_conv, layers[-1].get_output_shape()[-1],
            int(basenr_convs * (2**depth) * 4.5))
        lstm_layer = ConvLayer(incoming=layers[-1],
                               W=substitute_weights,
                               padding='SAME',
                               name='SubstituteConvLayer',
                               a=tf.nn.elu)
        layers.append(lstm_layer)

    #
    # ConvLayer for semantic segmentation
    #
    print("\tConvLayerSemanticSegmentation...")
    semseg_weights = conv_weights(config.kernel_conv_out,
                                  layers[-1].get_output_shape()[-1],
                                  n_classes)
    layers.append(
        ConvLayer(incoming=layers[-1],
                  W=semseg_weights,
                  padding='SAME',
                  name='ConvLayerSemanticSegmentation',
                  a=tf.identity))

    #
    # Upscaling layer (takes the place of the last list entry)
    #
    print("\tUpscalingLayer...")
    output_layer = ScalingLayer(incoming=layers[-1],
                                size=frame_output_shape[-3:-1],
                                name='UpscalingLayergLayer')
    layers[-1] = output_layer

    # ----------------------------------------------------------------------
    # Create graph through sequence positions and ticker steps
    # ----------------------------------------------------------------------
    outputs_all_timesteps = []

    print("\tRNN Loop...")
    for seq_pos in range(n_seq_pos):
        with tf.name_scope("Sequence_pos_{}".format(seq_pos)):
            print("\t seq. pos. {}...".format(seq_pos))
            # Feed the current frame, then request the output for it
            rnn_input_layer.update(X[:, seq_pos:seq_pos + 1, :])
            outputs_all_timesteps.append(output_layer.get_output())

    # Ticker steps reuse the last frame as input
    tickerstep_input = X[:, -1:, :]
    for tickerstep in range(config.tickersteps):
        with tf.name_scope("Tickerstep_{}".format(tickerstep)):
            print("\t tickerstep {}...".format(tickerstep))
            rnn_input_layer.update(tickerstep_input)
            # Request the output with the tickerstep nodes switched on
            outputs_all_timesteps.append(
                output_layer.get_output(tickerstep_nodes=True))

    print("\tDone!")

    #
    # Publish
    #
    self.X = X
    self.y_ = y_
    self.output = tf.concat(outputs_all_timesteps,
                            axis=1,
                            name='outputs_all_timesteps')
    # Uniform per-pixel weights
    self.pixel_weights = tf.ones_like(y_, dtype=tf.float32)
    # List of layers, kept for regularization in the main file
    self.__layers = layers
    # Exposed so parts of the LSTM can be plotted
    self.lstm_layer = lstm_layer