    val_paths = [patches_val_lb_h, patches_bound_labels_val,
                 patches_dist_labels_val, patches_color_labels_val]
else:
    y_paths = [patches_tr_lb_h]
    val_paths = [patches_val_lb_h]

rows = args.patch_size
cols = args.patch_size
channels = 3

# Define optimizer
if args.optimizer == 'adam':
    optm = Adam(lr=args.learning_rate, beta_1=0.9)
elif args.optimizer == 'sgd':
    optm = SGD(lr=args.learning_rate, momentum=0.8)

# Define loss
print('=' * 60)
if args.loss == 'cross_entropy':
    print('Using Cross Entropy')
    # loss = "categorical_crossentropy"
    loss = tf.keras.losses.CategoricalCrossentropy()
    loss_bound = tf.keras.losses.BinaryCrossentropy()
    loss_reg = tf.keras.losses.MeanSquaredError()
elif args.loss == 'tanimoto':
    print('Using Tanimoto Dual Loss')
    loss = Tanimoto_dual_loss()
    loss_bound = Tanimoto_dual_loss()
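# For reference, `Tanimoto_dual_loss` is defined elsewhere in this project.
# A minimal sketch of the Tanimoto dual loss from ResUNet-a (Diakogiannis et
# al.), assuming channels-last one-hot labels and softmax/sigmoid outputs;
# the helper names below are hypothetical, not this project's implementation:
def tanimoto(y_true, y_pred, smooth=1e-5):
    # Tanimoto coefficient: sum(p*l) / (sum(p^2) + sum(l^2) - sum(p*l))
    tp = tf.reduce_sum(y_true * y_pred, axis=(1, 2))
    den = tf.reduce_sum(y_true ** 2 + y_pred ** 2, axis=(1, 2)) - tp
    return tf.reduce_mean((tp + smooth) / (den + smooth))

def tanimoto_dual_loss(y_true, y_pred):
    # the "dual" form also scores the complements, which helps on
    # imbalanced segmentation masks
    return 1.0 - 0.5 * (tanimoto(y_true, y_pred)
                        + tanimoto(1.0 - y_true, 1.0 - y_pred))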
# Validation tiles
val_tiles = [val1, val2]
patches_val, patches_val_ref = patch_tiles(val_tiles, mask_tiles, image_array,
                                           final_mask, patch_size, stride)
patches_val_aug, patches_val_ref_aug = bal_aug_patches(percent, patch_size,
                                                       patches_val, patches_val_ref)
patches_val_ref_aug_h = tf.keras.utils.to_categorical(patches_val_ref_aug, number_class)

#%%
start_time = time.time()
exp = 1
rows = patch_size
cols = patch_size
adam = Adam(lr=0.0001, beta_1=0.9)
batch_size = 8
weights = [0.5, 0.5, 0]
loss = weighted_categorical_crossentropy(weights)
model = unet((rows, cols, channels))
model.compile(optimizer=adam, loss=loss, metrics=['accuracy'])
# print model information
model.summary()
filepath = 'models/'
# define early stopping callback
earlystop = EarlyStopping(monitor='val_loss', min_delta=0.0001, patience=10,
                          verbose=1, mode='min')
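# `weighted_categorical_crossentropy` is a project helper; a common
# implementation looks like the following sketch, where per-class weights
# scale the log-likelihood so under-represented classes contribute more:
from tensorflow.keras import backend as K

def weighted_categorical_crossentropy(weights):
    w = K.constant(weights)
    def loss(y_true, y_pred):
        # clip to avoid log(0), then apply the per-class weights
        y_pred = K.clip(y_pred, K.epsilon(), 1.0 - K.epsilon())
        return -K.sum(y_true * K.log(y_pred) * w, axis=-1)
    return loss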
# -*- coding: utf-8 -*-
"""This example assumes you've read `advanced.py`, and covers:
    - Exploding & vanishing gradients monitoring
    - Spotting dead weights
"""
import deeptrain
deeptrain.util.misc.append_examples_dir_to_sys_path()

from utils import CL_CONFIGS as C
from utils import init_session, make_classifier
from utils import Adam
from see_rnn import rnn_histogram, rnn_heatmap

#%%# Case 1 ###################################################################
# We build a model prone to large but not exploding/vanishing gradients
C['model']['optimizer'] = Adam(10)
C['traingen']['epochs'] = 1

tg = init_session(C, make_classifier)

#%%# Train ####################################################################
tg.train()

#%%# Case 2 ###################################################################
# Now a model prone to exploding / vanishing gradients
from utils import TS_CONFIGS as C
from utils import make_timeseries_classifier

C['model']['activation'] = 'relu'
C['model']['optimizer'] = Adam(.3)
C['traingen']['epochs'] = 1
C['traingen']['eval_fn'] = 'predict'
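# Hedged aside (not DeepTrain's API): what gradient monitoring boils down to
# in plain tf.keras is per-variable gradient norms on a batch -- norms near
# zero suggest vanishing gradients, very large norms suggest exploding ones.
# `model`, `loss_fn`, `x`, and `y` below are placeholder names.
import tensorflow as tf

def gradient_norms(model, loss_fn, x, y):
    with tf.GradientTape() as tape:
        loss = loss_fn(y, model(x, training=True))
    grads = tape.gradient(loss, model.trainable_variables)
    return {v.name: float(tf.norm(g))
            for v, g in zip(model.trainable_variables, grads)
            if g is not None}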
    raise NotImplementedError
logging(str(model), path=opt.path)

''' temporary '''
_prob = get_prob_from_energy_func_for_vis(model.energy_func, num=256)
_gtlatent = get_imshow_plot(_prob, val=6 if opt.dataset in ['sbmnist', 'dbmnist', 'dbmnist-val5k'] else 4)
#img = convert_npimage_torchimage(_img)
#writer.add_image('train/latent', img.float(), 0)
''' --------- '''

# init optimizer
if opt.optimizer == 'sgd':
    optimizer = optim.SGD(model.parameters(), lr=opt.lr)
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min',
                                               factor=1./4., patience=0, verbose=True)
elif opt.optimizer == 'adam':
    optimizer = Adam(model.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))
    scheduler = None
elif opt.optimizer == 'amsgrad':
    optimizer = Adam(model.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999),
                     amsgrad=True)
    scheduler = None
elif opt.optimizer == 'rmsprop':
    optimizer = optim.RMSprop(model.parameters(), lr=opt.lr, momentum=opt.momentum)
    scheduler = None
else:
    raise NotImplementedError('unknown optimizer: {}'.format(opt.optimizer))

# init weight averaging
if opt.weight_avg == 'polyak':
    optimizer = torchcontrib.optim.Polyak(optimizer,
                                          polyak_start=opt.weight_avg_start,
                                          polyak_freq=1,
                                          polyak_decay=opt.weight_avg_decay)
elif opt.weight_avg == 'swa':
    optimizer = torchcontrib.optim.SWA(optimizer,
                                       swa_start=opt.weight_avg_start,
                                       swa_freq=1)
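# Usage note (a sketch assuming torchcontrib's stock SWA API, with
# placeholder `train_loader` and `loss_fn`): the averaged weights are not
# active during training; after the last epoch they are swapped in, and
# BatchNorm running statistics are recomputed for the averaged model.
for x, y in train_loader:
    optimizer.zero_grad()
    loss_fn(model(x), y).backward()
    optimizer.step()                          # wrapper forwards to the base optimizer
optimizer.swap_swa_sgd()                      # install the SWA-averaged weights
optimizer.bn_update(train_loader, model)      # refresh BatchNorm statistics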
def run():
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    gym.logger.set_level(40)

    env = gym.make(args.env_name)
    state_size = env.observation_space.shape[0]
    action_size = env.action_space.shape[0]
    state_stat = RunningStat(env.observation_space.shape, eps=1e-2)
    action_space = env.action_space

    policy = Policy(state_size, action_size, args.hidden_size,
                    action_space.low, action_space.high)
    num_params = policy.num_params
    optim = Adam(num_params, args.lr)

    ray.init(num_cpus=args.num_parallel)

    return_list = []
    for epoch in range(100000):
        #####################################
        ### Rollout and Update State Stat ###
        #####################################
        policy.set_state_stat(state_stat.mean, state_stat.std)

        # set diff params (mirrored sampling: each noise vector is paired
        # with its negation)
        assert args.episodes_per_batch % 2 == 0
        diff_params = torch.empty((args.episodes_per_batch, num_params),
                                  dtype=torch.float)
        diff_params_pos = torch.randn(args.episodes_per_batch // 2,
                                      num_params) * args.noise_std
        diff_params[::2] = diff_params_pos
        diff_params[1::2] = -diff_params_pos

        rets = []
        num_episodes_popped = 0
        num_timesteps_popped = 0
        while num_episodes_popped < args.episodes_per_batch \
                and num_timesteps_popped < args.timesteps_per_batch:
            results = []
            for i in range(min(args.episodes_per_batch, 500)):
                # set policy
                randomized_policy = deepcopy(policy)
                randomized_policy.add_params(diff_params[num_episodes_popped + i])
                # rollout
                results.append(
                    rollout.remote(randomized_policy, args.env_name,
                                   seed=np.random.randint(0, 10000000)))
            for result in results:
                ret, timesteps, states = ray.get(result)
                rets.append(ret)
                # update state stat
                if states is not None:
                    state_stat.increment(states.sum(axis=0),
                                         np.square(states).sum(axis=0),
                                         states.shape[0])
                num_timesteps_popped += timesteps
                num_episodes_popped += 1

        rets = np.array(rets, dtype=np.float32)
        diff_params = diff_params[:num_episodes_popped]

        best_policy_idx = np.argmax(rets)
        best_policy = deepcopy(policy)
        best_policy.add_params(diff_params[best_policy_idx])
        best_rets = [
            rollout.remote(best_policy, args.env_name,
                           seed=np.random.randint(0, 10000000),
                           calc_state_stat_prob=0.0, test=True)
            for _ in range(10)
        ]
        best_rets = np.average(ray.get(best_rets))
        print('epoch:', epoch, 'mean:', np.average(rets),
              'max:', np.max(rets), 'best:', best_rets)

        with open(args.outdir + '/return.csv', 'w') as f:
            return_list.append([epoch, np.max(rets), np.average(rets), best_rets])
            writer = csv.writer(f, lineterminator='\n')
            writer.writerows(return_list)
        plt.figure()
        sns.lineplot(data=np.array(return_list)[:, 1:])
        plt.savefig(args.outdir + '/return.png')
        plt.close('all')

        #############
        ### Train ###
        #############
        fitness = compute_centered_ranks(rets).reshape(-1, 1)
        if args.weight_decay > 0:
            #l2_decay = args.weight_decay * ((policy.get_params() + diff_params)**2).mean(dim=1, keepdim=True).numpy()
            # NOTE: despite the name, this averages raw parameter values;
            # a true L1 penalty would apply .abs() before the mean.
            l1_decay = args.weight_decay * (policy.get_params() + diff_params).mean(
                dim=1, keepdim=True).numpy()
            fitness += l1_decay
        grad = (fitness * diff_params.numpy()).mean(axis=0)
        policy = optim.update(policy, -grad)
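# For reference, `compute_centered_ranks` is conventionally implemented as in
# OpenAI's evolution-strategies-starter: returns are replaced by their ranks,
# rescaled to [-0.5, 0.5], which makes the update invariant to the scale of
# returns. A sketch assuming that convention:
def compute_ranks(x):
    # ranks in [0, len(x) - 1], unlike scipy.stats.rankdata which is 1-based
    ranks = np.empty(len(x), dtype=int)
    ranks[x.argsort()] = np.arange(len(x))
    return ranks

def compute_centered_ranks(x):
    y = compute_ranks(x.ravel()).reshape(x.shape).astype(np.float32)
    y /= (x.size - 1)
    y -= 0.5
    return y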
# Loss/accuracy trackers: use the tf.keras.metrics (not tf.keras.losses)
# classes so values accumulate across batches.
tracker_loss_train = tf.keras.metrics.SparseCategoricalCrossentropy(name='loss_train')
tracker_loss_val = tf.keras.metrics.SparseCategoricalCrossentropy(name='loss_val')
tracker_loss_test = tf.keras.metrics.SparseCategoricalCrossentropy(name='loss_test')
tracker_acc_train = tf.keras.metrics.SparseCategoricalAccuracy(name='acc_train')
tracker_acc_val = tf.keras.metrics.SparseCategoricalAccuracy(name='acc_val')
tracker_acc_test = tf.keras.metrics.SparseCategoricalAccuracy(name='acc_test')

os.environ['TF_ENABLE_AUTO_MIXED_PRECISION'] = str(args.amp)

acc_runs, time_total = [], 0
for runtime in range(args.runtimes):
    # stop early if the projected time for one more run would exceed the walltime
    if runtime and time_total * (runtime + 1.2) / runtime > args.walltime:
        break
    if args.optimizer == 'RMSprop':
        optimizer = RMSprop(lr=args.lr, weight_decay=args.weight_decay,
                            rho=0.99, epsilon=1e-8)  # eps=1e-8 to get closer to PyTorch
    elif args.optimizer == 'Adam':
        optimizer = Adam(lr=args.lr, weight_decay=args.weight_decay, epsilon=1e-8)
    if args.amp:
        optimizer = tf.train.experimental.enable_mixed_precision_graph_rewrite(optimizer)

    idx_train, idx_val, idx_test, labels_train, labels_val, labels_test = \
        split_dataset(adj.shape[0], labels, args.dataset, args.public, args.percent)

    epoch, early_stopping = 0, 0
    acc_best_train, acc_test_best_train, acc_test_best_val = 0, 0, 0
    best_train, best_val, best_test = float('Inf'), float('Inf'), float('Inf')
    train = trainer()

    if args.debug:
        current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        train_log_dir = 'logs/gradient_tape/' + current_time + '/train'
        val_log_dir = 'logs/gradient_tape/' + current_time + '/val'
        test_log_dir = 'logs/gradient_tape/' + current_time + '/test'
        run_log_dir = 'logs/gradient_tape/' + current_time + '/run'
        train_summary_writer = tf.summary.create_file_writer(train_log_dir)
        val_summary_writer = tf.summary.create_file_writer(val_log_dir)
        test_summary_writer = tf.summary.create_file_writer(test_log_dir)
        run_summary_writer = tf.summary.create_file_writer(run_log_dir)
    time_run_start = time.time()
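# Hedged sketch of how such trackers are typically driven (placeholder names
# `model`, `x`, `y_true`; not code from this script): metric objects
# accumulate over batches until explicitly reset.
y_pred = model(x, training=False)
tracker_loss_val.update_state(y_true, y_pred)    # accumulate batch loss
tracker_acc_val.update_state(y_true, y_pred)     # accumulate batch accuracy
print('val loss:', float(tracker_loss_val.result()),
      'val acc:', float(tracker_acc_val.result()))
tracker_loss_val.reset_states()                  # clear before the next epoch
tracker_acc_val.reset_states()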
                        padding=((1, 1), (1, 1))),
            ReLU(),
            BatchNorm(),
            Convolution(input_shape=(8, 8), input_depth=128, n_filters=128,
                        filter_dim=(3, 3), stride=(1, 1),
                        padding=((1, 1), (1, 1))),
            ReLU(),
            BatchNorm(),
            MaxPooling(input_shape=(8, 8), input_depth=128,
                       filter_dim=(2, 2), stride=(2, 2)),
            Dropout(rate=0.4),
            Reshape(input_shape=(128, 4, 4), output_shape=(2048, 1)),
            Dense(size=10, input_len=2048),
            Softmax())

optimizer = Adam(network.trainables, learning_rate=lambda n: 0.0001,
                 beta_1=0.9, beta_2=0.999)

avg = IncrementalAverage()
for epoch in range(STARTING_EPOCH, STARTING_EPOCH + EPOCHS):
    batch = 1
    for x, y in make_batch(training_data, training_labels, BATCH_SIZE):
        out = network(x)
        avg.add(np.sum(VectorCrossEntropy.error(out, y)))
        network.backward(VectorCrossEntropy.gradient(out, y), update=True)
        if batch % LOG_FREQ == 0:
            print(f"epoch {epoch}/{EPOCHS} | batch {batch} - loss: {avg.get()}")
        batch += 1

# Testing
testacc = IncrementalAverage()
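# For context, one Adam step as a custom optimizer like the one above
# typically performs it, including the callable learning-rate schedule
# (a hedged sketch, not this framework's actual implementation):
def adam_step(param, grad, m, v, n, lr=lambda n: 0.0001,
              beta_1=0.9, beta_2=0.999, eps=1e-8):
    m = beta_1 * m + (1 - beta_1) * grad         # first-moment (mean) estimate
    v = beta_2 * v + (1 - beta_2) * grad ** 2    # second-moment estimate
    m_hat = m / (1 - beta_1 ** n)                # bias correction, step n >= 1
    v_hat = v / (1 - beta_2 ** n)
    return param - lr(n) * m_hat / (np.sqrt(v_hat) + eps), m, v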