def predict(net, char, h=None, top_k=None):
    """Sample the character that follows ``char`` and return the new hidden state.

    Args:
        net: Character-level model. It must expose ``chars``, ``char2int`` and
            ``int2char`` and be callable as ``net(inputs, hidden)``, returning
            ``(output, hidden)``.
        char: Current character; must be a key of ``net.char2int``.
        h: Hidden state from the previous step (an iterable of tensors). When
            ``None``, a fresh state for a batch of one is requested through
            ``net.init_hidden(1)``.
        top_k: If given, sample only among the ``top_k`` most probable
            characters; otherwise sample over the whole vocabulary.

    Returns:
        A ``(next_char, hidden)`` tuple.
    """
    # The signature advertises h=None, but iterating None below would raise
    # TypeError, so start from a fresh hidden state in that case.
    if h is None:
        h = net.init_hidden(1)

    # Encode the single input character as a one-hot tensor.
    x = np.array([[net.char2int[char]]])
    x = one_hot_encode(x, len(net.chars))
    inputs = torch.from_numpy(x).to(device)

    # Detach the hidden state from its history.
    h = tuple(each.data for each in h)

    # Run the model and turn its output into character probabilities.
    out, h = net(inputs, h)
    p = F.softmax(out, dim=1).data
    if on_gpu():
        p = p.cpu()  # NumPy conversion below needs a CPU tensor

    # Restrict the candidates to the top-k characters when requested.
    if top_k is None:
        top_ch = np.arange(len(net.chars))
    else:
        p, top_ch = p.topk(top_k)
        top_ch = top_ch.numpy().squeeze()

    # squeeze() collapses a single candidate to a 0-d array, which
    # np.random.choice does not accept for `p`; keep both 1-D.
    top_ch = np.atleast_1d(top_ch)
    p = np.atleast_1d(p.numpy().squeeze())

    # Sample the next character with some randomness, weighted by probability.
    char = np.random.choice(top_ch, p=p / p.sum())

    return net.int2char[char], h
def get_label(self, example, num_unique_classes, label_type, classes):
    """Return the label of ``example`` in the form named by ``label_type``.

    Args:
        example: Indexable record whose element ``1`` is the raw label.
        num_unique_classes: Passed to ``util.one_hot_encode`` as the second
            argument for the ``'one_hot'`` form.
        label_type: ``'one_hot'`` or ``'int'``; any other value returns the
            raw label unchanged.
        classes: Sequence of class labels; a label's position in it is its
            integer id.

    Returns:
        The one-hot encoding, the integer class id, or the raw label.
    """
    raw_label = example[1]

    # Unknown label types fall through with the raw label untouched.
    if label_type not in ('one_hot', 'int'):
        return raw_label

    class_id = classes.index(raw_label)
    if label_type == 'int':
        return class_id
    return util.one_hot_encode(class_id, num_unique_classes)
def inference(config, cla):
    """Denoise one wav file, or every wav file in a directory.

    Args:
        config: Nested configuration mapping. Reads
            ``config['training']['batch_size']``, ``config['training']['path']``,
            ``config['dataset']['sample_rate']`` and
            ``config['model']['condition_encoding']``.
        cla: Parsed command-line arguments. Reads ``batch_size``,
            ``target_field_length``, ``one_shot``, ``load_checkpoint``,
            ``print_model_summary``, ``noisy_input_path``, ``clean_input_path``
            and ``condition_value``. ``target_field_length``,
            ``noisy_input_path`` and ``clean_input_path`` are normalised in
            place (the two paths end up as directory prefixes).
    """
    if cla.batch_size is not None:
        batch_size = int(cla.batch_size)
    else:
        batch_size = config['training']['batch_size']

    if cla.target_field_length is not None:
        cla.target_field_length = int(cla.target_field_length)

    one_shot = bool(cla.one_shot)
    if one_shot:
        # One-shot mode builds a model per file, sized to that file, in the loop.
        print('Performing one-shot inference..')
    else:
        model = models.DenoisingWavenet(
            config,
            target_field_length=cla.target_field_length,
            load_checkpoint=cla.load_checkpoint,
            print_model_summary=cla.print_model_summary)
        print('Performing inference..')

    samples_folder_path = os.path.join(config['training']['path'], 'samples')
    output_folder_path = get_valid_output_folder_path(samples_folder_path)

    if cla.noisy_input_path.endswith('.wav'):
        # Single file: split it into a directory prefix and a file name.
        # rpartition leaves the prefix empty for a bare "file.wav", so the
        # file is looked up relative to the current directory instead of
        # under a bogus "file.wav/" directory.
        noisy_dir, separator, wav_name = cla.noisy_input_path.rpartition('/')
        filenames = [wav_name]
        cla.noisy_input_path = noisy_dir + separator
        # Only strip a file name from the clean path when it actually names a
        # wav file; a directory given without a trailing slash is kept whole.
        if (cla.clean_input_path is not None
                and cla.clean_input_path.endswith('.wav')):
            clean_dir, separator, _ = cla.clean_input_path.rpartition('/')
            cla.clean_input_path = clean_dir + separator
    else:
        if not cla.noisy_input_path.endswith('/'):
            cla.noisy_input_path += '/'
        filenames = [filename
                     for filename in os.listdir(cla.noisy_input_path)
                     if filename.endswith('.wav')]

    # Make the clean path a directory prefix once, instead of on every file.
    # An empty prefix (bare file name) is left alone so it does not become "/".
    if cla.clean_input_path and not cla.clean_input_path.endswith('/'):
        cla.clean_input_path += '/'

    sample_rate = config['dataset']['sample_rate']
    clean_input = None
    for filename in filenames:
        noisy_input = util.load_wav(cla.noisy_input_path + filename, sample_rate)
        if cla.clean_input_path is not None:
            clean_input = util.load_wav(cla.clean_input_path + filename,
                                        sample_rate)

        # Named `sample` rather than `input` to avoid shadowing the builtin.
        sample = {'noisy': noisy_input, 'clean': clean_input}
        output_filename_prefix = filename[:-4] + '_'

        if config['model']['condition_encoding'] == 'one_hot':
            condition_input = util.one_hot_encode(int(cla.condition_value), 29)[0]
        else:
            condition_input = util.binary_encode(int(cla.condition_value), 29)[0]

        if one_shot:
            # If input length is even, remove one sample
            if len(sample['noisy']) % 2 == 0:
                sample['noisy'] = sample['noisy'][:-1]
                if sample['clean'] is not None:
                    sample['clean'] = sample['clean'][:-1]
            model = models.DenoisingWavenet(
                config,
                load_checkpoint=cla.load_checkpoint,
                input_length=len(sample['noisy']),
                print_model_summary=cla.print_model_summary)

        print("Denoising: " + filename)
        denoise.denoise_sample(model, sample, condition_input, batch_size,
                               output_filename_prefix, sample_rate,
                               output_folder_path)
def __init__(self, data, logit, dequantize, rng):
    """Store the inputs and labels of one data split.

    Args:
        data: Indexable pair; element ``0`` holds the inputs and element ``1``
            the numeric labels.
        logit: If true, pass the inputs through ``self._logit_transform``.
        dequantize: If true, pass the inputs through ``self._dequantize``
            (together with ``rng``) before any logit transform.
        rng: Random generator forwarded to ``self._dequantize``.
    """
    pixels = data[0]
    if dequantize:
        pixels = self._dequantize(pixels, rng)
    if logit:
        pixels = self._logit_transform(pixels)

    self.x = pixels
    self.labels = data[1]  # numeric labels
    self.y = util.one_hot_encode(self.labels, 10)  # 1-hot encoded labels
    self.N = self.x.shape[0]  # number of datapoints
def __init__(self, x, l, logit, flip, dequantize, rng):
    """Store pixel data, its three colour planes and the matching labels.

    Args:
        x: 2-D array with one flattened image per row. The columns are read
            as three equal consecutive thirds: red, green, blue.
        l: Numeric labels for the rows of ``x``.
        logit: If true, apply ``self._logit_transform`` to the pixels.
        flip: If true, apply ``self._flip_augmentation`` to the pixels and
            store the labels twice (``np.hstack([l, l])``) — presumably
            because the augmentation doubles the number of rows.
        dequantize: If true, apply ``self._dequantize`` (with ``rng``) first.
        rng: Random generator forwarded to ``self._dequantize``.
    """
    # Floor division: true division yields a float on Python 3, and a float
    # cannot be used as a slice bound below.
    D = x.shape[1] // 3  # number of pixels per colour plane

    x = self._dequantize(x, rng) if dequantize else x  # dequantize
    x = self._logit_transform(x) if logit else x  # logit
    x = self._flip_augmentation(x) if flip else x  # flip

    self.x = x  # pixel values
    self.r = self.x[:, :D]  # red component
    self.g = self.x[:, D:2 * D]  # green component
    self.b = self.x[:, 2 * D:]  # blue component

    self.labels = np.hstack([l, l]) if flip else l  # numeric labels
    self.y = util.one_hot_encode(self.labels, 10)  # 1-hot encoded labels
    self.N = self.x.shape[0]  # number of datapoints
# Load the CIFAR-100 dataset train_labels = np.load('/work/cse479/shared/homework/02/cifar_labels.npy') train_data = np.load('/work/cse479/shared/homework/02/cifar_images.npy') save_directory = './homework2_sessions' # Randomize order np.random.seed(42) # Seeded so we always get same splits idx = np.random.permutation(train_data.shape[0]) train_data, train_labels = train_data[idx], train_labels[idx] # Reshape the data train_data = np.reshape(train_data, [-1, 32, 32, 3]) # One hot encode the labels train_labels = one_hot_encode(train_labels) # Load imagenet data for use with Autoencoder elif model_name == 'autoencoder': save_directory = './homework2_sessions_autoencoder' # Load image data data = np.load('/work/cse479/shared/homework/02/imagenet_images.npy') # Randomize order np.random.seed(42) # Seeded so we always get same splits idx = np.random.permutation(data.shape[0]) data = data[idx] # Note for Imagenet train_data includes all data
def inference(config, cla):
    """Build target-plus-interference mixtures and denoise each of them.

    For each of the first 100 index pairs stored in
    ``Data/TestSignalList500.pkl`` the target is loaded with ``np.load`` and
    the interference with ``librosa.load``. The interference is repeated or
    truncated to the target's length and scaled to a peak of 0.5, the sum is
    scaled to a peak of 0.999, and the mixture is handed to
    ``denoise.denoise_sample`` with ``'bbbbbb'`` as the output folder.

    Args:
        config: Model configuration; ``config['model']['condition_encoding']``
            selects one-hot or binary encoding of the condition value.
        cla: Parsed command-line arguments; reads ``load_checkpoint``,
            ``print_model_summary`` and ``condition_value``.
    """
    from collections import namedtuple
    import pickle

    MyStruct = namedtuple("MyStruct", "rescaling rescaling_max multiProcFlag")
    hparams = MyStruct(rescaling=True, rescaling_max=0.999, multiProcFlag=False)

    outputfolder = 'bbbbbb'
    os.makedirs(outputfolder, exist_ok=True)

    # NOTE(security): pickle.load can execute arbitrary code; this file must
    # come from a trusted source.
    with open('Data/TestSignalList500.pkl', 'rb') as f:
        sequence_i_save, interf_i_save = pickle.load(f)

    # This is for statistical analysis
    SampleN = 100
    random.seed(66666666)

    # # This is for one example
    # SampleN = 1
    # temp = 66
    # sequence_i_save = [sequence_i_save[temp]]
    # interf_i_save = [interf_i_save[temp]]

    # Instantiate Model
    model = models.DenoisingWavenet(
        config,
        load_checkpoint=cla.load_checkpoint,
        print_model_summary=cla.print_model_summary)
    model.model.load_weights(cla.load_checkpoint)

    from DataGenerator import dataGenBig
    dg = dataGenBig(model, seedNum=123456789, verbose=False)

    s2_scale = 0.5
    sample_rate = 22050  # both the target and the interference use this rate
    batch_size = 10
    # Peak magnitude of the final mixture.
    target_peak = hparams.rescaling_max if hparams.rescaling else 0.99

    # The condition encoding does not depend on the sample, so build it once.
    if config['model']['condition_encoding'] == 'one_hot':
        condition_input = util.one_hot_encode(int(cla.condition_value), 29)[0]
    else:
        condition_input = util.binary_encode(int(cla.condition_value), 29)[0]

    # SampleN groups of mixtures and separated signals.
    for sample_i in range(SampleN):
        print("Sample number {}".format(sample_i + 1))
        sequence_i = sequence_i_save[sample_i]
        interf_i = interf_i_save[sample_i]
        target_path = dg.target_test[sequence_i]
        interf_path = dg.interf_test[interf_i]
        print(target_path, '\n', interf_path)

        # generate the mixture
        s1 = np.load(target_path)  # read in the target
        s2_original, _ = librosa.load(interf_path)

        # Validate before the signals are used. An empty interference signal
        # could never be tiled up to the target length.
        if s1 is None or s2_original is None or len(s2_original) == 0:
            print("Data loading fail")
            sys.exit()

        # Repeat the interference until it covers the target, then truncate.
        L = len(s1)
        repeats = max(1, -(-L // len(s2_original)))  # ceiling division
        s2 = np.concatenate([s2_original] * repeats, axis=0)[:L]

        # first normalise s2 (a silent signal is left as it is)
        s2_peak = np.max(np.abs(s2))
        if s2_peak > 0:
            s2 = s2 * (s2_scale / s2_peak)

        # normalise the mixture so that its maximum magnitude is target_peak
        mixture = s1 + s2
        mixture_peak = np.max(np.abs(mixture))
        if mixture_peak > 0:
            mixture *= 1 / mixture_peak * target_peak

        # Named `sample` rather than `input` to avoid shadowing the builtin.
        sample = {'noisy': mixture, 'clean': None}

        print("Denoising: " + target_path.split('/')[-1])
        dst_wav_name = "Ind_{}_est_wavenet_".format(sample_i)
        denoise.denoise_sample(model, sample, condition_input, batch_size,
                               dst_wav_name, sample_rate, outputfolder)
def train(epochs=20, clip=5, val_frac=0.1, print_every=100):
    """Train the module-level ``net`` on the module-level ``data``.

    Besides ``net`` and ``data`` this reads the module-level names
    ``batch_size``, ``seq_length``, ``device``, ``criterion``, ``opt``,
    ``get_batches`` and ``one_hot_encode``.

    Args:
        epochs: Number of passes over the training split.
        clip: Maximum gradient norm passed to ``clip_grad_norm_``.
        val_frac: Fraction of ``data`` (taken from the end) held out for
            validation.
        print_every: Report training and validation loss every this many
            training steps.
    """
    global data
    net.train()

    # create training and validation data
    # NOTE(review): this rebinds the module-level `data` to the training
    # split, so a second call holds out part of the already-shortened data.
    val_idx = int(len(data) * (1 - val_frac))
    data, val_data = data[:val_idx], data[val_idx:]

    counter = 0
    n_chars = len(net.chars)
    for e in range(epochs):
        # initialize hidden state
        h = net.init_hidden(batch_size)

        for x, y in get_batches(data, batch_size, seq_length):
            counter += 1

            # One-hot encode our data and make them Torch tensors
            x = one_hot_encode(x, n_chars)
            inputs = torch.from_numpy(x).to(device)
            # .to(device) works for CPU and GPU alike; .cuda(device) raises
            # when `device` is a CPU device.
            targets = torch.from_numpy(y).to(device)

            # Detach the hidden state so backprop stops at this batch.
            h = tuple(each.data for each in h)

            net.zero_grad()

            # get the output from the model
            output, h = net(inputs, h)

            # calculate the loss and perform backprop
            loss = criterion(output,
                             targets.view(batch_size * seq_length).long())
            loss.backward()
            # `clip_grad_norm` helps prevent the exploding gradient problem
            # in RNNs / LSTMs.
            nn.utils.clip_grad_norm_(net.parameters(), clip)
            opt.step()

            # loss stats
            if counter % print_every == 0:
                # Get validation loss
                val_h = net.init_hidden(batch_size)
                val_losses = []
                net.eval()
                # Validation needs no gradients; skip building the graph.
                with torch.no_grad():
                    for val_x, val_y in get_batches(val_data, batch_size,
                                                    seq_length):
                        val_x = one_hot_encode(val_x, n_chars)
                        val_inputs = torch.from_numpy(val_x).to(device)
                        val_targets = torch.from_numpy(val_y).to(device)

                        # Fresh hidden-state tensors, detached from history.
                        val_h = tuple(each.data for each in val_h)

                        val_output, val_h = net(val_inputs, val_h)
                        val_loss = criterion(
                            val_output,
                            val_targets.view(batch_size * seq_length).long())
                        val_losses.append(val_loss.item())

                # reset to train mode after iterating through validation data
                net.train()

                print("Epoch: {}/{}...".format(e + 1, epochs),
                      "Step: {}...".format(counter),
                      "Loss: {:.4f}...".format(loss.item()),
                      "Val Loss: {:.4f}".format(np.mean(val_losses)))
flags.DEFINE_integer('batch_size', 64, '') flags.DEFINE_integer('max_epoch_num', 300, '') FLAGS = flags.FLAGS # load data from keras.datasets import fashion_mnist (x_train, y_train), (x_test, y_test) = fashion_mnist.load_data() x_train = x_train.reshape(-1, 784) x_test = x_test.reshape(-1, 784) y_train = y_train.reshape(-1, 1) y_test = y_test.reshape(-1, 1) y_train = util.one_hot_encode(y_train, 10) y_test = util.one_hot_encode(y_test, 10) #The following line can be run if we want to get a separate validation set #X_train, X_validation, y_train, y_validation = util.train_test_split(x_train, y_train) def train(X_train, y_train): train_num_examples = X_train.shape[0] ce_train = [] y_train_predicted = [] for i in range(math.ceil(train_num_examples / batch_size)): batch_xs = X_train[i * batch_size:(i + 1) * batch_size, :] batch_ys = y_train[i * batch_size:(i + 1) * batch_size, :] _, ce, y_predicted = session.run([train_op, cross_entropy, output], {
data = np.load('/work/cse479/shared/homework/02/cifar_images.npy') save_directory = './homework2_sessions' # Randomize order np.random.seed(42) # Seeded so we always get same splits idx = np.random.permutation(data.shape[0]) data,labels = data[idx], labels[idx] # Reshape the data data = np.reshape(data, [-1, 32, 32, 3]) # Save before one-hot-encoding raw_labels = [int(label) for label in labels] # One hot encode the labels labels = one_hot_encode(labels) # Split data into training and testing sets train_data, train_labels, test_data, test_labels = split_data(train_test_prop, data, labels) # Grab corresponding raw_labels for train and test _,train_labels_raw,_,raw_labels=split_data(train_test_prop,data,raw_labels) # Calculate the number of training and testing samples train_num_examples, test_num_examples = train_data.shape[0], test_data.shape[0] # Load imagenet data for use with Autoencoder elif model_name == 'autoencoder': save_directory = './homework2_sessions_autoencoder'
batch_ys = test_labels[i*batch_size:(i+1)*batch_size, :] test_ce, conf_matrix, test_predicted = session.run([net_loss_regularization, confusion_matrix, output], {x:batch_xs, y:batch_ys}) ce_vals.append(test_ce) conf_mxs.append(conf_matrix) test_predictions.append(test_predicted) avg_test_ce = sum(ce_vals) / len(ce_vals) print('Test Cross Entropy: ' + str(avg_test_ce)) test_accuracy = accuracy_score(np.argmax(test_labels, axis=1), np.argmax(np.vstack(test_predictions), axis=1)) print('Test Accuracy:', test_accuracy) full_train_images = np.load(FLAGS.data_dir + 'fmnist_train_data.npy') full_train_labels = np.load(FLAGS.data_dir + 'fmnist_train_labels.npy') #divide into train, validation and test full_train_labels = util.one_hot_encode(full_train_labels, 10) full_train_data = np.concatenate((full_train_images, full_train_labels), axis = 1) training_set, test_set = util.split_rows(full_train_data, 0.8) training_set_new, validation_set = util.split_rows(training_set, 0.8) training_data = np.hsplit(training_set_new, [784, 794]) train_images = training_data[0] train_labels = training_data[1] validation_data = np.hsplit(validation_set, [784,794]) validation_images = validation_data[0] validation_labels = validation_data[1] test_data = np.hsplit(test_set, [784,794]) test_images = test_data[0] test_labels = test_data[1] #size of train, validation and test
def __init__(self, model, chars, sequence_length):
    """Initialise the base class with a one-hot character encoder.

    Args:
        model: Forwarded unchanged to the base-class initialiser.
        chars: Ordered characters; each character's position becomes its index.
        sequence_length: Forwarded unchanged to the base-class initialiser.
    """
    # Map every character to its position in `chars`.
    char2idx = dict((char, position) for position, char in enumerate(chars))

    def encode_fn(ch):
        return util.one_hot_encode(ch, char2idx)

    super().__init__(model, chars, encode_fn, sequence_length, len(chars))
def main(argv):
    """Train one of two architectures on the CIFAR arrays with early stopping.

    Loads ``cifar_images.npy`` and ``cifar_labels.npy`` from
    ``FLAGS.data_dir``, holds out a split for validation, trains for up to
    ``FLAGS.max_epoch_num`` epochs, stops once the held-out loss has not
    improved for more than 20 epochs, then prints a 95% confidence interval
    for the error and saves loss and accuracy plots.

    Args:
        argv: Command-line arguments; ``argv[1]`` selects the architecture
            (``'1'`` or ``'2'``).

    Returns:
        ``1`` when the architecture argument is missing or invalid,
        otherwise ``None``.
    """
    # Validate the architecture choice up front. Previously an invalid value
    # only printed a message and training then failed on undefined names.
    architecture = argv[1] if len(argv) > 1 else None
    if architecture not in ('1', '2'):
        print("Invalid argument")
        return 1

    train_accuracies_list = []
    test_accuracies_list = []
    train_losses_list = []
    test_losses_list = []

    # fetch data
    train_x = np.load(FLAGS.data_dir + 'cifar_images.npy')
    train_y = np.load(FLAGS.data_dir + 'cifar_labels.npy')

    # one hot encode labels
    train_y = util.one_hot_encode(train_y, 100)

    # split into train data and test data where test data is used for validation
    x_train, x_test, y_train, y_test = util.data_split(train_x, train_y, 0.1)

    tf.reset_default_graph()

    # build model according to command line argument
    if architecture == '1':
        x, y, output = model.architecture_1(
            [16, 32],
            2,
            activation=tf.nn.relu,
            regularizer=tf.contrib.layers.l2_regularizer(0.01))
        cross_entropy = get_cross_entropy(y, output)
        total_regularization_loss = regularize(cross_entropy, reg_coeff=0.01)
        optimizer = train_operation(learning_rate=0.00001)
        train_op = optimizer.minimize(total_regularization_loss)
    else:
        x, y, output = model.architecture_2([64, 128, 128, 256, 256, 512],
                                            2,
                                            activation=tf.nn.relu)
        cross_entropy = get_cross_entropy(y, output)
        total_regularization_loss = regularize(cross_entropy, reg_coeff=0.01)
        optimizer = train_operation(learning_rate=0.0001)
        # Run the collected update ops together with each training step.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(total_regularization_loss)

    patience = 0
    best_loss = 1000
    with tf.Session() as session:
        global_step_tensor = tf.get_variable(
            'global_step',
            trainable=False,
            shape=[],
            initializer=tf.zeros_initializer)
        saver = tf.train.Saver()
        session.run(tf.global_variables_initializer())

        for epoch in range(FLAGS.max_epoch_num):
            print("EPOCH: " + str(epoch))
            ce_loss_train, train_accuracy = train(x, y, session, x_train,
                                                  y_train, train_op,
                                                  cross_entropy, output)
            print("Train CE Loss: " + str(ce_loss_train))
            print("Train Accuracy: " + str(train_accuracy))
            train_accuracies_list.append(train_accuracy)
            train_losses_list.append(ce_loss_train)

            y_preds, ce_loss_test, test_accuracy, conf_mxs = test(
                x, y, session, x_test, y_test, cross_entropy, output)
            test_accuracies_list.append(test_accuracy)
            test_losses_list.append(ce_loss_test)

            # Early stopping: save on improvement, otherwise count the epoch.
            if ce_loss_test < best_loss:
                best_loss = ce_loss_test
                model_saver(session, global_step_tensor)
                patience = 0
            else:
                patience = patience + 1
            print("Patience: " + str(patience))
            if patience > 20:
                break

            print("Test CE Loss: " + str(ce_loss_test))
            print("Test Accuracy: " + str(test_accuracy))

    # 95% confidence interval for the error on the held-out split.
    error = 1 - test_accuracy
    margin = 1.96 * math.sqrt((error * (1 - error)) / y_test.shape[0])
    conf_interval_upper = error + margin
    conf_interval_lower = error - margin
    print('upper_bound' + str(conf_interval_upper))
    print('lower_bound' + str(conf_interval_lower))

    # Generate Loss Plot
    plt.clf()
    plt.figure(figsize=(10, 6))
    plt.plot(train_losses_list, label='train loss')
    plt.plot(test_losses_list, label='test loss')
    plt.legend(loc='upper left')
    plt.title('Train and Test Loss')
    plt.grid()
    plt.savefig('loss_homework2.png', dpi=300)
    plt.show()

    # Generate Accuracy Plot
    plt.figure(figsize=(10, 6))
    plt.plot(train_accuracies_list, label='train accuracy')
    plt.plot(test_accuracies_list, label='test accuracy')
    plt.legend(loc='upper left')
    plt.title('Train and Test Accuracy')
    plt.grid()
    plt.savefig('accuracy_homework2.png', dpi=300)
    plt.show()
# Debug output: show the test feature frame, its row count, and the shapes
# of the train and test feature frames.
print("df test X:")
print(df_test_X)
print("df test X rows:")
print(df_test_X.shape[0])
print("df_train_X shape:")
print(df_train_X.shape)
print("df_test_X shape:")
print(df_test_X.shape)

# One-hot encode the y data into 2 classes, after printing the type and the
# first ten raw training targets.
print("df_train_y.values type:")
print(type(df_train_y.values))
print("df_train_y.values[:10]:")
print(df_train_y.values[:10])
# NOTE(review): the training targets are passed as `.values` while both test
# targets are passed as-is — confirm util.one_hot_encode accepts both forms.
train_y = util.one_hot_encode(df_train_y.values, 2)
test_y = util.one_hot_encode(df_test_y, 2)
auto_test_y = util.one_hot_encode(auto_df_test_y, 2)
print("train_y one hot:")
print(train_y[:10])

# Build the scaler from the training features only, then apply that same
# scaler object to the train, test and auto-test features.
sclr = util.fit_scaler(df_train_X)
train_X = util.scale_data(sclr, df_train_X)
test_X = util.scale_data(sclr, df_test_X)
auto_test_X = util.scale_data(sclr, auto_df_test_X)
print("train_X AFTER scaling:")
print(train_X)
print("test_X AFTER scaling:")
print(test_X)
def run_training(self, max_num_actions, max_run_time, batch_size,
                 batches_per_step, saver_util):
    """Collect episodes and train the actor and critic until a limit is hit.

    Each outer iteration shuffles the objects, runs one single-step episode
    for each of 10 target objects (removing each target afterwards), trains
    on replay-memory batches once the memory exceeds
    ``opts.replay_memory_burn_in``, prints a ``STATS`` line and runs one
    evaluation.

    Args:
        max_num_actions: Stop once more than this many actions were taken;
            ignored when not positive.
        max_run_time: Stop once this many seconds have elapsed; ignored when
            not positive.
        batch_size: Size of each batch drawn from the replay memory.
        batches_per_step: Number of batches to train on per outer iteration.
        saver_util: Object with ``save_if_required()``, or ``None`` to skip
            saving.
    """
    global DUMP_WEIGHTS

    # log start time, in case we are limiting by time...
    start_time = time.time()

    # run for some max number of actions
    num_actions_taken = 0
    n = 0

    while True:
        rewards = []
        remain_obj = list(range(10))

        # run an episode per target object
        # NOTE(review): `env` here is a module-level name while reset/step go
        # through `self.env` — confirm both refer to the same environment.
        env.shuffle_obj()  # shuffle object
        for _ in range(10):
            target_obj_idx = self.obj_list.index(
                random.sample(remain_obj, 1)[0])
            obj_name = env.obj_list[target_obj_idx]
            print('target object:', obj_name)

            # start a new episode
            state_1 = self.env.reset(target_obj_idx)

            # prepare data for updating replay memory at end of episode;
            # the starting state must be captured before it is rolled forward
            initial_state = np.copy(state_1)
            action_reward_state_sequence = []

            done = False
            step = 0
            while not done:
                # choose action
                action = self.actor.action_given(state_1, add_noise=True)
                # take action step in env
                state_2, reward, done = self.env.step(action)
                rewards.append(reward)
                # cache for adding to replay memory
                action_reward_state_sequence.append(
                    (action, reward, np.copy(state_2)))
                # roll state for next step.
                state_1 = state_2
                step = step + 1
                # if step == opts.action_repeat_per_scene:
                if step == 1:
                    done = True

            # at end of episode update replay memory
            self.replay_memory.add_episode(initial_state,
                                           action_reward_state_sequence)

            if remain_obj:
                # Target object remove version
                remain_obj.remove(self.obj_list[target_obj_idx])
                env.remove_obj(target_obj_idx)
                ## Random object remove version
                # rand_idx= self.obj_list.index(random.sample(remain_obj, 1)[0])
                # remain_obj.remove(rand_idx)
                # env.remove_obj(rand_idx)

        # do a training step (after waiting for buffer to fill a bit...)
        if self.replay_memory.size() > opts.replay_memory_burn_in:
            # run a set of batches
            for _ in range(batches_per_step):
                batch = self.replay_memory.batch(batch_size)
                self.actor.train(batch.state_1)
                self.critic.train(batch)
                # update target nets
                self.target_actor.update_weights()
                self.target_critic.update_weights()

        # dump some stats and progress info
        stats = collections.OrderedDict()
        stats["time"] = time.time()
        stats["n"] = n
        # float() keeps the value JSON-serialisable even when the rewards sum
        # to a NumPy integer.
        stats["total_reward"] = float(np.sum(rewards))
        stats["episode_len"] = len(rewards)
        stats["replay_memory_stats"] = self.replay_memory.current_stats()
        print("STATS %s\t%s" %
              (datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
               json.dumps(stats)))
        sys.stdout.flush()
        n += 1

        # save if required
        if saver_util is not None:
            saver_util.save_if_required()

        # exit when finished
        num_actions_taken += len(rewards)
        if max_num_actions > 0 and num_actions_taken > max_num_actions:
            break
        if max_run_time > 0 and time.time() > start_time + max_run_time:
            break

        # emit occasional eval
        self.run_eval(1)

        # dump weights once if requested
        if DUMP_WEIGHTS:
            self.debug_dump_network_weights()
            DUMP_WEIGHTS = False
root_tags_placeholder: root_tags_valid, title_placeholder: title_fea_valid, desc_placeholder: desc_fea_valid, ocr_placeholder: ocr_fea_valid, cate_placeholder: cate_fea_valid } confidence_root, loss_root, predict_root_label_valid = \ sess.run([valid_nets['confidence_root'], valid_nets['loss_root'], valid_nets['predict_label_root']], feed_dict=feed_dict) # gap_root = eval_util.calculate_gap(predict_root_label_valid, root_tags_valid) # gap_root_average += gap_root root_tags_one_hot = one_hot_encode(root_tags_valid, TAG_NUM) root_eval_metrics.accumulate( confidence_root, root_tags_one_hot, [0 for i in range(confidence_root.shape[0])]) video_id_all.extend(video_id_valid) predict_root_all.append(confidence_root) tags_root_all.append(root_tags_valid) if i == 0: print(confidence_root[0, :]) loss_root_average += loss_root tags_root_all, predict_root_argmax, accuracy_root = \ cal_accuracy(predict_root_all, tags_root_all) top_2_acc = top_n_accuracy(
def run_training(self, max_num_actions, max_run_time, batch_size,
                 batches_per_step, saver_util):
    """Gather experience and train the NAF network until a limit is reached.

    Unless ``opts.dont_do_rollouts`` is set, each outer iteration shuffles the
    tray and runs one episode of at most 10 steps for each of 10 target
    objects, removing each target afterwards. It then trains on replay-memory
    batches once the memory exceeds ``opts.replay_memory_burn_in``, prints a
    ``STATS`` line, and evaluates every tenth iteration (or on every
    iteration when ``VERBOSE_DEBUG`` is set).

    Args:
        max_num_actions: Stop once more than this many actions were taken;
            ignored when not positive.
        max_run_time: Stop once this many seconds have elapsed; ignored when
            not positive.
        batch_size: Size of each batch drawn from the replay memory.
        batches_per_step: Number of batches to train on per outer iteration.
        saver_util: Object with ``save_if_required()``, or ``None`` to skip
            saving.
    """
    global DUMP_WEIGHTS

    # remember when we started, in case the run is limited by time
    started_at = time.time()

    # counters for the action budget and the iteration number
    actions_so_far = 0
    n = 0
    one_hot_list = util.one_hot_encode(self.obj_list)

    while True:
        rewards = []
        losses = []
        remain_obj = list(range(10))  # every object is available again

        # gather experience online unless rollouts are disabled
        if not opts.dont_do_rollouts:
            env.shuffle_obj()  # Tray Shuffle
            for _ in range(10):  # one episode per object
                target_obj_idx = self.obj_list.index(
                    random.sample(remain_obj, 1)[0])
                obj_name = env.obj_list[target_obj_idx]
                print('Target object:', obj_name, file=sys.stderr)

                # one-hot row identifying the target object
                target_obj_hot = one_hot_list[target_obj_idx, :]

                # begin a fresh episode and keep its starting state
                state_1 = self.env.reset()
                initial_state = np.copy(state_1)
                action_reward_state_sequence = []

                done = False
                step = 0
                while not done:
                    # pick an action for the current state
                    internal_state = env.internal_state
                    action = self.naf.action_given(state_1,
                                                   internal_state,
                                                   target_obj_hot,
                                                   add_noise=True)
                    # apply it to the environment
                    state_2, reward, done = self.env.step(action,
                                                          target_obj_idx)
                    rewards.append(reward)
                    # remember the transition for the replay memory
                    transition = (action, reward, np.copy(state_2),
                                  internal_state, target_obj_hot)
                    action_reward_state_sequence.append(transition)
                    # advance to the next state
                    state_1 = state_2
                    step += 1
                    # if step == opts.action_repeat_per_scene:
                    if step == 10:
                        done = True

                # the finished episode goes into the replay memory
                self.replay_memory.add_episode(initial_state,
                                               action_reward_state_sequence)

                # Random object remove
                if len(remain_obj) > 0:
                    # Target object remove version
                    remain_obj.remove(self.obj_list[target_obj_idx])
                    env.remove_obj(target_obj_idx)
                    ## Random object remove version
                    # rand_idx= self.obj_list.index(random.sample(remain_obj, 1)[0])
                    # remain_obj.remove(rand_idx)
                    # env.remove_obj(rand_idx)

        # train only once the replay memory has filled a bit
        if self.replay_memory.size() > opts.replay_memory_burn_in:
            for _ in range(batches_per_step):
                batch = self.replay_memory.batch(batch_size)
                losses.append(self.naf.train(batch))
                # update target nets
                self.target_value_net.update_weights()
                # TODO : Target Net update????? naf? value??? why

            # optional debug output for the last batch
            if VERBOSE_DEBUG:
                print("-----")
                print("> BATCH")
                print("state_1", batch.state_1.T)
                print("action\n", batch.action.T)
                print("reward        ", batch.reward.T)
                print("terminal_mask ", batch.terminal_mask.T)
                print("state_2", batch.state_2.T)
                print("< BATCH")
                l_values, l, v, a, vp = self.naf.debug_values(batch)
                print("> BATCH DEBUG VALUES")
                print("l_values\n", l_values.T)
                print("loss\t", l)
                print("val\t", np.mean(v), "\t", v.T)
                print("adv\t", np.mean(a), "\t", a.T)
                print("val'\t", np.mean(vp), "\t", vp.T)
                print("< BATCH DEBUG VALUES")

        # report progress for this iteration
        timestamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        stats = collections.OrderedDict()
        stats["time"] = time.time()
        stats["n"] = n
        stats["mean_losses"] = float(np.mean(losses)) if losses else 0
        stats["total_reward"] = np.sum(rewards)
        stats["episode_len"] = len(rewards)
        stats["replay_memory_stats"] = self.replay_memory.current_stats()
        print("STATS %s\t%s" % (timestamp, json.dumps(stats)))
        sys.stdout.flush()
        n += 1

        # let the saver decide whether a checkpoint is due
        if saver_util is not None:
            saver_util.save_if_required()

        # emit occasional eval
        if VERBOSE_DEBUG or n % 10 == 0:
            self.run_eval(1)

        # dump weights once if requested
        if DUMP_WEIGHTS:
            self.debug_dump_network_weights()
            DUMP_WEIGHTS = False

        # stop when the action or time budget is used up
        actions_so_far += len(rewards)
        if max_num_actions > 0 and actions_so_far > max_num_actions:
            break
        if max_run_time > 0 and time.time() > started_at + max_run_time:
            break