def forward_one_step(self, state, action, reward, next_state, test=False):
    """Build the clipped-target MSE loss for one batch of transitions.

    Both state batches are flattened to
    ``(n_batch, config.rl_history_length * 34)``.  For every sample the
    greedy next action is the argmax of ``compute_q_variable(next_state)``
    and its value is read from ``compute_target_q_variable(next_state)``.
    The regression target starts as a copy of the current Q-values; only the
    entry of the action actually taken is overwritten, and it is moved by at
    most 1.0 away from its current value.

    Args:
        state: Array of current states; the first axis is the batch.
        action: Per-sample actions, mapped to output indices through
            ``self.get_index_for_action``.
        reward: Per-sample rewards.
        next_state: Array of successor states, same layout as ``state``.
        test: Forwarded unchanged to both Q-value computations.

    Returns:
        Tuple ``(loss, q)``: the mean squared error between the target and
        the Q-values of ``state``, and those Q-values.
    """
    xp = cuda.cupy if config.use_gpu else np
    n_batch = state.shape[0]
    flat_shape = (n_batch, config.rl_history_length * 34)
    state = Variable(state.reshape(flat_shape))
    next_state = Variable(next_state.reshape(flat_shape))
    if config.use_gpu:
        state.to_gpu()
        next_state.to_gpu()

    q = self.compute_q_variable(state, test=test)
    next_q = self.compute_q_variable(next_state, test=test)
    greedy_actions = xp.argmax(next_q.data, axis=1)
    if config.use_gpu:
        # Indices are consumed one by one on the host below.
        greedy_actions = cuda.to_cpu(greedy_actions)
    target_q = self.compute_target_q_variable(next_state, test=test)

    target = q.data.copy()
    gamma = config.rl_discount_factor
    for row in range(n_batch):
        bootstrap = target_q.data[row][greedy_actions[row]]
        new_value = reward[row] + gamma * bootstrap
        col = self.get_index_for_action(action[row])
        current = target[row, col]
        delta = new_value - current
        # Limit the per-sample update to the interval [-1, 1].
        if delta > 1.0:
            new_value = 1.0 + current
        elif delta < -1.0:
            new_value = -1.0 + current
        target[row, col] = new_value

    loss = F.mean_squared_error(Variable(target), q)
    return loss, q
def tiling(x: chainer.Variable, rows, cols):
    """Arrange a batch of images into one ``rows`` x ``cols`` mosaic.

    The batch is copied to the host, cut down to at most its first three
    channels, mapped with ``v * 127.5 + 127.5``, clipped to ``[0, 255]`` and
    converted to ``uint8``.

    Args:
        x: Variable whose ``data`` is a 4-D array ``(N, C, H, W)`` with
            ``N == rows * cols``.
        rows: Number of tile rows in the mosaic.
        cols: Number of tile columns in the mosaic.

    Returns:
        ``uint8`` array of shape ``(rows * H, cols * W, min(C, 3))``.
    """
    x = chainer.cuda.to_cpu(x.data)
    x = x[:, :3, :, :]
    x = numpy.asarray(numpy.clip(x * 127.5 + 127.5, 0.0, 255.0),
                      dtype=numpy.uint8)
    # Use the channel count that is actually present: the slice above yields
    # fewer than three channels for e.g. grayscale batches, and a hard-coded
    # 3 would make the reshape fail for them.
    _, c, h, w = x.shape
    x = x.reshape((rows, cols, c, h, w))
    # (row, col, ch, H, W) -> (row, H, col, W, ch)
    x = x.transpose(0, 3, 1, 4, 2)
    x = x.reshape((rows * h, cols * w, c))
    return x
def make_image(trainer):
    """Save a preview PNG with reference, real and generated images.

    Reads ``data``, ``gen``, ``device`` and ``dst`` from the enclosing scope.
    The output has three image rows (reference inputs, real targets,
    generator outputs), each containing one tile per sample, and is written
    to ``<dst>/preview/image<iteration>.png``.

    Args:
        trainer: Trainer whose ``updater.iteration`` names the output file.
    """
    x_ref, x_rot, eps = data
    n_samples = x_ref.shape[0]
    width = x_ref.shape[-1]
    height = x_ref.shape[-2]
    channel = x_ref.shape[-3]
    to_device = chainer.dataset.concat_examples

    def as_float_variable(arr):
        # Move to the target device, then re-wrap as float32.
        moved = Variable(to_device(arr, device))
        return Variable(moved.data.astype(np.float32))

    x_real = as_float_variable(x_rot) / 255.0
    x_ref = as_float_variable(x_ref) / 255.0
    eps = as_float_variable(eps)

    with chainer.using_config('train', False):
        generated = gen(x_ref, eps)

    row_shape = (1, n_samples, channel, height, width)
    strips = [
        chainer.cuda.to_cpu(v.data).reshape(row_shape)
        for v in (x_ref, x_real, generated)
    ]
    x = np.concatenate(strips, axis=0)

    # Values are scaled straight from [0, 1] to [0, 255] (sigmoid-style
    # generator output, per the original author's note).
    x = (x * 255).clip(0.0, 255.0)
    x = np.asarray(x, dtype=np.uint8)

    _, _, _, H, W = x.shape
    # (strip, sample, ch, H, W) -> (strip, H, sample, W, ch)
    x = x.transpose(0, 3, 1, 4, 2)
    if channel == 3:
        x = x.reshape((3 * H, n_samples * W, 3))
    elif channel == 1:
        x = x.reshape((3 * H, n_samples * W))

    preview_dir = '{}/preview'.format(dst)
    preview_path = preview_dir + \
        '/image{:0>6}.png'.format(trainer.updater.iteration)
    if not os.path.exists(preview_dir):
        os.makedirs(preview_dir)
    Image.fromarray(x).save(preview_path)
def cumprod(x, axis=-1):
    """Return the cumulative product of ``x`` along ``axis``.

    The input is broadcast along a new axis, entries that lie after the
    output position are replaced by ones through a lower-triangular mask,
    and the result is reduced with ``prod``.

    Args:
        x: ``Variable`` or raw array; raw arrays are wrapped in a
            ``Variable``.
        axis: Axis to accumulate over.  Negative values count from the end;
            ``None`` flattens the input first.

    Returns:
        The result of ``prod`` over the masked, broadcast input; it has the
        same shape as the (possibly flattened) input.
    """
    if not isinstance(x, Variable):
        x = Variable(x)

    if axis is None:
        x = x.reshape(-1)
        axis = 0
    elif axis < 0:
        axis = x.ndim + axis
    assert axis >= 0 and axis < x.ndim

    xp = cuda.get_array_module(x)

    ndim = x.ndim
    dims = x.shape[axis]
    # Duplicate the accumulated axis: position `axis` indexes the output
    # element, position `axis + 1` indexes the factors feeding into it.
    shape_new = x.shape[:axis] + (dims, ) + x.shape[axis:]
    x = functions.expand_dims(x, axis)
    x = functions.broadcast_to(x, shape_new)

    # TODO: use cupy.tril
    # `numpy.bool` was only an alias of the builtin and no longer exists in
    # current NumPy releases; `numpy.bool_` is the actual boolean dtype.
    mask = numpy.tril(numpy.ones((dims, dims), numpy.bool_))
    if xp is cupy:
        mask = cuda.to_gpu(mask)
    expander = [1] * axis + [dims, dims] + [1] * (ndim - axis - 1)
    mask = mask.reshape(expander)
    mask = xp.broadcast_to(mask, shape_new)
    # Factors outside the lower triangle become 1 and do not contribute.
    x = functions.where(mask, x, xp.ones_like(x.data))
    return prod(x, axis + 1)
def main():
    """Render a 10x10 grid of generator outputs along a latent direction.

    Loads generator weights from ``result/gen_iter_500000.npz``, draws 100
    latent vectors with a fixed seed, replaces rows 1-9 of every block of ten
    with steps from the block's first row along ``z[10] - z[30]``, and saves
    the generated single-channel images as
    ``interpolate/preview/phenomenon.png``.
    """
    gen = Generator()
    chainer.serializers.load_npz('result/gen_iter_500000.npz', gen)

    np.random.seed(0)
    xp = gen.xp
    z = np.random.normal(0.0, 1.0, (100, 100))

    # Rows 10 and 30 are block heads and are never overwritten below, so the
    # direction is the same for every step.
    direction = z[10] - z[30]
    for head in range(0, 100, 10):
        for step in range(1, 10):
            z[head + step] = z[head] - 0.1 * step * direction

    z = Variable(xp.asarray(z.reshape(100, 100, 1, 1), dtype=np.float32))
    with chainer.using_config('train', False):
        x = gen(z)
    x = chainer.cuda.to_cpu(x.data)
    x = np.asarray(np.clip(x * 255, 0.0, 255.0), dtype=np.uint8)

    _, _, H, W = x.shape
    # (row, col, ch, H, W) -> (row, H, col, W, ch) -> one 2-D mosaic
    mosaic = x.reshape((10, 10, 1, H, W)).transpose(0, 3, 1, 4, 2)
    mosaic = mosaic.reshape((10 * H, 10 * W))

    preview_dir = 'interpolate/preview'
    preview_path = preview_dir + '/phenomenon.png'
    if not os.path.exists(preview_dir):
        os.makedirs(preview_dir)
    Image.fromarray(mosaic).save(preview_path)
def to_onehot(y, num_classes, label_smoothing_prob=0, use_cuda=False):
    """Convert indices into one-hot encoding.

    Args:
        y (chainer.Variable, int): Indices of labels.
            A tensor of size `[B, 1]`.
        num_classes (int): the number of classes
        label_smoothing_prob (float, optional): when greater than 0, every
            entry ``v`` of the one-hot tensor becomes
            ``v * (1 - p) + p / num_classes``
        use_cuda (bool, optional): if True, use GPUs
    Returns:
        y_onehot (chainer.Variable, float): A tensor of size
            `[B, 1, num_classes]`
    """
    batch_size = y.shape[0]
    y_onehot = np.eye(num_classes, dtype=np.float32)[
        y.data.reshape(batch_size).tolist()]
    y_onehot = Variable(y_onehot)
    if use_cuda:
        y_onehot.to_gpu()
    y_onehot = y_onehot.reshape(batch_size, 1, num_classes)

    # Label smoothing.  This has to act on the one-hot tensor that is
    # returned; applying it to the index tensor `y` had no effect on the
    # result.  `1.0 /` keeps the division floating-point on Python 2 as well.
    if label_smoothing_prob > 0:
        y_onehot = y_onehot * (1 - label_smoothing_prob) + \
            1.0 / num_classes * label_smoothing_prob

    return y_onehot
def learning_based_method(self, net, word, z, top_n): ''' Parameters: net (chainer.Chain) ... model word (str) ... input word for the model z (int) ... input attribute ID for the model top_n (int) ... Number of top-N similar words to return nearest_words ''' try: x = self.word2vec[word].astype(np.float32) except KeyError: # the word not in vocabulary if word[0] == '(' and word[-1] == ')': # (oov_word) -> oov_word word = word[1:-1] return [('({})'.format(word), 'n/a') for _ in range(top_n)] # To variable x = Variable(x.reshape((1, len(x)))) z = Variable(np.array([[z]]).astype(np.int32)) #z = Variable(z.reshape(1,len(z))) # Transform a word attribute z from x into y with reflection y = net.test(x, z) # Show top five similar words to y y = y.array[0] nearest_words = self.word2vec.similar_by_vector( y, top_n) #self.word2vec.most_similar([y], [], 5) nearest_words = [(word[0], round(word[1], 4)) for word in nearest_words] return nearest_words
def predict(self, state, action):
    """Run ``self.model`` on the concatenation of ``state`` and ``action``.

    The two arrays are joined along axis 0, cast to ``float32`` and reshaped
    into a single-row batch before being passed to the model.

    Returns:
        Whatever ``self.model`` returns for that batch.
    """
    joined = np.concatenate((state, action), axis=0).astype(np.float32)
    single_row = joined.reshape((1, joined.shape[0]))
    return self.model(Variable(single_row))
def fill_replay_buf(num_frames):
    """Play ``num_frames`` frames and record them in the replay pools.

    Uses the module-level ``doom_game``, ``action_q``, ``actions``, ``left``,
    ``random_probability``, ``args`` and the ``*_pool`` arrays, and advances
    the global ``frame`` counter by ``num_frames``.

    Each frame stores the current screen and the chosen action at slot
    ``frame % POOL_SIZE``; the reward and terminal flag produced by the
    previous action are stored one slot earlier.  The game reward is shaped
    with ``0.05 * health change + 0.02 * ammo change``.

    Args:
        num_frames: Number of frames to play.
    """
    global frame
    action = randomize_action(left, 1)
    old_health = 100
    old_ammo = 26
    for sframe in range(frame, frame + num_frames):
        reward = doom_game.make_action(action)
        terminal = doom_game.is_episode_finished()
        if terminal:
            doom_game.new_episode()
            old_health = 100
            old_ammo = 26
            action = randomize_action(left, 1)

        state = doom_game.get_state()
        game_vars = state.game_variables
        # NOTE(review): index 1 is treated as health and index 0 as ammo;
        # this depends on the game configuration - confirm.
        new_health = game_vars[1]
        new_ammo = game_vars[0]
        reward += 0.05 * (new_health - old_health)
        reward += 0.02 * (new_ammo - old_ammo)
        old_health = new_health
        old_ammo = new_ammo

        # (H, W, C) screen -> (1, C, H, W) float batch scaled to [-1, 1]
        train_image = cuda.to_gpu(
            state.screen_buffer.astype(np.float32).transpose((2, 0, 1)),
            device=args.gpu)
        train_image = Variable(
            train_image.reshape((1, ) + train_image.shape) / 127.5 - 1,
            volatile=True)
        score = action_q(train_image, train=False)
        best_idx = int(F.argmax(score).data)
        action = randomize_action(actions[best_idx], random_probability)

        index = sframe % POOL_SIZE
        state_pool[index] = cuda.to_cpu(train_image.data)
        action_pool[index] = actions.index(action)
        # Reward and terminal flag belong to the previous frame's action and
        # therefore share slot `index - 1`.  The flag is written once, after
        # `index` is known: writing it inside the `if terminal` branch used
        # the previous iteration's index (unbound on the first iteration) and
        # was then overwritten with 0.
        reward_pool[index - 1] = reward
        terminal_pool[index - 1] = 1 if terminal else 0
    frame += num_frames
def get_xy_mirror_distance(net, word2vec, word, z_id):
    """Measure the mirror distance of a word vector before and after transfer.

    The embedding of ``word`` is passed through ``net.test`` together with
    the attribute id ``z_id``.  ``get_mirror_distance`` is then evaluated for
    the input vector and for the transformed vector, using the parameters
    produced by ``net.embed_a`` and ``net.embed_c``.

    Returns:
        Tuple ``(xd, yd)``: distance for the input and for the output vector.
    """
    vec = word2vec[word].astype(numpy.float32)
    x_var = Variable(vec.reshape((1, len(vec))))
    z = Variable(numpy.array([[z_id]]).astype(numpy.int32))

    # Transform a word attribute z from x into y with reflection
    y = net.test(x_var, z).array[0]
    x = x_var.array[0]

    # Mirror parameters; net.z1 is evaluated separately for each of them.
    a = net.embed_a(net.z1(z), x).array[0]
    c = net.embed_c(net.z1(z), x).array[0]

    xd = get_mirror_distance(x, a, c)
    yd = get_mirror_distance(y, a, c)
    return xd, yd
def mnist_train_0_batch(data, test, batch_size=64, nb_epochs=10):
    """Train ``model_0`` on shuffled mini-batches and report test accuracy.

    Uses the module-level ``model_0``, ``optimizer`` and ``foward_0``.  Each
    epoch draws a fresh permutation over the largest multiple of
    ``batch_size`` that fits into ``data``, runs one update per batch, and
    then prints accuracy and error rate over the whole ``test`` set.

    Args:
        data: Training set; ``data[indices]`` must yield ``(images, labels)``.
        test: Test set with the same indexing behaviour.
        batch_size: Number of samples per update.
        nb_epochs: Number of passes over the training data.
    """
    for epoch in range(nb_epochs):
        print("Current epoch: %d" % (epoch + 1))

        # Drop the tail that does not fill a complete batch, then shuffle.
        usable = len(data) - (len(data) % batch_size)
        order = np.random.permutation(usable)

        for start in range(0, usable, batch_size):
            model_0.cleargrads()

            picked = order[start:start + batch_size]
            images = np.array(data[picked][0]).astype(np.float32)
            labels = np.array(data[picked][1]).astype(np.int32)

            # Add the channel axis expected by the model.
            images = Variable(images.reshape(batch_size, 1, 28, 28))

            loss = foward_0(images, labels)
            loss.backward()
            optimizer.update()

        # Evaluate on the entire test set.
        print("Validation Set")
        test_images = np.array(test[:, ][0]).astype(np.float32)
        test_labels = np.array(test[:, ][1]).astype(np.int32)
        test_images = Variable(test_images.reshape(len(test), 1, 28, 28))

        pred = foward_0(test_images, None, predict=True)
        acc = (pred == test_labels).mean()
        print("Accuracy : {} \nError Rate: {}".format(acc * 100,
                                                      (1 - acc) * 100))
def _get_nearest_words(net, word2vec, word, z_id, n_top, show=True):
    """Return the words closest to the transformed embedding of ``word``.

    Args:
        net: Model providing ``test(x, z)``.
        word2vec: Embedding lookup providing ``similar_by_vector``.
        word: Input word; must be present in ``word2vec``.
        z_id: Attribute id passed to the model.
        n_top: Iterable of cut-offs; the largest one decides how many
            neighbours are requested.
        show: When true, print the word together with its neighbours.

    Returns:
        The result of ``word2vec.similar_by_vector`` for ``max(n_top)``
        neighbours.
    """
    embedding = word2vec[word].astype(numpy.float32)
    x = Variable(embedding.reshape((1, len(embedding))))
    z = Variable(numpy.array([[z_id]]).astype(numpy.int32))

    # Transform a word attribute z from x into y with reflection
    y = net.test(x, z).array[0]

    nearest_words = word2vec.similar_by_vector(y, topn=max(n_top))
    if show:
        print(word, nearest_words)
    return nearest_words
def play_using_saved_q(q_filepath,
                       num_episodes=1,
                       save_replay=False,
                       replay_filepath=None,
                       device=0):
    """Play episodes greedily with a network loaded from ``q_filepath``.

    Args:
        q_filepath: HDF5 file with the saved network parameters.
        num_episodes: Number of episodes to play.
        save_replay: When true, a replay path is required; the function
            reports an error and returns without playing if none is given.
        replay_filepath: Prefix of the replay files.  Whenever it is given,
            episode ``i`` is recorded to ``<prefix><i>_rec.lmp``.  When it is
            ``None`` the episodes are played without recording.
        device: Unused; the network runs on the CPU here.
    """
    if save_replay and not replay_filepath:
        print("Error: please provide a filepath for replays")
        # Stop here instead of failing later while building the file name.
        return

    saved_q = YOLO(**{
        'pgrid_dims': [10, 8],
        'bb_num': 3,
        'num_classes': 3,
        'drop_prob': 0.5
    })
    serializers.load_hdf5(q_filepath, saved_q)

    doom_game = gd.setup_game(show_window=False)

    for i in range(int(num_episodes)):
        if replay_filepath is not None:
            doom_game.new_episode(replay_filepath + str(i) + "_rec.lmp")
        else:
            # NOTE(review): assumes the game object accepts new_episode()
            # without a recording path, as ViZDoom's DoomGame does - confirm
            # for the object returned by gd.setup_game.
            doom_game.new_episode()
        total_reward = 0
        ct = 0
        while not doom_game.is_episode_finished():
            ct += 1
            if ct % 10 == 0:
                # Parenthesised so the statement parses on Python 2 and 3.
                print(ct)
            state = doom_game.get_state()
            # (H, W, C) screen -> (1, C, H, W) float batch scaled to [-1, 1]
            screen_buf = state.screen_buffer.astype(np.float32).transpose(
                (2, 0, 1))
            screen_buf = Variable(
                screen_buf.reshape((1, ) + screen_buf.shape) / 127.5 - 1,
                volatile=True)
            scores = saved_q(screen_buf, train=False)
            best_idx = int(F.argmax(scores).data)
            total_reward += doom_game.make_action(actions[best_idx])
        print("Total reward:", total_reward)
    doom_game.close()
def mnist_train_0(data, test, nb_epochs=10):
    """Train ``model_0`` on the first 500 samples, one update per epoch.

    Uses the module-level ``model_0``, ``optimizer`` and ``foward_0``.  After
    every update the accuracy and error rate on the first 100 test samples
    are printed.

    Args:
        data: Training set; ``data[indices]`` must yield ``(images, labels)``.
        test: Test set with the same indexing behaviour.
        nb_epochs: Number of update/evaluation rounds.
    """
    for epoch in range(nb_epochs):
        print("Current epoch: %d" % (epoch + 1))

        model_0.cleargrads()

        # Fixed training subset: the first 500 samples.
        train_idx = list(range(500))
        images = np.array(data[train_idx][0]).astype(np.float32)
        labels = np.array(data[train_idx][1]).astype(np.int32)
        images = Variable(images.reshape(len(train_idx), 1, 28, 28))

        loss = foward_0(images, labels)
        loss.backward()
        optimizer.update()

        # Fixed evaluation subset: the first 100 test samples.
        test_idx = list(range(100))
        test_images = np.array(test[test_idx][0]).astype(np.float32)
        test_labels = np.array(test[test_idx][1]).astype(np.int32)
        test_images = Variable(
            test_images.reshape(len(test_idx), 1, 28, 28))

        pred = foward_0(test_images, None, predict=True)
        acc = (pred == test_labels).mean()
        print("Accuracy : {} \nError Rate: {}".format(acc * 100,
                                                      (1 - acc) * 100))
action = None action_q = q.copy() action_q.reset_state() while True: if action is not None: game.play(action) pixmap = QPixmap.grabWindow(window_id, left, top, w, h) image = pixmap.toImage() bits = image.bits() bits.setsize(image.byteCount()) screen = Image.fromarray(np.array(bits).reshape((h, w, 4))[:,:,2::-1]) reward, terminal = game.process(screen) if reward is not None: train_image = xp.asarray(screen.resize((train_width, train_height))).astype(np.float32).transpose((2, 0, 1)) train_image = Variable(train_image.reshape((1,) + train_image.shape) / 127.5 - 1, volatile=True) score = action_q(train_image, train=False) best = int(np.argmax(score.data)) action = game.randomize_action(best, random_probability) print action, float(score.data[0][action]), best, float(score.data[0][best]), reward index = frame % POOL_SIZE state_pool[index] = cuda.to_cpu(train_image.data) action_pool[index] = action reward_pool[index - 1] = reward average_reward = average_reward * 0.9999 + reward * 0.0001 print "average reward: ", average_reward if terminal: terminal_pool[index - 1] = 1 if only_result: i = index - 2
num = len(x) x = Variable(x) t = Variable(t) model = Model() optimizer = optimizers.Adam() optimizer.setup(model) #while(1): for i in range(2000): #for j in range(num): model.cleargrads() y = model(x) #print(y.data) loss = F.mean_squared_error(y, t.reshape(num, 1)) loss.backward() optimizer.update() print("loss:", loss.data) test_path = "test.csv" csv_file = open(test_path, "r", encoding="utf_8", errors="", newline="\n") test_f = csv.reader(csv_file, delimiter=",", doublequote=True, lineterminator="\r\n", quotechar='"', skipinitialspace=True) test_x = [] #テストデータ変換
class LSM():
    """Polynomial regression model that can be used like a chainer model.

    The model holds ``dimension + 1`` weights and predicts
    ``sum_k w[k] * x**k``.  With ``define_by_run=True`` the weights live in a
    ``Variable`` and prediction/loss are delegated to the module-level
    ``func_y`` and ``func_J``; otherwise everything is plain NumPy.
    """

    def __init__(self, *, dimension=2, learning_rate=0.1,
                 define_by_run=False):
        """Draw random initial weights and zero the gradient buffer.

        Args:
            dimension: Highest power of ``x`` used by the model.
            learning_rate: Stored on the instance; not used by this class.
            define_by_run: Selects the ``Variable``-based code path.
        """
        self.dimension = dimension
        self.learning_rate = learning_rate
        self.define_by_run = define_by_run
        self.w = numpy.random.randn(self.dimension + 1)
        if self.define_by_run:
            self.w = self.w.astype(numpy.float32)
            self.w = Variable(self.w.reshape(self.dimension + 1))
        self._reset_grads()

    def _reset_grads(self):
        """Clear the weight gradient (if any) and zero ``self.grads``."""
        if self.define_by_run:
            # cleargrad() leaves `w.grad` as None, so there is never an
            # existing gradient array to copy from afterwards.
            self.w.cleargrad()
        self.grads = numpy.zeros([self.dimension + 1])

    def __call__(self, *args):
        """Score ``args[0]`` and, if ``args[1]`` is given, compute the error.

        Args:
            *args: ``(x,)`` to compute predictions only, or ``(x, y)`` to
                also prepare the quantities needed by ``backward``.  More
                than two arguments only print a warning; no arguments do
                nothing.

        Returns:
            ``self``; predictions are available as ``self.data``.
        """
        # Too many parameters is an error
        if len(args) > 2:
            print("Please check parameter.")
        elif len(args) > 0:
            # Plain score computation
            self.x = numpy.array(args[0]).astype(numpy.float32)
            self.data = self.__score__()
            if self.define_by_run:
                # Keep the Variable for the loss, expose a flat array.
                pred_y = self.data
                self.data = self.data.data.reshape(self.data.data.shape[0])
            # When training
            if len(args) > 1:
                self.y = numpy.array(args[1]).astype(numpy.float32)
                if self.define_by_run:
                    self.J = func_J(Variable(self.y), pred_y)
                else:
                    self.error = self.y - self.data
        return self

    def __score__(self):
        """Return the estimated y for the data points in ``self.x``."""
        if self.define_by_run:
            scores = func_y(self.w, Variable(self.x), self.dimension)
        else:
            # Row i becomes [x_i**0, x_i**1, ..., x_i**dimension].
            self.X = numpy.array([(x**numpy.ones([self.dimension + 1]))
                                  for x in self.x])
            self.X = self.X**numpy.arange(self.dimension + 1)
            scores = numpy.dot(self.X, self.w)
        return scores

    def zerograds(self):
        """Drop cached inputs/outputs and reset the gradient buffer."""
        for name in ("x", "y", "X", "data", "error"):
            # Remove the attribute if a previous call created it.
            vars(self).pop(name, None)
        self._reset_grads()

    def backward(self):
        """Fill ``self.grads`` from the most recent ``model(x, y)`` call."""
        if self.define_by_run:
            self.J.backward(retain_grad=True)
            self.grads = -self.w.grad.reshape(self.dimension + 1)
        else:
            self.grads = numpy.dot(self.error, self.X)
def play_draw_and_record_yolo(yolo_filepath, replay_filepath, num_episodes=1, device=0):
    """Play recorded episodes with a YOLO network and draw a box per frame.

    Debugging harness: it stops in ``pdb`` twice per frame and the image with
    the drawn box is neither saved nor returned.

    Args:
        yolo_filepath: HDF5 file with the saved network parameters.
        replay_filepath: Prefix of the replay files; episode ``i`` is
            recorded to ``<prefix><i>_rec.lmp``.
        num_episodes: Number of episodes to play.
        device: GPU id used for the network and the input frames.
    """
    d = int(device)
    cuda.get_device(d).use()
    yolo = YOLO(**{ 'pgrid_dims': [10, 8], 'bb_num': 3, 'num_classes': 3, 'drop_prob': 0.5 })
    yolo.to_gpu()
    serializers.load_hdf5(yolo_filepath, yolo)
    doom_game = gd.setup_game(show_window=False)
    for i in range(int(num_episodes)):
        doom_game.new_episode(replay_filepath + str(i) + "_rec.lmp")
        total_reward = 0
        while not doom_game.is_episode_finished():
            state = doom_game.get_state()
            # (H, W, C) screen -> (1, C, H, W) float batch scaled to [-1, 1]
            screen_buf = cuda.to_gpu( (state.screen_buffer.astype(np.float32).transpose((2, 0, 1))), device=d)
            screen_buf = Variable( screen_buf.reshape((1, ) + screen_buf.shape) / 127.5 - 1, volatile=True)
            grid_var, scores = yolo.proposals_and_q(screen_buf, train=False)
            best_idx = int(F.argmax(scores).data)
            total_reward += doom_game.make_action(actions[best_idx])
            # NOTE(review): `grid` is only used by the disabled code in the
            # string literal below.
            grid = cuda.to_cpu(grid_var.data[0])
            boxes = []
            # NOTE(review): the state is fetched again after the action was
            # applied; presumably this can be empty once the episode has
            # ended - confirm against the game API.
            base_img = doom_game.get_state().screen_buffer
            # The string literal below is disabled code that would collect
            # boxes from the network's proposals.
            """ for x, y in np.ndindex((10,8)): proposals = grid[x, y] class_probs = proposals[20:] best_class = class_probs.index(max(class_probs)) for c in range(3): conf_idx = c * 7 if proposals[conf_idx]: # >= 0.6: scaled = yolo.scale_coords(proposals[c+1:c+6]) box = (scaled, x, y, best_class) boxes.append(box) """
            # A single hard-coded proposal stands in for the network output.
            scaled = yolo.scale_coords( np.array( [0.1921875, 0.03125, 0.18540496, 0.3354102, 0.66666667]))
            boxes.append((scaled, 4, 4, 1))
            # NOTE(review): live breakpoint, stops on every frame.
            import pdb
            pdb.set_trace()
            # sort boxes by confidence
            # (no sorting is actually performed here)
            for box in boxes:
                # box = (scaled coords, grid x, grid y, class index)
                w = box[0][2]
                h = box[0][3]
                # NOTE(review): 320/240 and 64/60 presumably come from a
                # 640x480 frame split into the 10x8 grid - TODO confirm.
                xcenter = box[0][0] - 320 + box[1] * 64
                xmin = int(round(xcenter - w / 2))
                xmax = int(round(xcenter + w / 2))
                ycenter = box[0][1] - 240 + box[2] * 60
                ymin = int(round(ycenter - h / 2))
                ymax = int(round(ycenter + h / 2))
                # NOTE(review): `z_` is never read.
                z_ = round(box[0][4])
                best_class = box[3]
                # draw the bounding boxes
                # Outline the box in the image channel selected by the class
                # index: horizontal edges first, then vertical edges.
                for x_ in range(xmin, xmax + 1):
                    base_img[ymin, x_, best_class] = 255
                    base_img[ymax, x_, best_class] = 255
                for y_ in range(ymin, ymax + 1):
                    base_img[y_, xmin, best_class] = 255
                    base_img[y_, xmax, best_class] = 255
            # NOTE(review): second live breakpoint, after drawing.
            import pdb
            pdb.set_trace()
        print("Total reward:", total_reward)
    doom_game.close()
# Classify one MNIST test sample with a previously trained model and print
# the raw network outputs next to the sample's label.
import numpy as np
import chainer
import chainer.links as L
from chainer import Variable
from chainer import serializers
from mnist import MnistModel

train, test = chainer.datasets.get_mnist()

# Initialize the model from the trained parameters
model = L.Classifier(MnistModel())
serializers.load_npz('./output/model_final', model)

# Use the test sample at index 1, reshaped into a one-row batch of 784 values.
x, t = test[1]
x = Variable(x.reshape(1, 784), volatile='on')
y = model.predictor(x)

# The predicted class is the index of the largest output.
pred = np.argmax(y.data, axis=1)
print(y.data.flatten().tolist())
# NOTE(review): the value printed after "Acc:" is the label `t` taken from
# the dataset, not an accuracy.
print("Acc: {}, Pred: {}".format(t, pred))
def target(agent):
    """Drive the agent in a fixed-interval loop and fill the replay pools.

    Starts ``train`` in a second thread, then repeatedly sends the last
    chosen action, reads a screen, and - while the agent reports a reward -
    picks the next action with a copy of the module-level network ``q``.
    Screens and actions are stored at slot ``frame % POOL_SIZE``; the reward
    and terminal flag go one slot earlier.  While no reward is reported, the
    network and optimizer are saved whenever the save countdown has expired.
    The loop ends on ``KeyboardInterrupt``.

    Args:
        agent: Object providing ``send_action``, ``receive_image``,
            ``process`` and ``randomize_action``.
    """
    global frame, random_probability, average_reward
    print("started target thread.")
    try:
        thread.start_new_thread(train, ())
        next_clock = time.clock() + interval
        save_iter = 1000
        save_count = 0
        action = None
        action_q = q.copy()
        action_q.reset_state()
        while True:
            if action is not None:
                agent.send_action(action)
            screen = agent.receive_image()
            reward, terminal = agent.process(screen)

            if reward is None:
                # Not in play: no action to send, use the pause to save.
                action = None
                if save_iter <= 0:
                    print('save:  %s' % (save_count, ))
                    serializers.save_hdf5(
                        '{0}_{1:03d}.model'.format(args_output, save_count),
                        q)
                    serializers.save_hdf5(
                        '{0}_{1:03d}.state'.format(args_output, save_count),
                        optimizer)
                    save_iter = 10000
                    save_count += 1
            else:
                # (H, W, C) screen -> (1, C, H, W) batch scaled to [-1, 1]
                resized = screen.resize((train_width, train_height))
                train_image = xp.asarray(resized).astype(
                    np.float32).transpose((2, 0, 1))
                train_image = Variable(
                    train_image.reshape((1, ) + train_image.shape) / 127.5 -
                    1,
                    volatile=True)
                score = action_q(train_image, train=False)
                best = int(np.argmax(score.data))
                action = agent.randomize_action(best, random_probability)
                print('%s %s %s %s %s' %
                      (action, float(score.data[0][action]), best,
                       float(score.data[0][best]), reward))

                index = frame % POOL_SIZE
                state_pool[index] = cuda.to_cpu(train_image.data)
                action_pool[index] = action
                reward_pool[index - 1] = reward
                average_reward = average_reward * 0.9999 + reward * 0.0001
                print('average reward:  %s' % (average_reward, ))

                if terminal:
                    terminal_pool[index - 1] = 1
                    # Start the next episode from a fresh network state.
                    action_q = q.copy()
                    action_q.reset_state()
                else:
                    terminal_pool[index - 1] = 0

                frame += 1
                save_iter -= 1
                random_probability *= random_reduction_rate
                if random_probability < min_random_probability:
                    random_probability = min_random_probability

            # Keep the loop on a fixed cadence of `interval` seconds.
            current_clock = time.clock()
            wait = next_clock - current_clock
            print('wait:  %s' % (wait, ))
            if wait > 0:
                next_clock += interval
                time.sleep(wait)
            elif wait > -interval / 2:
                # Slightly late: skip the sleep but keep the schedule.
                next_clock += interval
            else:
                # Far behind: restart the schedule from now.
                next_clock = current_clock + interval
    except KeyboardInterrupt:
        pass
def update_core(self):
    """Run ``self.n_critic`` critic updates and one generator update.

    Every round draws a batch from the ``'main'`` iterator, builds
    label-conditioned real and generated inputs, and updates the critic with
    ``average(critic_fake - critic_real)`` plus ``self.l`` times a gradient
    penalty evaluated at randomly perturbed real samples.  The generator is
    updated only in the first round, with ``average(-critic_fake)``.
    """
    # TIPS: in case of experiments, set n_critic as 5 is best result.
    gen_optimizer = self.get_optimizer('gen')
    critic_optimizer = self.get_optimizer('critic')
    xp = self.generator.xp

    for round_idx in range(self.n_critic):
        # Grab data
        examples = self.get_iterator('main').next()
        batchsize = len(examples)
        real_data, real_label = self.converter(examples, self.device)
        real_label = Variable(self.onehot(batchsize, real_label))
        real_data = Variable(real_data) / 255.

        gen_label = self.onehot(batchsize,
                                self.generator.random_label(batchsize))
        z = self.generator.make_input_z_with_given_label(
            batchsize, gen_label)

        # Generator
        gen_data = self.generator(z)

        # Flatten everything to (batchsize, features)
        gen_data = gen_data.reshape(batchsize, -1)
        real_data = real_data.reshape(batchsize, -1)
        real_label = real_label.reshape(batchsize, -1)
        gen_label = gen_label.reshape(batchsize, -1)

        # Critic (discriminator) on label-conditioned inputs
        critic_real = self.critic(F.concat((real_label, real_data), axis=1))
        critic_fake = self.critic(F.concat((gen_label, gen_data), axis=1))

        # Loss
        loss_gan = F.average(critic_fake - critic_real)

        # Gradient penalty around perturbed real samples
        std_x_real = xp.std(real_data.data, axis=0, keepdims=True)
        epsilon = xp.random.uniform(0., 1., real_data.data.shape).astype(
            np.float32)
        x_perturb = real_data + 0.5 * epsilon * std_x_real
        x_perturb = F.concat((gen_label, x_perturb), axis=1)
        grad, = chainer.grad([self.critic(x_perturb)], [x_perturb],
                             enable_double_backprop=True)
        grad_norm = F.sqrt(F.batch_l2_norm_squared(grad))
        loss_grad = self.l * F.mean_absolute_error(
            grad_norm, xp.ones_like(grad_norm.data))

        critic_loss = loss_gan + loss_grad

        self.critic.cleargrads()
        critic_loss.backward()
        critic_optimizer.update()

        chainer.report({
            'critic_loss': critic_loss,
            'loss_grad': loss_grad,
            'loss_gan': loss_gan,
        })

        if round_idx == 0:
            gen_loss = F.average(-critic_fake)
            self.generator.cleargrads()
            gen_loss.backward()
            gen_optimizer.update()
            chainer.report({'gen_loss': gen_loss})
def evaluate(self):
    """Visualise one test and one train batch and return test losses.

    For each of the two datasets one batch is translated with the
    ``enc_x``/``dec_y`` targets and plotted as input, ground truth, output
    and a difference image.  The figure is saved to
    ``<self.vis_out>/count<NNNN>.jpg`` and ``self.count`` is incremented.

    Returns:
        Dict with the keys ``myval/loss_L1``, ``myval/loss_L2`` and
        ``myval/loss_CE`` computed on the test batch.
    """
    # Column titles for the first three figure columns.
    domain = ['in', 'truth', 'out']
    if self.eval_hook:
        self.eval_hook(self)

    # 'test' comes first: its iteration creates `fig`, `gs` and `result`,
    # which the 'train' iteration (k == 1) reuses for the lower half.
    for k, dataset in enumerate(['test', 'train']):
        batch = self._iterators[dataset].next()
        x_in, t_out = chainer.dataset.concat_examples(batch, self.device)
        x_in = Variable(x_in)  # original image
        t_out = Variable( t_out)  # corresponding translated image (ground truth)

        with chainer.using_config( 'train', False), chainer.function.no_backprop_mode():
            x_out = self._targets['dec_y']( self._targets['enc_x'](x_in))  # translated image by NN

        ## unfold stack and apply softmax
        if self.args.class_num > 0 and self.args.stack > 0:
            # Move the stack factor from the channel axis to the batch axis.
            x_in = x_in.reshape(x_in.shape[0] * self.args.stack, x_in.shape[1] // self.args.stack, x_in.shape[2], x_in.shape[3])
            x_out = F.softmax( x_out.reshape(x_out.shape[0] * self.args.stack, x_out.shape[1] // self.args.stack, x_out.shape[2], x_out.shape[3]))
            t_out = t_out.reshape(t_out.shape[0] * self.args.stack, t_out.shape[1] // self.args.stack, t_out.shape[2], t_out.shape[3])
            # select middle slices
            x_in = x_in[(self.args.stack // 2)::self.args.stack]
            x_out = x_out[(self.args.stack // 2)::self.args.stack]
            t_out = t_out[(self.args.stack // 2)::self.args.stack]

        if dataset == 'test':  # for test dataset, compute some statistics
            # The grid has two row blocks (test, train) and four columns.
            fig = plt.figure(figsize=(12, 6 * len(x_out)))
            gs = gridspec.GridSpec(2 * len(x_out), 4, wspace=0.1, hspace=0.1)
            loss_rec_L1 = F.mean_absolute_error(x_out, t_out)
            loss_rec_L2 = F.mean_squared_error(x_out, t_out)
            loss_rec_CE = softmax_focalloss(x_out, t_out, gamma=self.args.focal_gamma, class_weight=self.class_weight)
            result = { "myval/loss_L1": loss_rec_L1, "myval/loss_L2": loss_rec_L2, "myval/loss_CE": loss_rec_CE }

        ## iterate over batch
        for i, var in enumerate([x_in, t_out, x_out]):
            if i % 3 != 0 and self.args.class_num > 0:  # t_out, x_out
                imgs = var2unit_img(var, 0, 1)  # softmax
            else:
                imgs = var2unit_img(var)  # tanh
            for j in range(len(imgs)):
                ax = fig.add_subplot(gs[j + k * len(x_out), i])
                ax.set_title(dataset + "_" + domain[i], fontsize=8)
                # Pick the rendering by the size of the last image axis.
                if (imgs[j].shape[2] == 3):  ## RGB
                    ax.imshow(imgs[j], interpolation='none', vmin=0, vmax=1)
                elif (imgs[j].shape[2] >= 4):  ## categorical
                    # One colour per class index, chosen by argmax.
                    cols = ['k', 'b', 'c', 'g', 'y', 'r', 'm', 'w'] * 5
                    cmap = colors.ListedColormap(cols)
                    im = np.argmax(imgs[j], axis=2)
                    norm = colors.BoundaryNorm(list(range(len(cols) + 1)), cmap.N)
                    ax.imshow(im, interpolation='none', cmap=cmap, norm=norm)
                else:
                    # Fewer than three channels: show the last one in gray.
                    ax.imshow(imgs[j][:, :, -1], interpolation='none', cmap='gray', vmin=0, vmax=1)
                ax.set_xticks([])
                ax.set_yticks([])

        ## difference image
        if (x_out.shape[1] >= 4):  ## categorical
            # Per-pixel cross-entropy term weighted by (1 - p)**2, summed
            # over the channel axis.
            eps = 1e-7
            p = F.clip( x_out, x_min=eps, x_max=1 - eps)  ## we assume the input is already applied softmax
            q = -F.clip(t_out, x_min=eps, x_max=1 - eps) * F.log(p)
            diff = F.sum(q * ((1 - p)**2), axis=1, keepdims=True)
            vmin = -1
            vmax = 1
        else:
            diff = (x_out - t_out)
            vmin = -0.1
            vmax = 0.1
        # NOTE(review): `.get()` presumably copies a GPU array to the host,
        # so this line would need the data to live on a GPU - confirm.
        diff = diff.data.get().transpose(0, 2, 3, 1)
        for j in range(len(diff)):
            # Fourth column: first channel of the difference image.
            ax = fig.add_subplot(gs[j + k * len(x_out), 3])
            ax.imshow(diff[j][:, :, 0], interpolation='none', cmap='coolwarm', vmin=vmin, vmax=vmax)
            ax.set_xticks([])
            ax.set_yticks([])

    gs.tight_layout(fig)
    plt.savefig(os.path.join(self.vis_out, 'count{:0>4}.jpg'.format(self.count)), dpi=200)
    self.count += 1
    plt.close()
    return result
text = index_test[i + n - len(index_test)][0] label = index_test[i + n - len(index_test)][1] feature = index_test[i + n - len(index_test)][2] else: text = index_test[i + n][0] label = index_test[i + n][1] feature = index_test[i + n][2] Text.append(text) Label.append(label) Feature.append(feature) Text = np.array(Text, dtype="int32") Label = np.array(Label, dtype="int32") Feature = np.array(Feature, dtype="float32") Feature = np.mat(Feature) Feature = Feature.reshape(-1, 1) Feature = np.array(Feature) # print("feature vector = ", Feature) Feature = Variable(Feature) model.cleargrads() loss = model(Text, Label, Feature) #loss.backward() #optimizer.update() losses_test.append(loss.data) print(losses_test) ''' # Testing for i in range(0, len(index_test), BATCH_SIZE): Text = []
sum_gen_loss += loss_gen.data.get() if epoch % interval == 0 and batch == 0: serializers.save_npz('xy.model', gen_g_model) serializers.save_npz('yx.model', gen_f_model) for i in range(Ntest): black = (x_test[i] * 127.5 + 127.5).transpose(1, 2, 0).astype( np.uint8) pylab.subplot(2, Ntest, 2 * i + 1) pylab.imshow(black) pylab.axis('off') pylab.savefig(image_xy + '/output_xy_%d.png' % epoch) x = Variable(cuda.to_gpu(x_test[i])) x = x.reshape(1, channels, width, height) with chainer.using_config('train', False): x_y = gen_g_model(x) x_y = x_y.data.get() tmp = (np.clip(x_y[0, :, :, :] * 127.5 + 127.5, 0, 255)).transpose(1, 2, 0).astype(np.uint8) pylab.subplot(2, Ntest, 2 * i + 2) pylab.imshow(tmp) pylab.axis('off') pylab.savefig(image_yx + '/output_xy_%d.png' % epoch) pylab.close() for i in range(Ntest): white = (y_test[i] * 127.5 + 127.5).transpose(1, 2, 0).astype( np.uint8)
def update_core(self):
    """Run one training iteration: a generator step, then a discriminator step.

    A batch is drawn from the ``'main'`` iterator, converted with
    ``self.converter`` and passed through ``enc_x`` (on a noised input) and
    ``dec_y``.  Every loss term whose weight in ``self.args`` is positive is
    added to the generator loss and reported through ``chainer.report``:
    ``lambda_reg``, ``lambda_dice``, ``lambda_rec_ce``, ``lambda_rec_l1``,
    ``lambda_rec_l2``, ``lambda_tv`` and ``lambda_dis``.

    The adversarial term and the discriminator update only run when
    ``lambda_dis > 0`` and ``self.iteration >= dis_warmup``.

    If no generator term is active the accumulated loss is still the plain
    integer ``0``; the generator step is then skipped, because an ``int`` has
    no ``backward()`` to call.

    Returns:
        None.  The method works through side effects on the models, the
        optimizers and the reporter.
    """
    args = self.args
    opt_enc_x = self.get_optimizer('enc_x')
    opt_dec_y = self.get_optimizer('dec_y')
    opt_dis = self.get_optimizer('dis')

    # Both conditions are needed more than once below; nothing in this
    # method modifies the values they read, so evaluate them a single time.
    stacked = args.class_num > 0 and args.stack > 0
    use_dis = args.lambda_dis > 0 and self.iteration >= args.dis_warmup

    def unfold(v):
        # Move the stack factor from axis 1 to axis 0:
        # (n, c, h, w) -> (n * stack, c // stack, h, w).
        s = v.shape
        return v.reshape(s[0] * args.stack, s[1] // args.stack, s[2], s[3])

    def fold(v):
        # Inverse of unfold: (n, c, h, w) -> (n // stack, c * stack, h, w).
        s = v.shape
        return v.reshape(s[0] // args.stack, s[1] * args.stack, s[2], s[3])

    ## image conversion
    batch = self.get_iterator('main').next()
    x_in, t_out = self.converter(batch, self.device)
    x_in = Variable(x_in)
    x_z = self.enc_x(add_noise(x_in, sigma=args.noise))
    x_out = self.dec_y(x_z)

    ## unfold stack and apply softmax
    if stacked:
        x_in = unfold(x_in)
        x_out = F.softmax(unfold(x_out))
        t_out = unfold(t_out)

    # Starts as an int; becomes a Variable once the first term is added.
    loss_gen = 0

    ## regularisation on the latent space
    if args.lambda_reg > 0:
        loss_reg_enc_x = losses.loss_func_reg(x_z[-1], 'l2')
        loss_gen = loss_gen + args.lambda_reg * loss_reg_enc_x
        chainer.report({'loss_reg': loss_reg_enc_x}, self.enc_x)

    if args.lambda_dice > 0:
        loss_dice = dice(x_out, t_out, class_weight=self.class_weight)
        loss_gen = loss_gen + args.lambda_dice * loss_dice
        chainer.report({'loss_dice': loss_dice}, self.dec_y)

    if args.lambda_rec_ce > 0:
        loss_rec_ce = softmax_focalloss(
            x_out, t_out, gamma=args.focal_gamma,
            class_weight=self.class_weight)
        loss_gen = loss_gen + args.lambda_rec_ce * loss_rec_ce
        chainer.report({'loss_CE': loss_rec_ce}, self.dec_y)

    # reconstruction error
    if args.lambda_rec_l1 > 0:
        loss_rec_l1 = weighted_error(
            x_out, t_out, exponent=1, class_weight=self.class_weight)
        loss_gen = loss_gen + args.lambda_rec_l1 * loss_rec_l1
        chainer.report({'loss_L1': loss_rec_l1}, self.dec_y)

    if args.lambda_rec_l2 > 0:
        loss_rec_l2 = weighted_error(
            x_out, t_out, exponent=2, class_weight=self.class_weight)
        loss_gen = loss_gen + args.lambda_rec_l2 * loss_rec_l2
        chainer.report({'loss_L2': loss_rec_l2}, self.dec_y)

    # total variation
    if args.lambda_tv > 0:
        loss_tv = total_variation2(x_out, args.tv_tau)
        loss_gen = loss_gen + args.lambda_tv * loss_tv
        chainer.report({'loss_tv': loss_tv}, self.dec_y)

    # Adversarial loss
    if use_dis:
        # stack again: the discriminator sees the layout produced by the
        # converter / decoder, not the unfolded one used for the losses above
        if stacked:
            x_in = fold(x_in)
            x_out = fold(x_out)
            t_out = fold(t_out)
        x_in_out = F.concat([x_in, x_out])
        y_fake = self.dis(x_in_out)
        if args.dis_wgan:
            loss_adv = -F.average(y_fake)
        else:
            loss_adv = self.loss_func_comp(y_fake, 1.0, args.dis_jitter)
        chainer.report({'loss_dis': loss_adv}, self.dec_y)
        loss_gen = loss_gen + args.lambda_dis * loss_adv

    # update generator model
    # Skip when no term was enabled (e.g. adversarial-only training that is
    # still inside the warm-up period): loss_gen is then the int 0.
    if hasattr(loss_gen, 'backward'):
        self.enc_x.cleargrads()
        self.dec_y.cleargrads()
        loss_gen.backward()
        opt_enc_x.update(loss=loss_gen)
        opt_dec_y.update(loss=loss_gen)

    ## discriminator
    if use_dis:
        # NOTE(review): _buffer.query presumably mixes the current generated
        # pairs with previously stored ones -- confirm against the buffer class.
        x_in_out_copy = self._buffer.query(x_in_out.array)
        if args.dis_wgan:
            ## synthesised -, real +
            # One uniform random weight per batch element, broadcast over
            # the three remaining axes.
            eps = self.xp.random.uniform(
                0, 1, size=len(batch)
            ).astype(self.xp.float32)[:, None, None, None]
            loss_real = -F.average(self.dis(F.concat([x_in, t_out])))
            loss_fake = F.average(self.dis(x_in_out_copy))
            y_mid = eps * x_in_out + (1.0 - eps) * x_in_out_copy
            # gradient penalty: drive the per-sample L2 norm of
            # d dis(y_mid) / d y_mid towards 1
            gd, = chainer.grad(
                [self.dis(y_mid)], [y_mid], enable_double_backprop=True)
            gd = F.sqrt(F.batch_l2_norm_squared(gd) + 1e-6)
            loss_dis_gp = F.mean_squared_error(gd, self.xp.ones_like(gd.data))
            chainer.report(
                {'loss_gp': args.lambda_wgan_gp * loss_dis_gp}, self.dis)
            loss_dis = ((loss_fake + loss_real) * 0.5
                        + args.lambda_wgan_gp * loss_dis_gp)
        else:
            loss_real = self.loss_func_comp(
                self.dis(F.concat([x_in, t_out])), 1.0, args.dis_jitter)
            loss_fake = self.loss_func_comp(
                self.dis(x_in_out_copy), 0.0, args.dis_jitter)
            ## mis-matched input-output pair should be discriminated as fake
            if self._buffer.num_imgs > 40 and args.lambda_mispair > 0:
                f_in = self.xp.concatenate(
                    random.sample(self._buffer.images, len(x_in)))
                # extract the first x_in channels of the concatenated [x_in,x_out]
                f_in = Variable(f_in[:, :x_in.shape[1], :, :])
                loss_mispair = self.loss_func_comp(
                    self.dis(F.concat([f_in, t_out])), 0.0, args.dis_jitter)
                chainer.report({'loss_mispair': loss_mispair}, self.dis)
            else:
                loss_mispair = 0
            loss_dis = (0.5 * (loss_fake + loss_real)
                        + args.lambda_mispair * loss_mispair)

        # common for discriminator
        chainer.report({'loss_fake': loss_fake}, self.dis)
        chainer.report({'loss_real': loss_real}, self.dis)
        self.dis.cleargrads()
        loss_dis.backward()
        opt_dis.update(loss=loss_dis)
pixmap = QPixmap.grabWindow(window_id, left, top, w, h) image = pixmap.toImage() bits = image.bits() bits.setsize(image.byteCount()) screen = Image.fromarray( np.array(bits).reshape((h, w, 4))[:, :, 2::-1]) reward, terminal = game.process(screen) logging.debug("reward={}, terminal={}".format(reward, terminal)) if reward is not None: train_image = xp.asarray( screen.resize( (train_width, train_height))).astype(np.float32).transpose( (2, 0, 1)) train_image = Variable( train_image.reshape((1, ) + train_image.shape) / 127.5 - 1, volatile=True) score = action_q(train_image, train=False) best = int(np.argmax(score.data)) action = game.randomize_action(best, random_probability) #print action, float(score.data[0][action]), best, float(score.data[0][best]), reward index = frame % POOL_SIZE state_pool[index] = cuda.to_cpu(train_image.data) action_pool[index] = action reward_pool[index - 1] = reward average_reward = average_reward * 0.9999 + reward * 0.0001 logging.debug("average reward: ", average_reward) if terminal: terminal_pool[index - 1] = 1 if only_result:
def train(self, lossfun, n_epochs=100):
    """Train the two generators and the per-sample latent codes, then plot.

    For every epoch and every index ``i`` of ``self.dataset`` the method
    maps the latent codes ``self.zx[i]`` / ``self.zy[i]`` through ``self.g``
    / ``self.f``, builds the direct and cycle losses with ``lossfun``,
    steps the four optimizers and finally replaces ``self.zx[i]`` and
    ``self.zy[i]`` by a projected gradient step.

    After training, the mean and standard deviation of ``self.zx`` and
    ``self.zy`` along axis 0 are stored in ``self.xrspace_mean``,
    ``self.xrspace_std``, ``self.yrspace_mean`` and ``self.yrspace_std``,
    and the generator loss recorded at the end of each epoch is plotted.

    Args:
        lossfun: Callable ``lossfun(prediction, target)`` whose result
            supports ``backward()`` and exposes ``.data``.
        n_epochs: Number of passes over ``self.dataset``.

    Returns:
        None.
    """
    print('Start training CycleGLO')
    # Only the raw value of the generator loss is kept per epoch; storing
    # the loss Variables themselves would keep their graphs alive.
    history = []
    target_shape = (1, self.n_pixels)

    for epoch in range(n_epochs):
        print(epoch)
        self.opt_g.new_epoch()
        self.opt_f.new_epoch()
        self.opt_zx.new_epoch()
        self.opt_zy.new_epoch()

        gen_loss = None  # stays None when the dataset is empty
        for i in range(len(self.dataset)):
            sample = self.dataset[i]
            x, y = Variable(sample[0]), Variable(sample[1])
            x_flat = x.reshape(target_shape)
            y_flat = y.reshape(target_shape)

            self.g.cleargrads()
            self.f.cleargrads()
            self.g.z.cleargrads()
            self.f.z.cleargrads()

            xy = self.g(self.zx[i])
            yx = self.f(self.zy[i])
            # Cycle passes start from raw arrays, i.e. detached from the
            # graphs of xy / yx.
            yxy = self.g(yx.data)
            xyx = self.f(xy.data)

            xy_copy = Variable(self.getAndUpdateBuffer('x', xy.data, epoch))
            yx_copy = Variable(self.getAndUpdateBuffer('y', yx.data, epoch))

            # NOTE(review): x_loss / y_loss are built from fresh Variables
            # wrapping buffered arrays; which parameters their backward()
            # reaches depends on lossfun -- confirm this is intended.
            x_loss = lossfun(yx_copy, x_flat)
            y_loss = lossfun(xy_copy, y_flat)
            g_loss = lossfun(xy, y_flat)
            f_loss = lossfun(yx, x_flat)
            cycle_x_loss = lossfun(xyx, x_flat)
            cycle_y_loss = lossfun(yxy, y_flat)
            gen_loss = (self.lambda2 * g_loss + self.lambda2 * f_loss
                        + cycle_x_loss + cycle_y_loss)

            # Linear learning-rate decay, never letting alpha drop to or
            # below zero.  This runs for every sample of a qualifying epoch.
            if (self.learning_rate_decay > 0
                    and epoch % self.learning_rate_interval == 0):
                if self.opt_g.alpha > self.learning_rate_decay:
                    self.opt_g.alpha -= self.learning_rate_decay
                if self.opt_f.alpha > self.learning_rate_decay:
                    self.opt_f.alpha -= self.learning_rate_decay

            x_loss.backward()
            y_loss.backward()
            self.opt_zx.update()
            self.opt_zy.update()

            #### Update
            gen_loss.backward()
            self.opt_g.update()
            self.opt_f.update()

            self.zx[i] = project_z_to_ball(
                self.zx[i] - 0.1 * self.g.z.z.grad)
            self.zy[i] = project_z_to_ball(
                self.zy[i] - 0.1 * self.f.z.z.grad)

        # Record the generator loss of the last sample of this epoch.
        if gen_loss is not None:
            history.append(gen_loss.data)

    print('done!')
    self.xrspace_mean = np.mean(self.zx, axis=0)
    self.xrspace_std = np.std(self.zx, axis=0)
    self.yrspace_mean = np.mean(self.zy, axis=0)
    self.yrspace_std = np.std(self.zy, axis=0)
    print(self.xrspace_mean, self.xrspace_std,
          self.yrspace_mean, self.yrspace_std)

    plt.plot(history)
    plt.title('Loss per epoch of CycleGAN')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    # Nothing was recorded when n_epochs == 0 or the dataset is empty.
    if history:
        print('last loss:', history[-1])
    plt.show()