def transform(self, data, test=False):
    """Encode image data and return the encoded array on the CPU.

    Args:
        data: Either a raw array or a ``Variable`` wrapping one. Input with
            fewer than four dimensions gets one leading axis prepended.
            If axis 1 does not match ``self.color_channels`` but the last
            axis does, the array is transposed with ``(0, 3, 1, 2)``.
        test: Forwarded unchanged to ``self._encode``.

    Returns:
        ``.data`` of the first value returned by ``self._encode``, after
        ``to_cpu()`` has been called on it.

    Raises:
        TypeError: If the data is not 4-d after the optional axis insertion,
            or if neither axis 1 nor the last axis equals
            ``self.color_channels``.
    """
    # isinstance (rather than an exact type comparison) so that subclasses
    # of Variable take the Variable path as well.
    is_variable = isinstance(data, Variable)
    array = data.data if is_variable else data

    # Make sure that data has the right shape.
    if len(array.shape) < 4:
        array = array[np.newaxis]
    if len(array.shape) != 4:
        raise TypeError("Invalid dimensions for image data. Dim = %s. Must be 4d array." % str(array.shape))
    if array.shape[1] != self.color_channels:
        if array.shape[-1] == self.color_channels:
            array = array.transpose(0, 3, 1, 2)
        else:
            raise TypeError("Invalid dimensions for image data. Dim = %s" % str(array.shape))

    if is_variable:
        # The caller's Variable is updated in place, but only once the
        # validation above has succeeded.
        if array is not data.data:
            data.data = array
    else:
        data = Variable(array)

    # Actual transformation.
    if self.flag_gpu:
        data.to_gpu()
    z = self._encode(data, test=test)[0]
    z.to_cpu()
    return z.data
def predict_file(voxel_path, out_path, model, channel, box_width, label_name,
                 device, batch_size=16):
    """Run ``model`` over a saved voxel file and store per-label scores.

    The sparse matrix at ``voxel_path`` is densified and reshaped to
    ``(n, 14, 30, 30, 30)``, restricted to the first ``channel`` channels and
    centre-cropped to ``box_width`` along each of the three spatial axes.

    Args:
        voxel_path: Path readable by ``sparse.load_npz``.
        out_path: Destination passed to ``np.savez``.
        model: Callable applied to each batch ``Variable``; its output is
            passed through ``F.sigmoid``.
        channel: Number of leading channels to keep.
        box_width: Edge length of the central crop.
        label_name: Sequence of names; entry ``k`` becomes the key for score
            column ``k`` in the saved archive.
        device: Batches are moved to the GPU when this is ``>= 0``.
        batch_size: Number of samples per forward pass (default 16).
    """
    data = sparse.load_npz(voxel_path)
    data = np.reshape(data.toarray(), [data.shape[0], 14, 30, 30, 30])[:, :channel].astype(np.float32)

    data_width = data.shape[2]
    b, e = (data_width - box_width) // 2, (data_width + box_width) // 2
    data = data[:, :, b:e, b:e, b:e]

    # Seed with an empty float64 block so the stacked result has the same
    # dtype and column count as before, even when there are no samples.
    score_chunks = [np.array([]).reshape(0, len(label_name))]
    for start in range(0, data.shape[0], batch_size):
        voxel = Variable(data[start:start + batch_size])
        if device >= 0:
            voxel.to_gpu()
        with function.no_backprop_mode(), chainer.using_config('train', False):
            pred_score = F.sigmoid(model(voxel))
        score_chunks.append(chainer.cuda.to_cpu(pred_score.data))

    # Stack once at the end instead of re-copying all earlier scores on
    # every batch.
    out_pred_score = np.vstack(score_chunks)

    out_data = {name: out_pred_score[:, index] for index, name in enumerate(label_name)}
    np.savez(out_path, **out_data)
def test_network(test, model, output_path='', batch_size=100, device_id=-1):
    '''
    Predict a label for every item of ``test`` and optionally save them.

    Args:
        test: Sliceable dataset; each slice is converted with ``np.array``
        model (chainer.links): Model exposing a ``predictor`` attribute
        output_path (str): Directory for ``prediction.csv``; nothing is
            written when this is the empty string
        batch_size (int): Batch size
        device_id (int): The GPU is used when this is greater than -1
    '''
    logging.info('Test network... Start.')

    on_gpu = device_id > -1

    # Put the model into evaluation mode on the requested device.
    model.train = False
    if on_gpu:
        model.to_gpu()

    prediction = []
    for start in range(0, len(test), batch_size):
        x = Variable(np.array(test[start:start + batch_size]))
        if on_gpu:
            x.to_gpu()
        scores = model.predictor(x).data
        prediction.extend(np.argmax(scores, axis=1))

    if output_path != '':
        csv_path = os.path.join(output_path, 'prediction.csv')
        pd.DataFrame(prediction).to_csv(csv_path, header=None)

    logging.info('Test network... Done.')
def forward_one_step(self, state, action, reward, next_state, test=False,
                     episode_ends=None):
    """Compute the clipped-error Q-learning loss for one batch.

    Args:
        state: Batch of states; ``state.shape[0]`` is the batch size.
        action: Per-sample actions, mapped to output indices through
            ``self.get_index_with_action``.
        reward: Per-sample rewards; only their sign is used.
        next_state: Batch of successor states.
        test: Forwarded to the Q-value helpers.
        episode_ends: Optional per-sample flags. A truthy entry means the
            bootstrap term is omitted for that sample. When omitted, no
            sample is treated as terminal.

    Returns:
        ``(loss, q)``: the mean squared error between the target and ``q``,
        and the Q-value ``Variable`` for ``state``.
    """
    xp = cuda.cupy if config.use_gpu else np
    n_batch = state.shape[0]

    state = Variable(state)
    next_state = Variable(next_state)
    if config.use_gpu:
        state.to_gpu()
        next_state.to_gpu()

    q = self.compute_q_variable(state, test=test)
    max_target_q = self.compute_target_q_variable(next_state, test=test)
    max_target_q = xp.amax(max_target_q.data, axis=1)

    # Start from the current Q-values so that only the entry of the action
    # actually taken contributes to the loss.
    target = q.data.copy()
    for i in range(n_batch):
        target_value = np.sign(reward[i])
        # Truthiness instead of `is True`: an element taken from a NumPy
        # boolean array is never the `True` singleton.
        terminal = episode_ends is not None and bool(episode_ends[i])
        if not terminal:
            target_value = target_value + config.rl_discount_factor * max_target_q[i]

        action_index = self.get_index_with_action(action[i])
        old_value = target[i, action_index]
        diff = target_value - old_value
        # Clip the error to the range [-1, 1].
        if diff > 1.0:
            target_value = 1.0 + old_value
        elif diff < -1.0:
            target_value = -1.0 + old_value
        target[i, action_index] = target_value

    target = Variable(target)
    loss = F.mean_squared_error(target, q)
    return loss, q
def forward_one_step(self, state, action, reward, next_state, test=False):
    """Compute the clipped-error Q-learning loss for one flattened batch.

    Both state batches are reshaped to
    ``(n_batch, config.rl_history_length * 34)``. The next action is chosen
    by ``self.compute_q_variable`` and its value is read from
    ``self.compute_target_q_variable``.

    Returns:
        ``(loss, q)``: the mean squared error between the target and ``q``,
        and the Q-value ``Variable`` for ``state``.
    """
    xp = cuda.cupy if config.use_gpu else np
    n_batch = state.shape[0]
    flat_shape = (n_batch, config.rl_history_length * 34)

    state = Variable(state.reshape(flat_shape))
    next_state = Variable(next_state.reshape(flat_shape))
    if config.use_gpu:
        state.to_gpu()
        next_state.to_gpu()

    q = self.compute_q_variable(state, test=test)
    next_q = self.compute_q_variable(next_state, test=test)
    best_next_actions = xp.argmax(next_q.data, axis=1)
    if config.use_gpu:
        best_next_actions = cuda.to_cpu(best_next_actions)
    target_q = self.compute_target_q_variable(next_state, test=test)

    # Entries for actions that were not taken stay equal to q, so they add
    # nothing to the squared error.
    target = q.data.copy()
    for row in range(n_batch):
        bootstrap = target_q.data[row][best_next_actions[row]]
        target_value = reward[row] + config.rl_discount_factor * bootstrap

        column = self.get_index_for_action(action[row])
        old_value = target[row, column]
        diff = target_value - old_value
        # Keep the per-sample error within [-1, 1].
        if diff > 1.0:
            target_value = 1.0 + old_value
        elif diff < -1.0:
            target_value = -1.0 + old_value
        target[row, column] = target_value

    loss = F.mean_squared_error(Variable(target), q)
    return loss, q
def visualize_walkthrough():
    """Plot a 10x10 grid of decoded latent interpolations and save it.

    Twenty samples are encoded with ``gen``. The first ten and last ten
    latent vectors are paired, and each grid column decodes one of ten
    evenly spaced linear blends between the pairs. The figure is written to
    ``<args.visualization_dir>/walk_through.png``.
    """
    x_batch = sample_x_from_data_distribution(20)
    z_batch = gen(x_batch, test=True)
    if use_gpu:
        z_batch.to_cpu()

    fig = pylab.gcf()
    fig.set_size_inches(16.0, 16.0)
    pylab.clf()
    if config.img_channel == 1:
        pylab.gray()

    z_a = z_batch.data[:10, :]
    z_b = z_batch.data[10:, :]
    width = config.img_width
    for col in range(10):
        _z_batch = z_a * (1 - col / 9.0) + z_b * col / 9.0
        _z_batch = Variable(_z_batch)
        if use_gpu:
            _z_batch.to_gpu()
        _x_batch = dec(_z_batch, test=True)
        if use_gpu:
            _x_batch.to_cpu()

        for row in range(10):
            pylab.subplot(10, 10, row * 10 + col + 1)
            # Map the decoder output from [-1, 1] to [0, 1] for display.
            image = np.clip((_x_batch.data[row] + 1.0) / 2.0, 0.0, 1.0)
            if config.img_channel == 1:
                pylab.imshow(image.reshape((width, width)), interpolation="none")
            elif config.img_channel == 3:
                # imshow expects colour images as (rows, cols, channels);
                # the flat sample is laid out channel-first, so move the
                # channel axis to the end.
                image = image.reshape((config.img_channel, width, width))
                pylab.imshow(image.transpose(1, 2, 0), interpolation="none")
            pylab.axis("off")

    pylab.savefig("%s/walk_through.png" % args.visualization_dir)
def inverse_transform(self, data, test=False):
    """Decode latent vectors and return the decoded array on the CPU.

    Args:
        data: Either a raw array or a ``Variable`` wrapping one. 1-d input
            gets one leading axis prepended.
        test: Forwarded unchanged to ``self._decode``.

    Returns:
        The decoder output's ``.data``. Unless ``self.mode == 'linear'`` it
        is transposed with ``(0, 2, 3, 1)`` first.

    Raises:
        TypeError: If the data is not 2-d after the optional axis insertion.
        AssertionError: If the last axis differs from ``self.latent_width``.
    """
    # isinstance (rather than an exact type comparison) so that subclasses
    # of Variable take the Variable path as well.
    is_variable = isinstance(data, Variable)
    array = data.data if is_variable else data

    if len(array.shape) < 2:
        array = array[np.newaxis]
    if len(array.shape) != 2:
        raise TypeError("Invalid dimensions for latent data. Dim = %s. Must be a 2d array." % str(array.shape))

    if is_variable:
        # The caller's Variable is updated in place, but only once the
        # dimensionality check has passed.
        if array is not data.data:
            data.data = array
    else:
        data = Variable(array)

    assert data.data.shape[-1] == self.latent_width, "Latent shape %d != %d" % (data.data.shape[-1], self.latent_width)

    if self.flag_gpu:
        data.to_gpu()
    out = self._decode(data, test=test)
    out.to_cpu()

    if self.mode == 'linear':
        final = out.data
    else:
        final = out.data.transpose(0, 2, 3, 1)
    return final
def to_onehot(y, num_classes, label_smoothing_prob=0, use_cuda=False):
    """Convert indices into one-hot encoding.

    Args:
        y (chainer.Variable, int): Indices of labels.
            A tensor of size `[B, 1]`.
        num_classes (int): the number of classes
        label_smoothing_prob (float, optional): when greater than 0, every
            one-hot row is blended with the uniform distribution:
            ``onehot * (1 - p) + p / num_classes``
        use_cuda (bool, optional): if True, use GPUs
    Returns:
        y (chainer.Variable, float): A tensor of size `[B, 1, num_classes]`
    """
    batch_size = y.shape[0]
    indices = y.data.reshape(batch_size).tolist()
    y_onehot = Variable(np.eye(num_classes, dtype=np.float32)[indices])
    if use_cuda:
        y_onehot.to_gpu()
    y_onehot = y_onehot.reshape(batch_size, 1, num_classes)

    # Label smoothing. It has to be applied to the one-hot tensor that is
    # returned, not to the index tensor. `1.0 /` keeps the uniform share
    # from truncating to zero under integer division.
    if label_smoothing_prob > 0:
        y_onehot = y_onehot * (1 - label_smoothing_prob) + \
            1.0 / num_classes * label_smoothing_prob

    # TODO: propagate y.volatile to y_onehot (left disabled in the original).
    return y_onehot
def supervised_gaussian_mixture(batchsize, n_dim, label_indices, n_labels, gpu_enabled=True):
    """Sample label-conditioned points from a ring of 2-d Gaussians.

    Each consecutive pair of output dimensions holds one 2-d point. It is
    drawn from an axis-aligned Gaussian, rotated by
    ``2*pi * label / n_labels`` and shifted by 1.4 along that direction.

    Args:
        batchsize: Number of rows to sample.
        n_dim: Output width; must be even.
        label_indices: Indexable of per-row labels (converted with ``float``).
        n_labels: Number of distinct labels / mixture components.
        gpu_enabled: When true the result is moved to the GPU.

    Returns:
        A ``Variable`` wrapping a float32 array of shape ``(batchsize, n_dim)``.

    Raises:
        Exception: If ``n_dim`` is odd.
    """
    if n_dim % 2 != 0:
        raise Exception("n_dim must be a multiple of 2.")

    def sample(x, y, label, n_labels):
        shift = 1.4
        r = 2.0 * np.pi / float(n_labels) * float(label)
        new_x = x * cos(r) - y * sin(r)
        new_y = x * sin(r) + y * cos(r)
        new_x += shift * cos(r)
        new_y += shift * sin(r)
        return np.array([new_x, new_y]).reshape((2,))

    # Floor division keeps the pair count an int under true division too.
    n_pairs = n_dim // 2

    # np.random.normal takes these as standard deviations, despite the names.
    x_var = 0.5
    y_var = 0.05
    x = np.random.normal(0, x_var, (batchsize, n_pairs))
    y = np.random.normal(0, y_var, (batchsize, n_pairs))

    z = np.empty((batchsize, n_dim), dtype=np.float32)
    for batch in range(batchsize):
        for zi in range(n_pairs):
            z[batch, zi*2:zi*2+2] = sample(x[batch, zi], y[batch, zi], label_indices[batch], n_labels)

    z = Variable(z)
    if gpu_enabled:
        z.to_gpu()
    return z
def sample_z_from_n_2d_gaussian_mixture(batchsize, z_dim, label_indices, n_labels, gpu=False):
    """Sample label-conditioned latent codes from a ring of 2-d Gaussians.

    Each consecutive pair of output dimensions holds one 2-d point. It is
    drawn from an axis-aligned Gaussian, rotated by
    ``2*pi * label / n_labels`` and shifted by 1.4 along that direction.

    Args:
        batchsize: Number of rows to sample.
        z_dim: Output width; must be even.
        label_indices: Indexable of per-row labels (converted with ``float``).
        n_labels: Number of distinct labels / mixture components.
        gpu: When true the result is moved to the GPU.

    Returns:
        A ``Variable`` wrapping a float32 array of shape ``(batchsize, z_dim)``.

    Raises:
        Exception: If ``z_dim`` is odd.
    """
    if z_dim % 2 != 0:
        raise Exception("z_dim must be a multiple of 2.")

    def sample(x, y, label, n_labels):
        shift = 1.4
        r = 2.0 * np.pi / float(n_labels) * float(label)
        new_x = x * cos(r) - y * sin(r)
        new_y = x * sin(r) + y * cos(r)
        new_x += shift * cos(r)
        new_y += shift * sin(r)
        return np.array([new_x, new_y]).reshape((2,))

    # Floor division keeps the pair count an int under true division too.
    n_pairs = z_dim // 2

    # np.random.normal takes these as standard deviations, despite the names.
    x_var = 0.5
    y_var = 0.05
    x = np.random.normal(0, x_var, (batchsize, n_pairs))
    y = np.random.normal(0, y_var, (batchsize, n_pairs))

    z = np.empty((batchsize, z_dim), dtype=np.float32)
    for batch in range(batchsize):
        for zi in range(n_pairs):
            z[batch, zi*2:zi*2+2] = sample(x[batch, zi], y[batch, zi], label_indices[batch], n_labels)

    z = Variable(z)
    if gpu:
        z.to_gpu()
    return z
class AdamLearner(Link):
    """Link that keeps running first/second moment estimates of its input.

    ``beta1`` and ``beta2`` are parameters of shape ``(dim,)``; ``m`` and
    ``v`` are plain ``Variable`` buffers of the same shape that are
    overwritten on every call.
    """

    def __init__(self, dim):
        super(AdamLearner, self).__init__(
            beta1=(dim, ),
            beta2=(dim, )
        )
        self.beta1.data.fill(-1e12)
        self.beta2.data.fill(-1e12)
        self.m = Variable(np.zeros_like(self.beta1.data))
        self.v = Variable(np.zeros_like(self.beta2.data))

    def to_gpu(self, device=None):
        """Move the parameters and both moment buffers to ``device``."""
        # Forward `device` to the parent as well, so the parameters land on
        # the same GPU as the m/v buffers.
        super(AdamLearner, self).to_gpu(device)
        self.m.to_gpu(device)
        self.v.to_gpu(device)
        return self

    def __call__(self, x):
        """Update the moment buffers with ``x`` and return the scaled step.

        Returns:
            ``1e-3 * m / sqrt(v + 1e-8)`` computed from the updated buffers.
        """
        # NOTE(review): the raw beta parameters are used as the decay
        # coefficients. An earlier, disabled variant squashed them through
        # F.sigmoid first. Confirm which is intended, given the -1e12 fill.
        self.m = self.beta1 * self.m + (1 - self.beta1) * x
        self.v = self.beta2 * self.v + (1 - self.beta2) * x**2
        g = 1e-3 * self.m / F.sqrt(self.v + 1e-8)
        return g
def onehot_categorical(batchsize, n_labels, gpu_enabled=True):
    """Return a batch of uniformly random one-hot rows.

    The result is a ``Variable`` wrapping a float32 array of shape
    ``(batchsize, n_labels)``; it is moved to the GPU when ``gpu_enabled``.
    """
    onehot = np.zeros((batchsize, n_labels), dtype=np.float32)
    hot_columns = np.random.randint(0, n_labels, batchsize)
    # One fancy-indexed assignment sets the chosen column of every row.
    onehot[np.arange(batchsize), hot_columns] = 1

    result = Variable(onehot)
    if gpu_enabled:
        result.to_gpu()
    return result
def sample_z_from_n_2d_gaussian_mixture(batchsize, z_dim, label_indices, n_labels, gpu=False):
    """Sample rows of two independent N(0.5, 0.2) values.

    ``label_indices`` and ``n_labels`` are accepted but not used. Each row
    is assigned a length-2 vector.

    Returns:
        A ``Variable`` wrapping a float32 array of shape ``(batchsize, z_dim)``.
    """
    samples = np.zeros((batchsize, z_dim), dtype=np.float32)
    for row in range(batchsize):
        first = np.random.normal(0.5, 0.2, 1)
        second = np.random.normal(0.5, 0.2, 1)
        samples[row] = np.concatenate((first, second))

    z = Variable(samples)
    if gpu:
        z.to_gpu()
    return z
def encode(self, data, test=False):
    """Encode ``data`` and draw a latent sample from the result.

    The output of ``self.enc`` is split into two halves along axis 1, used
    as the mean and log-variance. The sample is
    ``noise * exp(0.5 * ln_var) + mean`` with standard-normal noise.

    Returns:
        ``(z, mean, ln_var)``.
    """
    encoded = self.enc(data, test=test)
    mean, ln_var = F.split_axis(encoded, 2, 1)

    noise = Variable(np.random.standard_normal(mean.data.shape).astype('float32'))
    if self.flag_gpu:
        noise.to_gpu()

    z = noise * F.exp(0.5*ln_var) + mean
    return z, mean, ln_var
def sample_x_from_data_distribution(batchsize):
    """Draw ``batchsize`` images from ``dataset`` with replacement.

    Every image is flattened to
    ``config.img_channel * config.img_width * config.img_width`` values.

    Returns:
        A ``Variable`` wrapping the float32 batch, moved to the GPU when
        ``config.use_gpu`` is set.
    """
    flat_size = config.img_channel * config.img_width * config.img_width
    batch = np.zeros((batchsize, flat_size), dtype=np.float32)
    for row in range(batchsize):
        picked = np.random.randint(len(dataset))
        batch[row] = dataset[picked].reshape((flat_size,))

    x_batch = Variable(batch)
    if config.use_gpu:
        x_batch.to_gpu()
    return x_batch
def multi_box_intersection(a, b):
    """Return the element-wise intersection area of box sets ``a`` and ``b``.

    Overlaps along each axis come from ``multi_overlap`` and are clamped at
    zero before being multiplied. The zero tensor is always moved to the
    GPU.
    """
    overlap_w = multi_overlap(a.x, a.w, b.x, b.w)
    overlap_h = multi_overlap(a.y, a.h, b.y, b.h)

    floor = Variable(np.zeros(overlap_w.shape, dtype=overlap_w.data.dtype))
    floor.to_gpu()

    clamped_w = F.maximum(overlap_w, floor)
    clamped_h = F.maximum(overlap_h, floor)
    return clamped_w * clamped_h
def forward_one_step(self, state, action, reward, next_state, episode_ends, test=False):
    """Compute the clipped-error Q-learning loss for one batch.

    The next action is chosen by ``self.compute_q_variable`` and its value
    is read from ``self.compute_target_q_variable``.

    Args:
        state: Batch of states; ``state.shape[0]`` is the batch size.
        action: Per-sample actions, mapped to output indices through
            ``self.get_index_with_action``.
        reward: Per-sample rewards; only their sign is used.
        next_state: Batch of successor states.
        episode_ends: Per-sample flags; a truthy entry omits the bootstrap
            term for that sample.
        test: Forwarded to the Q-value helpers.

    Returns:
        ``(loss, q)``: the mean squared error between the target and ``q``,
        and the Q-value ``Variable`` for ``state``.
    """
    xp = cuda.cupy if config.use_gpu else np
    n_batch = state.shape[0]

    state = Variable(state)
    next_state = Variable(next_state)
    if config.use_gpu:
        state.to_gpu()
        next_state.to_gpu()

    q = self.compute_q_variable(state, test=test)
    q_ = self.compute_q_variable(next_state, test=test)
    max_action_indices = xp.argmax(q_.data, axis=1)
    if config.use_gpu:
        max_action_indices = cuda.to_cpu(max_action_indices)

    # Generate target
    target_q = self.compute_target_q_variable(next_state, test=test)

    # Initialize the target signal with the current Q-values.
    target = q.data.copy()

    for i in range(n_batch):
        # Clip all positive rewards at 1 and all negative rewards at -1.
        target_value = np.sign(reward[i])
        # Truthiness instead of `is True`: an element taken from a NumPy
        # boolean array is never the `True` singleton, so the terminal
        # branch would otherwise never run for array input.
        if not episode_ends[i]:
            target_value = target_value + config.rl_discount_factor * target_q.data[i][max_action_indices[i]]

        action_index = self.get_index_with_action(action[i])

        # Only the action actually taken contributes an error. Every other
        # entry keeps target == q, so its squared error is 0.
        old_value = target[i, action_index]
        diff = target_value - old_value

        # Clip the error to be between -1 and 1.
        if diff > 1.0:
            target_value = 1.0 + old_value
        elif diff < -1.0:
            target_value = -1.0 + old_value
        target[i, action_index] = target_value

    target = Variable(target)

    # Compute error
    loss = F.mean_squared_error(target, q)
    return loss, q
def train_word_embedding_batch(self, char_ids_batch):
    """Run one reconstruction + adversarial update on a batch of words.

    Three updates happen in order: the generator on the summed per-character
    reconstruction loss, the generator on the adversarial loss, and the
    discriminator.

    Returns:
        ``(reconstruction_loss, generator_loss, discriminator_loss)`` as
        Python floats.
    """
    xp = self.xp
    word_vec = self.encode_word_batch(char_ids_batch)
    batchsize = char_ids_batch.shape[0]
    char_ids_batch = char_ids_batch.T

    def int_labels(fill):
        # Fresh int32 label vector of length batchsize.
        return Variable(fill((batchsize,), dtype=xp.int32))

    # reconstruction loss
    self.word_decoder_lstm.reset_state()
    loss_reconstruction = 0
    # The first decoder step is fed an all-zero "previous character".
    prev_y = Variable(xp.zeros((batchsize, self.char_embed_size), dtype=xp.float32))
    for step in range(char_ids_batch.shape[0]):
        decoder_input = F.concat((word_vec, prev_y))
        y = self.word_decoder_lstm(decoder_input, test=False)
        target = Variable(char_ids_batch[step])
        if self.gpu_enabled:
            target.to_gpu()
        loss_reconstruction += F.softmax_cross_entropy(y, target)
        prev_y = self.embed_id(target)

    self.zero_grads_generator()
    loss_reconstruction.backward()
    self.update_generator()

    # adversarial loss
    ## 0: from encoder
    ## 1: from noise
    real_z = self.sample_z(batchsize, self.word_embed_size)
    fake_z = word_vec
    y_fake = self.discriminator(fake_z, test=False)

    ## train generator
    loss_generator = F.softmax_cross_entropy(y_fake, int_labels(xp.ones))
    self.zero_grads_generator()
    loss_generator.backward()
    self.update_generator()

    # train discriminator
    y_real = self.discriminator(real_z, test=False)
    loss_discriminator = F.softmax_cross_entropy(y_fake, int_labels(xp.zeros))
    loss_discriminator += F.softmax_cross_entropy(y_real, int_labels(xp.ones))
    self.optimizer_discriminator.zero_grads()
    loss_discriminator.backward()
    self.optimizer_discriminator.update()

    return float(loss_reconstruction.data), float(loss_generator.data), float(loss_discriminator.data)
def sample_x_and_label_from_data_distribution(batchsize, sequential=False):
    """Build a batch of flattened images together with their labels.

    Items are drawn from ``dataset`` with replacement, or taken in order
    (index ``0 .. batchsize-1``) when ``sequential`` is true.

    Returns:
        ``(x_batch, label_batch)``: a ``Variable`` wrapping the float32
        images, and an int32 NumPy array of shape ``(batchsize, 1)``.
    """
    flat_size = config.img_channel * config.img_width * config.img_width
    images = np.zeros((batchsize, flat_size), dtype=np.float32)
    label_batch = np.zeros((batchsize, 1), dtype=np.int32)

    for row in range(batchsize):
        # The random index is drawn even in sequential mode, so the global
        # random stream advances the same way in both modes.
        random_index = np.random.randint(len(dataset))
        data_index = row if sequential else random_index
        images[row] = dataset[data_index].reshape((flat_size,))
        label_batch[row] = labels[data_index]

    x_batch = Variable(images)
    if use_gpu:
        x_batch.to_gpu()
    return x_batch, label_batch
def sample_x_and_label_from_data_distribution(batchsize):
    """Draw a random batch of flattened images with index and one-hot labels.

    The one-hot array has a fixed width of 10.

    Returns:
        ``(x_batch, label_index_batch, label_one_hot)``: the image and
        one-hot batches are ``Variable`` objects (moved to the GPU when
        ``config.use_gpu``); the index batch is an int32 NumPy array of
        shape ``(batchsize, 1)``.
    """
    flat_size = config.img_channel * config.img_width * config.img_width
    images = np.zeros((batchsize, flat_size), dtype=np.float32)
    label_index_batch = np.zeros((batchsize, 1), dtype=np.int32)
    one_hot = np.zeros((batchsize, 10), dtype=np.float32)

    for row in range(batchsize):
        picked = np.random.randint(len(dataset))
        label = labels[picked]
        images[row] = dataset[picked].reshape((flat_size,))
        label_index_batch[row] = label
        one_hot[row, label] = 1.0

    x_batch = Variable(images)
    label_one_hot = Variable(one_hot)
    if config.use_gpu:
        x_batch.to_gpu()
        label_one_hot.to_gpu()
    return x_batch, label_index_batch, label_one_hot
def sample_z_from_swiss_roll_distribution(batchsize, z_dim, label_indices, n_labels, gpu=False):
    """Sample label-conditioned latent codes along a 2-d spiral.

    Each consecutive pair of output dimensions holds one 2-d point. A
    uniform value in the label's slice ``[label/n_labels, (label+1)/n_labels)``
    is mapped to radius ``3*sqrt(u)`` and angle ``4*pi*sqrt(u)``. When
    ``z_dim`` is odd, the last column stays zero.

    Args:
        batchsize: Number of rows to sample.
        z_dim: Output width.
        label_indices: Indexable of per-row labels (converted with ``float``).
        n_labels: Number of distinct labels.
        gpu: When true the result is moved to the GPU.

    Returns:
        A ``Variable`` wrapping a float32 array of shape ``(batchsize, z_dim)``.
    """
    def sample(label, n_labels):
        uni = np.random.uniform(0.0, 1.0) / float(n_labels) + float(label) / float(n_labels)
        r = math.sqrt(uni) * 3.0
        rad = np.pi * 4.0 * math.sqrt(uni)
        x = r * cos(rad)
        y = r * sin(rad)
        return np.array([x, y]).reshape((2,))

    z = np.zeros((batchsize, z_dim), dtype=np.float32)
    # Floor division keeps the pair count an int under true division too.
    n_pairs = z_dim // 2
    for batch in range(batchsize):
        for zi in range(n_pairs):
            z[batch, zi*2:zi*2+2] = sample(label_indices[batch], n_labels)

    z = Variable(z)
    if gpu:
        z.to_gpu()
    return z
def sample_x_y(self, x, argmax=False, test=False):
    """Pick one label per row from the predicted distribution for ``x``.

    With ``argmax`` the most probable label is taken. Otherwise a label is
    drawn at random with the predicted probabilities.

    Returns:
        A ``Variable`` wrapping a float32 one-hot array of shape
        ``(batchsize, n_labels)``, moved to the GPU when ``self.gpu``.
    """
    batchsize = x.data.shape[0]
    y_distribution = self.encoder_x_y(x, test=test, softmax=True).data
    n_labels = y_distribution.shape[1]
    if self.gpu:
        y_distribution = cuda.to_cpu(y_distribution)

    if argmax:
        best = np.argmax(y_distribution, axis=1)
        chosen = [best[b] for b in range(batchsize)]
    else:
        chosen = [
            np.random.choice(np.arange(n_labels), p=y_distribution[b])
            for b in range(batchsize)
        ]

    one_hot = np.zeros((batchsize, n_labels), dtype=np.float32)
    for b, label_id in enumerate(chosen):
        one_hot[b, label_id] = 1

    sampled_y = Variable(one_hot)
    if self.gpu:
        sampled_y.to_gpu()
    return sampled_y
def encode_word_batch(self, char_ids_batch, test=False):
    """Encode a batch of character-id sequences into word vectors.

    The batch is transposed so that each step feeds one character position
    for every word. Positions whose id is ``-1`` get an all-False row in the
    boolean ``condition`` array passed to the LSTM.

    Args:
        char_ids_batch: 2-d array of character ids, one row per word.
        test: Forwarded to ``self.word_encoder_lstm``.

    Returns:
        The output of ``self.word_encoder_fc`` applied to the LSTM output of
        the final step.
    """
    xp = self.xp
    self.word_encoder_lstm.reset_state()
    output = None
    char_ids_batch = char_ids_batch.T
    batchsize = char_ids_batch.shape[1]
    n_units = self.conf.word_encoder_lstm_units[0]

    for i in range(char_ids_batch.shape[0]):
        # Built-in bool as the dtype: the np.bool alias no longer exists in
        # current NumPy releases.
        condition = np.full((batchsize, n_units), True, dtype=bool)
        # Clear every row whose character at this position is the -1 marker.
        condition[char_ids_batch[i] == -1, :] = False
        condition = Variable(condition)
        if self.gpu_enabled:
            condition.to_gpu()

        c0 = Variable(xp.asanyarray(char_ids_batch[i], dtype=xp.int32))
        c0 = self.embed_id(c0)
        output = self.word_encoder_lstm(c0, condition, test=test)

    output = self.word_encoder_fc(output, apply_f=True)
    return output
def sample_x_and_label_from_data_distribution(batchsize):
    """Draw a random batch of 2-d points with index and one-hot labels.

    The one-hot array has a fixed width of 2.

    Returns:
        ``(x_batch, label_index_batch, label_one_hot)``: the point and
        one-hot batches are ``Variable`` objects (moved to the GPU when
        ``config.use_gpu``); the index batch is an int32 NumPy array of
        shape ``(batchsize, 1)``.
    """
    points = np.zeros((batchsize, 2), dtype=np.float32)
    label_index_batch = np.zeros((batchsize, 1), dtype=np.int32)
    one_hot = np.zeros((batchsize, 2), dtype=np.float32)

    for row in range(batchsize):
        picked = np.random.randint(len(dataset))
        label = labels[picked]
        points[row] = dataset[picked]
        label_index_batch[row] = label
        one_hot[row, label] = 1.0

    x_batch = Variable(points)
    label_one_hot = Variable(one_hot)
    if config.use_gpu:
        x_batch.to_gpu()
        label_one_hot.to_gpu()
    return x_batch, label_index_batch, label_one_hot
def predict(test_data, classifier, batchsize = 5, gpu = True):
    """Return softmax scores for every sample produced by ``test_data``.

    The predictor is moved to the GPU or CPU according to ``gpu`` and its
    ``train`` flag is cleared. The result array has ``len(test_data.index)``
    rows and a fixed width of 25 columns.
    """
    predictor = classifier.predictor
    if gpu:
        predictor.to_gpu()
    else:
        predictor.to_cpu()
    predictor.train = False

    predictions = np.zeros((len(test_data.index),25))
    filled = 0
    for data in test_data.generate_minibatch(batchsize):
        begin, filled = filled, filled + len(data)

        x = Variable(data)
        if gpu:
            x.to_gpu()
        yhat = F.softmax(classifier.predictor(x))
        yhat.to_cpu()

        # Each minibatch fills the next len(data) rows.
        predictions[begin:filled,:] = yhat.data
    return predictions
def sample_ax_y(self, a, x, argmax=False, test=False):
    """Pick one label per row from the distribution predicted for ``(a, x)``.

    With ``argmax`` the most probable label is taken. Otherwise a label is
    drawn at random with the predicted probabilities.

    Returns:
        A ``Variable`` wrapping a float32 one-hot array of shape
        ``(batchsize, n_labels)``, moved to the GPU when ``self.gpu_enabled``.
    """
    a = self.to_variable(a)
    x = self.to_variable(x)
    batchsize = self.get_batchsize(x)

    y_distribution = F.softmax(self.q_y_ax(a, x, test=test)).data
    n_labels = y_distribution.shape[1]
    if self.gpu_enabled:
        y_distribution = cuda.to_cpu(y_distribution)

    if argmax:
        best = np.argmax(y_distribution, axis=1)
        chosen = [best[b] for b in range(batchsize)]
    else:
        chosen = [
            np.random.choice(np.arange(n_labels), p=y_distribution[b])
            for b in range(batchsize)
        ]

    one_hot = np.zeros((batchsize, n_labels), dtype=np.float32)
    for b, label_id in enumerate(chosen):
        one_hot[b, label_id] = 1

    sampled_y = Variable(one_hot)
    if self.gpu_enabled:
        sampled_y.to_gpu()
    return sampled_y
def eps_greedy(self, state_batch, exploration_rate):
    """Choose an action for every state in the batch, epsilon-greedily.

    One uniform draw decides for the whole batch: below
    ``exploration_rate`` all action indices are random, otherwise each is
    the argmax of the Q-values. Indices are then mapped through
    ``self.get_action_for_index``.

    Returns:
        ``(action_batch, q)`` where ``q`` is the Q-value array, or ``None``
        when the random branch was taken.
    """
    # Normalise the input to a 2-d (batch, features) layout.
    if state_batch.ndim == 1:
        state_batch = state_batch.reshape(1, -1)
    elif state_batch.ndim == 3:
        state_batch = state_batch.reshape(-1, 34 * config.rl_history_length)

    explore = np.random.uniform() < exploration_rate
    if explore:
        q = None
        action_batch = np.random.randint(0, len(config.actions), (state_batch.shape[0],))
    else:
        states = Variable(state_batch)
        if config.use_gpu:
            states.to_gpu()
        q_variable = self.compute_q_variable(states, test=True)
        if config.use_gpu:
            q_variable.to_cpu()
        q = q_variable.data
        action_batch = np.argmax(q, axis=1)

    # NOTE(review): nesting was reconstructed from a collapsed source line;
    # the index-to-action mapping is assumed to apply to both branches.
    for position in range(action_batch.shape[0]):
        action_batch[position] = self.get_action_for_index(action_batch[position])
    return action_batch, q
def sample_z_from_10_2d_gaussian_mixture(batchsize, label_indices, n_labels, gpu=False):
    """Sample label-conditioned 2-d points from a ring of Gaussians.

    Each point is drawn from an axis-aligned Gaussian, rotated by
    ``2*pi * label / n_labels`` and shifted by 1.4 along that direction.

    Returns:
        A ``Variable`` wrapping a float32 array of shape ``(batchsize, 2)``.
    """
    def rotate_and_shift(px, py, label):
        shift = 1.4
        r = 2.0 * np.pi / float(n_labels) * float(label)
        rx = px * cos(r) - py * sin(r)
        ry = px * sin(r) + py * cos(r)
        rx += shift * cos(r)
        ry += shift * sin(r)
        return np.array([rx, ry]).reshape((2,))

    # np.random.normal takes these as standard deviations, despite the names.
    x_var = 0.5
    y_var = 0.05
    xs = np.random.normal(0, x_var, (batchsize, 1))
    ys = np.random.normal(0, y_var, (batchsize, 1))

    points = np.zeros((batchsize, 2), dtype=np.float32)
    for row in range(batchsize):
        points[row] = rotate_and_shift(xs[row, 0], ys[row, 0], label_indices[row])

    z = Variable(points)
    if gpu:
        z.to_gpu()
    return z
def eps_greedy(self, state, exploration_rate):
    """Choose one action epsilon-greedily, limiting consecutive no-ops.

    Args:
        state: Input for ``self.compute_q_variable`` (wrapped in a
            ``Variable`` on the greedy path).
        exploration_rate: Probability threshold for taking a random action.

    Returns:
        ``(action, q_max, q_min)``. ``q_max`` and ``q_min`` are the extreme
        Q-values on the greedy path and ``None`` on the random path.

    Side effects:
        Updates ``self.no_op_count``. Once it exceeds
        ``config.rl_no_op_max``, a random action other than the one at the
        arg-min position of ``config.ale_actions`` is returned instead.
    """
    prop = np.random.uniform()
    q_max = None
    q_min = None
    if prop < exploration_rate:
        # Select a random action
        action_index = np.random.randint(0, len(config.ale_actions))
    else:
        # Select a greedy action
        state = Variable(state)
        if config.use_gpu:
            state.to_gpu()
        q = self.compute_q_variable(state, test=True)
        if config.use_gpu:
            action_index = cuda.to_cpu(cuda.cupy.argmax(q.data))
            q_max = cuda.to_cpu(cuda.cupy.max(q.data))
            q_min = cuda.to_cpu(cuda.cupy.min(q.data))
        else:
            action_index = np.argmax(q.data)
            q_max = np.max(q.data)
            q_min = np.min(q.data)

    action = self.get_action_with_index(action_index)

    # No-op: action 0 increments the streak, anything else resets it.
    self.no_op_count = self.no_op_count + 1 if action == 0 else 0
    if self.no_op_count > config.rl_no_op_max:
        no_op_index = np.argmin(np.asarray(config.ale_actions))
        actions_without_no_op = [
            candidate
            for i, candidate in enumerate(config.ale_actions)
            if i != no_op_index
        ]
        action_index = np.random.randint(0, len(actions_without_no_op))
        action = actions_without_no_op[action_index]
        # Function-call form prints the same text on Python 2 and 3.
        print("Reached no_op_max. New action: %s" % (action,))

    return action, q_max, q_min
def fit(self, img_data, gamma=1.0, save_freq=-1, pic_freq=-1, n_epochs=100,
        batch_size=100, weight_decay=True,
        model_path='./VAEGAN_training_model/',
        img_path='./VAEGAN_training_images/', img_out_width=10,
        mirroring=False):
    """Train the encoder, decoder and discriminator on ``img_data``.

    Per-epoch mean losses are printed and appended as one row to
    ``self.loss_buf`` (a DataFrame that is re-created on every call).

    Args:
        img_data: Array of images indexed along axis 0 (4-d, judging by the
            mirroring code).
        gamma: Weight of the similarity loss inside the decoder loss.
        save_freq, pic_freq, model_path, img_path: Accepted but not used in
            the visible code.
        n_epochs: Number of passes over ``img_data``.
        batch_size: Samples per update.
        weight_decay: When true, a ``WeightDecay(0.00001)`` hook is added to
            all three optimizers.
        img_out_width: Number of latent samples drawn once at start-up.
        mirroring: When true, each image is flipped along its last axis
            with probability 1/2.
    """
    width = img_out_width
    self.enc_opt.setup(self.enc)
    self.dec_opt.setup(self.dec)
    self.disc_opt.setup(self.disc)

    if weight_decay:
        self.enc_opt.add_hook(chainer.optimizer.WeightDecay(0.00001))
        self.dec_opt.add_hook(chainer.optimizer.WeightDecay(0.00001))
        self.disc_opt.add_hook(chainer.optimizer.WeightDecay(0.00001))

    n_data = img_data.shape[0]
    batch_iter = list(range(0, n_data, batch_size))
    n_batches = len(batch_iter)

    # Not used below; the draw is kept so that the global NumPy random
    # stream is consumed exactly as before.
    c_samples = np.random.standard_normal((width, self.latent_width)).astype(np.float32)

    df_col = ['epoch', 'enc_loss', 'dec_loss', 'dis_loss', 'GAN_loss',
              'like_loss', 'prior_loss', 'L_base', 'L_rec', 'L_p']
    self.loss_buf = pd.DataFrame(columns=df_col)

    for epoch in range(1, n_epochs + 1):
        print('epoch: %i' % epoch)
        t1 = time.time()
        indexes = np.random.permutation(n_data)

        sum_l_enc = 0.
        sum_l_dec = 0.
        sum_l_disc = 0.
        sum_l_gan = 0.
        sum_l_like = 0.
        sum_l_prior = 0.
        sum_l_b_gan = 0.
        sum_l_r_gan = 0.
        sum_l_s_gan = 0.

        for i in tqdm.tqdm(batch_iter):
            x = img_data[indexes[i: i + batch_size]]
            size = x.shape[0]
            if mirroring:
                for j in range(size):
                    if np.random.randint(2):
                        x[j, :, :, :] = x[j, :, :, ::-1]

            x_batch = Variable(x)
            zeros = Variable(np.zeros(size, dtype=np.int32))
            ones = Variable(np.ones(size, dtype=np.int32))
            if self.flag_gpu:
                x_batch.to_gpu()
                zeros.to_gpu()
                ones.to_gpu()

            # kl_loss : KL regularisation loss on the VAE latent code
            # dif_l   : MSE between discriminator intermediate-layer
            #           outputs for training images and reconstructions
            # disc_{rec, batch, samp} : discriminator outputs (2-d)
            kl_loss, dif_l, disc_rec, disc_batch, disc_samp = self._forward(x_batch)

            # Losses on the discriminator outputs.
            L_batch_GAN = F.softmax_cross_entropy(disc_batch, ones)
            L_rec_GAN = F.softmax_cross_entropy(disc_rec, zeros)
            L_samp_GAN = F.softmax_cross_entropy(disc_samp, zeros)
            l_gan = (L_batch_GAN + L_rec_GAN + L_samp_GAN)/3.

            l_like = dif_l
            l_prior = kl_loss

            enc_loss = self.kl_ratio*l_prior + l_like
            dec_loss = gamma*l_like - l_gan
            disc_loss = l_gan

            self.enc_opt.zero_grads()
            enc_loss.backward()
            self.enc_opt.update()

            self.dec_opt.zero_grads()
            dec_loss.backward()
            self.dec_opt.update()

            self.disc_opt.zero_grads()
            disc_loss.backward()
            self.disc_opt.update()

            sum_l_enc += enc_loss.data
            sum_l_dec += dec_loss.data
            sum_l_disc += disc_loss.data
            sum_l_gan += l_gan.data
            sum_l_like += l_like.data
            sum_l_prior += l_prior.data
            sum_l_b_gan += L_batch_GAN.data
            sum_l_r_gan += L_rec_GAN.data
            sum_l_s_gan += L_samp_GAN.data

        sum_l_enc /= n_batches
        sum_l_dec /= n_batches
        sum_l_disc /= n_batches
        sum_l_gan /= n_batches
        sum_l_like /= n_batches
        sum_l_prior /= n_batches
        sum_l_b_gan /= n_batches
        sum_l_r_gan /= n_batches
        sum_l_s_gan /= n_batches

        msg = "enc_loss = {0}, dec_loss = {1} , disc_loss = {2}"
        msg2 = "gan_loss = {0}, sim_loss = {1}, kl_loss = {2}"
        print(msg.format(sum_l_enc, sum_l_dec, sum_l_disc))
        print(msg2.format(sum_l_gan, sum_l_like, sum_l_prior))
        t_diff = time.time()-t1
        print("time: %f\n\n" % t_diff)

        df_tmp = pd.DataFrame([[epoch, sum_l_enc, sum_l_dec, sum_l_disc,
                                sum_l_gan, sum_l_like, sum_l_prior,
                                sum_l_b_gan, sum_l_r_gan, sum_l_s_gan]],
                              columns=df_col)
        # pd.concat instead of DataFrame.append, which current pandas
        # releases no longer provide.
        self.loss_buf = pd.concat([self.loss_buf, df_tmp], ignore_index=True)
def train_val(train_data, classifier, optimizer, num_train = 9000, epochs = 10, batchsize = 30, gpu = True):
    """Train ``classifier`` and report train/validation metrics per epoch.

    Args:
        train_data: Object providing ``split_train_val``,
            ``generate_minibatch(batchsize, mode=...)`` and ``shuffle``.
        classifier: Callable ``classifier(x, y)`` returning the loss and
            exposing ``accuracy``, ``loss`` and ``predictor``.
        optimizer: Optimizer already set up for ``classifier``.
        num_train: Forwarded to ``train_data.split_train_val``.
        epochs: Number of epochs.
        batchsize: Requested minibatch size.
        gpu: When true, each minibatch is moved to the GPU.

    Returns:
        ``(classifier, optimizer)``.
    """
    def run_pass(mode, training):
        # One full pass over `mode` data; returns (mean accuracy, mean loss).
        classifier.predictor.train = training
        num_samples = 0
        cum_loss = 0
        cum_acc = 0
        for data in train_data.generate_minibatch(batchsize, mode = mode):
            n = len(data[0])
            num_samples += n
            if training:
                optimizer.zero_grads()
            x, y = Variable(data[0]), Variable(data[1])
            if gpu:
                x.to_gpu()
                y.to_gpu()
            loss = classifier(x, y)
            # Weight by the real minibatch length: the sums are divided by
            # the true sample count, so a shorter final batch must not be
            # counted as a full one.
            cum_acc += classifier.accuracy.data*n
            cum_loss += classifier.loss.data*n
            if training:
                loss.backward()     # back propagation
                optimizer.update()  # update parameters
        return cum_acc/num_samples, cum_loss/num_samples

    # split data to train and val
    train_data.split_train_val(num_train)

    for epoch in range(epochs):
        train_accuracy, train_loss = run_pass('train', True)
        val_accuracy, val_loss = run_pass('val', False)

        # Function-call form prints the same text on Python 2 and 3.
        print('----------------- epoch: %s -----------------' % (epoch+1,))
        print('train_accuracy: %s train_loss: %s' % (train_accuracy, train_loss))
        print('val_accuracy: %s val_loss: %s' % (val_accuracy, val_loss))
        print('\n')

        # shuffle train data
        train_data.shuffle()

    return classifier, optimizer
def train(self, x_train, y_train, x_test, y_test, train, gpu):
    """Trains and tests the classifier of QSAR.

    Args:
        x_train (Variable): NFP for the training dataset.
        y_train (np.array(int32[])): Activity data for the training dataset.
        x_test (Variable): NFP for the test dataset.
        y_test (np.array(int32[])): Activity data for the test dataset.
        train (boolean): Training flag. Selects ``self.n_train_epoch``
            epochs when True, ``self.n_val_epoch`` otherwise.
        gpu (boolean): GPU flag. If you want to use GPU, set it True.

    Returns:
        result (float): Overall accuracy on the test dataset.
    """
    N = len(y_train)
    N_test = len(y_test)
    model = self.model
    optimizer = self.optimizer

    # training
    n_epoch = self.n_train_epoch if train else self.n_val_epoch
    for epoch in six.moves.range(1, n_epoch + 1):
        perm = np.random.permutation(N)
        sum_accuracy = 0
        sum_loss = 0
        batchsize = self.batchsize
        for start in six.moves.range(0, N, batchsize):
            picked = perm[start:min(N, start + batchsize)]
            x = F.concat([x_train[idx] for idx in picked], axis=0)
            t = Variable(np.asarray(y_train[picked]))
            if gpu:
                x.to_gpu(0)
                t.to_gpu(0)
            optimizer.update(model, x, t)
            n_in_batch = len(t.data)
            sum_loss += float(model.loss.data) * n_in_batch
            sum_accuracy += float(model.accuracy.data) * n_in_batch
        print('train mean loss=%f, accuracy=%f' %
              (sum_loss / N, sum_accuracy / N))

    # evaluation
    # NOTE(review): nesting was reconstructed from a collapsed source line.
    # This section is assumed to run once, after the epoch loop. Confirm.
    sum_loss = 0
    sum_accuracy = 0
    batchsize_test = N_test // 2
    for start in six.moves.range(0, N_test, batchsize_test):
        stop = min(N_test, start + batchsize_test)
        x = F.concat([x_test[idx] for idx in six.moves.range(start, stop)], axis=0)
        x.volatile = 'off'
        t = Variable(np.asarray(y_test[start:start + batchsize_test]),
                     volatile='off')
        if gpu:
            x.to_gpu(0)
            t.to_gpu(0)
        loss = model(x, t)
        sum_accuracy += float(model.accuracy.data) * len(t.data)
        sum_loss += loss
        print('batchtest mean loss=%f, accuracy=%f' %
              (float(loss.data), float(model.accuracy.data)))

    # The summed test loss is back-propagated and an update is applied.
    sum_loss.backward()
    optimizer.update()

    result = sum_accuracy / N_test
    return result
def __call__(self, input_x, t):
    """Run the predictor on a batch and return the summed training loss.

    The predictor output is reshaped to
    ``(batch, n_boxes, n_classes + 5, grid_h, grid_w)`` and split along
    axis 2 into x, y, w, h, confidence and class-probability parts. Target
    arrays are built on the host with numpy, overridden at the cells that
    contain a ground-truth box, and six squared-error terms are summed.

    Side effects: ``self.seen`` is increased by the batch size, and debug
    information is printed on every call.

    NOTE(review): ``to_gpu()`` and ``.data.get()`` are called
    unconditionally, so this looks like it only works with GPU arrays --
    confirm.

    Args:
        input_x: Batch passed unchanged to ``self.predictor``.
        t: Per-sample sequences of ground-truth boxes. Each box is indexed
            with the keys ``"x"``, ``"y"``, ``"w"``, ``"h"`` and
            ``"label"``. The coordinates are presumably normalized to the
            image size, since they are multiplied by the grid size to
            obtain cell indices -- TODO confirm.

    Returns:
        The sum of the x, y, w, h, confidence and probability losses.
    """
    output = self.predictor(input_x)
    batch_size, _, grid_h, grid_w = output.shape
    self.seen += batch_size
    x, y, w, h, conf, prob = F.split_axis(F.reshape(output, (batch_size, self.predictor.n_boxes, self.predictor.n_classes+5, grid_h, grid_w)), (1, 2, 3, 4, 5), axis=2)
    x = F.sigmoid(x)  # activation for x
    y = F.sigmoid(y)  # activation for y
    conf = F.sigmoid(conf)  # activation for conf
    prob = F.transpose(prob, (0, 2, 1, 3, 4))
    prob = F.softmax(prob)  # activation for the class probabilities

    # Prepare the teacher (target) data.
    tw = np.zeros(w.shape, dtype=np.float32)  # train w and h toward 0 (e^w and e^h approach 1 -> scale factor 1 for the responsible bbox)
    th = np.zeros(h.shape, dtype=np.float32)
    tx = np.tile(0.5, x.shape).astype(np.float32)  # train the activated x and y toward 0.5
    ty = np.tile(0.5, y.shape).astype(np.float32)

    if self.seen < self.unstable_seen:  # the bbox-error learning scale for boxes without an object center is 0.1 by default
        box_learning_scale = np.tile(0.1, x.shape).astype(np.float32)
    else:
        box_learning_scale = np.tile(0, x.shape).astype(np.float32)

    tconf = np.zeros(conf.shape, dtype=np.float32)  # the confidence target is 0 by default; boxes whose IOU is above thresh are not trained, except that the best box of a grid cell containing an object is pulled toward the true IOU
    conf_learning_scale = np.tile(0.1, conf.shape).astype(np.float32)

    tprob = prob.data.copy()  # nothing but the best anchor is trained (squared error against itself = 0)

    # Compute the IOU between every bbox and the truths (done per batch item).
    x_shift = Variable(np.broadcast_to(np.arange(grid_w, dtype=np.float32), x.shape[1:]))
    y_shift = Variable(np.broadcast_to(np.arange(grid_h, dtype=np.float32).reshape(grid_h, 1), y.shape[1:]))
    w_anchor = Variable(np.broadcast_to(np.reshape(np.array(self.anchors, dtype=np.float32)[:, 0], (self.predictor.n_boxes, 1, 1, 1)), w.shape[1:]))
    h_anchor = Variable(np.broadcast_to(np.reshape(np.array(self.anchors, dtype=np.float32)[:, 1], (self.predictor.n_boxes, 1, 1, 1)), h.shape[1:]))
    x_shift.to_gpu(), y_shift.to_gpu(), w_anchor.to_gpu(), h_anchor.to_gpu()
    best_ious = []
    for batch in range(batch_size):
        n_truth_boxes = len(t[batch])
        box_x = (x[batch] + x_shift) / grid_w
        box_y = (y[batch] + y_shift) / grid_h
        box_w = F.exp(w[batch]) * w_anchor / grid_w
        box_h = F.exp(h[batch]) * h_anchor / grid_h

        ious = []
        for truth_index in range(n_truth_boxes):
            truth_box_x = Variable(np.broadcast_to(np.array(t[batch][truth_index]["x"], dtype=np.float32), box_x.shape))
            truth_box_y = Variable(np.broadcast_to(np.array(t[batch][truth_index]["y"], dtype=np.float32), box_y.shape))
            truth_box_w = Variable(np.broadcast_to(np.array(t[batch][truth_index]["w"], dtype=np.float32), box_w.shape))
            truth_box_h = Variable(np.broadcast_to(np.array(t[batch][truth_index]["h"], dtype=np.float32), box_h.shape))
            truth_box_x.to_gpu(), truth_box_y.to_gpu(), truth_box_w.to_gpu(), truth_box_h.to_gpu()
            ious.append(multi_box_iou(Box(box_x, box_y, box_w, box_h), Box(truth_box_x, truth_box_y, truth_box_w, truth_box_h)).data.get())
        ious = np.array(ious)
        # NOTE(review): with no truth boxes for a sample, ``ious`` is empty
        # and this reduction presumably raises -- confirm callers never
        # pass an empty truth list.
        best_ious.append(np.max(ious, axis=0))
    best_ious = np.array(best_ious)

    # For anchors whose IOU exceeds the threshold, do not push conf down to 0
    # (grid cells around a truth keep their conf as is).
    tconf[best_ious > self.thresh] = conf.data.get()[best_ious > self.thresh]
    conf_learning_scale[best_ious > self.thresh] = 0

    # Only for anchor boxes that contain an object, adjust x, y, w, h, conf
    # and prob individually.
    abs_anchors = self.anchors / np.array([grid_w, grid_h])
    for batch in range(batch_size):
        for truth_box in t[batch]:
            truth_w = int(float(truth_box["x"]) * grid_w)
            truth_h = int(float(truth_box["y"]) * grid_h)
            truth_n = 0
            best_iou = 0.0
            # Pick the anchor whose shape overlaps the truth box best; both
            # boxes are placed at the origin so only width/height matter.
            for anchor_index, abs_anchor in enumerate(abs_anchors):
                iou = box_iou(Box(0, 0, float(truth_box["w"]), float(truth_box["h"])), Box(0, 0, abs_anchor[0], abs_anchor[1]))
                if best_iou < iou:
                    best_iou = iou
                    truth_n = anchor_index

            # For the anchor containing the object: pull the center toward the
            # true coordinates instead of 0.5, pull the anchor scale toward the
            # true scale instead of 1, and set the learning scale to 1.
            box_learning_scale[batch, truth_n, :, truth_h, truth_w] = 1.0
            tx[batch, truth_n, :, truth_h, truth_w] = float(truth_box["x"]) * grid_w - truth_w
            ty[batch, truth_n, :, truth_h, truth_w] = float(truth_box["y"]) * grid_h - truth_h
            tw[batch, truth_n, :, truth_h, truth_w] = np.log(float(truth_box["w"]) / abs_anchors[truth_n][0])
            th[batch, truth_n, :, truth_h, truth_w] = np.log(float(truth_box["h"]) / abs_anchors[truth_n][1])
            tprob[batch, :, truth_n, truth_h, truth_w] = 0
            tprob[batch, int(truth_box["label"]), truth_n, truth_h, truth_w] = 1

            # Observe the IOU.
            full_truth_box = Box(float(truth_box["x"]), float(truth_box["y"]), float(truth_box["w"]), float(truth_box["h"]))
            predicted_box = Box(
                (x[batch][truth_n][0][truth_h][truth_w].data.get() + truth_w) / grid_w,
                (y[batch][truth_n][0][truth_h][truth_w].data.get() + truth_h) / grid_h,
                np.exp(w[batch][truth_n][0][truth_h][truth_w].data.get()) * abs_anchors[truth_n][0],
                np.exp(h[batch][truth_n][0][truth_h][truth_w].data.get()) * abs_anchors[truth_n][1]
            )
            predicted_iou = box_iou(full_truth_box, predicted_box)
            tconf[batch, truth_n, :, truth_h, truth_w] = predicted_iou
            conf_learning_scale[batch, truth_n, :, truth_h, truth_w] = 10.0

        # debug prints
        # NOTE(review): the summary line below reads best_iou, predicted_iou,
        # truth_n, truth_h and truth_w left over from the last truth box of
        # this sample, and always reports the label of the first truth box.
        maps = F.transpose(prob[batch], (2, 3, 1, 0)).data
        print("best confidences and best conditional probability and predicted class of each grid:")
        for i in range(grid_h):
            for j in range(grid_w):
                print("%2d" % (int(conf[batch, :, :, i, j].data.max() * 100)), end=" ")
            print(" ", end="")
            for j in range(grid_w):
                print("%2d" % (maps[i][j][int(maps[i][j].max(axis=1).argmax())].argmax()), end=" ")
            print(" ", end="")
            for j in range(grid_w):
                print("%2d" % (maps[i][j][int(maps[i][j].max(axis=1).argmax())].max()*100), end=" ")
            print()

        print("best default iou: %.2f predicted iou: %.2f confidence: %.2f class: %s" % (best_iou, predicted_iou, conf[batch][truth_n][0][truth_h][truth_w].data, t[batch][0]["label"]))
        print("-------------------------------")
    print("seen = %d" % self.seen)

    # Loss computation.
    tx, ty, tw, th, tconf, tprob = Variable(tx), Variable(ty), Variable(tw), Variable(th), Variable(tconf), Variable(tprob)
    box_learning_scale, conf_learning_scale = Variable(box_learning_scale), Variable(conf_learning_scale)
    tx.to_gpu(), ty.to_gpu(), tw.to_gpu(), th.to_gpu(), tconf.to_gpu(), tprob.to_gpu()
    box_learning_scale.to_gpu()
    conf_learning_scale.to_gpu()

    # Each term is half the (scaled) sum of squared differences.
    x_loss = F.sum((tx - x) ** 2 * box_learning_scale) / 2
    y_loss = F.sum((ty - y) ** 2 * box_learning_scale) / 2
    w_loss = F.sum((tw - w) ** 2 * box_learning_scale) / 2
    h_loss = F.sum((th - h) ** 2 * box_learning_scale) / 2
    c_loss = F.sum((tconf - conf) ** 2 * conf_learning_scale) / 2
    p_loss = F.sum((tprob - prob) ** 2) / 2
    print("x_loss: %f y_loss: %f w_loss: %f h_loss: %f c_loss: %f p_loss: %f" %
        (F.sum(x_loss).data, F.sum(y_loss).data, F.sum(w_loss).data, F.sum(h_loss).data, F.sum(c_loss).data, F.sum(p_loss).data)
    )

    loss = x_loss + y_loss + w_loss + h_loss + c_loss + p_loss
    return loss
class TestFasterRCNN(unittest.TestCase):
    """Smoke tests for the forward and backward passes of ``FasterRCNN``.

    The tests read ``self.trunk``, ``self.train`` and ``self.device``, none
    of which is assigned in this class; they are presumably injected by a
    test parameterization defined elsewhere -- TODO confirm.
    """

    def setUp(self):
        """Load one VOC training sample and wrap it in batched Variables."""
        chainer.set_debug(True)
        np.random.seed(0)
        dataset = VOC('train')
        img, im_info, bbox = dataset[1]
        # ``[None, ...]`` prepends a batch axis of length 1.
        self.x = Variable(img[None, ...])
        self.im_info = Variable(im_info[None, ...])
        self.gt_boxes = Variable(bbox[None, ...])

    def _build_model(self):
        """Return a ``FasterRCNN`` configured the way both tests need it."""
        rpn_in_ch = 512
        rpn_out_ch = 512
        feat_stride = 16
        anchor_ratios = [0.5, 1, 2]
        anchor_scales = [8, 16, 32]
        num_classes = 21
        model = FasterRCNN(
            self.trunk, rpn_in_ch, rpn_out_ch, feat_stride,
            anchor_ratios, anchor_scales, num_classes)
        # ``self.train`` is unpacked as an (rpn_train, rcnn_train) pair.
        model.rpn_train, model.rcnn_train = self.train
        return model

    def test_forward_whole(self):
        """Run one timed forward pass and check the shape of the result."""
        model = self._build_model()
        if self.device >= 0:
            model.to_gpu(self.device)
            self.x.to_gpu(self.device)
            self.x.volatile = True
            self.assertIs(model.xp, cp)
            self.assertIs(model.trunk.xp, cp)

        st = time.time()
        ret = model(self.x, self.im_info)
        print('Forward whole device:{}, ({}, train:{}): {} sec'.format(
            self.device, self.trunk.__name__, self.train, time.time() - st))

        # unittest assertions are not stripped under ``python -O`` and
        # report the offending values on failure.
        self.assertEqual(len(ret), 2)
        self.assertIsInstance(ret[0], chainer.Variable)
        self.assertIsInstance(ret[1], (cp.ndarray, np.ndarray))

    def test_backward(self):
        """Run one timed training step and dump its computational graph.

        The RPN stage is exercised when ``model.rpn_train`` is set,
        otherwise the RCNN stage when ``model.rcnn_train`` is set; with
        neither flag set nothing is trained.
        """
        model = self._build_model()
        if self.device >= 0:
            model.to_gpu(self.device)
            self.x.to_gpu(self.device)
            self.im_info.to_gpu(self.device)
            self.gt_boxes.to_gpu(self.device)
            self.assertIs(model.xp, cp)
            self.assertIs(model.trunk.xp, cp)

        opt = optimizers.Adam()
        opt.setup(model)

        # Both stages run the same steps; only the label in the timing
        # message and the output file differ.
        if model.rpn_train:
            stage, dot_path = 'rpn', 'tests/rpn_cg.dot'
        elif model.rcnn_train:
            stage, dot_path = 'rcnn', 'tests/loss_rcnn_cg.dot'
        else:
            return

        st = time.time()
        loss = model(self.x, self.im_info, self.gt_boxes)
        model.cleargrads()
        loss.backward()
        opt.update()
        print('Backward {} device:{}, ({}, train:{}): {} sec'.format(
            stage, self.device, self.trunk.__name__, self.train,
            time.time() - st))

        graph = cg.build_computational_graph([loss])
        with open(dot_path, 'w') as fp:
            fp.write(graph.dump())
# generate sample x, t = generator.generate_samples( n_samples=16, n_items=3, crop_width=input_width, crop_height=input_height, min_item_scale=0.5, max_item_scale=2.5, rand_angle=15, minimum_crop=0.8, delta_hue=0.01, delta_sat_scale=0.5, delta_val_scale=0.5 ) x = Variable(x) x.to_gpu() # forward loss = model(x, t) print("batch: %d input size: %dx%d learning rate: %f loss: %f" % (batch, input_height, input_width, optimizer.lr, loss.data)) print("/////////////////////////////////////") # backward and optimize optimizer.zero_grads() loss.backward() optimizer.update() # save model if (batch+1) % 500 == 0: model_file = "%s/%s.model" % (backup_path, batch+1) print("saving model to %s" % (model_file))