# Common imports assumed by the snippets below (each originally lived in its
# own module): numpy as np, cntk as C, plus time / random / pytest / skimage.io
# as skio where used.

def grid_coord(guide, xx, yy, sz, small_sz, sigma_r, bs):
    # Map full-resolution pixel coordinates (xx, yy) and the guide intensity
    # into continuous bilateral-grid coordinates (gx, gy, gz), plus the
    # floor/ceil integer coordinates used for trilinear interpolation.
    gx = ((xx + 0.5) / sz) * small_sz
    gy = ((yy + 0.5) / sz) * small_sz
    expanded_guide = C.reshape(guide, [bs, 1, sz, sz])
    gz = expanded_guide * sigma_r
    fx = C.floor(gx - 0.5)
    fy = C.floor(gy - 0.5)
    fz = C.clip(C.floor(gz - 0.5), 0, sigma_r - 1)
    cx = C.element_min(fx + 1, small_sz - 1)
    cy = C.element_min(fy + 1, small_sz - 1)
    cz = C.clip(fz + 1, 0, sigma_r - 1)
    return gx, gy, gz, fx, fy, fz, cx, cy, cz

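# A minimal usage sketch for grid_coord (sizes here are illustrative
# assumptions; `guide` is a [bs, sz, sz] intensity map in [0, 1]):
import numpy as np
import cntk as C

bs, sz, small_sz, sigma_r = 1, 8, 4, 8
xx = C.Constant(np.arange(sz, dtype=np.float32).reshape(1, -1).repeat(sz, 0))
yy = C.Constant(np.arange(sz, dtype=np.float32).reshape(-1, 1).repeat(sz, 1))
guide = C.input_variable([bs, sz, sz], dynamic_axes=[])
gx, gy, gz, fx, fy, fz, cx, cy, cz = grid_coord(
    guide, xx, yy, sz, small_sz, sigma_r, bs)
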
def test_Clip(tmpdir):
    data = np.asarray([0.2, 1.3, 4., 5.5, 0.0], np.float32)
    min_v = 2
    max_v = 4
    model = C.clip(data, min_v, max_v)
    verify_no_input(model, tmpdir, 'clip_0')

    x = C.input_variable(data.shape)
    model = C.clip(x, min_v, max_v)
    verify_one_input(model, data, tmpdir, 'clip_1')

def test_Clip(tmpdir, dtype):
    if dtype == np.float16:
        pytest.skip("TO BE FIXED")
    with C.default_options(dtype=dtype):
        data = np.asarray([0.2, 1.3, 4., 5.5, 0.0], dtype)
        min_v = 2
        max_v = 4
        model = C.clip(data, min_v, max_v)
        verify_no_input(model, tmpdir, 'clip_0')

        x = C.input_variable(data.shape)
        model = C.clip(x, min_v, max_v)
        verify_one_input(model, data, tmpdir, 'clip_1')

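# The dtype-aware variant above presumably receives `dtype` from a pytest
# fixture or parametrization defined elsewhere in the suite; a self-contained
# equivalent would be:
#
#   @pytest.mark.parametrize("dtype", [np.float32, np.float16])
#   def test_Clip(tmpdir, dtype):
#       ...
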
def gaussian_mdn_loss(output_vector, target_vector, nmix: int, ndim: int):
    """
    Loss function for a gaussian mixture density network. Usually used for
    regression problems. Mixture density networks are useful when trying to
    represent arbitrary conditional probabilities in the same way a
    conventional neural network can represent arbitrary functions.

    Example:
        ndim, nmix = 1, 3
        input_tensor = C.input_variable(1, name="input_tensor")
        target_tensor = C.input_variable(1, name="target_tensor")

        # model
        inner = Dense(50, activation=C.relu)(input_tensor)
        inner = Dense(50, activation=C.relu)(inner)
        prediction_tensor = Dense((ndim + 2) * nmix, activation=None)(inner)

        loss = gaussian_mdn_loss(prediction_tensor, target_tensor, nmix=nmix, ndim=ndim)

    Arguments:
        output_vector: network output
        target_vector: ground truths (typically a continuous variable)
        nmix (int): number of mixtures
        ndim (int): number of dimensions in a gaussian kernel

    Returns:
        :class:`~cntk.ops.functions.Function`
    """

    @C.typemap
    def gaussian_mdn_phi(target, mu, sigma, ndim: int):
        """
        Calculates phi between the target tensor and the network prediction.
        Does not assume independence between the components of the target.

        Arguments:
            target: target tensor with shape (ndim, )
            mu: means of the gaussian mdn with shape (nmix, ndim)
            sigma: sigma of the gaussian mdn
            ndim (int): number of dimensions in the gaussian

        Returns:
            :class:`~cntk.ops.functions.Function`
        """
        if not len(mu.shape) == 2:
            raise ValueError("mu {0} must have shape (nmix, ndim)".format(mu.shape))

        t = C.expand_dims(target, axis=0)
        exp_term = C.exp(C.negate(C.square(C.reduce_l2(t - mu, axis=-1)) / (2 * C.square(sigma))))
        factor = C.reciprocal((2 * pi) ** (ndim / 2) * C.pow(sigma, ndim))
        return factor * exp_term

    alpha, mu, sigma = gaussian_mdn_coeff(output_vector, nmix=nmix, ndim=ndim)
    phi = gaussian_mdn_phi(target_vector, mu, sigma, ndim=ndim)
    # Clip before the log to avoid -inf when all mixture responsibilities vanish.
    loss = C.negate(C.log(C.clip(C.reduce_sum(alpha * phi, axis=0), 1e-10, 1e10)))
    return loss

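# `gaussian_mdn_coeff` is referenced above but not shown. A sketch consistent
# with the (ndim + 2) * nmix output layout that gaussian_mdn_loss expects
# (nmix mixing weights, nmix shared sigmas, nmix * ndim means); the exact
# slicing order is an assumption:
@C.typemap
def gaussian_mdn_coeff(x, nmix: int, ndim: int):
    if len(x.shape) != 1:
        raise ValueError("x must be a 1d tensor, but got shape {0}".format(x.shape))
    alpha = C.softmax(C.slice(x, 0, 0, nmix), name='alpha')      # mixing weights
    sigma = C.exp(C.slice(x, 0, nmix, 2 * nmix), name='sigma')   # one sigma per kernel
    mu = C.reshape(C.slice(x, 0, 2 * nmix, (ndim + 2) * nmix),
                   shape=(nmix, ndim), name='mu')                # kernel means
    return alpha, mu, sigma
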
def clip(x, min_value, max_value):
    """Element-wise value clipping.

    If min_value > max_value, the clipping range is [min_value, min_value].

    # Arguments
        x: Tensor or variable.
        min_value: Tensor, float, int, or None.
            If min_value is None, defaults to -infinity.
        max_value: Tensor, float, int, or None.
            If max_value is None, defaults to infinity.

    # Returns
        A tensor.
    """
    if max_value is None:
        max_value = np.inf
    if min_value is None:
        min_value = -np.inf
    # Collapse an inverted range to [min_value, min_value], as documented.
    # (The original called C.maximum, which is not a CNTK op.)
    max_value = np.maximum(min_value, max_value)
    return C.clip(x, min_value, max_value)

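# Quick check of the wrapper's None handling (`clip` is the function defined
# above; a usage sketch, not part of the original backend):
import numpy as np
import cntk as C

x = C.constant(np.array([-2.0, 0.5, 3.0], dtype=np.float32))
print(clip(x, 0.0, 1.0).eval())   # [0.  0.5 1. ]
print(clip(x, None, 1.0).eval())  # [-2.  0.5  1. ]; min_value defaults to -inf
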
def build(self):
    input_kernel = C.Parameter(shape=(self._input_size, self._hidden_dim),
                               init=self._input_initializer)
    recur_kernel = C.Parameter(shape=(self._hidden_dim,),
                               init=self._recurrent_initializer)
    bias = C.Parameter(shape=(self._hidden_dim,), init=0)

    if self._recurrent_min_abs > 0:
        # Clamp the magnitude of the recurrent weights from below,
        # preserving their signs.
        abs_kernel = C.abs(recur_kernel)
        min_abs_kernel = C.element_max(abs_kernel, self._recurrent_min_abs)
        recur_kernel = min_abs_kernel * C.element_select(
            C.greater_equal(recur_kernel, C.constant(0)),
            C.constant(1),
            C.constant(-1))

    if self._recurrent_max_abs:
        # Clamp the magnitude of the recurrent weights from above.
        recur_kernel = C.clip(recur_kernel,
                              -self._recurrent_max_abs,
                              self._recurrent_max_abs)

    @C.Function
    def runit(h, x):
        h_t = C.times(x, input_kernel) + bias + recur_kernel * h
        return h_t

    return runit

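# A minimal, self-contained sketch of driving such a step function with
# C.layers.Recurrence (sizes and names here are illustrative assumptions, not
# part of the original class):
import numpy as np
import cntk as C

input_size, hidden_dim = 4, 8
W = C.Parameter((input_size, hidden_dim), init=C.initializer.glorot_uniform())
u = C.Parameter((hidden_dim,), init=C.initializer.glorot_uniform())
b = C.Parameter((hidden_dim,), init=0)

@C.Function
def step(h, x):
    # Element-wise recurrence: each hidden unit sees only its own history.
    return C.times(x, W) + b + u * h

x_seq = C.sequence.input_variable(input_size)
h_seq = C.layers.Recurrence(step)(x_seq)
out = h_seq.eval({x_seq: [np.random.randn(5, input_size).astype(np.float32)]})
print(out[0].shape)  # (5, 8): one hidden state per time step
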
def Loss(self):
    # Evaluate old actions and values:
    logprobs, state_value, dist_entropy = self.policy.evaluate()

    # Probability ratio (pi_theta / pi_theta_old) for importance sampling:
    c_old_logprobs = C.input_variable(logprobs.shape, name='old_log_probs')
    ratios = C.exp(logprobs - C.stop_gradient(c_old_logprobs))

    c_rewards = C.input_variable(1, name='rewards')
    advantages = c_rewards - C.stop_gradient(state_value)

    # Clipped surrogate loss:
    surr1 = ratios * advantages
    surr2 = C.clip(ratios, 1 - self.eps_clip, 1 + self.eps_clip) * advantages
    neglog_loss = -C.element_min(surr1, surr2)
    entropy_loss = -0.01 * dist_entropy
    actor_loss = C.reduce_mean(neglog_loss + entropy_loss)
    critic_loss = 0.5 * C.reduce_mean(C.square(state_value - c_rewards))
    loss = actor_loss + critic_loss

    chunk = {
        'neglog_loss': neglog_loss,
        'entropy_loss': entropy_loss,
        'actor_loss': actor_loss,
        'critic_loss': critic_loss
    }

    trainer = C.Trainer(
        loss, (loss, None),
        C.adam(loss.parameters,
               C.learning_parameter_schedule_per_sample(self.lr),
               C.momentum_schedule_per_sample(self.betas[0]),
               variance_momentum=C.momentum_schedule_per_sample(self.betas[1])))
    # Alternative with a higher learning rate:
    # trainer = C.Trainer(loss, (loss, None),
    #                     C.adam(loss.parameters, C.learning_parameter_schedule(10),
    #                            C.momentum_schedule(0.9),
    #                            variance_momentum=C.momentum_schedule(0.999)))

    return loss, chunk, trainer

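# Why stop_gradient above: the old log-probs and the advantage baseline must
# be treated as constants when differentiating the surrogate. A minimal,
# self-contained illustration (not from the original source):
import numpy as np
import cntk as C

x = C.input_variable(1, needs_gradient=True)
y = C.square(x) + C.stop_gradient(C.square(x))
g = y.grad({x: np.array([[3.0]], dtype=np.float32)})
print(g)  # 2 * 3 = 6, not 12: the stop_gradient branch contributes no gradient
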
def main():
    show_image = False

    if show_image:
        bs = 1
        ci = 3
        co = 3
        cg = co * (ci + 1)
        gd = 8
        gh = 64
        gw = 64
        h = 256
        w = 256
    else:
        bs = 1
        ci = 3
        co = 3
        cg = co * (ci + 1)
        gd = 8
        gh = 64
        gw = 64
        h = 1024
        w = 1024

    im = C.input_variable([bs, ci, h, w], needs_gradient=True, dynamic_axes=[])
    guide = C.input_variable([bs, h, w], needs_gradient=True, dynamic_axes=[])
    guide_no_grad = C.input_variable([bs, h, w], needs_gradient=False, dynamic_axes=[])
    grid = C.input_variable([bs, cg, gd, gh, gw], needs_gradient=True, dynamic_axes=[])

    # Create indices
    xx = np.arange(0, w).reshape(1, -1).repeat(h, 0).astype(np.float32)
    yy = np.arange(0, h).reshape(-1, 1).repeat(w, 1).astype(np.float32)
    xx = C.Constant(xx, xx.shape)
    yy = C.Constant(yy, yy.shape)

    gx = ((xx + 0.5) / w) * gw
    gy = ((yy + 0.5) / h) * gh
    gz = C.clip(guide, 0.0, 1.0) * gd
    gz_no_grad = C.clip(guide_no_grad, 0.0, 1.0) * gd

    fx = C.element_max(C.floor(gx - 0.5), 0.0)
    fy = C.element_max(C.floor(gy - 0.5), 0.0)
    fz = C.element_max(C.floor(gz - 0.5), 0.0)
    fz_no_grad = C.element_max(C.floor(gz_no_grad - 0.5), 0.0)

    wx = gx - 0.5 - fx
    wy = gy - 0.5 - fy
    wx = C.expand_dims(C.expand_dims(wx, -1 - len(wx.shape)), -1 - len(wx.shape))
    wy = C.expand_dims(C.expand_dims(wy, -1 - len(wy.shape)), -1 - len(wy.shape))
    wz = C.abs(gz - 0.5 - fz)
    wz = C.expand_dims(wz, 0)

    fx = C.expand_dims(C.expand_dims(fx, -1 - len(fx.shape)), -1 - len(fx.shape))
    fy = C.expand_dims(C.expand_dims(fy, -1 - len(fy.shape)), -1 - len(fy.shape))

    cx = C.element_min(fx + 1, gw - 1)
    cy = C.element_min(fy + 1, gh - 1)
    cz = C.element_min(fz_no_grad + 1, gd - 1)

    batch_idx = np.arange(bs).reshape(bs, 1, 1, 1).astype(np.float32)
    batch_idx = C.Constant(batch_idx, batch_idx.shape)

    out = []
    flat_grid = C.reshape(grid, [-1])
    for c_ in range(co):
        c_idx = np.arange((ci + 1) * c_, (ci + 1) * (c_ + 1)).reshape(
            1, ci + 1, 1, 1).astype(np.float32)
        c_idx = C.Constant(c_idx, c_idx.shape)

        def flatten_and_gather(x, y, z):
            linear_idx = x + gw * y + gw * gh * z + \
                c_idx * gw * gh * gd + batch_idx * gw * gh * gd * cg
            flat_linear_idx = C.reshape(linear_idx, [-1])
            return C.reshape(C.gather(flat_grid, flat_linear_idx), linear_idx.shape)

        # Trilinear interpolation from the eight neighboring grid cells.
        gather_fff = flatten_and_gather(fx, fy, fz_no_grad)
        gather_ffc = flatten_and_gather(fx, fy, cz)
        gather_fcf = flatten_and_gather(fx, cy, fz_no_grad)
        gather_fcc = flatten_and_gather(fx, cy, cz)
        gather_cff = flatten_and_gather(cx, fy, fz_no_grad)
        gather_cfc = flatten_and_gather(cx, fy, cz)
        gather_ccf = flatten_and_gather(cx, cy, fz_no_grad)
        gather_ccc = flatten_and_gather(cx, cy, cz)

        a = gather_fff*(1-wx)*(1-wy)*(1-wz) + \
            gather_ffc*(1-wx)*(1-wy)*(  wz) + \
            gather_fcf*(1-wx)*(  wy)*(1-wz) + \
            gather_fcc*(1-wx)*(  wy)*(  wz) + \
            gather_cff*(  wx)*(1-wy)*(1-wz) + \
            gather_cfc*(  wx)*(1-wy)*(  wz) + \
            gather_ccf*(  wx)*(  wy)*(1-wz) + \
            gather_ccc*(  wx)*(  wy)*(  wz)

        # The sliced grid holds an affine color transform per pixel:
        # apply the ci coefficients to the input and add the offset channel.
        o = C.reduce_sum(a[:, :-1, ...] * im, 1) + a[:, -1, ...]
        print(o.shape)
        out.append(C.expand_dims(o, 0))

    out = C.splice(*out, axis=1)
    loss = C.reduce_l2(out)

    grid_val = np.random.rand(bs, cg, gd, gh, gw).astype(np.float32)
    if show_image:
        guide_val = skio.imread("/data/rgb.png").mean(2)[:h, :w].astype(np.float32)
        guide_val = np.expand_dims(guide_val / 255.0, 0)
        im_val = np.tile(np.expand_dims(guide_val, 1), [1, 3, 1, 1])
        out_val = out.eval({
            im: im_val,
            guide: guide_val,
            guide_no_grad: guide_val,
            grid: grid_val
        })
        out_val = np.clip(np.transpose(np.squeeze(out_val), [1, 2, 0]), 0, 1)
        skio.imsave("/output/imout.png", out_val)
    else:
        # CNTK graphs default to float32, so the random input must match.
        im_val = np.random.randn(bs, ci, h, w).astype(np.float32)
        guide_val = np.random.rand(bs, h, w).astype(np.float32)

        # burn-in iterations
        for it in range(5):
            print('burning (', it, ')')
            g = loss.grad({
                im: im_val,
                guide: guide_val,
                guide_no_grad: guide_val,
                grid: grid_val
            })

        # timed iterations
        start = time.time()
        for it in range(50):
            print('profiling (', it, ')')
            g = loss.grad({
                im: im_val,
                guide: guide_val,
                guide_no_grad: guide_val,
                grid: grid_val
            })
        end = time.time()

        runtime = (end - start) * 1000.0 / 50.0
        print('Runtime:', runtime)

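# Sanity check (pure numpy, illustrative): the eight trilinear weights used in
# the slicing loop above sum to 1 for any fractional offsets, so each sliced
# value is a convex combination of the gathered grid cells:
import numpy as np

wx, wy, wz = np.random.rand(3)
weights = [(1-wx)*(1-wy)*(1-wz), (1-wx)*(1-wy)*wz,
           (1-wx)*wy*(1-wz),     (1-wx)*wy*wz,
           wx*(1-wy)*(1-wz),     wx*(1-wy)*wz,
           wx*wy*(1-wz),         wx*wy*wz]
assert abs(sum(weights) - 1.0) < 1e-12
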
def main():
    bs = 4
    c = 64
    h = 512
    w = 512

    im = C.input_variable([bs, c, h, w], needs_gradient=True, dynamic_axes=[])
    warp = C.input_variable([bs, 2, h, w], needs_gradient=True, dynamic_axes=[])
    warp_ng = C.input_variable([bs, 2, h, w], needs_gradient=False, dynamic_axes=[])

    # Create indices: map the warp field from [-1, 1] to pixel coordinates.
    dx = 0.5 * (warp[:, 0, :, :] + 1.0)
    dy = 0.5 * (warp[:, 1, :, :] + 1.0)
    new_x = C.clip(dx * w, 0, w)
    new_y = C.clip(dy * h, 0, h)
    fx = C.clip(C.floor(new_x), 0, w - 2)
    fy = C.clip(C.floor(new_y), 0, h - 2)
    wx = new_x - fx
    wy = new_y - fy

    dx_ng = 0.5 * (warp_ng[:, 0, :, :] + 1.0)
    dy_ng = 0.5 * (warp_ng[:, 1, :, :] + 1.0)
    new_x_ng = C.clip(dx_ng * w, 0, w)
    new_y_ng = C.clip(dy_ng * h, 0, h)
    fx_ng = C.clip(C.floor(new_x_ng), 0, w - 2)
    fy_ng = C.clip(C.floor(new_y_ng), 0, h - 2)

    # Index helpers must be float32 to match the graph dtype.
    chan_idx = np.arange(c).reshape(1, c, 1, 1).astype(np.float32)
    chan_idx = C.Constant(chan_idx, chan_idx.shape)
    batch_idx = np.arange(bs).reshape(bs, 1, 1, 1).astype(np.float32)
    batch_idx = C.Constant(batch_idx, batch_idx.shape)

    flat_im = C.reshape(im, [-1])

    def flatten_and_gather(x, y):
        linear_idx = x + w * y + w * h * chan_idx + w * h * c * batch_idx
        flat_linear_idx = C.reshape(linear_idx, [-1])
        return C.reshape(C.gather(flat_im, flat_linear_idx), linear_idx.shape)

    # Bilinear interpolation from the four neighboring pixels.
    gather_ff = flatten_and_gather(fx_ng, fy_ng)
    gather_fc = flatten_and_gather(fx_ng, fy_ng + 1)
    gather_cf = flatten_and_gather(fx_ng + 1, fy_ng)
    gather_cc = flatten_and_gather(fx_ng + 1, fy_ng + 1)
    out = gather_ff*(1-wx)*(1-wy) + \
          gather_fc*(1-wx)*(  wy) + \
          gather_cf*(  wx)*(1-wy) + \
          gather_cc*(  wx)*(  wy)
    loss = C.reduce_l2(out)

    im_val = np.random.randn(bs, c, h, w).astype(np.float32)
    warp_val = np.random.rand(bs, 2, h, w).astype(np.float32)

    # burn-in iterations
    for it in range(5):
        print('burning (', it, ')')
        g = loss.grad({im: im_val, warp: warp_val, warp_ng: warp_val})

    # timed iterations
    start = time.time()
    for it in range(50):
        print('profiling (', it, ')')
        g = loss.grad({im: im_val, warp: warp_val, warp_ng: warp_val})
    end = time.time()

    runtime = (end - start) * 1000.0 / 50.0
    print('Runtime:', runtime)

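# The flatten-and-gather trick above emulates advanced indexing: for a tensor
# of shape [bs, c, h, w], element (b, ch, y, x) sits at linear index
# x + w*y + w*h*ch + w*h*c*b in the flattened view. Numpy check (illustrative):
import numpy as np

bs, c, h, w = 2, 3, 4, 5
im = np.random.randn(bs, c, h, w)
b, ch, y, x = 1, 2, 3, 4
assert im.reshape(-1)[x + w*y + w*h*ch + w*h*c*b] == im[b, ch, y, x]
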
def main():
    bs = 4
    c = 16
    h = 512
    w = 512

    im = C.input_variable([bs, c, h, w], needs_gradient=True, dynamic_axes=[])
    affine_mtx = C.input_variable([bs, 2, 3], needs_gradient=True, dynamic_axes=[])
    affine_mtx_ng = C.input_variable([bs, 2, 3], needs_gradient=False, dynamic_axes=[])

    xx = np.arange(0, w).reshape(1, -1).repeat(h, 0).astype(np.float32)
    yy = np.arange(0, h).reshape(-1, 1).repeat(w, 1).astype(np.float32)
    xx = C.Constant(xx, xx.shape)
    yy = C.Constant(yy, yy.shape)

    # Pixel coordinates normalized to [-1, 1].
    nrm_x = 2.0 * (xx / w) - 1.0
    nrm_y = 2.0 * (yy / h) - 1.0
    nrm_x = C.expand_dims(nrm_x, -1 - len(nrm_x.shape))
    nrm_y = C.expand_dims(nrm_y, -1 - len(nrm_y.shape))

    xformed_x = affine_mtx[:, 0, 0] * nrm_x + \
                affine_mtx[:, 0, 1] * nrm_y + \
                affine_mtx[:, 0, 2]
    xformed_y = affine_mtx[:, 1, 0] * nrm_x + \
                affine_mtx[:, 1, 1] * nrm_y + \
                affine_mtx[:, 1, 2]
    # Map back from [-1, 1] to [0, 1]. (The original had 0.5 * x + 1.0, which
    # lands in [0.5, 1.5].)
    xformed_x = 0.5 * (xformed_x + 1.0)
    xformed_y = 0.5 * (xformed_y + 1.0)
    xformed_x = C.expand_dims(xformed_x, 0)
    xformed_y = C.expand_dims(xformed_y, 0)

    xformed_x_ng = affine_mtx_ng[:, 0, 0] * nrm_x + \
                   affine_mtx_ng[:, 0, 1] * nrm_y + \
                   affine_mtx_ng[:, 0, 2]
    xformed_y_ng = affine_mtx_ng[:, 1, 0] * nrm_x + \
                   affine_mtx_ng[:, 1, 1] * nrm_y + \
                   affine_mtx_ng[:, 1, 2]
    xformed_x_ng = 0.5 * (xformed_x_ng + 1.0)
    xformed_y_ng = 0.5 * (xformed_y_ng + 1.0)
    xformed_x_ng = C.expand_dims(xformed_x_ng, 0)
    xformed_y_ng = C.expand_dims(xformed_y_ng, 0)

    # Floor to the nearest source pixel; the interpolation weights are the
    # fractional parts. (The original omitted the floor, unlike the bilinear
    # warp benchmark above, which would have made wx/wy meaningless.)
    fx = C.clip(C.floor(w * xformed_x), 0, w - 2)
    fy = C.clip(C.floor(h * xformed_y), 0, h - 2)
    wx = w * xformed_x - fx
    wy = h * xformed_y - fy
    fx_ng = C.clip(C.floor(w * xformed_x_ng), 0, w - 2)
    fy_ng = C.clip(C.floor(h * xformed_y_ng), 0, h - 2)

    # Index helpers must be float32 to match the graph dtype.
    chan_idx = np.arange(c).reshape(1, c, 1, 1).astype(np.float32)
    chan_idx = C.Constant(chan_idx, chan_idx.shape)
    batch_idx = np.arange(bs).reshape(bs, 1, 1, 1).astype(np.float32)
    batch_idx = C.Constant(batch_idx, batch_idx.shape)

    flat_im = C.reshape(im, [-1])

    def flatten_and_gather(x, y):
        linear_idx = x + w * y + w * h * chan_idx + w * h * c * batch_idx
        flat_linear_idx = C.reshape(linear_idx, [-1])
        return C.reshape(C.gather(flat_im, flat_linear_idx), linear_idx.shape)

    gather_ff = flatten_and_gather(fx_ng,     fy_ng)
    gather_fc = flatten_and_gather(fx_ng,     fy_ng + 1)
    gather_cf = flatten_and_gather(fx_ng + 1, fy_ng)
    gather_cc = flatten_and_gather(fx_ng + 1, fy_ng + 1)
    out = gather_ff*(1-wx)*(1-wy) + \
          gather_fc*(1-wx)*(  wy) + \
          gather_cf*(  wx)*(1-wy) + \
          gather_cc*(  wx)*(  wy)
    loss = C.reduce_l2(out)

    im_val = np.random.randn(bs, c, h, w).astype(np.float32)
    affine_mtx_val = np.zeros([bs, 2, 3], dtype=np.float32)
    affine_mtx_val[:, 0, 1] = 1.0
    affine_mtx_val[:, 1, 0] = 1.0

    # burn-in iterations
    for it in range(5):
        print('burning (', it, ')')
        g = loss.grad({im: im_val,
                       affine_mtx: affine_mtx_val,
                       affine_mtx_ng: affine_mtx_val})

    # timed iterations
    start = time.time()
    for it in range(50):
        print('profiling (', it, ')')
        g = loss.grad({im: im_val,
                       affine_mtx: affine_mtx_val,
                       affine_mtx_ng: affine_mtx_val})
    end = time.time()

    runtime = (end - start) * 1000.0 / 50.0
    print('Runtime:', runtime)

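# Reading aid for the test matrix above (not from the source): with
# affine_mtx_val[:, 0, 1] = 1 and affine_mtx_val[:, 1, 0] = 1, the transform
# in normalized coordinates is x' = y, y' = x, i.e. a transpose of the axes:
import numpy as np

A = np.array([[0.0, 1.0, 0.0],
              [1.0, 0.0, 0.0]], dtype=np.float32)
p = np.array([0.25, -0.5, 1.0], dtype=np.float32)  # (nrm_x, nrm_y, 1)
print(A @ p)  # [-0.5  0.25]: coordinates swapped
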
def run(self):
    while self.episode < EPISODES:
        obs, action, pred, reward = self.get_batch()
        obs = obs[:BUFFER_SIZE]
        action = action[:BUFFER_SIZE]
        pred = pred[:BUFFER_SIZE]
        reward = reward[:BUFFER_SIZE]
        old_prediction = pred

        # Keras reference implementation this CNTK port replaces:
        # pred_values = self.critic.predict(obs)
        # advantage = reward - pred_values
        # actor_loss = self.actor.fit([obs, advantage, old_prediction], [action],
        #                             batch_size=BATCH_SIZE, shuffle=True,
        #                             epochs=EPOCHS, verbose=False)
        # critic_loss = self.critic.fit([obs], [reward], batch_size=BATCH_SIZE,
        #                               shuffle=True, epochs=EPOCHS, verbose=False)
        # self.writer.add_scalar('Actor loss', actor_loss.history['loss'][-1], self.gradient_steps)
        # self.writer.add_scalar('Critic loss', critic_loss.history['loss'][-1], self.gradient_steps)

        # region actor training
        pred_values = self.critic.eval({self.critic.arguments[0]: obs})
        advantage = reward - pred_values

        c_action = C.input_variable(action.shape[-1], name='action')
        c_prediction = C.input_variable(old_prediction.shape[-1], name='old_prediction')
        c_advantage = C.input_variable(1, name='advantage')

        prob = C.reduce_sum(c_action * self.actor)
        old_prob = C.reduce_sum(c_action * c_prediction)
        ratio = prob / (old_prob + 1e-10)

        surr1 = c_advantage * ratio
        surr2 = c_advantage * C.clip(ratio, 1 - LOSS_CLIPPING, 1 + LOSS_CLIPPING)
        neglog_loss = -C.element_min(surr1, surr2)
        entropy_loss = -ENTROPY_LOSS * -(prob * C.log(prob + 1e-10))
        # Equivalent Keras form:
        # loss = -mean(min(surr1, surr2) + ENTROPY_LOSS * -(prob * log(prob + 1e-10)))
        loss = C.reduce_mean(neglog_loss + entropy_loss)

        actor_loss = loss
        trainer = C.Trainer(
            actor_loss, (actor_loss, None),
            C.adam(actor_loss.parameters,
                   C.learning_parameter_schedule_per_sample(LR),
                   C.learning_parameter_schedule_per_sample(0.99)))

        avg = 0
        avg_out = {neglog_loss.output: 0, entropy_loss.output: 0}
        for epoch in range(EPOCHS):
            data_size = action.shape[0]
            shuffle_idx = random.sample(list(range(data_size)), data_size)
            mb_action = action[shuffle_idx]
            mb_obs = obs[shuffle_idx]
            mb_old_prediction = old_prediction[shuffle_idx]
            mb_advantage = advantage[shuffle_idx]

            updated, out = trainer.train_minibatch(
                dict(zip(actor_loss.arguments,
                         [mb_advantage, mb_action, mb_obs, mb_old_prediction])),
                outputs=[neglog_loss.output, entropy_loss.output])
            avg += trainer.previous_minibatch_loss_average
            avg_out[neglog_loss.output] += out[neglog_loss.output].mean()
            avg_out[entropy_loss.output] += out[entropy_loss.output].mean()
        # endregion

        self.writer.add_scalar('Actor loss', avg / EPOCHS, self.gradient_steps)
        self.writer.add_scalar('neglog loss',
                               avg_out[neglog_loss.output] / EPOCHS,
                               self.gradient_steps)
        self.writer.add_scalar('entropy loss',
                               avg_out[entropy_loss.output] / EPOCHS,
                               self.gradient_steps)

        # region critic training
        c_reward = C.input_variable(1, name='reward')
        loss = C.reduce_mean(C.square(self.critic - c_reward))

        critic_loss = loss
        trainer = C.Trainer(
            critic_loss, (critic_loss, None),
            C.adam(critic_loss.parameters,
                   C.learning_parameter_schedule_per_sample(LR),
                   C.learning_parameter_schedule_per_sample(0.99)))

        avg = 0
        for epoch in range(EPOCHS):
            data_size = action.shape[0]
            shuffle_idx = random.sample(list(range(data_size)), data_size)
            mb_obs = obs[shuffle_idx]
            mb_reward = reward[shuffle_idx]

            trainer.train_minibatch(
                dict(zip(critic_loss.arguments, [mb_obs, mb_reward])))
            avg += trainer.previous_minibatch_loss_average
        # endregion

        self.writer.add_scalar('Critic loss', avg / EPOCHS, self.gradient_steps)
        self.gradient_steps += 1

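# Numeric illustration of the clipped surrogate used in both actor losses
# above (pure numpy, illustrative): min(r*A, clip(r, 1-eps, 1+eps)*A) caps how
# much a single update can profit from a large probability ratio r.
import numpy as np

eps = 0.2            # plays the role of LOSS_CLIPPING / eps_clip
r, A = 1.5, 1.0      # ratio moved 50% up, positive advantage
surr1 = r * A
surr2 = np.clip(r, 1 - eps, 1 + eps) * A
print(min(surr1, surr2))  # 1.2: the objective is capped at (1 + eps) * A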