def huber_loss(y, y_hat, delta): """ Compute the Huber Loss as part of the model graph Huber Loss is more robust to outliers. It is defined as: if |y - y_hat| < delta : 0.5 * (y - y_hat)**2 else : delta * |y - y_hat| - 0.5 * delta**2 Attributes: y (Tensor[-1, 1]): Target value y_hat(Tensor[-1, 1]): Estimated value delta (float): Outliers threshold Returns: CNTK Graph Node """ half_delta_squared = 0.5 * delta * delta error = y - y_hat abs_error = abs(error) less_than = 0.5 * square(error) more_than = (delta * abs_error) - half_delta_squared loss_per_sample = element_select(less(abs_error, delta), less_than, more_than) return reduce_sum(loss_per_sample, name='loss')
def huber_loss(y_hat, y, delta):
    """ Compute the Huber Loss as part of the model graph

    Huber Loss is more robust to outliers. It is defined as:
    if |y - y_hat| < delta :
        0.5 * (y - y_hat)**2
    else :
        delta * |y - y_hat| - 0.5 * delta**2

    :param y: Target value
    :param y_hat: Estimated value
    :param delta: Outliers threshold
    :return: CNTK graph node holding the Huber loss
    """
    half_delta_squared = 0.5 * delta * delta
    error = y - y_hat
    abs_error = abs(error)

    less_than = 0.5 * square(error)
    more_than = (delta * abs_error) - half_delta_squared
    loss_per_sample = element_select(less(abs_error, delta), less_than, more_than)

    return reduce_sum(loss_per_sample, name='loss')
def huber_loss(output, target): r"""See https://en.wikipedia.org/wiki/Huber_loss for definition. \delta is set to 1. This is not the right definition if output and target differ in more than one dimension. """ a = target - output return C.reduce_sum(C.element_select( C.less(C.abs(a), 1), C.square(a) * 0.5, C.abs(a) - 0.5))
def huber_loss(y, y_hat, delta):
    # Same computation as the docstring version above:
    # quadratic inside |error| < delta, linear outside.
    half_delta_squared = 0.5 * delta * delta
    error = y - y_hat
    abs_error = abs(error)

    less_than = 0.5 * square(error)
    more_than = (delta * abs_error) - half_delta_squared
    loss_per_sample = element_select(less(abs_error, delta), less_than, more_than)

    return reduce_sum(loss_per_sample, name='loss')
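A minimal sketch of wiring huber_loss into a training loop, assuming CNTK 2.x and a toy one-layer regressor; every name below (x, y, model, learner, the random data) is illustrative rather than from the original source:

import numpy as np
from cntk import Trainer, input_variable
from cntk.layers import Dense
from cntk.learners import UnitType, learning_rate_schedule, sgd

x = input_variable(4, name='x')
y = input_variable(1, name='y')
model = Dense(1)(x)                      # toy regressor, assumption only
loss = huber_loss(y, model, delta=1.0)   # loss node as defined above
learner = sgd(model.parameters, learning_rate_schedule(0.01, UnitType.minibatch))
trainer = Trainer(model, (loss, None), [learner])
trainer.train_minibatch({x: np.random.rand(16, 4).astype(np.float32),
                         y: np.random.rand(16, 1).astype(np.float32)})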
def rmse(y, y_hat, axis=0):
    """ Compute the Root Mean Squared error as part of the model graph

    :param y: CNTK Variable holding the true value of Y
    :param y_hat: CNTK variable holding the estimated value of Y
    :param axis: The axis over which to compute the mean, 0 by default
    :return: Root Mean Squared error
    """
    return sqrt(reduce_mean(square(y_hat - y), axis=axis))
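A quick, illustrative way to sanity-check the rmse node via eval() (values and variable names are assumptions, not from the source): with a constant error of 0.5, the metric evaluates to 0.5 for each sample.

import numpy as np
from cntk import input_variable

y = input_variable(1)
y_hat = input_variable(1)
metric = rmse(y, y_hat)
# Both samples have |y_hat - y| = 0.5, so the result is 0.5 per sample.
print(metric.eval({y: np.array([[1.0], [2.0]], dtype=np.float32),
                   y_hat: np.array([[1.5], [2.5]], dtype=np.float32)}))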
from cntk import (Trainer, UnitType, adam, learning_rate_schedule,
                  momentum_schedule)
from cntk.initializer import he_uniform
from cntk.layers import Convolution2D, Dense, Sequential, default_options
from cntk.layers.typing import Tensor
from cntk.logging import TensorBoardProgressWriter
from cntk.ops import (abs, argmax, element_select, input_variable, less, minus,
                      one_hot, reduce_max, reduce_sum, relu, square,
                      stop_gradient)


def __init__(self, state_dim, action_dim, gamma=0.99, learning_rate=1e-4,
             momentum=0.95):
    self.state_dim = state_dim
    self.action_dim = action_dim
    self.gamma = gamma

    with default_options(activation=relu, init=he_uniform()):
        # Convolution filter counts were halved to save on memory, no gpu :(
        self.model = Sequential([
            Convolution2D((8, 8), 16, strides=4, name='conv1'),
            Convolution2D((4, 4), 32, strides=2, name='conv2'),
            Convolution2D((3, 3), 32, strides=1, name='conv3'),
            Dense(256, init=he_uniform(scale=0.01), name='dense1'),
            Dense(action_dim, activation=None, init=he_uniform(scale=0.01),
                  name='actions')
        ])
        self.model.update_signature(Tensor[state_dim])

    # Create the target model as a copy of the online model
    self.target_model = None
    self.update_target()

    self.pre_states = input_variable(state_dim, name='pre_states')
    self.actions = input_variable(action_dim, name='actions')
    self.post_states = input_variable(state_dim, name='post_states')
    self.rewards = input_variable((), name='rewards')
    self.terminals = input_variable((), name='terminals')
    self.is_weights = input_variable((), name='is_weights')

    predicted_q = reduce_sum(self.model(self.pre_states) * self.actions, axis=0)

    # DQN - calculate target q values
    # post_q = reduce_max(self.target_model(self.post_states), axis=0)

    # DDQN - the online model selects the action, the target model evaluates it
    online_selection = one_hot(
        argmax(self.model(self.post_states), axis=0), self.action_dim)
    post_q = reduce_sum(
        self.target_model(self.post_states) * online_selection, axis=0)
    post_q = (1.0 - self.terminals) * post_q
    target_q = stop_gradient(self.rewards + self.gamma * post_q)

    # Huber loss
    delta = 1.0
    self.td_error = minus(predicted_q, target_q, name='td_error')
    abs_error = abs(self.td_error)
    errors = element_select(less(abs_error, delta),
                            square(self.td_error) * 0.5,
                            delta * (abs_error - 0.5 * delta))
    loss = errors * self.is_weights

    # Adam based SGD
    lr_schedule = learning_rate_schedule(learning_rate, UnitType.minibatch)
    m_schedule = momentum_schedule(momentum)
    vm_schedule = momentum_schedule(0.999)

    self._learner = adam(self.model.parameters, lr_schedule, m_schedule,
                         variance_momentum=vm_schedule)
    self.writer = TensorBoardProgressWriter(log_dir='metrics', model=self.model)
    self.trainer = Trainer(self.model, (loss, None), [self._learner],
                           self.writer)
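A hedged sketch of how a replay-buffer sample might be fed into this graph; the train_step function, the batch dictionary, and its keys are assumptions for illustration and do not appear in the original source:

def train_step(agent, batch):
    """Feed one minibatch from a (hypothetical) prioritized replay buffer."""
    agent.trainer.train_minibatch({
        agent.pre_states: batch['pre_states'],    # frames before the action
        agent.actions: batch['actions'],          # one-hot chosen actions
        agent.post_states: batch['post_states'],  # frames after the action
        agent.rewards: batch['rewards'],
        agent.terminals: batch['terminals'],      # 1.0 where the episode ended
        agent.is_weights: batch['is_weights'],    # importance-sampling weights
    })
    # td_error could then be read back to update the replay priorities.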