Example #1
0
    def _compute_losses(self, model_output):
        """Masked loss combining L2 regression error with a binary
        cross-entropy term for the stopping criterion, summed (or averaged)
        over time steps.
        """
        mask = self.dataset.symb_mask

        # Stopping head: (batch_size, seq_length)
        stopping_outputs = model_output[0][:, :, 0]
        # Regression head: (batch_size, seq_length, out_dim)
        directions = model_output[1]

        # First three target dims hold the direction, the fourth the stopping flag.
        direction_targets = self.dataset.symb_targets[:, :, :3]
        stopping_targets = self.dataset.symb_targets[:, :, 3]

        if self.normalize_output:
            # Rescale every predicted direction to unit L2 norm.
            directions = directions / l2distance(directions,
                                                 keepdims=True,
                                                 eps=self.eps)

        self.samples = directions

        l2_term = l2distance(self.samples, direction_targets, eps=self.eps)
        stopping_term = T.nnet.binary_crossentropy(stopping_outputs,
                                                   stopping_targets)

        # loss_per_time_step.shape = (batch_size, seq_len)
        self.loss_per_time_step = l2_term + stopping_term
        # loss_per_seq.shape = (batch_size,)
        self.loss_per_seq = T.sum(self.loss_per_time_step * mask, axis=1)

        if not self.sum_over_timestep:
            # Average over the valid time steps instead of summing.
            self.loss_per_seq /= T.sum(mask, axis=1)

        return self.loss_per_seq
Example #2
0
    def _compute_losses(self, model_output):
        """Negative log-likelihood of K staggered targets under a mixture of M
        diagonal Gaussians per time step; also monitors the L2 error of using
        the first component's mu as the predicted direction.

        Returns the loss per sequence, shape (batch_size,).
        """
        # model_output.shape : (batch_size, seq_len, K, M, target_size)
        # self.dataset.symb_targets.shape = (batch_size, seq_len+K-1, target_dims)

        # mask.shape : (batch_size, seq_len) or None
        mask = self.dataset.symb_mask

        # mu.shape = (batch_size, seq_len, K, M, target_dims)
        mu = model_output[:, :, :, :, 0:3]

        # sigma.shape = (batch_size, seq_len, K, M, target_dims)
        sigma = model_output[:, :, :, :, 3:6]

        # Stack K targets for each input (sliding window style)
        # targets.shape = (batch_size, seq_len, K, target_dims)
        # NOTE: `or None` handles k == 1, where -k + i + 1 == 0 would
        # otherwise produce an empty slice.
        targets = T.stack(
            [self.dataset.symb_targets[:, i : (-self.model.k + i + 1) or None] for i in range(self.model.k)], axis=2
        )

        # Add new axis for sum over M
        # targets.shape = (batch_size, seq_len, K, 1, target_dims)
        targets = targets[:, :, :, None, :]

        # For monitoring the L2 error of using $mu$ as the predicted direction (should be comparable to MICCAI's work).
        normalized_mu = mu[:, :, 0, 0] / l2distance(mu[:, :, 0, 0], keepdims=True, eps=1e-8)
        normalized_targets = targets[:, :, 0, 0] / l2distance(targets[:, :, 0, 0], keepdims=True, eps=1e-8)
        self.L2_error_per_item = T.sqrt(T.sum(((normalized_mu - normalized_targets) ** 2), axis=2))
        if mask is not None:
            self.mean_sqr_error = T.sum(self.L2_error_per_item * mask, axis=1) / T.sum(mask, axis=1)
        else:
            self.mean_sqr_error = T.mean(self.L2_error_per_item, axis=1)

        # Likelihood of multivariate gaussian (n dimensions) is :
        # ((2 \pi)^D |\Sigma|)^{-1/2} exp(-1/2 (x - \mu)^T \Sigma^-1 (x - \mu))
        # We suppose a diagonal covariance matrix, so we have :
        #   => |\Sigma| = \prod_n \sigma_n^2
        #   => (x - \mu)^T \Sigma^-1 (x - \mu) = \sum_n ((x_n - \mu_n) / \sigma_n)^2
        m_log_likelihoods = -np.float32((self.target_dims / 2.0) * np.log(2 * np.pi)) + T.sum(
            -T.log(sigma) - 0.5 * T.sqr((targets - mu) / sigma), axis=4
        )

        # Mixture NLL: -log(1/M * sum_m exp(log_lik_m)) = log(M) - logsumexp(...)
        # k_losses_per_timestep.shape : (batch_size, seq_len, K)
        self.k_losses_per_timestep = T.log(self.m) - logsumexp(m_log_likelihoods, axis=3, keepdims=False)

        # loss_per_timestep.shape : (batch_size, seq_len)
        self.loss_per_time_step = T.mean(self.k_losses_per_timestep, axis=2)

        # Average over sequence steps.
        # k_nlls_per_seq.shape :(batch_size, K)
        if mask is not None:
            self.k_losses_per_seq = T.sum(self.k_losses_per_timestep * mask[:, :, None], axis=1) / T.sum(
                mask, axis=1, keepdims=True
            )
        else:
            self.k_losses_per_seq = T.mean(self.k_losses_per_timestep, axis=1)

        # Average over K
        # loss_per_seq.shape :(batch_size,)
        self.loss_per_seq = T.mean(self.k_losses_per_seq, axis=1)
        return self.loss_per_seq
    def _compute_losses(self, model_output):
        """Masked L2 regression loss on the next direction plus a binary
        cross-entropy on the stopping criterion.

        Returns the loss per sequence, shape (batch_size,).
        """
        mask = self.dataset.symb_mask
        # model_output is a pair: (regression head, stopping head).
        # regression_outputs.shape = (batch_size, seq_length, out_dim)
        # stopping.shape = (batch_size, seq_length, 1) -- presumably; confirm with the model.
        regression_outputs, stopping = model_output

        # BUGFIX: `regression_outputs` was previously overwritten with the
        # whole `model_output` tuple right after unpacking, clobbering the
        # regression head; that erroneous reassignment has been removed.
        if self.normalize_output:
            regression_outputs /= l2distance(regression_outputs, keepdims=True, eps=1e-8)

        # Regression part (next direction)
        # L2_errors_per_time_step.shape = (batch_size, seq_len)
        self.L2_errors_per_time_step = l2distance(regression_outputs, self.dataset.symb_targets)
        # avg_L2_error_per_seq.shape = (batch_size,)
        self.avg_L2_error_per_seq = T.sum(self.L2_errors_per_time_step*mask, axis=1) / T.sum(mask, axis=1)

        # Binary classification part (stopping criterion)
        lengths = T.sum(mask, axis=1)
        lengths_int = T.cast(lengths, dtype="int32")  # Mask values are floats.
        idx_examples = T.arange(mask.shape[0])
        # Create a mask that does not contain the last element of each sequence.
        smaller_mask = T.set_subtensor(mask[idx_examples, lengths_int-1], 0)

        # Compute cross-entropy for non-ending points (target = 0).
        target = T.zeros(1)
        cross_entropy_not_ending = T.sum(T.nnet.binary_crossentropy(stopping, target)*smaller_mask[:, :, None], axis=[1, 2])

        # Compute cross-entropy for ending points (target = 1).
        # We add a scaling factor because there is only one ending point per sequence whereas
        # there are multiple non-ending points.
        target = T.ones(1)
        cross_entropy_ending = T.nnet.binary_crossentropy(stopping[idx_examples, lengths_int-1, 0], target) * (lengths-1)
        self.cross_entropy = (cross_entropy_not_ending + cross_entropy_ending) / lengths

        return self.avg_L2_error_per_seq + self.cross_entropy
Example #4
0
    def _compute_losses(self, model_output):
        """L2 distance between (optionally unit-normalized) predictions and
        the targets; one loss value per example.
        """
        # predictions.shape = (batch_size, out_dim)
        predictions = model_output
        if self.normalize_output:
            # Project each prediction onto the unit sphere.
            predictions = predictions / l2distance(predictions, keepdims=True, eps=self.eps)

        self.samples = predictions

        # loss_per_time_step.shape = (batch_size,)
        self.loss_per_time_step = l2distance(self.samples, self.dataset.symb_targets)
        return self.loss_per_time_step
Example #5
0
    def _compute_losses(self, model_output):
        """Sign-invariant L2 loss: distance to the closer of the target and
        its opposite direction.
        """
        # predictions.shape = (batch_size, out_dim)
        predictions = model_output
        if self.normalize_output:
            predictions = predictions / l2distance(predictions, keepdims=True, eps=self.eps)

        self.samples = predictions

        targets = self.dataset.symb_targets
        # Distance to the target and to the flipped target.
        dist_pos = l2distance(self.samples, targets)
        dist_neg = l2distance(self.samples, -targets)

        # loss_per_time_step.shape = (batch_size,)
        self.loss_per_time_step = T.min(T.stack([dist_pos, dist_neg], axis=1), axis=1)
        return self.loss_per_time_step
Example #6
0
    def _compute_losses(self, model_output):
        """Masked L2 loss using the mean of the most probable mixture
        component as the prediction at each time step.
        """
        mask = self.dataset.symb_mask

        # model_output.shape = (batch_size, seq_length, regression_layer_size)
        # mixture_weights.shape : (batch_size, seq_len, n_gaussians)
        # means.shape : (batch_size, seq_len, n_gaussians, 3)
        mixture_weights, means, stds = self.model.get_mixture_parameters(
            model_output, ndim=4)

        # Index of the most probable component at every (example, step).
        best_component = T.argmax(mixture_weights, axis=2)

        batch_idx = T.arange(mixture_weights.shape[0])[:, None]
        step_idx = T.arange(mixture_weights.shape[1])[None, :]
        # samples.shape : (batch_size, seq_len, 3)
        self.samples = means[batch_idx, step_idx, best_component]

        # loss_per_time_step.shape = (batch_size, seq_len)
        self.loss_per_time_step = l2distance(self.samples,
                                             self.dataset.symb_targets)
        # Masked average over time; loss_per_seq.shape = (batch_size,)
        self.loss_per_seq = T.sum(self.loss_per_time_step * mask,
                                  axis=1) / T.sum(mask, axis=1)

        return self.loss_per_seq
Example #7
0
    def fprop(self, X):
        """Affine transform of X, optionally rescaled to unit L2 norm."""
        activation = T.dot(X, self.W) + self.b
        if self.normed:
            # Normalize each output vector to unit length.
            activation = activation / l2distance(activation, keepdims=True, eps=1e-8)
        return activation
Example #8
0
    def _compute_losses(self, model_output):
        """Masked mean L2 distance between predictions and targets,
        one value per sequence.
        """
        mask = self.dataset.symb_mask

        # predictions.shape = (batch_size, seq_length, out_dim)
        predictions = model_output
        if self.normalize_output:
            predictions = predictions / l2distance(predictions, keepdims=True, eps=self.eps)

        self.samples = predictions

        # loss_per_time_step.shape = (batch_size, seq_len)
        self.loss_per_time_step = l2distance(self.samples, self.dataset.symb_targets)

        # Masked average over time steps; loss_per_seq.shape = (batch_size,)
        masked_sum = T.sum(self.loss_per_time_step * mask, axis=1)
        self.loss_per_seq = masked_sum / T.sum(mask, axis=1)

        return self.loss_per_seq
    def _compute_losses(self, model_output):
        """Masked L2 regression loss on the next direction plus a binary
        cross-entropy term on the stopping criterion.

        Returns the loss per sequence, shape (batch_size,).
        """
        mask = self.dataset.symb_mask
        # model_output is a pair: (regression head, stopping head).
        # regression_outputs.shape = (batch_size, seq_length, out_dim)
        # stopping.shape = (batch_size, seq_length, 1) -- presumably; confirm with the model.
        regression_outputs, stopping = model_output

        # BUGFIX: the unpacked regression head used to be overwritten with
        # the whole `model_output` tuple on the following line; that
        # erroneous reassignment has been removed.
        if self.normalize_output:
            regression_outputs /= l2distance(regression_outputs,
                                             keepdims=True,
                                             eps=1e-8)

        # Regression part (next direction)
        # L2_errors_per_time_step.shape = (batch_size, seq_len)
        self.L2_errors_per_time_step = l2distance(regression_outputs,
                                                  self.dataset.symb_targets)
        # avg_L2_error_per_seq.shape = (batch_size,)
        self.avg_L2_error_per_seq = T.sum(self.L2_errors_per_time_step * mask,
                                          axis=1) / T.sum(mask, axis=1)

        # Binary classification part (stopping criterion)
        lengths = T.sum(mask, axis=1)
        lengths_int = T.cast(lengths, dtype="int32")  # Mask values are floats.
        idx_examples = T.arange(mask.shape[0])
        # Create a mask that does not contain the last element of each sequence.
        smaller_mask = T.set_subtensor(mask[idx_examples, lengths_int - 1], 0)

        # Compute cross-entropy for non-ending points (target = 0).
        target = T.zeros(1)
        cross_entropy_not_ending = T.sum(
            T.nnet.binary_crossentropy(stopping, target) *
            smaller_mask[:, :, None],
            axis=[1, 2])

        # Compute cross-entropy for ending points (target = 1).
        # We add a scaling factor because there is only one ending point per sequence whereas
        # there are multiple non-ending points.
        target = T.ones(1)
        cross_entropy_ending = T.nnet.binary_crossentropy(
            stopping[idx_examples, lengths_int - 1, 0], target) * (lengths - 1)
        self.cross_entropy = (cross_entropy_not_ending +
                              cross_entropy_ending) / lengths

        return self.avg_L2_error_per_seq + self.cross_entropy
Example #10
0
    def fprop(self, X, dropout_W=None):
        """Affine transform with an optional per-input dropout mask.

        dropout_W is a vector with one entry per input unit; zero entries
        drop the corresponding rows of W.
        """
        W = self.W
        # BUGFIX: `if dropout_W:` evaluates the truth value of the argument,
        # which Theano symbolic tensors do not support (TypeError at graph
        # build time); test against None instead.
        if dropout_W is not None:
            W = W * dropout_W[:, None]
        out = T.dot(X, W) + self.b
        # Normalize the output vector.
        if self.normed:
            out /= l2distance(out, keepdims=True, eps=1e-8)

        return out
Example #11
0
    def _compute_losses(self, model_output):
        """Negative squared cosine similarity loss (sign-invariant)."""
        # predictions.shape = (batch_size, out_dim)
        predictions = model_output
        if self.normalize_output:
            predictions = predictions / l2distance(predictions, keepdims=True, eps=self.eps)

        self.samples = predictions

        # Maximize squared cosine similarity = minimize -cos**2.
        # loss_per_time_step.shape = (batch_size,)
        dot_products = T.sum(self.samples * self.dataset.symb_targets, axis=1)
        self.loss_per_time_step = -T.square(dot_products)

        return self.loss_per_time_step
Example #12
0
    def _compute_losses(self, model_output):
        """Loss = negative squared cosine similarity with the target
        direction (so the sign of the prediction does not matter).
        """
        # outputs.shape = (batch_size, out_dim)
        outputs = model_output
        if self.normalize_output:
            # Rescale each prediction to unit L2 norm.
            norms = l2distance(outputs, keepdims=True, eps=self.eps)
            outputs = outputs / norms

        self.samples = outputs

        # Maximize squared cosine similarity = minimize -cos**2.
        # loss_per_time_step.shape = (batch_size,)
        similarity = T.sum(self.samples * self.dataset.symb_targets, axis=1)
        self.loss_per_time_step = -T.square(similarity)

        return self.loss_per_time_step
Example #13
0
    def _compute_losses(self, model_output):
        """Masked L2 loss per sequence, summed or averaged over time steps
        depending on `self.sum_over_timestep`.
        """
        mask = self.dataset.symb_mask

        # outputs.shape = (batch_size, seq_length, out_dim)
        outputs = model_output
        if self.normalize_output:
            # Rescale every predicted direction to unit L2 norm.
            norms = l2distance(outputs, keepdims=True, eps=self.eps)
            outputs = outputs / norms

        self.samples = outputs

        # loss_per_time_step.shape = (batch_size, seq_len)
        self.loss_per_time_step = l2distance(self.samples,
                                             self.dataset.symb_targets,
                                             eps=self.eps)

        # loss_per_seq.shape = (batch_size,)
        masked_sum = T.sum(self.loss_per_time_step * mask, axis=1)
        if self.sum_over_timestep:
            self.loss_per_seq = masked_sum
        else:
            # Average over the valid (unmasked) time steps.
            self.loss_per_seq = masked_sum / T.sum(mask, axis=1)

        return self.loss_per_seq
Example #14
0
    def fprop(self, X, dropout_W=None):
        """Affine transform followed by layer normalization, with an
        optional per-input dropout mask.

        dropout_W is a vector with one entry per input unit; zero entries
        drop the corresponding rows of W.
        """
        W = self.W
        # BUGFIX: `if dropout_W:` evaluates the truth value of the argument,
        # which Theano symbolic tensors do not support (TypeError at graph
        # build time); test against None instead.
        if dropout_W is not None:
            W = W * dropout_W[:, None]
        units_inputs = T.dot(X, W)

        # Layer normalization: standardize across units, then rescale (g)
        # and shift (b).
        mean = T.mean(units_inputs, axis=1, keepdims=True)
        std = T.std(units_inputs, axis=1, keepdims=True)

        units_inputs_normalized = (units_inputs - mean) / (std + self.eps)

        out = self.g * units_inputs_normalized + self.b

        # Normalize the output vector.
        if self.normed:
            out /= l2distance(out, keepdims=True, eps=1e-8)

        return out
Example #15
0
    def _compute_losses(self, model_output):
        """Masked L2 loss of the max-component samples against the targets.

        model_output.shape : (batch_size, seq_len, K, M, target_size)
        self.dataset.symb_targets.shape = (batch_size, seq_len+K-1, target_dims)
        """
        # Keep only the first target of each sliding window; `or None`
        # handles k == 1 (a 0 slice bound would yield an empty slice).
        # targets.shape = (batch_size, seq_len, 3)
        trim = -self.model.k + 1 or None
        targets = self.dataset.symb_targets[:, :trim, :]

        # mask.shape : (batch_size, seq_len)
        mask = self.dataset.symb_mask

        # T.squeeze(.) should remove the K=1 and M=1 dimensions.
        # samples.shape : (batch_size, seq_len, 3)
        self.samples = self.model.get_max_component_samples(T.squeeze(model_output))

        # loss_per_time_step.shape = (batch_size, seq_len)
        self.loss_per_time_step = l2distance(self.samples, targets)

        # Masked average over time steps; loss_per_seq.shape = (batch_size,)
        self.loss_per_seq = T.sum(self.loss_per_time_step * mask, axis=1) / T.sum(mask, axis=1)

        return self.loss_per_seq
Example #16
0
    def _compute_losses(self, model_output):
        """Masked L2 loss using the mixture's expected direction
        (mixture-weighted average of the component means).
        """
        mask = self.dataset.symb_mask

        # model_output.shape = (batch_size, seq_length, regression_layer_size)
        mixture_weights, means, stds = self.model.get_mixture_parameters(model_output, ndim=4)
        # mixture_weights.shape : (batch_size, seq_len, n_gaussians)
        # means.shape : (batch_size, seq_len, n_gaussians, 3)

        # Expected value over the components.
        # samples.shape : (batch_size, seq_len, 3)
        weighted_means = mixture_weights[:, :, :, None] * means
        self.samples = T.sum(weighted_means, axis=2)

        # loss_per_time_step.shape = (batch_size, seq_len)
        self.loss_per_time_step = l2distance(self.samples, self.dataset.symb_targets)

        # Masked average over time steps; loss_per_seq.shape = (batch_size,)
        masked_sum = T.sum(self.loss_per_time_step * mask, axis=1)
        self.loss_per_seq = masked_sum / T.sum(mask, axis=1)

        return self.loss_per_seq
Example #17
0
    def _compute_losses(self, model_output):
        """Masked L2 loss of the most-likely-component samples.

        model_output.shape : (batch_size, seq_len, K, M, target_size)
        self.dataset.symb_targets.shape = (batch_size, seq_len+K-1, target_dims)
        """
        # mask.shape : (batch_size, seq_len)
        mask = self.dataset.symb_mask

        # Drop the K-1 extra targets at the end of each sequence; `or None`
        # handles k == 1, where the slice bound would otherwise be 0.
        # targets.shape = (batch_size, seq_len, 3)
        end = -self.model.k + 1 or None
        targets = self.dataset.symb_targets[:, :end, :]

        # T.squeeze(.) should remove the K=1 and M=1 dimensions.
        # samples.shape : (batch_size, seq_len, 3)
        squeezed_output = T.squeeze(model_output)
        self.samples = self.model.get_max_component_samples(squeezed_output)

        # loss_per_time_step.shape = (batch_size, seq_len)
        self.loss_per_time_step = l2distance(self.samples, targets)

        # loss_per_seq.shape = (batch_size,)
        per_seq_sum = T.sum(self.loss_per_time_step * mask, axis=1)
        self.loss_per_seq = per_seq_sum / T.sum(mask, axis=1)

        return self.loss_per_seq
Example #18
0
    def _compute_losses(self, model_output):
        """Masked L2 loss using the Gaussian mean as the prediction."""
        mask = self.dataset.symb_mask

        # model_output.shape = (batch_size, seq_length, regression_layer_size)
        # mu.shape : (batch_size, seq_len, 3)
        # sigma.shape : (batch_size, seq_len, 3)
        mu, sigma = self.model.get_distribution_parameters(model_output)

        # targets.shape : (batch_size, seq_len, 3)
        targets = self.dataset.symb_targets

        # The distribution mean is used as the predicted direction.
        # samples.shape : (batch_size, seq_len, 3)
        self.samples = mu

        # loss_per_time_step.shape = (batch_size, seq_len)
        self.loss_per_time_step = l2distance(self.samples, targets)

        # Masked average over time steps; loss_per_seq.shape = (batch_size,)
        masked_sum = T.sum(self.loss_per_time_step * mask, axis=1)
        self.loss_per_seq = masked_sum / T.sum(mask, axis=1)

        return self.loss_per_seq
Example #19
0
    def _compute_losses(self, model_output):
        """Negative log-likelihood of K staggered targets under a mixture of M
        diagonal Gaussians per time step; also monitors the L2 error of using
        the first component's mu as the predicted direction.

        Returns the loss per sequence, shape (batch_size,).
        """
        # model_output.shape : (batch_size, seq_len, K, M, target_size)
        # self.dataset.symb_targets.shape = (batch_size, seq_len+K-1, target_dims)

        # mask.shape : (batch_size, seq_len) or None
        mask = self.dataset.symb_mask

        # mu.shape = (batch_size, seq_len, K, M, target_dims)
        mu = model_output[:, :, :, :, 0:3]

        # sigma.shape = (batch_size, seq_len, K, M, target_dims)
        sigma = model_output[:, :, :, :, 3:6]

        # Stack K targets for each input (sliding window style)
        # targets.shape = (batch_size, seq_len, K, target_dims)
        # NOTE: `or None` handles k == 1, where -k + i + 1 == 0 would
        # otherwise produce an empty slice.
        targets = T.stack([
            self.dataset.symb_targets[:, i:(-self.model.k + i + 1) or None]
            for i in range(self.model.k)
        ],
                          axis=2)

        # Add new axis for sum over M
        # targets.shape = (batch_size, seq_len, K, 1, target_dims)
        targets = targets[:, :, :, None, :]

        # For monitoring the L2 error of using $mu$ as the predicted direction (should be comparable to MICCAI's work).
        normalized_mu = mu[:, :, 0, 0] / l2distance(
            mu[:, :, 0, 0], keepdims=True, eps=1e-8)
        normalized_targets = targets[:, :, 0, 0] / l2distance(
            targets[:, :, 0, 0], keepdims=True, eps=1e-8)
        self.L2_error_per_item = T.sqrt(
            T.sum(((normalized_mu - normalized_targets)**2), axis=2))
        if mask is not None:
            self.mean_sqr_error = T.sum(self.L2_error_per_item * mask,
                                        axis=1) / T.sum(mask, axis=1)
        else:
            self.mean_sqr_error = T.mean(self.L2_error_per_item, axis=1)

        # Likelihood of multivariate gaussian (n dimensions) is :
        # ((2 \pi)^D |\Sigma|)^{-1/2} exp(-1/2 (x - \mu)^T \Sigma^-1 (x - \mu))
        # We suppose a diagonal covariance matrix, so we have :
        #   => |\Sigma| = \prod_n \sigma_n^2
        #   => (x - \mu)^T \Sigma^-1 (x - \mu) = \sum_n ((x_n - \mu_n) / \sigma_n)^2
        m_log_likelihoods = -np.float32(
            (self.target_dims / 2.) * np.log(2 * np.pi)) + T.sum(
                -T.log(sigma) - 0.5 * T.sqr((targets - mu) / sigma), axis=4)

        # Mixture NLL: -log(1/M * sum_m exp(log_lik_m)) = log(M) - logsumexp(...)
        # k_losses_per_timestep.shape : (batch_size, seq_len, K)
        self.k_losses_per_timestep = T.log(self.m) - logsumexp(
            m_log_likelihoods, axis=3, keepdims=False)

        # loss_per_timestep.shape : (batch_size, seq_len)
        self.loss_per_time_step = T.mean(self.k_losses_per_timestep, axis=2)

        # Average over sequence steps.
        # k_nlls_per_seq.shape :(batch_size, K)
        if mask is not None:
            self.k_losses_per_seq = T.sum(
                self.k_losses_per_timestep * mask[:, :, None], axis=1) / T.sum(
                    mask, axis=1, keepdims=True)
        else:
            self.k_losses_per_seq = T.mean(self.k_losses_per_timestep, axis=1)

        # Average over K
        # loss_per_seq.shape :(batch_size,)
        self.loss_per_seq = T.mean(self.k_losses_per_seq, axis=1)
        return self.loss_per_seq