Example No. 1
    def __init__(self,
                 inputs,
                 targets=None,
                 name="dataset",
                 keep_on_cpu=False):
        """
        Parameters
        ----------
        inputs : list of ndarray
            Training examples (can be variable length sequences).
        targets : ndarray (optional)
            Target for each training example (can be variable length sequences).
        name : str (optional)
            The name of the dataset is used to name Theano variables. Default: 'dataset'.
        """
        self.keep_on_cpu = keep_on_cpu
        self.name = name
        self.inputs = inputs
        self.targets = targets

        self.symb_inputs = T.TensorVariable(
            type=T.TensorType("floatX", [False] * (inputs[0].ndim + 1)),
            name=self.name + '_symb_inputs')
        # For debugging Theano graphs.
        self.symb_inputs.tag.test_value = inputs[0][None, ...]

        self.symb_targets = None
        if self.has_targets:
            self.symb_targets = T.TensorVariable(
                type=T.TensorType("floatX", [False] * (targets[0].ndim + 1)),
                name=self.name + '_symb_targets')
            # For debugging Theano graphs.
            self.symb_targets.tag.test_value = targets[0][None, ...]
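Both symbolic variables above follow the same Theano idiom: a `TensorVariable` is built from a `TensorType` whose broadcastable pattern has one `False` entry per data dimension (plus a leading batch axis), and a `tag.test_value` is attached so Theano can propagate concrete shapes while the graph is being built. A minimal, self-contained sketch of that idiom (names and data are illustrative, not from the project):

import numpy as np
import theano
import theano.tensor as T

# Ask Theano to evaluate test values eagerly while graphs are constructed.
theano.config.compute_test_value = 'warn'

# One training example; the symbolic variable gets one extra (batch) axis.
example = np.random.rand(7, 3).astype(theano.config.floatX)
symb_inputs = T.TensorVariable(
    type=T.TensorType("floatX", [False] * (example.ndim + 1)),
    name='dataset_symb_inputs')
symb_inputs.tag.test_value = example[None, ...]  # shape (1, 7, 3)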
Example No. 2
    def __init__(self, inputs, targets=None, name="dataset"):
        """
        Parameters
        ----------
        inputs : ndarray
            Training examples
        targets : ndarray (optional)
            Target for each training example.
        name : str (optional)
            The name of the dataset is used to name Theano variables. Default: 'dataset'.
        """
        self.name = name
        self.inputs = inputs
        self.targets = targets
        self.symb_inputs = T.TensorVariable(
            type=T.TensorType("floatX", [False] * self.inputs.ndim),
            name=self.name + '_symb_inputs')
        # For debugging Theano graphs.
        self.symb_inputs.tag.test_value = self.inputs.get_value()

        self.symb_targets = None
        if self.has_targets:
            self.symb_targets = T.TensorVariable(
                type=T.TensorType("floatX", [False] * self.targets.ndim),
                name=self.name + '_symb_targets')
            # For debugging Theano graphs.
            self.symb_targets.tag.test_value = self.targets.get_value()
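In this variant `inputs` and `targets` are Theano shared variables rather than plain arrays (hence the `get_value()` calls), so the full stored array doubles as the test value. A hedged sketch of the same setup, assuming the data is wrapped with `theano.shared`:

import numpy as np
import theano
import theano.tensor as T

data = theano.shared(np.zeros((100, 10), dtype=theano.config.floatX), name='inputs')
symb_inputs = T.TensorVariable(
    type=T.TensorType("floatX", [False] * data.ndim),
    name='dataset_symb_inputs')
symb_inputs.tag.test_value = data.get_value()  # the whole stored array serves as test value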
Example No. 3
    def expr(self, model, data):

        weights = model.get_weights()
        error = weights - weights.mean()
        kurtosis = weights.shape[0] * weights.shape[1] * N.sum(
            N.power(error, 4)) / N.power(N.sum(N.power(error, 2)), 2) - 3
        return T.TensorVariable(kurtosis)
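The expression is the biased excess kurtosis of the flattened weight matrix, n * sum(e**4) / sum(e**2)**2 - 3 with n = rows * cols, which matches `scipy.stats.kurtosis` over the flattened array. A quick NumPy/SciPy check of that equivalence (not part of the original cost):

import numpy as np
from scipy.stats import kurtosis

w = np.random.randn(4, 6)
e = w - w.mean()
manual = w.shape[0] * w.shape[1] * np.sum(e ** 4) / np.sum(e ** 2) ** 2 - 3
assert np.isclose(manual, kurtosis(w, axis=None, fisher=True, bias=True))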
Example No. 4
 def test_tensorvariable(self):
     ## Re-init counter
     Variable.__count__ = count(0)
     r1 = tensor.TensorType(dtype='int32', broadcastable=())('myvar')
     r2 = tensor.TensorVariable(
         tensor.TensorType(dtype='int32', broadcastable=()))
     r3 = shared(numpy.random.randn(3, 4))
     assert r1.auto_name == "auto_0"
     assert r2.auto_name == "auto_1"
     assert r3.auto_name == "auto_2"
Example No. 5
 def test_tensorvariable(self):
     ## Get counter value
     autoname_id = next(Variable.__count__)
     Variable.__count__ = count(autoname_id)
     r1 = tensor.TensorType(dtype='int32', broadcastable=())('myvar')
     r2 = tensor.TensorVariable(
         tensor.TensorType(dtype='int32', broadcastable=()))
     r3 = shared(numpy.random.randn(3, 4))
     assert r1.auto_name == "auto_" + str(autoname_id)
     assert r2.auto_name == "auto_" + str(autoname_id + 1)
     assert r3.auto_name == "auto_" + str(autoname_id + 2)
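Both tests exercise the same mechanism: every `Variable` draws its `auto_name` from a module-level `itertools.count`, so names come out as "auto_0", "auto_1", ... in creation order. Example No. 4 resets the counter to zero, while Example No. 5 first reads its current value, which keeps the assertions valid even when other variables were created earlier in the test run. A compressed sketch of that counter handling (assuming `Variable` is `theano.gof.Variable`, as in Theano's own test suite):

from itertools import count

from theano import tensor
from theano.gof import Variable

autoname_id = next(Variable.__count__)   # consume one value to learn the current position
Variable.__count__ = count(autoname_id)  # put it back so numbering continues from there
r1 = tensor.TensorType(dtype='int32', broadcastable=())('myvar')
assert r1.auto_name == "auto_" + str(autoname_id)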
Example No. 6
    def __init__(self, dataset, batch_size, seed=1234):
        """
        Parameters
        ----------
        dataset : :class:`MaskClassifierDataset`
            Dataset from which to get the examples.
        batch_size : int
            Nb. of examples per batch.
        seed : int, optional
            Seed for the random generator when shuffling streamlines or adding noise to the streamlines.
        """
        self.dataset = dataset
        self.batch_size = batch_size

        self.indices = np.arange(len(self.dataset))

        self.seed = seed
        self.rng = np.random.RandomState(self.seed)

        # Shared variables
        self._shared_batch_inputs = sharedX(np.ndarray((0, 0)))
        self._shared_batch_targets = sharedX(np.ndarray((0, )))

        # Test value
        batch_inputs, batch_targets = self._next_batch(0)

        # Redefine symbolic variables for single input model
        self.dataset.symb_inputs = T.TensorVariable(
            type=T.TensorType("floatX", [False] * batch_inputs.ndim),
            name=self.dataset.name + '_symb_inputs')
        self.dataset.symb_inputs.tag.test_value = batch_inputs

        # Since this batch scheduler creates its own targets.
        if self.dataset.symb_targets is None:
            self.dataset.symb_targets = T.TensorVariable(
                type=T.TensorType("floatX", [False] * batch_targets.ndim),
                name=self.dataset.name + '_symb_targets')

        self.dataset.symb_targets.tag.test_value = batch_targets
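`sharedX` is a helper from the surrounding project rather than part of Theano; these schedulers use it to pre-allocate shared storage for the batches. A plausible, hedged definition (an assumption about its behaviour, with `keep_on_cpu` accepted but not implemented here) is simply a `theano.shared` wrapper that casts to `floatX`:

import numpy as np
import theano


def sharedX(value, name=None, keep_on_cpu=False):
    # keep_on_cpu is accepted for signature compatibility only; forcing CPU
    # storage would require picking the constructor explicitly, omitted here.
    return theano.shared(np.asarray(value, dtype=theano.config.floatX), name=name)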
Example No. 7
    def __init__(self,
                 dataset,
                 batch_size,
                 use_mask_as_input=False,
                 keep_mask=False,
                 seed=1234):
        """
        Parameters
        ----------
        dataset : `SequenceDataset` object
            Dataset of datasets (one for each bundle).
        batch_size : int
            Number of examples per batch. *Must be greater than the number of
            bundles in `dataset`.*
        seed : int (optional)
            Seed of the random numbers generator used to sample a different
            regressive mask for each example.
        """
        super().__init__(dataset, batch_size)

        self.use_mask_as_input = use_mask_as_input
        self.seed = seed
        self.rng = np.random.RandomState(self.seed)
        self.keep_mask = keep_mask

        # Allocate memory for the autoregressive mask.
        self.mask_shape = (len(dataset), ) + self.dataset.input_shape
        self._shared_mask_o_lt_d = sharedX(np.zeros(self.mask_shape),
                                           name='autoregressive_mask',
                                           keep_on_cpu=True)

        # Add a new attribute: a symbolic variable representing the auto regressive mask.
        self._shared_mask_o_lt_d.set_value(self.generate_autoregressive_mask())
        self.dataset.mask_o_lt_d = T.TensorVariable(
            type=T.TensorType("floatX", [False] * dataset.inputs.ndim),
            name=dataset.name + '_symb_mask')

        # Keep only `batch_size` masks as test values.
        self.dataset.mask_o_lt_d.tag.test_value = (
            self._shared_mask_o_lt_d.get_value()[:batch_size])  # For debugging Theano graphs.

        if self.use_mask_as_input:
            self.dataset.symb_inputs.tag.test_value = np.concatenate(
                [self.dataset.symb_inputs.tag.test_value *
                 self.dataset.mask_o_lt_d.tag.test_value,
                 self.dataset.mask_o_lt_d.tag.test_value],
                axis=1)
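The `generate_autoregressive_mask` method is defined elsewhere in the project; conceptually it produces, for every example, a binary mask selecting the dimensions that come before position d in a random ordering (the "o < d" of NADE-style training), and the masked inputs are then concatenated with the mask itself along the feature axis. A rough sketch of such a mask generator, under those assumptions and for flat inputs only:

import numpy as np


def generate_autoregressive_mask(rng, nb_examples, input_size):
    masks = np.zeros((nb_examples, input_size), dtype='float32')
    for i in range(nb_examples):
        ordering = rng.permutation(input_size)  # random ordering of the dimensions
        d = rng.randint(input_size)             # how many dimensions are already "seen"
        masks[i, ordering[:d]] = 1.0
    return masks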
Example No. 8
    def __init__(self,
                 dataset,
                 batch_size,
                 k,
                 noisy_streamlines_sigma=None,
                 nb_updates_per_epoch=None,
                 seed=1234,
                 include_last_point=False):
        self.dataset = dataset
        self.batch_size = batch_size

        self.k = k

        self.include_last_point = include_last_point

        self.use_augment_by_flipping = True

        self._nb_updates_per_epoch = nb_updates_per_epoch
        self.use_sample_from_bundle = self._nb_updates_per_epoch is not None

        self.noisy_streamlines_sigma = noisy_streamlines_sigma
        self.use_noisy_streamlines = self.noisy_streamlines_sigma is not None

        self.seed = seed
        self.rng = np.random.RandomState(self.seed)
        self.rng_noise = np.random.RandomState(self.seed + 1)

        # No need for a mask since streamlines are going to be resampled.
        self.dataset.symb_mask = None

        # Shared variables
        self._shared_batch_inputs = sharedX(np.ndarray((0, 0, 0)))
        self._shared_batch_targets = sharedX(np.ndarray((0, 0, 0, 0)))

        # Test value
        batch_inputs, batch_targets = self._next_batch(0)
        self.dataset.symb_inputs.tag.test_value = batch_inputs

        # Since this batch scheduler creates its own targets.
        if self.dataset.symb_targets is None:
            self.dataset.symb_targets = T.TensorVariable(
                type=T.TensorType("floatX", [False] * batch_targets.ndim),
                name=self.dataset.name + '_symb_targets')

        self.dataset.symb_targets.tag.test_value = batch_targets
Example No. 9
 def __init__(self,
              inputs,
              targets=None,
              name="dataset",
              keep_on_cpu=False):
     """
     Parameters
     ----------
     inputs : list of ndarray
         Training examples (can be variable length sequences).
     targets : ndarray (optional)
         Target for each training example (can be variable length sequences).
     name : str (optional)
         The name of the dataset is used to name Theano variables. Default: 'dataset'.
     """
     super().__init__(inputs, targets, name, keep_on_cpu)
      self.symb_mask = T.TensorVariable(
          type=T.TensorType("floatX", [False] * inputs[0].ndim),
          name=self.name + '_symb_mask')
      # For debugging Theano graphs.
      self.symb_mask.tag.test_value = (inputs[0][:, 0] > 0.5).astype(floatX)[None, ...]
Example No. 10
def test_th_matmul():
    vlist = []
    flist = []
    ndlist = []
    for i in range(2, 30):
        dims = int(np.random.random() * 4 + 2)

        # Create a tuple of tensors with potentially different broadcastability.
        vs = tuple(
            tt.TensorVariable(
                tt.TensorType(
                    'float64',
                    tuple((p < .3) for p in np.random.ranf(dims - 2))
                    # Make full matrices
                    + (False, False))) for _ in range(2))
        vs = tuple(
            tt.swapaxes(v, -2, -1) if j % 2 == 0 else v
            for j, v in enumerate(vs))

        f = th.function([*vs], [matmul(*vs)])

        # Create the default shape for the test ndarrays
        defshape = tuple(int(np.random.random() * 5 + 1) for _ in range(dims))
        # Create a test array matching the broadcastability of each v, for each v.
        nds = tuple(
            np.random.ranf(
                tuple(s if not v.broadcastable[j] else 1
                      for j, s in enumerate(defshape))) for v in vs)
        nds = tuple(
            np.swapaxes(nd, -2, -1) if j % 2 == 0 else nd
            for j, nd in enumerate(nds))

        ndlist.append(nds)
        vlist.append(vs)
        flist.append(f)

    for i in range(len(ndlist)):
        assert np.allclose(flist[i](*ndlist[i]), np.matmul(*ndlist[i]))
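The random `(p < .3)` flags make some leading axes broadcastable, and the matching test arrays are built with size 1 on exactly those axes. In practice a broadcastable flag means a compiled function only accepts arrays of size 1 on that dimension; a small standalone illustration (independent of the `matmul` helper used above):

import numpy as np
import theano
import theano.tensor as tt

v = tt.TensorType('float64', (True, False, False))('v')  # leading axis is broadcastable
f = theano.function([v], v.sum())

print(f(np.ones((1, 3, 4))))       # accepted: size 1 on the broadcastable axis
try:
    f(np.ones((2, 3, 4)))          # rejected: non-unit size on a broadcastable axis
except TypeError as err:
    print('rejected:', err)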
Example No. 11
    def fit(self, X, bounds=None, constraints=None, use_gradient=True,
            **kwargs):
        # Map parameters to placeholders
        param_to_placeholder = []
        param_to_index = {}

        for i, v in enumerate(self.parameters_):
            w = T.TensorVariable(v.type)
            param_to_placeholder.append((v, w))
            param_to_index[v] = i

        # Build bounds
        mapped_bounds = None

        if bounds is not None:
            mapped_bounds = [(None, None) for v in param_to_placeholder]

            for b in bounds:
                mapped_bounds[param_to_index[b["param"]]] = b["bounds"]

        # Build constraints
        mapped_constraints = None

        if constraints is not None:
            mapped_constraints = []

            for c in constraints:
                args = c["param"]
                if isinstance(args, SharedVariable):
                    args = (args, )

                # Bind c and args now; a bare closure would see only the values
                # from the loop's final iteration (late binding).
                m_c = {
                    "type": c["type"],
                    "fun": lambda x, c=c, args=args: c["fun"](
                        *[x[param_to_index[a]] for a in args])
                }

                if "jac" in c:
                    m_c["jac"] = lambda x: c["jac"](*[x[param_to_index[a]]
                                                      for a in args])

                mapped_constraints.append(m_c)

        # Derive objective and gradient
        objective_ = theano.function(
            [self.X] + [w for _, w in param_to_placeholder] +
            [theano.In(v, name=v.name) for v in self.observeds_],
            T.sum(self.nnlf_),
            givens=param_to_placeholder,
            allow_input_downcast=True)

        def objective(x):
            return objective_(X, *x, **kwargs) / len(X)

        if use_gradient:
            gradient_ = theano.function(
                [self.X] + [w for _, w in param_to_placeholder] +
                [theano.In(v, name=v.name) for v in self.observeds_],
                theano.grad(T.sum(self.nnlf_),
                            [v for v, _ in param_to_placeholder]),
                givens=param_to_placeholder,
                allow_input_downcast=True)

            def gradient(x):
                return np.array(gradient_(X, *x, **kwargs)) / len(X)

        # Solve!
        x0 = np.array([v.get_value() for v, _ in param_to_placeholder])
        r = minimize(objective,
                     jac=gradient if use_gradient else None,
                     x0=x0,
                     method=self.optimizer,
                     bounds=mapped_bounds,
                     constraints=mapped_constraints)

        if r.success:
            # Assign the solution
            for i, value in enumerate(r.x):
                param_to_placeholder[i][0].set_value(value)

        else:
            print("Parameter fitting failed!")
            print(r)

        return self
Example No. 12
    def fit(self,
            X,
            bounds=None,
            constraints=None,
            use_gradient=True,
            optimizer=None,
            **kwargs):
        """Fit the distribution parameters to data by minimizing the negative
        log-likelihood of the data.

        Parameters
        ----------
        * `X` [array-like, shape=(n_samples, n_features)]:
            The samples.

        * `bounds` [list of (parameter, (low, high))]:
            The parameter bounds.

        * `constraints`:
            The constraints on the parameters.

        * `use_gradient` [boolean, default=True]:
            Whether to use exact gradients (if `True`) or numerical gradients
            (if `False`).

        * `optimizer` [string]:
            The optimization method.

        Returns
        -------
        * `self` [object]:
            `self`.
        """
        # Map parameters to placeholders
        param_to_placeholder = []
        param_to_index = {}

        for i, v in enumerate(self.parameters_):
            w = T.TensorVariable(v.type)
            param_to_placeholder.append((v, w))
            param_to_index[v] = i

        # Build bounds
        mapped_bounds = None

        if bounds is not None:
            mapped_bounds = [(None, None) for v in param_to_placeholder]

            for b in bounds:
                mapped_bounds[param_to_index[b["param"]]] = b["bounds"]

        # Build constraints
        mapped_constraints = None

        if constraints is not None:
            mapped_constraints = []

            for c in constraints:
                args = c["param"]
                if isinstance(args, SharedVariable):
                    args = (args, )

                # Bind c and args now; a bare closure would see only the values
                # from the loop's final iteration (late binding).
                m_c = {
                    "type": c["type"],
                    "fun": lambda x, c=c, args=args: c["fun"](
                        *[x[param_to_index[a]] for a in args])
                }

                if "jac" in c:
                    m_c["jac"] = lambda x: c["jac"](
                        *[x[param_to_index[a]] for a in args])

                mapped_constraints.append(m_c)

        # Derive objective and gradient
        objective_ = theano.function(
            [self.X] + [w for _, w in param_to_placeholder] +
            [theano.In(v, name=v.name) for v in self.observeds_],
            T.sum(self.nll_),
            givens=param_to_placeholder,
            allow_input_downcast=True)

        def objective(x):
            return objective_(X, *x, **kwargs) / len(X)

        if use_gradient:
            gradient_ = theano.function(
                [self.X] + [w for _, w in param_to_placeholder] +
                [theano.In(v, name=v.name) for v in self.observeds_],
                theano.grad(T.sum(self.nll_),
                            [v for v, _ in param_to_placeholder]),
                givens=param_to_placeholder,
                allow_input_downcast=True)

            def gradient(x):
                return np.array(gradient_(X, *x, **kwargs)) / len(X)

        # Solve!
        x0 = np.array([v.get_value() for v, _ in param_to_placeholder])
        r = minimize(objective,
                     jac=gradient if use_gradient else None,
                     x0=x0,
                     method=optimizer,
                     bounds=mapped_bounds,
                     constraints=mapped_constraints)

        if r.success:
            # Assign the solution
            for i, value in enumerate(r.x):
                param_to_placeholder[i][0].set_value(value)

        else:
            print("Parameter fitting failed!")
            print(r)

        return self
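The key trick in both `fit` variants is the placeholder mapping: each shared parameter `v` gets a fresh `TensorVariable` of the same type, and `givens` swaps the parameter for that placeholder inside the compiled function, so `scipy.optimize.minimize` can feed candidate values as ordinary inputs. A toy, hedged miniature of that mechanism (a single scalar parameter and a squared-error stand-in for the negative log-likelihood):

import numpy as np
import theano
import theano.tensor as T

mu = theano.shared(np.array(0.0), name='mu')           # the parameter to fit
x = T.dvector('x')
nll = T.sum((x - mu) ** 2)                             # stand-in for a negative log-likelihood

w = T.TensorVariable(mu.type, name='mu_placeholder')   # placeholder with the same type
objective_ = theano.function([x, w], nll, givens=[(mu, w)],
                             allow_input_downcast=True)

print(objective_(np.array([1.0, 2.0]), 3.0))  # nll evaluated with mu replaced by 3.0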
Example No. 13
    def __init__(self,
                 dataset,
                 batch_size,
                 noisy_streamlines_sigma=None,
                 seed=1234,
                 use_data_augment=True,
                 normalize_target=False,
                 shuffle_streamlines=True,
                 resample_streamlines=True,
                 feed_previous_direction=False):
        """
        Parameters
        ----------
        dataset : :class:`TractographyDataset`
            Dataset from which to get the examples.
        batch_size : int
            Nb. of examples per batch.
        seed : int, optional
            Seed for the random generator when shuffling streamlines or adding noise to the streamlines.
        use_data_augment : bool
            If true, perform data augmentation by flipping streamlines.
        normalize_target : bool
            If true, targets will have a norm of one (usually used by the GruRegression model).
        shuffle_streamlines : bool
            Shuffle streamlines in the dataset between each epoch.
        resample_streamlines : bool
            Streamlines in the same batch will all have the same number of points.
            Should always be set to True for now (until the method _process_batch supports it).
        feed_previous_direction : bool
            Should the previous direction be appended to the input when making a prediction?
        """
        self.dataset = dataset
        self.batch_size = batch_size
        self.normalize_target = normalize_target

        self.noisy_streamlines_sigma = noisy_streamlines_sigma
        self.use_noisy_streamlines = self.noisy_streamlines_sigma is not None

        # Parameter use_data_augment cannot be used with a FFNN model (or any other
        # non-recurrent model) without feed_previous_direction, because the targets
        # are flipped but the inputs stay the same.
        self.use_augment_by_flipping = feed_previous_direction and use_data_augment
        self.seed = seed
        self.rng = np.random.RandomState(self.seed)
        self.rng_noise = np.random.RandomState(self.seed + 1)
        self.shuffle_streamlines = shuffle_streamlines
        self.resample_streamlines = resample_streamlines
        self.indices = np.arange(len(self.dataset))

        self.feed_previous_direction = feed_previous_direction

        # Shared variables
        self._shared_batch_inputs = sharedX(np.ndarray((0, 0)))
        self._shared_batch_targets = sharedX(np.ndarray((0, 0)))

        # Test value
        batch_inputs, batch_targets = self._next_batch(0)

        # Redefine symbolic variables for single input model
        self.dataset.symb_inputs = T.TensorVariable(
            type=T.TensorType("floatX", [False] * batch_inputs.ndim),
            name=self.dataset.name + '_symb_inputs')
        self.dataset.symb_inputs.tag.test_value = batch_inputs

        # Since this batch scheduler creates its own targets.
        if self.dataset.symb_targets is None:
            self.dataset.symb_targets = T.TensorVariable(
                type=T.TensorType("floatX", [False] * batch_targets.ndim),
                name=self.dataset.name + '_symb_targets')

        self.dataset.symb_targets.tag.test_value = batch_targets
Example No. 14
    def __init__(self,
                 dataset,
                 batch_size,
                 noisy_streamlines_sigma=None,
                 seed=1234,
                 use_data_augment=True,
                 normalize_target=False,
                 shuffle_streamlines=True,
                 resample_streamlines=True,
                 feed_previous_direction=False,
                 sort_streamlines_by_length=False,
                 learn_to_stop=False):
        """
        Parameters
        ----------
        dataset : :class:`TractographyDataset`
            Dataset from which to get the examples.
        batch_size : int
            Nb. of examples per batch.
        seed : int, optional
            Seed for the random generator when shuffling streamlines or adding noise to the streamlines.
        use_data_augment : bool
            If true, perform data augmentation by flipping streamlines.
        normalize_target : bool
            If true, targets will have a norm of one (usually used by the GruRegression model).
        shuffle_streamlines : bool
            Shuffle streamlines in the dataset between each epoch.
        resample_streamlines : bool
            Streamlines in the same batch will all have the same number of points.
            Should always be set to True for now (until the method _process_batch supports it).
        feed_previous_direction : bool
            Should the previous direction be appended to the input when making a prediction?
        sort_streamlines_by_length : bool
            Streamlines will be approximately grouped according to their length.
        learn_to_stop : bool
            Predict whether the streamline being generated should stop or not.
        """
        self.dataset = dataset
        self.batch_size = batch_size
        self.use_augment_by_flipping = use_data_augment
        self.normalize_target = normalize_target

        self.noisy_streamlines_sigma = noisy_streamlines_sigma
        self.use_noisy_streamlines = self.noisy_streamlines_sigma is not None

        self.seed = seed
        self.rng = np.random.RandomState(self.seed)
        self.rng_noise = np.random.RandomState(self.seed + 1)
        self.shuffle_streamlines = shuffle_streamlines
        self.resample_streamlines = resample_streamlines
        self.sort_streamlines_by_length = sort_streamlines_by_length
        self.feed_previous_direction = feed_previous_direction
        self.learn_to_stop = learn_to_stop

        # Sort streamlines according to their length by default.
        # This should speed up validation.
        self.indices = np.argsort(self.dataset.streamlines._lengths)

        # Shared variables
        self._shared_batch_inputs = sharedX(np.ndarray((0, 0, 0)))
        self._shared_batch_targets = sharedX(np.ndarray((0, 0, 0)))
        self._shared_batch_mask = sharedX(np.ndarray((0, 0)))

        # Test value
        batch_inputs, batch_targets, batch_mask = self._next_batch(0)
        self.dataset.symb_inputs.tag.test_value = batch_inputs
        self.dataset.symb_mask.tag.test_value = batch_mask

        # Since this batch scheduler creates its own targets.
        if self.dataset.symb_targets is None:
            self.dataset.symb_targets = T.TensorVariable(
                type=T.TensorType("floatX", [False] * (batch_targets.ndim)),
                name=self.dataset.name + '_symb_targets')

        self.dataset.symb_targets.tag.test_value = batch_targets
Example No. 15
    def __init__(self,
                 dataset,
                 batch_size,
                 batch_id,
                 ordering_id,
                 use_mask_as_input=False,
                 seed=1234):
        """
        Parameters
        ----------
        dataset : `SequenceDataset` object
            Dataset of datasets (one for each bundle).
        batch_size : int
            Number of examples per batch. *Must be greater than the number of
            bundles in `dataset`.*
        seed : int (optional)
            Seed of the random numbers generator used to sample a different
            regressive mask for each example.
        """
        super().__init__(dataset)
        self.use_mask_as_input = use_mask_as_input
        self.seed = seed
        self.rng = np.random.RandomState(self.seed)
        self.batch_size = batch_size
        self.batch_id = batch_id
        self.ordering_id = ordering_id

        # Determine the start and the end of the batch that will be used by this batch scheduler.
        assert batch_id * self.batch_size < len(self.dataset)
        self.batch_start = batch_id * self.batch_size
        self.batch_end = min((batch_id + 1) * self.batch_size, len(dataset))

        # Determine the ordering that will be used by this batch scheduler.
        self.d = 0
        self.D = self.dataset.input_shape[0]
        self.ordering = np.arange(self.D)
        for _ in range(ordering_id + 1):
            self.rng.shuffle(self.ordering)

        # Matrix mask that will be used when concatenating the mask.
        self._shared_Moltd = sharedX(
            np.zeros((self.batch_end - self.batch_start, self.D)), name='Moltd')

        # Vector mask that will be broadcasted across all inputs.
        # self._shared_mod = sharedX(np.zeros((1, self.D)), name='mod')
        self._shared_mod = sharedX(np.zeros((self.D, )), name='mod')

        # Add a new attribute: a symbolic variable representing the autoregressive mask.
        self.change_masks(self.d)
        self.Moltd = T.TensorVariable(
            type=T.TensorType("floatX", [False] * dataset.inputs.ndim),
            name="symb_Moltd")
        self.mod = T.TensorVariable(
            type=T.TensorType("floatX", [True, False]),
            name="symb_mod")

        # Keep only `(self.batch_end-self.batch_start)` examples as test values.
        self.dataset.symb_inputs.tag.test_value = (
            self.dataset.inputs.get_value()[:self.batch_end - self.batch_start])
        if self.dataset.has_targets:
            self.dataset.symb_targets.tag.test_value = (
                self.dataset.targets.get_value()[:self.batch_end - self.batch_start])

        self.Moltd.tag.test_value = (
            self._shared_Moltd.get_value()[:self.batch_end - self.batch_start])
        self.mod.tag.test_value = self._shared_mod.get_value()[None, :]

        if self.use_mask_as_input:
            self.dataset.symb_inputs.tag.test_value = np.concatenate(
                [self.dataset.symb_inputs.tag.test_value * self.Moltd.tag.test_value,
                 self.Moltd.tag.test_value],
                axis=1)
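`symb_mod` uses the broadcastable pattern `[True, False]`: a single row of length D that Theano broadcasts across every example in the batch, which is also why its test value is `get_value()[None, :]`. A short, hedged illustration of that pattern:

import numpy as np
import theano
import theano.tensor as T

X = T.dmatrix('X')                                            # (batch, D)
mod = T.TensorVariable(type=T.TensorType('float64', [True, False]), name='mod')
f = theano.function([X, mod], X * mod)

out = f(np.ones((4, 3)), np.array([[1.0, 0.0, 1.0]]))         # the row broadcasts over the batch
print(out.shape)  # (4, 3)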