def test_forward_dense_array_mapper():
    """Forward-map samples through a mask-based dense array mapper."""
    msk = np.ones((3, 2), dtype='bool')
    mapper = mask_mapper(msk)
    # shape reported for a single forward-mapped sample
    assert_equal(mapper.forward1(msk).shape, (6, ))
    # one-sample mapping flattens the (3, 2) array
    assert_array_equal(mapper.forward1(np.arange(6).reshape(3, 2)),
                       [0, 1, 2, 3, 4, 5])
    # four samples mapped at once keep the sample axis first
    mapped4 = mapper.forward(np.arange(24).reshape(4, 3, 2))
    assert_array_equal(mapped4,
                       [[0, 1, 2, 3, 4, 5],
                        [6, 7, 8, 9, 10, 11],
                        [12, 13, 14, 15, 16, 17],
                        [18, 19, 20, 21, 22, 23]])
    # now an incomplete mask: one element switched off
    msk[1, 1] = 0
    mapper = mask_mapper(msk)
    assert_equal(mapper.forward1(msk).shape, (5, ))
    assert_array_equal(mapper.forward1(np.arange(6).reshape(3, 2)),
                       [0, 1, 2, 4, 5])
    # a sample from the wrong dataspace must be rejected
    assert_raises(ValueError, mapper.forward, np.arange(4).reshape(2, 2))
    # neither mask nor shape given -> error
    assert_raises(ValueError, mask_mapper)
    # providing only a shape must auto-create a full (all-true) mask
    shaped = mask_mapper(shape=(2, 3, 4))
    out = shaped.forward1(np.arange(24).reshape(2, 3, 4))
    assert_array_equal(out, np.arange(24))
def test_mapped_classifier(self):
    """A MappedClassifier must predict using only mask-selected features."""
    samples = np.array([[0, 0, -1],
                        [1, 0, 1],
                        [-1, -1, 1],
                        [-1, 0, 1],
                        [1, -1, 1]])
    cases = (([1, 1, 0], [1, 1, 1, -1, -1]),
             ([1, 0, 1], [-1, 1, -1, -1, 1]),
             ([0, 1, 1], [-1, 1, -1, 1, -1]))
    for feature_mask, expected in cases:
        masked_clf = MappedClassifier(
            clf=self.clf_sign,
            mapper=mask_mapper(np.array(feature_mask, dtype=bool)))
        self.assertEqual(masked_clf.predict(samples), expected)
def test_mapped_classifier(self):
    """Predictions of a MappedClassifier follow the feature mask in use."""
    data = np.array([[0, 0, -1],
                     [1, 0, 1],
                     [-1, -1, 1],
                     [-1, 0, 1],
                     [1, -1, 1]])
    # each entry: (feature mask, expected per-sample predictions)
    for msk, want in (([1, 1, 0], [1, 1, 1, -1, -1]),
                      ([1, 0, 1], [-1, 1, -1, -1, 1]),
                      ([0, 1, 1], [-1, 1, -1, 1, -1])):
        wrapped = MappedClassifier(
            clf=self.clf_sign,
            mapper=mask_mapper(np.array(msk, dtype=bool)))
        self.assertEqual(wrapped.predict(data), want)
def test_reverse_dense_array_mapper():
    """Reverse mapping must restore the original (masked) dataspace shape."""
    msk = np.ones((3, 2), dtype='bool')
    msk[1, 1] = 0
    mapper = mask_mapper(msk)
    restored = mapper.reverse1(np.arange(1, 6))
    assert_equal(restored.shape, (3, 2))
    # the masked-out cell stays zero; the last feature lands at (2, 1)
    assert_equal(restored[1, 1], 0)
    assert_equal(restored[2, 1], 5)
    # forward must reject data from the wrong dataspace
    assert_raises(ValueError, mapper.forward, np.arange(6))
    # multi-sample reverse mapping: sample axis preserved up front
    restored2 = mapper.reverse(np.arange(1, 11).reshape(2, 5))
    assert_equal(restored2.shape, (2, 3, 2))
    assert_equal(restored2[0, 1, 1], 0)
    assert_equal(restored2[1, 1, 1], 0)
    assert_equal(restored2[0, 2, 1], 5)
    assert_equal(restored2[1, 2, 1], 10)
def test_selects():
    """Appending StaticFeatureSelection steps narrows a mask mapper's output.

    Also verifies that the mask array handed to ``mask_mapper`` is never
    mutated by the mapper or by the appended selections.
    """
    mask = np.ones((3, 2), dtype='bool')
    mask[1, 1] = 0
    mask0 = mask.copy()
    data = np.arange(6).reshape(mask.shape)
    map_ = mask_mapper(mask)

    # NOTE(review): historical check that out-of-range outIds raise,
    # kept for reference:
    # assert_raises(IndexError, map_.select_out, [0, 1, 2, 6])

    # remove features 1, 2 (of the 5 surviving the mask)
    map_.append(StaticFeatureSelection([0, 3, 4]))
    assert_array_equal(map_.forward1(data), [0, 4, 5])
    # remove one more
    map_.append(StaticFeatureSelection([0, 2]))
    assert_array_equal(map_.forward1(data), [0, 5])

    # check that the original mask wasn't perturbed
    # (fix: this comment + assertion were accidentally duplicated
    # verbatim in the original; the duplicate has been removed)
    assert_array_equal(mask, mask0)
def from_wizard(cls, samples, targets=None, chunks=None, mask=None,
                mapper=None, flatten=None, space=None):
    """Convenience method to create dataset.

    Datasets can be created from N-dimensional samples. Data arrays with
    more than two dimensions are going to be flattened, while preserving
    the first axis (separating the samples) and concatenating all other as
    the second axis. Optionally, it is possible to specify targets and
    chunk attributes for all samples, and masking of the input data (only
    selecting elements corresponding to non-zero mask elements).

    Parameters
    ----------
    samples : ndarray
      N-dimensional samples array. The first axis separates individual
      samples.
    targets : scalar or ndarray, optional
      Labels for all samples. If a scalar is provided its values is
      assigned as label to all samples.
    chunks : scalar or ndarray, optional
      Chunks definition for all samples. If a scalar is provided its
      values is assigned as chunk of all samples.
    mask : ndarray, optional
      The shape of the array has to correspond to the shape of a single
      sample (shape(samples)[1:] == shape(mask)). Its non-zero elements
      are used to mask the input data.
    mapper : Mapper instance, optional
      A trained mapper instance that is used to forward-map
      possibly already flattened (see flatten) and masked samples
      upon construction of the dataset. The mapper must have a
      simple feature space (samples x features) as output. Use a
      `ChainMapper` to achieve that, if necessary.
    flatten : None or bool, optional
      If None (default) and no mapper provided, data would get flattened.
      Bool value would instruct explicitly either to flatten before
      possibly passing into the mapper if no mask is given.
    space : str, optional
      If provided it is assigned to the mapper instance that performs the
      initial flattening of the data.

    Returns
    -------
    instance : Dataset
    """
    # for all non-ndarray samples you need to go with the constructor
    samples = np.asanyarray(samples)

    # compile the necessary samples attributes collection
    sa_items = {}

    # idiom fix: "x is not None" instead of "not x is None" (×3 below)
    if targets is not None:
        sa_items['targets'] = _expand_attribute(targets,
                                                samples.shape[0],
                                                'targets')
    if chunks is not None:
        # unlike previous implementation, we do not do magic to do chunks
        # if there are none, there are none
        sa_items['chunks'] = _expand_attribute(chunks,
                                               samples.shape[0],
                                               'chunks')

    # common checks should go into __init__
    ds = cls(samples, sa=sa_items)

    # apply mask through mapper
    if mask is None:
        # if we have multi-dim data (ndim is clearer than len(shape))
        if samples.ndim > 2 and \
           ((flatten is None and mapper is None)  # auto case
            or flatten):                          # bool case
            fm = FlattenMapper(shape=samples.shape[1:], space=space)
            ds = ds.get_mapped(fm)
    else:
        mm = mask_mapper(mask, space=space)
        mm.train(ds)
        ds = ds.get_mapped(mm)

    # apply generic mapper
    if mapper is not None:
        ds = ds.get_mapped(mapper)
    return ds
def test_mapper_aliases():
    """Two forward passes of identical input must yield identical output."""
    mapper = mask_mapper(np.ones((3, 4, 2), dtype='bool'))
    first = mapper.forward(np.ones((2, 3, 4, 2)))
    second = mapper.forward(np.ones((2, 3, 4, 2)))
    assert_array_equal(first, second)