def test_identity():
    '''
    An IdentitySet behaves like a range of numbers, and a pickle
    round-trip preserves that behavior.
    '''
    fresh = IdentitySet(10)
    roundtripped = pickle.loads(pickle.dumps(fresh))
    for iset in (fresh, roundtripped):
        eq_(iset[5], 5)
        eq_(iset.index(2), 2)
        eq_(len(iset), 10)
        assert iset == OrderedSet(range(10))
def svd(self, k=50, normalized=True):
    '''Run an SVD on this unfolding. Compacts, runs, and returns an SVD2DResults.'''
    # A LabeledView maps column indices of the unfolded products down to
    # a compact range of unique indices.
    column_labels = OrderedSet()
    compacted = LabeledView(DictTensor(2), [IdentitySet(0), column_labels])
    self.compact_to(compacted)
    if normalized:
        compacted = compacted.normalized(mode=0)
    decomposition = compacted.svd(k)

    # Re-label the factor matrices so their rows line up with the
    # original tensor's labeling.
    if hasattr(self.tensor, '_labels'):
        # Case for labeled view beneath.
        # FIXME: try not to rely on private vars.
        # TODO: it would be nice to factor this in such a way that we
        # didn't have to worry about the labeling case here.
        u = LabeledView(decomposition.u,
                        [self.tensor._labels[self.dim], None])
        unfolded_rows = UnfoldedSet.from_unfolding(
            self.dim, self.tensor.label_sets())
    else:
        u = decomposition.u
        unfolded_rows = UnfoldedSet.from_unfolding(
            self.dim, [IdentitySet(dim) for dim in self.tensor.shape])
    v = LabeledView(decomposition.v, [unfolded_rows, None])

    from csc.divisi.svd import SVD2DResults
    return SVD2DResults(u, v, decomposition.svals)
def compressed_svd_u(self, k=100):
    """
    Not done yet. --Rob

    Collects the union of all mode-0 labels across ``self.weights``
    into one OrderedSet and runs an SVD of rank ``k`` on each weight
    tensor. (Incomplete: the per-tensor SVDs are computed but not yet
    combined into a compressed U.)
    """
    labelset = set()
    for t in self.weights:
        # BUG FIX: ``set`` has no ``+``/``+=`` operator, so the original
        # ``labelset += set(...)`` raised TypeError. In-place union is ``|=``.
        labelset |= set(t.label_list(0))
    ulabels = OrderedSet(list(labelset))
    svds = [t.svd(k) for t in self.weights]
def __init__(self, *a, **kw):
    '''
    Initialize the view. If an ``ndim`` keyword is given, a fresh
    DictTensor of that dimensionality and empty label lists are
    created; otherwise all arguments pass straight through to
    LabeledView. A per-instance slice cache is set up either way.
    '''
    if 'ndim' not in kw:
        LabeledView.__init__(self, *a, **kw)
    else:
        ndim = kw.pop('ndim')
        backing = DictTensor(ndim)
        axis_labels = [OrderedSet() for i in xrange(ndim)]
        LabeledView.__init__(self, backing, axis_labels, *a, **kw)
    # Cache for computed slices; starts empty.
    self._slice_cache = {}
def _set_tensors(self, tensors):
    '''
    Set the input tensors. Computes the label lists also.

    You should not call this function directly; rather, assign to
    blend.tensors.

    You can pass a ``dict`` or sequence of ``(label, tensor)`` pairs;
    the tensors will be labeled according to the keys.

    Side effects: sets ``_tensors``, ``names``, ``ndim``, ``_labels``,
    ``label_overlap``, ``_shape``, and ``_keys_never_overlap``, and
    clears the cached ``_weights``/``_tensor``/``_svals``.

    Raises TypeError for a bare Tensor, a MeanSubtractedView anywhere
    in a tensor's stack, or tensors of mismatched dimensionality.
    '''
    # A single Tensor is almost certainly a caller mistake; require a
    # collection of tensors.
    if isinstance(tensors, Tensor):
        raise TypeError(
            'Give Blend a _list_ (or dict or whatever) of tensors.')
    if hasattr(tensors, 'items'):
        # Extract the items, if we have some.
        tensors = tensors.items()
    if isinstance(tensors[0], (list, tuple)):
        # Assign names. Don't call `dict()`, in case a sequence
        # was passed and two tensors have the same label.
        names, tensors = zip(*tensors)
    else:
        # No labels given: fall back to each tensor's repr as its name.
        names = map(repr, tensors)
    for tensor in tensors:
        if tensor.stack_contains(MeanSubtractedView):
            raise TypeError(
                "You can't blend MeanSubtractedViews. Try mean-subtracting the resulting blend."
            )
    self._tensors = tuple(tensors)
    self.names = tuple(names)
    self.logger.info('tensors: %s', ', '.join(self.names))
    # All inputs must share one dimensionality; remember it as ndim.
    self.ndim = ndim = tensors[0].ndim
    if not all(tensor.ndim == ndim for tensor in tensors):
        raise TypeError(
            'Blended tensors must have the same dimensionality.')
    self.logger.info('Making ordered sets')
    # Merge each tensor's per-dimension labels into shared OrderedSets,
    # counting how many keys were already present (the overlap).
    self._labels = labels = [OrderedSet() for _ in xrange(ndim)]
    self.label_overlap = label_overlap = [0] * ndim
    for tensor in self._tensors:
        for dim, label_list in enumerate(labels):
            for key in tensor.label_list(dim):
                # XXX(kcarnold) This checks containment twice.
                if key in label_list:
                    label_overlap[dim] += 1
                else:
                    label_list.add(key)
    self._shape = tuple(map(len, labels))
    # True only when at least one dimension saw zero overlapping keys.
    self._keys_never_overlap = not all(label_overlap)
    self.logger.info('Done making ordered sets. label_overlap: %r',
                     label_overlap)
    if not any(label_overlap):
        self.logger.warn('No labels overlap.')
    # Invalidate other data
    self._weights = self._tensor = self._svals = None
def test_delete_and_pickle():
    '''
    Deleting an element doesn't affect the remaining elements' indices.
    '''
    s = OrderedSet(['dog', 'cat', 'banana'])
    del s[1]
    eq_(s[1], None)
    eq_(s.index('banana'), 2)
    # Pickling doesn't change things.
    s2 = pickle.loads(pickle.dumps(s))
    eq_(s, s2)
    eq_(s2[1], None)
    eq_(s2.index('banana'), 2)
    # The None placeholder left by deletion must not count as a member.
    # (The original asserted this twice verbatim; once suffices.)
    assert None not in s2
def test_delete_and_pickle():
    '''
    Deleting an element doesn't affect the remaining elements' indices.

    NOTE(review): this is a verbatim duplicate of an earlier
    test_delete_and_pickle in this file; at import time it shadows the
    first definition. Consider deleting one copy.
    '''
    before = OrderedSet(['dog', 'cat', 'banana'])
    del before[1]
    eq_(before[1], None)
    eq_(before.index('banana'), 2)
    # Pickling doesn't change things.
    after = pickle.loads(pickle.dumps(before))
    eq_(before, after)
    eq_(after[1], None)
    eq_(after.index('banana'), 2)
    assert None not in after
    assert None not in after
def test_pickle():
    '''
    An OrderedSet survives a pickle round-trip with order intact.
    '''
    import cPickle as pickle
    original = OrderedSet(['dog', 'cat', 'banana'])
    restored = pickle.loads(pickle.dumps(original))
    eq_(original, restored)
    eq_(restored[0], 'dog')
    eq_(restored.index('cat'), 1)
def make_sparse_labeled_tensor(ndim, labels=None, initial=None,
                               accumulate=None, normalize=False):
    '''
    Create a sparse labeled tensor.

    ndim: number of dimensions (usually 2)
    labels: if you already have label lists, pass them in here. (A None
      in this list means an unlabeled dimension. If you simply don't
      have labels yet, pass an OrderedSet().)
    initial / accumulate: sequences of (key, value) pairs to add to the
      tensor. ``initial`` is applied first by ``.update``, meaning that
      later values will override earlier ones. ``accumulate`` is applied
      afterwards, and all values add to anything already there.
    normalize:
      an int or tuple of ints: normalize along that dimension
      True: normalize along axis 0
      'tfidf': use tf-idf
      'tfidf.T': use tf-idf, transposed (matrix is documents by terms)
      a class: adds that class as a layer.
    '''
    if labels is None:
        labels = [OrderedSet() for _ in xrange(ndim)]
    view = LabeledView(DictTensor(ndim), labels)
    # Keep the backing tensor's shape in sync with the label lists.
    view.tensor._shape[:] = [len(axis_labels) for axis_labels in labels]
    if initial is not None:
        view.update(initial)
    for key, value in accumulate or []:
        view.inc(key, value)
    if not normalize:
        return view
    return view.normalized(normalize)
def concatenate(self, other):
    '''
    Stack this labeled view on top of ``other`` along axis 0,
    concatenating the axis-0 label lists and keeping the remaining
    label lists from ``self``.
    '''
    stacked = self.tensor.concatenate(other.tensor)
    merged_rows = OrderedSet(
        list(self.label_list(0)) + list(other.label_list(0)))
    remaining = self.label_lists()[1:]
    return LabeledView(stacked, [merged_rows] + remaining)
def __init__(self):
    '''
    A 2-D labeled DictTensor with an independent, initially empty
    label set on each axis.
    '''
    feature_labels = OrderedSet()
    concept_labels = OrderedSet()
    super(FeatureByConceptMatrix, self).__init__(
        DictTensor(2), [feature_labels, concept_labels])
def __init__(self):
    '''
    A 3-D labeled DictTensor (concept x relation x concept). The same
    OrderedSet instance labels both concept axes.
    '''
    # FIXME: yes this saves space, but it might make a row or column be zero.
    concept_labels = OrderedSet()
    relation_labels = OrderedSet()
    axis_labels = [concept_labels, relation_labels, concept_labels]
    super(ConceptRelationConceptTensor, self).__init__(
        DictTensor(3), axis_labels)
def setUp(self):
    '''
    Build a mix of set types and sizes for the tests to exercise.
    '''
    evens = OrderedSet([2, 4, 6, 8, 10])    # 5 items
    identity = IdentitySet(10)              # 10 items
    letters = OrderedSet(['a', 'b', 'c', 'd'])  # 4 items
    self.sets = [evens, identity, letters]
def test_make_shape():
    '''
    A freshly made 1-D labeled tensor's shape (at both the view and the
    backing-tensor level) matches the length of its label set.
    '''
    label_set = OrderedSet(list('abcde'))
    tensor = make_sparse_labeled_tensor(ndim=1, labels=[label_set])
    expected = len(label_set)
    eq_(tensor.shape[0], expected)
    eq_(tensor.tensor.shape[0], expected)
def test_reprOfEmpty():
    '''
    repr() of an empty OrderedSet should not fail.
    '''
    empty = OrderedSet()
    repr(empty)