def svd(self, k=50, normalized=True):
    '''Run an SVD on this unfolding. Compacts, runs, and returns an
    SVD2DResults.'''
    # Build a compact 2-D tensor: rows keep identity indices, while
    # unfolded-product column indices get remapped into a dense
    # OrderedSet of unique indices.
    column_set = OrderedSet()
    compacted = LabeledView(DictTensor(2), [IdentitySet(0), column_set])
    self.compact_to(compacted)
    if normalized:
        compacted = compacted.normalized(mode=0)
    result = compacted.svd(k)

    # Re-attach labels so the factor matrices line up with the
    # original tensor's labeling.
    if hasattr(self.tensor, '_labels'):
        # The underlying tensor is itself a labeled view.
        # FIXME: try not to rely on private vars.
        # TODO: it would be nice to factor this in such a way that we
        # didn't have to worry about the labeling case here.
        row_labels = self.tensor._labels[self.dim]
        unfolded_labels = UnfoldedSet.from_unfolding(
            self.dim, self.tensor.label_sets())
        u = LabeledView(result.u, [row_labels, None])
        v = LabeledView(result.v, [unfolded_labels, None])
    else:
        # Unlabeled underneath: leave u raw; label v's rows with
        # tuples of original indices.
        u = result.u
        identity_sets = [IdentitySet(d) for d in self.tensor.shape]
        unfolded_labels = UnfoldedSet.from_unfolding(self.dim, identity_sets)
        v = LabeledView(result.v, [unfolded_labels, None])

    from csc.divisi.svd import SVD2DResults
    return SVD2DResults(u, v, result.svals)
def __init__(self, *a, **kw):
    """Initialize, optionally building an empty backing tensor.

    If an ``ndim`` keyword is supplied, a fresh DictTensor of that
    dimensionality is created with one empty OrderedSet of labels per
    dimension; otherwise all arguments pass straight through to
    LabeledView's constructor.
    """
    if 'ndim' in kw:
        ndim = kw.pop('ndim')
        backing = DictTensor(ndim)
        labels = [OrderedSet() for _ in xrange(ndim)]
        LabeledView.__init__(self, backing, labels, *a, **kw)
    else:
        LabeledView.__init__(self, *a, **kw)
    # Per-instance cache of previously computed slices.
    self._slice_cache = {}
def test_iter_dim_keys():
    """iter_dim_keys should yield each dimension's labels, in order."""
    raw = DenseTensor(zeros((2, 3)))
    labels = [['a', 'b'], ['c', 'd', 'e']]
    tensor = LabeledView(raw, labels)
    # Check both dimensions: every key matches its label, and the
    # total count equals that dimension's label count.
    for dim, expected_count in ((0, 2), (1, 3)):
        seen = 0
        for idx, key in enumerate(tensor.iter_dim_keys(dim)):
            eq_(key, labels[dim][idx])
            seen = idx + 1
        eq_(seen, expected_count)
def weight_feature_vector(vec, weight_dct, default_weight=0.0):
    '''
    Scale each entry of a feature vector by a per-relation weight.

    vec: a feature vector (e.g., a slice of a reconstructed tensor)
    weight_dct: a mapping from (side, relation) tuples to weights,
      where side is 'left' or 'right'.
    default_weight: the weight to give entries that are not specified.

    Example:
    >>> from csc.conceptnet4.analogyspace import conceptnet_2d_from_db
    >>> t = conceptnet_2d_from_db('en')
    >>> svd = t.svd()
    >>> baseball = svd.reconstructed['baseball',:]
    >>> weights = {}
    >>> weights['right', 'IsA'] = 1.0
    >>> weights['right', 'AtLocation'] = 0.8
    >>> weight_feature_vector(baseball, weights).top_items()
    '''
    if vec.ndim != 1:
        raise TypeError('Feature vectors can only have one dimension')
    weighted = LabeledView(DictTensor(ndim=1), label_lists=vec.label_lists())
    for key, value in vec.iteritems():
        # key[0] is the feature tuple; its first two elements are
        # (side, relation), which index into weight_dct.
        side_and_relation = key[0][:2]
        weighted[key] = value * weight_dct.get(side_and_relation,
                                               default_weight)
    return weighted
def reconstruct(self, weights):
    """
    Get a linear combination of the eigenvectors, and re-express it
    as a Divisi tensor (a dense labeled vector).
    """
    array = self.reconstruct_array(weights)
    # NOTE(review): this sanity check only inspects the FIRST element,
    # and via string conversion; it will not catch NaNs elsewhere in
    # the array, and it is stripped when running with -O. A full
    # numeric check may be intended -- confirm before changing, since
    # callers may rely on AssertionError here.
    assert str(array[0]) != 'nan'
    return LabeledView(DenseTensor(array), [self._labels])
def svd(self, k=50, normalized=True): '''Run an SVD on this unfolding. Compacts, runs, and returns an SVD2DResults.''' # Set up a LabeledView to map column indices from unfolded products # to unique indices. col_indices = OrderedSet() compact = LabeledView(DictTensor(2), [IdentitySet(0), col_indices]) self.compact_to(compact) if normalized: compact = compact.normalized(mode=0) svd = compact.svd(k) # Wrap the output so that the labeling all works out. if hasattr(self.tensor, '_labels'): # Case for labeled view beneath # FIXME: try not to rely on private vars. # TODO: it would be nice to factor this in such a way that we # didn't have to worry about the labeling case here. u = LabeledView(svd.u, [self.tensor._labels[self.dim], None]) v = LabeledView(svd.v, [ UnfoldedSet.from_unfolding(self.dim, self.tensor.label_sets()), None ]) else: u = svd.u v = LabeledView(svd.v, [ UnfoldedSet.from_unfolding( self.dim, [IdentitySet(dim) for dim in self.tensor.shape]), None ]) from csc.divisi.svd import SVD2DResults return SVD2DResults(u, v, svd.svals)
def test_combine_by_element():
    """combine_by_element applies a binary op over matching keys and
    raises IndexError on shape or dimensionality mismatch."""
    t1 = LabeledView(DenseTensor(zeros((2, 2))), [['a', 'b'], ['c', 'd']])
    t2 = LabeledView(DenseTensor(zeros((2, 2))), [['a', 'b'], ['c', 'd']])
    t1['a', 'c'] = 1
    t1['b', 'c'] = 2
    t2['a', 'c'] = 4
    t2['a', 'd'] = 5

    combined = t1.combine_by_element(t2, lambda x, y: x + 2 * y)
    # Every cell should equal t1 + 2*t2, including untouched zeros.
    expected = {('a', 'c'): 9, ('b', 'c'): 2, ('a', 'd'): 10, ('b', 'd'): 0}
    for key, value in expected.items():
        eq_(combined[key], value)

    # Mismatched shape (3, 2) and rank (2, 2, 1) must both fail.
    for bad_shape in ((3, 2), (2, 2, 1)):
        bad = DenseTensor(zeros(bad_shape))
        assert_raises(IndexError,
                      lambda: t1.combine_by_element(bad,
                                                    lambda x, y: x + y))
def aspace_mds():
    # Build a 100-dimensional AnalogySpace SVD over English ConceptNet,
    # then project the concept vectors down with MDS.
    from csc.conceptnet.analogyspace import conceptnet_2d_from_db
    cnet = conceptnet_2d_from_db('en')
    aspace = cnet.normalized().svd(k=100)
    labels = cnet.label_list(0)
    # Scale each concept's coordinates by the singular values.
    # NOTE(review): `*=` mutates the array returned by data(); whether
    # the later data(aspace.u) call sees these scaled values depends on
    # data() returning the same underlying buffer -- confirm.
    ptmatrix = data(aspace.u)
    ptmatrix *= data(aspace.svals)
    proj = mds(ptmatrix)
    result = proj.project(data(aspace.u))
    # Concepts x MDS-dimensions, labeled on the concept axis only.
    return LabeledView(DenseTensor(result), [labels, None])
def test_combine_by_element():
    # Two identically-labeled 2x2 tensors with a few nonzero entries.
    t1 = LabeledView(DenseTensor(zeros((2, 2))), [['a', 'b'], ['c', 'd']])
    t2 = LabeledView(DenseTensor(zeros((2, 2))), [['a', 'b'], ['c', 'd']])
    t1['a', 'c'] = 1
    t1['b', 'c'] = 2
    t2['a', 'c'] = 4
    t2['a', 'd'] = 5
    # Combine with f(x, y) = x + 2y; unset cells contribute zero.
    t3 = t1.combine_by_element(t2, lambda x, y: x + (2 * y))
    eq_(t3['a', 'c'], 9)
    eq_(t3['b', 'c'], 2)
    eq_(t3['a', 'd'], 10)
    eq_(t3['b', 'd'], 0)
    # Shape mismatch must raise IndexError.
    t4 = DenseTensor(zeros((3, 2)))
    assert_raises(IndexError,
                  lambda: t1.combine_by_element(t4, lambda x, y: x + y))
    # Dimensionality mismatch must also raise IndexError.
    t4 = DenseTensor(zeros((2, 2, 1)))
    assert_raises(IndexError,
                  lambda: t1.combine_by_element(t4, lambda x, y: x + y))
def bake(self):
    '''
    Simplify the representation.

    Returns a plain LabeledView wrapping this object's current tensor
    and labels (presumably discarding any subclass-specific behavior
    -- confirm against the class this belongs to).
    '''
    return LabeledView(self.tensor, self._labels)
def test_dense_data():
    """data() on a dense-backed LabeledView exposes the raw ndarray."""
    view = LabeledView(DenseTensor(zeros((2, 2))), [['a', 'b'], ['c', 'd']])
    raw = data(view)
    assert isinstance(raw, ndarray)
def __repr__(self):
    # Delegate to the parent repr and tag it with this class's name.
    inner = LabeledView.__repr__(self)
    return '<PTLabeledTensor: %s>' % (inner,)
def bake(self):
    '''
    Return a normal LabeledView with the current contents of the
    blend.
    '''
    # Lazily materialize the blended tensor on first use.
    if self._tensor is None:
        self.build_tensor()
    return LabeledView(self.tensor, self._labels)
def empty_labeled_dense_vector(labels):
    '''Create an all-zero dense labeled vector with one entry per label.'''
    size = len(labels)
    return LabeledView(DenseTensor(zeros((size,))), [labels])