def test_predicted_svals():
    '''
    The predicted_svals function shows the predicted singular values.
    '''
    t1 = MockTensor()
    t1.svals = range(5, 0, -1)
    t2 = MockTensor()
    t2.svals = range(10, 0, -2)

    # Weighting one side heavily should make its svals uniquely show up.
    weight = 0.999999
    b = Blend([t1, t2], weights=[weight, 1 - weight], k_values=1)

    # with origin tracking:
    svals = b.predicted_svals(num=5, for_each_tensor=5, track_origin=True)
    for expected, (actual, src) in zip(t1.svals, svals):
        assert_almost_equal(actual / weight, expected)
        eq_(src, 0)

    # without origin tracking
    svals = b.predicted_svals(num=5, for_each_tensor=5)
    for expected, actual in zip(t1.svals, svals):
        assert_almost_equal(actual / weight, expected)

    # Flip it around.
    b.weights = [1 - weight, weight]
    # Note: zip(*...) is an easy way to transpose the "matrix".
    sval, src = zip(*b.predicted_svals(num=5, for_each_tensor=5,
                                       track_origin=True))
    for actual, expected in zip(sval, t2.svals):
        assert_almost_equal(actual / weight, expected)
    eq_(src, (1,) * 5)
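# A minimal sketch of the behavior the assertions above rely on.  This
# merge-and-sort model is inferred from the test itself, not taken from the
# divisi source, and predicted_svals_sketch is a hypothetical helper:
def predicted_svals_sketch(svals_per_tensor, weights, track_origin=False):
    # Scale each tensor's svals by its blend weight, pool them, and sort
    # descending; track_origin tags each value with its tensor's index.
    merged = sorted(((w * s, i)
                     for i, (w, svals) in enumerate(zip(weights,
                                                        svals_per_tensor))
                     for s in svals), reverse=True)
    return merged if track_origin else [s for s, i in merged]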
def test_autoblend():
    '''
    If weights are not specified explicitly, Blend computes them
    automatically so as to maximize the amount of interaction between the
    two matrices.

    This is hard to test in general. The strategy used here is to blend
    two copies of the same matrix (so the singular values are the same),
    but with different labels.
    '''
    t1 = ez_matrix('0013', '0421', [1, 2, 3, 4])
    # one overlapping label, 3x the values
    t2 = ez_matrix('2214', '0421', [3, 6, 9, 12])
    b = Blend([t1, t2])  # don't specify weights => autoblend.
    eq_(b.label_overlap[0], 1)
    eq_(b.label_overlap[1], 4)

    # This should result in t2 getting weighted 1/3 the weight of t1:
    logging.info(b.weights)
    assert allclose(b.weights, [.75, .25])

    # Test the resulting tensor.
    b.build_tensor()
    # - non-overlapping elements
    assert t1['0', '0'] == 1
    assert_almost_equal(b['0', '0'], .75 * 1)
    # remember that the original tensors had non-unity values.
    assert t2['4', '1'] == 4 * 3
    assert_almost_equal(b['4', '1'], .25 * 4 * 3)
    # - overlapping element (written out to be explicit)
    assert t1['1', '2'] == 3
    assert t2['1', '2'] == 3 * 3
    assert_almost_equal(b['1', '2'], 0.75 * 3 + 0.25 * 3 * 3)
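# The module-level fixtures t1 and t2 used by the remaining tests do not
# appear in this excerpt.  The definitions below are a reconstruction, not
# the originals, but they are consistent with every expected value in the
# tests that follow (t1 holds entries (a,1), (a,2), (b,1); t2 holds
# (a,2), (c,1); all with value 1):
t1 = ez_matrix('aab', '121', [1] * 3)
t2 = ez_matrix('ac', '21', [1] * 2)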
def test_manual_weights():
    '''
    Specifying weights manually causes the result to be the weighted sum.
    '''
    b = Blend([t1, t2], weights=[.75, .25])
    b.build_tensor()
    expected = dict(zip(zip('aabc', '1211'), (.75, 1.0, .75, .25)))
    assertTensorEqualCompleteDict(b, expected)
def test_no_overlap():
    '''
    Certain optimizations are possible if there is no overlap.
    '''
    t2 = ez_matrix('dc', '21', [1] * 2)
    b = Blend([t1, t2], weights=[.75, .25])
    b.build_tensor()
    expected = dict(zip(zip('aabdc', '12121'), (.75, .75, .75, .25, .25)))
    assertTensorEqualCompleteDict(b, expected)
def test_fake_tensor():
    '''
    Blends have a fake tensor that does iteritems really fast.
    '''
    # labels will be abc, 12
    t3 = ez_matrix('c', '1', [1])
    b = Blend([t1, t3], weights=[.75, .25])
    fake_tensor = b.fake_tensor()
    items = set(fake_tensor.iteritems())
    expected_items = set((
        ((0, 0), .75),
        ((0, 1), .75),
        ((1, 0), .75),
        ((2, 0), .25)))
    assertSetEquals(items, expected_items)
def __init__(self, emoticon_file=path + '/data/emoticons.csv',
             affect_wordnet_file=path + '/data/affectiveWNmatrix.pickle'):
    # Build the emoticon dictionary from the CSV file.
    self.emoticon = {}
    emoticon_reader = csv.reader(open(emoticon_file, 'r'))
    for emoticon, meaning in emoticon_reader:
        self.emoticon[emoticon.decode('utf-8')] = meaning
    self.emoticon_list = self.emoticon.keys()

    # Create a blend of affective WordNet and ConceptNet.
    cnet = conceptnet_2d_from_db('en')
    affectwn_raw = get_picklecached_thing(affect_wordnet_file)
    affectwn_normalized = affectwn_raw.normalized()
    theblend = Blend([affectwn_normalized, cnet])
    self.affectwn = theblend.svd()

    # Get the natural language processing tool.
    self.nl = get_nl('en')
def test_rough_blend2():
    '''
    You can pass in the svals and it doesn't try to compute them.
    '''
    t1 = MockTensor()
    t2 = MockTensor()
    b = Blend([t1, t2], k_values=1,
              svals=[range(5, 0, -1), range(15, 0, -3)])
    eq_(b.weights, (.75, .25))
def test_names():
    '''
    Blends can name their tensors.
    '''
    # Uses repr() by default:
    b = Blend([t1, t2])
    eq_(b.names, (repr(t1), repr(t2)))

    # but you can specify names with a dict:
    b = Blend(dict(t1=t1, t2=t2))
    tensors = sorted(list(zip(b.names, b.tensors)))
    eq_(tensors, [('t1', t1), ('t2', t2)])

    # or with a sequence of (name, tensor) items:
    b = Blend([('t1', t1), ('t2', t2)])
    eq_(b.names, ('t1', 't2'))
    eq_(b.tensors, (t1, t2))
def test_rough_blend():
    '''
    The rough weights are 1/svals[0], normalized.
    '''
    t1 = MockTensor()
    t1.svals = range(5, 0, -1)
    t2 = MockTensor()
    t2.svals = range(15, 0, -3)
    b = Blend([t1, t2], k_values=1)
    eq_(b.weights, (.75, .25))
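# A standalone check of the docstring's rule (1/svals[0], normalized),
# using the same mock svals; it reproduces the (.75, .25) expected above:
svals = [range(5, 0, -1), range(15, 0, -3)]  # top svals are 5 and 15
raw = [1.0 / s[0] for s in svals]            # [0.2, 0.0666...]
total = sum(raw)
print [w / total for w in raw]               # ~> [0.75, 0.25]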
class Divsi:
    svd = None

    def __init__(self):
        self.helper = DivsiHelper()
        self.cnet_normalized = conceptnet_2d_from_db('en')
        self.affectwn_raw = get_picklecached_thing(
            'data/divsi/affectiveWNmatrix.pickle')
        self.affectWN = self.affectwn_raw.normalized()
        self.analogySpace = Blend([self.affectWN, self.cnet_normalized]).svd()
        self.EN_NL = get_nl('en')

    def load_svd(self, k=100):
        # Note: assumes self.tensor has been assigned elsewhere.
        svd = self.tensor.svd(k=k)
        return svd

    def concept_similarity(self, universal_word):
        similarity = {}
        common = {}
        for interesting in self.helper.interestingTags(universal_word):
            L, R = interesting[0], interesting[1]
            try:
                left = self.analogySpace.weighted_u_vec(L)
                right = self.analogySpace.weighted_u_vec(R)
                similar = left.hat() * right.hat()
                common[L] = self.analogySpace.u_dotproducts_with(left).top_items(10)
                common[R] = self.analogySpace.u_dotproducts_with(right).top_items(10)
                similarity[similar] = [L, R]
            except Exception:
                # Skip tag pairs that aren't present in the space.
                pass
        sorted_similarity = self.helper.sortDictionary(similarity)
        return (sorted_similarity, common)
def test_factor_too_big():
    "Factor is between 0 and 1."
    # Should raise: a factor of 1.5 is out of range.
    Blend([t1, t2], factor=1.5)
def test_specifying_factor():
    '''
    When a factor is supplied, use it as the weight of the second matrix.
    '''
    eq_(Blend([t1, t2], factor=.25).weights, (0.75, 0.25))
def test_bare_blend():
    '''
    A matrix blended with nothing else changes nothing.
    '''
    b = Blend([t1], weights=[1])
    assertSetEquals(set(t1.label_list(0)), set(b.label_list(0)))
def make_blend(other):
    return Blend([cnet, other])
def test_nonlist():
    # Should raise: Blend expects a sequence of tensors, not a bare tensor.
    Blend(t1, weights=[1])
def test_factor_wrong_dims():
    "Factor only applies to two matrices."
    # Should raise: three tensors were given along with a single factor.
    Blend([t1, t2, t1], factor=0.5)
def test_blend_mean_subtracted():
    from csc.divisi.blend import Blend
    # Merely constructing the blend should succeed.
    Blend([tensor.mean_subtracted()])
def veering_by_overlap(factors, mat1, mat2, row_overlap, col_overlap):
    t1, t2 = overlap_matrices(mat1, mat2, row_overlap, col_overlap)
    blend = Blend([t1, t2], factor=0)
    return [blend.total_veering_at_factor(factor, num=15)
            for factor in factors]
def predicted_by_overlap(factors, mat1, mat2, row_overlap, col_overlap):
    blend = Blend(overlap_matrices(mat1, mat2, row_overlap, col_overlap),
                  factor=0)
    return [blend.predicted_svals_at_factor(factor, num=15)
            for factor in factors]
def svals_by_overlap(factors, mat1, mat2, row_overlap, col_overlap):
    blend = Blend(overlap_matrices(mat1, mat2, row_overlap, col_overlap),
                  factor=0)
    return [blend.svals_at_factor(factor, k=15) for factor in factors]
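# A hypothetical driver for the three sweep helpers above; mat1, mat2, and
# the overlap sizes are made-up inputs, and overlap_matrices is assumed to
# build two matrices sharing that many row/column labels.
factors = [i / 10.0 for i in range(11)]  # sweep the blend factor 0..1
veering = veering_by_overlap(factors, mat1, mat2, 5, 5)
predicted = predicted_by_overlap(factors, mat1, mat2, 5, 5)
actual = svals_by_overlap(factors, mat1, mat2, 5, 5)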
def test_specifying_factor_and_weights():
    "Shouldn't specify both factor and weights at the same time."
    # Should raise: factor and weights are mutually exclusive.
    Blend([t1, t2], factor=0.5, weights=[0.5, 0.5])
from csc.conceptnet4.analogyspace import (conceptnet_by_relations,
                                          identities_for_all_relations)
from csc.divisi.blend import Blend
from csc.divisi import export_svdview

byrel = conceptnet_by_relations('en')
t = identities_for_all_relations(byrel)
b = Blend(byrel.values() + [t])
s = b.svd()
export_svdview.write_packed(s.u, 'littleblend', lambda x: x)
s.summarize()
def _get_color_blend():
    colors = get_picklecached_thing(FILEPATH + os.sep + 'colormatrix.pickle.gz',
                                    _make_color_matrix)
    cnet = get_picklecached_thing(FILEPATH + os.sep + 'cnet.pickle.gz',
                                  lambda: conceptnet_2d_from_db('en'))
    colorblend = Blend([colors, cnet]).normalized(mode=[0, 1]).bake()
    return colorblend
def test_wrong_dims():
    from csc.divisi.labeled_view import make_sparse_labeled_tensor
    t1 = make_sparse_labeled_tensor(ndim=1)
    t2 = make_sparse_labeled_tensor(ndim=2)
    # Should raise: all blended tensors must have the same dimensionality.
    Blend([t1, t2])