Пример #1
0
def test_predicted_svals():
    '''
    predicted_svals reports the singular values expected from a blend.

    Nearly all of the weight is put on one input, so the leading
    predictions should be that input's own svals scaled by its weight.
    '''
    first = MockTensor()
    first.svals = range(5, 0, -1)
    second = MockTensor()
    second.svals = range(10, 0, -2)

    # A lopsided weighting lets only one input's svals surface.
    weight = 0.999999
    leftover = 1-weight
    blend = Blend([first, second], weights=[weight, leftover], k_values=1)

    # Origin tracking on: every entry is a (value, source index) pair.
    tracked = blend.predicted_svals(num=5, for_each_tensor=5, track_origin=True)
    for want, pair in zip(first.svals, tracked):
        got, origin = pair
        assert_almost_equal(got/weight, want)
        eq_(origin, 0)

    # Origin tracking off: entries are bare values.
    untracked = blend.predicted_svals(num=5, for_each_tensor=5)
    for want, got in zip(first.svals, untracked):
        assert_almost_equal(got/weight, want)

    # Swap the weights so the second input dominates instead.
    blend.weights = [leftover, weight]
    flipped = blend.predicted_svals(num=5, for_each_tensor=5, track_origin=True)
    # zip(*pairs) splits the pairs into a values tuple and an origins tuple.
    values, origins = zip(*flipped)
    for got, want in zip(values, second.svals):
        assert_almost_equal(got/weight, want)
    eq_(origins, (1,)*5)
Пример #2
0
def test_autoblend():
    '''
    When no weights are given, Blend chooses them on its own.

    Two small matrices are blended: they share the same column labels and
    one row label, and the second one holds three times the values of the
    first. The test pins the resulting weights and a few blended cells.
    '''
    w_small, w_large = .75, .25

    small = ez_matrix('0013', '0421', [1,2,3,4])
    # Shares one row label with `small`; values are 3x as large.
    large = ez_matrix('2214', '0421', [3,6,9,12])
    # No weights passed, so the automatic weighting kicks in.
    blend = Blend([small, large])

    eq_(blend.label_overlap[0], 1)
    eq_(blend.label_overlap[1], 4)

    # The larger matrix should end up with a third of the other's weight.
    logging.info(blend.weights)
    assert allclose(blend.weights, [w_small, w_large])

    # Now check individual cells of the built tensor.
    blend.build_tensor()

    # Cells that exist in only one input are just scaled by its weight
    # (the inputs hold non-unity values).
    assert small['0', '0'] == 1
    assert_almost_equal(blend['0', '0'], w_small*1)
    assert large['4', '1'] == 4*3
    assert_almost_equal(blend['4', '1'], w_large*4*3)

    # The cell present in both inputs is the weighted sum of the two.
    assert small['1', '2'] == 3
    assert large['1', '2'] == 3*3
    assert_almost_equal(blend['1', '2'],  w_small*3 + w_large*3*3)
Пример #3
0
def test_manual_weights():
    '''
    With explicit weights the blend is the weighted sum of its inputs.
    '''
    blend = Blend([t1, t2], weights=[.75,.25])
    blend.build_tensor()
    # Every (row, column) cell the built tensor is expected to contain.
    expected = {
        ('a', '1'): .75,
        ('a', '2'): 1.0,
        ('b', '1'): .75,
        ('c', '1'): .25,
    }
    assertTensorEqualCompleteDict(blend, expected)
Пример #4
0
def test_manual_weights():
    '''
    Explicitly supplied weights yield the weighted sum of the inputs.
    '''
    blend = Blend([t1, t2], weights=[.75, .25])
    blend.build_tensor()
    # Expected cells, listed as parallel (row, column) keys and values.
    cells = [('a', '1'), ('a', '2'), ('b', '1'), ('c', '1')]
    values = [.75, 1.0, .75, .25]
    expected = dict(zip(cells, values))
    assertTensorEqualCompleteDict(blend, expected)
Пример #5
0
def test_no_overlap():
    '''
    Blending inputs whose cells do not overlap (a case Blend may optimize).
    '''
    # Local second input; it shadows any module-level t2 inside this test.
    t2 = ez_matrix('dc', '21', [1, 1])
    blend = Blend([t1, t2], weights=[.75, .25])
    blend.build_tensor()
    # Each cell comes from exactly one input, scaled by that input's weight.
    expected = {
        ('a', '1'): .75,
        ('a', '2'): .75,
        ('b', '1'): .75,
        ('d', '2'): .25,
        ('c', '1'): .25,
    }
    assertTensorEqualCompleteDict(blend, expected)
Пример #6
0
def test_no_overlap():
    '''
    Inputs with no shared cells: each blended cell is one scaled input cell.
    '''
    # Local second input; it shadows any module-level t2 inside this test.
    t2 = ez_matrix('dc', '21', [1, 1])
    blend = Blend([t1, t2], weights=[.75,.25])
    blend.build_tensor()
    # Expected cells as parallel (row, column) keys and values.
    cells = [('a', '1'), ('a', '2'), ('b', '1'), ('d', '2'), ('c', '1')]
    values = [.75, .75, .75, .25, .25]
    expected = dict(zip(cells, values))
    assertTensorEqualCompleteDict(blend, expected)
Пример #7
0
def test_fake_tensor():
    '''
    A blend exposes a fake tensor whose iteritems yields the weighted cells.
    '''
    # Per the original note, the blend's labels will be abc / 12.
    t3 = ez_matrix('c', '1', [1])
    blend = Blend([t1, t3], weights=[.75, .25])
    seen = set(blend.fake_tensor().iteritems())
    # Entries are ((row index, column index), weighted value).
    wanted = set([
        ((0, 0), .75),
        ((0, 1), .75),
        ((1, 0), .75),
        ((2, 0), .25),
    ])
    assertSetEquals(seen, wanted)
Пример #8
0
def test_fake_tensor():
    '''
    The fake tensor of a blend iterates over index/value pairs directly.
    '''
    # Per the original note, the blend's labels will be abc / 12.
    t3 = ez_matrix('c', '1', [1])
    blend = Blend([t1, t3], weights=[.75,.25])
    fake = blend.fake_tensor()
    seen = set(fake.iteritems())
    # Indices paired with the weighted value expected at each of them.
    indices = [(0,0), (0,1), (1,0), (2,0)]
    values = [.75, .75, .75, .25]
    wanted = set(zip(indices, values))
    assertSetEquals(seen, wanted)
Пример #9
0
	def __init__(self, emoticon_file=path+'/data/emoticons.csv',
			affect_wordnet_file=path+'/data/affectiveWNmatrix.pickle'):
		'''
		Load the emoticon table and build the affect/ConceptNet SVD.

		emoticon_file: CSV file read as two-column rows (emoticon, meaning).
		affect_wordnet_file: path handed to get_picklecached_thing to load
		the affective WordNet matrix.
		'''
		# Build emoticon dictionary. The file is read inside a with-block so
		# the handle is closed once the rows are consumed, even if a row is
		# malformed and the unpacking below raises.
		self.emoticon = {}
		with open(emoticon_file, 'r') as emoticon_handle:
			for emoticon, meaning in csv.reader(emoticon_handle):
				# Keys are decoded from UTF-8; meanings are stored as read.
				self.emoticon[emoticon.decode('utf-8')] = meaning
		self.emoticon_list = self.emoticon.keys()
		# Create blending of affect WordNet and ConceptNet
		cnet = conceptnet_2d_from_db('en')
		affectwn_raw = get_picklecached_thing(affect_wordnet_file)
		affectwn_normalized = affectwn_raw.normalized()
		theblend = Blend([affectwn_normalized, cnet])
		# Only the SVD of the blend is kept on the instance.
		self.affectwn = theblend.svd()
		# Get natural language processing tool
		self.nl = get_nl('en')
Пример #10
0
def test_rough_blend2():
    '''
    Singular values can be handed to Blend directly via `svals`.

    The tensors here are bare mocks with no svals of their own, so the
    weights checked below must come from the supplied lists.
    '''
    first = MockTensor()
    second = MockTensor()
    given_svals = [range(5, 0, -1), range(15, 0, -3)]
    blend = Blend([first, second], k_values=1, svals=given_svals)
    eq_(blend.weights, (.75, .25))
Пример #11
0
def test_names():
    '''
    A blend keeps a name for each of its tensors.
    '''
    # With a plain list, the names fall back to repr() of each tensor.
    unnamed = Blend([t1, t2])
    eq_(unnamed.names, (repr(t1), repr(t2)))

    # A dict maps names to tensors; its order is not relied upon, so the
    # (name, tensor) pairs are sorted before comparing.
    from_dict = Blend({'t1': t1, 't2': t2})
    pairs = sorted(zip(from_dict.names, from_dict.tensors))
    eq_(pairs, [('t1', t1), ('t2', t2)])

    # A sequence of (name, tensor) items keeps the given order.
    from_items = Blend([('t1', t1), ('t2', t2)])
    eq_(from_items.names, ('t1', 't2'))
    eq_(from_items.tensors, (t1, t2))
Пример #12
0
 def __init__(self):
     '''
     Load the ConceptNet and affective WordNet matrices and keep the SVD
     of their blend, plus a helper object and the English NL tools.
     '''
     self.helper = DivsiHelper()

     # The two inputs of the blend.
     self.cnet_normalized = conceptnet_2d_from_db('en')
     pickle_path = 'data/divsi/affectiveWNmatrix.pickle'
     self.affectwn_raw = get_picklecached_thing(pickle_path)
     self.affectWN = self.affectwn_raw.normalized()

     # Blend with default weighting, then factor the result.
     blended = Blend([self.affectWN, self.cnet_normalized])
     self.analogySpace = blended.svd()

     self.EN_NL = get_nl('en')
Пример #13
0
def test_rough_blend():
    '''
    Rough weights are the normalized reciprocals of each top singular value.

    Top svals of 5 and 15 give 1/5 and 1/15, which normalize to .75 and .25.
    '''
    first = MockTensor()
    first.svals = range(5, 0, -1)
    second = MockTensor()
    second.svals = range(15, 0, -3)

    tensors = [first, second]
    blend = Blend(tensors, k_values=1)
    eq_(blend.weights, (.75, .25))
Пример #14
0
def test_predicted_svals():
    '''
    predicted_svals lists the singular values a blend is predicted to have.

    One input gets almost all of the weight, so the top predictions should
    equal that input's svals multiplied by its weight.
    '''
    tensor_a = MockTensor()
    tensor_a.svals = range(5, 0, -1)
    tensor_b = MockTensor()
    tensor_b.svals = range(10, 0, -2)

    # Heavily favour one side so that only its svals show up.
    weight = 0.999999
    rest = 1 - weight
    blend = Blend([tensor_a, tensor_b], weights=[weight, rest], k_values=1)

    # Tracked: entries are (value, index of the originating tensor).
    with_origin = blend.predicted_svals(
        num=5, for_each_tensor=5, track_origin=True)
    for want, entry in zip(tensor_a.svals, with_origin):
        got, origin = entry
        assert_almost_equal(got / weight, want)
        eq_(origin, 0)

    # Untracked: entries are the values alone.
    without_origin = blend.predicted_svals(num=5, for_each_tensor=5)
    for want, got in zip(tensor_a.svals, without_origin):
        assert_almost_equal(got / weight, want)

    # Reverse the weights; now the second tensor should dominate.
    blend.weights = [rest, weight]
    reversed_pairs = blend.predicted_svals(
        num=5, for_each_tensor=5, track_origin=True)
    # Unzip the pairs into one tuple of values and one of origins.
    values, origins = zip(*reversed_pairs)
    for got, want in zip(values, tensor_b.svals):
        assert_almost_equal(got / weight, want)
    eq_(origins, (1, ) * 5)
Пример #15
0
class Divsi:
    '''
    Concept-similarity lookups over the SVD of a blend of the affective
    WordNet matrix and the English ConceptNet matrix.
    '''
    svd = None

    def __init__(self):
        '''
        Load both matrices, blend them with default weighting and keep the
        SVD of the blend in self.analogySpace.
        '''
        self.helper = DivsiHelper()
        self.cnet_normalized = conceptnet_2d_from_db('en')
        self.affectwn_raw = get_picklecached_thing('data/divsi/affectiveWNmatrix.pickle')
        self.affectWN = self.affectwn_raw.normalized()
        self.analogySpace = Blend([self.affectWN, self.cnet_normalized]).svd()

        self.EN_NL = get_nl('en')

    def load_svd(self, k=100):
        '''
        Return self.tensor.svd(k=k).
        '''
        # NOTE(review): nothing in this class assigns self.tensor, so as
        # written this raises AttributeError unless a caller sets it first.
        # Confirm the intended source before relying on this method.
        svd = self.tensor.svd(k=k)
        return svd

    def concept_similarity(self, universal_word):
        '''
        Score each tag pair the helper reports for universal_word.

        Returns a tuple (sorted_similarity, common):
        - sorted_similarity is whatever helper.sortDictionary returns for a
          dict mapping each similarity score to its [L, R] pair;
        - common maps each label to its top 10 dot-product items.

        Pairs for which any lookup fails are skipped (best effort).
        '''
        # Imported here so the class does not depend on a file-level import.
        import logging
        log = logging.getLogger(__name__)

        similarity = {}
        common = {}
        for interesting in self.helper.interestingTags(universal_word):
            L = interesting[0]
            R = interesting[1]
            try:
                left  = self.analogySpace.weighted_u_vec(L)
                right = self.analogySpace.weighted_u_vec(R)
                similar = left.hat() * right.hat()

                common[L] = self.analogySpace.u_dotproducts_with(left).top_items(10)
                common[R] = self.analogySpace.u_dotproducts_with(right).top_items(10)

                # Keyed by score, so pairs with equal scores overwrite each other.
                similarity[similar] = [L, R]
            except Exception:
                # Deliberately best-effort: a pair that cannot be scored is
                # skipped rather than aborting the whole lookup, but the
                # reason is recorded instead of being dropped silently.
                log.debug('skipping tag pair (%r, %r)', L, R, exc_info=True)

        sorted_similarity = self.helper.sortDictionary(similarity)
        return (sorted_similarity, common)
Пример #16
0
def test_autoblend():
    '''
    Blend works out the weights itself when none are passed.

    The inputs are two small matrices with identical column labels and a
    single shared row label; the second holds 3x the values of the first.
    '''
    weight_1, weight_2 = .75, .25

    t1 = ez_matrix('0013', '0421', [1, 2, 3, 4])
    # One row label in common with t1, values three times as large.
    t2 = ez_matrix('2214', '0421', [3, 6, 9, 12])
    # Leaving out `weights` triggers the automatic weighting.
    blend = Blend([t1, t2])

    eq_(blend.label_overlap[0], 1)
    eq_(blend.label_overlap[1], 4)

    # t2 is expected to receive a third of t1's weight.
    logging.info(blend.weights)
    assert allclose(blend.weights, [weight_1, weight_2])

    # Inspect the built tensor cell by cell.
    blend.build_tensor()

    # Cells found in a single input: the input value times its weight.
    assert t1['0', '0'] == 1
    assert_almost_equal(blend['0', '0'], weight_1 * 1)
    assert t2['4', '1'] == 4 * 3
    assert_almost_equal(blend['4', '1'], weight_2 * 4 * 3)

    # The shared cell: weighted sum of both input values.
    assert t1['1', '2'] == 3
    assert t2['1', '2'] == 3 * 3
    shared_cell = weight_1 * 3 + weight_2 * 3 * 3
    assert_almost_equal(blend['1', '2'], shared_cell)
Пример #17
0
def test_factor_too_big():
    '''
    Build a blend with factor=1.5, outside the documented 0..1 range.

    NOTE(review): presumably wrapped by a raises-style decorator that is
    not visible here - confirm.
    '''
    tensors = [t1, t2]
    Blend(tensors, factor=1.5)
Пример #18
0
def test_specifying_factor():
    '''
    A supplied factor becomes the weight of the second matrix; the first
    matrix gets the remainder.
    '''
    blend = Blend([t1, t2], factor=.25)
    eq_(blend.weights, (0.75, 0.25))
Пример #19
0
def test_bare_blend():
    '''
    Blending a single matrix leaves its mode-0 labels unchanged.
    '''
    blend = Blend([t1], weights=[1])
    original_labels = set(t1.label_list(0))
    blended_labels = set(blend.label_list(0))
    assertSetEquals(original_labels, blended_labels)
Пример #20
0
def make_blend(other):
    '''
    Blend `other` with `cnet`, a tensor taken from the enclosing scope.
    '''
    pair = [cnet, other]
    return Blend(pair)
Пример #21
0
def test_bare_blend():
    '''
    A blend of one matrix has the same mode-0 label set as that matrix.
    '''
    only = [t1]
    blend = Blend(only, weights=[1])
    expected = set(t1.label_list(0))
    actual = set(blend.label_list(0))
    assertSetEquals(expected, actual)
Пример #22
0
def test_nonlist():
    '''
    Pass a bare tensor to Blend instead of a list of tensors.

    NOTE(review): presumably wrapped by a raises-style decorator that is
    not visible here - confirm.
    '''
    not_a_list = t1
    Blend(not_a_list, weights=[1])
Пример #23
0
def test_factor_wrong_dims():
    '''
    Give a factor together with three matrices; a factor is only meant
    for exactly two.

    NOTE(review): presumably wrapped by a raises-style decorator that is
    not visible here - confirm.
    '''
    three_tensors = [t1, t2, t1]
    Blend(three_tensors, factor=0.5)
Пример #24
0
def test_blend_mean_subtracted():
    '''
    A blend can be constructed from a mean-subtracted view of `tensor`.
    '''
    from csc.divisi.blend import Blend
    centered = tensor.mean_subtracted()
    Blend([centered])
Пример #25
0
def veering_by_overlap(mat1, mat2, row_overlap, col_overlap):
    '''
    Return the total veering of the blend at every value in `factors`.

    The two matrices are first overlapped with overlap_matrices. Note that
    `factors` is not a parameter: it is read from an enclosing scope that
    is not visible here.
    '''
    first, second = overlap_matrices(mat1, mat2, row_overlap, col_overlap)
    blend = Blend([first, second], factor=0)
    totals = []
    for factor in factors:
        totals.append(blend.total_veering_at_factor(factor, num=15))
    return totals
Пример #26
0
def predicted_by_overlap(factors, mat1, mat2, row_overlap, col_overlap):
    '''
    Return the predicted singular values of the blend at each factor.

    The result is a list with one entry per element of `factors`.
    '''
    overlapped = overlap_matrices(mat1, mat2, row_overlap, col_overlap)
    blend = Blend(overlapped, factor=0)
    predictions = []
    for factor in factors:
        predictions.append(blend.predicted_svals_at_factor(factor, num=15))
    return predictions
Пример #27
0
def svals_by_overlap(factors, mat1, mat2, row_overlap, col_overlap):
    '''
    Return the blend's singular values (k=15) at each factor.

    The result is a list with one entry per element of `factors`.
    '''
    overlapped = overlap_matrices(mat1, mat2, row_overlap, col_overlap)
    blend = Blend(overlapped, factor=0)
    per_factor = []
    for factor in factors:
        per_factor.append(blend.svals_at_factor(factor, k=15))
    return per_factor
Пример #28
0
def test_specifying_factor_and_weights():
    '''
    Pass both `factor` and `weights`, which are not meant to be combined.

    NOTE(review): presumably wrapped by a raises-style decorator that is
    not visible here - confirm.
    '''
    tensors = [t1, t2]
    Blend(tensors, factor=0.5, weights=[0.5, 0.5])
Пример #29
0
from csc.conceptnet4.analogyspace import conceptnet_by_relations, identities_for_all_relations
from csc.divisi.blend import Blend
from csc.divisi import export_svdview

# Script: blend the per-relation English ConceptNet data with one extra
# tensor, factor the blend and export the result as 'littleblend'.

# Per-relation data; presumably a dict, given the .values() call below.
byrel = conceptnet_by_relations('en')
# One additional tensor derived from the per-relation data.
t=identities_for_all_relations(byrel)
# Blend all per-relation entries together with the extra tensor.
# NOTE(review): `.values() + [t]` needs .values() to return a list
# (Python 2 dict behaviour) - confirm before porting.
b=Blend(byrel.values()+[t])
s=b.svd()
# Write out the `u` part of the SVD; the identity lambda is passed as the
# third argument (its exact role is defined by write_packed, not shown here).
export_svdview.write_packed(s.u, 'littleblend', lambda x:x)
s.summarize()
Пример #30
0
def _get_color_blend():
    '''
    Return the baked, normalized blend of the color matrix and ConceptNet.

    Both inputs are obtained through get_picklecached_thing, which is given
    a pickle path under FILEPATH and a callable for each input.
    '''
    colors_path = FILEPATH+os.sep+'colormatrix.pickle.gz'
    colors = get_picklecached_thing(colors_path, _make_color_matrix)

    cnet_path = FILEPATH+os.sep+'cnet.pickle.gz'
    load_cnet = lambda: conceptnet_2d_from_db('en')
    cnet = get_picklecached_thing(cnet_path, load_cnet)

    blended = Blend([colors, cnet])
    normalized = blended.normalized(mode=[0,1])
    return normalized.bake()
Пример #31
0
def veering_by_overlap(mat1, mat2, row_overlap, col_overlap):
    '''
    Compute the blend's total veering for each value in `factors`.

    `factors` is not passed in; it comes from an enclosing scope that is
    not visible here. The inputs are overlapped with overlap_matrices first.
    '''
    left, right = overlap_matrices(mat1, mat2, row_overlap, col_overlap)
    blend = Blend([left, right], factor=0)
    veering = []
    for factor in factors:
        veering.append(blend.total_veering_at_factor(factor, num=15))
    return veering
Пример #32
0
def svals_by_overlap(factors, mat1, mat2, row_overlap, col_overlap):
    '''
    Compute the blend's singular values (k=15) for every given factor.

    Returns a list aligned with `factors`.
    '''
    tensors = overlap_matrices(mat1, mat2, row_overlap, col_overlap)
    blend = Blend(tensors, factor=0)
    results = []
    for factor in factors:
        results.append(blend.svals_at_factor(factor, k=15))
    return results
Пример #33
0
def predicted_by_overlap(factors, mat1, mat2, row_overlap, col_overlap):
    '''
    Compute the blend's predicted singular values for every given factor.

    Returns a list aligned with `factors`.
    '''
    tensors = overlap_matrices(mat1, mat2, row_overlap, col_overlap)
    blend = Blend(tensors, factor=0)
    results = []
    for factor in factors:
        results.append(blend.predicted_svals_at_factor(factor, num=15))
    return results
Пример #34
0
def test_wrong_dims():
    '''
    Blend a 1-dimensional tensor with a 2-dimensional one.

    NOTE(review): presumably wrapped by a raises-style decorator that is
    not visible here - confirm.
    '''
    from csc.divisi.labeled_view import make_sparse_labeled_tensor
    one_dim = make_sparse_labeled_tensor(ndim=1)
    two_dim = make_sparse_labeled_tensor(ndim=2)
    mismatched = [one_dim, two_dim]
    Blend(mismatched)
Пример #35
0
from csc.conceptnet4.analogyspace import conceptnet_by_relations, identities_for_all_relations
from csc.divisi.blend import Blend
from csc.divisi import export_svdview

# Script: blend the per-relation English ConceptNet data with one extra
# tensor, factor the blend and export the result as 'littleblend'.

# Per-relation data; presumably a dict, given the .values() call below.
byrel = conceptnet_by_relations('en')
# One additional tensor derived from the per-relation data.
t = identities_for_all_relations(byrel)
# Blend all per-relation entries together with the extra tensor.
# NOTE(review): `.values() + [t]` needs .values() to return a list
# (Python 2 dict behaviour) - confirm before porting.
b = Blend(byrel.values() + [t])
s = b.svd()
# Write out the `u` part of the SVD; the identity lambda is passed as the
# third argument (its exact role is defined by write_packed, not shown here).
export_svdview.write_packed(s.u, 'littleblend', lambda x: x)
s.summarize()