Пример #1
0
from model import StringFeatures

# Smoke test for StringFeatures: exercises len(), get_attributes(), the
# `attributes` member, store() and item lookup on a fresh instance.
feats = StringFeatures()

# A freshly constructed instance is expected to report length 0.
assert len(feats) == 0
# get_attributes("running") is expected to return 8 items.
assert len(feats.get_attributes("running")) == 8

# Same call with a second positional argument of True; the return value is
# deliberately discarded here. NOTE(review): the count check on
# `feats.attributes` below suggests this flag makes the call record the
# attributes on the instance -- confirm against model.py.
feats.get_attributes("running", True)
assert len(feats.get_attributes("cattable", True)) == 8
# After the two flagged calls, `feats.attributes` is expected to hold 16 items.
assert len(feats.attributes) == 16
feats.store("running")
# print() with a single argument behaves identically on Python 2 and 3;
# the bare `print x` statement is a SyntaxError on Python 3.
print(feats["cattable"])
# Debug aid, disabled: print(len(feats))
Пример #2
0
        test.append((lex_col[lang1][l1], lex_col[lang2][l2], embedding))
            
        entry1 = lex_col[lang1].pp(l1)
        entry2 = lex_col[lang2].pp(l2)

        #if "num=PL" in entry2[1]:
        #    print l2, entry2[1]

        
    for size in [100]: #sizes:
        train_keys = sample_from_d(d, size, lex_col[lang1], lex_col[lang2], test_keys_set)
        train = []
        features = StringFeatures()

        for (l1, l2) in train_keys:
            features.store(l2)
        for (l1, l2) in test_keys:
            features.store(l2)
            
        model = RRBM(lex_col.N, 64, len(features), [], C=0.1)

        # training data
        for (l1, l2) in train_keys:
            embedding = E_dict[l2] if l2 in E_dict else zero
            characters = np.zeros((len(features)))
            for i in features[l2]:
                characters[i] = 1.0
            
            train.append((lex_col[lang1][l1], lex_col[lang2][l2], embedding, characters))

        # test data