# Smoke test for StringFeatures: checks attribute extraction counts and
# exercises store() and item lookup.
from model import StringFeatures

feats = StringFeatures()

# A freshly constructed instance reports length zero.
assert len(feats) == 0

# get_attributes() with a single argument is expected to return 8 items
# for "running".
assert len(feats.get_attributes("running")) == 8

# Same word again, now with a second positional argument of True.
# NOTE(review): presumably this flag registers the attributes on the
# instance (the `attributes` count below is checked right after the two
# True-calls) -- confirm against model.StringFeatures.
feats.get_attributes("running", True)
assert len(feats.get_attributes("cattable", True)) == 8

# After the two calls above, 16 entries are expected in `feats.attributes`.
assert len(feats.attributes) == 16

feats.store("running")

# Only "running" was passed to store(); "cattable" is looked up here anyway.
# Single-argument print(...) behaves identically on Python 2 and Python 3.
print(feats["cattable"])
# print(len(feats))
# NOTE(review): the next three statements read `l1`/`l2` and append to
# `test`, so they look like the tail of a loop body whose header lies before
# this chunk -- confirm the intended indentation against the full file.
test.append((lex_col[lang1][l1], lex_col[lang2][l2], embedding))
entry1 = lex_col[lang1].pp(l1)
entry2 = lex_col[lang2].pp(l2)
# Disabled debugging aid (Python 2 syntax), kept for reference:
# if "num=PL" in entry2[1]:
#     print l2, entry2[1]

# Build one training set (and a matching model) per sample size.
for size in [100]:  # sizes:
    train_keys = sample_from_d(d, size, lex_col[lang1], lex_col[lang2], test_keys_set)
    train = []

    # Register the second element of every train and test key pair with the
    # feature store before the model is built, because the model is
    # constructed with len(features) as one of its arguments.
    features = StringFeatures()
    for l1, l2 in train_keys:
        features.store(l2)
    for l1, l2 in test_keys:
        features.store(l2)

    model = RRBM(lex_col.N, 64, len(features), [], C=0.1)

    # training data
    for l1, l2 in train_keys:
        # Fall back to `zero` when l2 has no entry in E_dict.
        embedding = E_dict[l2] if l2 in E_dict else zero
        # Indicator vector: 1.0 at every index yielded by features[l2],
        # 0.0 elsewhere.
        characters = np.zeros(len(features))
        for i in features[l2]:
            characters[i] = 1.0
        train.append((lex_col[lang1][l1], lex_col[lang2][l2], embedding, characters))

    # test data