Example #1
def write_word_vectors(model, weights_name, path):
    # Export the embeddings learned by the model: start from the pretrained
    # vocabulary, then overwrite its vectors with the trained weight matrix.
    w = word_vectors.WordVectors(load=True)
    w.vectors = np.asarray(pairwise_models.get_weights(model, weights_name)[0])
    write_vectors(w, path + 'vectors_learned')

    # Also export the original pretrained vectors for the full vocabulary.
    w = word_vectors.WordVectors(keep_all_words=True)
    write_vectors(w, path + 'vectors_pretrained_all')
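The snippet above pulls the first weight array of a named layer out of the trained model and writes it as a word-vector file, next to an untouched copy of the pretrained vectors. A minimal standalone sketch of the same pattern, assuming a Keras model whose embedding layer is identified by layer_name; export_embeddings, vocab, and the output format are illustrative and not part of the original module:

import numpy as np

def export_embeddings(model, layer_name, vocab, path):
    # Grab the first weight array of the named layer; for a Keras Embedding
    # layer this is the (vocab_size, dim) embedding matrix.
    matrix = np.asarray(model.get_layer(layer_name).get_weights()[0])
    # Write one "word v1 v2 ... vd" line per vocabulary entry.
    with open(path, "w") as f:
        for word, row in zip(vocab, matrix):
            f.write(word + " " + " ".join("%.6f" % x for x in row) + "\n")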
Example #2
def write_document_vectors():
    # Build one fixed-size vector per document by averaging the pretrained
    # embeddings of every word token in the document's sentences.
    vectors = word_vectors.WordVectors(load=True)
    for dataset_name in ["train", "dev", "test"]:
        print("Building document vectors for", dataset_name)
        doc_vectors = {}
        for d in docs(dataset_name):
            # Collect each distinct sentence (keyed by sentence number)
            # from the document's mentions.
            sentences = {}
            did = None
            for mention_num in sorted(d["mentions"].keys(), key=int):
                m = d["mentions"][mention_num]
                did = m["doc_id"]
                if m['sent_num'] not in sentences:
                    sentences[m['sent_num']] = m['sentence']

            # Sum the embedding of every token, then divide by the count.
            v = np.zeros(vectors.vectors[0].size)
            n = 0
            for s in sentences.values():
                for w in s:
                    v += vectors.vectors[vectors[w]]
                    n += 1
            doc_vectors[did] = v / n
        utils.write_pickle(
            doc_vectors,
            directories.MISC + dataset_name + "_document_vectors.pkl")
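Each document vector here is simply the mean of the word embeddings over all tokens in the document's sentences. A minimal sketch of that averaging in isolation, assuming a plain dict that maps words to NumPy vectors; average_vector, word_to_vec, and the unknown-word fallback are illustrative, not taken from the original module:

import numpy as np

def average_vector(sentences, word_to_vec, dim):
    # Sum the embedding of every token, then divide by the token count.
    total = np.zeros(dim)
    count = 0
    for sentence in sentences:
        for word in sentence:
            total += word_to_vec.get(word, np.zeros(dim))
            count += 1
    return total / count if count else total

doc = [["the", "judge", "ruled"], ["she", "appealed"]]
vecs = {w: np.random.rand(50) for w in {t for s in doc for t in s}}
print(average_vector(doc, vecs, 50).shape)  # -> (50,)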
Example #3
def main():
    # Precompute all static resources: feature names, genres, word lists,
    # the relevant pretrained vectors, and the per-document vectors.
    write_feature_names()
    write_genres()
    write_words()
    word_vectors.WordVectors().write(directories.RELEVANT_VECTORS)
    write_document_vectors()