def write_word_vectors(model, weights_name, path):
    """Export two word-vector files under ``path``.

    ``vectors_learned``: the pretrained vocabulary with its vectors replaced
    by the embedding weights named ``weights_name`` extracted from ``model``.
    ``vectors_pretrained_all``: the full pretrained vocabulary, untouched.
    """
    # Learned embeddings: reuse the loaded vocabulary, swap in model weights.
    learned = word_vectors.WordVectors(load=True)
    learned.vectors = np.asarray(
        pairwise_models.get_weights(model, weights_name)[0])
    write_vectors(learned, path + 'vectors_learned')

    # Pretrained embeddings over the full (unfiltered) word list.
    pretrained = word_vectors.WordVectors(keep_all_words=True)
    write_vectors(pretrained, path + 'vectors_pretrained_all')
def write_document_vectors(): vectors = word_vectors.WordVectors(load=True) for dataset_name in ["train", "dev", "test"]: print "Building document vectors for", dataset_name doc_vectors = {} for d in docs(dataset_name): sentences = {} did = None for mention_num in sorted(d["mentions"].keys(), key=int): m = d["mentions"][mention_num] did = m["doc_id"] if m['sent_num'] not in sentences: sentences[m['sent_num']] = m['sentence'] v = np.zeros(vectors.vectors[0].size) n = 0 for s in sentences.values(): for w in s: v += vectors.vectors[vectors[w]] n += 1 doc_vectors[did] = v / n utils.write_pickle( doc_vectors, directories.MISC + dataset_name + "_document_vectors.pkl")
def main():
    """Run the full preprocessing/export pipeline in order.

    Writes feature names, genres, and words, then saves the relevant word
    vectors, then builds per-document vectors.
    """
    write_feature_names()
    write_genres()
    write_words()
    # NOTE(review): call order presumably matters -- the vocabulary/vector
    # files likely need to exist before document vectors are built; confirm
    # before reordering.
    word_vectors.WordVectors().write(directories.RELEVANT_VECTORS)
    write_document_vectors()