# Load fastText word vectors into `embeddings` (token -> float32 vector).
# NOTE(review): assumes `embeddings` (a dict) and `fasttext_file` are defined
# earlier in the file — confirm.
with open(fasttext_file, 'r', encoding='utf8') as f:
    next(f)  # skip the fastText "<vocab_count> <dim>" header line
    for line in f:  # stream the file instead of readlines() — avoids holding every line in memory
        values = line.split()
        # first field is the token; the rest are the vector components
        embeddings[values[0]] = np.asarray(values[1:], dtype='float32')

print('Loaded %s word vectors.' % len(embeddings))
print(embeddings['man'])
print(embeddings['woman'])
print(embeddings['guy'])
print(embeddings['boy'])

# Visualize an arbitrary slice of 50 vectors and their labels.
word_vectors = list(embeddings.values())[100:150]
labels = list(embeddings.keys())[100:150]
kutils.viz_vectors(word_vectors, labels)
kutils.viz_vectors_corr(word_vectors, labels)
kutils.viz_vectors_lower_dim(word_vectors, labels)

# Visualize a hand-picked set of semantically related words.
labels = ['man', 'woman', 'boy', 'guy', 'cat', 'dog']
word_vectors = [embeddings[w] for w in labels]  # build from labels to keep the two lists in sync
kutils.viz_vectors(word_vectors, labels)
kutils.viz_vectors_corr(word_vectors, labels)
kutils.viz_vectors_lower_dim(word_vectors, labels)
# Build an LSTM sentence encoder on top of frozen pre-trained embeddings,
# then visualize the resulting sentence vectors and layer activations.
# NOTE(review): `Input(shape=(X_train.shape[1],))` and `input_length=seq_maxlen`
# both describe the sequence length — confirm X_train.shape[1] == seq_maxlen upstream.
inputs = Input(shape=(X_train.shape[1], ))  # renamed from `input` to stop shadowing the builtin
embedded = Embedding(input_dim=vocab_size,
                     output_dim=word_embed_size,
                     input_length=seq_maxlen,
                     weights=[emb_layer_weights],
                     trainable=False)(inputs)  # frozen: pre-trained vectors are not fine-tuned
lstm1 = LSTM(200, return_sequences=True)(embedded)
# return_sequences=False -> only the final timestep's state, used as the sentence vector
features = LSTM(100, return_sequences=False)(lstm1)
model = Model(inputs=inputs, outputs=features)
print(model.summary())

feature_vectors = model.predict(X_train)
print(feature_vectors)

# Visualize sentence vectors.
kutils.viz_vectors(feature_vectors, data)
kutils.viz_vectors_corr(feature_vectors, data)
kutils.viz_vectors_lower_dim(feature_vectors, data)

# Visualize layer activations with just one sample.
act = kutils.get_activations(model, X_train[0:1])
kutils.display_activations(act, directory=os.path.join("G:/", 'digit_activations'), save=True)
# NOTE(review): `tmp` is not defined in this chunk — verify it exists upstream
# (it is probably the raw input used as the heatmap background).
kutils.display_heatmaps(act, tmp[0:1], directory=os.path.join("G:/", 'digit_heatmaps'), save=True)