num_epochs = 50 #model parameters num_filters = 64 embed_dim = 100 weight_decay = 1e-4 #embedding matrix print('preparing embedding matrix...') words_not_found = [] nb_words = min(MAX_NB_WORDS, len(word_index) + 1) embedding_matrix = np.zeros((nb_words, embed_dim)) for word, i in tqdm(word_index.items()): if i >= nb_words: continue embedding_vector = rep_reader.get_word_rep(index_name, word) if (embedding_vector is not None) and len(embedding_vector) > 0: # words not found in embedding index will be all-zeros. embedding_matrix[i] = embedding_vector else: words_not_found.append(word) print('number of null word embeddings: %d' % np.sum(np.sum(embedding_matrix, axis=1) == 0)) #print("words in document not found in the index : ", np.random.choice(words_not_found, 10)) # Model 1 CNN model = Sequential() model.add( Embedding(nb_words,
# NOTE(review): this `else` belongs to an argument-validation `if` that starts
# before this visible chunk — presumably checking that args.kerasFile or
# args.esIndex was supplied.
else:
    raise ValueError(
        "You must specify either kerasFile or esIndex. Neither specified.")

# Load the labeled spreadsheet and split it into train/test partitions.
sd = SpreadsheetData(args.inFile, args.textColumn, args.labelColumn,
                     args.testSize, args.randomizeTestSet)

# embedding matrix
# Build a (nb_words, embed_dim) lookup matrix mapping tokenizer word indices to
# pre-trained vectors from rep_reader; rows for missing words stay all-zero.
print('preparing embedding matrix...')
words_not_found = []
# +1 because tokenizer indices presumably start at 1, reserving row 0 — TODO
# confirm against SpreadsheetData's tokenizer.
nb_words = min(sd.MAX_NB_WORDS, len(sd.word_index) + 1)
# Vector width is taken from the representation store itself rather than
# hard-coded, so the matrix always matches the served embeddings.
embed_dim = rep_reader.rep_shape[0]
embedding_matrix = np.zeros((nb_words, embed_dim))
for word, i in tqdm(sd.word_index.items()):
    # Skip words ranked beyond the vocabulary cap.
    if i >= nb_words:
        continue
    embedding_vector = rep_reader.get_word_rep(args.esIndex, word)
    if (embedding_vector is not None) and len(embedding_vector) > 0:
        # words not found in embedding index will be all-zeros.
        embedding_matrix[i] = embedding_vector
    else:
        words_not_found.append(word)
# Count all-zero rows. NOTE(review): includes the never-assigned row 0, so it
# may over-report by one — verify.
print('number of null word embeddings: %d' % np.sum(np.sum(embedding_matrix, axis=1) == 0))

# Train/evaluate the named classifier on the split data and report accuracy.
run = SpreadsheetClassificationExecution(sd, embedding_matrix,
                                         "SuperSimpleLSTMClassifier",
                                         args.kerasFile)
print("Accuracy:%f" % run.accuracy)