# Count the feature names reported by `vectorizer`. As a bare expression this
# only shows a value when it is the last statement of a notebook cell.
# NOTE(review): if `vectorizer` is a scikit-learn vectorizer, `get_feature_names()`
# was deprecated in 1.0 and removed in 1.2 in favour of `get_feature_names_out()`
# -- confirm against the installed version.
len(vectorizer.get_feature_names())

# In[375]:

# Train a Doc2Vec model on `tagged_data` and persist it to "d2v.model".
#
# Hyper-parameters:
#   max_epochs -- number of passes over the corpus.
#   alpha      -- initial learning rate; gensim decays it linearly down to
#                 `min_alpha` over the course of a single `train()` call.
max_epochs = 100
alpha = 0.025

model = Doc2Vec(alpha=alpha,
                vector_size=20,
                min_alpha=0.00025,
                min_count=1,
                dm=1,
                # The constructor keyword is `epochs`, not `epoch`.
                epochs=max_epochs,
                workers=multiprocessing.cpu_count())

# Materialise the corpus once through the progress bar; build_vocab needs an
# iterable it can scan completely to collect words and document tags.
model.build_vocab(list(tqdm_notebook(tagged_data)))

# Call train() exactly once. The previous version looped `max_epochs` times,
# each time running `model.iter` epochs and hand-editing alpha/min_alpha.
# That multiplied the number of passes, disabled gensim's own learning-rate
# schedule, and relied on `model.iter`, a deprecated alias that no longer
# exists in gensim 4. The epoch count is passed explicitly so this does not
# depend on any model attribute name.
model.train(tagged_data,
            total_examples=model.corpus_count,
            epochs=max_epochs)

model.save("d2v.model")
print("Model Saved")

# In[376]: