def __train_models(self):
    """Train the n-gram perplexity and TF-IDF models on the domain corpus.

    Fresh ``NGramPerplexity`` and ``TFIDF`` instances are stored on
    ``self.ngp`` and ``self.tfidf``. Every regular file directly inside
    ``self.input_dir`` is then read as UTF-8 text, one line at a time.
    Each line is tokenised with ``WordExtractor.get_words``; lines that
    yield no words are skipped, and every other word list is appended to
    ``self.sentences`` and fed to both models.

    ``self.sentences`` is only appended to here, so it must already exist
    when this method is called.
    """
    self.ngp = NGramPerplexity()
    self.tfidf = TFIDF()
    print("Training models from specific corpora")
    for filename in os.listdir(self.input_dir):
        corpus_path = os.path.join(self.input_dir, filename)
        # os.listdir() also returns sub-directories, which open() cannot
        # read; skip anything that is not a regular file instead of crashing.
        if not os.path.isfile(corpus_path):
            continue
        print("Training models from specific corpora: " + filename)
        with open(corpus_path, encoding="utf-8") as corpus:
            for line in corpus:
                words = WordExtractor.get_words(line)
                if len(words) == 0:
                    continue
                self.sentences.append(words)
                self.ngp.train_from_text(words)
                self.tfidf.train_from_text(words)
from ngramperplexity import NGramPerplexity
from wordextractor import WordExtractor

# Configure the n-gram size on the class before creating the instance, then
# train that instance on a fixed set of example sentences, in order.
NGramPerplexity.ngram_size = 3
ngp = NGramPerplexity()
for _sentence in (
    "There are so many people at the beach",
    "The beach is so crowded with all these people, I wish they would just go to another beach",
    "It is summer and a great day to go to the beach.",
    "Let's go to the beach and enjoy the great weather we've got today.",
    "I think the first thing I will do at the beach is to buy an ice cream.",
    "There's many people at the beach today, I think they are enjoying their holidays.",
    "I think something is going on at the beach right now, there are literally people everywhere.",
):
    ngp.train_from_text(WordExtractor.get_words(_sentence))

# Sentences with high similarity