def __init__(self, sentences, filename=None):
    """Configure the phrase model and either load it from disk or build it.

    The model file is looked up in ``data/models`` under the parent of the
    directory that contains this module. If a file with the resolved name
    is present it is loaded with ``Phrases.load``; otherwise ``self.build()``
    is called.

    Args:
        sentences: Stored unchanged on ``self.sentences``.
        filename: Optional name of the model file inside the models folder.
            When ``None``, a name is derived from the threshold, decay,
            dataset and tokenizer settings.
    """
    # --- model parameters -------------------------------------------------
    self.sentences = sentences
    self.dataset = "CASEREPORT"
    self.tokenizer = "RAW"
    self.prune_stopwords = stopwords("pubmed")
    self.phrases = None
    self.threshold = 250
    self.decay = 2
    self.bigram_iter = 3

    # --- location of the persisted model ----------------------------------
    module_dir = os.path.dirname(os.path.abspath(__file__))
    parent_dir = os.path.dirname(module_dir)
    models_folder = os.path.join(parent_dir, 'data', 'models')

    if filename is None:
        # NOTE(review): bigram_iter is not part of the default name, so
        # changing it alone would still resolve to the same file -- confirm
        # whether that is intended.
        name_parts = (
            self.threshold,
            self.decay,
            self.dataset,
            self.tokenizer,
        )
        filename = "PHRASE_%s_%s_%s_%s" % name_parts
    self.filepath = os.path.join(models_folder, filename)

    # --- reuse an existing model file, otherwise create one ---------------
    if not os.path.isfile(self.filepath):
        logging.info("CREATE - creating phrase data..")
        self.build()
        return

    logging.info("LOADING - loading phrase data..")
    self.phrases = Phrases.load(self.filepath)
def __init__(self):
    """Initialise the instance with the ``"pubmed_v3"`` stopword collection."""
    # stopwords() is defined outside this block; whatever it returns for
    # "pubmed_v3" is stored as-is on the instance.
    pubmed_v3_stopwords = stopwords("pubmed_v3")
    self.stopwords = pubmed_v3_stopwords