def save_dict(self):
    """Persist the current dictionary to its training file.

    The destination is whatever ``get_training_file_path`` returns for the
    file name ``dictionary.gensim``; ``self._dictionary.save`` is then
    invoked with that destination.
    """
    destination = get_training_file_path("dictionary.gensim")
    self._dictionary.save(destination)
def file_path(self):
    """Return the location of this model's file.

    The file name has the form ``lda-<num_topics>.model``, where
    ``<num_topics>`` is taken from ``self._num_topics``. The name is handed
    to ``get_training_file_path`` and that function's result is returned.
    """
    name_template = "lda-{num_topics}.model"
    model_file_name = name_template.format(num_topics=self._num_topics)
    return get_training_file_path(model_file_name)
def get_file_path():
    """Return the location of the file holding the corpus' data.

    Returns:
        The result of ``get_training_file_path`` for the file name
        ``lda-corpus.dat``.
    """
    return get_training_file_path("lda-corpus.dat")
def get_file_path():
    """Return the location of the file holding the preprocessed data.

    Returns:
        The result of ``get_training_file_path`` for the file name
        ``tf-idf-corpus.dat``.
    """
    return get_training_file_path("tf-idf-corpus.dat")
def load_model():
    """Load the pickled tf-idf model from its training file.

    The file is located by passing ``tf-idf.model`` to
    ``get_training_file_path`` and is read in binary mode.

    Returns:
        The object unpickled from the file (presumably a tf-idf
        vectorizer -- confirm against the code that writes this file).
    """
    model_path = get_training_file_path('tf-idf.model')
    # NOTE: unpickling can execute arbitrary code; this file must only ever
    # come from a trusted source.
    with open(model_path, "rb") as pickled:
        return pickle.load(pickled)
def load_dictionary():
    """Load the saved gensim dictionary, if one exists.

    The dictionary is looked up at the location that
    ``get_training_file_path`` returns for the file name
    ``dictionary.gensim``.

    Returns:
        :obj:`gensim.corpora.dictionary.Dictionary`: The dictionary loaded
        with ``Dictionary.load``, or None when no regular file exists at
        that location.
    """
    dictionary_path = get_training_file_path("dictionary.gensim")
    # Guard clause: nothing has been saved yet, so there is nothing to load.
    if not os.path.isfile(dictionary_path):
        return None
    return Dictionary.load(dictionary_path)
async def coroutine(self, data):
    """Add a dictionary to the JSON file used for LightTag data sets.

    The data is serialised with ``json.dumps`` and handed to ``insert``
    together with the open file and a position two characters before the
    end of the file.

    Args:
        data (:obj:`dict`): A dictionary containing data that needs to be
            tagged
    """
    dataset_path = get_training_file_path("LightTag-dataset.json")
    # create_file is given the initial content (an opening bracket, a
    # newline and a closing bracket); its return value is treated as
    # "the file was newly created" -- verify against the helper.
    created = create_file(dataset_path, "[\n]")
    serialized = json.dumps(data)

    # Only entries after the first one need a separating comma.
    if created:
        separator = "\n"
    else:
        separator = ",\n"
    payload = separator + serialized

    with open(dataset_path, "r+") as dataset:
        dataset.seek(0, 2)  # whence=2: move to the end of the file
        insert_at = dataset.tell() - 2
        insert(payload, dataset, insert_at)
def get_file_path():
    """Return the location of the tf-idf model's file.

    Returns:
        The result of ``get_training_file_path`` for the file name
        ``tf-idf.model``.
    """
    return get_training_file_path("tf-idf.model")