    returns:
        model: a dictionary mapping words to word-vectors (embeddings).
    """
    if word2vec_format:
        return gensim.models.KeyedVectors.load_word2vec_format(filepath, binary=True)
    else:
        # own pretrained model
        return gensim.models.Word2Vec.load(filepath)


if __name__ == "__main__":
    ### load data:
    trainpath = 'train_data/train_data.json'
    testpath = 'test_data/test_data.json'
    traindata = dp.loadfile(trainpath)
    inc_categories = ['cond-mat.mes-hall', 'cond-mat.mtrl-sci', 'cond-mat.stat-mech',
                      'cond-mat.str-el', 'cond-mat.supr-con', 'cond-mat.soft',
                      'quant-ph', 'cond-mat.dis-nn', 'cond-mat.quant-gas', 'hep-th']
    # train_X, train_y = dp.generate_Xy_data_categories(traindata, inc_categories, ignore_others=True,
    #                                                   shuffle_seed=0, ydatatype='onehot',
    #                                                   clean_x=True, keep_latex_tags=True)
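    # Illustrative sketch (not part of the original script): querying the
    # embeddings once loaded. The .bin path and the query word are assumptions
    # for illustration only; the load call mirrors the word2vec_format branch above.
    import gensim
    wordvecs = gensim.models.KeyedVectors.load_word2vec_format(
        'wordvec_models/pretrained_vectors.bin', binary=True)
    vec = wordvecs['electron']                      # one dense embedding vector
    neighbours = wordvecs.most_similar('electron')  # nearest tokens by cosine similarity
    print(vec.shape, neighbours[:3])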
        returns:
            the predicted labels or probabilities of docs
        """
        probabilities = self.model.predict(X_ints)
        if return_probabilities:
            return probabilities
        else:
            return np.round(probabilities)


if __name__ == "__main__":
    ### load data:
    trainpath = 'train_data/train_data.json'
    testpath = 'test_data/test_data.json'
    traindata, testdata = dp.loadfile(trainpath), dp.loadfile(testpath)
    inc_categories = ['cond-mat.mes-hall', 'cond-mat.mtrl-sci', 'cond-mat.stat-mech',
                      'cond-mat.str-el', 'cond-mat.supr-con', 'cond-mat.soft',
                      'quant-ph', 'cond-mat.dis-nn', 'cond-mat.quant-gas', 'hep-th']
    train_X, train_y = dp.generate_Xy_data_categories(traindata, inc_categories, ignore_others=True,
                                                      shuffle_seed=0, ydatatype='onehot',
                                                      clean_x=True, keep_latex_tags=True)
    test_X, test_y = dp.generate_Xy_data_categories(testdata, inc_categories, ignore_others=True,
                                                    shuffle_seed=0, ydatatype='onehot',
                                                    clean_x=True, keep_latex_tags=True)
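    # Illustrative sketch (not part of the original script): scoring one-hot
    # predictions against test_y. `clf` and `test_X_ints` below are assumed
    # names for a classifier instance exposing the predict() method above and
    # its integer-encoded test input; only the argmax comparison is new.
    import numpy as np

    def onehot_accuracy(pred_probs, y_onehot):
        """Fraction of docs whose argmax class matches the one-hot label."""
        return float(np.mean(np.argmax(pred_probs, axis=1) == np.argmax(y_onehot, axis=1)))

    # probs = clf.predict(test_X_ints, return_probabilities=True)
    # print('test accuracy:', onehot_accuracy(probs, test_y))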