# Script setup: resolve model/feature paths under the ssorc data directory,
# load the persisted `mllr` estimator and a word2vec model, build a
# word -> vector lookup table, and open a MySQL connection.
import os
import pickle

import mysql.connector

try:
    # Original import location; kept first so environments where it still
    # works behave exactly as before.
    from sklearn.externals import joblib
except ImportError:
    # `sklearn.externals.joblib` was deprecated in scikit-learn 0.21 and
    # removed in 0.23. The standalone package is a hard dependency of
    # scikit-learn, so it is available wherever scikit-learn is installed.
    import joblib

from gensim.models import Word2Vec

from src.utils.LoopTimer import LoopTimer
from src.utils.selector import select_path_from_dir

# Root directory that holds the `models` and `features` sub-directories.
path_to_db = "/media/norpheo/mySQL/db/ssorc"

path_to_mllr_model = os.path.join(path_to_db, 'models', 'mllr.joblib')
path_to_mlsvc_model = os.path.join(path_to_db, 'models', 'mlsvc.joblib')

# presumably an interactive chooser over files ending in `suffix` — verify
# against src.utils.selector.
path_to_word2vec_model = select_path_from_dir(
    os.path.join(path_to_db, 'models'),
    phrase="Select w2v-model: ",
    suffix=".model",
)

mllr = joblib.load(path_to_mllr_model)
w2v_model = Word2Vec.load(path_to_word2vec_model)

# Map every vocabulary entry to its embedding row.
# NOTE(review): `index2word` / `syn0` are the pre-4.0 gensim attribute names
# (renamed to `index_to_key` / `vectors` in gensim 4) — confirm the installed
# gensim version before upgrading.
w2v = dict(zip(w2v_model.wv.index2word, w2v_model.wv.syn0))

path_to_feature_file = select_path_from_dir(
    os.path.join(path_to_db, 'features'),
    phrase="Select Feature File: ",
    suffix='.pickle',
)

# NOTE(review): credentials are hard-coded in the script; consider moving
# them to configuration that is not checked into version control.
connection = mysql.connector.connect(
    host="localhost",
    user="******",
    passwd="thesis",
)
from sklearn.model_selection import ShuffleSplit from sklearn.linear_model import LogisticRegression from sklearn.metrics import confusion_matrix from sklearn import svm from src.utils.selector import select_path_from_dir ftype = 'w2v' path_to_db = "/media/norpheo/mySQL/db/ssorc" path_to_mllr_model = os.path.join(path_to_db, 'models', 'mllr.joblib') path_to_mlsvc_model = os.path.join(path_to_db, 'models', 'mlsvc.joblib') if ftype == 'bow': path_to_feature_file = select_path_from_dir(os.path.join( path_to_db, 'features'), phrase="Select Feature File: ", suffix='.npz') path_to_target_file = select_path_from_dir(os.path.join( path_to_db, 'features'), phrase="Select Target File: ", suffix='_targets.npy') all_features = scipy.sparse.load_npz(path_to_feature_file) all_targets = np.load(path_to_target_file) elif ftype == 'w2v': path_to_feature_file = select_path_from_dir(os.path.join( path_to_db, 'features'), phrase="Select Feature File: ", suffix='.pickle') path_to_target_file = select_path_from_dir(os.path.join( path_to_db, 'features'), phrase="Select Target File: ",
import gensim import scipy.sparse import numpy as np import mysql.connector import src.utils.corpora as corpora from src.utils.LoopTimer import LoopTimer from src.utils.selector import select_path_from_dir feature_file_name = 'lr_MLclassifier_bow_features' token_type = 'originalText' path_to_db = "/media/norpheo/mySQL/db/ssorc" dic_path = select_path_from_dir(os.path.join(path_to_db, "dictionaries"), phrase="Select Dictionary: ") tfidf_path = select_path_from_dir(os.path.join(path_to_db, "models"), phrase="Select TFIDF Model: ", suffix=".tfidf") path_to_feature_file = os.path.join(path_to_db, 'features', f"{feature_file_name}.npz") path_to_target_file = os.path.join(path_to_db, 'features', f"{feature_file_name}_targets.npy") print('Load Dictionary') dictionary = gensim.corpora.Dictionary.load(dic_path) print('Load TFIDF') tfidf = gensim.models.TfidfModel.load(tfidf_path) connection = mysql.connector.connect( host="localhost", user="******",
# Script setup for building the w2v feature/target files: resolve the output
# paths, pick a word2vec model file, connect to MySQL and prepare the query
# that reads the labelled training abstracts.
# NOTE(review): `os` is used below but its import is not visible in this
# fragment — confirm it is imported earlier in the file.
import pickle

import mysql.connector
import numpy as np
from gensim.models import Word2Vec

from src.utils.corpora import TokenDocStream
from src.features.transformations import tokens_to_mean_w2v
from src.utils.selector import select_path_from_dir

feature_file_name = 'lr_MLclassifier_w2v_features'
path_to_db = "/media/norpheo/mySQL/db/ssorc"

# presumably an interactive chooser over `*.model` files — verify against
# src.utils.selector.
path_to_word2vec_model = select_path_from_dir(
    os.path.join(path_to_db, 'models'),
    phrase="Select w2v-model: ",
    suffix=".model",
)

# Features and targets are stored side by side under `<db>/features`.
path_to_feature_file = os.path.join(
    path_to_db, 'features', f"{feature_file_name}.pickle"
)
path_to_target_file = os.path.join(
    path_to_db, 'features', f"{feature_file_name}_targets.pickle"
)

connection = mysql.connector.connect(
    host="localhost",
    user="******",
    passwd="thesis",
)
cursor = connection.cursor()

# Select the schema explicitly because the connection is opened without a
# default database.
cursor.execute("USE ssorc;")

# Query text only; it is not executed within this fragment.
sq1 = "SELECT abstract_id, label FROM ml_topics_training"