Exemplo n.º 1
0
import os
import pickle
import mysql.connector

from sklearn.externals import joblib
from gensim.models import Word2Vec

from src.utils.LoopTimer import LoopTimer
from src.utils.selector import select_path_from_dir

# Base directory; the model and feature paths below are all joined under it.
path_to_db = "/media/norpheo/mySQL/db/ssorc"
path_to_mllr_model = os.path.join(path_to_db, 'models', 'mllr.joblib')
# Built next to the mllr path; not used in the visible part of this script.
path_to_mlsvc_model = os.path.join(path_to_db, 'models', 'mlsvc.joblib')

# Pick the word2vec model path from the models directory.
# (suffix presumably filters candidate file names -- confirm in the helper.)
path_to_word2vec_model = select_path_from_dir(
    os.path.join(path_to_db, 'models'),
    phrase="Select w2v-model: ",
    suffix=".model",
)

# NOTE(review): `sklearn.externals.joblib` was removed in scikit-learn 0.23;
# this load only works with the older import that the file currently uses.
mllr = joblib.load(path_to_mllr_model)
w2v_model = Word2Vec.load(path_to_word2vec_model)
# Pair every entry of wv.index2word with the matching row of wv.syn0.
# NOTE(review): both attributes are gensim 3.x names (renamed in gensim 4 to
# `index_to_key` / `vectors`) -- confirm the pinned gensim version.
w2v = dict(zip(w2v_model.wv.index2word, w2v_model.wv.syn0))

# Pick the pickled feature file from the features directory.
path_to_feature_file = select_path_from_dir(
    os.path.join(path_to_db, 'features'),
    phrase="Select Feature File: ",
    suffix='.pickle',
)

# NOTE(review): credentials are hard-coded in source; consider reading them
# from configuration or the environment.
connection = mysql.connector.connect(
    host="localhost",
    user="******",
    passwd="thesis",
)
Exemplo n.º 2
0
from sklearn.model_selection import ShuffleSplit
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn import svm

from src.utils.selector import select_path_from_dir

# Feature-type switch: the branch that follows compares it against 'bow' and
# 'w2v' to decide which feature/target files are selected.
ftype = 'w2v'

# Base directory; the model paths here (and the feature paths further down)
# are joined under it.
path_to_db = "/media/norpheo/mySQL/db/ssorc"
# Locations of the two joblib model files inside the 'models' subdirectory.
path_to_mllr_model = os.path.join(path_to_db, 'models', 'mllr.joblib')
path_to_mlsvc_model = os.path.join(path_to_db, 'models', 'mlsvc.joblib')

if ftype == 'bow':
    path_to_feature_file = select_path_from_dir(os.path.join(
        path_to_db, 'features'),
                                                phrase="Select Feature File: ",
                                                suffix='.npz')
    path_to_target_file = select_path_from_dir(os.path.join(
        path_to_db, 'features'),
                                               phrase="Select Target File: ",
                                               suffix='_targets.npy')
    all_features = scipy.sparse.load_npz(path_to_feature_file)
    all_targets = np.load(path_to_target_file)
elif ftype == 'w2v':
    path_to_feature_file = select_path_from_dir(os.path.join(
        path_to_db, 'features'),
                                                phrase="Select Feature File: ",
                                                suffix='.pickle')
    path_to_target_file = select_path_from_dir(os.path.join(
        path_to_db, 'features'),
                                               phrase="Select Target File: ",
Exemplo n.º 3
0
import gensim
import scipy.sparse
import numpy as np
import mysql.connector

import src.utils.corpora as corpora
from src.utils.LoopTimer import LoopTimer
from src.utils.selector import select_path_from_dir

# Base name shared by the feature (.npz) and target (_targets.npy) files.
feature_file_name = 'lr_MLclassifier_bow_features'

# Not referenced in the visible part of this script.
token_type = 'originalText'

path_to_db = "/media/norpheo/mySQL/db/ssorc"

# Pick the dictionary and the TF-IDF model paths via the selector helper.
dic_path = select_path_from_dir(
    os.path.join(path_to_db, "dictionaries"),
    phrase="Select Dictionary: ",
)
tfidf_path = select_path_from_dir(
    os.path.join(path_to_db, "models"),
    phrase="Select TFIDF Model: ",
    suffix=".tfidf",
)

# Both output paths live in the 'features' subdirectory.
path_to_feature_file = os.path.join(
    path_to_db, 'features', feature_file_name + ".npz")
path_to_target_file = os.path.join(
    path_to_db, 'features', feature_file_name + "_targets.npy")

# Announce each load before it starts, then read the gensim objects from disk.
print('Load Dictionary')
dictionary = gensim.corpora.Dictionary.load(dic_path)

print('Load TFIDF')
tfidf = gensim.models.TfidfModel.load(tfidf_path)

connection = mysql.connector.connect(
            host="localhost",
            user="******",
Exemplo n.º 4
0
import pickle

import mysql.connector
import numpy as np

from gensim.models import Word2Vec

from src.utils.corpora import TokenDocStream
from src.features.transformations import tokens_to_mean_w2v
from src.utils.selector import select_path_from_dir

# Base name shared by the feature and target pickle files built below.
feature_file_name = 'lr_MLclassifier_w2v_features'

# Base directory; the model and feature paths are joined under it.
path_to_db = "/media/norpheo/mySQL/db/ssorc"

# Pick the word2vec model path from the models directory.
# (suffix presumably filters candidate file names -- confirm in the helper.)
path_to_word2vec_model = select_path_from_dir(
    os.path.join(path_to_db, 'models'),
    phrase="Select w2v-model: ",
    suffix=".model",
)
path_to_feature_file = os.path.join(path_to_db, 'features',
                                    feature_file_name + '.pickle')
path_to_target_file = os.path.join(path_to_db, 'features',
                                   feature_file_name + '_targets.pickle')

# NOTE(review): credentials are hard-coded in source; consider reading them
# from configuration or the environment.
connection = mysql.connector.connect(
    host="localhost",
    user="******",
    passwd="thesis",
)

cursor = connection.cursor()
# Switch the session to the ssorc schema before querying it.
cursor.execute("USE ssorc;")
# Plain literal: the query has no placeholders, so the f-prefix was dropped.
sq1 = "SELECT abstract_id, label FROM ml_topics_training"