Пример #1
0
import warnings

import pandas as pd

from src.classifier.sklearn import pipelines
from src.data_retrieval.helpers import database
from src.evaluation.compare import compare_classifiers
from src.preprocessing.transformator import get_df


def warn(*args, **kwargs):
    """Discard a warning instead of reporting it.

    Accepts and ignores any positional or keyword arguments so it can stand
    in for ``warnings.warn`` however callers invoke it. Always returns None.
    """


# Swap the standard hook for the no-op above: from here on, any call made
# through the ``warnings.warn`` module attribute is silently dropped.
# Requires ``import warnings`` at the top of the file.
warnings.warn = warn

# Open the MongoDB helper, pull every article it yields into a list, and
# hand that list to get_df.
db = database.MongoDB()
df = get_df([*db.get_articles()])

# models
# Feature-set prefixes; the column names listed in `features` append a
# `_title` or `_text` suffix to these (e.g. 'bg_lsa_title').
feature_sets = [
    'bg_bert',
    'bg_xlm',
    'bg_styl',
    'bg_lsa',
    'en_use',
    'en_nela',
    'en_bert',
    'en_elmo',
]
features = [
    ('top_1', ['bg_lsa_title', 'bg_lsa_text']),
    ('top_2', ['bg_lsa_title', 'bg_lsa_text', 'en_elmo_title',
               'en_elmo_text']),
    ('top_3', [
        'bg_lsa_title', 'bg_lsa_text', 'en_elmo_title', 'en_elmo_text',
        'en_use_title', 'en_use_text'
    ]),
    ('top_4', [
Пример #2
0
import numpy as np
import pandas as pd

from src.classifier.sklearn import pipelines
from src.evaluation.compare import compare_classifiers
from src.preprocessing.transformator import get_df
from sklearn.model_selection import cross_val_predict, GridSearchCV

from sklearn.linear_model import LogisticRegression

# NOTE(review): the connection below is commented out, yet `db` is still used
# by the next statement -- confirm `db` is defined elsewhere before running.
#db = database.MongoDB()

# Materialize every article returned by `db`, then pass the list to get_df.
articles = list(db.get_articles())
df = get_df(articles)

# Logistic-regression estimator constructed with its default settings.
clf = LogisticRegression()

# Only 'bg_xlm' is enabled for this run. Disabled alternatives, kept here for
# quick toggling: 'bg_bert', 'bg_styl', 'bg_lsa', 'en_use', 'en_nela',
# 'en_bert', 'en_elmo'.
feature_sets = ['bg_xlm']

all_feats = []
for feature_set in feature_sets:
    all_feats.append(feature_set + '_title')