Пример #1
0
def start_test_CART(train_len, test_len):
    """Train a decision-tree classifier and evaluate it on separate data.

    The training samples come from ``make_list_gender(train_len)`` and the
    evaluation samples from ``make_list_gender(-test_len)``. The same
    ``Converter`` instance is passed to ``get_X_y`` for both sets.

    NOTE(review): the negated length presumably makes ``make_list_gender``
    take samples from the opposite end of the data set so that the two sets
    do not overlap -- confirm against ``make_list_gender``.

    Args:
        train_len: Value forwarded to ``make_list_gender`` for training data.
        test_len: Value negated and forwarded to ``make_list_gender`` for
            evaluation data.

    Returns:
        Whatever ``test(test_X, test_y, model)`` returns for the fitted model.
    """
    train_samples = make_list_gender(train_len)
    # Imported after the samples are built, as in the original call order.
    from converter import Converter
    converter = Converter()
    X, y = get_X_y(train_samples, converter)
    print("len list", len(train_samples))
    print("len converter", len(converter.mass))

    model = DecisionTreeClassifier()
    # fit() trains the estimator in place; its return value is not needed.
    model.fit(X, y)

    # Evaluate on new data.
    test_samples = make_list_gender(-test_len)
    print("len test_list", len(test_samples))

    test_X, test_y = get_X_y(test_samples, converter)

    return test(test_X, test_y, model)
Пример #2
0
def start_test_SVM(train_len, test_len):
    """Train an ``SVC`` classifier and evaluate it on separate data.

    The training samples come from
    ``make_list_gender(train_len, words_separator2)`` and the evaluation
    samples from ``make_list_gender(-test_len, words_separator2)``. The same
    ``Converter`` instance is passed to ``get_X_y`` for both sets.

    NOTE(review): the negated length presumably makes ``make_list_gender``
    take samples from the opposite end of the data set so that the two sets
    do not overlap -- confirm against ``make_list_gender``.

    Args:
        train_len: Value forwarded to ``make_list_gender`` for training data.
        test_len: Value negated and forwarded to ``make_list_gender`` for
            evaluation data.

    Returns:
        Whatever ``test(test_X, test_y, clf)`` returns for the fitted model.
    """
    train_samples = make_list_gender(train_len, words_separator2)
    # Imported after the samples are built, as in the original call order.
    from converter import Converter
    converter = Converter()
    X, y = get_X_y(train_samples, converter)
    print("len list", len(train_samples))
    print("len converter", len(converter.mass))

    clf = SVC()
    # fit() trains the estimator in place; its return value is not needed.
    clf.fit(X, y)

    # Evaluate on new data.
    test_samples = make_list_gender(-test_len, words_separator2)
    print("len test_list", len(test_samples))

    test_X, test_y = get_X_y(test_samples, converter)

    return test(test_X, test_y, clf)
Пример #3
0
def save_new_SVC_model(train_len):
    """Fit a fresh ``Classifier`` and dump it to ``CART.pkl`` with joblib.

    Training samples are produced by ``make_list_gender(train_len)`` and
    turned into features/labels by ``get_X_y`` with a new ``Converter``.

    NOTE(review): the function name mentions SVC, yet the model is built
    with ``Classifier()`` and written to ``'CART.pkl'`` -- confirm which
    model type and file name are actually intended.

    Args:
        train_len: Value forwarded to ``make_list_gender``.

    Returns:
        None. The fitted model is written to ``CART.pkl``.
    """
    samples = make_list_gender(train_len)
    from converter import Converter
    encoder = Converter()
    features, labels = get_X_y(samples, encoder)

    print("len train list", len(samples))
    print("len converter", len(encoder.mass))

    classifier = Classifier()
    classifier.fit(features, labels)

    # Persist the fitted model.
    joblib.dump(classifier, 'CART.pkl')
Пример #4
0
import numpy as np
from read_groups import make_list_gender
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn import grid_search

from sklearn.svm import SVC

# Script setup: build the sample list and the helper objects used by the
# loop that follows.

# NOTE(review): `vectorizer` and `corpus` are not used anywhere in the
# visible part of this script -- check whether they are still needed.
vectorizer = TfidfVectorizer()

corpus = [
    "dfgdfg",
    "ac",
]

# NOTE: this name shadows the builtin `list` for the rest of the module.
list = make_list_gender(3)

new_list = []
large_list = []
from converter import Converter

converter = Converter()
# new_list = list[:10] + list[-10:]

# list = list[:50000] + list[-50000:]  # too many items in the list, trim it
# Shuffle the samples in place before they are processed.
np.random.shuffle(list)
# print("list", list)

for m in list:
    large_list = large_list + []
    large_list = large_list + [[  #converter.convert(m[0]),
        converter.convert(m[1]),