def start_test_CART(train_len, test_len):
    """Train a DecisionTreeClassifier and evaluate it on a second sample.

    The training samples come from ``make_list_gender(train_len)`` and the
    evaluation samples from ``make_list_gender(-test_len)``; both are turned
    into feature/label pairs by ``get_X_y`` using the same ``Converter``
    instance.

    Args:
        train_len: Passed to ``make_list_gender`` to select training samples.
        test_len: Negated and passed to ``make_list_gender`` to select the
            evaluation samples.
            NOTE(review): presumably a negative length takes samples from the
            opposite end of the dataset so the two sets do not overlap --
            confirm against ``make_list_gender``.

    Returns:
        Whatever ``test(test_X, test_y, model)`` returns for the fitted model.
    """
    # Imported locally, as in the original code.
    from converter import Converter

    train_samples = make_list_gender(train_len)
    converter = Converter()
    X, y = get_X_y(train_samples, converter)
    print("len list", len(train_samples))
    print("len converter", len(converter.mass))

    model = DecisionTreeClassifier()
    model.fit(X, y)

    # Evaluate on new data, reusing the converter that processed the
    # training samples.
    test_samples = make_list_gender(-test_len)
    print("len test_list", len(test_samples))
    test_X, test_y = get_X_y(test_samples, converter)
    return test(test_X, test_y, model)
def start_test_SVM(train_len, test_len):
    """Train an SVC and evaluate it on a second sample.

    The training samples come from
    ``make_list_gender(train_len, words_separator2)`` and the evaluation
    samples from ``make_list_gender(-test_len, words_separator2)``; both are
    turned into feature/label pairs by ``get_X_y`` using the same
    ``Converter`` instance.

    Args:
        train_len: Passed to ``make_list_gender`` to select training samples.
        test_len: Negated and passed to ``make_list_gender`` to select the
            evaluation samples.
            NOTE(review): presumably a negative length takes samples from the
            opposite end of the dataset so the two sets do not overlap --
            confirm against ``make_list_gender``.

    Returns:
        Whatever ``test(test_X, test_y, clf)`` returns for the fitted model.
    """
    # Imported locally, as in the original code.
    from converter import Converter

    train_samples = make_list_gender(train_len, words_separator2)
    converter = Converter()
    X, y = get_X_y(train_samples, converter)
    print("len list", len(train_samples))
    print("len converter", len(converter.mass))

    clf = SVC()
    clf.fit(X, y)

    # Evaluate on new data, reusing the converter that processed the
    # training samples.
    test_samples = make_list_gender(-test_len, words_separator2)
    print("len test_list", len(test_samples))
    test_X, test_y = get_X_y(test_samples, converter)
    return test(test_X, test_y, clf)
def save_new_SVC_model(train_len):
    """Fit a ``Classifier`` on training samples and dump it to ``CART.pkl``.

    The samples come from ``make_list_gender(train_len)`` and are converted
    to feature/label pairs by ``get_X_y`` with a fresh ``Converter``.

    NOTE(review): the function name mentions SVC, but the model is built
    from ``Classifier()`` and written to ``'CART.pkl'`` -- confirm which
    model type and file name are actually intended. Behaviour is left
    unchanged here.

    Args:
        train_len: Passed to ``make_list_gender`` to select training samples.

    Returns:
        None. The fitted model is written to ``CART.pkl`` via ``joblib.dump``.
    """
    # Imported locally, as in the original code.
    from converter import Converter

    train_samples = make_list_gender(train_len)
    converter = Converter()
    X, y = get_X_y(train_samples, converter)
    print("len train list", len(train_samples))
    print("len converter", len(converter.mass))

    model = Classifier()
    model.fit(X, y)

    # Persist the fitted model.
    joblib.dump(model, 'CART.pkl')
import numpy as np from read_groups import make_list_gender from sklearn.feature_extraction.text import TfidfVectorizer from sklearn import grid_search from sklearn.svm import SVC vectorizer = TfidfVectorizer() corpus = [ "dfgdfg", "ac", ] list = make_list_gender(3) new_list = [] large_list = [] from converter import Converter converter = Converter() # new_list = list[:10] + list[-10:] # list = list[:50000] + list[-50000:] #слишком много в листе, уменьшим np.random.shuffle(list) # print("list", list) for m in list: large_list = large_list + [] large_list = large_list + [[ #converter.convert(m[0]), converter.convert(m[1]),