def Classify(X, Y, cls, rep, k=5000):
    """Fit one classifier on a hold-out split and print its evaluation.

    When ``rep`` contains ``'w2v'`` the train/test data are read from the
    pre-computed files ``w2v_rep/<rep>_{train,test}_{x,y}.pkl``. Otherwise
    ``X``/``Y`` are split 80/20 (stratified on ``Y``, ``random_state=123``)
    and passed through ``Representations().get_representation``. The
    training portion is oversampled with SMOTE, the classifier selected by
    ``cls`` is fitted, and a classification report followed by the elapsed
    wall-clock time is printed.

    NOTE(review): another function named ``Classify`` is defined after this
    one; if both live in the same module the later definition replaces this
    one at import time -- confirm which variant is meant to be active.

    Args:
        X: Input samples; only used when ``'w2v'`` is not in ``rep``.
        Y: Labels aligned with ``X``; also used to stratify the split.
        cls: Identifier handed to ``Models().get_classifier``.
        rep: Representation name; selects the cached files or is forwarded
            to ``get_representation``.
        k: Forwarded unchanged to ``get_representation`` (its meaning is
            defined there).

    Returns:
        None. All results are printed.
    """
    start_moment = time.time()
    print('Classificando com {} e {} k={}'.format(cls, rep, k))

    if 'w2v' in rep:
        # Read the cached matrices, closing every file as soon as it has
        # been consumed instead of leaving the handles to the garbage
        # collector.
        # NOTE(review): `load` is presumably pickle.load -- only use it on
        # cache files this project wrote itself.
        cached = []
        for part in ('train_x', 'train_y', 'test_x', 'test_y'):
            with open('w2v_rep/{}_{}.pkl'.format(rep, part), 'rb') as handle:
                cached.append(load(handle))
        train_x, train_y, test_x, test_y = cached
    else:
        # Single stratified hold-out split (this variant does no K-fold).
        X_train, X_test, y_train, y_test = train_test_split(
            X, Y, test_size=0.2, random_state=123, stratify=Y)
        train_x, train_y, test_x, test_y = Representations().get_representation(
            rep=rep, train_x=X_train, train_y=y_train,
            test_x=X_test, test_y=y_test, k=k, cat=None)

    # Oversample the minority class of the training data only; the test data
    # is left untouched. random_state=None means the synthetic samples differ
    # between runs.
    # NOTE(review): the original single-line layout does not show whether
    # this step was meant to run for the cached 'w2v' data as well; it is
    # applied to both branches here -- confirm against how the cache files
    # were produced.
    sm = SMOTE(sampling_strategy='minority', random_state=None)
    # fit_resample is the current name of the deprecated/removed fit_sample.
    train_x, train_y = sm.fit_resample(train_x, train_y)

    classifier = Models().get_classifier(cls)
    classifier.fit(train_x, train_y)
    pred = classifier.predict(test_x)

    report = classification_report(test_y, pred, target_names=['no', 'yes'])
    print(report)

    finish_moment = time.time()
    print("It took " + str(finish_moment - start_moment) + " seconds")
def Classify(X, Y, cls, rep, k=5000):
    """Evaluate a classifier with shuffled 10-fold cross-validation.

    For every fold the training and test samples are passed through
    ``Representations().get_representation``, the training part is
    oversampled with SMOTE, a fresh classifier selected by ``cls`` is fitted
    and the test part is predicted. After all folds a classification report
    and a confusion matrix over the pooled predictions are printed, followed
    by the elapsed wall-clock time.

    NOTE(review): an earlier function in this file is also named
    ``Classify``; if both live in the same module this definition replaces
    it at import time -- confirm that this is intended.

    NOTE(review): the folds come from a plain ``KFold`` (not stratified) and
    neither the shuffle nor SMOTE is seeded, so fold composition and results
    vary between runs.

    Args:
        X: Indexable collection of input samples.
        Y: Labels aligned with ``X`` by position.
        cls: Identifier handed to ``Models().get_classifier``.
        rep: Representation name forwarded to ``get_representation``.
        k: Forwarded unchanged to ``get_representation`` (its meaning is
            defined there).

    Returns:
        None. All results are printed.
    """
    start_moment = time.time()
    print('Classificando com {} e {} k={}'.format(cls, rep, k))

    # Creating the K-fold cross validator
    k_fold = KFold(n_splits=10, shuffle=True)

    # Pooled results across folds.
    test_labels = np.array([], 'int32')   # raw Y values of the test samples
    test_truth = np.array([], 'int32')    # test labels as returned by the representation
    test_pred = np.array([], 'int32')     # classifier predictions

    for train_indices, test_indices in k_fold.split(X):
        print('Running .... =)')
        fold_train_x = [X[i] for i in train_indices]
        fold_train_y = [Y[i] for i in train_indices]
        fold_test_x = [X[i] for i in test_indices]
        fold_test_y = [Y[i] for i in test_indices]

        train_x, train_y, test_x, test_y = Representations().get_representation(
            rep=rep, train_x=fold_train_x, train_y=fold_train_y,
            test_x=fold_test_x, test_y=fold_test_y, k=k, cat=None)

        # Oversample the minority class of the training part only.
        sm = SMOTE(sampling_strategy='minority', random_state=None)
        # fit_resample is the current name of the deprecated/removed fit_sample.
        train_x, train_y = sm.fit_resample(train_x, train_y)

        classifier = Models().get_classifier(cls)
        classifier.fit(train_x, train_y)
        pred = classifier.predict(test_x)

        test_labels = np.append(test_labels, fold_test_y)
        test_truth = np.append(test_truth, test_y)
        test_pred = np.append(test_pred, pred)

    # NOTE(review): the report compares predictions with the raw Y values,
    # while the confusion matrix uses the labels returned by
    # get_representation (as the original did); confirm the two are
    # identical.
    report = classification_report(test_labels, test_pred, target_names=['no', 'yes'])
    print(report)

    # Build the matrix once from the pooled folds. Summing per-fold matrices
    # into a fixed 2x2 array is unsafe: a fold whose test labels and
    # predictions contain a single class yields a 1x1 matrix that NumPy
    # silently broadcasts into all four cells.
    confusion = confusion_matrix(test_truth, test_pred)
    print("Confusion matrix:")
    print(confusion)

    finish_moment = time.time()
    print("It took " + str(finish_moment - start_moment) + " seconds")