def cnn_fold(k, path_to_json, path_to_img, epochs=10, img_size=(28, 28), verbose=False):
    """Run k-fold cross-validation of the LipNet CNN model.

    Args:
        k: number of folds.
        path_to_json: path to the JSON metadata consumed by KFold.
        path_to_img: path to the directory of images.
        epochs: number of training epochs per fold.
        img_size: (width, height) the images are resized to.
        verbose: forwarded to the model for progress output.

    Returns:
        List of k evaluation results, one per fold
        (whatever ModelLipnet4.evaluate returns).
    """
    kfold = KFold(k, path_to_json, path_to_img)
    # BUG FIX: was `[None] * 5` — IndexError when k > 5 and trailing Nones
    # when k < 5. Size by k, matching svm_folds.
    stats = [None] * k
    for i in range(k):
        print('{}: Fold {} of {}'.format(datetime.now(), i + 1, k))
        train_df, test_df = kfold.get_datasets(i)
        train_set = DatasetImages(train_df, img_size)
        # Oversample only the training data; the test fold stays untouched.
        train_set.oversample()
        test_set = DatasetImages(test_df, img_size)
        # BUG FIX: verbose was hard-coded to True, silently ignoring the
        # `verbose` parameter.
        model = ModelLipnet4(verbose=verbose)
        # test_set=None: no in-training evaluation; the held-out fold is
        # evaluated once after fitting.
        model.fit(train_set=train_set, test_set=None, nb_epoch=epochs)
        stats[i] = model.evaluate(test_set)
    return stats
def svm_folds(k, path_to_json):
    """Run k-fold cross-validation of the SVM model.

    Args:
        k: number of folds.
        path_to_json: path to the JSON metadata consumed by KFold.

    Returns:
        List of k confusion matrices, one per fold.
    """
    kfold = KFold(k, path_to_json, '')
    stats = [None] * k
    for i in range(k):
        print('{}: Fold {} of {}'.format(datetime.now(), i + 1, k))
        # get train and test dataframes
        train_df, test_df = kfold.get_datasets(i)
        # BUG FIX: test_set was built from train_df, so the model was being
        # evaluated on its own training data.
        test_set = DatasetVironovaSVM(test_df, do_oversampling=False)
        train_set = DatasetVironovaSVM(train_df, do_oversampling=False)
        # get confusion matrix from SVM model
        cf = svm.svm(train_set, test_set)
        stats[i] = cf
    return stats
from tbrs import TermBasedRandomSampling
from preprocessing2 import Preprocessing
from naivebayes import NBMultinomial
from weighting import Weighting
from kfold import KFold
from confusionmatrix import ConfusionMatrix
import time

# Script: load labelled tweets, split with 10-fold CV, preprocess one fold,
# then build a stopword list via Term-Based Random Sampling.
# Each stage is wall-clock timed with time.time() deltas printed to stdout.

# NOTE(review): `pd` is not imported in this chunk — presumably
# `import pandas as pd` appears earlier in the file; verify.
start = time.time()
# Load the "Data Coding" sheet of the annotated-tweet spreadsheet.
data = pd.read_excel(r'C:\Users\PPATK\Desktop\Code 2\Code\Skripsi.xlsx', "Data Coding")
data_tweet = data['Tweet']
data_target = data['Label']

# 10-fold split over tweets and labels.
kfold = KFold(data_tweet, data_target, 10)
data_train, data_test = kfold.get_data_sequence()
i = 0  # index of the single fold processed below
print("kfold")
print(time.time() - start)

start = time.time()
prepro = Preprocessing()
# Preprocess the training tweets of fold i.
# NOTE(review): third return value is bound to a throwaway name `asd`
# and never used — confirm it can be ignored.
cleaned_data, terms, asd = prepro.preprocessing(data_train[i]["tweet"])
print("preprocessing")
print(time.time() - start)

start = time.time()
# Build stopwords by Term-Based Random Sampling (X samples of length Y,
# keeping the top L terms — semantics per the tbrs module).
tbrs = TermBasedRandomSampling(X=10, Y=10, L=40)
stopwords = tbrs.create_stopwords(cleaned_data, terms)