Exemplo n.º 1
0
# Single-fold run of the classification pipeline that prints the wall-clock
# time spent in each stage (label on one line, elapsed seconds on the next).
# NOTE(review): `data`, `data_tweet`, `start` and the `time` module are set up
# before this excerpt; `start` is re-armed after every stage.
data_target = data['Label']

# NOTE(review): the third argument is presumably the number of folds — confirm
# against KFold's signature.
kfold = KFold(data_tweet, data_target, 10)
data_train, data_test = kfold.get_data_sequence()
i = 0  # only the first entry of data_train is processed below
print("kfold")
print(time.time() - start)
start = time.time()

prepro = Preprocessing()
# The third return value (`asd`) is not used within the visible lines.
cleaned_data, terms, asd = prepro.preprocessing(data_train[i]["tweet"])
print("preprocessing")
print(time.time() - start)
start = time.time()

tbrs = TermBasedRandomSampling(X=10, Y=10, L=40)
stopwords = tbrs.create_stopwords(cleaned_data, terms)

# This stage *creates* the stopword list, so it is reported under that label
# (the original printed "remove stopword" here, swapping the two timings).
print("create stopword")
print(time.time() - start)
start = time.time()

prepro2 = Preprocessing()
new_cleaned_data, new_terms, removed_words = prepro2.remove_stopword(
    cleaned_data, stopwords)

# This stage *removes* the stopwords from the cleaned data.
print("remove stopword")
print(time.time() - start)
start = time.time()

weight = Weighting(new_cleaned_data, new_terms)
Exemplo n.º 2
0
            # Append nine single-space placeholders to each of the three lists.
            # NOTE(review): presumably pads the X/Y/L columns so they line up
            # with the per-iteration rows recorded below — confirm against the
            # code that consumes x_array / y_array / l_array.
            for i in range(9):
                x_array.append(" ")
                y_array.append(" ")
                l_array.append(" ")                

            # Initialised to 0 here; it is not modified within the visible lines.
            accuracy_total_accumulation = 0

            # One iteration per entry of data_train; the same index `i` selects
            # the matching entry of data_test further down.
            for i in range(len(data_train)):
                # Record the 1-based number of this iteration.
                kfold_per_combination.append(i+1)
                # Fresh per-iteration lists; not filled within the visible lines.
                y_test = []
                y_pred = []

                # Preprocess the training tweets of this iteration.
                prepro = Preprocessing()
                cleaned_data, terms = prepro.preprocessing(data_train[i]["tweet"])
                
                # Build a stopword list from the cleaned data using the current
                # x / y / l values (defined outside this excerpt).
                tbrs = TermBasedRandomSampling(X=x, Y=y, L=l)
                stopwords = tbrs.create_stopwords(cleaned_data,terms)

                # Strip those stopwords using a second Preprocessing instance.
                prepro2 = Preprocessing()
                new_cleaned_data, new_terms = prepro2.remove_stopword(cleaned_data, stopwords)

                # Obtain TF-IDF and IDF values for the stopword-free data.
                weight = Weighting(new_cleaned_data, new_terms)
                tfidf = weight.get_tf_idf_weighting()
                idf = weight.get_idf()

                # Fit a new classifier on this iteration's training data.
                nb = NBMultinomial()
                nb.fit(new_cleaned_data,new_terms,data_train[i]["target"],stopwords,idf,tfidf)
                
                # Reset for this iteration; not incremented within the visible lines.
                correct_ans = 0
                # Predict every test tweet of this iteration, one at a time.
                # NOTE(review): the target at the same index is passed to
                # predict() alongside the tweet — confirm how predict() uses it.
                for j in range(len(data_test[i]["tweet"])):
                    prediction = nb.predict(data_test[i]["tweet"][j],data_test[i]["target"][j])