Exemplo n.º 1
0
def executar(experimento, nome_Base, acento):
    nomeBase = nome_Base
    path = experimento + nomeBase
    print('executando:\n' + path)
    print('Sem acento:\n' + ('Sim' if (acento) else 'Não'))

    base = readBase(nomeBase)

    tamBase = len(base)
    i = 0
    documents = []
    #print base[0][0].split()
    tknzr = nltk.tokenize.TweetTokenizer()

    while (i < tamBase):
        if (acento):
            w = remocaoacento(tknzr.tokenize(base[i][0]))
        else:
            w = tknzr.tokenize(base[i][0])
        w = remocaopontos(w)
        conteudoLista = (w, base[i][1])
        documents.append(conteudoLista)
        i += 1

    ################################ Pre Processamento
    stopwords = nltk.corpus.stopwords.words('portuguese')

    stemmer = nltk.stem.RSLPStemmer()

    #h=0
    #j=len(documents)
    #while (h<j):
    #    g=len(documents[h][0])
    #    f=0
    #    while(f<g):
    #        stemmer.stem(documents[h][0][f])
    #        f+=1
    #    h += 1

    ################################

    random.shuffle(documents)

    all_words = []

    k = 0
    l = len(documents)
    while (k < l):
        m = len(documents[k][0])
        n = 0
        while (n < m):
            all_words.append(documents[k][0][n])
            n += 1
        k += 1
    print(str(len(all_words)))

    all_words = [w.lower() for w in all_words if w not in stopwords]

    all_words = [stemmer.stem(w) for w in all_words]
    # all_words = remocaopontos(all_words)

    #all_words = nltk.FreqDist(all_words) #calcula frequencia de palavras, definir o limite de palavras
    #all_words = nltk.LaplaceProbDist(nltk.FreqDist(all_words))
    #all_words = nltk.SimpleGoodTuringProbDist(nltk.FreqDist(all_words))
    #all_words = nltk.LidstoneProbDist(nltk.FreqDist(all_words), 0.1)
    #all_words = nltk.WittenBellProbDist(nltk.FreqDist(all_words))
    #nltk.WittenBellProbDist() procurar como mudar o ngram
    #all_words = nltk.MLEProbDist(nltk.FreqDist(all_words))
    #all_words = nltk.SimpleGoodTuringProbDist(nltk.FreqDist(w.lower() for w in all_words if w not in stopwords))

    #all_words = nltk.LidstoneProbDist(nltk.FreqDist(w.lower() for w in all_words if w not in stopwords), 0.1)

    ##all_words = nltk.FreqDist(nltk.FreqDist(w.lower() for w in all_words if w not in stopwords))
    all_words = nltk.FreqDist(nltk.FreqDist(all_words))

    #word_features = list(all_words.samples()) #se usando FreqDistlista com palavras que aparecem mais de 3000
    '''aqui que modifiquei
    word_features = list(all_words.keys())

    def find_features(document):
        words = set(document)
        features = {}
        for w in word_features:
            features[w] = (w in words)

        return features
    '''

    print(str(len(all_words.keys())))

    def wordbig(word_feature):
        """Return the bigrams of ``word_feature`` as a list of 2-tuples.

        Every element is paired with its immediate successor, preserving
        the original order. A sequence with fewer than two elements has no
        consecutive pair and therefore yields an empty list.
        """
        # Zipping the sequence with itself shifted by one gives
        # (item[0], item[1]), (item[1], item[2]), ...
        return list(zip(word_feature, word_feature[1:]))

    def removerpalavras(todas_palavras, document):
        """Keep only the tokens of ``document`` found in ``todas_palavras``.

        The relative order of the surviving tokens and any repetitions are
        preserved; tokens absent from ``todas_palavras`` are dropped.
        """
        return [token for token in document if token in todas_palavras]

    def wordFeature(documents):
        """Build the feature vocabulary of ``documents``.

        Args:
            documents: iterable of entries whose first item (``entry[0]``)
                is an iterable of features.

        Returns:
            A list with each distinct feature exactly once, in order of
            first appearance.

        Features must be hashable: membership is tracked with a set so that
        each lookup is O(1) instead of scanning the growing result list.
        """
        dicionario = []
        vistos = set()  # features already emitted, for constant-time lookup
        for entry in documents:
            for feature in entry[0]:
                if feature not in vistos:
                    vistos.add(feature)
                    dicionario.append(feature)
        return dicionario

    documents = [[removerpalavras(all_words.keys(), w[0]), w[1]]
                 for w in documents]
    documents = [[wordbig(w[0]), w[1]] for w in documents]
    word_features = wordFeature(documents)

    def find_features(document):
        """Map each known feature to whether it occurs in ``document``.

        Iterates over ``word_features`` from the enclosing scope and returns
        a dict whose keys are those features and whose values are the
        boolean result of ``feature in document``.
        """
        return {feature: (feature in document) for feature in word_features}

    #aquii
    featuresets = [(find_features(rev), category)
                   for (rev, category) in documents]

    for w in featuresets:
        print(w)

    exit()

    kfold = 4

    baseInteira = featuresets

    tamT = len(featuresets)
    divisor = tamT // kfold

    ###### ajustar divisao
    baseDividida1 = featuresets[0:divisor]
    baseDividida2 = featuresets[divisor:(divisor * 2)]
    baseDividida3 = featuresets[(divisor * 2):(divisor * 3)]
    baseDividida4 = featuresets[(divisor * 3):tamT]

    #tamT = len(featuresets)
    #umQuarto = tamBase/4

    #training_set = featuresets[umQuarto:]
    #testing_set = featuresets[:umQuarto]

    #training_set = featuresets[100:]
    #testing_set = featuresets[0:100]

    ########################## 1 rodada
    #print "## RODADA 1 ##"

    training_set = baseDividida2 + baseDividida3 + baseDividida4
    testing_set = baseDividida1

    MNB_classifier = SklearnClassifier(MultinomialNB())
    MNB_classifier.train(training_set)
    testclas = MNB_classifier.classify_many([fs for (fs, l) in testing_set])
    testgold = [l for (fs, l) in testing_set]
    MNBmc1 = sklearn.metrics.confusion_matrix(testgold, testclas)
    MNBa1 = (sklearn.metrics.accuracy_score(testgold, testclas)) * 100
    MNBpp1 = sklearn.metrics.precision_score(testgold, testclas,
                                             average=None) * 100
    precisaoMNB1 = sklearn.metrics.precision_score(testgold,
                                                   testclas,
                                                   average=None)
    g = 0
    somaPMNB1 = 0
    while (g < len(precisaoMNB1)):
        somaPMNB1 = somaPMNB1 + precisaoMNB1[g]
        g = g + 1
    MNBpt1 = (somaPMNB1 / len(precisaoMNB1)) * 100
    MNBrp1 = (sklearn.metrics.recall_score(testgold, testclas,
                                           average=None)) * 100
    recallMNB1 = sklearn.metrics.recall_score(testgold, testclas, average=None)
    g = 0
    somaRMNB1 = 0
    while (g < len(recallMNB1)):
        somaRMNB1 = somaRMNB1 + recallMNB1[g]
        g = g + 1
    MNBrt1 = (somaRMNB1 / len(recallMNB1)) * 100
    MNBfp1 = (sklearn.metrics.f1_score(testgold, testclas, average=None))
    f1MNB1 = sklearn.metrics.f1_score(testgold, testclas, average=None)
    g = 0
    somaFMNB1 = 0
    while (g < len(f1MNB1)):
        somaFMNB1 = somaFMNB1 + f1MNB1[g]
        g = g + 1
    MNBft1 = (somaFMNB1 / len(f1MNB1)) * 100
    '''
    BernoulliNB_classifier = SklearnClassifier(BernoulliNB())
    BernoulliNB_classifier.train(training_set)
    BernoulliNB_classifierRodada2 = nltk.classify.accuracy(BernoulliNB_classifier, testing_set)
    print("BernoulliNB_classifier accuracy percent:", BernoulliNB_classifierRodada2*100)
    '''
    LogisticRegression_classifier = SklearnClassifier(LogisticRegression())
    LogisticRegression_classifier.train(training_set)
    testclas = LogisticRegression_classifier.classify_many(
        [fs for (fs, l) in testing_set])
    testgold = [l for (fs, l) in testing_set]
    Rmc1 = sklearn.metrics.confusion_matrix(testgold, testclas)
    Ra1 = (sklearn.metrics.accuracy_score(testgold, testclas)) * 100
    Rpp1 = sklearn.metrics.precision_score(testgold, testclas,
                                           average=None) * 100
    precisaoR1 = sklearn.metrics.precision_score(testgold,
                                                 testclas,
                                                 average=None)
    g = 0
    somaPR1 = 0
    while (g < len(precisaoR1)):
        somaPR1 = somaPR1 + precisaoR1[g]
        g = g + 1
    Rpt1 = (somaPR1 / len(precisaoR1)) * 100
    Rrp1 = (sklearn.metrics.recall_score(testgold, testclas,
                                         average=None)) * 100
    recallR1 = sklearn.metrics.recall_score(testgold, testclas, average=None)
    g = 0
    somaRR1 = 0
    while (g < len(recallR1)):
        somaRR1 = somaRR1 + recallR1[g]
        g = g + 1
    Rrt1 = (somaRR1 / len(recallR1)) * 100
    Rfp1 = (sklearn.metrics.f1_score(testgold, testclas, average=None))
    f1R1 = sklearn.metrics.f1_score(testgold, testclas, average=None)
    g = 0
    somaFR1 = 0
    while (g < len(f1R1)):
        somaFR1 = somaFR1 + f1R1[g]
        g = g + 1
    Rft1 = (somaFR1 / len(f1R1)) * 100
    '''
    SGDClassifier_classifier = SklearnClassifier(SGDClassifier())
    SGDClassifier_classifier.train(training_set)
    SGDClassifier_classifierRodada2 = nltk.classify.accuracy(SGDClassifier_classifier, testing_set)
    print("SGDClassifier_classifier accuracy percent:", SGDClassifier_classifierRodada2*100)

    SVC_classifier = SklearnClassifier(SVC())
    SVC_classifier.train(training_set)
    SVC_classifierRodada2 = nltk.classify.accuracy(SVC_classifier, testing_set)
    print("SVC_classifier accuracy percent:", SVC_classifierRodada2*100)
    '''
    LinearSVC_classifier = SklearnClassifier(LinearSVC())
    LinearSVC_classifier.train(training_set)
    testclas = LinearSVC_classifier.classify_many(
        [fs for (fs, l) in testing_set])
    testgold = [l for (fs, l) in testing_set]
    Lmc1 = sklearn.metrics.confusion_matrix(testgold, testclas)
    La1 = (sklearn.metrics.accuracy_score(testgold, testclas)) * 100
    Lpp1 = sklearn.metrics.precision_score(testgold, testclas,
                                           average=None) * 100
    precisaoL1 = sklearn.metrics.precision_score(testgold,
                                                 testclas,
                                                 average=None)
    g = 0
    somaPL1 = 0
    while (g < len(precisaoL1)):
        somaPL1 = somaPL1 + precisaoL1[g]
        g = g + 1
    Lpt1 = (somaPL1 / len(precisaoL1)) * 100
    Lrp1 = (sklearn.metrics.recall_score(testgold, testclas,
                                         average=None)) * 100
    recallL1 = sklearn.metrics.recall_score(testgold, testclas, average=None)
    g = 0
    somaRL1 = 0
    while (g < len(recallL1)):
        somaRL1 = somaRL1 + recallL1[g]
        g = g + 1
    Lrt1 = (somaRL1 / len(recallL1)) * 100
    Lfp1 = (sklearn.metrics.f1_score(testgold, testclas, average=None))
    f1L1 = sklearn.metrics.f1_score(testgold, testclas, average=None)
    g = 0
    somaFL1 = 0
    while (g < len(f1L1)):
        somaFL1 = somaFL1 + f1L1[g]
        g = g + 1
    Lft1 = (somaFL1 / len(f1L1)) * 100

    ######################## Rodada 2
    #print "## RODADA 2 ##"

    training_set = baseDividida1 + baseDividida3 + baseDividida4
    testing_set = baseDividida2

    MNB_classifier = SklearnClassifier(MultinomialNB())
    MNB_classifier.train(training_set)
    testclas = MNB_classifier.classify_many([fs for (fs, l) in testing_set])
    testgold = [l for (fs, l) in testing_set]
    MNBmc2 = sklearn.metrics.confusion_matrix(testgold, testclas)
    MNBa2 = (sklearn.metrics.accuracy_score(testgold, testclas)) * 100
    MNBpp2 = sklearn.metrics.precision_score(testgold, testclas,
                                             average=None) * 100
    precisaoMNB2 = sklearn.metrics.precision_score(testgold,
                                                   testclas,
                                                   average=None)
    g = 0
    somaPMNB2 = 0
    while (g < len(precisaoMNB2)):
        somaPMNB2 = somaPMNB2 + precisaoMNB2[g]
        g = g + 1
    MNBpt2 = (somaPMNB2 / len(precisaoMNB2)) * 100
    MNBrp2 = (sklearn.metrics.recall_score(testgold, testclas,
                                           average=None)) * 100
    recallMNB2 = sklearn.metrics.recall_score(testgold, testclas, average=None)
    g = 0
    somaRMNB2 = 0
    while (g < len(recallMNB2)):
        somaRMNB2 = somaRMNB2 + recallMNB2[g]
        g = g + 1
    MNBrt2 = (somaRMNB2 / len(recallMNB2)) * 100
    MNBfp2 = (sklearn.metrics.f1_score(testgold, testclas, average=None))
    f1MNB2 = sklearn.metrics.f1_score(testgold, testclas, average=None)
    g = 0
    somaFMNB2 = 0
    while (g < len(f1MNB2)):
        somaFMNB2 = somaFMNB2 + f1MNB2[g]
        g = g + 1
    MNBft2 = (somaFMNB2 / len(f1MNB2)) * 100
    '''
    BernoulliNB_classifier = SklearnClassifier(BernoulliNB())
    BernoulliNB_classifier.train(training_set)
    BernoulliNB_classifierRodada2 = nltk.classify.accuracy(BernoulliNB_classifier, testing_set)
    print("BernoulliNB_classifier accuracy percent:", BernoulliNB_classifierRodada2*100)
    '''
    LogisticRegression_classifier = SklearnClassifier(LogisticRegression())
    LogisticRegression_classifier.train(training_set)
    testclas = LogisticRegression_classifier.classify_many(
        [fs for (fs, l) in testing_set])
    testgold = [l for (fs, l) in testing_set]
    Rmc2 = sklearn.metrics.confusion_matrix(testgold, testclas)
    Ra2 = (sklearn.metrics.accuracy_score(testgold, testclas)) * 100
    Rpp2 = sklearn.metrics.precision_score(testgold, testclas,
                                           average=None) * 100
    precisaoR2 = sklearn.metrics.precision_score(testgold,
                                                 testclas,
                                                 average=None)
    g = 0
    somaPR2 = 0
    while (g < len(precisaoR2)):
        somaPR2 = somaPR2 + precisaoR2[g]
        g = g + 1
    Rpt2 = (somaPR2 / len(precisaoR2)) * 100
    Rrp2 = (sklearn.metrics.recall_score(testgold, testclas,
                                         average=None)) * 100
    recallR2 = sklearn.metrics.recall_score(testgold, testclas, average=None)
    g = 0
    somaRR2 = 0
    while (g < len(recallR2)):
        somaRR2 = somaRR2 + recallR2[g]
        g = g + 1
    Rrt2 = (somaRR2 / len(recallR2)) * 100
    Rfp2 = (sklearn.metrics.f1_score(testgold, testclas, average=None))
    f1R2 = sklearn.metrics.f1_score(testgold, testclas, average=None)
    g = 0
    somaFR2 = 0
    while (g < len(f1R2)):
        somaFR2 = somaFR2 + f1R2[g]
        g = g + 1
    Rft2 = (somaFR2 / len(f1R2)) * 100
    '''
    SGDClassifier_classifier = SklearnClassifier(SGDClassifier())
    SGDClassifier_classifier.train(training_set)
    SGDClassifier_classifierRodada2 = nltk.classify.accuracy(SGDClassifier_classifier, testing_set)
    print("SGDClassifier_classifier accuracy percent:", SGDClassifier_classifierRodada2*100)

    SVC_classifier = SklearnClassifier(SVC())
    SVC_classifier.train(training_set)
    SVC_classifierRodada2 = nltk.classify.accuracy(SVC_classifier, testing_set)
    print("SVC_classifier accuracy percent:", SVC_classifierRodada2*100)
    '''
    LinearSVC_classifier = SklearnClassifier(LinearSVC())
    LinearSVC_classifier.train(training_set)
    testclas = LinearSVC_classifier.classify_many(
        [fs for (fs, l) in testing_set])
    testgold = [l for (fs, l) in testing_set]
    Lmc2 = sklearn.metrics.confusion_matrix(testgold, testclas)
    La2 = (sklearn.metrics.accuracy_score(testgold, testclas)) * 100
    Lpp2 = sklearn.metrics.precision_score(testgold, testclas,
                                           average=None) * 100
    precisaoL2 = sklearn.metrics.precision_score(testgold,
                                                 testclas,
                                                 average=None)
    g = 0
    somaPL2 = 0
    while (g < len(precisaoL2)):
        somaPL2 = somaPL2 + precisaoL2[g]
        g = g + 1
    Lpt2 = (somaPL2 / len(precisaoL2)) * 100
    Lrp2 = (sklearn.metrics.recall_score(testgold, testclas,
                                         average=None)) * 100
    recallL2 = sklearn.metrics.recall_score(testgold, testclas, average=None)
    g = 0
    somaRL2 = 0
    while (g < len(recallL2)):
        somaRL2 = somaRL2 + recallL2[g]
        g = g + 1
    Lrt2 = (somaRL2 / len(recallL2)) * 100
    Lfp2 = (sklearn.metrics.f1_score(testgold, testclas, average=None))
    f1L2 = sklearn.metrics.f1_score(testgold, testclas, average=None)
    g = 0
    somaFL2 = 0
    while (g < len(f1L2)):
        somaFL2 = somaFL2 + f1L2[g]
        g = g + 1
    Lft2 = (somaFL2 / len(f1L2)) * 100

    ##################### rodada 3
    #print "## RODADA 3 ##"

    training_set = baseDividida1 + baseDividida2 + baseDividida4
    testing_set = baseDividida3

    MNB_classifier = SklearnClassifier(MultinomialNB())
    MNB_classifier.train(training_set)
    testclas = MNB_classifier.classify_many([fs for (fs, l) in testing_set])
    testgold = [l for (fs, l) in testing_set]
    MNBmc3 = sklearn.metrics.confusion_matrix(testgold, testclas)
    MNBa3 = (sklearn.metrics.accuracy_score(testgold, testclas)) * 100
    MNBpp3 = sklearn.metrics.precision_score(testgold, testclas,
                                             average=None) * 100
    precisaoMNB3 = sklearn.metrics.precision_score(testgold,
                                                   testclas,
                                                   average=None)
    g = 0
    somaPMNB3 = 0
    while (g < len(precisaoMNB3)):
        somaPMNB3 = somaPMNB3 + precisaoMNB3[g]
        g = g + 1
    MNBpt3 = (somaPMNB3 / len(precisaoMNB3)) * 100
    MNBrp3 = (sklearn.metrics.recall_score(testgold, testclas,
                                           average=None)) * 100
    recallMNB3 = sklearn.metrics.recall_score(testgold, testclas, average=None)
    g = 0
    somaRMNB3 = 0
    while (g < len(recallMNB3)):
        somaRMNB3 = somaRMNB3 + recallMNB3[g]
        g = g + 1
    MNBrt3 = (somaRMNB3 / len(recallMNB3)) * 100
    MNBfp3 = (sklearn.metrics.f1_score(testgold, testclas, average=None))
    f1MNB3 = sklearn.metrics.f1_score(testgold, testclas, average=None)
    g = 0
    somaFMNB3 = 0
    while (g < len(f1MNB3)):
        somaFMNB3 = somaFMNB3 + f1MNB3[g]
        g = g + 1
    MNBft3 = (somaFMNB3 / len(f1MNB3)) * 100
    '''
    BernoulliNB_classifier = SklearnClassifier(BernoulliNB())
    BernoulliNB_classifier.train(training_set)
    BernoulliNB_classifierRodada2 = nltk.classify.accuracy(BernoulliNB_classifier, testing_set)
    print("BernoulliNB_classifier accuracy percent:", BernoulliNB_classifierRodada2*100)
    '''
    LogisticRegression_classifier = SklearnClassifier(LogisticRegression())
    LogisticRegression_classifier.train(training_set)
    testclas = LogisticRegression_classifier.classify_many(
        [fs for (fs, l) in testing_set])
    testgold = [l for (fs, l) in testing_set]
    Rmc3 = sklearn.metrics.confusion_matrix(testgold, testclas)
    Ra3 = (sklearn.metrics.accuracy_score(testgold, testclas)) * 100
    Rpp3 = sklearn.metrics.precision_score(testgold, testclas,
                                           average=None) * 100
    precisaoR3 = sklearn.metrics.precision_score(testgold,
                                                 testclas,
                                                 average=None)
    g = 0
    somaPR3 = 0
    while (g < len(precisaoR3)):
        somaPR3 = somaPR3 + precisaoR3[g]
        g = g + 1
    Rpt3 = (somaPR3 / len(precisaoR3)) * 100
    Rrp3 = (sklearn.metrics.recall_score(testgold, testclas,
                                         average=None)) * 100
    recallR3 = sklearn.metrics.recall_score(testgold, testclas, average=None)
    g = 0
    somaRR3 = 0
    while (g < len(recallR3)):
        somaRR3 = somaRR3 + recallR3[g]
        g = g + 1
    Rrt3 = (somaRR3 / len(recallR3)) * 100
    Rfp3 = (sklearn.metrics.f1_score(testgold, testclas, average=None))
    f1R3 = sklearn.metrics.f1_score(testgold, testclas, average=None)
    g = 0
    somaFR3 = 0
    while (g < len(f1R3)):
        somaFR3 = somaFR3 + f1R3[g]
        g = g + 1
    Rft3 = (somaFR3 / len(f1R3)) * 100
    '''
    SGDClassifier_classifier = SklearnClassifier(SGDClassifier())
    SGDClassifier_classifier.train(training_set)
    SGDClassifier_classifierRodada2 = nltk.classify.accuracy(SGDClassifier_classifier, testing_set)
    print("SGDClassifier_classifier accuracy percent:", SGDClassifier_classifierRodada2*100)

    SVC_classifier = SklearnClassifier(SVC())
    SVC_classifier.train(training_set)
    SVC_classifierRodada2 = nltk.classify.accuracy(SVC_classifier, testing_set)
    print("SVC_classifier accuracy percent:", SVC_classifierRodada2*100)
    '''
    LinearSVC_classifier = SklearnClassifier(LinearSVC())
    LinearSVC_classifier.train(training_set)
    testclas = LinearSVC_classifier.classify_many(
        [fs for (fs, l) in testing_set])
    testgold = [l for (fs, l) in testing_set]
    Lmc3 = sklearn.metrics.confusion_matrix(testgold, testclas)
    La3 = (sklearn.metrics.accuracy_score(testgold, testclas)) * 100
    Lpp3 = sklearn.metrics.precision_score(testgold, testclas,
                                           average=None) * 100
    precisaoL3 = sklearn.metrics.precision_score(testgold,
                                                 testclas,
                                                 average=None)
    g = 0
    somaPL3 = 0
    while (g < len(precisaoL3)):
        somaPL3 = somaPL3 + precisaoL3[g]
        g = g + 1
    Lpt3 = (somaPL3 / len(precisaoL3)) * 100
    Lrp3 = (sklearn.metrics.recall_score(testgold, testclas,
                                         average=None)) * 100
    recallL3 = sklearn.metrics.recall_score(testgold, testclas, average=None)
    g = 0
    somaRL3 = 0
    while (g < len(recallL3)):
        somaRL3 = somaRL3 + recallL3[g]
        g = g + 1
    Lrt3 = (somaRL2 / len(recallL2)) * 100
    Lfp3 = (sklearn.metrics.f1_score(testgold, testclas, average=None))
    f1L3 = sklearn.metrics.f1_score(testgold, testclas, average=None)
    g = 0
    somaFL3 = 0
    while (g < len(f1L3)):
        somaFL3 = somaFL3 + f1L3[g]
        g = g + 1
    Lft3 = (somaFL3 / len(f1L3)) * 100

    ############################ rodada 4
    #print "## RODADA 4 ##"

    training_set = baseDividida1 + baseDividida2 + baseDividida3
    testing_set = baseDividida4

    MNB_classifier = SklearnClassifier(MultinomialNB())
    MNB_classifier.train(training_set)
    testclas = MNB_classifier.classify_many([fs for (fs, l) in testing_set])
    testgold = [l for (fs, l) in testing_set]
    MNBmc4 = sklearn.metrics.confusion_matrix(testgold, testclas)
    MNBa4 = (sklearn.metrics.accuracy_score(testgold, testclas)) * 100
    MNBpp4 = sklearn.metrics.precision_score(testgold, testclas,
                                             average=None) * 100
    precisaoMNB4 = sklearn.metrics.precision_score(testgold,
                                                   testclas,
                                                   average=None)
    g = 0
    somaPMNB4 = 0
    while (g < len(precisaoMNB4)):
        somaPMNB4 = somaPMNB4 + precisaoMNB4[g]
        g = g + 1
    MNBpt4 = (somaPMNB4 / len(precisaoMNB4)) * 100
    MNBrp4 = (sklearn.metrics.recall_score(testgold, testclas,
                                           average=None)) * 100
    recallMNB4 = sklearn.metrics.recall_score(testgold, testclas, average=None)
    g = 0
    somaRMNB4 = 0
    while (g < len(recallMNB4)):
        somaRMNB4 = somaRMNB4 + recallMNB4[g]
        g = g + 1
    MNBrt4 = (somaRMNB4 / len(recallMNB4)) * 100
    MNBfp4 = (sklearn.metrics.f1_score(testgold, testclas, average=None))
    f1MNB4 = sklearn.metrics.f1_score(testgold, testclas, average=None)
    g = 0
    somaFMNB4 = 0
    while (g < len(f1MNB4)):
        somaFMNB4 = somaFMNB4 + f1MNB4[g]
        g = g + 1
    MNBft4 = (somaFMNB4 / len(f1MNB4)) * 100
    '''
    BernoulliNB_classifier = SklearnClassifier(BernoulliNB())
    BernoulliNB_classifier.train(training_set)
    BernoulliNB_classifierRodada2 = nltk.classify.accuracy(BernoulliNB_classifier, testing_set)
    print("BernoulliNB_classifier accuracy percent:", BernoulliNB_classifierRodada2*100)
    '''
    LogisticRegression_classifier = SklearnClassifier(LogisticRegression())
    LogisticRegression_classifier.train(training_set)
    testclas = LogisticRegression_classifier.classify_many(
        [fs for (fs, l) in testing_set])
    testgold = [l for (fs, l) in testing_set]
    Rmc4 = sklearn.metrics.confusion_matrix(testgold, testclas)
    Ra4 = (sklearn.metrics.accuracy_score(testgold, testclas)) * 100
    Rpp4 = sklearn.metrics.precision_score(testgold, testclas,
                                           average=None) * 100
    precisaoR4 = sklearn.metrics.precision_score(testgold,
                                                 testclas,
                                                 average=None)
    g = 0
    somaPR4 = 0
    while (g < len(precisaoR4)):
        somaPR4 = somaPR4 + precisaoR4[g]
        g = g + 1
    Rpt4 = (somaPR4 / len(precisaoR4)) * 100
    Rrp4 = (sklearn.metrics.recall_score(testgold, testclas,
                                         average=None)) * 100
    recallR4 = sklearn.metrics.recall_score(testgold, testclas, average=None)
    g = 0
    somaRR4 = 0
    while (g < len(recallR4)):
        somaRR4 = somaRR4 + recallR4[g]
        g = g + 1
    Rrt4 = (somaRR4 / len(recallR4)) * 100
    Rfp4 = (sklearn.metrics.f1_score(testgold, testclas, average=None))
    f1R4 = sklearn.metrics.f1_score(testgold, testclas, average=None)
    g = 0
    somaFR4 = 0
    while (g < len(f1R4)):
        somaFR4 = somaFR4 + f1R4[g]
        g = g + 1
    Rft4 = (somaFR4 / len(f1R4)) * 100
    '''
    SGDClassifier_classifier = SklearnClassifier(SGDClassifier())
    SGDClassifier_classifier.train(training_set)
    SGDClassifier_classifierRodada2 = nltk.classify.accuracy(SGDClassifier_classifier, testing_set)
    print("SGDClassifier_classifier accuracy percent:", SGDClassifier_classifierRodada2*100)

    SVC_classifier = SklearnClassifier(SVC())
    SVC_classifier.train(training_set)
    SVC_classifierRodada2 = nltk.classify.accuracy(SVC_classifier, testing_set)
    print("SVC_classifier accuracy percent:", SVC_classifierRodada2*100)
    '''
    LinearSVC_classifier = SklearnClassifier(LinearSVC())
    LinearSVC_classifier.train(training_set)
    testclas = LinearSVC_classifier.classify_many(
        [fs for (fs, l) in testing_set])
    testgold = [l for (fs, l) in testing_set]
    Lmc4 = sklearn.metrics.confusion_matrix(testgold, testclas)
    La4 = (sklearn.metrics.accuracy_score(testgold, testclas)) * 100
    Lpp4 = sklearn.metrics.precision_score(testgold, testclas,
                                           average=None) * 100
    precisaoL4 = sklearn.metrics.precision_score(testgold,
                                                 testclas,
                                                 average=None)
    g = 0
    somaPL4 = 0
    while (g < len(precisaoL4)):
        somaPL4 = somaPL4 + precisaoL4[g]
        g = g + 1
    Lpt4 = (somaPL4 / len(precisaoL4)) * 100
    Lrp4 = (sklearn.metrics.recall_score(testgold, testclas,
                                         average=None)) * 100
    recallL4 = sklearn.metrics.recall_score(testgold, testclas, average=None)
    g = 0
    somaRL4 = 0
    while (g < len(recallL4)):
        somaRL4 = somaRL4 + recallL4[g]
        g = g + 1
    Lrt4 = (somaRL4 / len(recallL4)) * 100
    Lfp4 = (sklearn.metrics.f1_score(testgold, testclas, average=None))
    f1L4 = sklearn.metrics.f1_score(testgold, testclas, average=None)
    g = 0
    somaFL4 = 0
    while (g < len(f1L4)):
        somaFL4 = somaFL4 + f1L4[g]
        g = g + 1
    Lft4 = (somaFL4 / len(f1L4)) * 100

    ################# medias
    #print "## MEDIA ##"

    #MULTINOMINAL
    # print(MNBmc1)
    # print(MNBmc2)
    # print(MNBmc3)
    # print(MNBmc4)
    MNBmc = (MNBmc1 + MNBmc2 + MNBmc3 + MNBmc4) / 4
    MNBa = (MNBa1 + MNBa2 + MNBa3 + MNBa4) / 4
    MNBamax = max([MNBa1, MNBa2, MNBa3, MNBa4])
    MNBamin = min([MNBa1, MNBa2, MNBa3, MNBa4])
    MNBpp = (MNBpp4 + MNBpp4 + MNBpp4 + MNBpp4) / 4
    MNBpt = (MNBpt1 + MNBpt2 + MNBpt3 + MNBpt4) / 4
    MNBpmax = max([MNBpt1, MNBpt2, MNBpt3, MNBpt4])
    MNBpmin = min([MNBpt1, MNBpt2, MNBpt3, MNBpt4])
    MNBrp = (MNBrp1 + MNBrp2 + MNBrp3 + MNBrp4) / 4
    MNBrt = (MNBrt1 + MNBrt2 + MNBrt3 + MNBrt4) / 4
    MNBrmax = max([MNBrt1, MNBrt2, MNBrt3, MNBrt4])
    MNBrmin = min([MNBrt1, MNBrt2, MNBrt3, MNBrt4])
    MNBfp = (MNBfp1 + MNBfp2 + MNBfp3 + MNBfp4) / 4
    MNBft = (MNBft1 + MNBft2 + MNBft3 + MNBft4) / 4
    MNBfmax = max([MNBft1, MNBft2, MNBft3, MNBft4])
    MNBfmin = min([MNBft1, MNBft2, MNBft3, MNBft4])
    '''
    fig = plt.figure()
    ax = fig.add_subplot(1,1,1)
    ax.set_aspect('equal')
    plt.imshow(MNBmc, interpolation='nearest', cmap=plt.cm.ocean)
    plt.colorbar()
    plt.show()
    '''

    #REGRESSAO LINEAR
    Rmc = (Rmc1 + Rmc2 + Rmc3 + Rmc4) / 4
    Ra = (Ra1 + Ra2 + Ra3 + Ra4) / 4
    Ramax = max([Ra1, Ra2, Ra3, Ra4])
    Ramin = min([Ra1, Ra2, Ra3, Ra4])
    Rpp = (Rpp4 + Rpp4 + Rpp4 + Rpp4) / 4  #
    Rpt = (Rpt1 + Rpt2 + Rpt3 + Rpt4) / 4
    Rpmax = max([Rpt1, Rpt2, Rpt3, Rpt4])
    Rpmin = min([Rpt1, Rpt2, Rpt3, Rpt4])
    Rrp = (Rrp1 + Rrp2 + Rrp3 + Rrp4) / 4
    Rrt = (Rrt1 + Rrt2 + Rrt3 + Rrt4) / 4
    Rrmax = max([Rrt1, Rrt2, Rrt3, Rrt4])
    Rrmin = min([Rrt1, Rrt2, Rrt3, Rrt4])
    Rfp = (Rfp1 + Rfp2 + Rfp3 + Rfp4) / 4
    Rft = (Rft1 + Rft2 + Rft3 + Rft4) / 4
    Rfmax = max([Rft1, Rft2, Rft3, Rft4])
    Rfmin = min([Rft1, Rft2, Rft3, Rft4])

    #SVC LINEAR
    Lmc = (Lmc1 + Lmc2 + Lmc3 + Lmc4) / 4
    La = (La1 + La2 + La3 + La4) / 4
    Lamax = max([La1, La2, La3, La4])
    Lamin = min([La1, La2, La3, La4])
    Lpp = (Lpp4 + Lpp4 + Lpp4 + Lpp4) / 4
    Lpt = (Lpt1 + Lpt2 + Lpt3 + Lpt4) / 4
    Lpmax = max([Lpt1, Lpt2, Lpt3, Lpt4])
    Lpmin = min([Lpt1, Lpt2, Lpt3, Lpt4])
    Lrp = (Lrp1 + Lrp2 + Lrp3 + Lrp4) / 4
    Lrt = (Lrt1 + Lrt2 + Lrt3 + Lrt4) / 4
    Lrmax = max([Lrt1, Lrt2, Lrt3, Lrt4])
    Lrmin = min([Lrt1, Lrt2, Lrt3, Lrt4])
    Lfp = (Lfp1 + Lfp2 + Lfp3 + Lfp4) / 4
    Lft = (Lft1 + Lft2 + Lft3 + Lft4) / 4
    Lfmax = max([Lft1, Lft2, Lft3, Lft4])
    Lfmin = min([Lft1, Lft2, Lft3, Lft4])
    '''
    print "SVC Linear"
    print "Matriz de confusão: ", Lmc
    print "Acuracia: ", La
    print "Precisão parcial: ", Lpp
    print "Precisão total: ", Lpt
    print "Recall parcial: ", Lrp
    print "Recall total: ", Lrt
    print "F-medida parcial: ", Lfp
    print "F-medida total: ", Lft
    '''

    with open(path, mode='w') as csv_file:
        # The report is plain text with ';' as the field separator: one
        # section per algorithm, each with four per-iteration rows followed
        # by the mean / maximum / minimum summary rows.

        def write_row(*fields):
            # Join the str() form of every field with ';' and end the line.
            csv_file.write(';'.join(str(field) for field in fields) + '\n')

        metric_header = ('Iteração', 'Acurácia', 'Precisão parcial',
                         'Precisão total', 'revocação parcial',
                         'revocação total', 'f-medida parcial',
                         'f-medida total')
        rule = '=================='

        # Section written from the MNB* metrics.
        write_row('Algoritmo', 'Multinominal Naïve-Bayes')
        write_row(*metric_header)
        write_row(1, MNBa1, MNBpp1, MNBpt1, MNBrp1, MNBrt1, MNBfp1, MNBft1)
        write_row(2, MNBa2, MNBpp2, MNBpt2, MNBrp2, MNBrt2, MNBfp2, MNBft2)
        write_row(3, MNBa3, MNBpp3, MNBpt3, MNBrp3, MNBrt3, MNBfp3, MNBft3)
        write_row(4, MNBa4, MNBpp4, MNBpt4, MNBrp4, MNBrt4, MNBfp4, MNBft4)
        write_row(rule)
        write_row('Total')
        write_row('Média', MNBa, MNBpp, MNBpt, MNBrp, MNBrt, MNBfp, MNBft)
        write_row('Máximo', MNBamax, MNBpmax, MNBrmax, MNBfmax)
        write_row('Mínimo', MNBamin, MNBpmin, MNBrmin, MNBfmin)
        write_row(rule)

        # Section written from the R* metrics.
        write_row('Algoritmo', 'Regressão Linear')
        write_row(*metric_header)
        write_row(1, Ra1, Rpp1, Rpt1, Rrp1, Rrt1, Rfp1, Rft1)
        write_row(2, Ra2, Rpp2, Rpt2, Rrp2, Rrt2, Rfp2, Rft2)
        write_row(3, Ra3, Rpp3, Rpt3, Rrp3, Rrt3, Rfp3, Rft3)
        write_row(4, Ra4, Rpp4, Rpt4, Rrp4, Rrt4, Rfp4, Rft4)
        write_row(rule)
        write_row('Total')
        write_row('Média', Ra, Rpp, Rpt, Rrp, Rrt, Rfp, Rft)
        write_row('Máximo', Ramax, Rpmax, Rrmax, Rfmax)
        write_row('Mínimo', Ramin, Rpmin, Rrmin, Rfmin)
        write_row(rule)

        # Section written from the L* metrics.
        write_row('Algoritmo', 'SVC Linear')
        write_row(*metric_header)
        write_row(1, La1, Lpp1, Lpt1, Lrp1, Lrt1, Lfp1, Lft1)
        write_row(2, La2, Lpp2, Lpt2, Lrp2, Lrt2, Lfp2, Lft2)
        write_row(3, La3, Lpp3, Lpt3, Lrp3, Lrt3, Lfp3, Lft3)
        write_row(4, La4, Lpp4, Lpt4, Lrp4, Lrt4, Lfp4, Lft4)
        write_row(rule)
        write_row('Total')
        write_row('Média', La, Lpp, Lpt, Lrp, Lrt, Lfp, Lft)
        write_row('Máximo', Lamax, Lpmax, Lrmax, Lfmax)
        write_row('Mínimo', Lamin, Lpmin, Lrmin, Lfmin)
Exemplo n.º 2
0
# Polarity of each document (sentence), stored in the list polaris
polaris = [
    -1, -1, 1, 1, 1, -1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 1
]

# Split the data of the dataset and polaris lists into training and
# validation sets

documents = []
# When True, tokens are passed through remocaoacento() before further
# cleaning (presumably accent removal -- confirm against the helper).
acento = False
tknzr = nltk.tokenize.TweetTokenizer()

# Tokenize every entry of dataset and clean it with remocaopontos();
# documents ends up with one token list per entry, in dataset order.
for texto in dataset:
    w = tknzr.tokenize(texto)
    if acento:
        w = remocaoacento(w)
    w = remocaopontos(w)
    documents.append(w)

################################ Pre-processing
# Built once, outside the tokenization loop: neither object depends on the
# document being processed, and both must exist even when dataset is empty.
stopwords = nltk.corpus.stopwords.words('portuguese')

stemmer = nltk.stem.RSLPStemmer()
Exemplo n.º 3
0
def executar(experimento, nome_Base, acento):
    '''
    nomeBase = nome_Base
    path = experimento+nomeBase
    print('executando:\n'+path)
    print('Sem acento:\n'+('Sim' if(acento) else 'Não'))

    base = readBase(nomeBase)
    tamBase = len(base)
    '''
    base = [
        ('  Vejam bem, maioria dos homens apoiam. maioria de mulheres não. Homens tendem a serem lógicos, mulheres emotivas. Sera que mulheres acham que rodeios é judiação de animais? O que é claramente equivocado da parte delas. Como também isso se repete na PL3722, homens pensam logicamente, e são mais favorável ao cidadão ter menos restrições a armas de fogo para proteger seu patrimônio e sua família. Ja as mães, familiares, namoradas(os), principalmente de bandidos, acham isso um terror, por que qual mãe quer ver o filho bandido morto praticando um assalto? Logo são contra o direito das vitimas de si proteger do seu filho bandido. Vote consciente, vote com razão e não emoção! Brasil melhora rapidinho. ',
         1),
        ('Observando daquí, a debandada dos derrotados. Cadê o Sacoman Keffeyo, a Ana Animais e, o pilantra do Haroldo Girafales? Perderam a coragem de virem aquí na enquete, questionar a sanção do PL?  Corvardões perdedores: vão chorar no colinho da Luisa Mell.  ',
         1),
        ('Dezenas de debates e ficou mais que provado que animais atletas nao sao animais maus tratados,parabens capitao augusto. ',
         1),
        ('PARABÉNS CAPITÃO AUGUSTO ISSO PROVA QUE QUEM AMA CUIDA,ANIMAIS TRATADOS COM MUITO CARINHO ',
         1), ('Parabéns Capitão Augusto,agora é lei.', 1)
    ]
    tamBase = len(base)
    i = 0
    documents = []
    #print base[0][0].split()
    tknzr = nltk.tokenize.TweetTokenizer()

    while (i < tamBase):
        if (acento):
            w = remocaoacento(tknzr.tokenize(base[i][0]))
        else:
            w = tknzr.tokenize(base[i][0])
        w = remocaopontos(w)
        conteudoLista = (w, base[i][1])
        documents.append(conteudoLista)
        i += 1

    ################################ Pre Processamento
    stopwords = nltk.corpus.stopwords.words('portuguese')
    #     stopwords = ['de', 'a', 'o', 'que', 'e', 'do', 'da', 'em', 'um', 'para', 'com', 'não', 'uma', 'os', 'no', 'se', 'na', 'por',
    # 'mais', 'as', 'dos', 'como', 'mas', 'ao', 'ele', 'das', 'à', 'seu', 'sua', 'ou', 'quando', 'muito', 'nos', 'já',
    # 'eu', 'também', 'só', 'pelo', 'pela', 'até', 'isso', 'ela', 'entre', 'depois', 'sem', 'mesmo', 'aos', 'seus',
    # 'quem', 'nas', 'me', 'esse', 'eles', 'você', 'essa', 'num', 'nem', 'suas', 'meu', 'às', 'minha', 'numa',
    # 'pelos', 'elas', 'qual', 'nós', 'lhe', 'deles', 'essas', 'esses', 'pelas', 'este', 'dele', 'tu', 'te', 'vocês', 'vos',
    # 'lhes', 'meus', 'minhas', 'teu', 'tua', 'teus', 'tuas', 'nosso', 'nossa', 'nossos', 'nossas', 'dela', 'delas',
    # 'esta', 'estes', 'estas', 'aquele', 'aquela', 'aqueles', 'aquelas', 'isto', 'aquilo', 'estou', 'está', 'estamos',
    # 'estão', 'estive', 'esteve', 'estivemos', 'estiveram', 'estava', 'estávamos', 'estavam', 'estivera',
    # 'estivéramos', 'esteja', 'estejamos', 'estejam', 'estivesse', 'estivéssemos', 'estivessem', 'estiver',
    # 'estivermos', 'estiverem', 'hei', 'há', 'havemos', 'hão', 'houve', 'houvemos', 'houveram', 'houvera',
    # 'houvéramos', 'haja', 'hajamos', 'hajam', 'houvesse', 'houvéssemos', 'houvessem', 'houver',
    # 'houvermos', 'houverem', 'houverei', 'houverá', 'houveremos', 'houverão', 'houveria',
    # 'houveríamos', 'houveriam', 'sou', 'somos', 'são', 'era', 'éramos', 'eram', 'fui', 'foi', 'fomos',
    # 'foram', 'fora', 'fôramos', 'seja', 'sejamos', 'sejam', 'fosse', 'fôssemos', 'fossem', 'for', 'formos',
    # 'forem', 'serei', 'será', 'seremos', 'serão', 'seria', 'seríamos', 'seriam', 'tenho', 'tem', 'temos', 'tém',
    # 'tinha', 'tínhamos', 'tinham', 'tive', 'teve', 'tivemos', 'tiveram', 'tivera', 'tivéramos', 'tenha',
    # 'tenhamos', 'tenham', 'tivesse', 'tivéssemos', 'tivessem', 'tiver', 'tivermos', 'tiverem', 'terei', 'terá',
    # 'teremos', 'terão', 'teria', 'teríamos', 'teriam']

    # stemmer = nltk.stem.RSLPStemmer()

    # h=0
    # j=len(documents)
    # while (h<j):
    #    g=len(documents[h][0])
    #    f=0
    #    while(f<g):
    #        stemmer.stem(documents[h][0][f])
    #        f+=1
    #    h += 1

    ################################

    random.shuffle(documents)

    all_words = []

    k = 0
    l = len(documents)
    while (k < l):
        m = len(documents[k][0])
        n = 0
        while (n < m):
            all_words.append(documents[k][0][n])
            n += 1
        k += 1
    # all_words = remocaopontos(all_words)

    all_words = [w.lower() for w in all_words if w not in stopwords]
    # print(str(all_words))

    #all_words = nltk.FreqDist(all_words) #calcula frequencia de palavras, definir o limite de palavras
    #all_words = nltk.LaplaceProbDist(nltk.FreqDist(all_words))
    #all_words = nltk.SimpleGoodTuringProbDist(nltk.FreqDist(all_words))
    #all_words = nltk.LidstoneProbDist(nltk.FreqDist(all_words), 0.1)
    #all_words = nltk.WittenBellProbDist(nltk.FreqDist(all_words))
    #nltk.WittenBellProbDist() procurar como mudar o ngram
    #all_words = nltk.MLEProbDist(nltk.FreqDist(all_words))
    #all_words = nltk.SimpleGoodTuringProbDist(nltk.FreqDist(w.lower() for w in all_words if w not in stopwords))

    all_words = nltk.LidstoneProbDist(nltk.FreqDist(all_words), 0.1)
    #all_words = nltk.FreqDist(nltk.FreqDist(w.lower() for w in all_words if w not in stopwords))

    word_features = list(all_words.samples(
    ))  #se usando FreqDistlista com palavras que aparecem mais de 3000
    #word_features = list(all_words.keys())
    '''aqui que modifiquei
    def find_features(document):
        words = set(document)
        features = {}
        for w in word_features:
            features[w] = (w in words)

        return features
    '''

    #aquii

    def wordbig(word_feature):
        """Return the consecutive pairs (bigrams) of ``word_feature``.

        Element ``k`` of the result is the tuple
        ``(word_feature[k], word_feature[k + 1])``. An input with fewer than
        two items yields an empty list.
        """
        last_start = len(word_feature) - 1
        return [(word_feature[pos], word_feature[pos + 1])
                for pos in range(last_start)]

    def removerpalavras(todas_palavras, document):
        """Keep only the items of ``document`` found in ``todas_palavras``.

        The order and the repetitions of ``document`` are preserved; items
        absent from ``todas_palavras`` are dropped.
        """
        return [token for token in document if token in todas_palavras]

    def wordFeature(documents):
        """Build the list of distinct items found across all documents.

        Each entry of ``documents`` is indexed at position 0 to obtain its
        item sequence. Items are returned in order of first appearance, each
        one only once.
        """
        unique_items = []
        for entry in documents:
            for item in entry[0]:
                if item in unique_items:
                    continue
                unique_items.append(item)
        return unique_items

    documents = [[removerpalavras(all_words.samples(), w[0]), w[1]]
                 for w in documents]
    documents = [[wordbig(w[0]), w[1]] for w in documents]
    word_features = wordFeature(
        documents
    )  #se 0usando FreqDistlista com palavras que aparecem mais de 3000

    # print(str(len(word_features)))
    # exit()
    # word_features = list(all_words.samples())#se 0usando FreqDistlista com palavras que aparecem mais de 3000

    def find_features(document):
        """Build the boolean feature dictionary for one document.

        Every entry of the enclosing ``word_features`` list becomes a key
        (its ``str()`` form) mapped to ``True`` when that entry occurs in
        ``document`` and ``False`` otherwise.

        The debugging dump and ``exit()`` that used to sit before the
        ``return`` were removed: they stopped the whole process on the first
        document and left the ``return`` unreachable.
        """
        # Items are hashable bigram tuples as produced by wordbig(), so one
        # set gives constant-time membership tests for every feature.
        present = set(document)
        return {str(feature): (feature in present)
                for feature in word_features}

    featuresets = [(find_features(rev), category)
                   for (rev, category) in documents]

    kfold = 4

    baseInteira = featuresets

    tamT = len(featuresets)
    divisao = tamT // kfold

    ###### ajustar divisao
    baseDividida1 = featuresets[0:divisao]
    baseDividida2 = featuresets[divisao:(divisao * 2)]
    baseDividida3 = featuresets[(divisao * 2):(divisao * 3)]
    baseDividida4 = featuresets[(divisao * 3):tamT]

    #tamT = len(featuresets)
    #umQuarto = tamBase/4

    #training_set = featuresets[umQuarto:]
    #testing_set = featuresets[:umQuarto]

    #training_set = featuresets[100:]
    #testing_set = featuresets[0:100]

    ########################## 1 rodada
    #print "## RODADA 1 ##"

    training_set = baseDividida2 + baseDividida3 + baseDividida4
    testing_set = baseDividida1

    MNB_classifier = SklearnClassifier(MultinomialNB())
    MNB_classifier.train(training_set)
    testclas = MNB_classifier.classify_many([fs for (fs, l) in testing_set])
    testgold = [l for (fs, l) in testing_set]
    MNBmc1 = sklearn.metrics.confusion_matrix(testgold, testclas)
    MNBa1 = (sklearn.metrics.accuracy_score(testgold, testclas)) * 100
    MNBpp1 = sklearn.metrics.precision_score(testgold, testclas,
                                             average=None) * 100
    precisaoMNB1 = sklearn.metrics.precision_score(testgold,
                                                   testclas,
                                                   average=None)
    g = 0
    somaPMNB1 = 0
    while (g < len(precisaoMNB1)):
        somaPMNB1 = somaPMNB1 + precisaoMNB1[g]
        g = g + 1
    MNBpt1 = (somaPMNB1 / len(precisaoMNB1)) * 100
    MNBrp1 = (sklearn.metrics.recall_score(testgold, testclas,
                                           average=None)) * 100
    recallMNB1 = sklearn.metrics.recall_score(testgold, testclas, average=None)
    g = 0
    somaRMNB1 = 0
    while (g < len(recallMNB1)):
        somaRMNB1 = somaRMNB1 + recallMNB1[g]
        g = g + 1
    MNBrt1 = (somaRMNB1 / len(recallMNB1)) * 100
    MNBfp1 = (sklearn.metrics.f1_score(testgold, testclas, average=None))
    f1MNB1 = sklearn.metrics.f1_score(testgold, testclas, average=None)
    g = 0
    somaFMNB1 = 0
    while (g < len(f1MNB1)):
        somaFMNB1 = somaFMNB1 + f1MNB1[g]
        g = g + 1
    MNBft1 = (somaFMNB1 / len(f1MNB1)) * 100
    '''
    BernoulliNB_classifier = SklearnClassifier(BernoulliNB())
    BernoulliNB_classifier.train(training_set)
    BernoulliNB_classifierRodada2 = nltk.classify.accuracy(BernoulliNB_classifier, testing_set)
    print("BernoulliNB_classifier accuracy percent:", BernoulliNB_classifierRodada2*100)
    '''
    LogisticRegression_classifier = SklearnClassifier(LogisticRegression())
    LogisticRegression_classifier.train(training_set)
    testclas = LogisticRegression_classifier.classify_many(
        [fs for (fs, l) in testing_set])
    testgold = [l for (fs, l) in testing_set]
    Rmc1 = sklearn.metrics.confusion_matrix(testgold, testclas)
    Ra1 = (sklearn.metrics.accuracy_score(testgold, testclas)) * 100
    Rpp1 = sklearn.metrics.precision_score(testgold, testclas,
                                           average=None) * 100
    precisaoR1 = sklearn.metrics.precision_score(testgold,
                                                 testclas,
                                                 average=None)
    g = 0
    somaPR1 = 0
    while (g < len(precisaoR1)):
        somaPR1 = somaPR1 + precisaoR1[g]
        g = g + 1
    Rpt1 = (somaPR1 / len(precisaoR1)) * 100
    Rrp1 = (sklearn.metrics.recall_score(testgold, testclas,
                                         average=None)) * 100
    recallR1 = sklearn.metrics.recall_score(testgold, testclas, average=None)
    g = 0
    somaRR1 = 0
    while (g < len(recallR1)):
        somaRR1 = somaRR1 + recallR1[g]
        g = g + 1
    Rrt1 = (somaRR1 / len(recallR1)) * 100
    Rfp1 = (sklearn.metrics.f1_score(testgold, testclas, average=None))
    f1R1 = sklearn.metrics.f1_score(testgold, testclas, average=None)
    g = 0
    somaFR1 = 0
    while (g < len(f1R1)):
        somaFR1 = somaFR1 + f1R1[g]
        g = g + 1
    Rft1 = (somaFR1 / len(f1R1)) * 100
    '''
    SGDClassifier_classifier = SklearnClassifier(SGDClassifier())
    SGDClassifier_classifier.train(training_set)
    SGDClassifier_classifierRodada2 = nltk.classify.accuracy(SGDClassifier_classifier, testing_set)
    print("SGDClassifier_classifier accuracy percent:", SGDClassifier_classifierRodada2*100)

    SVC_classifier = SklearnClassifier(SVC())
    SVC_classifier.train(training_set)
    SVC_classifierRodada2 = nltk.classify.accuracy(SVC_classifier, testing_set)
    print("SVC_classifier accuracy percent:", SVC_classifierRodada2*100)
    '''
    LinearSVC_classifier = SklearnClassifier(LinearSVC())
    LinearSVC_classifier.train(training_set)
    testclas = LinearSVC_classifier.classify_many(
        [fs for (fs, l) in testing_set])
    testgold = [l for (fs, l) in testing_set]
    Lmc1 = sklearn.metrics.confusion_matrix(testgold, testclas)
    La1 = (sklearn.metrics.accuracy_score(testgold, testclas)) * 100
    Lpp1 = sklearn.metrics.precision_score(testgold, testclas,
                                           average=None) * 100
    precisaoL1 = sklearn.metrics.precision_score(testgold,
                                                 testclas,
                                                 average=None)
    g = 0
    somaPL1 = 0
    while (g < len(precisaoL1)):
        somaPL1 = somaPL1 + precisaoL1[g]
        g = g + 1
    Lpt1 = (somaPL1 / len(precisaoL1)) * 100
    Lrp1 = (sklearn.metrics.recall_score(testgold, testclas,
                                         average=None)) * 100
    recallL1 = sklearn.metrics.recall_score(testgold, testclas, average=None)
    g = 0
    somaRL1 = 0
    while (g < len(recallL1)):
        somaRL1 = somaRL1 + recallL1[g]
        g = g + 1
    Lrt1 = (somaRL1 / len(recallL1)) * 100
    Lfp1 = (sklearn.metrics.f1_score(testgold, testclas, average=None))
    f1L1 = sklearn.metrics.f1_score(testgold, testclas, average=None)
    g = 0
    somaFL1 = 0
    while (g < len(f1L1)):
        somaFL1 = somaFL1 + f1L1[g]
        g = g + 1
    Lft1 = (somaFL1 / len(f1L1)) * 100

    ######################## Rodada 2
    #print "## RODADA 2 ##"

    training_set = baseDividida1 + baseDividida3 + baseDividida4
    testing_set = baseDividida2

    MNB_classifier = SklearnClassifier(MultinomialNB())
    MNB_classifier.train(training_set)
    testclas = MNB_classifier.classify_many([fs for (fs, l) in testing_set])
    testgold = [l for (fs, l) in testing_set]
    MNBmc2 = sklearn.metrics.confusion_matrix(testgold, testclas)
    MNBa2 = (sklearn.metrics.accuracy_score(testgold, testclas)) * 100
    MNBpp2 = sklearn.metrics.precision_score(testgold, testclas,
                                             average=None) * 100
    precisaoMNB2 = sklearn.metrics.precision_score(testgold,
                                                   testclas,
                                                   average=None)
    g = 0
    somaPMNB2 = 0
    while (g < len(precisaoMNB2)):
        somaPMNB2 = somaPMNB2 + precisaoMNB2[g]
        g = g + 1
    MNBpt2 = (somaPMNB2 / len(precisaoMNB2)) * 100
    MNBrp2 = (sklearn.metrics.recall_score(testgold, testclas,
                                           average=None)) * 100
    recallMNB2 = sklearn.metrics.recall_score(testgold, testclas, average=None)
    g = 0
    somaRMNB2 = 0
    while (g < len(recallMNB2)):
        somaRMNB2 = somaRMNB2 + recallMNB2[g]
        g = g + 1
    MNBrt2 = (somaRMNB2 / len(recallMNB2)) * 100
    MNBfp2 = (sklearn.metrics.f1_score(testgold, testclas, average=None))
    f1MNB2 = sklearn.metrics.f1_score(testgold, testclas, average=None)
    g = 0
    somaFMNB2 = 0
    while (g < len(f1MNB2)):
        somaFMNB2 = somaFMNB2 + f1MNB2[g]
        g = g + 1
    MNBft2 = (somaFMNB2 / len(f1MNB2)) * 100
    '''
    BernoulliNB_classifier = SklearnClassifier(BernoulliNB())
    BernoulliNB_classifier.train(training_set)
    BernoulliNB_classifierRodada2 = nltk.classify.accuracy(BernoulliNB_classifier, testing_set)
    print("BernoulliNB_classifier accuracy percent:", BernoulliNB_classifierRodada2*100)
    '''
    LogisticRegression_classifier = SklearnClassifier(LogisticRegression())
    LogisticRegression_classifier.train(training_set)
    testclas = LogisticRegression_classifier.classify_many(
        [fs for (fs, l) in testing_set])
    testgold = [l for (fs, l) in testing_set]
    Rmc2 = sklearn.metrics.confusion_matrix(testgold, testclas)
    Ra2 = (sklearn.metrics.accuracy_score(testgold, testclas)) * 100
    Rpp2 = sklearn.metrics.precision_score(testgold, testclas,
                                           average=None) * 100
    precisaoR2 = sklearn.metrics.precision_score(testgold,
                                                 testclas,
                                                 average=None)
    g = 0
    somaPR2 = 0
    while (g < len(precisaoR2)):
        somaPR2 = somaPR2 + precisaoR2[g]
        g = g + 1
    Rpt2 = (somaPR2 / len(precisaoR2)) * 100
    Rrp2 = (sklearn.metrics.recall_score(testgold, testclas,
                                         average=None)) * 100
    recallR2 = sklearn.metrics.recall_score(testgold, testclas, average=None)
    g = 0
    somaRR2 = 0
    while (g < len(recallR2)):
        somaRR2 = somaRR2 + recallR2[g]
        g = g + 1
    Rrt2 = (somaRR2 / len(recallR2)) * 100
    Rfp2 = (sklearn.metrics.f1_score(testgold, testclas, average=None))
    f1R2 = sklearn.metrics.f1_score(testgold, testclas, average=None)
    g = 0
    somaFR2 = 0
    while (g < len(f1R2)):
        somaFR2 = somaFR2 + f1R2[g]
        g = g + 1
    Rft2 = (somaFR2 / len(f1R2)) * 100
    '''
    SGDClassifier_classifier = SklearnClassifier(SGDClassifier())
    SGDClassifier_classifier.train(training_set)
    SGDClassifier_classifierRodada2 = nltk.classify.accuracy(SGDClassifier_classifier, testing_set)
    print("SGDClassifier_classifier accuracy percent:", SGDClassifier_classifierRodada2*100)

    SVC_classifier = SklearnClassifier(SVC())
    SVC_classifier.train(training_set)
    SVC_classifierRodada2 = nltk.classify.accuracy(SVC_classifier, testing_set)
    print("SVC_classifier accuracy percent:", SVC_classifierRodada2*100)
    '''
    LinearSVC_classifier = SklearnClassifier(LinearSVC())
    LinearSVC_classifier.train(training_set)
    testclas = LinearSVC_classifier.classify_many(
        [fs for (fs, l) in testing_set])
    testgold = [l for (fs, l) in testing_set]
    Lmc2 = sklearn.metrics.confusion_matrix(testgold, testclas)
    La2 = (sklearn.metrics.accuracy_score(testgold, testclas)) * 100
    Lpp2 = sklearn.metrics.precision_score(testgold, testclas,
                                           average=None) * 100
    precisaoL2 = sklearn.metrics.precision_score(testgold,
                                                 testclas,
                                                 average=None)
    g = 0
    somaPL2 = 0
    while (g < len(precisaoL2)):
        somaPL2 = somaPL2 + precisaoL2[g]
        g = g + 1
    Lpt2 = (somaPL2 / len(precisaoL2)) * 100
    Lrp2 = (sklearn.metrics.recall_score(testgold, testclas,
                                         average=None)) * 100
    recallL2 = sklearn.metrics.recall_score(testgold, testclas, average=None)
    g = 0
    somaRL2 = 0
    while (g < len(recallL2)):
        somaRL2 = somaRL2 + recallL2[g]
        g = g + 1
    Lrt2 = (somaRL2 / len(recallL2)) * 100
    Lfp2 = (sklearn.metrics.f1_score(testgold, testclas, average=None))
    f1L2 = sklearn.metrics.f1_score(testgold, testclas, average=None)
    g = 0
    somaFL2 = 0
    while (g < len(f1L2)):
        somaFL2 = somaFL2 + f1L2[g]
        g = g + 1
    Lft2 = (somaFL2 / len(f1L2)) * 100

    ##################### rodada 3
    #print "## RODADA 3 ##"

    training_set = baseDividida1 + baseDividida2 + baseDividida4
    testing_set = baseDividida3

    MNB_classifier = SklearnClassifier(MultinomialNB())
    MNB_classifier.train(training_set)
    testclas = MNB_classifier.classify_many([fs for (fs, l) in testing_set])
    testgold = [l for (fs, l) in testing_set]
    MNBmc3 = sklearn.metrics.confusion_matrix(testgold, testclas)
    MNBa3 = (sklearn.metrics.accuracy_score(testgold, testclas)) * 100
    MNBpp3 = sklearn.metrics.precision_score(testgold, testclas,
                                             average=None) * 100
    precisaoMNB3 = sklearn.metrics.precision_score(testgold,
                                                   testclas,
                                                   average=None)
    g = 0
    somaPMNB3 = 0
    while (g < len(precisaoMNB3)):
        somaPMNB3 = somaPMNB3 + precisaoMNB3[g]
        g = g + 1
    MNBpt3 = (somaPMNB3 / len(precisaoMNB3)) * 100
    MNBrp3 = (sklearn.metrics.recall_score(testgold, testclas,
                                           average=None)) * 100
    recallMNB3 = sklearn.metrics.recall_score(testgold, testclas, average=None)
    g = 0
    somaRMNB3 = 0
    while (g < len(recallMNB3)):
        somaRMNB3 = somaRMNB3 + recallMNB3[g]
        g = g + 1
    MNBrt3 = (somaRMNB3 / len(recallMNB3)) * 100
    MNBfp3 = (sklearn.metrics.f1_score(testgold, testclas, average=None))
    f1MNB3 = sklearn.metrics.f1_score(testgold, testclas, average=None)
    g = 0
    somaFMNB3 = 0
    while (g < len(f1MNB3)):
        somaFMNB3 = somaFMNB3 + f1MNB3[g]
        g = g + 1
    MNBft3 = (somaFMNB3 / len(f1MNB3)) * 100
    '''
    BernoulliNB_classifier = SklearnClassifier(BernoulliNB())
    BernoulliNB_classifier.train(training_set)
    BernoulliNB_classifierRodada2 = nltk.classify.accuracy(BernoulliNB_classifier, testing_set)
    print("BernoulliNB_classifier accuracy percent:", BernoulliNB_classifierRodada2*100)
    '''
    LogisticRegression_classifier = SklearnClassifier(LogisticRegression())
    LogisticRegression_classifier.train(training_set)
    testclas = LogisticRegression_classifier.classify_many(
        [fs for (fs, l) in testing_set])
    testgold = [l for (fs, l) in testing_set]
    Rmc3 = sklearn.metrics.confusion_matrix(testgold, testclas)
    Ra3 = (sklearn.metrics.accuracy_score(testgold, testclas)) * 100
    Rpp3 = sklearn.metrics.precision_score(testgold, testclas,
                                           average=None) * 100
    precisaoR3 = sklearn.metrics.precision_score(testgold,
                                                 testclas,
                                                 average=None)
    g = 0
    somaPR3 = 0
    while (g < len(precisaoR3)):
        somaPR3 = somaPR3 + precisaoR3[g]
        g = g + 1
    Rpt3 = (somaPR3 / len(precisaoR3)) * 100
    Rrp3 = (sklearn.metrics.recall_score(testgold, testclas,
                                         average=None)) * 100
    recallR3 = sklearn.metrics.recall_score(testgold, testclas, average=None)
    g = 0
    somaRR3 = 0
    while (g < len(recallR3)):
        somaRR3 = somaRR3 + recallR3[g]
        g = g + 1
    Rrt3 = (somaRR3 / len(recallR3)) * 100
    Rfp3 = (sklearn.metrics.f1_score(testgold, testclas, average=None))
    f1R3 = sklearn.metrics.f1_score(testgold, testclas, average=None)
    g = 0
    somaFR3 = 0
    while (g < len(f1R3)):
        somaFR3 = somaFR3 + f1R3[g]
        g = g + 1
    Rft3 = (somaFR3 / len(f1R3)) * 100
    '''
    SGDClassifier_classifier = SklearnClassifier(SGDClassifier())
    SGDClassifier_classifier.train(training_set)
    SGDClassifier_classifierRodada2 = nltk.classify.accuracy(SGDClassifier_classifier, testing_set)
    print("SGDClassifier_classifier accuracy percent:", SGDClassifier_classifierRodada2*100)

    SVC_classifier = SklearnClassifier(SVC())
    SVC_classifier.train(training_set)
    SVC_classifierRodada2 = nltk.classify.accuracy(SVC_classifier, testing_set)
    print("SVC_classifier accuracy percent:", SVC_classifierRodada2*100)
    '''
    LinearSVC_classifier = SklearnClassifier(LinearSVC())
    LinearSVC_classifier.train(training_set)
    testclas = LinearSVC_classifier.classify_many(
        [fs for (fs, l) in testing_set])
    testgold = [l for (fs, l) in testing_set]
    Lmc3 = sklearn.metrics.confusion_matrix(testgold, testclas)
    La3 = (sklearn.metrics.accuracy_score(testgold, testclas)) * 100
    Lpp3 = sklearn.metrics.precision_score(testgold, testclas,
                                           average=None) * 100
    precisaoL3 = sklearn.metrics.precision_score(testgold,
                                                 testclas,
                                                 average=None)
    g = 0
    somaPL3 = 0
    while (g < len(precisaoL3)):
        somaPL3 = somaPL3 + precisaoL3[g]
        g = g + 1
    Lpt3 = (somaPL3 / len(precisaoL3)) * 100
    Lrp3 = (sklearn.metrics.recall_score(testgold, testclas,
                                         average=None)) * 100
    # Per-class recall of the round-3 LinearSVC predictions.
    recallL3 = sklearn.metrics.recall_score(testgold, testclas, average=None)
    g = 0
    somaRL3 = 0
    while (g < len(recallL3)):
        somaRL3 = somaRL3 + recallL3[g]
        g = g + 1
    # Unweighted mean of the round-3 values, as a percentage. This must use
    # somaRL3/recallL3; the round-2 names were a copy-paste slip that made
    # Lrt3 repeat the round-2 result.
    Lrt3 = (somaRL3 / len(recallL3)) * 100
    Lfp3 = (sklearn.metrics.f1_score(testgold, testclas, average=None))
    f1L3 = sklearn.metrics.f1_score(testgold, testclas, average=None)
    g = 0
    somaFL3 = 0
    while (g < len(f1L3)):
        somaFL3 = somaFL3 + f1L3[g]
        g = g + 1
    Lft3 = (somaFL3 / len(f1L3)) * 100

    ############################ rodada 4
    #print "## RODADA 4 ##"

    training_set = baseDividida1 + baseDividida2 + baseDividida3
    testing_set = baseDividida4

    MNB_classifier = SklearnClassifier(MultinomialNB())
    MNB_classifier.train(training_set)
    testclas = MNB_classifier.classify_many([fs for (fs, l) in testing_set])
    testgold = [l for (fs, l) in testing_set]
    MNBmc4 = sklearn.metrics.confusion_matrix(testgold, testclas)
    MNBa4 = (sklearn.metrics.accuracy_score(testgold, testclas)) * 100
    MNBpp4 = sklearn.metrics.precision_score(testgold, testclas,
                                             average=None) * 100
    precisaoMNB4 = sklearn.metrics.precision_score(testgold,
                                                   testclas,
                                                   average=None)
    g = 0
    somaPMNB4 = 0
    while (g < len(precisaoMNB4)):
        somaPMNB4 = somaPMNB4 + precisaoMNB4[g]
        g = g + 1
    MNBpt4 = (somaPMNB4 / len(precisaoMNB4)) * 100
    MNBrp4 = (sklearn.metrics.recall_score(testgold, testclas,
                                           average=None)) * 100
    recallMNB4 = sklearn.metrics.recall_score(testgold, testclas, average=None)
    g = 0
    somaRMNB4 = 0
    while (g < len(recallMNB4)):
        somaRMNB4 = somaRMNB4 + recallMNB4[g]
        g = g + 1
    MNBrt4 = (somaRMNB4 / len(recallMNB4)) * 100
    MNBfp4 = (sklearn.metrics.f1_score(testgold, testclas, average=None))
    f1MNB4 = sklearn.metrics.f1_score(testgold, testclas, average=None)
    g = 0
    somaFMNB4 = 0
    while (g < len(f1MNB4)):
        somaFMNB4 = somaFMNB4 + f1MNB4[g]
        g = g + 1
    MNBft4 = (somaFMNB4 / len(f1MNB4)) * 100
    '''
    BernoulliNB_classifier = SklearnClassifier(BernoulliNB())
    BernoulliNB_classifier.train(training_set)
    BernoulliNB_classifierRodada2 = nltk.classify.accuracy(BernoulliNB_classifier, testing_set)
    print("BernoulliNB_classifier accuracy percent:", BernoulliNB_classifierRodada2*100)
    '''
    # Logistic regression on the same round-4 split.
    # Suffix legend: mc = confusion matrix, a = accuracy (%),
    # pp/rp/fp = per-class precision/recall/F1 arrays,
    # pt/rt/ft = unweighted mean over the classes (%).
    LogisticRegression_classifier = SklearnClassifier(LogisticRegression())
    LogisticRegression_classifier.train(training_set)
    testclas = LogisticRegression_classifier.classify_many(
        [fs for (fs, l) in testing_set])
    testgold = [l for (fs, l) in testing_set]
    Rmc4 = sklearn.metrics.confusion_matrix(testgold, testclas)
    Ra4 = (sklearn.metrics.accuracy_score(testgold, testclas)) * 100

    # Each per-class score array is computed once and reused for both the
    # per-class ("parcial") and the averaged ("total") figures.
    precisaoR4 = sklearn.metrics.precision_score(testgold,
                                                 testclas,
                                                 average=None)
    Rpp4 = precisaoR4 * 100
    somaPR4 = sum(precisaoR4)
    Rpt4 = (somaPR4 / len(precisaoR4)) * 100

    recallR4 = sklearn.metrics.recall_score(testgold, testclas, average=None)
    Rrp4 = recallR4 * 100
    somaRR4 = sum(recallR4)
    Rrt4 = (somaRR4 / len(recallR4)) * 100

    f1R4 = sklearn.metrics.f1_score(testgold, testclas, average=None)
    # NOTE(review): unlike Rpp4/Rrp4, the per-class F1 is kept on the 0-1
    # scale (not multiplied by 100) - confirm whether that is intended.
    Rfp4 = f1R4
    somaFR4 = sum(f1R4)
    Rft4 = (somaFR4 / len(f1R4)) * 100
    '''
    SGDClassifier_classifier = SklearnClassifier(SGDClassifier())
    SGDClassifier_classifier.train(training_set)
    SGDClassifier_classifierRodada2 = nltk.classify.accuracy(SGDClassifier_classifier, testing_set)
    print("SGDClassifier_classifier accuracy percent:", SGDClassifier_classifierRodada2*100)

    SVC_classifier = SklearnClassifier(SVC())
    SVC_classifier.train(training_set)
    SVC_classifierRodada2 = nltk.classify.accuracy(SVC_classifier, testing_set)
    print("SVC_classifier accuracy percent:", SVC_classifierRodada2*100)
    '''
    # Linear SVC on the same round-4 split.
    # Suffix legend: mc = confusion matrix, a = accuracy (%),
    # pp/rp/fp = per-class precision/recall/F1 arrays,
    # pt/rt/ft = unweighted mean over the classes (%).
    LinearSVC_classifier = SklearnClassifier(LinearSVC())
    LinearSVC_classifier.train(training_set)
    testclas = LinearSVC_classifier.classify_many(
        [fs for (fs, l) in testing_set])
    testgold = [l for (fs, l) in testing_set]
    Lmc4 = sklearn.metrics.confusion_matrix(testgold, testclas)
    La4 = (sklearn.metrics.accuracy_score(testgold, testclas)) * 100

    # Each per-class score array is computed once and reused for both the
    # per-class ("parcial") and the averaged ("total") figures.
    precisaoL4 = sklearn.metrics.precision_score(testgold,
                                                 testclas,
                                                 average=None)
    Lpp4 = precisaoL4 * 100
    somaPL4 = sum(precisaoL4)
    Lpt4 = (somaPL4 / len(precisaoL4)) * 100

    recallL4 = sklearn.metrics.recall_score(testgold, testclas, average=None)
    Lrp4 = recallL4 * 100
    somaRL4 = sum(recallL4)
    Lrt4 = (somaRL4 / len(recallL4)) * 100

    f1L4 = sklearn.metrics.f1_score(testgold, testclas, average=None)
    # NOTE(review): unlike Lpp4/Lrp4, the per-class F1 is kept on the 0-1
    # scale (not multiplied by 100) - confirm whether that is intended.
    Lfp4 = f1L4
    somaFL4 = sum(f1L4)
    Lft4 = (somaFL4 / len(f1L4)) * 100

    ################# averages
    # Aggregate the four rounds: the mean of every metric, plus the best and
    # worst round for each scalar ("total") metric.

    # MULTINOMIAL NAIVE BAYES
    # Element-wise mean of the four confusion matrices.
    # NOTE(review): assumes all four matrices have the same shape - confirm.
    MNBmc = (MNBmc1 + MNBmc2 + MNBmc3 + MNBmc4) / 4
    MNBa = (MNBa1 + MNBa2 + MNBa3 + MNBa4) / 4
    MNBamax = max(MNBa1, MNBa2, MNBa3, MNBa4)
    MNBamin = min(MNBa1, MNBa2, MNBa3, MNBa4)
    # Bug fix: this previously added MNBpp4 four times, so the "mean"
    # per-class precision was just the round-4 value.
    MNBpp = (MNBpp1 + MNBpp2 + MNBpp3 + MNBpp4) / 4
    MNBpt = (MNBpt1 + MNBpt2 + MNBpt3 + MNBpt4) / 4
    MNBpmax = max(MNBpt1, MNBpt2, MNBpt3, MNBpt4)
    MNBpmin = min(MNBpt1, MNBpt2, MNBpt3, MNBpt4)
    MNBrp = (MNBrp1 + MNBrp2 + MNBrp3 + MNBrp4) / 4
    MNBrt = (MNBrt1 + MNBrt2 + MNBrt3 + MNBrt4) / 4
    MNBrmax = max(MNBrt1, MNBrt2, MNBrt3, MNBrt4)
    MNBrmin = min(MNBrt1, MNBrt2, MNBrt3, MNBrt4)
    MNBfp = (MNBfp1 + MNBfp2 + MNBfp3 + MNBfp4) / 4
    MNBft = (MNBft1 + MNBft2 + MNBft3 + MNBft4) / 4
    MNBfmax = max(MNBft1, MNBft2, MNBft3, MNBft4)
    MNBfmin = min(MNBft1, MNBft2, MNBft3, MNBft4)
    '''
    fig = plt.figure()
    ax = fig.add_subplot(1,1,1)
    ax.set_aspect('equal')
    plt.imshow(MNBmc, interpolation='nearest', cmap=plt.cm.ocean)
    plt.colorbar()
    plt.show()
    '''

    # LOGISTIC REGRESSION (labelled 'Regressão Linear' in the report file)
    # Mean over the four rounds of every metric, plus the best and worst
    # round for each scalar ("total") metric.
    # NOTE(review): averaging the confusion matrices assumes all four have
    # the same shape - confirm.
    Rmc = (Rmc1 + Rmc2 + Rmc3 + Rmc4) / 4
    Ra = (Ra1 + Ra2 + Ra3 + Ra4) / 4
    Ramax = max(Ra1, Ra2, Ra3, Ra4)
    Ramin = min(Ra1, Ra2, Ra3, Ra4)
    # Bug fix: this previously added Rpp4 four times, so the "mean"
    # per-class precision was just the round-4 value.
    Rpp = (Rpp1 + Rpp2 + Rpp3 + Rpp4) / 4
    Rpt = (Rpt1 + Rpt2 + Rpt3 + Rpt4) / 4
    Rpmax = max(Rpt1, Rpt2, Rpt3, Rpt4)
    Rpmin = min(Rpt1, Rpt2, Rpt3, Rpt4)
    Rrp = (Rrp1 + Rrp2 + Rrp3 + Rrp4) / 4
    Rrt = (Rrt1 + Rrt2 + Rrt3 + Rrt4) / 4
    Rrmax = max(Rrt1, Rrt2, Rrt3, Rrt4)
    Rrmin = min(Rrt1, Rrt2, Rrt3, Rrt4)
    Rfp = (Rfp1 + Rfp2 + Rfp3 + Rfp4) / 4
    Rft = (Rft1 + Rft2 + Rft3 + Rft4) / 4
    Rfmax = max(Rft1, Rft2, Rft3, Rft4)
    Rfmin = min(Rft1, Rft2, Rft3, Rft4)

    # LINEAR SVC
    # Mean over the four rounds of every metric, plus the best and worst
    # round for each scalar ("total") metric.
    # NOTE(review): averaging the confusion matrices assumes all four have
    # the same shape - confirm.
    Lmc = (Lmc1 + Lmc2 + Lmc3 + Lmc4) / 4
    La = (La1 + La2 + La3 + La4) / 4
    Lamax = max(La1, La2, La3, La4)
    Lamin = min(La1, La2, La3, La4)
    # Bug fix: this previously added Lpp4 four times, so the "mean"
    # per-class precision was just the round-4 value.
    Lpp = (Lpp1 + Lpp2 + Lpp3 + Lpp4) / 4
    Lpt = (Lpt1 + Lpt2 + Lpt3 + Lpt4) / 4
    Lpmax = max(Lpt1, Lpt2, Lpt3, Lpt4)
    Lpmin = min(Lpt1, Lpt2, Lpt3, Lpt4)
    Lrp = (Lrp1 + Lrp2 + Lrp3 + Lrp4) / 4
    Lrt = (Lrt1 + Lrt2 + Lrt3 + Lrt4) / 4
    Lrmax = max(Lrt1, Lrt2, Lrt3, Lrt4)
    Lrmin = min(Lrt1, Lrt2, Lrt3, Lrt4)
    Lfp = (Lfp1 + Lfp2 + Lfp3 + Lfp4) / 4
    Lft = (Lft1 + Lft2 + Lft3 + Lft4) / 4
    Lfmax = max(Lft1, Lft2, Lft3, Lft4)
    Lfmin = min(Lft1, Lft2, Lft3, Lft4)
    '''
    print "SVC Linear"
    print "Matriz de confusão: ", Lmc
    print "Acuracia: ", La
    print "Precisão parcial: ", Lpp
    print "Precisão total: ", Lpt
    print "Recall parcial: ", Lrp
    print "Recall total: ", Lrt
    print "F-medida parcial: ", Lfp
    print "F-medida total: ", Lft
    '''

    # Write the report to `path` as ';'-separated text: one table per
    # classifier with a row per round, followed by mean / max / min rows.
    # NOTE(review): open() is called without an explicit encoding, so the
    # accented labels are written in the platform default - confirm intended.
    # NOTE(review): the 'Máximo'/'Mínimo' rows carry only four values
    # (accuracy and the precision/recall/F1 totals), so they do not line up
    # with the eight-column header - confirm intended.
    _rel_titulos = ('Iteração', 'Acurácia', 'Precisão parcial',
                    'Precisão total', 'revocação parcial', 'revocação total',
                    'f-medida parcial', 'f-medida total')
    _rel_divisoria = '==================' + '\n'

    # (label, per-round rows, mean row, max row, min row) for each classifier.
    _rel_secoes = (
        ('Multinominal Naïve-Bayes',
         ((MNBa1, MNBpp1, MNBpt1, MNBrp1, MNBrt1, MNBfp1, MNBft1),
          (MNBa2, MNBpp2, MNBpt2, MNBrp2, MNBrt2, MNBfp2, MNBft2),
          (MNBa3, MNBpp3, MNBpt3, MNBrp3, MNBrt3, MNBfp3, MNBft3),
          (MNBa4, MNBpp4, MNBpt4, MNBrp4, MNBrt4, MNBfp4, MNBft4)),
         (MNBa, MNBpp, MNBpt, MNBrp, MNBrt, MNBfp, MNBft),
         (MNBamax, MNBpmax, MNBrmax, MNBfmax),
         (MNBamin, MNBpmin, MNBrmin, MNBfmin)),
        ('Regressão Linear',
         ((Ra1, Rpp1, Rpt1, Rrp1, Rrt1, Rfp1, Rft1),
          (Ra2, Rpp2, Rpt2, Rrp2, Rrt2, Rfp2, Rft2),
          (Ra3, Rpp3, Rpt3, Rrp3, Rrt3, Rfp3, Rft3),
          (Ra4, Rpp4, Rpt4, Rrp4, Rrt4, Rfp4, Rft4)),
         (Ra, Rpp, Rpt, Rrp, Rrt, Rfp, Rft),
         (Ramax, Rpmax, Rrmax, Rfmax),
         (Ramin, Rpmin, Rrmin, Rfmin)),
        ('SVC Linear',
         ((La1, Lpp1, Lpt1, Lrp1, Lrt1, Lfp1, Lft1),
          (La2, Lpp2, Lpt2, Lrp2, Lrt2, Lfp2, Lft2),
          (La3, Lpp3, Lpt3, Lrp3, Lrt3, Lfp3, Lft3),
          (La4, Lpp4, Lpt4, Lrp4, Lrt4, Lfp4, Lft4)),
         (La, Lpp, Lpt, Lrp, Lrt, Lfp, Lft),
         (Lamax, Lpmax, Lrmax, Lfmax),
         (Lamin, Lpmin, Lrmin, Lfmin)),
    )

    def _rel_linha(rotulo, valores):
        # One report line: the label followed by str() of each value.
        return ';'.join([rotulo] + [str(v) for v in valores]) + '\n'

    with open(path, mode='w') as csv_file:
        for _rel_pos, _rel_secao in enumerate(_rel_secoes):
            (_rel_nome, _rel_rodadas, _rel_media, _rel_maximos,
             _rel_minimos) = _rel_secao
            # A divider separates consecutive classifier tables.
            if _rel_pos > 0:
                csv_file.write(_rel_divisoria)
            csv_file.write(_rel_linha('Algoritmo', [_rel_nome]))
            csv_file.write(';'.join(_rel_titulos) + '\n')
            for _rel_num, _rel_metricas in enumerate(_rel_rodadas, start=1):
                csv_file.write(_rel_linha(str(_rel_num), _rel_metricas))
            csv_file.write(_rel_divisoria)
            csv_file.write('Total' + '\n')
            csv_file.write(_rel_linha('Média', _rel_media))
            csv_file.write(_rel_linha('Máximo', _rel_maximos))
            csv_file.write(_rel_linha('Mínimo', _rel_minimos))