def run_active_learning(train, xs):
    """Run a pool-based active-learning loop for 10 iterations.

    Half of *xs* (chosen at random) seeds the training set; the remainder
    becomes the unlabeled pool.  Each iteration trains a classifier via
    *train*, scores it against the entire pool, then moves the 3 examples a
    NaiveBayes model is least certain about from the pool into the training
    set, printing per-iteration accuracy.

    train -- callable building a classifier function from a list of examples
    xs    -- list of examples; each is expected to carry a .qw label
    """
    trainingset = random.sample(xs, len(xs) // 2)
    pool = [x for x in xs if x not in trainingset]

    for it in range(10):
        if pool == []:
            break
        classifier = train(trainingset)
        nbc = naivebayes.NaiveBayes(trainingset)

        # Evaluate on every example still in the pool (1 = correct, 0 = wrong).
        correct = [1 if classifier(x) == x.qw else 0 for x in pool]

        # Active step: pull the least-certain examples into the training set.
        ACTIVESTEPSIZE = 3
        for _ in range(ACTIVESTEPSIZE):
            if len(pool) > 0:
                nextpick = least_certain(pool, nbc)
                trainingset.append(nextpick)
                pool.remove(nextpick)

        print("  iteration %2d, accuracy: %d / %d = %0.3f" %
            (it,
             sum(correct), len(correct),
             sum(correct) / len(correct)))
Exemplo n.º 2
0
def runNaiveBayes(numTrainValues, numTestValues, pixels, tune, useTrainedProbs, info):
    """
    runNaiveBayes() runs the Naive Bayes learning algorithm on the MNIST dataset.
    It also prints associated analytics, including the accuracy and time taken
    to run.

    Keyword arguments:
    numTrainValues -- number of training values to train the classifier
    numTestValues -- number of test values to test the trained classifier
    pixels -- number of pixels to chop from the margins of the image
    tune -- a boolean for whether to tune to find the optimal number of iterations
    useTrainedProbs -- if True, load previously stored feature probabilities
                       instead of training from scratch
    info -- boolean to get information about common classification mistakes
    """
    # NOTE(review): Python 2 code (print statements). time.clock() is used for
    # the wall-time report at the end.
    t = time.clock()

    # Classifier over the ten MNIST digit labels 0-9.
    naiveBayesClassifier = naivebayes.NaiveBayes(range(10))

    if useTrainedProbs:
        # Skip the training pass entirely and reuse saved probabilities.
        naiveBayesClassifier.useTrainedProbs(loadFeatures.getFeatureList())
    else:
        print "Loading Training Data....\n"
        trainingData, trainingLabels, validationData, validationLabels, features = loadFeatures.loadTrainingData(numTrainValues, pixels, tune)

        print "Training Naive Bayes Classifier....\n"
        naiveBayesClassifier.train(trainingData, trainingLabels, validationData, validationLabels, features, tune)

    print "Loading Testing Data....\n"
    testingData, testingLabels = loadFeatures.loadTestingData(numTestValues, pixels)

    print "Testing Naive Bayes Classifier....\n"
    classifiedData = naiveBayesClassifier.classify(testingData)
    test(classifiedData, testingLabels, info)

    print "Total Time {0}".format(time.clock() - t)
Exemplo n.º 3
0
def main():
    """Script entry point: train a NaiveBayes filter, then score the test file.

    Requires two command-line arguments (training file, test file); prints a
    usage message to stderr and exits with status 1 when they are missing.
    """
    if len(sys.argv) < 3:
        usage = 'Usage: python bayesian-fileter-words.py training_filename test_filename > output\n'
        sys.stderr.write(usage)
        sys.exit(1)

    classifier = naivebayes.NaiveBayes()
    classifier.training(sys.argv[1])
    classifier.test_posterior(sys.argv[2])
Exemplo n.º 4
0
        os.chdir(self.chemin)
        if ('train.csv' in os.listdir(os.getcwd())):
            cr = csv.reader(open("train.csv", "rb"))
            self.li = []
            flag = True
            for row in cr:
                if flag:
                    for n in range(0, len(row)):
                        self.li.append([])
                    flag = False
                for n in range(0, len(row)):
                    self.li[n].append(row[n])
        print('CSV data import success')
        self.attachement()

    def attachement(self):
        """Attach each imported CSV column to its matching dataex entry.

        For every column list in self.li, the first element is popped off
        and used as the key into self.dataex; the remaining values are
        appended under that key.  Finishes by running the data-check step.
        """
        if self.li and self.dataex:
            for column in self.li:
                key = column.pop(0)
                self.dataex[key].append(column)
        print('Attachement success')
        self.chekdata()

    def chekdata(self):
        """Run the DataCheck pass over dataex and keep the cleaned result."""
        checker = dataCheck.DataCheck(self.dataex, 3)
        self.dataex = checker.data


# Build the dataset from a local repository checkout, then construct the model.
# NOTE(review): hard-coded absolute Windows path — presumably the author's
# machine; will fail elsewhere.
da = readDATA('C:/Users/mreou/Documents/GitHub/Naive-Bayes')
n = naivebayes.NaiveBayes(da.dataex, 3)
Exemplo n.º 5
0
# Naming convention: PEP8
from django.http.response import HttpResponse
from django.shortcuts import render
from datetime import datetime
import naivebayes
import gethtmltext
import gettrain

# Create the Naive Bayes classifier object.
nb = naivebayes.NaiveBayes()
# Scrape the Gunosy site and train the classifier on its article data.
gettrain.gunosy_train(nb)


def hello_guess_category(request):
    """Django view: guess the category of the article at the submitted URL.

    Reads ``url`` from the GET query string, fetches the page text and
    classifies it with the module-level Naive Bayes model.  The model is
    created at module scope so scraping and training do not happen on
    every request.
    """
    global nb
    # URL submitted through the form.
    url = request.GET.get('url')
    html_text = gethtmltext.url_to_text(url)
    if html_text is None:
        # Fetch failed or no URL supplied — prompt the user (Japanese UI text).
        category = "urlを入力して下さい。"
    else:
        category = "推定カテゴリー :" + nb.classifier(html_text)
    return render(request, 'index.html', {'category': category})
Exemplo n.º 6
0
     testt = []
     for f in features:
         nb_traint.append([f[i] for i in range(len(f)) if nb_ks.count(k_indices[i]) > 0])
         boost_traint.append([f[i] for i in range(len(f)) if boost_ks.count(k_indices[i]) > 0])
         testt.append([f[i] for i in range(len(f)) if k_indices[i] == k])
         
     testc = [text_classes[i] for i in range(len(text_classes)) if k_indices[i] == k]
     
     print 'NB Train texts:', len(nb_traint[0])
     print 'Boost Train texts:', len(boost_traint[0])
     print 'Test texts:', len(testt[0])
 
     print 'Training weak classifiers...'
     classifiers = []
     for c in range(C):
         nb = naivebayes.NaiveBayes(bins[c])
         nb.train(nb_trainc, nb_traint[c])
         classifiers.append(nb)
     
     print 'Boosting...'
     samme = boost.Samme()
     samme.train(boost_traint, boost_trainc, classifiers, len(distinct_classes), BOOST_ITER)
 
     print 'Classifying...'
     classified = samme.classify(testt)
     
     # --- Calculate performance measures --- #
     class_classified = dict.fromkeys(distinct_classes,0)
     actual = dict.fromkeys(distinct_classes,0)
     correct = dict.fromkeys(distinct_classes,0)
     #top_correct = 0