def run_active_learning(train, xs):
    """Run a pool-based active-learning loop for 10 iterations.

    Half of *xs* (chosen at random) seeds the training set; the remainder
    becomes the unlabeled pool.  Each iteration trains a classifier via
    *train*, scores it against the entire pool, then moves the 3 examples a
    NaiveBayes model is least certain about from the pool into the training
    set, printing per-iteration accuracy.

    train -- callable building a classifier function from a list of examples
    xs    -- list of examples; each is expected to carry a .qw label
    """
    trainingset = random.sample(xs, len(xs) // 2)
    pool = [x for x in xs if x not in trainingset]

    for it in range(10):
        if pool == []:
            break
        classifier = train(trainingset)
        nbc = naivebayes.NaiveBayes(trainingset)

        # Evaluate on every example still in the pool (1 = correct, 0 = wrong).
        correct = [1 if classifier(x) == x.qw else 0 for x in pool]

        # Active step: pull the least-certain examples into the training set.
        ACTIVESTEPSIZE = 3
        for _ in range(ACTIVESTEPSIZE):
            if len(pool) > 0:
                nextpick = least_certain(pool, nbc)
                trainingset.append(nextpick)
                pool.remove(nextpick)

        print("  iteration %2d, accuracy: %d / %d = %0.3f" %
            (it,
             sum(correct), len(correct),
             sum(correct) / len(correct)))
Exemplo n.º 2
0
def runNaiveBayes(numTrainValues, numTestValues, pixels, tune, useTrainedProbs, info):
    """
    runNaiveBayes() runs the Naive Bayes learning algorithm on the MNIST dataset.
    It also prints associated analytics, including the accuracy and time taken
    to run.

    Keyword arguments:
    numTrainValues -- number of training values to train the classifier
    numTestValues -- number of test values to test the trained classifier
    pixels -- number of pixels to chop from the margins of the image
    tune -- a boolean for whether to tune to find the optimal number of iterations
    useTrainedProbs -- if True, load previously stored feature probabilities
                       instead of training from scratch
    info -- boolean to get information about common classification mistakes
    """
    # NOTE(review): Python 2 code (print statements). time.clock() is used for
    # the wall-time report at the end.
    t = time.clock()

    # Classifier over the ten MNIST digit labels 0-9.
    naiveBayesClassifier = naivebayes.NaiveBayes(range(10))

    if useTrainedProbs:
        # Skip the training pass entirely and reuse saved probabilities.
        naiveBayesClassifier.useTrainedProbs(loadFeatures.getFeatureList())
    else:
        print "Loading Training Data....\n"
        trainingData, trainingLabels, validationData, validationLabels, features = loadFeatures.loadTrainingData(numTrainValues, pixels, tune)

        print "Training Naive Bayes Classifier....\n"
        naiveBayesClassifier.train(trainingData, trainingLabels, validationData, validationLabels, features, tune)

    print "Loading Testing Data....\n"
    testingData, testingLabels = loadFeatures.loadTestingData(numTestValues, pixels)

    print "Testing Naive Bayes Classifier....\n"
    classifiedData = naiveBayesClassifier.classify(testingData)
    test(classifiedData, testingLabels, info)

    print "Total Time {0}".format(time.clock() - t)
Exemplo n.º 3
0
def main():
    """Script entry point: train a NaiveBayes filter, then score the test file.

    Requires two command-line arguments (training file, test file); prints a
    usage message to stderr and exits with status 1 when they are missing.
    """
    if len(sys.argv) < 3:
        usage = 'Usage: python bayesian-fileter-words.py training_filename test_filename > output\n'
        sys.stderr.write(usage)
        sys.exit(1)

    classifier = naivebayes.NaiveBayes()
    classifier.training(sys.argv[1])
    classifier.test_posterior(sys.argv[2])
Exemplo n.º 4
0
        os.chdir(self.chemin)
        if ('train.csv' in os.listdir(os.getcwd())):
            cr = csv.reader(open("train.csv", "rb"))
            self.li = []
            flag = True
            for row in cr:
                if flag:
                    for n in range(0, len(row)):
                        self.li.append([])
                    flag = False
                for n in range(0, len(row)):
                    self.li[n].append(row[n])
        print('CSV data import success')
        self.attachement()

    def attachement(self):
        """Attach each imported CSV column to its matching dataex entry.

        For every column list in self.li, the first element is popped off
        and used as the key into self.dataex; the remaining values are
        appended under that key.  Finishes by running the data-check step.
        """
        if self.li and self.dataex:
            for column in self.li:
                key = column.pop(0)
                self.dataex[key].append(column)
        print('Attachement success')
        self.chekdata()

    def chekdata(self):
        """Run the DataCheck pass over dataex and keep the cleaned result."""
        checker = dataCheck.DataCheck(self.dataex, 3)
        self.dataex = checker.data


# Build the dataset from a local repository checkout, then construct the model.
# NOTE(review): hard-coded absolute Windows path — presumably the author's
# machine; will fail elsewhere.
da = readDATA('C:/Users/mreou/Documents/GitHub/Naive-Bayes')
n = naivebayes.NaiveBayes(da.dataex, 3)
Exemplo n.º 5
0
# Naming convention: PEP8
from django.http.response import HttpResponse
from django.shortcuts import render
from datetime import datetime
import naivebayes
import gethtmltext
import gettrain

# Create the Naive Bayes classifier object.
nb = naivebayes.NaiveBayes()
# Scrape the Gunosy site and train the classifier on its article data.
gettrain.gunosy_train(nb)


def hello_guess_category(request):
    """Django view: guess the category of the article at the submitted URL.

    Reads ``url`` from the GET query string, fetches the page text and
    classifies it with the module-level Naive Bayes model.  The model is
    created at module scope so scraping and training do not happen on
    every request.
    """
    global nb
    # URL submitted through the form.
    url = request.GET.get('url')
    html_text = gethtmltext.url_to_text(url)
    if html_text is None:
        # Fetch failed or no URL supplied — prompt the user (Japanese UI text).
        category = "urlを入力して下さい。"
    else:
        category = "推定カテゴリー :" + nb.classifier(html_text)
    return render(request, 'index.html', {'category': category})
Exemplo n.º 6
0
     testt = []
     for f in features:
         nb_traint.append([f[i] for i in range(len(f)) if nb_ks.count(k_indices[i]) > 0])
         boost_traint.append([f[i] for i in range(len(f)) if boost_ks.count(k_indices[i]) > 0])
         testt.append([f[i] for i in range(len(f)) if k_indices[i] == k])
         
     testc = [text_classes[i] for i in range(len(text_classes)) if k_indices[i] == k]
     
     print 'NB Train texts:', len(nb_traint[0])
     print 'Boost Train texts:', len(boost_traint[0])
     print 'Test texts:', len(testt[0])
 
     print 'Training weak classifiers...'
     classifiers = []
     for c in range(C):
         nb = naivebayes.NaiveBayes(bins[c])
         nb.train(nb_trainc, nb_traint[c])
         classifiers.append(nb)
     
     print 'Boosting...'
     samme = boost.Samme()
     samme.train(boost_traint, boost_trainc, classifiers, len(distinct_classes), BOOST_ITER)
 
     print 'Classifying...'
     classified = samme.classify(testt)
     
     # --- Calculate performance measures --- #
     class_classified = dict.fromkeys(distinct_classes,0)
     actual = dict.fromkeys(distinct_classes,0)
     correct = dict.fromkeys(distinct_classes,0)
     #top_correct = 0