Python Classifier.train示例

编程语言: Python

命名空间/包名称: docclass

类/类型: Classifier

方法/功能: train

hotexamples.com的示例: 5

Python Classifier.train - 已找到5个示例。以下是从开源项目中提取的、评分最高的 docclass.Classifier.train 真实 Python 代码示例。您可以为这些示例评分，以帮助我们提高示例质量。

常用方法

显示/隐藏

incc(6)

incf(5)

train(5)

catcount(2)

classify(2)

fcount(2)

prob(2)

fprob(1)

getfeatures(1)

infc(1)

save(1)

totalcount(1)

weightedprob(1)

示例#1

显示文件

文件： test_classifier.py 项目： stefanseibert/DataMining

 def testTrain(self):
     """Check that one train() call updates the category and feature counts.

     NOTE(review): "hello" occurs twice in the item yet is expected to be
     counted once, and "my" is expected to be absent from the feature
     counts; presumably getwords lower-cases, de-duplicates and drops
     short words -- confirm against getwords.
     """
     c = Classifier(getwords)
     item = "Hello hello world, my name is Python."
     cat = "Good"
     c.train(item, cat)
     self.assertEqual(c.catcount("Good"), 1)
     self.assertEqual(c.fcount("hello", "Good"), 1)
     # dict.has_key() no longer exists in Python 3; a membership assertion
     # is equivalent and works on both Python 2.7 and Python 3.
     self.assertNotIn("my", c.fc)

示例#2

显示文件

文件： parseTechFeed.py 项目： stefanseibert/DataMining

countnews['nontech'] = 0
countnews['test'] = 0


# One classifier instance is trained by both loops below.
c = Classifier(getwords, initprob=0.5)

# Single-argument print(...) prints exactly what the Python 2 print statement
# printed and is also valid syntax on Python 3.
print("--------------------News from trainTech------------------------")
for feed in trainTech:
    f = feedparser.parse(feed)
    for e in f.entries:
        print('\n---------------------------')
        # Training text: title and description joined, passed through stripHTML.
        fulltext = stripHTML(e.title + ' ' + e.description)
        print(fulltext)
        countnews['tech'] += 1

        c.train(fulltext, "Tech")

print("----------------------------------------------------------------")
print("----------------------------------------------------------------")
print("----------------------------------------------------------------")

print("--------------------News from trainNonTech------------------------")
for feed in trainNonTech:
    f = feedparser.parse(feed)
    for e in f.entries:
        print('\n---------------------------')
        fulltext = stripHTML(e.title + ' ' + e.description)
        print(fulltext)
        countnews['nontech'] += 1
        c.train(fulltext, "NonTech")

示例#3

显示文件

文件： parseTechFeed.py 项目： fridy77/DataMining

      countnews['test']+=1
# Each print below passes one pre-formatted argument, so the output matches
# the original Python 2 print statements while also being valid Python 3.
print("----------------------------------------------------------------")
print("----------------------------------------------------------------")
print("----------------------------------------------------------------")

# Summary of how many samples were counted per bucket.
print('Number of used trainings samples in categorie tech %s' % (countnews['tech'],))
print('Number of used trainings samples in categorie notech %s' % (countnews['nontech'],))
print('Number of used test samples %s' % (countnews['test'],))
print('--' * 30)



rss_classifier = Classifier()

# Train on every sample of both labelled groups.
for tech in train_data["good"]:
    rss_classifier.train(tech, "good")

for nontech in train_data["bad"]:
    rss_classifier.train(nontech, "bad")

print("---- training finished ---------------------")
for test in data:
    g_pb = rss_classifier.prob(test, "good")
    b_pb = rss_classifier.prob(test, "bad")
    # Normalize the two probabilities so that they sum to 1.
    # NOTE(review): this raises ZeroDivisionError if both probabilities are 0.
    pb_sum = g_pb + b_pb
    g_pb_n = g_pb / pb_sum
    b_pb_n = b_pb / pb_sum
    print(test)
    # Spacing mirrors the separators the print statement inserted between items.
    print("good:  %s  bad:  %s" % (g_pb_n, b_pb_n))

示例#4

显示文件

文件： test_classifier.py 项目： stefanseibert/DataMining

    def testClassifier(self):
        """Train on eight labelled sentences and check one classification."""
        # (text, category) pairs, fed to train() in exactly this order.
        samples = (
            ("nobody owns the water", "Good"),
            ("the quick rabbit jumps fences", "Good"),
            ("buy pharmaceuticals now", "Bad"),
            ("make quick money at the online casino", "Bad"),
            ("the quick brown fox jumps", "Good"),
            ("next meeting is at night", "Good"),
            ("meeting with your superstar", "Bad"),
            ("money like water", "Bad"),
        )

        classifier = Classifier(getwords)
        for text, label in samples:
            classifier.train(text, label)

        # The query contains "quick" because with only 'money jumps' the
        # Good and Bad categories got the same value.
        predicted = classifier.classify("the money jumps quick")
        self.assertEqual(predicted, "Good")

示例#5

显示文件

文件： parseTechFeed_more_categories.py 项目： stefanseibert/DataMining

countnews['test'] = 0


# One classifier instance is trained by both loops below.
c = Classifier(getwords, initprob=0.5)

# Single-argument print(...) prints exactly what the Python 2 print statement
# printed and is also valid syntax on Python 3.
print("--------------------News from trainTech------------------------")
for feed in trainTech:
    f = feedparser.parse(feed)
    for e in f.entries:
        print('\n---------------------------')
        # Training text: title and description joined, passed through stripHTML.
        fulltext = stripHTML(e.title + ' ' + e.description)
        print(fulltext)
        countnews['tech'] += 1

        c.train(fulltext, "Tech")

print("----------------------------------------------------------------")
print("----------------------------------------------------------------")
print("----------------------------------------------------------------")

print("--------------------News from trainPolitics------------------------")
for feed in trainPolitics:
    f = feedparser.parse(feed)
    for e in f.entries:
        print('\n---------------------------')
        fulltext = stripHTML(e.title + ' ' + e.description)
        print(fulltext)
        countnews['politics'] += 1
        c.train(fulltext, "Politics")