Python Classifier примеры использования

Язык программирования: Python

Пространство имен/Пакет: docclass

Класс/Тип: Classifier

Примеров на hotexamples.com: 15

Python Classifier — найдено 15 примеров. Это лучшие примеры Python-кода для docclass.Classifier, полученные из проектов с открытым исходным кодом. Вы можете поставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

incc(6)

incf(5)

train(5)

catcount(2)

classify(2)

fcount(2)

prob(2)

fprob(1)

getfeatures(1)

infc(1)

save(1)

totalcount(1)

weightedprob(1)

Пример #1

Показать файл

Файл: test_classifier.py Проект: stefanseibert/DataMining

 def testTrain(self):
     # Training on a single item must register one "Good" document and
     # count the feature "hello" exactly once for that category.
     c = Classifier(getwords)
     item = "Hello hello world, my name is Python."
     cat = "Good"
     c.train(item, cat)
     self.assertEqual(c.catcount("Good"), 1)
     self.assertEqual(c.fcount("hello", "Good"), 1)
     # "my" must not end up in the feature counts (presumably getwords
     # drops it as too short -- confirm against its implementation).
     # assertNotIn replaces the Python-2-only dict.has_key() and names the
     # offending key in the failure message.
     self.assertNotIn("my", c.fc)

Пример #2

Показать файл

Файл: test_classifier.py Проект: stefanseibert/DataMining

 def testGetFeatures(self):
     # getfeatures() must return a mapping that holds "hello" with value 1
     # and contains neither "has" nor the over-long word.
     c = Classifier(getwords)
     # Named `features` rather than `dict` so the builtin is not shadowed.
     features = c.getfeatures("Hello World world world ,       hello has cats and vervyveryveryveryveryverylongword")
     self.assertIsNotNone(features)
     self.assertIsNotNone(features["hello"])
     self.assertEqual(features["hello"], 1)
     # Membership checks replace the Python-2-only dict.has_key().
     # NOTE(review): presumably getwords filters words by length -- confirm.
     self.assertNotIn("has", features)
     self.assertNotIn("vervyveryveryveryveryverylongword", features)

Пример #3

Показать файл

Файл: test_classifier.py Проект: stefanseibert/DataMining

 def testCatCount(self):
     # catcount() must report how many times incc() was called per category.
     classifier = Classifier(getwords)
     for category in ("Bad", "Bad", "Good"):
         classifier.incc(category)
     self.assertEqual(classifier.catcount("Good"), 1)
     self.assertEqual(classifier.catcount("Bad"), 2)

Пример #4

Показать файл

Файл: test_classifier.py Проект: stefanseibert/DataMining

 def testIncC(self):
     # After each incc() call the raw counter in `cc` for that category
     # must hold the expected running total.
     classifier = Classifier(getwords)
     steps = (("Bad", 1), ("Bad", 2), ("Good", 1))
     for category, expected in steps:
         classifier.incc(category)
         self.assertEqual(classifier.cc[category], expected)

Пример #5

Показать файл

Файл: test_classifier.py Проект: stefanseibert/DataMining

 def testFCount(self):
     # fcount() must return the number of incf() calls per
     # (feature, category) pair and 0 for a feature never counted.
     classifier = Classifier(getwords)
     observations = (("hello", "Good"), ("hello", "Good"), ("hello", "Bad"))
     for feature, category in observations:
         classifier.incf(feature, category)
     self.assertEqual(classifier.fcount("hello", "Good"), 2)
     self.assertEqual(classifier.fcount("hello", "Bad"), 1)
     self.assertEqual(classifier.fcount("wurst", "Bad"), 0)

Пример #6

Показать файл

Файл: test_classifier.py Проект: stefanseibert/DataMining

 def testIncF(self):
     # After each incf() call the nested counter fc[feature][category]
     # must hold the expected running total.
     classifier = Classifier(getwords)
     steps = (
         ("hello", "Good", 1),
         ("hello", "Good", 2),
         ("hello", "Bad", 1),
     )
     for feature, category, expected in steps:
         classifier.incf(feature, category)
         self.assertEqual(classifier.fc[feature][category], expected)

Пример #7

Показать файл

Файл: test.py Проект: ubear/MachineLearn

def test_infc_func():
    """Call Classifier.infc for a few feature/category pairs and print c.fc.

    This is a manual smoke check: nothing is asserted, the resulting
    feature-count structure is only written to stdout for inspection.
    """
    c = Classifier(getfeatures=None)
    c.infc("python", "good")
    c.infc("python", "good")
    c.infc("the", "bad")
    c.infc("the", "good")

    # Parenthesised single-argument form prints the same text on Python 2
    # and is valid syntax on Python 3 (the bare print statement is not).
    print(c.fc)

Пример #8

Показать файл

Файл: parseTechFeed.py Проект: stefanseibert/DataMining

              'http://www.spiegel.de/schlagzeilen/tops/index.rss',
              'http://www.sueddeutsche.de/app/service/rss/alles/rss.xml'
              ]

# Feed URLs bound to the name `test`; not read anywhere in this excerpt.
test=["http://rss.golem.de/rss.php?r=sw&feed=RSS0.91",
          'http://newsfeed.zeit.de/politik/index',  
          'http://www.welt.de/?service=Rss'
           ]

# Per-key counters of processed news entries; only 'tech' is incremented
# in this excerpt.
countnews={}
countnews['tech']=0
countnews['nontech']=0
countnews['test']=0


# NOTE(review): initprob is presumably the probability assumed for features
# without training data -- confirm against the Classifier implementation.
c = Classifier(getwords, initprob=0.5)

# Train the classifier with every entry of every feed in trainTech
# (defined outside this excerpt) under the category "Tech".
print "--------------------News from trainTech------------------------"
for feed in trainTech:
    f=feedparser.parse(feed)
    for e in f.entries:
        print '\n---------------------------'
        # Title and description are joined and passed through stripHTML
        # before being printed and used as the training text.
        fulltext=stripHTML(e.title+' '+e.description)
        print fulltext
        countnews['tech']+=1

        c.train(fulltext,"Tech")

print "----------------------------------------------------------------"
print "----------------------------------------------------------------"
print "----------------------------------------------------------------"

Пример #9

Показать файл

Файл: test_classifier.py Проект: stefanseibert/DataMining

    def testProb(self):
        # prob() for the category "Good" must equal 0.234375 for the
        # document below once the classifier holds the counts set up here.
        classifier = Classifier(getwords)

        # training: each pair registers one document of the category and
        # one occurrence of the feature in it
        training = (
            ("hello", "Good"),
            ("world", "Good"),
            ("world", "Good"),
            ("world", "Bad"),
        )
        for feature, category in training:
            classifier.incc(category)
            classifier.incf(feature, category)

        # classify new document
        document = "world world wurst Wurst wurst world"
        expected = 0.234375

        self.assertEqual(classifier.prob(document, "Good"), expected)

Пример #10

Показать файл

Файл: test_classifier.py Проект: stefanseibert/DataMining

 def testWeightedProb(self):
     # weightedprob() must yield 5/8 for the trained feature "world" and
     # 0.5 for the never-seen feature "wurst", both for category "Good".
     classifier = Classifier(getwords)
     training = (
         ("hello", "Good"),
         ("world", "Good"),
         ("world", "Good"),
         ("world", "Bad"),
     )
     for feature, category in training:
         classifier.incc(category)
         classifier.incf(feature, category)
     expected_world = 5.0 / 8.0
     self.assertEqual(classifier.weightedprob("world", "Good"), expected_world)
     self.assertEqual(classifier.weightedprob("wurst", "Good"), 0.5)

Пример #11

Показать файл

Файл: test_classifier.py Проект: stefanseibert/DataMining

 def testFProb(self):
     # With three "Good" documents, two of them counting "world",
     # fprob("world", "Good") must equal 2/3.
     classifier = Classifier(getwords)
     for feature in ("hello", "world", "world"):
         classifier.incc("Good")
         classifier.incf(feature, "Good")
     expected = 2.0 / 3.0
     self.assertEqual(classifier.fprob("world", "Good"), expected)

Пример #12

Показать файл

Файл: test_classifier.py Проект: stefanseibert/DataMining

 def testTotalCount(self):
     # totalcount() must return the number of incc() calls across all
     # categories.
     classifier = Classifier(getwords)
     for category in ("Bad", "Bad", "Good"):
         classifier.incc(category)
     self.assertEqual(classifier.totalcount(), 3)

Пример #13

Показать файл

Файл: test_classifier.py Проект: stefanseibert/DataMining

    def testClassifier(self):
        # End-to-end check: train on labelled sentences, then classify an
        # unseen one.
        classifier = Classifier(getwords)
        samples = (
            ("nobody owns the water", "Good"),
            ("the quick rabbit jumps fences", "Good"),
            ("buy pharmaceuticals now", "Bad"),
            ("make quick money at the online casino", "Bad"),
            ("the quick brown fox jumps", "Good"),
            ("next meeting is at night", "Good"),
            ("meeting with your superstar", "Bad"),
            ("money like water", "Bad"),
        )
        for text, label in samples:
            classifier.train(text, label)

        # "quick" is part of the test string because plain 'money jumps'
        # scored identically for Good and Bad.
        verdict = classifier.classify("the money jumps quick")
        self.assertEqual(verdict, "Good")

Пример #14

Показать файл

Файл: parseTechFeed.py Проект: fridy77/DataMining

      fulltext=stripHTML(e.title+' '+e.description)
      print fulltext
      data.append(fulltext)
      countnews['test']+=1
# Report how many samples were collected per counter in countnews.
print "----------------------------------------------------------------"
print "----------------------------------------------------------------"
print "----------------------------------------------------------------"

print 'Number of used trainings samples in categorie tech',countnews['tech']
print 'Number of used trainings samples in categorie notech',countnews['nontech']
print 'Number of used test samples',countnews['test']
print '--'*30



rss_classifier = Classifier()

# Train with the texts stored under train_data["good"] / train_data["bad"]
# (train_data is defined outside this excerpt).
for tech in train_data["good"]:
    rss_classifier.train(tech, "good")

for nontech in train_data["bad"]:
    rss_classifier.train(nontech, "bad")

print "---- training finished ---------------------"
# Score every collected test text against both categories.
for test in data:
    g_pb = rss_classifier.prob(test, "good")
    b_pb = rss_classifier.prob(test, "bad")
    # Normalize the probabilities
    # NOTE(review): divides by g_pb + b_pb without a guard; if both values
    # are 0 this raises ZeroDivisionError -- confirm prob() cannot return 0
    # for both categories.
    g_pb_n = g_pb /(g_pb + b_pb)
    b_pb_n = b_pb /(g_pb + b_pb)
    print test

Пример #15

Показать файл

Файл: parseTechFeed_more_categories.py Проект: stefanseibert/DataMining

          'http://www.welt.de/?service=Rss',
          'http://www.haz.de/rss/feed/haz_schlagzeilen']



# Per-key counters of processed news entries; only 'tech' is incremented
# in this excerpt.
countnews={}
countnews['tech']=0
countnews['sports']=0
countnews['economy']=0
countnews['politics']=0
countnews['science']=0

countnews['test']=0


# NOTE(review): initprob is presumably the probability assumed for features
# without training data -- confirm against the Classifier implementation.
c = Classifier(getwords, initprob=0.5)

# Train the classifier with every entry of every feed in trainTech
# (defined outside this excerpt) under the category "Tech".
print "--------------------News from trainTech------------------------"
for feed in trainTech:
    f=feedparser.parse(feed)
    for e in f.entries:
        print '\n---------------------------'
        # Title and description are joined and passed through stripHTML
        # before being printed and used as the training text.
        fulltext=stripHTML(e.title+' '+e.description)
        print fulltext
        countnews['tech']+=1

        c.train(fulltext,"Tech")

print "----------------------------------------------------------------"
print "----------------------------------------------------------------"
print "----------------------------------------------------------------"