示例#1
0
 def test_basic(self):
     # Each case is (text, training examples keyed by label, label that
     # `classify` is expected to return for that text).
     cases = [
         ('a', {'A': ['a'], 'B': ['b']}, 'A'),
         ('a a a', {'A': ['a'], 'B': ['b']}, 'A'),
         ('a a b', {'A': ['a'], 'B': ['b']}, 'A'),
         ('a a b', {'A': ['a', 'a'], 'B': ['b']}, 'A'),
         ('a b b', {'A': ['a', 'a'], 'B': ['b']}, 'B'),
         ('b b b', {'A': ['a', 'a'], 'B': ['b']}, 'B'),
     ]
     for text, training, expected in cases:
         self.assertEqual(classify(text, training), expected)
示例#2
0
 def test_basic(self):
     # Two training sets are exercised: one example per label, and one where
     # label 'A' has a duplicated example. Every entry below is
     # (text, training examples keyed by label, expected label).
     scenarios = (
         ('a', {'A': ['a'], 'B': ['b']}, 'A'),
         ('a a a', {'A': ['a'], 'B': ['b']}, 'A'),
         ('a a b', {'A': ['a'], 'B': ['b']}, 'A'),
         ('a a b', {'A': ['a', 'a'], 'B': ['b']}, 'A'),
         ('a b b', {'A': ['a', 'a'], 'B': ['b']}, 'B'),
         ('b b b', {'A': ['a', 'a'], 'B': ['b']}, 'B'),
     )
     for sample, examples, label in scenarios:
         self.assertEqual(classify(sample, examples), label)
示例#3
0
文件: main.py 项目: foolOnTheHill/ml
def run():
    """Run the full experiment and append every result to the report files.

    The data is reloaded 10 times via ``data_proccessing.loadData()``; for
    each load, fold indices 0-9 are passed to ``prepareSets`` to obtain the
    train/test split. On every split the Bayesian classifier is evaluated
    once, and for each neighbour count in ``K`` the KNN classifier and the
    sum rule (combining the Bayesian and KNN outputs) are evaluated.

    Results are appended to ``part2-results-bayesian.txt``,
    ``part2-results-knn.txt`` and ``part2-results-sum.txt``. The error rates
    collected per classifier are finally handed to ``compare``.
    """
    classificadores = ["Bayesian", "KNN 1", "KNN 5", "KNN 10", "KNN 20", "KNN 30", "Sum 1", "Sum 5", "Sum 10", "Sum 20", "Sum 30"]
    errorResults = {c: [] for c in classificadores}

    K = [30, 20, 10, 5, 1]

    # The context manager guarantees the three report files are closed even
    # if loading the data or one of the classifiers raises midway.
    with open('part2-results-bayesian.txt', 'a') as resultsBay, \
            open('part2-results-knn.txt', 'a') as resultsKn, \
            open('part2-results-sum.txt', 'a') as resultsSum:
        for j in range(10):
            H = data_proccessing.loadData()  # Folds

            for i in range(10):
                # Rounds are numbered 1..100 across all repetitions.
                header = "Round %d\n\n" % (j*10+i+1)
                for report in (resultsBay, resultsKn, resultsSum):
                    report.write(header)

                (trainX, trainY, testX, testY) = prepareSets(H, i)
                (P_bay, _, e_rate_bay, se_bay, interval_bay) = bayesian.classify(trainX, trainY, testX, testY)

                resultsBay.write("- Bayesian\n")
                writeResults(resultsBay, e_rate_bay, se_bay, interval_bay)
                errorResults["Bayesian"].append(e_rate_bay)

                for k in K:
                    (P_kn, _, e_rate_kn, se_kn, interval_kn) = knn.classify(trainX, trainY, testX, testY, k)
                    # The sum rule combines the Bayesian output with the KNN
                    # output obtained for this particular k.
                    (_, _, e_rate_sum, se_sum, interval_sum) = sum_rule.classify([P_bay, P_kn], testX, testY)

                    resultsKn.write("- KNN (n = %d)\n" % k)
                    writeResults(resultsKn, e_rate_kn, se_kn, interval_kn)
                    errorResults["KNN %i" % k].append(e_rate_kn)

                    resultsSum.write("- Sum (n = %d)\n" % k)
                    writeResults(resultsSum, e_rate_sum, se_sum, interval_sum)
                    errorResults["Sum %i" % k].append(e_rate_sum)

    compare(errorResults)
示例#4
0
# Dump the known age ranges and how many training samples each class holds.
for i in agerangeset:
  print(i)
print("age class size:")
for k, v in ageclass.items():
  print(k, ":", len(v))

print("begin classify")
# Results are collected first and written back only after the SELECT loop has
# finished (presumably to avoid updating the table while the same cursor is
# still being iterated -- confirm against the DB driver in use).
rescol = []
for count1, row in enumerate(c.execute("select * from knowledge_detail where mark = \"program\" limit 3000"), 1):
  print(count1)
  print(row[0])
  print(row[1])
  curclass = classify(row[1], ageclass, extractor=cutword)
  # Call syntax with a single argument prints the same text on Python 2 and
  # Python 3; the former bare `print curclass` was a SyntaxError on Python 3.
  print(curclass)
  # Class labels have the form "<min>*<max>".
  (curmin, curmax) = curclass.split('*')
  print("curmin:", curmin)
  print("curmax:", curmax)
  rescol.append([row[0], curmin, curmax])

# Store the predicted range and re-mark each processed row as "programa".
for r in rescol:
  c.execute("update knowledge_detail set min=?, max=?, mark=\"programa\" where serial=?", (r[1], r[2], r[0]))

conn.commit()
conn.close()
             "spam" : numberOfSpam,
             "ham" : numberOfHam
           }

# Load both training corpora and the messages that are to be filtered.
ham = readMailsFromFile("./corpus/nospam2.txt")
spam = readMailsFromFile("./corpus/spam2.txt")
tests = readTestMailsFromFile("./corpus/test-final.txt")

print("Messages to filter: %d" % len(tests["messages"]))
hamCount = spamCount = 0
goodHam = goodSpam = 0

for index, test in enumerate(tests["messages"]):
    classif = classify(test, {'spam': spam, 'ham': ham})

    # Tally the predicted label; anything that is not 'ham' counts as spam.
    is_ham = classif == 'ham'
    if is_ham:
        hamCount += 1
    else:
        spamCount += 1

    # The first three characters hold the expected answer: a "TAK" message
    # is a hit when classified 'spam', a "NIE" message when classified 'ham'.
    marker = test[:3]
    if marker == "TAK" and classif == 'spam':
        goodSpam += 1
    elif marker == "NIE" and is_ham:
        goodHam += 1

    # Drop the marker prefix before showing a preview of the message.
    test = test[4:]
    print("%d. \"%s...\" is" % (index,test[:50]), classif)

print("SPAM: %d" % spamCount)
print("HAM: %d" % hamCount)
print("Accuracy: ", ((goodSpam+goodHam)/float(len(tests["messages"])))*100, "%")
示例#6
0
            'foot size': 7
        }, {
            'height': 5.75,
            'weight': 150,
            'foot size': 9
        }]
    }))

print('')

print(' -- Spam Detection With `Classify` -- ')
# Tiny hand-written training corpus, a few example phrases per label.
spams = [
    "buy viagra",
    "dear recipient",
    "meet sexy singles",
]  # etc
genuines = [
    "let's meet tomorrow",
    "remember to buy milk",
]
message = "remember the meeting tomorrow"
# Expected to print "genuine": the message shares the words "remember" and
# "tomorrow" with the genuine examples.
print(classify(message, {'spam': spams, 'genuine': genuines}))

# Disabled example: classify "unknown_file" as either a Python or a Java
# file, given directories holding examples of each language.
#print classify_file("unknown_file", ["java_files", "python_files"])

# Disabled example: classify every file under "folder" as either a Python or
# a Java file, given subdirectories holding examples of each language.
#print classify_folder("folder")

print('')

print(' == Low Level Functions == ')

print(' -- Classic Cancer Test Problem --')
# 1% chance of having cancer.
示例#7
0
def get_language(s):
    """Return the label `classify` assigns to `s`.

    The training data comes from `_get_train_set()`, which is fetched anew
    on every call. Judging by the function name the labels are presumably
    language names -- confirm against `_get_train_set`.
    """
    return classify(s, _get_train_set())
示例#8
0
 def test_sample(self):
     # Small spam/genuine corpus; the message shares the words "remember"
     # and "tomorrow" with the genuine examples and must be labelled so.
     instances = {
         'spam': ["buy viagra", "dear recipient", "meet sexy singles"],
         'genuine': ["let's meet tomorrow", "remember to buy milk"],
     }
     predicted = classify("remember the meeting tomorrow", instances)
     self.assertEqual(predicted, 'genuine')
示例#9
0
 def test_with_extraction(self):
     # Training examples here contain multi-token strings ('a a a',
     # 'b b b'); the single-token text 'a' must still be labelled 'A'.
     training_sets = (
         {'A': ['a a a'], 'B': ['b']},
         {'A': ['a', 'a'], 'B': ['b b b']},
     )
     for training in training_sets:
         self.assertEqual(classify('a', training), 'A')
示例#10
0
 def test_single(self):
     # With 'A' as the only label, it must be returned whatever examples
     # back it -- including none at all.
     for examples in ([], ['a'], ['a', 'a'], ['a', 'b']):
         self.assertEqual(classify('a', {'A': examples}), 'A')
示例#11
0
def get_language(s):
    """Classify `s` against the training set and return the resulting label.

    The training set is obtained from `_get_train_set()` on each call.
    NOTE(review): the name suggests the labels are languages; verify
    against `_get_train_set`.
    """
    training_set = _get_train_set()
    label = classify(s, training_set)
    return label
示例#12
0
 def test_sample(self):
     # The message has "remember" and "tomorrow" in common with the genuine
     # examples, so 'genuine' is the expected label.
     training = {
         'spam': [
             "buy viagra",
             "dear recipient",
             "meet sexy singles",
         ],
         'genuine': [
             "let's meet tomorrow",
             "remember to buy milk",
         ],
     }
     self.assertEqual(
         classify("remember the meeting tomorrow", training),
         'genuine',
     )
示例#13
0
 def test_with_extraction(self):
     # Each case is (training examples keyed by label, expected label) for
     # the text 'a'; some examples hold several space-separated tokens.
     cases = [
         ({'A': ['a a a'], 'B': ['b']}, 'A'),
         ({'A': ['a', 'a'], 'B': ['b b b']}, 'A'),
     ]
     for examples, expected in cases:
         self.assertEqual(classify('a', examples), expected)
示例#14
0
 def test_single(self):
     # Only one label exists, so `classify` must return it for every
     # variation of that label's example list (empty list included).
     example_lists = [
         [],
         ['a'],
         ['a', 'a'],
         ['a', 'b'],
     ]
     for only_examples in example_lists:
         result = classify('a', {'A': only_examples})
         self.assertEqual(result, 'A')
示例#15
0
# -*- coding: utf-8 -*-
from bayesian import classify, classify_file
from cutword import cutword

# Sample Chinese training phrases for each label.
spams = [u"什么", u"哦 好的", u"是嘛 这样啊  你好啊"] # etc
genuines = [u"明天看电影", u"想吃饭了"]
message = u"看电影能吃饭吗"
# Expected to be labelled "genuine": the message overlaps with the genuine
# samples, assuming `cutword` segments the shared words out of the unspaced
# text -- TODO confirm against `cutword`.
# Call syntax with a single argument prints the same text on Python 2 and
# Python 3; the former bare print statement was a SyntaxError on Python 3.
print(classify(message, {'spam': spams, 'genuine': genuines}, extractor=cutword))
示例#16
0
                                {'height': 5.92, 'weight': 190, 'foot size': 11},
                                {'height': 5.58, 'weight': 170, 'foot size': 12},
                                {'height': 5.92, 'weight': 165, 'foot size': 10}],
                       'female': [{'height': 5, 'weight': 100, 'foot size': 6},
                                  {'height': 5.5, 'weight': 150, 'foot size': 8},
                                  {'height': 5.42, 'weight': 130, 'foot size': 7},
                                  {'height': 5.75, 'weight': 150, 'foot size': 9}]}))

print('')

print(' -- Spam Detection With `Classify` -- ')
# Example phrases for each of the two labels used in this demo.
spams = [
    "buy viagra",
    "dear recipient",
    "meet sexy singles",
]  # etc
genuines = [
    "let's meet tomorrow",
    "remember to buy milk",
]
message = "remember the meeting tomorrow"
# Should print "genuine", because "remember" and "tomorrow" both occur in
# the genuine examples.
print(classify(message, {'spam': spams, 'genuine': genuines}))

# Disabled example: decide whether "unknown_file" is a Python or a Java
# file, given directories with examples of each language.
#print classify_file("unknown_file", ["java_files", "python_files"])

# Disabled example: decide, for every file under "folder", whether it is a
# Python or a Java file, given subdirectories with examples of each language.
#print classify_folder("folder")

print('')

print(' == Low Level Functions == ')

print(' -- Classic Cancer Test Problem --')
# 1% chance of having cancer.