def classify_and_validate_dp(name, dataset):
    """Train the linear classifier on `dataset` and print its accuracy on it.

    Class averages come from ``advancedclassify.lineartrain``. Every row is
    then classified with ``advancedclassify.dpclassify`` and the prediction
    is compared with the row's ``match`` attribute. The same rows are used
    for training and for scoring, and the result is printed, not returned.

    Args:
        name: Label shown in square brackets at the start of the report line.
        dataset: Sized, iterable collection of rows exposing ``data`` and
            ``match`` attributes.
    """
    total = len(dataset)
    if total == 0:
        # Nothing to train on or score: report zero accuracy rather than
        # dividing by zero.
        print("[%s] correct: %d/%d, accuracy: %f" % (name, 0, 0, 0.0))
        return

    avgs = advancedclassify.lineartrain(dataset)
    correct_count = sum(
        1 for row in dataset
        if advancedclassify.dpclassify(row.data, avgs) == row.match
    )
    # float() forces true division on Python 2 as well.
    accuracy = float(correct_count) / total
    print("[%s] correct: %d/%d, accuracy: %f"
          % (name, correct_count, total, accuracy))
def get_linear_classification(agesonly):
    """Train on `agesonly` and print the prediction for four sample points.

    Each prediction is preceded by an echo of the call that produced it, in
    the style of an interactive session. The closing "oops!!" line is
    printed unconditionally.

    Args:
        agesonly: Training rows handed to ``advancedclassify.lineartrain``.
    """
    avgs = advancedclassify.lineartrain(agesonly)
    for point in ([30, 30], [30, 25], [25, 40], [48, 20]):
        # str() of a list of ints reproduces the literal, e.g. "[30, 30]".
        print(">>> advancedclassify.dpclassify(%s, avgs)" % (point,))
        print(advancedclassify.dpclassify(point, avgs))
    print("oops!!")
def scalingTheData():
    """Print linear-classifier predictions for rows 0 and 11 after scaling.

    Reloads ``advancedclassify``, loads the numerical dataset, scales it,
    trains on the scaled rows, and then prints each selected row's recorded
    ``match`` next to the class predicted for its scaled data. Row 0's raw
    data is printed first.
    """
    print('## Scaling the Data')
    reload(advancedclassify)

    numericalset = advancedclassify.loadnumerical()
    scaledset, scalef = advancedclassify.scaledata(numericalset)
    avgs = advancedclassify.lineartrain(scaledset)

    # One formatted string gives the same "label value" output as a
    # two-argument print statement.
    print('numericalset[0].data %s' % (numericalset[0].data,))
    for index in (0, 11):
        row = numericalset[index]
        print('numericalset[%d].match %s' % (index, row.match))
        print(advancedclassify.dpclassify(scalef(row.data), avgs))
def get_scaledset(numericalset):
    """Scale `numericalset`, print two spot checks, and return the scaling.

    After training the linear classifier on the scaled rows, the recorded
    ``match`` and the predicted class are printed for rows 0 and 11, each
    preceded by an echo of the expression being shown.

    Args:
        numericalset: Indexable rows exposing ``data`` and ``match``.

    Returns:
        The ``(scaledset, scalef)`` pair produced by
        ``advancedclassify.scaledata``.
    """
    scaledset, scalef = advancedclassify.scaledata(numericalset)
    avgs = advancedclassify.lineartrain(scaledset)

    for index in (0, 11):
        row = numericalset[index]
        print(">>> numericalset[%d].match" % index)
        print(row.match)
        print(">>> advancedclassify.dpclassify(scalef(numericalset[%d].data), avgs)"
              % index)
        print(advancedclassify.dpclassify(scalef(row.data), avgs))

    return scaledset, scalef
def basicLinearClassification():
    """Train on the module-level `agesonly`, plot the averages, print samples.

    Reloads ``advancedclassify``, prints the averages returned by
    ``lineartrain``, plots the first two coordinates of each average (key 0
    as 'bo', key 1 as 'b+'), shows the figure, and finally prints the
    prediction for four sample points.

    Relies on ``agesonly``, ``plot`` and ``show`` being available in the
    enclosing module (presumably pylab/matplotlib -- confirm against the
    file's imports).
    """
    print('## Basic Linear Classification')
    reload(advancedclassify)

    avgs = advancedclassify.lineartrain(agesonly)
    print(avgs)

    for label, centre in avgs.items():
        # Two independent tests, as before: any other key leaves `marker`
        # at whatever value it already had.
        if label == 0:
            marker = 'bo'
        if label == 1:
            marker = 'b+'
        plot(centre[0], centre[1], marker)
    show()

    for first, second in ((30, 30), (30, 25), (25, 40), (48, 20)):
        prediction = advancedclassify.dpclassify([first, second], avgs)
        print('%d,%d %s' % (first, second, prediction))
def test_pg_197_to_214():
    """Run the matchmaker walkthrough end to end, printing every result.

    Steps, in order: load both CSV datasets, plot the age matches, train and
    query the linear classifier, look up a distance between two cities,
    build and scale the numerical dataset, then query the non-linear
    (kernel) classifier on raw ages, on scaled rows, and on two hand-built
    rows.

    Nothing is asserted or returned; the function needs the data files and
    whatever services ``advancedclassify`` calls.
    """
    import advancedclassify

    # --- Matchmaker dataset ---
    agesonly = advancedclassify.loadmatch("agesonly.csv", allnum=True)
    # Loaded to exercise the loader; not used again below.
    matchmaker = advancedclassify.loadmatch("matchmaker.csv")

    # --- Difficulties with the data ---
    # Scatter plot of man's age vs woman's age: O is a match, X is not.
    advancedclassify.plotagematches(agesonly)

    # --- Basic linear classification ---
    avgs = advancedclassify.lineartrain(agesonly)
    print(advancedclassify.dpclassify([30, 30], avgs))
    print(advancedclassify.dpclassify([30, 25], avgs))
    print(advancedclassify.dpclassify([25, 40], avgs))
    print(advancedclassify.dpclassify([48, 20], avgs))

    # --- Determining distances using Yahoo! Maps ---
    # The first city used to be misspelt "cambride, ma".
    print(advancedclassify.milesdistance("cambridge, ma", "new york, ny"))

    # --- Creating the new dataset ---
    numericalset = advancedclassify.loadnumerical()
    print(numericalset[0].data)

    # --- Scaling the dataset ---
    scaledset, scalef = advancedclassify.scaledata(numericalset)
    avgs = advancedclassify.lineartrain(scaledset)
    print(numericalset[0].data)
    print(numericalset[0].match)
    print(advancedclassify.dpclassify(scalef(numericalset[0].data), avgs))
    print(numericalset[11].match)
    print(advancedclassify.dpclassify(scalef(numericalset[11].data), avgs))

    # --- The kernel trick ---
    offset = advancedclassify.getoffset(agesonly)
    print(offset)
    print(advancedclassify.nlclassify([30, 30], agesonly, offset))
    print(advancedclassify.nlclassify([30, 25], agesonly, offset))
    print(advancedclassify.nlclassify([25, 40], agesonly, offset))
    # In contrast to linear classification this now recognises that
    # 48, 20 is not a good match.
    print(advancedclassify.nlclassify([48, 20], agesonly, offset))

    ssoffset = advancedclassify.getoffset(scaledset)
    # 0
    print(numericalset[0].match)
    # 0
    print(advancedclassify.nlclassify(scalef(numericalset[0].data), scaledset, ssoffset))
    # 1
    print(numericalset[1].match)
    # 1
    print(advancedclassify.nlclassify(scalef(numericalset[1].data), scaledset, ssoffset))
    # 0
    print(numericalset[2].match)
    # The print for this one was commented out: the call still runs but its
    # result is discarded.
    advancedclassify.nlclassify(scalef(numericalset[2].data), scaledset, ssoffset)

    # Man doesn't want children, woman does; otherwise a really good match.
    newrow = [28.0, -1, -1, 26.0, -1, 1, 2, 0.8]
    # Classify the hand-built row itself (previously row 0 was classified
    # again and `newrow` was never used).
    print(advancedclassify.nlclassify(scalef(newrow), scaledset, ssoffset))

    # Both want children.
    newrow = [28.0, -1, 1, 26.0, -1, 1, 2, 0.8]
    print(advancedclassify.nlclassify(scalef(newrow), scaledset, ssoffset))
# Load the matchmaker data through the classifier module (`ad`).
agesonly = ad.loadmatch('agesonly.csv', allnum=True)
matchmaker = ad.loadmatch('matchmaker.csv')
# ad.plotagematches(agesonly)

# Re-read the same CSV as plain rows of ints for the decision-tree module
# (`tr`). The with-block closes the handle, which the previous bare
# file(...) loop never did; open() also exists on Python 3, file() does not.
age = []
with open('agesonly.csv') as ages_file:
    for line in ages_file:
        age.append([int(w) for w in line.split(',')])

tree = tr.buildtree(age)
tr.printtree(tree)
tr.drawtree(tree)
print(tr.classify(tree, [65, 63]))

# Linear classifier on the same data, for comparison with the tree.
avgs = ad.lineartrain(agesonly)
print(avgs)
# NOTE(review): the averages are passed as avgs.values(), not the dict
# itself -- presumably this dpclassify indexes them positionally; confirm.
print(ad.dpclassify([30, 25], avgs.values()))
print(ad.dpclassify([25, 40], avgs.values()))
print(ad.dpclassify([48, 20], avgs.values()))

print(tr.classify(tree, [30, 25]))
print(tr.classify(tree, [25, 40]))
print(tr.classify(tree, [48, 20]))

numericalset = ad.loadnumerical()
# Bare expression kept from the original: it only touches the first row.
numericalset[0].data
import advancedclassify

# Load both matchmaker datasets; only the ages-only one is used below.
agesonly = advancedclassify.loadmatch('agesonly.csv', allnum=True)
matchmaker = advancedclassify.loadmatch('matchmaker.csv')
# advancedclassify.plotagematches(agesonly)

# Train the linear classifier and print its prediction for four age pairs.
avgs = advancedclassify.lineartrain(agesonly)
for sample_point in ([30, 30], [30, 25], [25, 40], [48, 20]):
    print(advancedclassify.dpclassify(sample_point, avgs))

# Print whatever getlocation returns for a street address.
print(advancedclassify.getlocation('1 alewife center, cambridge, ma'))
import advancedclassify

# Replay of an interactive session: the module is reloaded before each
# step, and call results are discarded rather than printed.
agesonly = advancedclassify.loadmatch('agesonly.csv', allnum=True)
matchmaker = advancedclassify.loadmatch('matchmaker.csv')

reload(advancedclassify)
avgs = advancedclassify.lineartrain(agesonly)

reload(advancedclassify)
for sample_point in ([30, 30], [48, 32], [25, 40], [48, 20]):
    advancedclassify.dpclassify(sample_point, avgs)

reload(advancedclassify)
numericalset = advancedclassify.loadnumerical()
numericalset[0].data

reload(advancedclassify)
scaledset, scalef = advancedclassify.scaledata(numericalset)
avgs = advancedclassify.lineartrain(scaledset)
numericalset[0].data

reload(advancedclassify)
offset = advancedclassify.getoffset(agesonly)
for sample_point in ([30, 30], [48, 32], [25, 40], [48, 20]):
    advancedclassify.nlclassify(sample_point, agesonly, offset)
def test_pg_197_to_214():
    """Run the matchmaker walkthrough end to end, printing every result.

    Covers, in order: loading both CSV datasets, plotting the age matches,
    the linear classifier, a city-to-city distance lookup, building and
    scaling the numerical dataset, and the non-linear (kernel) classifier on
    raw ages, on scaled rows, and on two hand-built rows.

    Nothing is asserted or returned; the function needs the data files and
    whatever services ``advancedclassify`` calls.
    """
    import advancedclassify

    age_pairs = ([30, 30], [30, 25], [25, 40], [48, 20])

    # --- Matchmaker dataset ---
    agesonly = advancedclassify.loadmatch("agesonly.csv", allnum=True)
    # Loaded to exercise the loader; not used again below.
    matchmaker = advancedclassify.loadmatch("matchmaker.csv")

    # --- Difficulties with the data ---
    # Scatter plot of man's age vs woman's age: O is a match, X is not.
    advancedclassify.plotagematches(agesonly)

    # --- Basic linear classification ---
    avgs = advancedclassify.lineartrain(agesonly)
    for pair in age_pairs:
        print(advancedclassify.dpclassify(pair, avgs))

    # --- Determining distances using Yahoo! Maps ---
    # The first city used to be misspelt "cambride, ma".
    print(advancedclassify.milesdistance("cambridge, ma", "new york, ny"))

    # --- Creating the new dataset ---
    numericalset = advancedclassify.loadnumerical()
    print(numericalset[0].data)

    # --- Scaling the dataset ---
    scaledset, scalef = advancedclassify.scaledata(numericalset)
    avgs = advancedclassify.lineartrain(scaledset)
    print(numericalset[0].data)
    for index in (0, 11):
        row = numericalset[index]
        print(row.match)
        print(advancedclassify.dpclassify(scalef(row.data), avgs))

    # --- The kernel trick ---
    offset = advancedclassify.getoffset(agesonly)
    print(offset)
    # In contrast to linear classification, the last pair (48, 20) is now
    # recognised as not being a good match.
    for pair in age_pairs:
        print(advancedclassify.nlclassify(pair, agesonly, offset))

    ssoffset = advancedclassify.getoffset(scaledset)
    # Recorded match followed by the kernel prediction; the original notes
    # expect 0, 0 for row 0 and 1, 1 for row 1.
    for index in (0, 1):
        row = numericalset[index]
        print(row.match)
        print(advancedclassify.nlclassify(scalef(row.data), scaledset, ssoffset))

    # 0
    print(numericalset[2].match)
    # The print for this one was commented out: the call still runs but its
    # result is discarded.
    advancedclassify.nlclassify(scalef(numericalset[2].data), scaledset, ssoffset)

    hand_built_rows = (
        # Man doesn't want children, woman does; otherwise a really good match.
        [28.0, -1, -1, 26.0, -1, 1, 2, 0.8],
        # Both want children.
        [28.0, -1, 1, 26.0, -1, 1, 2, 0.8],
    )
    for newrow in hand_built_rows:
        # Classify the hand-built row itself (previously row 0 was
        # classified again and `newrow` was never used).
        print(advancedclassify.nlclassify(scalef(newrow), scaledset, ssoffset))
# advancedclassifytest.py
import advancedclassify

# Load the raw datasets, then build the scaled numerical set together with
# the linear averages and the kernel offset derived from it.
agesonly = advancedclassify.loadmatch('agesonly.csv', allnum=True)
matchmaker = advancedclassify.loadmatch('matchmaker.csv')
numericalset = advancedclassify.loadnumerical()
scaledset, scalef = advancedclassify.scaledata(numericalset)
avgs = advancedclassify.lineartrain(scaledset)
ssoffset = advancedclassify.getoffset(scaledset)

# For the first three rows print a heading, the recorded match, the kernel
# classifier's prediction, and a separator line.
for row_index in range(3):
    current_row = numericalset[row_index]
    print('classify numericalset[%d] : ' % row_index)
    print(current_row.match)
    print(advancedclassify.nlclassify(scalef(current_row.data), scaledset, ssoffset))
    print('------------------------------')

print('classify newrow0 : ')
newrow0 = [28.0, -1, -1, 26.0, -1, 1, 2, 0.8]