def loadnumerical():
    """Build an all-numeric version of the matchmaker dataset.

    Loads 'matchmaker.csv' through ``advancedclassify.loadmatch`` and, for
    every row, builds a numeric feature list from the row's ``data``:

    * ``d[0]`` converted with ``float``
    * ``d[1]``, ``d[2]``, ``d[5]``, ``d[6]``, ``d[7]`` passed through
      ``advancedclassify.yesno``
    * ``advancedclassify.matchcount(d[3], d[8])``
    * ``milesdistance(d[4], d[9])``
    * the row's ``match`` value as the last element

    Each feature list is wrapped in ``advancedclassify.matchrow``, printed,
    and also written (as the list's string form, one per line) to
    'numerical.txt'.

    Returns:
        list: the ``matchrow`` objects, in the order of the input rows.
    """
    oldrows = advancedclassify.loadmatch('matchmaker.csv')
    newrows = []
    # The context manager guarantees the file is closed even when a
    # conversion (float(), milesdistance(), ...) raises part-way through.
    with open('numerical.txt', 'w+') as out:
        for row in oldrows:
            d = row.data
            data = [
                float(d[0]),
                advancedclassify.yesno(d[1]),
                advancedclassify.yesno(d[2]),
                advancedclassify.yesno(d[5]),
                advancedclassify.yesno(d[6]),
                advancedclassify.yesno(d[7]),
                advancedclassify.matchcount(d[3], d[8]),
                milesdistance(d[4], d[9]),
                row.match,
            ]
            # Build the row object once and reuse it for both the result
            # list and the console output.
            numeric_row = advancedclassify.matchrow(data)
            newrows.append(numeric_row)
            print(numeric_row)
            out.write("%s\n" % data)
    return newrows
def loadnumerical():
    """Convert the rows of 'matchmaker.csv' into numeric ``matchrow`` objects.

    Rows come from ``advancedclassify.loadmatch('matchmaker.csv')``. For each
    one a feature list is assembled from ``row.data`` (indices refer to that
    list):

    * index 0 is converted with ``float``
    * indices 1, 2, 5, 6 and 7 go through ``advancedclassify.yesno``
    * indices 3 and 8 are combined by ``advancedclassify.matchcount``
    * indices 4 and 9 are combined by ``milesdistance``
    * ``row.match`` is appended last

    Side effects: every converted row is printed, and the string form of
    every feature list is written as one line of 'numerical.txt'.

    Returns:
        list: one ``advancedclassify.matchrow`` per input row, in input order.
    """
    oldrows = advancedclassify.loadmatch('matchmaker.csv')
    newrows = []
    # Use a context manager so the output file cannot leak if any of the
    # per-row conversions raises.
    with open('numerical.txt', 'w+') as out:
        for row in oldrows:
            d = row.data
            data = [
                float(d[0]),
                advancedclassify.yesno(d[1]),
                advancedclassify.yesno(d[2]),
                advancedclassify.yesno(d[5]),
                advancedclassify.yesno(d[6]),
                advancedclassify.yesno(d[7]),
                advancedclassify.matchcount(d[3], d[8]),
                milesdistance(d[4], d[9]),
                row.match,
            ]
            # One matchrow per input row; the same object is stored and printed.
            converted = advancedclassify.matchrow(data)
            newrows.append(converted)
            print(converted)
            out.write("%s\n" % data)
    return newrows
import advancedclassify

# Load the ages-only dataset with allnum=True, then hand the rows to
# plotagematches.
ageonly = advancedclassify.loadmatch(
    'agesonly.csv',
    allnum=True,
)
advancedclassify.plotagematches(ageonly)
def test_pg_197_to_214():
    """Run the matchmaker walkthrough and print every intermediate result.

    This is a manual smoke test: nothing is asserted, results are printed for
    inspection. It reads 'agesonly.csv' and 'matchmaker.csv', opens a plot via
    ``advancedclassify.plotagematches`` and calls
    ``advancedclassify.milesdistance`` / ``loadnumerical``.

    ``print(...)`` is always called with a single argument, so the output is
    the same under the Python 2 print statement and the Python 3 function.
    """
    import advancedclassify

    # --- Matchmaker dataset ---
    agesonly = advancedclassify.loadmatch("agesonly.csv", allnum=True)
    # Loaded only to check that the file parses; the rows are not used below.
    matchmaker = advancedclassify.loadmatch("matchmaker.csv")

    # --- Difficulties with the data ---
    # Scatter plot of man's age vs woman's age: O is a match, X is not.
    advancedclassify.plotagematches(agesonly)

    # --- Basic linear classification ---
    sample_ages = ([30, 30], [30, 25], [25, 40], [48, 20])
    avgs = advancedclassify.lineartrain(agesonly)
    for ages in sample_ages:
        print(advancedclassify.dpclassify(ages, avgs))

    # --- Determining distances between two locations ---
    # The original passed "cambride, ma", a typo for Cambridge.
    print(advancedclassify.milesdistance("cambridge, ma", "new york, ny"))

    # --- Creating the new dataset ---
    numericalset = advancedclassify.loadnumerical()
    print(numericalset[0].data)

    # --- Scaling the dataset ---
    scaledset, scalef = advancedclassify.scaledata(numericalset)
    avgs = advancedclassify.lineartrain(scaledset)
    print(numericalset[0].data)
    print(numericalset[0].match)
    print(advancedclassify.dpclassify(scalef(numericalset[0].data), avgs))
    print(numericalset[11].match)
    print(advancedclassify.dpclassify(scalef(numericalset[11].data), avgs))

    # --- The kernel trick ---
    offset = advancedclassify.getoffset(agesonly)
    print(offset)
    # Per the original notes, the non-linear classifier (unlike the linear
    # one) recognises that [48, 20] is not a good match.
    for ages in sample_ages:
        print(advancedclassify.nlclassify(ages, agesonly, offset))

    ssoffset = advancedclassify.getoffset(scaledset)
    # For each of the first three rows print the recorded match followed by
    # the prediction; the original notes expect 0/0, 1/1 and 0/0.
    # The prediction for row 2 used to be computed but never printed.
    for index in (0, 1, 2):
        print(numericalset[index].match)
        print(advancedclassify.nlclassify(
            scalef(numericalset[index].data), scaledset, ssoffset))

    # Hand-built rows. The original assigned these lists but then classified
    # numericalset[0] again, so the new rows were never evaluated.

    # Man doesn't want children, woman does; otherwise a really good match.
    newrow = [28.0, -1, -1, 26.0, -1, 1, 2, 0.8]
    print(advancedclassify.nlclassify(scalef(newrow), scaledset, ssoffset))

    # Both want children.
    newrow = [28.0, -1, 1, 26.0, -1, 1, 2, 0.8]
    print(advancedclassify.nlclassify(scalef(newrow), scaledset, ssoffset))
import advancedclassify as ad
import treepredict as tr

# Compare a decision tree with the linear classifier on the ages-only data.
agesonly = ad.loadmatch('agesonly.csv', allnum=True)
matchmaker = ad.loadmatch('matchmaker.csv')
# ad.plotagematches(agesonly)

# Parse the CSV into rows of ints for the tree builder. open() replaces the
# Python 2-only file() builtin, and the context manager closes the handle,
# which the original never did.
with open('agesonly.csv') as source:
    age = [[int(field) for field in line.split(',')] for line in source]

tree = tr.buildtree(age)
tr.printtree(tree)
tr.drawtree(tree)
print(tr.classify(tree, [65, 63]))

avgs = ad.lineartrain(agesonly)
print(avgs)
# NOTE(review): dpclassify receives avgs.values() rather than avgs itself;
# this relies on how ad.dpclassify consumes its second argument - confirm
# against that function before changing it.
print(ad.dpclassify([30, 25], avgs.values()))
print(ad.dpclassify([25, 40], avgs.values()))
print(ad.dpclassify([48, 20], avgs.values()))

# Same three points through the decision tree for comparison.
print(tr.classify(tree, [30, 25]))
print(tr.classify(tree, [25, 40]))
print(tr.classify(tree, [48, 20]))
import advancedclassify

# Load both datasets; only the ages-only rows are used below.
agesonly = advancedclassify.loadmatch('agesonly.csv', allnum=True)
matchmaker = advancedclassify.loadmatch('matchmaker.csv')
# advancedclassify.plotagematches(agesonly)

# Train on the ages-only rows and classify four sample age pairs.
avgs = advancedclassify.lineartrain(agesonly)
for point in ([30, 30], [30, 25], [25, 40], [48, 20]):
    print(advancedclassify.dpclassify(point, avgs))

# Print the getlocation result for a sample street address.
print(advancedclassify.getlocation('1 alewife center, cambridge, ma'))
def makerDataset():
    """Print a heading, load both datasets, and dump the first matchmaker row.

    The first row of 'matchmaker.csv' is shown as comma-separated
    ``name:value`` pairs taken from the row object's ``__dict__``.
    """
    print('## Matchmaker Dataset')
    import advancedclassify

    # The ages-only rows are loaded but not otherwise used here.
    agesonly = advancedclassify.loadmatch('agesonly.csv', allnum=True)
    matchmaker = advancedclassify.loadmatch('matchmaker.csv')

    first_row = matchmaker[0]
    pairs = []
    for item in first_row.__dict__.items():
        pairs.append('%s:%s' % item)
    print(', '.join(pairs))
import advancedclassify

# Read 'agesonly.csv' with allnum=True and pass the resulting rows to
# plotagematches.
ageonly = advancedclassify.loadmatch(
    'agesonly.csv',
    allnum=True,
)
advancedclassify.plotagematches(ageonly)
def get_dataset():
    """Load both match datasets and return them as a pair.

    Prints a progress message first.

    Returns:
        tuple: ``(agesonly, matchmaker)``, the results of
        ``advancedclassify.loadmatch`` for 'agesonly.csv' (with
        ``allnum=True``) and 'matchmaker.csv', in that order.
    """
    print("getting data...")
    return (
        advancedclassify.loadmatch('agesonly.csv', allnum=True),
        advancedclassify.loadmatch('matchmaker.csv'),
    )
def test_pg_197_to_214():
    """Exercise the matchmaker examples end to end, printing each result.

    Nothing is asserted; the printed values are meant to be checked by eye.
    The function reads 'agesonly.csv' and 'matchmaker.csv', shows a plot
    through ``advancedclassify.plotagematches`` and calls
    ``advancedclassify.milesdistance`` and ``loadnumerical``.

    Every ``print(...)`` takes exactly one argument, which keeps the output
    identical whether ``print`` is the Python 2 statement or the Python 3
    function.
    """
    import advancedclassify

    # --- Matchmaker dataset ---
    agesonly = advancedclassify.loadmatch("agesonly.csv", allnum=True)
    # Only loaded to confirm the file can be read; not used further down.
    matchmaker = advancedclassify.loadmatch("matchmaker.csv")

    # --- Difficulties with the data ---
    # Scatter plot of man's age vs woman's age: O is a match, X is not.
    advancedclassify.plotagematches(agesonly)

    # --- Basic linear classification ---
    age_pairs = ([30, 30], [30, 25], [25, 40], [48, 20])
    avgs = advancedclassify.lineartrain(agesonly)
    for pair in age_pairs:
        print(advancedclassify.dpclassify(pair, avgs))

    # --- Determining distances between two locations ---
    # Location string corrected from the misspelled "cambride, ma".
    print(advancedclassify.milesdistance("cambridge, ma", "new york, ny"))

    # --- Creating the new dataset ---
    numericalset = advancedclassify.loadnumerical()
    print(numericalset[0].data)

    # --- Scaling the dataset ---
    scaledset, scalef = advancedclassify.scaledata(numericalset)
    avgs = advancedclassify.lineartrain(scaledset)
    print(numericalset[0].data)
    print(numericalset[0].match)
    print(advancedclassify.dpclassify(scalef(numericalset[0].data), avgs))
    print(numericalset[11].match)
    print(advancedclassify.dpclassify(scalef(numericalset[11].data), avgs))

    # --- The kernel trick ---
    offset = advancedclassify.getoffset(agesonly)
    print(offset)
    # The original notes say that, in contrast to linear classification,
    # this classifier recognises [48, 20] as not a good match.
    for pair in age_pairs:
        print(advancedclassify.nlclassify(pair, agesonly, offset))

    ssoffset = advancedclassify.getoffset(scaledset)
    # Recorded match, then prediction, for rows 0-2 (the original notes
    # expect 0/0, 1/1, 0/0). The row-2 prediction was previously evaluated
    # but its value was thrown away instead of printed.
    for index in (0, 1, 2):
        print(numericalset[index].match)
        print(advancedclassify.nlclassify(
            scalef(numericalset[index].data), scaledset, ssoffset))

    # Hand-built rows. These were previously assigned and then ignored: the
    # classifier was called on numericalset[0] instead of on newrow.

    # Man doesn't want children, woman does; otherwise a really good match.
    newrow = [28.0, -1, -1, 26.0, -1, 1, 2, 0.8]
    print(advancedclassify.nlclassify(scalef(newrow), scaledset, ssoffset))

    # Both want children.
    newrow = [28.0, -1, 1, 26.0, -1, 1, 2, 0.8]
    print(advancedclassify.nlclassify(scalef(newrow), scaledset, ssoffset))
# advancedclassifytest.py
import advancedclassify

# Load the raw datasets, build the numeric dataset, scale it, and compute the
# values needed by the classifiers.
agesonly = advancedclassify.loadmatch('agesonly.csv', allnum=True)
matchmaker = advancedclassify.loadmatch('matchmaker.csv')
numericalset = advancedclassify.loadnumerical()
scaledset, scalef = advancedclassify.scaledata(numericalset)
avgs = advancedclassify.lineartrain(scaledset)
ssoffset = advancedclassify.getoffset(scaledset)

# For each of the first three rows: a label, the recorded match, the
# nlclassify prediction on the scaled row, and a separator line.
labelled_rows = (
    ('classify numericalset[0] : ', numericalset[0]),
    ('classify numericalset[1] : ', numericalset[1]),
    ('classify numericalset[2] : ', numericalset[2]),
)
for label, row in labelled_rows:
    print(label)
    print(row.match)
    print(advancedclassify.nlclassify(scalef(row.data), scaledset, ssoffset))
    print('------------------------------')

print('classify newrow0 : ')
# NOTE(review): newrow0 is not used by any statement visible here.
newrow0 = [28.0, -1, -1, 26.0, -1, 1, 2, 0.8]