MINDIFF, PERSISTENCE) print 'Using spambase data folded {} ways. Testing on fold {}.'.format( opt['folds'], opt['testfold']) # stopping conditions if opt['rounds'] is None: def doround(err, rndct): return checkconverged(err) else: def doround(err, rndct): return countdown(rndct) # load from file spambase.load() # roll into folds folds = [[] for i in xrange(opt['folds'])] k = 0 # kurrent fold for dp in spambase.data: # change the 0,1 labels to -1,1 dp.label = 1 if dp.label else -1 # add to the current fold & switch to the next fold folds[k].append(dp) k = (k + 1) % opt['folds'] # unroll to testing & training testing = folds.pop(opt['testfold']) training = reduce(lambda acc, cur: acc + cur, folds) del folds
def main():
    """Load 'spambase.data' and build a StumpCollection from its training set.

    NOTE(review): `sc` is assigned but not read in the visible code, so the
    function may continue beyond this excerpt -- the name is kept as-is.
    """
    spambase.load('spambase.data')
    trainset = spambase.trainset
    sc = StumpCollection(trainset)
# NOTE(review): the statements down to the separator bar close out a routine
# whose beginning lies above this excerpt; their code is left exactly as found.
# They compute the AUC of `roc` and write every (fpr, tpr) pair as one
# "fpr, tpr" line to a file whose lower-cased name records
# regression.__name__, gdname, suffix and the AUC value.
auc = resultset.auc(roc)
with open('{}-{}_lambda={}_auc={}'.format(regression.__name__,
                                          gdname, suffix, auc).lower(),
          mode='wb') as fd:
    for fpr, tpr in roc:
        fd.write('{}, {}\n'.format(fpr, tpr))


###############################################################################

if __name__ == '__main__':
    # Emit a blank line before any other output (Python 2 print statement).
    print

    # Read the spambase records from file and keep a handle on them.
    spambase.load()
    d = spambase.data

    def pre(lon):
        """Return stats.zscore(lon, ...) using statistics taken from lon itself.

        lon -- the values handed over by dataset.applykernel
               (presumably one feature's list of numbers -- TODO confirm).
        """
        center = stats.mean(lon)
        # presumably a variance estimate that stddev turns into a deviation;
        # verify against the stats module
        mvue_value = stats.mvue(lon, center)
        spread = stats.stddev(mvue_value)
        return stats.zscore(lon, center, spread)

    # z-score the feature values
    dataset.applykernel(d, pre)

    # Insert the phantom feature: a constant 1.0 in front of each data
    # point's existing features.
    for dp in d:
        widened = [1.0]
        widened.extend(dp.features)
        dp.features = widened

    # roll into folds: start from FOLDCOUNT empty buckets
    folds = [[] for i in xrange(FOLDCOUNT)]