def nfoldcrossvalidation(features, labels, **kwargs):
    '''
    jug_task = nfoldcrossvalidation(features, labels, **kwargs)

    Build a jug Task that performs n-fold cross-validation.

    The folds of a cross-validation are independent of each other, so one
    task is created per fold (each one calls ``milk.nfoldcrossvalidation``
    with a single-element ``folds`` list) and the per-fold results are then
    combined with ``_nfold_reduce``. This lets jug parallelise the work.

    Parameters
    ----------
    features : sequence of features
    labels : sequence
    kwargs : any
        Passed down to ``milk.nfoldcrossvalidation``. ``nfolds``
        (default: 10) is also read here to decide how many per-fold tasks
        are created.

    Returns
    -------
    jug_task : a jug.Task
        A Task object

    See Also
    --------
    milk.nfoldcrossvalidation : The same functionality as a "normal" function
    jug.CompoundTask : This function can be used as argument to CompoundTask
    '''
    # Read the fold count before the keyword values are wrapped below: a
    # plain integer is needed to enumerate the folds.
    nfolds = kwargs.get('nfolds', 10)

    features = identity(features)
    labels = identity(labels)
    wrapped_kwargs = {key: identity(val) for key, val in kwargs.items()}

    run_single_fold = TaskGenerator(milk.nfoldcrossvalidation)
    per_fold = []
    for fold in range(nfolds):
        per_fold.append(
            run_single_fold(features, labels, folds=[fold], **wrapped_kwargs))
    return jug_reduce(_nfold_reduce, per_fold)
def _select_best(features, results, method):
    '''
    Reduce ``results`` to a single value with a jug map/reduce.

    ``_evaluate_solution`` is mapped over one ``(features, result, method)``
    tuple per element of ``results`` and the mapped values are reduced with
    ``_select_min``.

    Parameters
    ----------
    features : any
        Wrapped with ``identity`` once, so every tuple shares the same
        wrapped object.
    results : iterable
        One map input is built per element.
    method : any
        Placed, unchanged, in every map input.

    Returns
    -------
    Whatever ``mapreduce`` returns for the given reducer and mapper.
    '''
    shared_features = identity(features)
    map_inputs = [(shared_features, candidate, method) for candidate in results]
    return mapreduce(
        _select_min,
        _evaluate_solution,
        map_inputs,
        reduce_step=32,
        map_step=8)
def nfoldcrossvalidation(features, labels, **kwargs):
    '''
    jug_task = nfoldcrossvalidation(features, labels, **kwargs)

    A jug Task that performs n-fold cross-validation.

    N-fold cross validation is inherently parallel. This function returns a
    ``jug.Task`` which performs n-fold crossvalidation which jug can
    parallelise: one task is generated per fold and the per-fold results are
    merged with ``_nfold_reduce``.

    Parameters
    ----------
    features : sequence of features
    labels : sequence
    kwargs : any
        This will be passed down to ``milk.nfoldcrossvalidation``. The
        ``nfolds`` entry (default: 10) is also read here to decide how many
        per-fold tasks to generate.

    Returns
    -------
    jug_task : a jug.Task
        A Task object

    See Also
    --------
    milk.nfoldcrossvalidation : The same functionality as a "normal" function
    jug.CompoundTask : This function can be used as argument to CompoundTask
    '''
    # Must be read before the values are wrapped with identity() below.
    nfolds = kwargs.get('nfolds', 10)
    features, labels = map(identity, (features, labels))
    # items()/range() rather than iteritems()/xrange(): the latter exist
    # only on Python 2, while these spellings behave the same on 2 and 3.
    kwargs = {k: identity(v) for k, v in kwargs.items()}
    nfold_one = TaskGenerator(milk.nfoldcrossvalidation)
    mapped = [nfold_one(features, labels, folds=[i], **kwargs)
              for i in range(nfolds)]
    return jug_reduce(_nfold_reduce, mapped)
def nfoldcrossvalidation(features, labels, **kwargs):
    '''
    jug_task = nfoldcrossvalidation(features, labels, **kwargs)

    Build a jug Task that performs n-fold cross-validation.

    Cross-validation folds are independent of each other, so the work is
    expressed as a map over the fold indices followed by a reduction with
    ``_nfold_reduce``; jug can then parallelise it.

    Parameters
    ----------
    features : sequence of features
    labels : sequence
    kwargs : any
        This will be passed down to ``milk.nfoldcrossvalidation``.
        ``nfolds`` (default: 10) is also read here to decide how many fold
        indices are mapped over.

    Returns
    -------
    jug_task : a jug.Task
        A Task object

    See Also
    --------
    milk.nfoldcrossvalidation : The same functionality as a "normal" function
    jug.CompoundTask : This function can be used as argument to CompoundTask
    '''
    # Built first, as in the original statement order: _nfold_one receives
    # the kwargs dict itself.
    per_fold_mapper = identity(_nfold_one(features, labels, kwargs))

    nfolds = kwargs.get('nfolds', 10)
    fold_indices = range(nfolds)
    # NOTE(review): reduce_step is larger than the number of mapped items,
    # presumably so that all fold results are combined in a single
    # reduction -- confirm against the jug mapreduce documentation.
    reduce_step = nfolds + 1
    return mapreduce(
        _nfold_reduce,
        per_fold_mapper,
        fold_indices,
        map_step=1,
        reduce_step=reduce_step)
def _select_best(features, results, method):
    '''
    Run a jug map/reduce over ``results``.

    Each element of ``results`` is bundled with the ``identity``-wrapped
    ``features`` and with ``method`` into a 3-tuple. ``_evaluate_solution``
    is the map function applied to those tuples and ``_select_min`` is the
    reducer.

    Parameters
    ----------
    features : any
        Wrapped once with ``identity`` before the tuples are built.
    results : iterable
        Source of the per-item map inputs.
    method : any
        Carried along unchanged in every tuple.

    Returns
    -------
    The value returned by ``mapreduce``.
    '''
    wrapped = identity(features)
    bundles = []
    for res in results:
        bundles.append((wrapped, res, method))
    return mapreduce(_select_min, _evaluate_solution, bundles,
                     reduce_step=32, map_step=8)
def test_utils_identity():
    '''An ``identity`` task wrapping 2 must have the value 2 once it is run.'''
    t = identity(2)
    t.run()
    # The comparison has to be asserted: a bare ``==`` expression is
    # evaluated and thrown away, so the test could never fail.
    assert t.value() == 2
def test_utils_identity():
    '''Run an ``identity`` task built from 2 and check that its value is 2.'''
    task = identity(2)
    task.run()
    # The value is read back from the task only after it has been run.
    result = task.value()
    assert result == 2
# Experiment driver: for every dataset, cross-validate a classifier on each
# feature variant and save the resulting confusion matrix to a text file.
#
# NOTE(review): the block nesting below was reconstructed from the statement
# order; in particular the placement of the final ``if`` (inside the
# ``use_base`` loop vs. directly inside the ``s`` loop) should be confirmed.
compares = []
rt_compares = []
# Number of images per dataset, keyed by dataset name.
sizes = {}
for name,directory,has_dna,base,use_origins in datasets:
    # ``images`` is used directly below (len(), iteration), so this call has
    # to return the loaded value itself.
    images = CachedFunction(load_directory,'../data/'+directory)
    sizes[name] = len(images)
    learner = precluster_learner_plus_features(kfrac=4)
    # Origins are forwarded to the cross-validation only when the dataset
    # asks for them; otherwise None is passed.
    origins = None
    if use_origins:
        origins = [im.origin for im in images]
    labels = [im.label for im in images]
    # Feature variants to evaluate; 'surf-ref' is added only when has_dna is
    # true.
    surfs = ['surf']
    if has_dna:
        surfs.append('surf-ref')
    # Confusion matrices for this dataset, keyed by
    # (feature variant, whether a base was used).
    four = {}
    labels = identity(labels)
    for s in surfs:
        # Each variant is evaluated twice: without a base (None) and with
        # the dataset's ``base``.
        for use_base in [None, base]:
            features = computeallfeatures(images, s, use_base)
            features = identity(features)
            # A copy of the learner is handed to every cross-validation run.
            cmatrix = milk.ext.jugparallel.nfoldcrossvalidation(features, labels, origins=origins, learner=copy(learner))
            four[s,use_base is not None] = cmatrix
            save_cmatrix('%s-%s-%s.txt' % (name,s,use_base), cmatrix)
            # Only for the last feature variant, and only for datasets whose
            # name does not contain 'no-origins'.
            if s == surfs[-1] and ('no-origins' not in name):
                cmats = []
                # k takes every value from 32 to 384 inclusive.
                for k in xrange(32,385):
                    # The boolean adds 0 or 1: datasets listed in run_twice
                    # get two repetitions (ri = 0, 1), all others get one.
                    n = 1 + (name in run_twice)
                    for ri in xrange(n):
                        # NOTE(review): ``cmats`` and ``nfeatures`` are not
                        # used by any statement visible here -- check
                        # whether this loop body is complete.
                        centroids = features1centroids(features, k, ri)
                        nfeatures = project(features, centroids)
from jug import barrier, value, TaskGenerator
from jug.utils import identity
from jug.compound import CompoundTaskGenerator


@TaskGenerator
def double(x):
    '''Return ``x`` multiplied by two.'''
    return 2 * x


@CompoundTaskGenerator
def twice(x):
    '''Return a pair made of two separate ``double(x)`` tasks.'''
    first = double(x)
    second = double(x)
    return (first, second)


@TaskGenerator
def tadd(y):
    '''Return the sum of the first two items of ``y``.'''
    left = y[0]
    right = y[1]
    return left + right


# Stage 1: the compound task. Despite its name, ``eight`` is bound to what
# ``twice(4)`` produces (a pair of doubled values), not to a single number.
eight = twice(4)
barrier()

# Stage 2: pass the compound result through ``identity``. The barriers
# around it keep the three stages strictly separated.
eight = identity(eight)
barrier()

# Stage 3: add the two halves of the pair.
sixteen = tadd(eight)
from jug import barrier, value, TaskGenerator
from jug.utils import identity
from jug.compound import CompoundTaskGenerator


@TaskGenerator
def double(x):
    '''Double ``x``.'''
    doubled = 2*x
    return doubled


@CompoundTaskGenerator
def twice(x):
    '''Build two ``double(x)`` tasks and return them as a 2-tuple.'''
    pair = (double(x), double(x))
    return pair


@TaskGenerator
def tadd(y):
    '''Add ``y[0]`` and ``y[1]``.'''
    total = y[0] + y[1]
    return total


# The jugfile has three stages, each separated from the next by a barrier.

# 1. Compound task built from 4 (the result is a pair, see ``twice``).
eight = twice(4)
barrier()

# 2. The same name is rebound to an ``identity`` task over that result.
eight = identity(eight)
barrier()

# 3. Sum of the two elements of the pair.
sixteen = tadd(eight)