def mvac_job_rdb_instance(context, job_id, volumes, rdb_vol_name, rdb_db, cwd):
    import multyvac

    layer = get_compmake_config('multyvac_layer')
    if not layer:
        layer = None

    all_volumes = volumes + [rdb_vol_name]
    command, _, _ = get_job_args(job_id, db=context.get_compmake_db())
    misc = dict(deps=[command])
    #print('Instancing (volumes: %r, layer=%r)' % (all_volumes, layer))
    core = get_compmake_config('multyvac_core')
    multyvac_job_id = multyvac.submit(mvac_job_rdb_worker,
                                      job_id=job_id,
                                      rdb_basepath=rdb_db.basepath,
                                      misc=misc,
                                      cwd=cwd,
                                      _core=core,
                                      _name=job_id,
                                      _layer=layer,
                                      _vol=all_volumes)
    #print('Getting job %r' % multyvac_job_id)
    multyvac_job = multyvac.get(multyvac_job_id)
    #print('Got job')
    return multyvac_job
def runbench(bench):
    jid = multyvac.shell_submit(
        "/bin/bash {}/run.sh {}".format(scripts_vol.mount_path, bench),
        _name='bench1',
        _core='f2',
        _layer=args.layer,
        _vol=[args.scripts_volume, args.results_volume])
    return multyvac.get(jid)
def clean_cloud_out_dir(d):
    d = os.path.realpath(d)
    vol, _, _ = get_volume_for_dir(d)
    import multyvac
    multyvac_job_id = multyvac.submit(clean_cloud_out_dir_job, d,
                                      _vol=[vol.name],
                                      _name='Cleaning directory %r' % d)
    multyvac_job = multyvac.get(multyvac_job_id)
    multyvac_job.get_result()
def run(self, r, niters=10000):
    """Run each runner for `niters`, using the backend supplied in the
    constructor for parallelism.

    Parameters
    ----------
    r : rng
    niters : int
    """
    validator.validate_type(r, rng, param_name='r')
    validator.validate_positive(niters, param_name='niters')
    if self._backend == 'multiprocessing':
        pool = mp.Pool(processes=self._processes)
        args = [(runner, niters, r.next(), None) for runner in self._runners]
        # map_async() + get() allows us to workaround a bug where
        # control-C doesn't kill multiprocessing workers
        self._runners = pool.map_async(_mp_work, args).get(10000000)
        pool.close()
        pool.join()
    elif self._backend == 'multyvac':
        # XXX(stephentu): the only parallelism strategy thus far is every
        # runner gets a dedicated core (multicore=1) on a machine
        jids = []
        has_volume = bool(self._volume)
        zipped = zip(self._runners, self._digests)
        expensive_states = []
        for i, (runner, digest) in enumerate(zipped):
            if has_volume:
                statearg = (self._volume, 'state-{}'.format(digest))
                expensive_states.append(runner.expensive_state)
                runner.expensive_state = None
            else:
                statearg = None
            args = (runner, niters, r.next(), statearg)
            jids.append(
                multyvac.submit(
                    _mp_work,
                    args,
                    _ignore_module_dependencies=True,
                    _layer=self._layer,
                    _vol=self._volume,
                    _env=dict(self._env),  # submit() mutates the env
                    _core=self._core,
                    _name='kernels-parallel-runner-{}'.format(i)))
        self._runners = [multyvac.get(jid).get_result() for jid in jids]
        if not expensive_states:
            return
        for runner, state in zip(self._runners, expensive_states):
            runner.expensive_state = state
    else:
        assert False, 'should not be reached'
def delete_db_volume(db):
    vol = create_db_volume(db)
    entries = vol.ls('.')
    if not entries:
        return
    entries = [os.path.join(vol.mount_path, x['path']) for x in entries]
    import multyvac
    multyvac_job_id = multyvac.submit(delete_entries, entries,
                                      _vol=[vol.name],
                                      _name='Reset Compmake DB')
    multyvac_job = multyvac.get(multyvac_job_id)
    multyvac_job.get_result()
def mvac_instance(db, job_id, volumes, cwd):
    import multyvac
    layer = get_compmake_config('multyvac_layer')
    if not layer:
        layer = None
    command, args, kwargs = get_cmd_args_kwargs(job_id=job_id, db=db)
    core = get_compmake_config('multyvac_core')
    multyvac_job_id = multyvac.submit(command, *args,
                                      _layer=layer,
                                      _vol=volumes,
                                      _name=job_id,
                                      _core=core,
                                      **kwargs)
    multyvac_job = multyvac.get(multyvac_job_id)
    return multyvac_job
def get_results(exp_wait, exp_results):
    d = pickle.load(open(exp_wait, 'r'))

    chains = []
    # reorg on a per-seed basis
    for jid in d['jids']:
        job = multyvac.get(jid)
        print "getting", jid
        chain_data = job.get_result()
        chains.append({
            'scores': chain_data[0],
            'state': chain_data[1],
            'times': chain_data[2],
            'latents': chain_data[3]
        })

    pickle.dump({'chains': chains,
                 'exp': d},
                open(exp_results, 'w'))
import os

import multyvac
import requests

# rackspace configuration
multyvac.config.api_key = "cloudpipe18"                              # Rackspace Username
multyvac.config.api_secret_key = "683df297726d4bb18c6c230e5be795fe"  # Rackspace API Key
multyvac.config.api_url = "https://cloudpipe.tmpnb.org/v1"

# helpers
def status(url):
    return requests.get(url).status_code

def server_header(url):
    return requests.get(url).headers.get('server')

# execution
jid = multyvac.submit(status, "https://developer.rackspace.com")
print(multyvac.get(jid).get_result())

# notes about execution
# multyvac.submit(some_lambda_f, _image="some_docker_image_in_which_launch_the_job")
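The trailing note above mentions an `_image` keyword for running a submitted function inside a specific Docker image. A minimal sketch of that call pattern, reusing the `server_header` helper defined above; the image name is the placeholder from the note and the `_name` value is illustrative, neither comes from a tested configuration:

# Sketch only: substitute an image that actually exists on your backend.
jid = multyvac.submit(server_header, "https://developer.rackspace.com",
                      _image="some_docker_image_in_which_launch_the_job",
                      _name="server-header-in-custom-image")
print(multyvac.get(jid).get_result())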
def synchronize_db_up(context, targets):
    """ Synchronizes the DB up """
    db = context.get_compmake_db()
    # first create the volume if it doesn't exist
    vol = create_db_volume(db)

    # now we need to put all files
    keys = []
    cq = CacheQueryDB(db)
    jobs = set()
    jobs.update(targets)
    jobs.update(cq.tree(targets))

    #print('%d targets, %d jobs' % (len(targets), len(jobs)))

    # XXX: not all jobs
    for job_id in jobs:
        resources = [job2jobargskey, job2userobjectkey,
                     job2cachekey, job2key]
        for r in resources:
            key = r(job_id)
            if key in db:
                keys.append(key)

    #print('Found %s files to upload' % len(keys))

    # Shadow storage
    db2 = StorageFilesystem(basepath=vol.mount_path)

    already = set([os.path.basename(x['path']) for x in vol.ls('.')])

    filename2contents = {}
    #print('obtained: %r' % already)
    for key in keys:
        f = db.filename_for_key(key)
        f2 = db2.filename_for_key(key)

        local_path = f
        remote_path = os.path.relpath(f2, db2.basepath)

        if remote_path in already:
            #print('skipping %r' % local_path)
            continue

        size = os.stat(local_path).st_size
        use_compact = size < 6 * 1024
        if use_compact:
            with open(local_path) as f:
                filename2contents[f2] = f.read()
        else:
            #print('%s -> %s' % (local_path, remote_path))
            assert os.path.join(db2.basepath, remote_path) == f2
            vol.put_file(local_path, remote_path, target_mode=None)

    import multyvac
    multyvac_job_id = multyvac.submit(copy_files, filename2contents,
                                      _vol=[vol.name])
    multyvac_job = multyvac.get(multyvac_job_id)
    multyvac_job.get_result()

    return vol, db2
#!/usr/bin/env python
# CLOUDPIPE_URL=http://`echo $DOCKER_HOST | cut -d ":" -f2 | tr -d "/"`:8000/v1 python2 script/sample/submitpython.py
from __future__ import print_function
import multyvac
import os

# Grab from the CLOUDPIPE_URL environment variable, otherwise assume they have
# /etc/hosts configured to point to their docker
api_url = os.environ.get('CLOUDPIPE_URL', 'http://docker:8000/v1')
multyvac.config.set_key(api_key='admin', api_secret_key='12345', api_url=api_url)

def add(a, b):
    return a + b

jid = multyvac.submit(add, 3, 4)
result = multyvac.get(jid).get_result()
print("added {} and {} to get {}... in the cloud!".format(3, 4, result))
"cmd": 'echo "success" > /tmp/out', "_result_source": "file:/tmp/out", }, "stdin": { "cmd": 'cat', "_stdin": "success", }, } longest = 0 for name in jobs.keys(): if len(name) > longest: longest = len(name) success = 0 failure = 0 for (name, kwargs) in jobs.items(): jid = multyvac.shell_submit(**kwargs) print("{:<{}}: job {} ...".format(name, longest, jid), end='') result = multyvac.get(jid).get_result().strip('\n') print(" result [{}]".format(result)) if result == "success": success += 1 else: failure += 1 print("{} pass / {} fail".format(success, failure)) if failure > 0: sys.exit(1)
#!/usr/bin/env python
from __future__ import print_function
import multyvac

multyvac.config.set_key(api_key='admin', api_secret_key='12345',
                        api_url='http://docker:8000/v1')

def add(a, b):
    return a + b

jid = multyvac.submit(add, 3, 4)
result = multyvac.get(jid).get_result()
print("result = {}".format(result))
#!/usr/bin/env python
# CLOUDPIPE_URL=http://`echo $DOCKER_HOST | cut -d ":" -f2 | tr -d "/"`:8000/v1 python2 script/sample/submitpython.py
from __future__ import print_function
import multyvac
import os

# Grab from the CLOUDPIPE_URL environment variable, otherwise assume they have
# /etc/hosts configured to point to their docker
api_url = os.environ.get('CLOUDPIPE_URL', 'http://docker:8000/v1')
multyvac.config.set_key(api_key='admin', api_secret_key='12345', api_url=api_url)

def add(a, b):
    return a + b

jid = multyvac.submit(add, 3, 4, _layer="ubuntu:14.04")
job = multyvac.get(jid)
job.wait()
job = multyvac.get(jid)
print("Job's stderr:\n\t{}".format(job.stderr))  # Says python is unavailable
#!/usr/bin/env python
from __future__ import print_function
import multyvac
import time
import sys

multyvac.config.set_key(api_key='admin', api_secret_key='12345',
                        api_url='http://docker:8000/v1')

def longtime(seconds):
    print("Getting started")
    sys.stdout.flush()
    for i in xrange(0, seconds):
        time.sleep(1)
        print("{} seconds".format(i))
        sys.stdout.flush()

jid = multyvac.submit(longtime, 30)
time.sleep(5)
multyvac.kill(jid)
time.sleep(1)
j = multyvac.get(jid)
print("job = {}, status = {}".format(j, j.status))
def main():
    gdal.AllRegister()
    path = auxil.select_directory('Choose input directory')
    if path:
        os.chdir(path)
    # input image
    infile = auxil.select_infile(title='Choose image file')
    if infile:
        inDataset = gdal.Open(infile, GA_ReadOnly)
        cols = inDataset.RasterXSize
        rows = inDataset.RasterYSize
        bands = inDataset.RasterCount
        projection = inDataset.GetProjection()
        geotransform = inDataset.GetGeoTransform()
        if geotransform is not None:
            gt = list(geotransform)
        else:
            print 'No geotransform available'
            return
        imsr = osr.SpatialReference()
        imsr.ImportFromWkt(projection)
    else:
        return
    pos = auxil.select_pos(bands)
    if not pos:
        return
    N = len(pos)
    rasterBands = []
    for b in pos:
        rasterBands.append(inDataset.GetRasterBand(b))
    # training data (shapefile)
    trnfile = auxil.select_infile(filt='.shp', title='Choose train shapefile')
    if trnfile:
        trnDriver = ogr.GetDriverByName('ESRI Shapefile')
        trnDatasource = trnDriver.Open(trnfile, 0)
        trnLayer = trnDatasource.GetLayer()
        trnsr = trnLayer.GetSpatialRef()
    else:
        return
    # hidden neurons
    L = auxil.select_integer(8, 'number of hidden neurons')
    if not L:
        return
    # outfile
    outfile, fmt = auxil.select_outfilefmt()
    if not outfile:
        return
    # coordinate transformation from training to image projection
    ct = osr.CoordinateTransformation(trnsr, imsr)
    # number of classes
    feature = trnLayer.GetNextFeature()
    while feature:
        classid = feature.GetField('CLASS_ID')
        feature = trnLayer.GetNextFeature()
    trnLayer.ResetReading()
    K = int(classid) + 1
    print '========================='
    print ' ffncg'
    print '========================='
    print time.asctime()
    print 'image: ' + infile
    print 'training: ' + trnfile
    # loop through the polygons
    Gs = []  # train observations
    ls = []  # class labels
    print 'reading training data...'
    for i in range(trnLayer.GetFeatureCount()):
        feature = trnLayer.GetFeature(i)
        classid = feature.GetField('CLASS_ID')
        l = [0 for i in range(K)]
        l[int(classid)] = 1.0
        polygon = feature.GetGeometryRef()
        # transform to same projection as image
        polygon.Transform(ct)
        # convert to a Shapely object
        poly = shapely.wkt.loads(polygon.ExportToWkt())
        # transform the boundary to pixel coords in numpy
        bdry = np.array(poly.boundary)
        bdry[:, 0] = bdry[:, 0] - gt[0]
        bdry[:, 1] = bdry[:, 1] - gt[3]
        GT = np.mat([[gt[1], gt[2]], [gt[4], gt[5]]])
        bdry = bdry * np.linalg.inv(GT)
        # polygon in pixel coords
        polygon1 = asPolygon(bdry)
        # raster over the bounding rectangle
        minx, miny, maxx, maxy = map(int, list(polygon1.bounds))
        pts = []
        for i in range(minx, maxx + 1):
            for j in range(miny, maxy + 1):
                pts.append((i, j))
        multipt = MultiPoint(pts)
        # intersection as list
        intersection = np.array(multipt.intersection(polygon1), dtype=np.int).tolist()
        # cut out the bounded image cube
        cube = np.zeros((maxy - miny + 1, maxx - minx + 1, len(rasterBands)))
        k = 0
        for band in rasterBands:
            cube[:, :, k] = band.ReadAsArray(minx, miny, maxx - minx + 1, maxy - miny + 1)
            k += 1
        # get the training vectors
        for (x, y) in intersection:
            Gs.append(cube[y - miny, x - minx, :])
            ls.append(l)
        polygon = None
        polygon1 = None
        feature.Destroy()
    trnDatasource.Destroy()
    m = len(ls)
    print str(m) + ' training pixel vectors were read in'
    Gs = np.array(Gs)
    ls = np.array(ls)
    # stretch the pixel vectors to [-1,1]
    maxx = np.max(Gs, 0)
    minx = np.min(Gs, 0)
    for j in range(N):
        Gs[:, j] = 2 * (Gs[:, j] - minx[j]) / (maxx[j] - minx[j]) - 1.0
    # random permutation of training data
    idx = np.random.permutation(m)
    Gs = Gs[idx, :]
    ls = ls[idx, :]
    # setup output dataset
    driver = gdal.GetDriverByName(fmt)
    outDataset = driver.Create(outfile, cols, rows, 1, GDT_Byte)
    projection = inDataset.GetProjection()
    geotransform = inDataset.GetGeoTransform()
    if geotransform is not None:
        outDataset.SetGeoTransform(tuple(gt))
    if projection is not None:
        outDataset.SetProjection(projection)
    outBand = outDataset.GetRasterBand(1)
    # train on 9/10 training examples
    Gstrn = Gs[0:9 * m // 10, :]
    lstrn = ls[0:9 * m // 10, :]
    affn = Ffncg(Gstrn, lstrn, L)
    print 'training on %i pixel vectors...' % np.shape(Gstrn)[0]
    start = time.time()
    cost = affn.train(epochs=epochs)
    print 'elapsed time %s' % str(time.time() - start)
    if cost is not None:
        # cost = np.log10(cost)
        ymax = np.max(cost)
        ymin = np.min(cost)
        xmax = len(cost)
        plt.plot(range(xmax), cost, 'k')
        plt.axis([0, xmax, ymin - 1, ymax])
        plt.title('Cross entropy')
        plt.xlabel('Epoch')
        # classify the image
        print 'classifying...'
        tile = np.zeros((cols, N))
        for row in range(rows):
            for j in range(N):
                tile[:, j] = rasterBands[j].ReadAsArray(0, row, cols, 1)
                tile[:, j] = 2 * (tile[:, j] - minx[j]) / (maxx[j] - minx[j]) - 1.0
            cls, _ = affn.classify(tile)
            outBand.WriteArray(np.reshape(cls, (1, cols)), 0, row)
        outBand.FlushCache()
        outDataset = None
        inDataset = None
        print 'thematic map written to: ' + outfile
        print 'please close the cross entropy plot to continue'
        plt.show()
    else:
        print 'an error occurred'
        return
    print 'submitting cross-validation to multyvac'
    start = time.time()
    jid = mv.submit(traintst, Gs, ls, L, _layer='ms_image_analysis')
    print 'submission time: %s' % str(time.time() - start)
    start = time.time()
    job = mv.get(jid)
    result = job.get_result()
    print 'execution time: %s' % str(time.time() - start)
    print 'misclassification rate: %f' % np.mean(result)
    print 'standard deviation: %f' % np.std(result)
    print '--------done---------------------'