Example #1
def mvac_job_rdb_instance(context, job_id, volumes, rdb_vol_name, rdb_db, cwd):
    import multyvac    
    layer = get_compmake_config('multyvac_layer')
    if not layer:
        layer = None
    all_volumes = volumes + [rdb_vol_name]
    
    command, _, _ = get_job_args(job_id, db=context.get_compmake_db())
    misc = dict(deps=[command])
    
    #print('Instancing (volumes: %r, layer=%r)' % (all_volumes, layer))
    core = get_compmake_config('multyvac_core')
    multyvac_job_id = multyvac.submit(mvac_job_rdb_worker,
                                      job_id=job_id,
                                      rdb_basepath=rdb_db.basepath,
                                      misc=misc,  
                                      cwd=cwd,
                                      _core=core,
                                      _name=job_id,
                                      _layer=layer,
                                      _vol=all_volumes)
    #print('Getting job %r' % multyvac_job_id)
    multyvac_job = multyvac.get(multyvac_job_id)
    #print('Got job')
    return multyvac_job
Example #2
def runbench(bench):
    jid = multyvac.shell_submit(
        "/bin/bash {}/run.sh {}".format(scripts_vol.mount_path, bench),
        _name='bench1',
        _core='f2',
        _layer=args.layer,
        _vol=[args.scripts_volume, args.results_volume])
    return multyvac.get(jid)
Example #3
def runbench(bench):
    jid = multyvac.shell_submit(
        "/bin/bash {}/run.sh {}".format(scripts_vol.mount_path, bench),
        _name='bench1',
        _core='f2',
        _layer=args.layer,
        _vol=[args.scripts_volume, args.results_volume])
    return multyvac.get(jid)
Example #4
def clean_cloud_out_dir(d):
    d = os.path.realpath(d)
    vol, _, _ = get_volume_for_dir(d)
    import multyvac
    multyvac_job_id = multyvac.submit(clean_cloud_out_dir_job, 
                                      d, 
                                      _vol=[vol.name],
                                      _name='Cleaning directory %r' % d)
    multyvac_job = multyvac.get(multyvac_job_id)
    multyvac_job.get_result()
Example #5
def clean_cloud_out_dir(d):
    d = os.path.realpath(d)
    vol, _, _ = get_volume_for_dir(d)
    import multyvac
    multyvac_job_id = multyvac.submit(clean_cloud_out_dir_job,
                                      d,
                                      _vol=[vol.name],
                                      _name='Cleaning directory %r' % d)
    multyvac_job = multyvac.get(multyvac_job_id)
    multyvac_job.get_result()
Example #6
    def run(self, r, niters=10000):
        """Run each runner for `niters`, using the backend supplied in the
        constructor for parallelism.

        Parameters
        ----------
        r : rng
        niters : int

        """
        validator.validate_type(r, rng, param_name='r')
        validator.validate_positive(niters, param_name='niters')
        if self._backend == 'multiprocessing':
            pool = mp.Pool(processes=self._processes)
            args = [(runner, niters, r.next(), None)
                    for runner in self._runners]
            # map_async() + get() allows us to workaround a bug where
            # control-C doesn't kill multiprocessing workers
            self._runners = pool.map_async(_mp_work, args).get(10000000)
            pool.close()
            pool.join()
        elif self._backend == 'multyvac':

            # XXX(stephentu): the only parallelism strategy thus far is every
            # runner gets a dedicated core (multicore=1) on a machine
            jids = []
            has_volume = bool(self._volume)
            zipped = zip(self._runners, self._digests)
            expensive_states = []
            for i, (runner, digest) in enumerate(zipped):
                if has_volume:
                    statearg = (self._volume, 'state-{}'.format(digest))
                    expensive_states.append(runner.expensive_state)
                    runner.expensive_state = None
                else:
                    statearg = None
                args = (runner, niters, r.next(), statearg)
                jids.append(
                    multyvac.submit(
                        _mp_work,
                        args,
                        _ignore_module_dependencies=True,
                        _layer=self._layer,
                        _vol=self._volume,
                        _env=dict(self._env),  # submit() mutates the env
                        _core=self._core,
                        _name='kernels-parallel-runner-{}'.format(i)))
            self._runners = [multyvac.get(jid).get_result() for jid in jids]
            if not expensive_states:
                return
            for runner, state in zip(self._runners, expensive_states):
                runner.expensive_state = state
        else:
            assert False, 'should not be reached'
Example #7
    def run(self, r, niters=10000):
        """Run each runner for `niters`, using the backend supplied in the
        constructor for parallelism.

        Parameters
        ----------
        r : rng
        niters : int

        """
        validator.validate_type(r, rng, param_name='r')
        validator.validate_positive(niters, param_name='niters')
        if self._backend == 'multiprocessing':
            pool = mp.Pool(processes=self._processes)
            args = [(runner, niters, r.next(), None)
                    for runner in self._runners]
            # map_async() + get() allows us to workaround a bug where
            # control-C doesn't kill multiprocessing workers
            self._runners = pool.map_async(_mp_work, args).get(10000000)
            pool.close()
            pool.join()
        elif self._backend == 'multyvac':

            # XXX(stephentu): the only parallelism strategy thus far is every
            # runner gets a dedicated core (multicore=1) on a machine
            jids = []
            has_volume = bool(self._volume)
            zipped = zip(self._runners, self._digests)
            expensive_states = []
            for i, (runner, digest) in enumerate(zipped):
                if has_volume:
                    statearg = (self._volume, 'state-{}'.format(digest))
                    expensive_states.append(runner.expensive_state)
                    runner.expensive_state = None
                else:
                    statearg = None
                args = (runner, niters, r.next(), statearg)
                jids.append(
                    multyvac.submit(
                        _mp_work,
                        args,
                        _ignore_module_dependencies=True,
                        _layer=self._layer,
                        _vol=self._volume,
                        _env=dict(self._env),  # submit() mutates the env
                        _core=self._core,
                        _name='kernels-parallel-runner-{}'.format(i)))
            self._runners = [multyvac.get(jid).get_result() for jid in jids]
            if not expensive_states:
                return
            for runner, state in zip(self._runners, expensive_states):
                runner.expensive_state = state
        else:
            assert False, 'should not be reached'
Example #8
def delete_db_volume(db):
    vol = create_db_volume(db)
    entries = vol.ls('.')
    if not entries:
        return
    
    entries = [os.path.join(vol.mount_path, x['path']) for x in entries]
    
    import multyvac
    multyvac_job_id = multyvac.submit(delete_entries, entries, 
                                      _vol=[vol.name],
                                      _name='Reset Compmake DB')
    multyvac_job = multyvac.get(multyvac_job_id)
    multyvac_job.get_result()
Example #9
def mvac_instance(db, job_id, volumes, cwd):
    import multyvac    
    layer = get_compmake_config('multyvac_layer')
    if not layer:
        layer = None

    command, args, kwargs = get_cmd_args_kwargs(job_id=job_id, db=db)

    core = get_compmake_config('multyvac_core')
    multyvac_job_id = multyvac.submit(command, *args, 
                                      _layer=layer,
                                      _vol=volumes,
                                      _name=job_id,
                                      _core=core,
                                       **kwargs)
    multyvac_job = multyvac.get(multyvac_job_id)
    return multyvac_job
Example #10
def mvac_instance(db, job_id, volumes, cwd):
    import multyvac    
    layer = get_compmake_config('multyvac_layer')
    if not layer:
        layer = None

    command, args, kwargs = get_cmd_args_kwargs(job_id=job_id, db=db)

    core = get_compmake_config('multyvac_core')
    multyvac_job_id = multyvac.submit(command, *args, 
                                      _layer=layer,
                                      _vol=volumes,
                                      _name=job_id,
                                      _core=core,
                                       **kwargs)
    multyvac_job = multyvac.get(multyvac_job_id)
    return multyvac_job
Example #11
def get_results(exp_wait, exp_results):
    d = pickle.load(open(exp_wait, 'r'))

    chains = []
    # reorg on a per-seed basis
    for jid in d['jids']:
        job = multyvac.get(jid)
        print "getting", jid
        chain_data = job.get_result()

        chains.append({
            'scores': chain_data[0],
            'state': chain_data[1],
            'times': chain_data[2],
            'latents': chain_data[3]
        })

    pickle.dump({'chains': chains, 'exp': d}, open(exp_results, 'w'))
Example #12
def get_results(exp_wait, exp_results):
    d = pickle.load(open(exp_wait, 'r'))
    
    chains = []
    # reorg on a per-seed basis
    for jid in d['jids']:
        job = multyvac.get(jid)
        print "getting", jid
        chain_data = job.get_result()
        
        chains.append({'scores' : chain_data[0], 
                       'state' : chain_data[1], 
                       'times' : chain_data[2], 
                       'latents' : chain_data[3]})
        
    pickle.dump({'chains' : chains, 
                 'exp' : d}, 
                open(exp_results, 'w'))
Example #13
import os
import multyvac
import requests


# rackspace configuration
multyvac.config.api_key = "cloudpipe18" # Rackspace Username
multyvac.config.api_secret_key = "683df297726d4bb18c6c230e5be795fe" # Rackspace API Key
multyvac.config.api_url = "https://cloudpipe.tmpnb.org/v1"


# helpers
def status(url):
    return requests.get(url).status_code

def server_header(url):
    return requests.get(url).headers.get('server')


# execution
jid = multyvac.submit(status, "https://developer.rackspace.com")
print(multyvac.get(jid).get_result())

# notes about execution
# multyvac.submit(some_lambda_f, _image="some_docker_image_in_which_launch_the_job")
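
The closing note above suggests that, on this Cloudpipe-backed deployment, a job can be pinned to a specific Docker image. A minimal sketch of such a call, reusing the configuration and the status() helper from the example above, and assuming multyvac.submit accepts the _image keyword mentioned in the note (the image tag is purely illustrative):

# _image is an assumed keyword, taken from the note above; "ubuntu:14.04" is only an example tag
jid = multyvac.submit(status, "https://developer.rackspace.com", _image="ubuntu:14.04")
print(multyvac.get(jid).get_result())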
Example #14
def synchronize_db_up(context, targets):
    """ Syncrhonizes the DB up """
    db = context.get_compmake_db()
    # first create the volume if it doesn't exist
    vol = create_db_volume(db)
    
    # now we need to put all files
    
    keys = []
    
    cq = CacheQueryDB(db)
    jobs = set()
    jobs.update(targets)
    jobs.update(cq.tree(targets))
    
    #print('%d targets, %d jobs' % (len(targets), len(jobs)))
     
    # XXX: not all jobs
    for job_id in jobs:
        resources = [job2jobargskey, job2userobjectkey, 
                     job2cachekey, job2key]
        for r in resources:
            key = r(job_id)
            if key in db:
                keys.append(key)
                
    #print('Found %s files to upload' % len(keys))
    
    # Shadow storage
    db2 = StorageFilesystem(basepath=vol.mount_path)
    already = set([os.path.basename(x['path']) for x in vol.ls('.')])
    
    filename2contents = {}
    #print('obtained: %r' % already)
    for key in keys:
        f = db.filename_for_key(key)
        f2 = db2.filename_for_key(key)
        local_path = f
        remote_path = os.path.relpath(f2, db2.basepath)
        
        if remote_path in already:
            #print('skipping %r' % local_path)
            continue
        
        size = os.stat(local_path).st_size
        use_compact = size < 6*1024
        if use_compact:
            with open(local_path) as f:
                filename2contents[f2] = f.read()
        else:
            #print('%s -> %s' % (local_path, remote_path))
            assert os.path.join(db2.basepath, remote_path) == f2
            vol.put_file(local_path, remote_path, target_mode=None)
    
    import multyvac
    multyvac_job_id = multyvac.submit(copy_files, filename2contents, 
                                      _vol=[vol.name])
    multyvac_job = multyvac.get(multyvac_job_id)
    multyvac_job.get_result()
    
    return vol, db2
Example #15
import os
import multyvac
import requests

# rackspace configuration
multyvac.config.api_key = "cloudpipe18"  # Rackspace Username
multyvac.config.api_secret_key = "683df297726d4bb18c6c230e5be795fe"  # Rackspace API Key
multyvac.config.api_url = "https://cloudpipe.tmpnb.org/v1"


# helpers
def status(url):
    return requests.get(url).status_code


def server_header(url):
    return requests.get(url).headers.get('server')


# execution
jid = multyvac.submit(status, "https://developer.rackspace.com")
print(multyvac.get(jid).get_result())

# notes about execution
# multyvac.submit(some_lambda_f, _image="some_docker_image_in_which_launch_the_job")
Example #16
#!/usr/bin/env python

# CLOUDPIPE_URL=http://`echo $DOCKER_HOST | cut -d ":" -f2 | tr -d "/"`:8000/v1 python2 script/sample/submitpython.py

from __future__ import print_function

import multyvac

import os
# Grab from the CLOUDPIPE_URL environment variable, otherwise assume they have
# /etc/hosts configured to point to their docker
api_url = os.environ.get('CLOUDPIPE_URL', 'http://docker:8000/v1')

multyvac.config.set_key(api_key='admin', api_secret_key='12345', api_url=api_url)

def add(a, b):
    return a + b

jid = multyvac.submit(add, 3, 4)
result = multyvac.get(jid).get_result()
print("added {} and {} to get {}... in the cloud!".format(3,4,result))
Example #17
        "cmd": 'echo "success" > /tmp/out',
        "_result_source": "file:/tmp/out",
    },
    "stdin": {
        "cmd": 'cat',
        "_stdin": "success",
    },
}

longest = 0
for name in jobs.keys():
    if len(name) > longest:
        longest = len(name)

success = 0
failure = 0

for (name, kwargs) in jobs.items():
    jid = multyvac.shell_submit(**kwargs)
    print("{:<{}}: job {} ...".format(name, longest, jid), end='')
    result = multyvac.get(jid).get_result().strip('\n')
    print(" result [{}]".format(result))
    if result == "success":
        success += 1
    else:
        failure += 1

print("{} pass / {} fail".format(success, failure))
if failure > 0:
    sys.exit(1)
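
For reference, each entry in the jobs dictionary above is unpacked straight into multyvac.shell_submit(**kwargs); a single call corresponding to the "write" entry would look roughly like this (a minimal sketch, assuming the client is configured as in the other examples):

# cmd is the shell command to run; _result_source tells the service to take the job's
# result from the named file rather than from stdout (mirroring the "write" entry above)
jid = multyvac.shell_submit(cmd='echo "success" > /tmp/out',
                            _result_source='file:/tmp/out')
print(multyvac.get(jid).get_result().strip('\n'))  # expected to print: success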
Example #18
#!/usr/bin/env python

from __future__ import print_function

import multyvac

multyvac.config.set_key(api_key='admin',
                        api_secret_key='12345',
                        api_url='http://docker:8000/v1')


def add(a, b):
    return a + b


jid = multyvac.submit(add, 3, 4)
result = multyvac.get(jid).get_result()
print("result = {}".format(result))
Example #19
#!/usr/bin/env python

# CLOUDPIPE_URL=http://`echo $DOCKER_HOST | cut -d ":" -f2 | tr -d "/"`:8000/v1 python2 script/sample/submitpython.py

from __future__ import print_function

import multyvac

import os
# Grab from the CLOUDPIPE_URL environment variable, otherwise assume they have
# /etc/hosts configured to point to their docker
api_url = os.environ.get('CLOUDPIPE_URL', 'http://docker:8000/v1')

multyvac.config.set_key(api_key='admin', api_secret_key='12345', api_url=api_url)

def add(a, b):
    return a + b

jid = multyvac.submit(add, 3, 4, _layer="ubuntu:14.04")
job = multyvac.get(jid)
job.wait()
job = multyvac.get(jid)
print("Job's stderr:\n\t{}".format(job.stderr)) # Says python is unavailable
Example #20
        "cmd": 'echo "success" > /tmp/out',
        "_result_source": "file:/tmp/out",
    },
    "stdin": {
        "cmd": 'cat',
        "_stdin": "success",
    },
}

longest = 0
for name in jobs.keys():
    if len(name) > longest:
        longest = len(name)

success = 0
failure = 0

for (name, kwargs) in jobs.items():
    jid = multyvac.shell_submit(**kwargs)
    print("{:<{}}: job {} ...".format(name, longest, jid), end='')
    result = multyvac.get(jid).get_result().strip('\n')
    print(" result [{}]".format(result))
    if result == "success":
        success += 1
    else:
        failure += 1

print("{} pass / {} fail".format(success, failure))
if failure > 0:
    sys.exit(1)
Example #21
#!/usr/bin/env python

from __future__ import print_function

import multyvac
import time
import sys

multyvac.config.set_key(api_key='admin', api_secret_key='12345', api_url='http://docker:8000/v1')

def longtime(seconds):
    print("Getting started")
    sys.stdout.flush()
    for i in xrange(0, seconds):
        time.sleep(1)
        print("{} seconds".format(i))
        sys.stdout.flush()

jid = multyvac.submit(longtime, 30)
time.sleep(5)
multyvac.kill(jid)
time.sleep(1)
j = multyvac.get(jid)
print("job = {}, status = {}".format(j, j.status))
Example #22
#!/usr/bin/env python

# CLOUDPIPE_URL=http://`echo $DOCKER_HOST | cut -d ":" -f2 | tr -d "/"`:8000/v1 python2 script/sample/submitpython.py

from __future__ import print_function

import multyvac

import os

# Grab from the CLOUDPIPE_URL environment variable, otherwise assume they have
# /etc/hosts configured to point to their docker
api_url = os.environ.get("CLOUDPIPE_URL", "http://docker:8000/v1")

multyvac.config.set_key(api_key="admin", api_secret_key="12345", api_url=api_url)


def add(a, b):
    return a + b


jid = multyvac.submit(add, 3, 4, _layer="ubuntu:14.04")
job = multyvac.get(jid)
job.wait()
job = multyvac.get(jid)
print("Job's stderr:\n\t{}".format(job.stderr))  # Says python is unavailable
Example #23
def main():  
    gdal.AllRegister()
    path = auxil.select_directory('Choose input directory')
    if path:
        os.chdir(path)        
#  input image    
    infile = auxil.select_infile(title='Choose image file') 
    if infile:                   
        inDataset = gdal.Open(infile,GA_ReadOnly)     
        cols = inDataset.RasterXSize
        rows = inDataset.RasterYSize    
        bands = inDataset.RasterCount
        projection = inDataset.GetProjection()
        geotransform = inDataset.GetGeoTransform()
        if geotransform is not None:
            gt = list(geotransform) 
        else:
            print 'No geotransform available'
            return       
        imsr = osr.SpatialReference()  
        imsr.ImportFromWkt(projection)      
    else:
        return  
    pos =  auxil.select_pos(bands)  
    if not pos:
        return
    N = len(pos) 
    rasterBands = [] 
    for b in pos:
        rasterBands.append(inDataset.GetRasterBand(b)) 
#  training data (shapefile)      
    trnfile = auxil.select_infile(filt='.shp',title='Choose train shapefile')
    if trnfile:
        trnDriver = ogr.GetDriverByName('ESRI Shapefile')
        trnDatasource = trnDriver.Open(trnfile,0)
        trnLayer = trnDatasource.GetLayer() 
        trnsr = trnLayer.GetSpatialRef()             
    else:
        return
#  hidden neurons
    L = auxil.select_integer(8,'number of hidden neurons')    
    if not L:
        return
#  outfile
    outfile, fmt = auxil.select_outfilefmt()   
    if not outfile:
        return     
#  coordinate transformation from training to image projection   
    ct= osr.CoordinateTransformation(trnsr,imsr) 
#  number of classes    
    feature = trnLayer.GetNextFeature() 
    while feature:
        classid = feature.GetField('CLASS_ID')
        feature = trnLayer.GetNextFeature() 
    trnLayer.ResetReading()    
    K = int(classid)+1       
    print '========================='
    print '       ffncg'
    print '========================='
    print time.asctime()    
    print 'image:    '+infile
    print 'training: '+trnfile          
#  loop through the polygons    
    Gs = [] # train observations
    ls = [] # class labels
    print 'reading training data...'
    for i in range(trnLayer.GetFeatureCount()):
        feature = trnLayer.GetFeature(i)
        classid = feature.GetField('CLASS_ID')
        l = [0 for i in range(K)]
        l[int(classid)] = 1.0
        polygon = feature.GetGeometryRef()
#      transform to same projection as image        
        polygon.Transform(ct)  
#      convert to a Shapely object            
        poly = shapely.wkt.loads(polygon.ExportToWkt())
#      transform the boundary to pixel coords in numpy        
        bdry = np.array(poly.boundary) 
        bdry[:,0] = bdry[:,0]-gt[0]
        bdry[:,1] = bdry[:,1]-gt[3]
        GT = np.mat([[gt[1],gt[2]],[gt[4],gt[5]]])
        bdry = bdry*np.linalg.inv(GT) 
#      polygon in pixel coords        
        polygon1 = asPolygon(bdry)
#      raster over the bounding rectangle        
        minx,miny,maxx,maxy = map(int,list(polygon1.bounds))  
        pts = [] 
        for i in range(minx,maxx+1):
            for j in range(miny,maxy+1): 
                pts.append((i,j))             
        multipt =  MultiPoint(pts)   
#      intersection as list              
        intersection = np.array(multipt.intersection(polygon1),dtype=np.int).tolist()
#      cut out the bounded image cube               
        cube = np.zeros((maxy-miny+1,maxx-minx+1,len(rasterBands)))
        k=0
        for band in rasterBands:
            cube[:,:,k] = band.ReadAsArray(minx,miny,maxx-minx+1,maxy-miny+1)
            k += 1
#      get the training vectors
        for (x,y) in intersection:         
            Gs.append(cube[y-miny,x-minx,:])
            ls.append(l)   
        polygon = None
        polygon1 = None            
        feature.Destroy()  
    trnDatasource.Destroy() 
    m = len(ls)       
    print str(m) + ' training pixel vectors were read in' 
    Gs = np.array(Gs) 
    ls = np.array(ls)
#  stretch the pixel vectors to [-1,1]
    maxx = np.max(Gs,0)
    minx = np.min(Gs,0)
    for j in range(N):
        Gs[:,j] = 2*(Gs[:,j]-minx[j])/(maxx[j]-minx[j]) - 1.0 
#  random permutation of training data
    idx = np.random.permutation(m)
    Gs = Gs[idx,:] 
    ls = ls[idx,:]     
#  setup output dataset 
    driver = gdal.GetDriverByName(fmt)    
    outDataset = driver.Create(outfile,cols,rows,1,GDT_Byte) 
    projection = inDataset.GetProjection()
    geotransform = inDataset.GetGeoTransform()
    if geotransform is not None:
        outDataset.SetGeoTransform(tuple(gt))
    if projection is not None:
        outDataset.SetProjection(projection) 
    outBand = outDataset.GetRasterBand(1) 
#  train on 9/10 training examples         
    Gstrn = Gs[0:9*m//10,:]
    lstrn = ls[0:9*m//10,:]
    affn = Ffncg(Gstrn,lstrn,L)
    print 'training on %i pixel vectors...' % np.shape(Gstrn)[0]
    start = time.time()
    cost = affn.train(epochs=epochs)
    print 'elapsed time %s' %str(time.time()-start) 
    if cost is not None:
#        cost = np.log10(cost)  
        ymax = np.max(cost)
        ymin = np.min(cost) 
        xmax = len(cost)      
        plt.plot(range(xmax),cost,'k')
        plt.axis([0,xmax,ymin-1,ymax])
        plt.title('Cross entropy')
        plt.xlabel('Epoch')              
#      classify the image           
        print 'classifying...'
        tile = np.zeros((cols,N))    
        for row in range(rows):
            for j in range(N):
                tile[:,j] = rasterBands[j].ReadAsArray(0,row,cols,1)
                tile[:,j] = 2*(tile[:,j]-minx[j])/(maxx[j]-minx[j]) - 1.0 
            cls, _ = affn.classify(tile)  
            outBand.WriteArray(np.reshape(cls,(1,cols)),0,row)
        outBand.FlushCache()
        outDataset = None
        inDataset = None  
        print 'thematic map written to: ' + outfile
        print 'please close the cross entropy plot to continue'
        plt.show()
    else:
        print 'an error occurred'
        return 
    
    print 'submitting cross-validation to multyvac'    
    start = time.time()
    jid = mv.submit(traintst,Gs,ls,L,_layer='ms_image_analysis')  
    print 'submission time: %s' %str(time.time()-start)
    start = time.time()    
    job = mv.get(jid)
    result = job.get_result()
    
    
    print 'execution time: %s' %str(time.time()-start)      
    print 'misclassification rate: %f' %np.mean(result)
    print 'standard deviation:     %f' %np.std(result)         
    print '--------done---------------------'