def picturefind(savename,directory='./',meta=None):
	"""
	Locate saved pictures by versioned filename and, if meta is supplied, by matching metadata.
	"""
	status('searching pictures',tag='store')
	regex = '^.+\.v([0-9]+)\.png'
	fns = glob.glob(directory+'/'+savename+'.v*')
	nums = map(lambda y:(y,int(re.findall(regex,y)[0])),filter(lambda x:re.match(regex,x),fns))
	matches = [fn for fn,num in nums if meta==picturedat(os.path.basename(fn),directory=directory)]
	if len(matches)>1 and meta!=None:
		raise Exception('[ERROR] multiple matches found for %s'%savename)
	if matches==[] and meta==None:
		return dict([(os.path.basename(fn),picturedat(os.path.basename(fn),directory=directory))
			for fn,num in nums])
	return matches if not matches else matches[0]
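#---usage sketch for picturefind (hypothetical filenames): pictures are saved as e.g.
#---...fig.demo.v1.png, fig.demo.v2.png, and picturedat (defined elsewhere) recovers the JSON
#---...metadata stamped into each png so that an exact meta match returns a single path
def _example_picturefind():
	#---with meta=None this returns a dictionary mapping every version to its metadata
	catalog_of_pictures = picturefind('fig.demo',directory='./')
	#---with a specific meta this returns the single matching path (or an empty list)
	match = picturefind('fig.demo',directory='./',meta={'plot':'demo','style':'line'})
	return catalog_of_pictures,match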
def run(env, start_response):
    """Main WSGI entry point: dispatch on request method and path."""
    if 'PATH_INFO' not in env:
        env['PATH_INFO'] = '/'
    path = filter(None, os.path.normpath(env['PATH_INFO']).split('/'))
    ret = tools.status(501), tools.headers['plain'], 'Not implemented (yet?).'
    if env['REQUEST_METHOD'] == 'GET':
        if len(path) == 0:
            # Front page TODO
            pass
        elif path[0].lower() == 'sparql':
            ret = tools.sparql_get(env)
        elif path[0].lower() == 'ontology':
            ret = tools.ontology()
        elif path[0].lower() == 'dataset':
            ret = tools.dataset(path[1:])
    elif env['REQUEST_METHOD'] == 'POST':
        if len(path) == 0:
            ret = tools.status(400), tools.headers['plain'], 'Error'
        elif path[0].lower() == 'sparql':
            ret = tools.sparql_post(env)
        elif path[0].lower() == 'submit':
            ret = tools.submit(env)
    elif env['REQUEST_METHOD'] == 'PUT':
        # Guard against an empty path to avoid an IndexError on path[0].
        if len(path) and path[0].lower() == 'submit':
            ret = tools.submit(env)
    elif env['REQUEST_METHOD'] == 'DELETE':
        # Not supported until authentication gets added TODO
        pass
    else:
        # Unsupported as yet
        pass
    status, headers, content = ret
    start_response(status, headers)
    return content
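# Minimal sketch for serving this app locally (assumed host/port; wsgiref is standard library).
def serve_example():
    from wsgiref.simple_server import make_server
    make_server('localhost', 8080, run).serve_forever()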
def store(obj,name,path,attrs=None,print_types=False,verbose=True):
	"""
	Use h5py to store a dictionary of numpy data.
	"""
	if type(obj) != dict:
		raise Exception('except: only dictionaries can be stored')
	#---expand the path before checking for collisions
	path = os.path.abspath(os.path.expanduser(path))
	if os.path.isfile(path+'/'+name):
		raise Exception('except: file already exists: '+path+'/'+name)
	if not os.path.isdir(path):
		os.mkdir(path)
	fobj = h5py.File(path+'/'+name,'w')
	for key in obj.keys():
		if print_types:
			print '[WRITING] '+key+' type='+str(type(obj[key]))
			print '[WRITING] '+key+' dtype='+str(getattr(obj[key],'dtype','n/a'))
		try:
			dset = fobj.create_dataset(key,data=obj[key])
		except:
			#---use getattr for dtype since non-numpy objects lack it and would mask the real error
			raise Exception("failed to write this object so it's probably not numpy"+
				"\n"+key+' type='+str(type(obj[key]))+
				' dtype='+str(getattr(obj[key],'dtype','n/a')))
	#---save metadata as a JSON string in a special 'meta' dataset
	if attrs != None:
		fobj.create_dataset('meta',data=numpy.string_(json.dumps(attrs)))
	if verbose:
		status('[WRITING] '+path+'/'+name)
	fobj.close()
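#---usage sketch for store (hypothetical names): only dictionaries of numpy arrays are accepted,
#---...and attrs travel as a JSON string in the special 'meta' dataset written above
def _example_store():
	import numpy,json,h5py
	result = {'positions':numpy.random.rand(10,3)}
	store(result,'demo.n0.dat','./post',attrs={'frames':10,'note':'demo'})
	#---read the file back directly to show the layout written by store
	with h5py.File('./post/demo.n0.dat','r') as fobj:
		positions = fobj['positions'][...]
		meta = json.loads(fobj['meta'][()])
	return positions,meta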
def picturesave(savename,directory='./',meta=None,extras=[],backup=False,
	dpi=300,form='png',version=False):
	"""
	Function which saves the global matplotlib figure without overwriting.
	"""
	status('saving picture',tag='store')
	#---if version then we choose savename based on the next available index
	if version:
		#---check for this meta
		search = picturefind(savename,directory=directory,meta=meta)
		if not search:
			if meta == None:
				raise Exception('[ERROR] versioned image saving requires meta')
			fns = glob.glob(directory+'/'+savename+'.v*')
			nums = [int(re.findall('^.+\.v([0-9]+)\.png',fn)[0]) for fn in fns
				if re.match('^.+\.v[0-9]+\.png',fn)]
			ind = max(nums)+1 if nums != [] else 1
			savename += '.v%d'%ind
		else:
			savename = re.findall('(.+)\.[a-z]+',os.path.basename(search))[0]
	#---backup the existing file if necessary
	savename += '.'+form
	if os.path.isfile(directory+savename) and backup:
		for i in range(1,100):
			base = directory+savename
			latestfile = '.'.join(base.split('.')[:-1])+'.bak'+('%02d'%i)+'.'+base.split('.')[-1]
			if not os.path.isfile(latestfile):
				break
		if i == 99 and os.path.isfile(latestfile):
			raise Exception('except: too many copies')
		else:
			status('backing up '+directory+savename+' to '+latestfile,tag='store')
			os.rename(directory+savename,latestfile)
	plt.savefig(directory+savename,dpi=dpi,bbox_extra_artists=extras,bbox_inches='tight')
	plt.close()
	#---add metadata to the png
	if meta != None:
		im = Image.open(directory+savename)
		imgmeta = PngImagePlugin.PngInfo()
		imgmeta.add_text('meta',json.dumps(meta))
		im.save(directory+savename,form,pnginfo=imgmeta)
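#---usage sketch for picturesave (hypothetical figure): with version=True a new .v<N> index is
#---...chosen via picturefind unless a picture with identical meta already exists, in which case
#---...that file is overwritten in place
def _example_picturesave():
	import matplotlib.pyplot as plt
	fig,ax = plt.subplots()
	ax.plot([0,1],[0,1])
	picturesave('fig.demo',directory='./',meta={'plot':'demo','style':'line'},version=True)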
def computer(function,**kwargs):
	"""
	Compute function figures out how to run a calculation over a simulation.
	"""
	work = kwargs['workspace']
	calc = kwargs['calc']
	#---perform a calculation over all collections
	if 'collections' in calc:
		cols = tuple([calc['collections']]) if type(calc['collections'])==str else calc['collections']
		sns = unique(flatten([work.vars['collections'][i] for i in cols]))
	else:
		sns = work.sns()
	#---get slices (required)
	slice_name = calc['slice_name']
	group = calc['group'] if 'group' in calc else None
	#---pass data to the function according to upstream data type
	incoming_type = calc['uptype']
	jobs,data = [],dict([(sn,{}) for sn in sns])
	combined_slices = []
	for sn in sns:
		new_job = {'sn':sn,'slice_name':slice_name,'group':group}
		if incoming_type == 'simulation':
			#---prepare combinations in a dictionary
			if slice_name not in work.slice(sn):
				raise Exception(
					'\n[ERROR] the slices yaml file is missing a slice named "%s" for simulation "%s"'%
					(slice_name,sn))
			try:
				mfp = work.slice(sn)[slice_name][group]['missing_frame_percent']
			except:
				print "[WARNING] no missing frame percentage here"
				mfp = 0.0
			if mfp>work.missing_frame_tolerance:
				status('upstream slice failure: %s,%s,%s missing_frame_percent=%.1f'%(
					sn,slice_name,group,mfp),tag='warning')
				continue
			#---defaulting to 'all' group if group is None
			new_job['grofile'] = work.postdir+\
				work.slice(sn)[slice_name][group if group else 'all']['gro']
			#---! xtc must become a flag. recommend 'xtc' becomes work.cursor[1]
			#---defaulting to 'all' group if group is None
			new_job['trajfile'] = work.postdir+\
				work.slice(sn)[slice_name][group if group else 'all']['xtc']
		if 'specs' not in calc: calc['specs'] = ''
		if 'upstream' in calc['specs']:
			#---if no loop on upstream you can use a list
			if type(calc['specs']['upstream'])==list:
				upstream_ask = dict([(key,None) for key in calc['specs']['upstream']])
			elif type(calc['specs']['upstream'])==str:
				upstream_ask = {calc['specs']['upstream']:None}
			else: upstream_ask = calc['specs']['upstream']
			for key,val in upstream_ask.items():
				upspecs = deepcopy(work.calc[key])
				#---identify the list of particular options along with the stubs
				options,stubs = work.interpret_specs(upspecs,return_stubs=True)
				#---identify paths and values over which we "whittle" the total list of specs
				whittles = [(i,j) for i,j in catalog(val)]
				#---if no loop on upstream pickles we interpret none and send blank specs
				if val in ['None','none',None]:
					specs = [options[ss] for r,v in whittles for ss,s in enumerate(stubs)]
				else:
					#---select the correct option by matching all catalogued routes from the
					#---...incoming key to the original calculation
					specs = [options[ss] for r,v in whittles for ss,s in enumerate(stubs)
						if delve(s['specs'],*r)==v]
				if len(specs)!=1 and 'loop' not in upspecs['slice_name']:
					raise Exception('[ERROR] redundant upstream selection %s'%str(specs))
				#---if there are multiple slices
				#---! note that we expect that if slice_names is a list it will be ordered here too
				for slicenum,spec in enumerate(specs):
					#---if the upstream calculation has a group then use it in the filename
					if not group:
						if 'group' in work.calc[key]: upgroup = work.calc[key]['group']
						else: upgroup = None
					else: upgroup = group
					if not upgroup:
						sl = work.slice(sn)[spec['slice_name']]
						fn_base = re.findall('^v[0-9]+\.[0-9]+-[0-9]+-[0-9]+',
							work.slice(sn)[upspecs['slice_name']]['all']['filekey'])[0]+'.%s'%key
					else:
						sl = work.slice(sn)[spec['slice_name']][upgroup]
						fn_base = '%s.%s'%(sl['filekey'],key)
					#---! moved the following block left recently
					fn = work.select_postdata(fn_base,spec)
					if not fn:
						raise Exception('[ERROR] missing upstream postdata for %s'%fn_base)
					outkey = key if len(specs)==1 else '%s%d'%(key,slicenum)
					#---before each calculation the master loop loads the filename stored here
					data[sn][outkey] = os.path.basename(fn)[:-4]+'dat'
			new_job['upstream'] = data[sn].keys()
		jobs.append(new_job)
	#---master loop
	for outgoing in jobs:
		sn,slice_name,group = outgoing['sn'],outgoing['slice_name'],outgoing['group']
		#---if we combine slices for this calculation we use the whole time span in the base filename
		if type(slice_name)==list:
			#---! simple method for making the combination file key
			start = min([work.slice(sn)[s]['all' if not group else group]['start'] for s in slice_name])
			end = max([work.slice(sn)[s]['all' if not group else group]['end'] for s in slice_name])
			#---the skip is assumed uniform across combined slices so we read it from the first
			skip = work.slice(sn)[slice_name[0]]['all' if not group else group]['skip']
			#---! this filekey construction means the user will have to anticipate the names of combos
			fn_base = '%s.%d-%d-%d.%s'%(work.prefixer(sn),start,end,skip,function.__name__)
		else:
			#---we index all calculations automatically in case we loop over specs later
			index,fn_key = -1,''
			if not group:
				fn_base = re.findall('^v[0-9]+\.[0-9]+-[0-9]+-[0-9]+',
					work.slice(sn)[slice_name]['all' if not group else group]['filekey'])[0]+\
					'.%s'%function.__name__
			else:
				try:
					fn_base = work.slice(sn)[slice_name][
						'all' if not group else group]['filekey']+'.%s'%function.__name__
				except:
					raise Exception('[ERROR] cannot get the base filename for slice "%s" group "%s"'%(
						slice_name,group))
		prev = glob.glob(work.postdir+fn_base+'*.dat')
		if prev == []: index = 0
		else: index = max(map(lambda x:int(re.findall('^.+\/%s\.n([0-9]+)\.dat'%fn_base,x)[0]),prev))+1
		fn_key = '.n%d'%index
		fn = fn_base+fn_key+'.dat'
		#---safety check for file errors to prevent overwriting, though this should be handled by indices
		if os.path.isfile(work.postdir+fn):
			raise Exception('[ERROR] %s exists'%(work.postdir+fn))
		#---check for a specs file with the exact same specifications
		exists = True if index != -1 and work.select_postdata(fn_base,calc) != None else False
		if not exists:
			status("%s %s"%(function.__name__,str(outgoing)),tag='compute')
			outgoing['workspace'] = work
			outgoing['calc'] = calc
			if 'upstream' in outgoing:
				sn = outgoing['sn']
				outgoing['upstream'] = dict([(k,
					load(data[sn][k],work.postdir)) for k in outgoing['upstream']])
			result,attrs = function(**outgoing)
			"""
			Spec files are carefully constructed. They prevent redundant calculations and allow us
			to loop over many parameters while saving files with a single index. The calculation
			dictionary in the specs file contains meta-parameters for looping, and we are careful
			not to save these to the spec file: we only save parameters which are relevant to the
			calculation itself, so the calculation dictionary must separate them in a
			sub-dictionary called 'specs'. We prefer attrs to be small and specific, since attrs
			is also used to uniquely specify the data. All big data should be stored as a result
			via numpy.
			"""
			#---if any calculation specifications are not in attributes we warn the user here
			if 'specs' in calc: unaccounted = [i for i in calc['specs'] if i not in attrs]
			else: unaccounted = []
			if 'upstream' in unaccounted and 'upstream' not in attrs:
				status('automatically appending upstream data',tag='status')
				unaccounted.remove('upstream')
				attrs['upstream'] = calc['specs']['upstream']
			if any(unaccounted):
				print computer_error_attrs_passthrough+'\n\n'
				raise Exception('[ERROR] some calculation specs were not saved: %s'%str(unaccounted))
			store(result,fn,work.postdir,attrs=attrs)
			with open(work.postdir+fn_base+fn_key+'.spec','w') as fp:
				fp.write(json.dumps(attrs)+'\n')
	#---no modifications to work so no save
	return
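#---sketch of a calculation function that computer can drive (hypothetical names): computer
#---...builds each job with 'sn', 'grofile', 'trajfile', 'workspace', 'calc', and optional
#---...'upstream' keys, calls function(**job), and expects a (result,attrs) pair where result
#---...is a dictionary of numpy arrays destined for store and attrs holds the small, unique specs
def demo_calculation(grofile,trajfile,**kwargs):
	import numpy
	work = kwargs['workspace']
	calc = kwargs['calc']
	#---a real calculation would read the gro/xtc pair here; we return placeholder data
	result = {'frame_numbers':numpy.arange(100)}
	#---echo the calculation specs into attrs so the bookkeeping check in computer passes
	specs = calc.get('specs',{})
	attrs = dict(specs) if type(specs)==dict else {}
	return result,attrs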
def plotload(plotname,work,specfile=None,choice_override=None,use_group=False):
	"""
	Load postprocessing data for making a plot.
	Note that we currently do not use the specs items.
	"""
	#---disabled legacy path: read the plot specification directly from the yaml files
	if 0:
		if not specfile: specfile = work.paths['specs_file']
		#---load the yaml specifications file
		if type(specfile)==str: specfile = [specfile]
		raw_specs = ''
		for sfn in specfile:
			with open(sfn,'r') as fp: raw_specs += '\n'+fp.read()
		specs = yaml.load(raw_specs)
	specs = work.load_specs()
	#---disabled legacy path: merge automatic plots here
	if 0:
		if 'autoplots' in specs:
			for key,val in specs['autoplots'].items():
				if key in specs['plots']:
					raise Exception('\n[ERROR] redundant names in plots and autoplots: %s'%key+
						", which is populated with django so check calculator.Calculation")
				else: specs['plots'][key] = deepcopy(val)
	plotspecs = specs['plots'][plotname]
	#---load the calculation from the workspace
	calcnames = plotspecs['calculation']
	if type(calcnames)==str: calcnames = [calcnames]
	datasets = {name:[] for name in calcnames}
	calcsets = {name:[] for name in calcnames}
	#---loop over calcnames requested in the plot specs
	for calcname in calcnames:
		calcs = work.interpret_specs(work.calc[calcname])
		if len(calcs)==0: raise Exception('[ERROR] failed to retrieve calculations')
		#---get the group from either plotspecs or the calculation
		if 'group' in plotspecs: group = plotspecs['group']
		elif 'group' in work.calc[calcname]: group = work.calc[calcname]['group']
		else: group = None
		#---get the collection from either plotspecs or the upstream calculation
		if 'collections' in plotspecs:
			collections = plotspecs['collections']
			if type(collections)==str: collections = [collections]
		#---the fallback must come from work.calc since the local calc is not yet defined here
		else: collections = work.calc[calcname]['collections']
		sns = flatten([work.vars['collections'][c] for c in collections])
		#---compile all upstream data
		data = [{} for c in calcs]
		#---iterate over the loop over upstream calculations
		for calcnum,calcwhittle in enumerate(calcs):
			status('upstream data type: %s'%str(calcwhittle),tag='load')
			calc = deepcopy(work.calc[calcname])
			#---loop over simulations
			for snum,sn in enumerate(sns):
				status(sn.ljust(26),tag='load',i=snum,looplen=len(sns))
				#---slices in plotspecs or lookup from variables with plus-syntax
				#---! need to allow blank slices here so that the machine looks to calcs to get them
				if 'slices' in plotspecs and not re.match('^\+',plotspecs['slices']):
					work.cursor = (work.c,work.trajectory_format)
					sl = work.slice(sn)[plotspecs['slices']]['all' if not group else group]
				elif 'slices' in plotspecs:
					sl = deepcopy(delve(work.vars,*plotspecs['slices'].strip('+').split('/')))
					#---the slice might not have a filekey if it's a combo
					if 'filekey' not in sl:
						#---! pbc and groups will usually be absent here
						start,end,skip,pbc = [sl[i] for i in 'start,end,skip,pbc'.split(',')]
						sl['filekey'] = '%s.%d-%d-%d'%(work.prefixer(sn),start,end,skip)
				else: raise Exception('[ERROR] cannot infer slices')
				#---compute the base filename
				if not group:
					#---! deprecated
					fn_base = re.findall('^v[0-9]+\.[0-9]+-[0-9]+-[0-9]+',
						sl['filekey'])[0]+'.%s'%calcname
				elif use_group:
					#---special settings for loading certain kinds of data e.g. protein_abstractor
					fn_base = '%s.%s.pbc%s.%s'%(sl['filekey'],group,sl['pbc'],calcname)
				else: fn_base = '%s.%s'%(sl['filekey'],calcname)
				#---fill in upstream details in our replicate of the calculation specs
				for route,val in [(i,j) for i,j in catalog(calcwhittle)]:
					#---! the plot has to mimic the specs structure exactly otherwise error below
					try: endpoint = delve(work.calc[calcname],*route)
					except:
						raise Exception('[ERROR] the plot specs must mimic the calculation specs; '+
							'failed to delve route %s'%str(route))
					if type(endpoint)==dict and 'loop' in endpoint:
						try:
							penultimate = delve(calc,*route[:-1])
							penultimate[route[-1]] = val
						except: pass
				#---get the dat file and package it
				fn = work.select_postdata(fn_base,calc,debug=True)
				if fn == None:
					raise Exception(
						'[ERROR] cannot locate a file necessary for plotting via work.select_postdata\n'+
						'[ERROR] you probably need to fix your meta.yaml file and run "make compute"\n'+
						'[ERROR] check the post directory at "%s" and the variables fn_base,calc'%
						work.postdir)
				dat_fn = os.path.basename(fn)[:-4]+'dat'
				data[calcnum][sn] = {'data':load(dat_fn,work.postdir),
					'slice':sl,'group':group,'fn_base':fn_base}
		#---if there is only one calculation of this type then we elevate the package
		if len(calcs)==1: calcs,data = calcs[0],data[0]
		datasets[calcname],calcsets[calcname] = data,calcs
	#---if there is only one upstream calculation we return it directly
	if len(datasets)==1: return datasets.values()[0],calcsets.values()[0]
	else: return datasets,calcsets
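#---usage sketch for plotload (hypothetical plot name): 'demo_plot' must appear under plots in
#---...the specs/meta file with a 'calculation' entry; a single calculation returns the elevated
#---...(data,calcs) pair where data is keyed by simulation name
def _example_plotload(work):
	data,calcs = plotload('demo_plot',work)
	for sn in data:
		print('%s: %s'%(sn,data[sn]['fn_base']))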