def __init__(self, recipe=None, recipeURI=None, output_dir=None):
    from PYME.recipes import Recipe
    if recipe:
        if isinstance(recipe, string_types):
            self.recipe_text = recipe
            self.recipe = Recipe.fromYAML(recipe)
        else:
            self.recipe_text = recipe.toYAML()
            self.recipe = recipe

        self.recipeURI = None
    else:
        self.recipe = None
        if recipeURI is None:
            raise ValueError('recipeURI must be defined if no recipe given')
        else:
            from PYME.IO import unifiedIO
            self.recipeURI = recipeURI
            self.recipe = Recipe.fromYAML(unifiedIO.read(recipeURI))

    self.output_dir = output_dir
    self.taskQueueURI = _getTaskQueueURI()

    #generate a queue ID as a hash of the recipe and the current time
    to_hash = self.recipeURI if self.recipeURI else self.recipe_text
    try:
        # hashlib requires bytes on py3
        to_hash = to_hash.encode()
    except TypeError:
        # encoding without a string argument, i.e. already bytes
        pass

    h = hashlib.md5(to_hash)
    h.update(str(time.time()).encode())
    self.queueID = h.hexdigest()  # hexdigest returns str
def __init__(self, recipe=None, recipeURI=None):
    from PYME.recipes import Recipe
    if recipe:
        if isinstance(recipe, string_types):
            self.recipe_text = recipe
            self.recipe = Recipe.fromYAML(recipe)
        else:
            self.recipe_text = recipe.toYAML()
            self.recipe = recipe

        self.recipeURI = None
    else:
        self.recipe = None
        if recipeURI is None:
            raise ValueError('recipeURI must be defined if no recipe given')
        else:
            from PYME.IO import unifiedIO
            self.recipeURI = recipeURI
            self.recipe = Recipe.fromYAML(unifiedIO.read(recipeURI))

    self.taskQueueURI = _getTaskQueueURI()

    #generate a queue ID as a hash of the recipe and the current time
    to_hash = self.recipeURI if self.recipeURI else self.recipe_text
    h = hashlib.md5(to_hash.encode() if isinstance(to_hash, str) else to_hash)  # md5 needs bytes on py3
    h.update(str(time.time()).encode())  # as does update()
    self.queueID = h.hexdigest()
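# The queue ID above is simply an md5 of the recipe source (or its URI) salted with the
# current time, so re-pushing the same recipe always gets a fresh queue. A minimal,
# standalone sketch of the same scheme (the helper name is hypothetical; the surrounding
# class and _getTaskQueueURI live elsewhere in this module):
import hashlib
import time


def _make_queue_id(recipe_text_or_uri):
    """Mirror the queue-ID hashing used in the constructors above."""
    h = hashlib.md5(recipe_text_or_uri.encode())
    h.update(str(time.time()).encode())
    return h.hexdigest()  # e.g. '4b1d...' - a 32 character hex string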
def main():
    # Start the tornado ioloop application
    ioloop = IOLoop.instance()

    if len(sys.argv) > 1:
        filename = sys.argv[1]
        with open(filename, 'r') as f:
            recipe_yaml = f.read()
    else:
        recipe_yaml = ''

    recipe = Recipe.fromYAML(recipe_yaml)
    print(recipe)

    # Instantiate the domain model
    fred = Person(name='Fred', age=42)

    # Create a web app serving the view with the domain model added to its
    # context.
    app = WebApp(template=template, context={'recipe': recipe},
                 handlers=[(r'/static/(.*)', tornado.web.StaticFileHandler,
                            {'path': PYME.resources.get_web_static_dir()}), ])
    app.listen(8000)

    # Start serving the web app on port 8000.
    #
    # Point your web browser to http://localhost:8000/ to connect to this jigna
    # web app. Any operation performed on the client directly updates the
    # model attributes on the server.
    print('Serving on port 8000...')
    ioloop.start()
def main():
    # set matplotlib backend for offline figure generation
    #TODO - move this further down (i.e. to the figure generation code itself)?
    import matplotlib
    matplotlib.use('Cairo', warn=False)

    #start by finding out what recipe we're using - different recipes can have different options
    ap = ArgumentParser()  #usage='usage: %(prog)s [options] recipe.yaml'
    ap.add_argument('recipe')
    ap.add_argument('output_dir')
    ap.add_argument('-n', '--num-processes', default=NUM_PROCS)
    args, remainder = ap.parse_known_args()

    #load the recipe
    with open(args.recipe) as f:
        s = f.read()

    recipe = Recipe.fromYAML(s)

    output_dir = args.output_dir
    num_procs = args.num_processes

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    #create a new parser to parse input and output filenames
    op = ArgumentParser()
    for ip in recipe.inputs:
        op.add_argument('--%s' % ip)

    args = op.parse_args(remainder)

    inputGlobs = {k: glob.glob(getattr(args, k)) for k in recipe.inputs}

    bake(recipe, inputGlobs, output_dir, num_procs)
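# Example invocation (script name and paths are illustrative only): for a recipe that
# declares a single input named 'input',
#
#   python batch_bake.py my_recipe.yaml /data/run1/analysis --input '/data/run1/*.tif'
#
# The quoted glob is expanded per input before being handed to bake(), i.e. roughly:
import glob

recipe_inputs = ['input']                    # stand-in for recipe.inputs
cli_values = {'input': '/data/run1/*.tif'}   # stand-in for the parsed --input value
inputGlobs = {k: glob.glob(cli_values[k]) for k in recipe_inputs}
# -> {'input': ['/data/run1/series_000.tif', '/data/run1/series_001.tif', ...]}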
def run_template(request):
    from PYME import config
    from PYME.IO import unifiedIO
    from PYME.recipes import Recipe
    from PYME.recipes import modules
    from PYME.cluster.rules import RecipeRule

    recipeURI = 'pyme-cluster://%s/%s' % (server_filter,
                                          request.POST.get('recipeURL').lstrip('/'))
    output_directory = 'pyme-cluster://%s/%s' % (server_filter,
                                                 request.POST.get('recipeOutputPath').lstrip('/'))

    recipe_text = unifiedIO.safe_read(recipeURI).decode('utf-8')
    recipe = Recipe.fromYAML(recipe_text)

    # handle templated userfile inputs - these will be loaded by e.g. unifiedIO later
    for file_input in recipe.file_inputs:
        input_url = 'pyme-cluster://%s/%s' % (server_filter,
                                              request.POST.get('%sURL' % file_input).lstrip('/'))
        recipe_text = recipe_text.replace('{' + file_input + '}', input_url)

    rule = RecipeRule(recipe=recipe_text, output_dir=output_directory,
                      inputs={'input': request.POST.getlist('files', [])})
    rule.push()

    return HttpResponseRedirect('/status/queues/')
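# The view above resolves recipe "file inputs" by plain string substitution: any
# '{placeholder}' token in the recipe YAML is replaced with a full pyme-cluster:// URI
# chosen by the user before the rule is pushed. A small illustration (the recipe text,
# module and file names here are invented for the example):
recipe_text = """
- localisations.FiducialCorrection:
    inputName: input
    fiducialFile: '{fiducials}'
    outputName: corrected
"""
input_url = 'pyme-cluster://DEFAULT/CALIBRATION/fiducials_2019.hdf'
recipe_text = recipe_text.replace('{' + 'fiducials' + '}', input_url)
# the substituted YAML is what actually gets submitted as a RecipeRule; the referenced
# file is then loaded via unifiedIO on whichever node executes the recipe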
def main():
    #start by finding out what recipe we're using - different recipes can have different options
    ap = ArgumentParser(usage='usage: %(prog)s [options] recipe.yaml')
    ap.add_argument('recipe')
    args, remainder = ap.parse_known_args()

    #load the recipe
    with open(args.recipe) as f:
        s = f.read()

    recipe = Recipe.fromYAML(s)

    #create a new parser to parse input and output filenames
    op = ArgumentParser()
    for ip in recipe.inputs:
        op.add_argument('--%s' % ip)

    for ot in recipe.outputs:
        op.add_argument('--%s' % ot)

    args = op.parse_args(remainder)

    inputs = {k: getattr(args, k) for k in recipe.inputs}
    outputs = {k: getattr(args, k) for k in recipe.outputs}

    ##Run the recipe
    runRecipe(recipe, inputs, outputs)  #TODO - fix for contexts
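# With the two-stage parsing above, the available input/output flags come from the recipe
# itself rather than being hard-coded: a recipe declaring an input 'input' and an output
# 'results' would be invoked roughly as
#
#   python runRecipe.py my_recipe.yaml --input raw_series.h5r --results measurements.csv
#
# (script and file names are placeholders). The flag values are then collected into the
# inputs/outputs dictionaries handed to runRecipe().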
def test_recipe_1():
    rec = Recipe.fromYAML(recipe_1)

    im = ImageStack(filename=os.path.join(resources.get_test_data_dir(), 't_im.tif'))

    rec.execute(input=im)
    assert (np.allclose(rec.namespace['zoomed'].data_xyztc.shape, (88, 80, 241, 1, 2)))
def OnMeasureClusters(self, event=None):
    """
    Calculates various measures for clusters using
    PYME.recipes.localisations.MeasureClusters3D.

    Labels are taken from the 'dbscanClustered' key of the pipeline output;
    measurements are calculated for each label.
    """
    from PYME.recipes import localisations
    from PYME.recipes import Recipe

    # build a recipe programmatically
    measrec = Recipe()

    measrec.add_module(localisations.MeasureClusters3D(measrec, inputName='input',
                                                       labelsKey='dbscanClustered',
                                                       outputName='output'))

    measrec.namespace['input'] = self.pipeline.output

    #configure parameters
    if not measrec.configure_traits(view=measrec.pipeline_view, kind='modal'):
        return  # handle cancel

    # run recipe
    meas = measrec.execute()

    # For now, don't make this a data source, as that requires (for multicolor) clearing
    # the pipeline mappings.
    self.clusterMeasures.append(meas)
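# Several of these GUI handlers follow the same pattern: build a Recipe in code, point
# its 'input' at a tabular data source (normally self.pipeline.output), optionally let
# the user adjust parameters via configure_traits(), then call execute(). A stripped-down,
# GUI-free sketch of that pattern (module choice, column names and values are examples):
import numpy as np
from PYME.IO.tabular import DictSource
from PYME.recipes import Recipe
from PYME.recipes import tablefilters

rec = Recipe()
rec.add_module(tablefilters.FilterTable(rec, inputName='input', outputName='output',
                                        filters={'error_x': [0, 30]}))
rec.namespace['input'] = DictSource({'x': np.random.rand(100) * 1e3,
                                     'error_x': np.random.rand(100) * 50})
filtered = rec.execute()  # returns the table bound to the recipe's 'output'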
def bake_recipe(recipe_filename, inputGlobs, output_dir, *args, **kwargs):
    with open(recipe_filename) as f:
        s = f.read()

    recipe = Recipe.fromYAML(s)

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    bake(recipe, inputGlobs, output_dir, *args, **kwargs)
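# A usage sketch for bake_recipe() above - run every matched series through the same
# recipe, writing results under output_dir (recipe name and paths are illustrative):
if __name__ == '__main__':
    import glob
    bake_recipe('recipes/zoom_and_measure.yaml',
                {'input': glob.glob('/data/run1/series_*.tif')},
                '/data/run1/analysis')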
def OnClustersInTime(self, event=None):
    #FIXME - this would probably be better in an addon module outside of the core project
    from PYME.recipes import localisations
    from PYME.recipes import Recipe
    import matplotlib.pyplot as plt

    # build a recipe programmatically
    rec = Recipe()

    # split input according to colour channel selected
    rec.add_module(localisations.ExtractTableChannel(rec, inputName='input',
                                                     outputName='chan0', channel='chan0'))
    rec.add_module(localisations.ClusterCountVsImagingTime(rec, inputName='chan0',
                                                           stepSize=3000,
                                                           outputName='output'))

    #do this before configuring so that we already have the channel names populated
    rec.namespace['input'] = self.pipeline.output

    #configure parameters
    if not rec.configure_traits(view=rec.pipeline_view, kind='modal'):
        return  #handle cancel

    incrementedClumps = rec.execute()

    plt.figure()
    plt.scatter(incrementedClumps['t'], incrementedClumps['N_labelsWithLowMinPoints'],
                label=('clusters with Npoints > %i' % rec.modules[-1].lowerMinPtsPerCluster),
                c='b', marker='s')
    plt.scatter(incrementedClumps['t'], incrementedClumps['N_labelsWithHighMinPoints'],
                label=('clusters with Npoints > %i' % rec.modules[-1].higherMinPtsPerCluster),
                c='g', marker='o')

    plt.legend(loc=4, scatterpoints=1)
    plt.xlabel('Number of frames included')
    plt.ylabel('Number of Clusters')
def LoadRecipeText(self, s, filename=''):
    self.currentFilename = filename
    self.activeRecipe = Recipe.fromYAML(s)
    #self.mICurrent.SetItemLabel('Run %s\tF5' % os.path.split(filename)[1])

    try:
        self.activeRecipe.recipe_changed.connect(self.recipeView.update)
        self.activeRecipe.recipe_executed.connect(self.recipeView.update)
        self.activeRecipe.recipe_failed.connect(self.recipeView.update)
        self.recipeView.update()
    except AttributeError:
        pass
def extra_inputs(request):
    from PYME.IO import unifiedIO
    from PYME.recipes import Recipe

    recipeURI = ('pyme-cluster://%s/' % server_filter) + request.GET.get('recipeURL').lstrip('/')

    recipe = Recipe.fromYAML(unifiedIO.safe_read(recipeURI))

    return render(request, 'recipes/extra_inputs.html',
                  {'file_inputs': recipe.file_inputs, 'serverfilter': server_filter})
def view_svg(request):
    from PYME.IO import unifiedIO
    from PYME.recipes import Recipe
    from PYME.recipes import modules
    from PYME.recipes import recipeLayout

    recipeURI = ('pyme-cluster://%s/' % server_filter) + request.GET.get('recipeURL').lstrip('/')

    recipe = Recipe.fromYAML(unifiedIO.safe_read(recipeURI))

    svg = recipeLayout.to_svg(recipe.dependancyGraph())

    return HttpResponse(svg, content_type='image/svg+xml')
def runRecipe(recipe, inputs, outputs, context={}, metadata_defaults={}):
    """Load inputs and run recipe, saving outputs.

    Parameters
    ----------
    recipe : an instance of PYME.recipes.Recipe, or a YAML string defining one
    inputs : a dictionary mapping recipe input names to filenames. These are loaded
        and inserted into the namespace before running the recipe.
    outputs : a dictionary mapping recipe output names to filenames. The corresponding
        members of the namespace are saved to disk following execution of the recipe.
    context : a dictionary used for filename substitutions
    metadata_defaults : a dictionary (or metadata handler) specifying metadata entries
        to use if input files have incomplete metadata
    """
    try:
        if not isinstance(recipe, Recipe):
            # recipe is a string
            recipe = Recipe.fromYAML(recipe)

        #the recipe instance might be re-used - clear any previous data
        recipe.namespace.clear()

        #load any necessary inputs and populate the recipe's namespace
        for key, filename in inputs.items():
            recipe.loadInput(filename, key, metadata_defaults)

        ### Run the recipe ###
        res = recipe.execute()

        #save any outputs (old-style, detected using the 'out' prefix)
        for k, v in outputs.items():
            saveOutput(recipe.namespace[k], v)

        #new style output saving - using OutputModules
        recipe.save(context)
    except:
        logger.exception('Error running recipe')
        raise
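# Usage sketch for runRecipe() (file names, the 'results' output name and the metadata
# key are illustrative only; context entries are used for {substitutions} when the
# recipe's output modules save their results):
#
#   recipe_yaml = open('my_recipe.yaml').read()
#   runRecipe(recipe_yaml,
#             inputs={'input': 'raw_series.h5r'},
#             outputs={'results': 'measurements.csv'},
#             context={'output_dir': '/analysis/run1', 'file_stub': 'raw_series'},
#             metadata_defaults={'voxelsize.x': 0.105})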
def test_queue_acquisitions():
    from PYME.IO.tabular import DictSource
    from PYME.recipes import Recipe
    import numpy as np
    import time

    action_manager.paused = True

    d = DictSource({'x': np.arange(10), 'y': np.arange(10)})
    rec = Recipe()
    rec.namespace['input'] = d

    spool_settings = {'extra_metadata': {'Sample.Well': '{file_stub}'}}
    rec.add_module(acquisition.QueueAcquisitions(rec, spool_settings=spool_settings))
    rec.save(context={'file_stub': 'A1'})

    time.sleep(1)
    task = action_manager.actionQueue.get_nowait()
    assert 'A1' == task[1]._then.params['extra_metadata']['Sample.Well']
def main():
    os.environ['DJANGO_SETTINGS_MODULE'] = 'clusterUI.settings'  # path to your settings module
    application = get_wsgi_application()

    django_app = tornado.wsgi.WSGIContainer(application)
    tornado_app = JignaWebApp(
        handlers=[
            (r'/static/(.*)', tornado.web.StaticFileHandler,
             {'path': PYME.resources.get_web_static_dir()}),
            #(r'/media/(.*)', tornado.web.StaticFileHandler, {'path': MEDIA_URL}),
            #(r'/recipe_editor/(.*)', tornado.web.StaticFileHandler, {'path': os.path.dirname(html_recipe_editor.__file__)}),
            (r'.*', tornado.web.FallbackHandler, dict(fallback=django_app)),
        ],
        template=html_recipe_editor.template,
        context={'recipe': Recipe.fromYAML(rec_text)})

    #server = tornado.httpserver.HTTPServer(tornado_app)
    http_server = tornado.httpserver.HTTPServer(tornado_app)
    http_server.listen(8889)
    tornado.ioloop.IOLoop.instance().start()
class Pipeline: def __init__(self, filename=None, visFr=None): self.filter = None self.mapping = None self.colourFilter = None self.events = None self.recipe = Recipe(execute_on_invalidation=True) self.recipe.recipe_executed.connect(self.Rebuild) self.selectedDataSourceKey = None self.filterKeys = { 'error_x': (0, 30), 'error_y': (0, 30), 'A': (5, 20000), 'sig': (95, 200) } self.blobSettings = BlobSettings() self.objects = None self.imageBounds = ImageBounds(0, 0, 0, 0) self.mdh = MetaDataHandler.NestedClassMDHandler() self.Triangles = None self.edb = None self.Quads = None self.GeneratedMeasures = {} self.QTGoalPixelSize = 5 self._extra_chan_num = 0 self.filesToClose = [] self.ev_mappings = {} #define a signal which a GUI can hook if the pipeline is rebuilt (i.e. the output changes) self.onRebuild = dispatch.Signal() #a cached list of our keys to be used to decide whether to fire a keys changed signal self._keys = None #define a signal which can be hooked if the pipeline keys have changed self.onKeysChanged = dispatch.Signal() self.ready = False #self.visFr = visFr if not filename is None: self.OpenFile(filename) #renderers.renderMetadataProviders.append(self.SaveMetadata) @property def output(self): return self.colourFilter def __getitem__(self, keys): """gets values from the 'tail' of the pipeline (ie the colourFilter)""" return self.output[keys] def keys(self): return self.output.keys() def __getattr__(self, item): try: #if 'colourFilter in ' if self.output is None: raise AttributeError('colourFilter not yet created') return self.output[item] except KeyError: raise AttributeError("'%s' has not attribute '%s'" % (self.__class__, item)) def __dir__(self): if self.output is None: return list(self.__dict__.keys()) + list(dir(type(self))) else: return list(self.output.keys()) + list( self.__dict__.keys()) + list(dir(type(self))) #compatibility redirects @property def fluorSpecies(self): #warnings.warn(DeprecationWarning('Use colour_mapper.species_ratios instead')) raise DeprecationWarning('Use colour_mapper.species_ratios instead') return self.colour_mapper.species_ratios @property def fluorSpeciesDyes(self): #warnings.warn(DeprecationWarning('Use colour_mapper.species_dyes instead')) raise DeprecationWarning('Use colour_mapper.species_dyes instead') return self.colour_mapper.species_dyes @property def chromaticShifts(self): return self.colourFilter.chromaticShifts #end compatibility redirects @property def dataSources(self): return self.recipe.namespace @property def layer_datasources(self): lds = {'output': self.colourFilter} lds.update(self.dataSources) return lds @property def layer_data_source_names(self): """ Return a list of names of datasources we can use with dotted channel selection There is a little bit of magic here as we augment the names with dotted names for colour channel selection """ names = [] #''] for k, v in self.layer_datasources.items(): names.append(k) if isinstance(v, tabular.ColourFilter): for c in v.getColourChans(): names.append('.'.join([k, c])) return names def get_layer_data(self, dsname): """ Returns layer data for a given name. The principle difference to just accessing self.dataSources directly is that we do some magic relating to allow colour channels to be accessed with the dot notation e.g. 
dsname.colour_channel """ if dsname == '': return self parts = dsname.split('.') if len(parts) == 2: # special case - permit access to channels using dot notation # NB: only works if our underlying datasource is a ColourFilter ds, channel = parts if ds == 'output': return self.colourFilter.get_channel_ds(channel) else: return self.dataSources.get(ds, None).get_channel_ds(channel) else: if dsname == 'output': return self.colourFilter else: return self.dataSources.get(dsname, None) @property def selectedDataSource(self): """ The currently selected data source (an instance of tabular.inputFilter derived class) """ if self.selectedDataSourceKey is None: return None else: return self.dataSources[self.selectedDataSourceKey] def selectDataSource(self, dskey): """ Set the currently selected data source Parameters ---------- dskey : string The data source name """ if not dskey in self.dataSources.keys(): raise KeyError('Data Source "%s" not found' % dskey) self.selectedDataSourceKey = dskey #remove any keys from the filter which are not present in the data for k in list(self.filterKeys.keys()): if not k in self.selectedDataSource.keys(): self.filterKeys.pop(k) self.Rebuild() def new_ds_name(self, stub, return_count=False): """ Generate a name for a new, unused, pipeline step output based on a stub FIXME - should this be in ModuleCollection instead? FIXME - should this use recipe.outputs as well? Parameters ---------- stub - string to start the name with Returns ------- """ count = 0 pattern = stub + '%d' name = pattern % count while name in self.dataSources.keys(): count += 1 name = pattern % count if return_count: return name, count return name def addColumn(self, name, values, default=0): """ Adds a column to the currently selected data source. Attempts to guess whether the size matches the input or the output, and adds padding values appropriately if it matches the output. Parameters ---------- name : str The column name values : array like The values default : float The default value to pad with if we've given an output-sized array """ import warnings warnings.warn( 'Deprecated. 
You should not add columns to the pipeline as this injects data and is not captured by the recipe', DeprecationWarning) ds_len = len( self.selectedDataSource[self.selectedDataSource.keys()[0]]) val_len = len(values) if val_len == ds_len: #length matches the length of our input data source - do a simple add self.selectedDataSource.addColumn(name, values) elif val_len == len(self[self.keys()[0]]): col_index = self.colourFilter.index idx = np.copy(self.filter.Index) idx[self.filter.Index] = col_index ds_vals = np.zeros(ds_len) + default ds_vals[idx] = np.array(values) self.selectedDataSource.addColumn(name, ds_vals) else: raise RuntimeError( "Length of new column doesn't match either the input or output lengths" ) def addDataSource(self, dskey, ds, add_missing_vars=True): """ Add a new data source Parameters ---------- dskey : str The name of the new data source ds : an tabular.inputFilter derived class The new data source """ #check that we have a suitable object - note that this could potentially be relaxed assert isinstance(ds, tabular.TabularBase) if not isinstance(ds, tabular.MappingFilter): #wrap with a mapping filter ds = tabular.MappingFilter(ds) #add keys which might not already be defined if add_missing_vars: _add_missing_ds_keys(ds, self.ev_mappings) if getattr(ds, 'mdh', None) is None: try: ds.mdh = self.mdh except AttributeError: logger.error('No metadata defined in pipeline') pass self.dataSources[dskey] = ds def Rebuild(self, **kwargs): """ Rebuild the pipeline. Called when the selected data source is changed/modified and/or the filter is changed. """ for s in self.dataSources.values(): if 'setMapping' in dir(s): #keep raw measurements available s.setMapping('x_raw', 'x') s.setMapping('y_raw', 'y') if 'z' in s.keys(): s.setMapping('z_raw', 'z') if not self.selectedDataSource is None: if not self.mapping is None: # copy any mapping we might have made across to the new mapping filter (should fix drift correction) # TODO - make drift correction a recipe module so that we don't need this code. Long term we should be # ditching the mapping filter here. old_mapping = self.mapping self.mapping = tabular.MappingFilter(self.selectedDataSource) self.mapping.mappings.update(old_mapping.mappings) else: self.mapping = tabular.MappingFilter(self.selectedDataSource) #the filter, however needs to be re-generated with new keys and or data source self.filter = tabular.ResultsFilter(self.mapping, **self.filterKeys) #we can also recycle the colour filter if self.colourFilter is None: self.colourFilter = tabular.ColourFilter(self.filter) else: self.colourFilter.resultsSource = self.filter #self._process_colour() self.ready = True self.ClearGenerated() def ClearGenerated(self): self.Triangles = None self.edb = None self.GeneratedMeasures = {} self.Quads = None self.onRebuild.send_robust(sender=self) #check to see if any of the keys have changed - if so, fire a keys changed event so the GUI can update newKeys = self.keys() if not newKeys == self._keys: self.onKeysChanged.send_robust(sender=self) def CloseFiles(self): while len(self.filesToClose) > 0: self.filesToClose.pop().close() def _ds_from_file(self, filename, **kwargs): """ loads a data set from a file Parameters ---------- filename : str kwargs : any additional arguments (see OpenFile) Returns ------- ds : tabular.TabularBase the datasource, complete with metadatahandler and events if found. 
""" mdh = MetaDataHandler.NestedClassMDHandler() events = None if os.path.splitext(filename)[1] == '.h5r': import tables h5f = tables.open_file(filename) self.filesToClose.append(h5f) try: ds = tabular.H5RSource(h5f) if 'DriftResults' in h5f.root: driftDS = tabular.H5RDSource(h5f) self.driftInputMapping = tabular.MappingFilter(driftDS) #self.dataSources['Fiducials'] = self.driftInputMapping self.addDataSource('Fiducials', self.driftInputMapping) if len(ds['x']) == 0: self.selectDataSource('Fiducials') except: #fallback to catch series that only have drift data logger.exception('No fitResults table found') ds = tabular.H5RDSource(h5f) self.driftInputMapping = tabular.MappingFilter(ds) #self.dataSources['Fiducials'] = self.driftInputMapping self.addDataSource('Fiducials', self.driftInputMapping) #self.selectDataSource('Fiducials') # really old files might not have metadata, so test for it before assuming if 'MetaData' in h5f.root: mdh = MetaDataHandler.HDFMDHandler(h5f) if ('Events' in h5f.root) and ('StartTime' in mdh.keys()): events = h5f.root.Events[:] elif filename.endswith('.hdf'): #recipe output - handles generically formatted .h5 import tables h5f = tables.open_file(filename) self.filesToClose.append(h5f) #defer our IO to the recipe IO method - TODO - do this for other file types as well self.recipe._inject_tables_from_hdf5('', h5f, filename, '.hdf') for dsname, ds_ in self.dataSources.items(): #loop through tables until we get one which defines x. If no table defines x, take the last table to be added #TODO make this logic better. ds = ds_ if 'x' in ds.keys(): # TODO - get rid of some of the grossness here mdh = getattr(ds, 'mdh', mdh) events = getattr(ds, 'events', events) break elif os.path.splitext(filename)[1] == '.mat': #matlab file if 'VarName' in kwargs.keys(): #old style matlab import ds = tabular.MatfileSource(filename, kwargs['FieldNames'], kwargs['VarName']) else: if kwargs.get('Multichannel', False): ds = tabular.MatfileMultiColumnSource(filename) else: ds = tabular.MatfileColumnSource(filename) # check for column name mapping field_names = kwargs.get('FieldNames', None) if field_names: if kwargs.get('Multichannel', False): field_names.append( 'probe') # don't forget to copy this field over ds = tabular.MappingFilter( ds, **{ new_field: old_field for new_field, old_field in zip( field_names, ds.keys()) }) elif os.path.splitext(filename)[1] == '.csv': #special case for csv files - tell np.loadtxt to use a comma rather than whitespace as a delimeter if 'SkipRows' in kwargs.keys(): ds = tabular.TextfileSource(filename, kwargs['FieldNames'], delimiter=',', skiprows=kwargs['SkipRows']) else: ds = tabular.TextfileSource(filename, kwargs['FieldNames'], delimiter=',') else: #assume it's a tab (or other whitespace) delimited text file if 'SkipRows' in kwargs.keys(): ds = tabular.TextfileSource(filename, kwargs['FieldNames'], skiprows=kwargs['SkipRows']) else: ds = tabular.TextfileSource(filename, kwargs['FieldNames']) # make sure mdh is writable (file-based might not be) ds.mdh = MetaDataHandler.NestedClassMDHandler(mdToCopy=mdh) if events is not None: # only set the .events attribute if we actually have events. # ensure that events are sorted in increasing time order ds.events = events[np.argsort(events['Time'])] return ds def OpenFile(self, filename='', ds=None, clobber_recipe=True, **kwargs): """Open a file - accepts optional keyword arguments for use with files saved as .txt and .mat. These are: FieldNames: a list of names for the fields in the text file or matlab variable. 
VarName: the name of the variable in the .mat file which contains the data. SkipRows: Number of header rows to skip for txt file data PixelSize: Pixel size if not in nm """ #close any files we had open previously while len(self.filesToClose) > 0: self.filesToClose.pop().close() # clear our state # nb - equivalent to clearing recipe namespace self.dataSources.clear() if clobber_recipe: # clear any processing modules from the pipeline # call with clobber_recipe = False in a 'Open a new file with the processing pipeline I've set up' use case # TODO: Add an "File-->Open [preserving recipe]" menu option or similar self.recipe.modules = [] if 'zm' in dir(self): del self.zm self.filter = None self.mapping = None self.colourFilter = None self.events = None self.mdh = MetaDataHandler.NestedClassMDHandler() self.filename = filename if ds is None: from PYME.IO import unifiedIO # TODO - what is the launch time penalty here for importing clusterUI and finding a nameserver? # load from file(/cluster, downloading a copy of the file if needed) with unifiedIO.local_or_temp_filename(filename) as fn: # TODO - check that loading isn't lazy (i.e. we need to make a copy of data in memory whilst in the # context manager in order to be safe with unifiedIO and cluster data). From a quick look, it would seem # that _ds_from_file() copies the data, but potentially keeps the file open which could be problematic. # This won't effect local file loading even if loading is lazy (i.e. shouldn't cause a regression) ds = self._ds_from_file(fn, **kwargs) self.events = getattr(ds, 'events', None) self.mdh.copyEntriesFrom(ds.mdh) # skip the MappingFilter wrapping, etc. in self.addDataSource and add this datasource as-is self.dataSources['FitResults'] = ds # Fit module specific filter settings # TODO - put all the defaults here and use a local variable rather than in __init__ (self.filterKeys is largely an artifact of pre-recipe based pipeline) if 'Analysis.FitModule' in self.mdh.getEntryNames(): fitModule = self.mdh['Analysis.FitModule'] if 'Interp' in fitModule: self.filterKeys['A'] = (5, 100000) if fitModule == 'SplitterShiftEstFR': self.filterKeys['fitError_dx'] = (0, 10) self.filterKeys['fitError_dy'] = (0, 10) if clobber_recipe: from PYME.recipes.localisations import ProcessColour, Pipelineify from PYME.recipes.tablefilters import FilterTable add_pipeline_variables = Pipelineify( self.recipe, inputFitResults='FitResults', pixelSizeNM=kwargs.get('PixelSize', 1.), outputLocalizations='Localizations') self.recipe.add_module(add_pipeline_variables) #self._get_dye_ratios_from_metadata() colour_mapper = ProcessColour(self.recipe, input='Localizations', output='colour_mapped') self.recipe.add_module(colour_mapper) self.recipe.add_module( FilterTable(self.recipe, inputName='colour_mapped', outputName='filtered_localizations', filters={ k: list(v) for k, v in self.filterKeys.items() if k in ds.keys() })) else: logger.warn( 'Opening file without clobbering recipe, filter and ratiometric colour settings might not be handled properly' ) # FIXME - should we update filter keys and/or make the filter more robust # FIXME - do we need to do anything about colour settings? self.recipe.execute() self.filterKeys = {} if 'filtered_localizations' in self.dataSources.keys(): self.selectDataSource( 'filtered_localizations') #NB - this rebuilds the pipeline else: # TODO - replace / remove this fallback with something better. 
This is currently required # when we use/abuse the pipeline in dh5view, but that should ideally be replaced with # something cleaner. This (and case above) should probably also be conditional on `clobber_recipe` # as if opening with an existing recipe we would likely want to keep selectedDataSource constant as well. self.selectDataSource('FitResults') # FIXME - we do this already in pipelinify, maybe we can avoid doubling up? self.ev_mappings, self.eventCharts = _processEvents( ds, self.events, self.mdh) # extract information from any events # Retrieve or estimate image bounds if False: # 'imgBounds' in kwargs.keys(): # TODO - why is this disabled? Current usage would appear to be when opening from LMAnalysis # during real-time localization, to force image bounds to match raw data, but also potentially useful # for other scenarios where metadata is not fully present. self.imageBounds = kwargs['imgBounds'] elif ('scanx' not in self.selectedDataSource.keys() or 'scany' not in self.selectedDataSource.keys() ) and 'Camera.ROIWidth' in self.mdh.getEntryNames(): self.imageBounds = ImageBounds.extractFromMetadata(self.mdh) else: self.imageBounds = ImageBounds.estimateFromSource( self.selectedDataSource) #self._process_colour() @property def colour_mapper(self): """ Search for a colour mapper rather than use a hard coded reference - allows loading of saved pipelines with colour mapping""" from PYME.recipes.localisations import ProcessColour # find ProcessColour instance(s) in the pipeline mappers = [ m for m in self.recipe.modules if isinstance(m, ProcessColour) ] if len(mappers) > 0: #return the first mapper we find return mappers[0] else: return None def OpenChannel(self, filename='', ds=None, channel_name='', **kwargs): """Open a file - accepts optional keyword arguments for use with files saved as .txt and .mat. These are: FieldNames: a list of names for the fields in the text file or matlab variable. VarName: the name of the variable in the .mat file which contains the data. SkipRows: Number of header rows to skip for txt file data PixelSize: Pixel size if not in nm """ if channel_name == '' or channel_name is None: #select a channel name automatically channel_name = 'Channel%d' % self._extra_chan_num self._extra_chan_num += 1 if ds is None: #load from file ds = self._ds_from_file(filename, **kwargs) #wrap the data source with a mapping so we can fiddle with things #e.g. combining z position and focus mapped_ds = tabular.MappingFilter(ds) if 'PixelSize' in kwargs.keys(): mapped_ds.addVariable('pixelSize', kwargs['PixelSize']) mapped_ds.setMapping('x', 'x*pixelSize') mapped_ds.setMapping('y', 'y*pixelSize') self.addDataSource(channel_name, mapped_ds) def _process_colour(self): """ Locate any colour / channel information and munge it into a format that the colourFilter understands. We currently accept 3 ways of specifying channels: - ratiometric colour, where 'gFrac' is defined to be the ratio between our observation channels - defining a 'probe' column in the input data which gives a channel index for each point - specifying colour ranges in the metadata All of these get munged into the p_dye type entries that the colour filter needs. 
""" #clear out old colour keys warnings.warn( DeprecationWarning( 'This should not be called (colour now handled by the ProcessColour recipe module)' )) for k in self.mapping.mappings.keys(): if k.startswith('p_'): self.mapping.mappings.pop(k) if 'gFrac' in self.selectedDataSource.keys(): #ratiometric for structure, ratio in self.fluorSpecies.items(): if not ratio is None: self.mapping.setMapping( 'p_%s' % structure, 'exp(-(%f - gFrac)**2/(2*error_gFrac**2))/(error_gFrac*sqrt(2*numpy.pi))' % ratio) else: if 'probe' in self.mapping.keys(): #non-ratiometric (i.e. sequential) colour #color channel is given in 'probe' column self.mapping.setMapping('ColourNorm', '1.0 + 0*probe') for i in range(int(self.mapping['probe'].min()), int(self.mapping['probe'].max() + 1)): self.mapping.setMapping('p_chan%d' % i, '1.0*(probe == %d)' % i) nSeqCols = self.mdh.getOrDefault('Protocol.NumberSequentialColors', 1) if nSeqCols > 1: for i in range(nSeqCols): self.mapping.setMapping('ColourNorm', '1.0 + 0*t') cr = self.mdh['Protocol.ColorRange%d' % i] self.mapping.setMapping('p_chan%d' % i, '(t>= %d)*(t<%d)' % cr) #self.ClearGenerated() def _get_dye_ratios_from_metadata(self): warnings.warn( DeprecationWarning( 'This should not be called (colour now handled by the ProcessColour recipe module)' )) labels = self.mdh.getOrDefault('Sample.Labelling', []) seen_structures = [] for structure, dye in labels: if structure in seen_structures: strucname = structure + '_1' else: strucname = structure seen_structures.append(structure) ratio = dyeRatios.getRatio(dye, self.mdh) if not ratio is None: self.fluorSpecies[strucname] = ratio self.fluorSpeciesDyes[strucname] = dye #self.mapping.setMapping('p_%s' % structure, '(1.0/(ColourNorm*2*numpy.pi*fitError_Ag*fitError_Ar))*exp(-(fitResults_Ag - %f*A)**2/(2*fitError_Ag**2) - (fitResults_Ar - %f*A)**2/(2*fitError_Ar**2))' % (ratio, 1-ratio)) #self.mapping.setMapping('p_%s' % structure, 'exp(-(%f - gFrac)**2/(2*error_gFrac**2))/(error_gFrac*sqrt(2*numpy.pi))' % ratio) def getNeighbourDists(self, forceRetriang=False): from PYME.LMVis import visHelpers if forceRetriang or not 'neighbourDistances' in self.GeneratedMeasures.keys( ): statNeigh = statusLog.StatusLogger( "Calculating mean neighbour distances ...") self.GeneratedMeasures['neighbourDistances'] = np.array( visHelpers.calcNeighbourDists( self.getTriangles(forceRetriang))) return self.GeneratedMeasures['neighbourDistances'] def getTriangles(self, recalc=False): from matplotlib import tri if self.Triangles is None or recalc: statTri = statusLog.StatusLogger("Generating Triangulation ...") self.Triangles = tri.Triangulation( self.colourFilter['x'] + .1 * np.random.normal(size=len(self.colourFilter['x'])), self.colourFilter['y'] + .1 * np.random.normal(size=len(self.colourFilter['x']))) #reset things which will have changed self.edb = None try: self.GeneratedMeasures.pop('neighbourDistances') except KeyError: pass return self.Triangles def getEdb(self): from PYME.Analysis.points.EdgeDB import edges if self.edb is None: self.edb = edges.EdgeDB(self.getTriangles()) return self.edb def getBlobs(self): from PYME.Analysis.points.EdgeDB import edges tri = self.getTriangles() edb = self.getEdb() if self.blobSettings.jittering == 0: self.objIndices = edges.objectIndices( edb.segment(self.blobSettings.distThreshold), self.blobSettings.minSize) self.objects = [ np.vstack((tri.x[oi], tri.y[oi])).T for oi in self.objIndices ] else: from matplotlib import tri ndists = self.getNeighbourDists() x_ = np.hstack([ self['x'] + 0.5 * ndists * 
np.random.normal(size=ndists.size) for i in range(self.blobSettings.jittering) ]) y_ = np.hstack([ self['y'] + 0.5 * ndists * np.random.normal(size=ndists.size) for i in range(self.blobSettings.jittering) ]) T = tri.Triangulation(x_, y_) edb = edges.EdgeDB(T) objIndices = edges.objectIndices( edb.segment(self.blobSettings.distThreshold), self.blobSettings.minSize) self.objects = [ np.vstack((T.x[oi], T.y[oi])).T for oi in objIndices ] return self.objects, self.blobSettings.distThreshold def GenQuads(self, max_leaf_size=10): from PYME.Analysis.points.QuadTree import pointQT di = max(self.imageBounds.x1 - self.imageBounds.x0, self.imageBounds.y1 - self.imageBounds.y0) numPixels = di / self.QTGoalPixelSize di = self.QTGoalPixelSize * 2**np.ceil(np.log2(numPixels)) self.Quads = pointQT.qtRoot(self.imageBounds.x0, self.imageBounds.x0 + di, self.imageBounds.y0, self.imageBounds.y0 + di) for xi, yi in zip(self['x'], self['y']): self.Quads.insert(pointQT.qtRec(xi, yi, None), max_leaf_size) def measureObjects(self): from PYME.Analysis.points import objectMeasure self.objectMeasures = objectMeasure.measureObjects( self.objects, self.objThreshold) return self.objectMeasures def save_txt(self, outFile, keys=None): if outFile.endswith('.csv'): delim = ', ' else: delim = '\t' if keys is None: keys = self.keys() #nRecords = len(ds[keys[0]]) of = open(outFile, 'w') of.write('#' + delim.join(['%s' % k for k in keys]) + '\n') for row in zip(*[self[k] for k in keys]): of.write(delim.join(['%e' % c for c in row]) + '\n') of.close() def save_hdf(self, filename): self.colourFilter.to_hdf(filename, tablename='Localizations', metadata=self.mdh) def to_recarray(self, keys=None): return self.colourFilter.to_recarray(keys=keys) def toDataFrame(self, keys=None): import pandas as pd if keys is None: keys = self.keys() d = {k: self[k] for k in keys} return pd.DataFrame(d) @property def dtypes(self): return {k: str(self[k, :2].dtype) for k in self.keys()} def _repr_html_(self): import jinja2 TEMPLATE = """ <h3> LMVis.pipeline.Pipeline viewing {{ pipe.filename }} </h3> <br> {{ recipe_svg }} <b> Data Sources: </b> {% for k in pipe.dataSources.keys() %} {% if k != pipe.selectedDataSourceKey %} {{ k }} - [{{ pipe.dataSources[k]|length }} evts], {% endif %} {% endfor %} <b> {{ pipe.selectedDataSourceKey }} - [{{ pipe.dataSources[pipe.selectedDataSourceKey]|length }} evts]</b> <br> <b> Columns: </b> {{ grouped_keys }} """ try: svg = self.recipe.to_svg() except: svg = None fr_keys = [] fe_keys = [] sl_keys = [] st_keys = [] for k in self.keys(): if k.startswith('fitResults'): fr_keys.append(k) elif k.startswith('fitError'): fe_keys.append(k) elif k.startswith('slicesUsed'): sl_keys.append(k) else: st_keys.append(k) grouped_keys = sorted(st_keys) + sorted(fr_keys) + sorted( fe_keys) + sorted(sl_keys) return jinja2.Template(TEMPLATE).render( pipe=self, recipe_svg=svg, grouped_keys=', '.join(grouped_keys))
def test_stats_by_frame():
    recipe = Recipe()

    test_length = 10
    x, y = np.meshgrid(range(test_length), range(test_length))
    mask = x > test_length / 2  # mask out everything but 6, 7, 8, 9

    # check 2D
    recipe.namespace['input'] = ImageStack(data=x)
    recipe.namespace['mask'] = ImageStack(data=mask)
    stats_mod = processing.StatisticsByFrame(input_name='input', mask='mask',
                                             output_name='output')
    recipe.add_module(stats_mod)
    stats = recipe.execute()

    # check results
    assert len(stats['mean']) == 1
    assert stats['mean'] == 7.5

    # test 3D with 2D mask
    recipe.namespace.clear()
    x3, y3, z3 = np.meshgrid(range(test_length), range(test_length), range(test_length))
    recipe.namespace['input'] = ImageStack(data=z3)
    # reuse the same mask from before, which will now take the right 4 columns at each slice
    recipe.namespace['mask'] = ImageStack(data=mask)
    stats = recipe.execute()

    # check results
    np.testing.assert_array_almost_equal(stats['mean'], range(test_length))

    # test 3D with 3D mask
    mask = x3 > test_length / 2
    recipe.namespace['mask'] = ImageStack(data=mask)
    stats = recipe.execute()

    # check results
    np.testing.assert_array_almost_equal(stats['mean'],
                                         np.ma.masked_array(z3, mask=~(x3 > test_length / 2)).mean(axis=(0, 1)))

    # test no mask
    stats_mod.mask = ''
    stats = recipe.execute()
    np.testing.assert_array_almost_equal(stats['mean'], np.mean(z3, axis=(0, 1)))
def computeLoop(self):
    while self._loop_alive:
        #loop over tasks - we pop each task and then delete it after processing
        #to keep memory usage down
        queueURL, taskDescr = self.inputQueue.get()
        if taskDescr['type'] == 'localization':
            try:
                task = remFitBuf.createFitTaskFromTaskDef(taskDescr)
                res = task()

                self.resultsQueue.put((queueURL, taskDescr, res))
            except:
                import traceback
                traceback.print_exc()
                tb = traceback.format_exc()
                logger.exception(tb)
                self.resultsQueue.put((queueURL, taskDescr, TaskError(taskDescr, tb)))
                #self.resultsQueue.put((queueURL, taskDescr, None))

        elif taskDescr['type'] == 'recipe':
            from PYME.recipes import Recipe
            from PYME.recipes import modules

            try:
                taskdefRef = taskDescr.get('taskdefRef', None)
                if taskdefRef:  #recipe is defined in a file - go find it
                    recipe_yaml = unifiedIO.read(taskdefRef)
                else:  #recipe is defined in the task
                    recipe_yaml = taskDescr['taskdef']['recipe']

                recipe = Recipe.fromYAML(recipe_yaml)

                #load recipe inputs
                logging.debug(taskDescr)
                for key, url in taskDescr['inputs'].items():
                    logging.debug('RECIPE: loading %s as %s' % (url, key))
                    recipe.loadInput(url, key)

                #print recipe.namespace
                recipe.execute()

                #save results
                context = {'data_root': clusterIO.local_dataroot,
                           'task_id': taskDescr['id'].split('~')[0]}

                #update context with file stub and input directory
                try:
                    principle_input = taskDescr['inputs']['input']  #default input
                    context['file_stub'] = os.path.splitext(os.path.basename(principle_input))[0]
                    context['input_dir'] = unifiedIO.dirname(principle_input)
                except KeyError:
                    pass

                try:
                    od = taskDescr['output_dir']
                    # make sure we have a trailing slash
                    # TODO - this should be fine for most windows use cases, as you should generally
                    # use POSIX urls for the cluster/cluster of one, but might need checking
                    if not od.endswith('/'):
                        od = od + '/'

                    context['output_dir'] = unifiedIO.dirname(od)
                except KeyError:
                    pass

                #print taskDescr['inputs']
                #print context

                #abuse outputs as context
                outputs = taskDescr.get('outputs', None)
                if not outputs is None:
                    context.update(outputs)
                #print context, context['input_dir']

                recipe.save(context)

                self.resultsQueue.put((queueURL, taskDescr, True))
            except Exception:
                import traceback
                traceback.print_exc()
                tb = traceback.format_exc()
                logger.exception(tb)
                self.resultsQueue.put((queueURL, taskDescr, TaskError(taskDescr, tb)))
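# For reference, the recipe branch above only relies on a handful of keys in the task
# descriptor. A hedged sketch of what such a descriptor might contain (values are
# invented; the authoritative schema lives with the rule/task server, not here):
example_recipe_task = {
    'type': 'recipe',
    'id': 'f3a2c9~0',                 # only the part before '~' is used as task_id
    'taskdef': {'recipe': '- processing.Zoom: {zoom: 0.5}'},  # inline YAML; alternatively supply 'taskdefRef'
    'inputs': {'input': 'pyme-cluster:///data/series_000.h5'},
    'output_dir': 'pyme-cluster:///analysis/series_000/',
    'outputs': None,                  # optional; merged into the save context if present
}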
def OnFindMixedClusters(self, event=None):
    """
    FindMixedClusters first uses DBSCAN clustering on two color channels separately
    for denoising purposes, then after having removed noisy points, DBSCAN is run
    again on both channels combined, and the fraction of clumps containing both
    colors is determined.
    """
    from PYME.recipes import tablefilters, localisations
    from PYME.recipes import Recipe
    import wx

    chans = self.pipeline.colourFilter.getColourChans()
    nchan = len(chans)
    if nchan < 2:
        raise RuntimeError('FindMixedClusters requires at least two color channels')
    else:
        selectedChans = [0, 1]

    #rad_dlg = wx.NumberEntryDialog(None, 'Search Radius For Core Points', 'rad [nm]', 'rad [nm]', 125, 0, 9e9)
    #rad_dlg.ShowModal()
    searchRadius = 125.0  #rad_dlg.GetValue()
    #minPt_dlg = wx.NumberEntryDialog(None, 'Minimum Points To Be Core Point', 'min pts', 'min pts', 3, 0, 9e9)
    #minPt_dlg.ShowModal()
    minClumpSize = 3  #minPt_dlg.GetValue()

    #build a recipe programmatically
    rec = Recipe()

    #split input according to colour channels
    rec.add_module(localisations.ExtractTableChannel(rec, inputName='input',
                                                     outputName='chan0', channel=chans[0]))
    rec.add_module(localisations.ExtractTableChannel(rec, inputName='input',
                                                     outputName='chan1', channel=chans[1]))

    #clump each channel
    rec.add_module(localisations.DBSCANClustering(rec, inputName='chan0',
                                                  outputName='chan0_clumped',
                                                  searchRadius=searchRadius,
                                                  minClumpSize=minClumpSize))
    rec.add_module(localisations.DBSCANClustering(rec, inputName='chan1',
                                                  outputName='chan1_clumped',
                                                  searchRadius=searchRadius,
                                                  minClumpSize=minClumpSize))

    #filter unclumped points
    rec.add_module(tablefilters.FilterTable(rec, inputName='chan0_clumped',
                                            outputName='chan0_cleaned',
                                            filters={'dbscanClumpID': [.5, sys.maxsize]}))
    rec.add_module(tablefilters.FilterTable(rec, inputName='chan1_clumped',
                                            outputName='chan1_cleaned',
                                            filters={'dbscanClumpID': [.5, sys.maxsize]}))

    #rejoin cleaned datasets
    rec.add_module(tablefilters.ConcatenateTables(rec, inputName0='chan0_cleaned',
                                                  inputName1='chan1_cleaned',
                                                  outputName='joined'))

    #clump on cleaned and rejoined data
    rec.add_module(localisations.DBSCANClustering(rec, inputName='joined',
                                                  outputName='output',
                                                  searchRadius=searchRadius,
                                                  minClumpSize=minClumpSize))

    #do this before configuring so that we already have the channel names populated
    rec.namespace['input'] = self.pipeline.output

    if not rec.configure_traits(view=rec.pipeline_view, kind='modal'):
        return  #handle cancel

    #run recipe
    joined_clumps = rec.execute()

    joined_clump_IDs = np.unique(joined_clumps['dbscanClumpID'])
    joined_clump_IDs = joined_clump_IDs[joined_clump_IDs > .5]  #reject unclumped points

    chan0_clump_IDs = np.unique(joined_clumps['dbscanClumpID'][joined_clumps['concatSource'] < .5])
    chan0_clump_IDs = chan0_clump_IDs[chan0_clump_IDs > .5]

    chan1_clump_IDs = np.unique(joined_clumps['dbscanClumpID'][joined_clumps['concatSource'] > .5])
    chan1_clump_IDs = chan1_clump_IDs[chan1_clump_IDs > .5]

    both_chans_IDS = [c for c in chan0_clump_IDs if c in chan1_clump_IDs]

    n_total_clumps = len(joined_clump_IDs)

    print('Total clumps: %i' % n_total_clumps)
    c0Ratio = float(len(chan0_clump_IDs)) / n_total_clumps
    print('fraction clumps with channel %i present: %f' % (selectedChans[0], c0Ratio))
    self.colocalizationRatios['Channel%iin%i%i' % (selectedChans[0], selectedChans[0],
                                                   selectedChans[1])] = c0Ratio

    c1Ratio = float(len(chan1_clump_IDs)) / n_total_clumps
    print('fraction clumps with channel %i present: %f' % (selectedChans[1], c1Ratio))
    self.colocalizationRatios['Channel%iin%i%i' % (selectedChans[1], selectedChans[0],
                                                   selectedChans[1])] = c1Ratio

    bothChanRatio = float(len(both_chans_IDS)) / n_total_clumps
    print('fraction of clumps with both channel %i and %i present: %f' %
          (selectedChans[0], selectedChans[1], bothChanRatio))
    self.colocalizationRatios['mixedClumps%i%i' % tuple(selectedChans)] = bothChanRatio

    self._rec = rec
def OnPairwiseDistanceHistogram(self, event=None):
    from PYME.recipes import tablefilters, localisations, measurement
    from PYME.recipes import Recipe
    import matplotlib.pyplot as plt
    import wx
    import os

    # build a recipe programmatically
    distogram = Recipe()

    # split input according to colour channels selected
    distogram.add_module(localisations.ExtractTableChannel(distogram, inputName='input',
                                                           outputName='chan0', channel='chan0'))
    distogram.add_module(localisations.ExtractTableChannel(distogram, inputName='input',
                                                           outputName='chan1', channel='chan0'))

    # Histogram
    distogram.add_module(measurement.PairwiseDistanceHistogram(distogram,
                                                               inputPositions='chan0',
                                                               inputPositions2='chan1',
                                                               outputName='output'))

    #do this before configuring so that we already have the channel names populated
    distogram.namespace['input'] = self.pipeline.output

    #configure parameters
    if not distogram.configure_traits(view=distogram.pipeline_view, kind='modal'):
        return  #handle cancel

    selectedChans = (distogram.modules[-1].inputPositions,
                     distogram.modules[-1].inputPositions2)

    #run recipe
    distances = distogram.execute()

    binsz = (distances['bins'][1] - distances['bins'][0])
    self.pairwiseDistances[selectedChans] = {'counts': np.array(distances['counts']),
                                             'bins': np.array(distances['bins'] + 0.5 * binsz)}

    plt.figure()
    plt.bar(self.pairwiseDistances[selectedChans]['bins'] - 0.5 * binsz,
            self.pairwiseDistances[selectedChans]['counts'], width=binsz)

    hist_dlg = wx.FileDialog(None, message="Save histogram as csv...",
                             # defaultDir=os.getcwd(),
                             defaultFile='disthist_{}.csv'.format(os.path.basename(self.pipeline.filename)),
                             wildcard='CSV (*.csv)|*.csv',
                             style=wx.FD_SAVE | wx.FD_OVERWRITE_PROMPT)

    if hist_dlg.ShowModal() == wx.ID_OK:
        histfn = hist_dlg.GetPath()
        np.savetxt(histfn,
                   np.vstack([self.pairwiseDistances[selectedChans]['bins'] - 0.5 * binsz,
                              self.pairwiseDistances[selectedChans]['counts']]).T,
                   delimiter=',',
                   header='Bins [nm],Counts')