# Clear stored job results; by default only jobs marked in-process or errored are cleared.
def _clear_job_results(*, jobs, incomplete_only=True):
    for job in jobs:
        val = mt.getValue(key=job)
        if val:
            if (not incomplete_only) or (val.startswith('in-process')) or (val.startswith('error')):
                print('Clearing job: ' + job['label'])
                mt.setValue(key=job, value=None)
# Realize (download) the raw recording files for jobs that do not yet have a stored result.
def _download_recordings(*, jobs):
    for job in jobs:
        val = mt.getValue(key=job)
        if not val:
            if 'recording' in job:
                if 'directory' in job['recording']:
                    dsdir = job['recording']['directory']
                    fname = dsdir + '/raw.mda'
                    print('REALIZING FILE: ' + fname)
                    mt.realizeFile(path=fname)
def _run_job(job):
    val = mt.getValue(key=job)
    if val:
        return
    # Claim the job by writing a random in-process token; if another worker has
    # already claimed it (overwrite=False fails), do nothing.
    code = ''.join(random.choice(string.ascii_uppercase) for x in range(10))
    if not mt.setValue(key=job, value='in-process-' + code, overwrite=False):
        return
    status = dict(time_started=_make_timestamp(), status='running')
    _set_job_status(job, status)
    print('Running job: ' + job['label'])
    try:
        result = _do_run_job(job)
    except:
        status['time_finished'] = _make_timestamp()
        status['status'] = 'error'
        status['error'] = 'Exception in _do_run_job'
        val = mt.getValue(key=job)
        if val == 'in-process-' + code:
            _set_job_status(job, status)
        raise
    # Only save the result if our in-process token is still the current value.
    val = mt.getValue(key=job)
    if val != 'in-process-' + code:
        print('Not saving result because in-process code does not match {} <> {}.'.format(val, 'in-process-' + code))
        return
    status['time_finished'] = _make_timestamp()
    status['result'] = result
    if 'error' in result:
        print('Error running job: ' + result['error'])
        status['status'] = 'error'
        status['error'] = result['error']
        _set_job_status(job, status)
        mt.setValue(key=job, value='error-' + code)
        return
    status['status'] = 'finished'
    mt.saveObject(key=job, object=result)  # Not needed in future, because we should instead use the status object
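# A minimal driver sketch (an assumption, not part of the original code): it shows how
# the three helpers above might be composed over a list of job dicts that carry a
# 'label' and an optional 'recording' entry, as the helpers expect. The function name
# and the clear_incomplete_first flag are hypothetical.
def _process_jobs(*, jobs, clear_incomplete_first=False):
    if clear_incomplete_first:
        # Drop stale in-process/error markers so those jobs can be claimed again
        _clear_job_results(jobs=jobs, incomplete_only=True)
    # Make sure the raw recordings are available locally before running
    _download_recordings(jobs=jobs)
    for job in jobs:
        _run_job(job)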
def initialize(self):
    # self._resource_names = ca.getSubKeys(key=dict(name='spikeforest_results'))
    obj = mt.getValue(key=dict(name='compute_resources'), subkey='-', parse_json=True)
    if obj:
        self._resource_names = list(obj.keys())
    else:
        self._resource_names = ['none-found']
    self._SEL_resource_name.setOptions([''] + self._resource_names)
    self._on_resource_name_changed(value=self._SEL_resource_name.value())
def main():
    parser = argparse.ArgumentParser(
        description='Generate unit detail data (including spikesprays) for website')
    parser.add_argument(
        '--output_ids',
        help='Comma-separated list of IDs of the analysis outputs to include in the website.',
        required=False,
        default=None)
    args = parser.parse_args()

    use_slurm = True
    mt.configDownloadFrom(['spikeforest.kbucket'])

    if args.output_ids is not None:
        output_ids = args.output_ids.split(',')
    else:
        output_ids = [
            'paired_boyden32c', 'paired_crcns', 'paired_mea64c',
            'paired_kampff', 'paired_monotrode', 'synth_monotrode',
            # 'synth_bionet',
            'synth_magland', 'manual_franklab', 'synth_mearec_neuronexus',
            'synth_mearec_tetrode', 'synth_visapy', 'hybrid_janelia'
        ]
    print('Using output ids: ', output_ids)

    print('******************************** LOADING ANALYSIS OUTPUT OBJECTS...')
    for output_id in output_ids:
        slurm_working_dir = 'tmp_slurm_job_handler_' + _random_string(5)
        job_handler = mlpr.SlurmJobHandler(working_dir=slurm_working_dir)
        if use_slurm:
            job_handler.addBatchType(
                name='default',
                num_workers_per_batch=20,
                num_cores_per_job=1,
                time_limit_per_batch=1800,
                use_slurm=True,
                additional_srun_opts=['-p ccm'])
        else:
            job_handler.addBatchType(
                name='default',
                num_workers_per_batch=multiprocessing.cpu_count(),
                num_cores_per_job=1,
                use_slurm=False)
        with mlpr.JobQueue(job_handler=job_handler) as JQ:
            print('=============================================', output_id)
            print('Loading output object: {}'.format(output_id))
            output_path = 'key://pairio/spikeforest/spikeforest_analysis_results.{}.json'.format(output_id)
            obj = mt.loadObject(path=output_path)
            # studies = obj['studies']
            # study_sets = obj.get('study_sets', [])
            # recordings = obj['recordings']
            sorting_results = obj['sorting_results']

            print('Determining sorting results to process ({} total)...'.format(len(sorting_results)))
            sorting_results_to_process = []
            for sr in sorting_results:
                key = dict(
                    name='unit-details-v0.1.0',
                    recording_directory=sr['recording']['directory'],
                    firings_true=sr['firings_true'],
                    firings=sr['firings'])
                val = mt.getValue(key=key, collection='spikeforest')
                if not val:
                    sr['key'] = key
                    sorting_results_to_process.append(sr)
            print('Need to process {} of {} sorting results'.format(
                len(sorting_results_to_process), len(sorting_results)))

            recording_directories_to_process = sorted(
                list(set([sr['recording']['directory'] for sr in sorting_results_to_process])))
            print('{} recording directories to process'.format(len(recording_directories_to_process)))

            print('Filtering recordings...')
            filter_jobs = FilterTimeseries.createJobs([
                dict(
                    recording_directory=recdir,
                    timeseries_out={'ext': '.mda'},
                    _timeout=600)
                for recdir in recording_directories_to_process
            ])
            filter_results = [job.execute() for job in filter_jobs]
            JQ.wait()
            filtered_timeseries_by_recdir = dict()
            for i, recdir in enumerate(recording_directories_to_process):
                result0 = filter_results[i]
                if result0.retcode != 0:
                    raise Exception('Problem computing filtered timeseries for recording: {}'.format(recdir))
                filtered_timeseries_by_recdir[recdir] = result0.outputs['timeseries_out']

            print('Creating spike sprays...')
            for sr in sorting_results_to_process:
                rec = sr['recording']
                study_name = rec['study']
                rec_name = rec['name']
                sorter_name = sr['sorter']['name']
                print('====== COMPUTING {}/{}/{}'.format(study_name, rec_name, sorter_name))
                if sr.get('comparison_with_truth', None) is not None:
                    cwt = mt.loadObject(path=sr['comparison_with_truth']['json'])
                    filtered_timeseries = filtered_timeseries_by_recdir[rec['directory']]
                    spike_spray_job_objects = []
                    list0 = list(cwt.values())
                    for _, unit in enumerate(list0):
                        # print('')
                        # print('=========================== {}/{}/{} unit {} of {}'.format(study_name, rec_name, sorter_name, ii + 1, len(list0)))
                        # ssobj = create_spikesprays(rx=rx, sx_true=sx_true, sx_sorted=sx, neighborhood_size=neighborhood_size, num_spikes=num_spikes, unit_id_true=unit['unit_id'], unit_id_sorted=unit['best_unit'])
                        spike_spray_job_objects.append(dict(
                            args=dict(
                                recording_directory=rec['directory'],
                                filtered_timeseries=filtered_timeseries,
                                firings_true=os.path.join(rec['directory'], 'firings_true.mda'),
                                firings_sorted=sr['firings'],
                                unit_id_true=unit['unit_id'],
                                unit_id_sorted=unit['best_unit'],
                                json_out={'ext': '.json'},
                                _container='default',
                                _timeout=180),
                            study_name=study_name,
                            rec_name=rec_name,
                            sorter_name=sorter_name,
                            unit=unit))
                    spike_spray_jobs = CreateSpikeSprays.createJobs(
                        [obj['args'] for obj in spike_spray_job_objects])
                    spike_spray_results = [job.execute() for job in spike_spray_jobs]
                    sr['spike_spray_job_objects'] = spike_spray_job_objects
                    sr['spike_spray_results'] = spike_spray_results

            JQ.wait()

            for sr in sorting_results_to_process:
                rec = sr['recording']
                study_name = rec['study']
                rec_name = rec['name']
                sorter_name = sr['sorter']['name']
                print('====== SAVING {}/{}/{}'.format(study_name, rec_name, sorter_name))
                if sr.get('comparison_with_truth', None) is not None:
                    spike_spray_job_objects = sr['spike_spray_job_objects']
                    spike_spray_results = sr['spike_spray_results']
                    unit_details = []
                    ok = True
                    for i, result in enumerate(spike_spray_results):
                        obj0 = spike_spray_job_objects[i]
                        if result.retcode != 0:
                            print('WARNING: Error creating spike sprays for job:')
                            print(spike_spray_job_objects[i])
                            ok = False
                            break
                        ssobj = mt.loadObject(path=result.outputs['json_out'])
                        if ssobj is None:
                            raise Exception('Problem loading spikespray object output.')
                        address = mt.saveObject(object=ssobj, upload_to='spikeforest.kbucket')
                        unit = obj0['unit']
                        unit_details.append(dict(
                            studyName=obj0['study_name'],
                            recordingName=obj0['rec_name'],
                            sorterName=obj0['sorter_name'],
                            trueUnitId=unit['unit_id'],
                            sortedUnitId=unit['best_unit'],
                            spikeSprayUrl=mt.findFile(
                                path=address,
                                remote_only=True,
                                download_from='spikeforest.kbucket'),
                            _container='default'))
                    if ok:
                        mt.saveObject(
                            collection='spikeforest',
                            key=sr['key'],
                            object=unit_details,
                            upload_to='spikeforest.public')
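# Hypothetical command line for the script above (the script filename is an
# assumption for illustration; --output_ids takes a comma-separated subset of the
# analysis output IDs listed in the default list):
#
#   python generate_unit_detail_data.py --output_ids paired_kampff,synth_magland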
def main():
    from mountaintools import client as mt

    parser = argparse.ArgumentParser(
        description='Generate unit detail data (including spikesprays) for website')
    parser.add_argument('analysis_path', help='assembled analysis file (output.json)')
    parser.add_argument(
        '--studysets',
        help='Comma-separated list of study set names to include',
        required=False,
        default=None)
    parser.add_argument('--force-run', help='Force rerunning of processing', action='store_true')
    parser.add_argument(
        '--force-run-all',
        help='Force rerunning of processing including filtering',
        action='store_true')
    parser.add_argument('--parallel', help='Optional number of parallel jobs', required=False, default='0')
    parser.add_argument('--slurm', help='Path to slurm config file', required=False, default=None)
    parser.add_argument('--cache', help='The cache database to use', required=False, default=None)
    parser.add_argument('--job-timeout', help='Timeout for processing jobs', required=False, default=600)
    parser.add_argument('--log-file', help='Log file for analysis progress', required=False, default=None)
    parser.add_argument(
        '--force-regenerate',
        help='Whether to force regenerating spike sprays (for when code has changed)',
        action='store_true')
    parser.add_argument('--test', help='Whether to just test by running only 1', action='store_true')
    args = parser.parse_args()

    mt.configDownloadFrom(['spikeforest.kbucket'])

    with open(args.analysis_path, 'r') as f:
        analysis = json.load(f)

    if args.studysets is not None:
        studyset_names = args.studysets.split(',')
        print('Using study sets: ', studyset_names)
    else:
        studyset_names = None

    study_sets = analysis['StudySets']
    sorting_results = analysis['SortingResults']

    studies_to_include = []
    for ss in study_sets:
        if (studyset_names is None) or (ss['name'] in studyset_names):
            for study in ss['studies']:
                studies_to_include.append(study['name'])
    print('Including studies:', studies_to_include)

    print('Determining sorting results to process ({} total)...'.format(len(sorting_results)))
    sorting_results_to_process = []
    sorting_results_to_consider = []
    for sr in sorting_results:
        study_name = sr['studyName']
        if study_name in studies_to_include:
            if 'firings' in sr:
                if sr.get('comparisonWithTruth', None) is not None:
                    sorting_results_to_consider.append(sr)
                    key = dict(
                        name='unit-details-v0.1.0',
                        recording_directory=sr['recordingDirectory'],
                        firings_true=sr['firingsTrue'],
                        firings=sr['firings'])
                    val = mt.getValue(key=key, collection='spikeforest')
                    if (not val) or (args.force_regenerate):
                        sr['key'] = key
                        sorting_results_to_process.append(sr)
    if args.test and len(sorting_results_to_process) > 0:
        sorting_results_to_process = [sorting_results_to_process[0]]
    print('Need to process {} of {} sorting results'.format(
        len(sorting_results_to_process), len(sorting_results_to_consider)))

    recording_directories_to_process = sorted(
        list(set([sr['recordingDirectory'] for sr in sorting_results_to_process])))
    print('{} recording directories to process'.format(len(recording_directories_to_process)))

    if int(args.parallel) > 0:
        job_handler = hither.ParallelJobHandler(int(args.parallel))
    elif args.slurm:
        with open(args.slurm, 'r') as f:
            slurm_config = json.load(f)
        job_handler = hither.SlurmJobHandler(working_dir='tmp_slurm', **slurm_config['cpu'])
    else:
        job_handler = None

    print('Filtering recordings...')
    filter_results = []
    with hither.config(
            container='default',
            cache=args.cache,
            force_run=args.force_run_all,
            job_handler=job_handler,
            log_path=args.log_file,
            exception_on_fail=True,
            cache_failing=False,
            rerun_failing=True,
            job_timeout=args.job_timeout), hither.job_queue():
        for recdir in recording_directories_to_process:
            result = filter_recording.run(
                recording_directory=recdir,
                timeseries_out=hither.File())
            filter_results.append(result)

    filtered_timeseries_by_recdir = dict()
    for i, recdir in enumerate(recording_directories_to_process):
        result0 = filter_results[i]
        if not result0.success:
            raise Exception('Problem computing filtered timeseries for recording: {}'.format(recdir))
        filtered_timeseries_by_recdir[recdir] = result0.outputs.timeseries_out._path

    print('Creating spike sprays...')
    with hither.config(
            container='default',
            cache=args.cache,
            force_run=args.force_run or args.force_run_all,
            job_handler=job_handler,
            log_path=args.log_file,
            exception_on_fail=True,
            cache_failing=False,
            rerun_failing=True,
            job_timeout=args.job_timeout), hither.job_queue():
        for sr in sorting_results_to_process:
            recdir = sr['recordingDirectory']
            study_name = sr['studyName']
            rec_name = sr['recordingName']
            sorter_name = sr['sorterName']
            print('====== COMPUTING {}/{}/{}'.format(study_name, rec_name, sorter_name))
            cwt = ka.load_object(path=sr['comparisonWithTruth']['json'])
            filtered_timeseries = filtered_timeseries_by_recdir[recdir]
            spike_spray_results = []
            list0 = list(cwt.values())
            for _, unit in enumerate(list0):
                result = create_spike_sprays.run(
                    recording_directory=recdir,
                    filtered_timeseries=filtered_timeseries,
                    firings_true=os.path.join(recdir, 'firings_true.mda'),
                    firings_sorted=sr['firings'],
                    unit_id_true=unit['unit_id'],
                    unit_id_sorted=unit['best_unit'],
                    json_out=hither.File())
                setattr(result, 'unit', unit)
                spike_spray_results.append(result)
            sr['spike_spray_results'] = spike_spray_results

    for sr in sorting_results_to_process:
        recdir = sr['recordingDirectory']
        study_name = sr['studyName']
        rec_name = sr['recordingName']
        sorter_name = sr['sorterName']
        print('====== SAVING {}/{}/{}'.format(study_name, rec_name, sorter_name))
        spike_spray_results = sr['spike_spray_results']
        key = sr['key']
        unit_details = []
        ok = True
        for i, result in enumerate(spike_spray_results):
            if not result.success:
                print('WARNING: Error creating spike sprays for {}/{}/{}'.format(study_name, rec_name, sorter_name))
                ok = False
                break
            ssobj = ka.load_object(result.outputs.json_out._path)
            if ssobj is None:
                raise Exception('Problem loading spikespray object output.')
            address = mt.saveObject(object=ssobj, upload_to='spikeforest.kbucket')
            unit = getattr(result, 'unit')
            unit_details.append(dict(
                studyName=study_name,
                recordingName=rec_name,
                sorterName=sorter_name,
                trueUnitId=unit['unit_id'],
                sortedUnitId=unit['best_unit'],
                spikeSprayUrl=mt.findFile(
                    path=address,
                    remote_only=True,
                    download_from='spikeforest.kbucket')))
        if ok:
            mt.saveObject(
                collection='spikeforest',
                key=key,
                object=unit_details,
                upload_to='spikeforest.public')
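# Hypothetical invocations of the hither-based script above (the script filename
# and the study-set name are assumptions for illustration; the flags are the ones
# defined by the argument parser in main()):
#
#   python generate_unit_details.py output.json --test --parallel 2
#   python generate_unit_details.py output.json --studysets SOME_STUDYSET --parallel 8 --force-regenerate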
def _test1(ii):
    key = dict(test='key3')
    val0 = '{}'.format(ii)
    mt.setValue(key=key, value=val0)
    val1 = mt.getValue(key=key)
    return val1
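# A usage sketch (an assumption about how _test1 is exercised, not part of the
# original code): run it from a small process pool to check that setValue/getValue
# round-trip a shared key under concurrency. Each worker writes its own index to
# the same key and immediately reads it back, so the returned values depend on
# write ordering. The helper name and worker count are hypothetical.
import multiprocessing


def _run_test1_pool(num_workers=4):
    with multiprocessing.Pool(num_workers) as pool:
        return pool.map(_test1, range(num_workers))


if __name__ == '__main__':
    print(_run_test1_pool())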
from mountaintools import client as mt

print('===================')

# Local key/value store for associating relatively short strings (<=80 characters)
# with arbitrary keys (strings or dicts)

# Setting values (these should be short strings, <=80 characters)
mt.setValue(key='some-key1', value='hello 1')
mt.setValue(key=dict(name='some_name', number=2), value='hello 2')

# Getting values
val1 = mt.getValue(key='some-key1')
val2 = mt.getValue(key=dict(name='some_name', number=2))
print(val1)
print(val2)

print('===================')

# Setting password-protected values
mt.setValue(key='some_key2', password='******', value='the-secret-*y$#a')

# Retrieving password-protected values
print(mt.getValue(key='some_key2', password='******'))

print('===================')

# Local storage of data and files, retrievable by SHA-1 hash
path = mt.saveText('This is some text', basename='test.txt')
print(path)
# Output: sha1://482cb0cfcbed6740a2bcb659c9ccc22a4d27b369/test.txt
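# A read-back sketch (assumption: realizeFile resolves a sha1:// path to a local
# file path, the same call used above to realize remote recording files). This
# retrieves the text that was just stored by its content hash.
local_path = mt.realizeFile(path=path)
if local_path:
    with open(local_path, 'r') as f:
        print(f.read())  # expected: 'This is some text'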