def _init_from_file(self, path: str, *, original_path: str, kwargs: dict):
    self._sorting = None
    if MdaSortingExtractor.can_read(firings_file=path):
        if 'paramsPath' in kwargs:
            params = ka.load_object(kwargs['paramsPath'])
            samplerate = params['samplerate']
        elif 'samplerate' in kwargs:
            samplerate = kwargs['samplerate']
        else:
            raise Exception('Missing argument: samplerate or paramsPath')
        self._sorting = MdaSortingExtractor(firings_file=path, samplerate=samplerate)
    else:
        try:
            obj = ka.load_object(path)
        except Exception:
            obj = None
        if obj is not None and 'firings' in obj:
            if 'paramsPath' in kwargs:
                params = ka.load_object(kwargs['paramsPath'])
                samplerate = params['samplerate']
            elif 'samplerate' in kwargs:
                samplerate = kwargs['samplerate']
            elif 'samplerate' in obj:
                samplerate = obj['samplerate']
            else:
                raise Exception('Missing argument: samplerate or paramsPath')
            self._sorting = MdaSortingExtractor(firings_file=obj['firings'], samplerate=samplerate)
    if not self._sorting:
        raise Exception('Unsupported format for {} of size {}'.format(path, os.path.getsize(path)))
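# Example (hedged): a minimal sketch of the two ways the loader above resolves the sampling
# rate for an .mda firings file -- either an explicit samplerate kwarg, or a paramsPath that
# points to a params.json object containing a 'samplerate' field. The URIs below are
# hypothetical placeholders, not real files.
def _example_load_mda_sorting():
    firings_uri = 'sha1://0000000000000000000000000000000000000000/firings.mda'  # hypothetical
    # Option 1: pass the sampling rate directly
    sorting1 = MdaSortingExtractor(firings_file=firings_uri, samplerate=30000)
    # Option 2: read it from a params.json object (hypothetical URI)
    params = ka.load_object('sha1://1111111111111111111111111111111111111111/params.json')
    sorting2 = MdaSortingExtractor(firings_file=firings_uri, samplerate=params['samplerate'])
    return sorting1, sorting2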
def javascript_state_changed(self, prev_state, state):
    self._set_status('running', 'Running Analysis')
    path = state.get('path', None)
    studySetsPath = state.get('studySetsPath', None)
    if path is not None:
        self._set_status('running', 'Loading object: {}'.format(path))
        obj = ka.load_object(path=path, fr='default_readonly')
        if not obj:
            self._set_error('Unable to load object: {}'.format(path))
            return
    elif studySetsPath is not None:
        self._set_status('running', 'Loading object: {}'.format(studySetsPath))
        obj = ka.load_object(path=studySetsPath, fr='default_readonly')
        if not obj:
            self._set_error('Unable to load object: {}'.format(studySetsPath))
            return
    else:
        self._set_error('Missing required prop: path or studySetsPath')
        return
    # delete this because it takes a long time to transfer
    if 'StudyAnalysisResults' in obj:
        del obj['StudyAnalysisResults']
    self._set_state(object=obj)
    self._set_status('finished', 'Finished Analysis')
def spikeinterface_recording_dict_to_labbox_dict(x): c = x['class'] if c == 'spiketoolkit.preprocessing.bandpass_filter.BandpassFilterRecording': kwargs = x['kwargs'] recording = spikeinterface_recording_dict_to_labbox_dict( kwargs['recording']) freq_min = kwargs['freq_min'] freq_max = kwargs['freq_max'] freq_wid = kwargs['freq_wid'] return _make_json_safe({ 'recording_format': 'filtered', 'data': { 'filters': [{ 'type': 'bandpass_filter', 'freq_min': freq_min, 'freq_max': freq_max, 'freq_wid': freq_wid }], 'recording': recording } }) elif c == 'spikeextractors.subrecordingextractor.SubRecordingExtractor': kwargs = x['kwargs'] recording = spikeinterface_recording_dict_to_labbox_dict( kwargs['parent_recording']) channel_ids = kwargs['channel_ids'] renamed_channel_ids = kwargs.get('renamed_channel_ids', None) start_frame = kwargs['start_frame'] end_frame = kwargs['end_frame'] if renamed_channel_ids is not None: raise Exception('renamed_channel_ids field not supported') return _make_json_safe({ 'recording_format': 'subrecording', 'data': { 'recording': recording, 'channel_ids': channel_ids, 'start_frame': start_frame, 'end_frame': end_frame } }) elif c == 'spikeextractors.extractors.mdaextractors.mdaextractors.MdaRecordingExtractor': kwargs = x['kwargs'] path = kwargs['folder_path'] raw_path = ka.store_file(path + '/raw.mda') params_path = path + '/params.json' geom_path = path + '/geom.csv' params = ka.load_object(params_path) assert params is not None, f'Unable to load params.json from: {params_path}' geom = _load_geom_from_csv(geom_path) return _make_json_safe({ 'recording_format': 'mda', 'data': { 'raw': raw_path, 'geom': geom, 'params': params } }) else: raise Exception(f'Unsupported class: {c}')
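# Example (hedged): a minimal sketch of the kind of spikeinterface-style dict the converter
# above expects. The folder path is a hypothetical placeholder; in practice such dicts are
# typically produced by spikeextractors' dump/serialization machinery rather than written by hand.
example_si_dict = {
    'class': 'spiketoolkit.preprocessing.bandpass_filter.BandpassFilterRecording',
    'kwargs': {
        'recording': {
            'class': 'spikeextractors.extractors.mdaextractors.mdaextractors.MdaRecordingExtractor',
            'kwargs': {'folder_path': '/path/to/recording_dir'}  # hypothetical dir with raw.mda, params.json, geom.csv
        },
        'freq_min': 300,
        'freq_max': 6000,
        'freq_wid': 1000
    }
}
# labbox_dict = spikeinterface_recording_dict_to_labbox_dict(example_si_dict)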
def _internal_deserialize_result(obj):
    import kachery as ka
    result = Result()
    result.runtime_info = obj['runtime_info']
    result.runtime_info['console_out'] = ka.load_object(
        result.runtime_info.get('console_out', ''))
    if result.runtime_info['console_out'] is None:
        return None
    output_files = obj['output_files']
    for oname, path in output_files.items():
        if path is not None:
            path2 = ka.load_file(path)
            if path2 is None:
                print('Unable to find file when deserializing result.')
                return None
        else:
            path2 = None
        setattr(result.outputs, oname, File(path2))
        result._output_names.append(oname)
    result.retval = obj['retval']
    result.success = obj.get('success', False)
    result.version = obj.get('version', None)
    result.container = obj.get('container', None)
    result.hash_object = obj['hash_object']
    result.status = obj['status']
    return result
def __init__(self, *, recording_directory=None, timeseries_path=None, download=False,
             samplerate=None, geom=None, geom_path=None, params_path=None):
    RecordingExtractor.__init__(self)
    if recording_directory:
        timeseries_path = recording_directory + '/raw.mda'
        geom_path = recording_directory + '/geom.csv'
        params_path = recording_directory + '/params.json'
    self._timeseries_path = timeseries_path
    if params_path:
        self._dataset_params = ka.load_object(params_path)
        self._samplerate = self._dataset_params['samplerate']
    else:
        self._dataset_params = dict(samplerate=samplerate)
        self._samplerate = samplerate
    if download:
        path0 = ka.load_file(path=self._timeseries_path)
        if not path0:
            raise Exception('Unable to realize file: ' + self._timeseries_path)
        self._timeseries_path = path0
    self._timeseries = DiskReadMda(self._timeseries_path)
    if self._timeseries is None:
        raise Exception('Unable to load timeseries: {}'.format(self._timeseries_path))
    X = self._timeseries
    if geom is not None:
        self._geom = geom
    elif geom_path:
        geom_path2 = ka.load_file(geom_path)
        self._geom = np.genfromtxt(geom_path2, delimiter=',')
    else:
        self._geom = np.zeros((X.N1(), 2))
    if self._geom.shape[0] != X.N1():
        # raise Exception('Incompatible dimensions between geom.csv and timeseries file {} <> {}'.format(self._geom.shape[0], X.N1()))
        print('WARNING: Incompatible dimensions between geom.csv and timeseries file {} <> {}'
              .format(self._geom.shape[0], X.N1()))
        self._geom = np.zeros((X.N1(), 2))
    self._hash = ka.get_object_hash(
        dict(timeseries=ka.get_file_hash(self._timeseries_path),
             samplerate=self._samplerate,
             geom=_json_serialize(self._geom)))
    self._num_channels = X.N1()
    self._num_timepoints = X.N2()
    for m in range(self._num_channels):
        self.set_channel_property(m, 'location', self._geom[m, :])
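# Example (hedged): a minimal usage sketch for the extractor above, assuming a standard
# spikeforest recording directory (raw.mda, params.json, geom.csv). The sha1dir:// URI is a
# hypothetical placeholder; get_num_channels / get_sampling_frequency / get_traces are the
# usual spikeextractors RecordingExtractor accessors.
def _example_read_mda_recording():
    recdir = 'sha1dir://0000000000000000000000000000000000000000.example/rec001'  # hypothetical
    rec = MdaRecordingExtractor(recording_directory=recdir, download=False)
    print(rec.get_num_channels(), rec.get_sampling_frequency())
    chunk = rec.get_traces(start_frame=0, end_frame=1000)
    return chunk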
def patch_recording_geom(recording, geom_fname):
    print(f'PATCHING geom for recording: {recording["name"]}')
    geom_info = ka.get_file_info(geom_fname)
    x = recording['directory']
    y = ka.store_dir(x).replace('sha1dir://', 'sha1://')
    obj = ka.load_object(y)
    obj['files']['geom.csv'] = dict(size=geom_info['size'], sha1=geom_info['sha1'])
    x2 = ka.store_object(obj)
    recording['directory'] = 'sha1dir://' + ka.get_file_hash(x2) + '.patched'
def trueUnitsInfo(self, format: str = 'dataframe'):
    if not self._summary_result:
        return None
    B = ka.load_object(self._summary_result['true_units_info'])
    if format == 'json':
        return B
    elif format == 'dataframe':
        return pd.DataFrame(B)
    else:
        raise Exception('Invalid format: ' + format)
def main():
    thisdir = os.path.dirname(os.path.realpath(__file__))
    studysets_obj_path = ka.load_text(thisdir + '/../../recordings/studysets')
    with ka.config(fr='default_readonly'):
        studysets_obj = ka.load_object(path=studysets_obj_path)
    # studysets_obj['StudySets']
    new_study_sets = []
    for ss in studysets_obj['StudySets']:
        if ss['name'] != 'PAIRED_ENGLISH':
            new_study_sets.append(ss)
    studyset_obj_path = thisdir + '/../../recordings/PAIRED_ENGLISH/PAIRED_ENGLISH.json'
    studyset_obj = ka.load_object(studyset_obj_path)
    assert studyset_obj is not None, f'Missing file: {studyset_obj_path}'
    new_study_sets.append(studyset_obj)
    studysets_obj['StudySets'] = new_study_sets
    with ka.config(fr='default_readwrite'):
        studysets_obj_path = ka.store_object(studysets_obj, basename='studysets.json')
    with open(thisdir + '/../../recordings/studysets', 'w') as f:
        f.write(studysets_obj_path)
def __init__(self, file_path):
    import h5_to_json as h5j
    se.RecordingExtractor.__init__(self)
    X = ka.load_object(file_path)
    X = h5j.hierarchy(X)
    self._timeseries = h5j.get_value(
        X['root']['acquisition']['ElectricalSeries']['_datasets']['data'],
        use_kachery=True, lazy=True)
    self._sampling_frequency = 30000  # hard-coded for now -- TODO: need to get this from the file
    self._geom = None  # TODO: need to get this from the file
    if self._geom is not None:
        for m in range(self._timeseries.shape[0]):
            self.set_channel_property(m, 'location', self._geom[m, :])
def __init__(self, arg, download=False): super().__init__() self._hash = None if isinstance(arg, str): arg = dict(path=arg) if isinstance(arg, se.RecordingExtractor): self._recording = arg else: self._recording = None # filters if ('recording' in arg) and ('filters' in arg): recording1 = AutoRecordingExtractor(arg['recording']) self._recording = self._apply_filters(recording1, arg['filters']) return if 'kachery_config' in arg: ka.set_config(**arg['kachery_config']) path = arg.get('path', '') if 'nwb_path' in arg: self._recording = NwbElectricalSeriesRecordingExtractor( path=path, nwb_path=arg['nwb_path']) elif path.endswith('.mda'): if 'samplerate' not in arg: raise Exception('Missing argument: samplerate') samplerate = arg['samplerate'] self._recording = MdaRecordingExtractor(timeseries_path=path, samplerate=samplerate, download=download) hash0 = _sha1_of_object( dict(timeseries_sha1=ka.get_file_info( path, algorithm='sha1')['sha1'], samplerate=samplerate)) setattr(self, 'hash', hash0) elif path.endswith('.nwb.json'): self._recording = NwbJsonRecordingExtractor(file_path=path) hash0 = ka.get_file_info(path)['sha1'] setattr(self, 'hash', hash0) elif path.endswith('.json') and (not path.endswith('.nwb.json')): obj = ka.load_object(path) if ('raw' in obj) and ('params' in obj) and ('geom' in obj): self._recording = MdaRecordingExtractor( timeseries_path=obj['raw'], samplerate=obj['params']['samplerate'], geom=np.array(obj['geom'])) else: raise Exception('Problem initializing recording extractor') elif ka.get_file_info(path + '/raw.mda'): self._recording = MdaRecordingExtractor( recording_directory=path, download=download) else: raise Exception('Unable to initialize recording extractor.') self.copy_channel_properties(recording=self._recording)
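# Example (hedged): two ways of constructing the AutoRecordingExtractor above, mirroring the
# arg formats its constructor handles. The URIs are hypothetical placeholders, and the shape
# of the filter spec is an assumption based on the 'filtered' recording format used elsewhere
# in this code, not something shown in the constructor itself.
def _example_auto_recording():
    # From a raw .mda path plus an explicit samplerate
    rec1 = AutoRecordingExtractor(dict(
        path='sha1://0000000000000000000000000000000000000000/raw.mda',  # hypothetical
        samplerate=30000))
    # From a nested recording with a bandpass filter applied on the fly (filter spec assumed)
    rec2 = AutoRecordingExtractor(dict(
        recording=dict(path='sha1://0000000000000000000000000000000000000000/raw.mda', samplerate=30000),
        filters=[dict(type='bandpass_filter', freq_min=300, freq_max=6000, freq_wid=1000)]))
    return rec1, rec2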
def load_object(uri: str, p2p: bool = True, from_node: Union[str, None] = None, from_channel: Union[str, None] = None):
    local_path = load_file(uri, p2p=p2p, from_node=from_node, from_channel=from_channel)
    if local_path is None:
        return None
    return ka.load_object(uri)
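# Example (hedged): calling the wrapper above with a hypothetical object URI. The underlying
# file is fetched first (possibly via p2p) and the object is only parsed if that fetch succeeds.
# obj = load_object('sha1://0000000000000000000000000000000000000000/analysis.json')  # hypothetical URI
# if obj is None:
#     print('Object not available locally or from any peer')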
def register_recording(*, recdir, output_fname, label, to):
    with ka.config(to=to):
        raw_path = ka.store_file(recdir + '/raw.mda')
        obj = dict(
            raw=raw_path,
            params=ka.load_object(recdir + '/params.json'),
            geom=np.genfromtxt(ka.load_file(recdir + '/geom.csv'), delimiter=',').tolist())
        obj['self_reference'] = ka.store_object(obj, basename='{}.json'.format(label))
    with open(output_fname, 'w') as f:
        json.dump(obj, f, indent=4)
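# Example (hedged): registering a local recording directory and writing the resulting
# description to a .json file. The directory and file names below are hypothetical, and the
# call requires write access to the kachery storage named by `to`.
# register_recording(
#     recdir='/data/recordings/rec001',   # hypothetical local directory with raw.mda, params.json, geom.csv
#     output_fname='rec001.json',         # hypothetical output file
#     label='rec001',
#     to='default_readwrite')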
def comparisonWithTruth(self, *, format: str = 'dataframe'):
    A = self._obj['comparison_with_truth']
    if not A:
        return None
    B = ka.load_object(A['json'])
    if format == 'json':
        return B
    elif format == 'dataframe':
        return pd.DataFrame(B).transpose()
    else:
        raise Exception('Invalid format: ' + format)
def _init_from_file(self, path: str, *, original_path: str, kwargs: dict):
    if original_path.endswith('.mda'):
        if 'paramsPath' in kwargs:
            params = ka.load_object(kwargs['paramsPath'])
            samplerate = params['samplerate']
        elif 'samplerate' in kwargs:
            samplerate = kwargs['samplerate']
        else:
            raise Exception('Missing argument: samplerate or paramsPath')
        self._sorting = MdaSortingExtractor(firings_file=path, samplerate=samplerate)
    else:
        raise Exception('Unsupported format for {}'.format(original_path))
def test_sort(sorter_name, min_avg_accuracy, recording_path, sorting_true_path,
              num_jobs=1, job_handler=None, container='default'):
    from spikeforest2 import sorters
    from spikeforest2 import processing
    import hither_sf as hither
    import kachery as ka
    # for now, in this test, don't use gpu for irc
    gpu = sorter_name in ['kilosort2', 'kilosort', 'tridesclous', 'ironclust']
    sorting_results = []
    with ka.config(fr='default_readonly'):
        with hither.config(container=container, gpu=gpu, job_handler=job_handler), hither.job_queue():
            sorter = getattr(sorters, sorter_name)
            for _ in range(num_jobs):
                sorting_result = sorter.run(recording_path=recording_path, sorting_out=hither.File())
                sorting_results.append(sorting_result)
    assert sorting_result.success
    sorting_result = sorting_results[0]
    with ka.config(fr='default_readonly'):
        with hither.config(container='default', gpu=False):
            compare_result = processing.compare_with_truth.run(
                sorting_path=sorting_result.outputs.sorting_out,
                sorting_true_path=sorting_true_path,
                json_out=hither.File())
    assert compare_result.success
    obj = ka.load_object(compare_result.outputs.json_out._path)
    aa = _average_accuracy(obj)
    print(f'AVERAGE-ACCURACY: {aa}')
    assert aa >= min_avg_accuracy, f"Average accuracy is lower than expected {aa} < {min_avg_accuracy}"
    print('Passed.')
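# Example (hedged): invoking the test above for a single sorter. The recording URIs are
# hypothetical placeholders for a spikeforest recording directory and its ground-truth firings.
# test_sort(
#     sorter_name='mountainsort4',
#     min_avg_accuracy=0.5,
#     recording_path='sha1dir://0000000000000000000000000000000000000000.example/rec001',
#     sorting_true_path='sha1dir://0000000000000000000000000000000000000000.example/rec001/firings_true.mda',
#     num_jobs=1)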
def javascript_state_changed(self, prev_state, state):
    self._set_status('running', 'Running Analysis')
    path = state.get('path', None)
    if not path:
        self._set_error('Missing path')
        return
    self._set_status('running', 'Loading object: {}'.format(path))
    obj = ka.load_object(path=path, fr='default_readonly')
    if not obj:
        self._set_error('Unable to load object: {}'.format(path))
        return
    # delete this because it takes a long time to transfer
    if 'StudyAnalysisResults' in obj:
        del obj['StudyAnalysisResults']
    self._set_state(object=obj)
    self._set_status('finished', 'Finished Analysis')
def javascript_state_changed(self, prev_state, state):
    self._set_status('running', 'Running SpikeForestAnalysis')
    path = state.get('path', None)
    if not path:
        self._set_error('Missing path')
        return
    self._set_status('running', 'Loading object: {}'.format(path))
    obj = ka.load_object(path=path, fr='default_readonly')
    if not obj:
        self._set_error('Unable to load object: {}'.format(path))
        return
    # For now don't load the bulk of the object
    if 'StudyAnalysisResults' in obj:
        del obj['StudyAnalysisResults']
    self._set_state(object=obj)
    self._set_status('finished', 'Finished SpikeForestAnalysis')
# Version: 0.1.5-w1

# Download the data (if needed)
ka.set_config(fr='default_readonly')
ka.load_file(recording_path + '/raw.mda')

# Run the spike sorting
with hither.config(container='docker://magland/sf-kilosort2:0.1.5', gpu=gpu):
    sorting_result = sorter.run(recording_path=recording_path, sorting_out=hither.File(), **params)
assert sorting_result.success
sorting_path = sorting_result.outputs.sorting_out

# Compare with ground truth
with hither.config(container='default'):
    compare_result = processing.compare_with_truth.run(
        sorting_path=sorting_path,
        sorting_true_path=sorting_true_path,
        json_out=hither.File())
assert compare_result.success
obj = ka.load_object(compare_result.outputs.json_out._path)

accuracies = [float(obj[i]['accuracy']) for i in obj.keys()]
print('ACCURACIES:')
print(accuracies)
print('')

average_accuracy = np.mean(accuracies)
print('AVERAGE-ACCURACY:', average_accuracy)
def main(): from spikeforest2 import sorters from spikeforest2 import processing parser = argparse.ArgumentParser( description='Run the SpikeForest2 main analysis') # parser.add_argument('analysis_file', help='Path to the analysis specification file (.json format).') # parser.add_argument('--config', help='Configuration file', required=True) # parser.add_argument('--output', help='Analysis output file (.json format)', required=True) # parser.add_argument('--slurm', help='Optional SLURM configuration file (.json format)', required=False, default=None) # parser.add_argument('--verbose', help='Provide some additional verbose output.', action='store_true') parser.add_argument( 'spec', help='Path to the .json file containing the analysis specification') parser.add_argument('--output', '-o', help='The output .json file', required=True) parser.add_argument('--force-run', help='Force rerunning of all spike sorting', action='store_true') parser.add_argument( '--force-run-all', help='Force rerunning of all spike sorting and other processing', action='store_true') parser.add_argument('--parallel', help='Optional number of parallel jobs', required=False, default='0') parser.add_argument('--slurm', help='Path to slurm config file', required=False, default=None) parser.add_argument('--cache', help='The cache database to use', required=False, default=None) parser.add_argument('--rerun-failing', help='Rerun sorting jobs that previously failed', action='store_true') parser.add_argument('--test', help='Only run a few.', action='store_true') parser.add_argument('--job-timeout', help='Timeout for sorting jobs', required=False, default=600) parser.add_argument('--log-file', help='Log file for analysis progress', required=False, default=None) args = parser.parse_args() force_run_all = args.force_run_all # the following apply to sorting jobs only force_run = args.force_run or args.force_run_all job_timeout = float(args.job_timeout) cache_failing = True rerun_failing = args.rerun_failing with open(args.spec, 'r') as f: spec = json.load(f) # clear the log file if args.log_file is not None: with open(args.log_file, 'w'): pass studysets_path = spec['studysets'] studyset_names = spec['studyset_names'] spike_sorters = spec['spike_sorters'] ka.set_config(fr='default_readonly') print(f'Loading study sets object from: {studysets_path}') studysets_obj = ka.load_object(studysets_path) if not studysets_obj: raise Exception(f'Unable to load: {studysets_path}') all_study_sets = studysets_obj['StudySets'] study_sets = [] for studyset in all_study_sets: if studyset['name'] in studyset_names: study_sets.append(studyset) if int(args.parallel) > 0: job_handler = hither.ParallelJobHandler(int(args.parallel)) job_handler_gpu = job_handler job_handler_ks = job_handler elif args.slurm: with open(args.slurm, 'r') as f: slurm_config = json.load(f) job_handler = hither.SlurmJobHandler(working_dir='tmp_slurm', **slurm_config['cpu']) job_handler_gpu = hither.SlurmJobHandler(working_dir='tmp_slurm', **slurm_config['gpu']) job_handler_ks = hither.SlurmJobHandler(working_dir='tmp_slurm', **slurm_config['ks']) else: job_handler = None job_handler_gpu = None job_handler_ks = None with hither.config(container='default', cache=args.cache, force_run=force_run_all, job_handler=job_handler, log_path=args.log_file), hither.job_queue(): studies = [] recordings = [] for studyset in study_sets: studyset_name = studyset['name'] print(f'================ STUDY SET: {studyset_name}') studies0 = studyset['studies'] if args.test: studies0 = studies0[:1] 
studyset['studies'] = studies0 for study in studies0: study['study_set'] = studyset_name study_name = study['name'] print(f'======== STUDY: {study_name}') recordings0 = study['recordings'] if args.test: recordings0 = recordings0[:2] study['recordings'] = recordings0 for recording in recordings0: recording['study'] = study_name recording['study_set'] = studyset_name recording['firings_true'] = recording['firingsTrue'] recordings.append(recording) studies.append(study) # Download recordings for recording in recordings: ka.load_file(recording['directory'] + '/raw.mda') ka.load_file(recording['directory'] + '/firings_true.mda') # Attach results objects for recording in recordings: recording['results'] = dict() # Summarize recordings for recording in recordings: recording_path = recording['directory'] sorting_true_path = recording['firingsTrue'] recording['results'][ 'computed-info'] = processing.compute_recording_info.run( _label= f'compute-recording-info:{recording["study"]}/{recording["name"]}', recording_path=recording_path, json_out=hither.File()) recording['results'][ 'true-units-info'] = processing.compute_units_info.run( _label= f'compute-units-info:{recording["study"]}/{recording["name"]}', recording_path=recording_path, sorting_path=sorting_true_path, json_out=hither.File()) # Spike sorting for sorter in spike_sorters: for recording in recordings: if recording['study_set'] in sorter['studysets']: recording_path = recording['directory'] sorting_true_path = recording['firingsTrue'] algorithm = sorter['processor_name'] if not hasattr(sorters, algorithm): raise Exception( f'No such sorting algorithm: {algorithm}') Sorter = getattr(sorters, algorithm) if algorithm in ['ironclust']: gpu = True jh = job_handler_gpu elif algorithm in ['kilosort', 'kilosort2']: gpu = True jh = job_handler_ks else: gpu = False jh = job_handler with hither.config(gpu=gpu, force_run=force_run, exception_on_fail=False, cache_failing=cache_failing, rerun_failing=rerun_failing, job_handler=jh, job_timeout=job_timeout): sorting_result = Sorter.run( _label= f'{algorithm}:{recording["study"]}/{recording["name"]}', recording_path=recording['directory'], sorting_out=hither.File()) recording['results']['sorting-' + sorter['name']] = sorting_result recording['results'][ 'comparison-with-truth-' + sorter['name']] = processing.compare_with_truth.run( _label= f'comparison-with-truth:{algorithm}:{recording["study"]}/{recording["name"]}', sorting_path=sorting_result.outputs.sorting_out, sorting_true_path=sorting_true_path, json_out=hither.File()) recording['results'][ 'units-info-' + sorter['name']] = processing.compute_units_info.run( _label= f'units-info:{algorithm}:{recording["study"]}/{recording["name"]}', recording_path=recording_path, sorting_path=sorting_result.outputs.sorting_out, json_out=hither.File()) # Assemble all of the results print('') print('=======================================================') print('Assembling results...') for recording in recordings: print( f'Assembling recording: {recording["study"]}/{recording["name"]}') recording['summary'] = dict( plots=dict(), computed_info=ka.load_object( recording['results']['computed-info'].outputs.json_out._path), true_units_info=ka.store_file( recording['results'] ['true-units-info'].outputs.json_out._path)) sorting_results = [] for sorter in spike_sorters: for recording in recordings: if recording['study_set'] in sorter['studysets']: print( f'Assembling sorting: {sorter["processor_name"]} {recording["study"]}/{recording["name"]}' ) sorting_result = 
recording['results']['sorting-' + sorter['name']] comparison_result = recording['results'][ 'comparison-with-truth-' + sorter['name']] units_info_result = recording['results']['units-info-' + sorter['name']] console_out_str = _console_out_to_str( sorting_result.runtime_info['console_out']) console_out_path = ka.store_text(console_out_str) sr = dict( recording=recording, sorter=sorter, firings_true=recording['directory'] + '/firings_true.mda', processor_name=sorter['processor_name'], processor_version=sorting_result.version, execution_stats=dict( start_time=sorting_result.runtime_info['start_time'], end_time=sorting_result.runtime_info['end_time'], elapsed_sec=sorting_result.runtime_info['end_time'] - sorting_result.runtime_info['start_time'], retcode=0 if sorting_result.success else -1, timed_out=sorting_result.runtime_info.get( 'timed_out', False)), container=sorting_result.container, console_out=console_out_path) if sorting_result.success: sr['firings'] = ka.store_file( sorting_result.outputs.sorting_out._path) sr['comparison_with_truth'] = dict(json=ka.store_file( comparison_result.outputs.json_out._path)) sr['sorted_units_info'] = ka.store_file( units_info_result.outputs.json_out._path) else: sr['firings'] = None sr['comparison_with_truth'] = None sr['sorted_units_info'] = None sorting_results.append(sr) # Delete results from recordings for recording in recordings: del recording['results'] # Aggregate sorting results print('') print('=======================================================') print('Aggregating sorting results...') aggregated_sorting_results = aggregate_sorting_results( studies, recordings, sorting_results) # Show output summary for sr in aggregated_sorting_results['study_sorting_results']: study_name = sr['study'] sorter_name = sr['sorter'] n1 = np.array(sr['num_matches']) n2 = np.array(sr['num_false_positives']) n3 = np.array(sr['num_false_negatives']) accuracies = n1 / (n1 + n2 + n3) avg_accuracy = np.mean(accuracies) txt = 'STUDY: {}, SORTER: {}, AVG ACCURACY: {}'.format( study_name, sorter_name, avg_accuracy) print(txt) output_object = dict(studies=studies, recordings=recordings, study_sets=study_sets, sorting_results=sorting_results, aggregated_sorting_results=ka.store_object( aggregated_sorting_results, basename='aggregated_sorting_results.json')) print(f'Writing output to {args.output}...') with open(args.output, 'w') as f: json.dump(output_object, f, indent=4) print('Done.')
def sortedUnitsInfo(self):
    A = self._obj.get('sorted_units_info', None)
    if not A:
        return None
    return ka.load_object(A)
from spikeforest2_utils.autoextractors.mdaextractors.mdaextractors import MdaRecordingExtractor
from spikeforest2_utils import AutoRecordingExtractor, AutoSortingExtractor
import kachery as ka
import numpy as np
import hither_sf as hither

ka.set_config(fr='default_readonly')

# Load a recording
# recdir = 'sha1dir://fb52d510d2543634e247e0d2d1d4390be9ed9e20.synth_magland/datasets_noise10_K10_C4/001_synth'
recdir = 'sha1dir://fb52d510d2543634e247e0d2d1d4390be9ed9e20.synth_magland/datasets_noise20_K20_C4/001_synth'
# recdir = 'sha1dir://c0879a26f92e4c876cd608ca79192a84d4382868.manual_franklab/tetrode_600s/sorter1_1'
recobj = dict(
    raw=recdir + '/raw.mda',
    params=ka.load_object(recdir + '/params.json'),
    geom=np.genfromtxt(ka.load_file(recdir + '/geom.csv'), delimiter=',').tolist()
)
assert ka.load_file(recobj['raw']) is not None


def main():
    import spikeextractors as se
    from spikeforest2_utils import writemda32, AutoRecordingExtractor
    from sklearn.neighbors import NearestNeighbors
    from sklearn.cross_decomposition import PLSRegression
    import spikeforest_widgets as sw
    sw.init_electron()

    # bandpass filter
    with hither.config(container='default', cache='default_readwrite'):
def main(): from mountaintools import client as mt parser = argparse.ArgumentParser( description= 'Generate unit detail data (including spikesprays) for website') parser.add_argument('analysis_path', help='assembled analysis file (output.json)') parser.add_argument( '--studysets', help='Comma-separated list of study set names to include', required=False, default=None) parser.add_argument('--force-run', help='Force rerunning of processing', action='store_true') parser.add_argument( '--force-run-all', help='Force rerunning of processing including filtering', action='store_true') parser.add_argument('--parallel', help='Optional number of parallel jobs', required=False, default='0') parser.add_argument('--slurm', help='Path to slurm config file', required=False, default=None) parser.add_argument('--cache', help='The cache database to use', required=False, default=None) parser.add_argument('--job-timeout', help='Timeout for processing jobs', required=False, default=600) parser.add_argument('--log-file', help='Log file for analysis progress', required=False, default=None) parser.add_argument( '--force-regenerate', help= 'Whether to force regenerating spike sprays (for when code has changed)', action='store_true') parser.add_argument('--test', help='Whether to just test by running only 1', action='store_true') args = parser.parse_args() mt.configDownloadFrom(['spikeforest.kbucket']) with open(args.analysis_path, 'r') as f: analysis = json.load(f) if args.studysets is not None: studyset_names = args.studysets.split(',') print('Using study sets: ', studyset_names) else: studyset_names = None study_sets = analysis['StudySets'] sorting_results = analysis['SortingResults'] studies_to_include = [] for ss in study_sets: if (studyset_names is None) or (ss['name'] in studyset_names): for study in ss['studies']: studies_to_include.append(study['name']) print('Including studies:', studies_to_include) print('Determining sorting results to process ({} total)...'.format( len(sorting_results))) sorting_results_to_process = [] sorting_results_to_consider = [] for sr in sorting_results: study_name = sr['studyName'] if study_name in studies_to_include: if 'firings' in sr: if sr.get('comparisonWithTruth', None) is not None: sorting_results_to_consider.append(sr) key = dict(name='unit-details-v0.1.0', recording_directory=sr['recordingDirectory'], firings_true=sr['firingsTrue'], firings=sr['firings']) val = mt.getValue(key=key, collection='spikeforest') if (not val) or (args.force_regenerate): sr['key'] = key sorting_results_to_process.append(sr) if args.test and len(sorting_results_to_process) > 0: sorting_results_to_process = [sorting_results_to_process[0]] print('Need to process {} of {} sorting results'.format( len(sorting_results_to_process), len(sorting_results_to_consider))) recording_directories_to_process = sorted( list( set([ sr['recordingDirectory'] for sr in sorting_results_to_process ]))) print('{} recording directories to process'.format( len(recording_directories_to_process))) if int(args.parallel) > 0: job_handler = hither.ParallelJobHandler(int(args.parallel)) elif args.slurm: with open(args.slurm, 'r') as f: slurm_config = json.load(f) job_handler = hither.SlurmJobHandler(working_dir='tmp_slurm', **slurm_config['cpu']) else: job_handler = None print('Filtering recordings...') filter_results = [] with hither.config(container='default', cache=args.cache, force_run=args.force_run_all, job_handler=job_handler, log_path=args.log_file, exception_on_fail=True, cache_failing=False, rerun_failing=True, 
job_timeout=args.job_timeout), hither.job_queue(): for recdir in recording_directories_to_process: result = filter_recording.run(recording_directory=recdir, timeseries_out=hither.File()) filter_results.append(result) filtered_timeseries_by_recdir = dict() for i, recdir in enumerate(recording_directories_to_process): result0 = filter_results[i] if not result0.success: raise Exception( 'Problem computing filtered timeseries for recording: {}'. format(recdir)) filtered_timeseries_by_recdir[ recdir] = result0.outputs.timeseries_out._path print('Creating spike sprays...') with hither.config(container='default', cache=args.cache, force_run=args.force_run or args.force_run_all, job_handler=job_handler, log_path=args.log_file, exception_on_fail=True, cache_failing=False, rerun_failing=True, job_timeout=args.job_timeout), hither.job_queue(): for sr in sorting_results_to_process: recdir = sr['recordingDirectory'] study_name = sr['studyName'] rec_name = sr['recordingName'] sorter_name = sr['sorterName'] print('====== COMPUTING {}/{}/{}'.format(study_name, rec_name, sorter_name)) cwt = ka.load_object(path=sr['comparisonWithTruth']['json']) filtered_timeseries = filtered_timeseries_by_recdir[recdir] spike_spray_results = [] list0 = list(cwt.values()) for _, unit in enumerate(list0): result = create_spike_sprays.run( recording_directory=recdir, filtered_timeseries=filtered_timeseries, firings_true=os.path.join(recdir, 'firings_true.mda'), firings_sorted=sr['firings'], unit_id_true=unit['unit_id'], unit_id_sorted=unit['best_unit'], json_out=hither.File()) setattr(result, 'unit', unit) spike_spray_results.append(result) sr['spike_spray_results'] = spike_spray_results for sr in sorting_results_to_process: recdir = sr['recordingDirectory'] study_name = sr['studyName'] rec_name = sr['recordingName'] sorter_name = sr['sorterName'] print('====== SAVING {}/{}/{}'.format(study_name, rec_name, sorter_name)) spike_spray_results = sr['spike_spray_results'] key = sr['key'] unit_details = [] ok = True for i, result in enumerate(spike_spray_results): if not result.success: print( 'WARNING: Error creating spike sprays for {}/{}/{}'.format( study_name, rec_name, sorter_name)) ok = False break ssobj = ka.load_object(result.outputs.json_out._path) if ssobj is None: raise Exception('Problem loading spikespray object output.') address = mt.saveObject(object=ssobj, upload_to='spikeforest.kbucket') unit = getattr(result, 'unit') unit_details.append( dict(studyName=study_name, recordingName=rec_name, sorterName=sorter_name, trueUnitId=unit['unit_id'], sortedUnitId=unit['best_unit'], spikeSprayUrl=mt.findFile( path=address, remote_only=True, download_from='spikeforest.kbucket'))) if ok: mt.saveObject(collection='spikeforest', key=key, object=unit_details, upload_to='spikeforest.public')
def main(): parser = argparse.ArgumentParser( description="Prepare SpikeForest recordings (i.e., populate this repository)") parser.add_argument('output_dir', help='The output directory (e.g., recordings)') parser.add_argument('--upload', action='store_true', help='Whether to upload the recording objects to kachery (password required)') # parser.add_argument('--verbose', action='store_true', help='Turn on verbose output') args = parser.parse_args() output_dir = args.output_dir if args.upload: ka.set_config( fr='default_readwrite', to='default_readwrite' ) else: ka.set_config( fr='default_readonly', ) # geom_mearec_neuronexus = np.genfromtxt('mearec_neuronexus_geom.csv', delimiter=',').tolist() mearec_neuronexus_geom_fname = 'mearec_neuronexus_geom.csv' # Load a spikeforest analysis object X = ka.load_object('sha1://b678d798d67b6faa3c6240aca52f3857c9e4b877/analysis.json') # the output directory on the local machine basedir = output_dir if os.path.exists(basedir): raise Exception('Directory already exists: {}'.format(basedir)) if not os.path.exists(basedir): os.mkdir(basedir) studysets_to_include = ['PAIRED_BOYDEN', 'PAIRED_CRCNS_HC1', 'PAIRED_MEA64C_YGER', 'PAIRED_KAMPFF', 'PAIRED_MONOTRODE', 'SYNTH_BIONET', 'SYNTH_MONOTRODE', 'SYNTH_MAGLAND', 'SYNTH_MEAREC_NEURONEXUS', 'SYNTH_MEAREC_TETRODE', 'SYNTH_MONOTRODE', 'SYNTH_VISAPY', 'HYBRID_JANELIA', 'MANUAL_FRANKLAB'] # studysets_to_include = ['PAIRED_CRCNS_HC1', 'PAIRED_MEA64C_YGER', 'PAIRED_KAMPFF', 'PAIRED_MONOTRODE', 'SYNTH_MONOTRODE', 'SYNTH_MAGLAND', 'SYNTH_MEAREC_NEURONEXUS', 'SYNTH_MEAREC_TETRODE', 'SYNTH_MONOTRODE', 'SYNTH_VISAPY', 'HYBRID_JANELIA', 'MANUAL_FRANKLAB'] # These are the files to download within each recording fnames = ['geom.csv', 'params.json', 'raw.mda', 'firings_true.mda'] # fnames = ['geom.csv', 'params.json'] for studyset in X['StudySets']: studyset_name = studyset['name'] if studyset_name in studysets_to_include: print('STUDYSET: {}'.format(studyset['name'])) studysetdir_local = os.path.join(basedir, studyset_name) if not os.path.exists(studysetdir_local): os.mkdir(studysetdir_local) for study in studyset['studies']: study_name = study['name'] print('STUDY: {}/{}'.format(studyset_name, study_name)) studydir_local = os.path.join(studysetdir_local, study_name) if not os.path.exists(studydir_local): os.mkdir(studydir_local) for recording in study['recordings']: if studyset_name == 'SYNTH_MEAREC_NEURONEXUS': patch_recording_geom(recording, mearec_neuronexus_geom_fname) recname = recording['name'] print('RECORDING: {}/{}/{}'.format(studyset_name, study_name, recname)) recdir = recording['directory'] recfile = os.path.join(studydir_local, recname + '.json') obj = dict( raw=recdir + '/raw.mda', params=ka.load_object(recdir + '/params.json'), geom=np.genfromtxt(ka.load_file(recdir + '/geom.csv'), delimiter=',').T ) obj = _json_serialize(obj) obj['self_reference'] = ka.store_object(obj, basename='{}/{}/{}.json'.format(studyset_name, study_name, recname)) with open(recfile, 'w') as f: json.dump(obj, f, indent=4) firings_true_file = os.path.join(studydir_local, recname + '.firings_true.json') obj2 = dict( firings=recdir + '/firings_true.mda' ) obj2['self_reference'] = ka.store_object(obj2, basename='{}/{}/{}.firings_true.json'.format(studyset_name, study_name, recname)) with open(firings_true_file, 'w') as f: json.dump(obj2, f, indent=4) study['self_reference'] = ka.store_object(study, basename='{}.json'.format(study_name)) with open(os.path.join(studydir_local, study_name + '.json'), 'w') as f: json.dump(study, f, indent=4) 
studyset['self_reference'] = ka.store_object(studyset, basename='{}.json'.format(studyset_name)) with open(os.path.join(studysetdir_local, studyset_name + '.json'), 'w') as f: json.dump(studyset, f, indent=4) studysets_obj = dict( StudySets=X['StudySets'] ) studysets_path = ka.store_object(studysets_obj, basename='studysets.json') with open(os.path.join(basedir, 'studysets'), 'w') as f: f.write(studysets_path)
# We will also try to plot the raster plot for the ground truth
# We will be trying to get a subset of the recording
# recording4 = se.SubRecordingExtractor(parent_recording=recordingInput, channel_ids=[2, 3, 4, 5])

# Plotting a segment of the recording (enable along with the SubRecordingExtractor above)
# w_ts = sw.plot_timeseries(recording4)
# w_ts.figure.suptitle("Recording by group")
# w_ts.ax.set_ylabel("Channel_ids")

# Spike Detection
# Code for filtering - bandpass filter
recobj = dict(
    raw=recordingZero['directory'] + '/raw.mda',
    params=ka.load_object(recordingZero['directory'] + '/params.json'),
    geom=np.genfromtxt(ka.load_file(recordingZero['directory'] + '/geom.csv'), delimiter=',').tolist())

# bandpass filter
rx = AutoRecordingExtractor(recobj)  # recobj is our recording data (raw voltage waveforms)
rx2 = st.preprocessing.bandpass_filter(recording=rx, freq_min=300, freq_max=3000, freq_wid=1000)
# rx3 = st.preprocessing.bandpass_filter(recording=recording4, freq_min=300, freq_max=3000, freq_wid=1000)
# Just a few segments of the recording

detect_threshold = 5  # As obtained in the literature
def _assemble_study_analysis_result(*, study_name, study_set_name, recordings, sorting_results, sorter_names): print('Assembling {} {}'.format(study_set_name, study_name)) true_units = dict() recording_names = [] irec = 0 for rec in recordings: if rec['study'] == study_name: recording_names.append(rec['name']) true_units_info = ka.load_object(rec['summary']['true_units_info']) for unit_info in true_units_info: id0 = unit_info['unit_id'] true_units[study_name + '/' + rec['name'] + '/{}'.format(id0)] = dict( unit_id=id0, recording_index=irec, snr=unit_info['snr'], firing_rate=unit_info['firing_rate'], num_events=unit_info['num_events'], sorting_results=dict()) irec = irec + 1 cpu_times_by_sorter = dict() for sorter_name in sorter_names: cpu_times_by_sorter[sorter_name] = [] for sr in sorting_results: rec = sr['recording'] if rec['study'] == study_name: sorter_name = sr['sorter']['name'] if sorter_name in sorter_names: if sr.get('comparison_with_truth', None): comparison_with_truth = ka.load_object( sr['comparison_with_truth']['json']) if comparison_with_truth is None: print(sr) raise Exception( 'Unable to retrieve comparison with truth object for sorting result.' ) for unit_result in comparison_with_truth.values(): id0 = unit_result['unit_id'] n_match = unit_result['num_matches'] n_fp = unit_result['num_false_positives'] n_fn = unit_result['num_false_negatives'] accuracy = n_match / (n_match + n_fp + n_fn) if n_match + n_fp > 0: precision = n_match / (n_match + n_fp) else: precision = 0 recall = n_match / (n_match + n_fn) true_units[study_name + '/' + rec['name'] + '/{}'.format(id0)]['sorting_results'][ sorter_name] = dict( accuracy=accuracy, precision=precision, recall=recall, numMatches=n_match, numFalsePositives=n_fp, numFalseNegatives=n_fn) cpu_times_by_sorter[sorter_name].append( sr['execution_stats'].get('elapsed_sec', None)) else: cpu_times_by_sorter[sorter_name].append(None) keys0 = sorted(true_units.keys()) true_units_list = [true_units[key] for key in keys0] snrs = [_round(x['snr'], 3) for x in true_units_list] firing_rates = [_round(x['firing_rate'], 3) for x in true_units_list] num_events = [x['num_events'] for x in true_units_list] recording_indices = [x['recording_index'] for x in true_units_list] unit_ids = [x['unit_id'] for x in true_units_list] study_analysis_result = dict(studyName=study_name, studySetName=study_set_name, recordingNames=recording_names, trueSnrs=snrs, trueFiringRates=firing_rates, trueNumEvents=num_events, trueRecordingIndices=recording_indices, trueUnitIds=unit_ids, sortingResults=[]) for sorter_name in sorter_names: accuracies = [ _round(x['sorting_results'].get(sorter_name, {}).get('accuracy'), 3) for x in true_units_list ] precisions = [ _round(x['sorting_results'].get(sorter_name, {}).get('precision'), 3) for x in true_units_list ] recalls = [ _round(x['sorting_results'].get(sorter_name, {}).get('recall'), 3) for x in true_units_list ] numMatches = [ _round(x['sorting_results'].get(sorter_name, {}).get('numMatches'), 3) for x in true_units_list ] numFalsePositives = [ _round( x['sorting_results'].get(sorter_name, {}).get('numFalsePositives'), 3) for x in true_units_list ] numFalseNegatives = [ _round( x['sorting_results'].get(sorter_name, {}).get('numFalseNegatives'), 3) for x in true_units_list ] study_analysis_result['sortingResults'].append( dict(sorterName=sorter_name, accuracies=accuracies, precisions=precisions, numMatches=numMatches, numFalsePositives=numFalsePositives, numFalseNegatives=numFalseNegatives, recalls=recalls, 
cpuTimesSec=cpu_times_by_sorter[sorter_name])) # print(study_analysis_result['studyName'], study_analysis_result['sortingResults'][0]['cpuTimesSec']) return study_analysis_result
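# Example (hedged): the per-unit metrics computed in the assembly function above, pulled out
# as a small standalone helper with sample counts. This simply restates the formulas used in
# that loop: accuracy = matches / (matches + false positives + false negatives), and so on.
def _example_unit_metrics(n_match=90, n_fp=5, n_fn=10):
    accuracy = n_match / (n_match + n_fp + n_fn)                            # 90 / 105 ~= 0.857
    precision = n_match / (n_match + n_fp) if (n_match + n_fp) > 0 else 0   # 90 / 95  ~= 0.947
    recall = n_match / (n_match + n_fn)                                     # 90 / 100  = 0.9
    return dict(accuracy=accuracy, precision=precision, recall=recall)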
def main(): parser = argparse.ArgumentParser( description=help_txt, formatter_class=argparse.RawTextHelpFormatter) parser.add_argument( 'analysis_files', nargs='+', help='The files generated by the main spikeforest analysis') parser.add_argument('--output', '-o', help='The output .json file', required=True) args = parser.parse_args() ka.set_config(fr='default_readonly') print( '******************************** LOADING ANALYSIS OUTPUT OBJECTS...') studies = [] study_sets = [] recordings = [] sorting_results = [] for analysis_file in args.analysis_files: print('Loading: {}'.format(analysis_file)) obj = ka.load_object(analysis_file) if obj is not None: studies = studies + obj['studies'] study_sets = study_sets + obj.get('study_sets', []) recordings = recordings + obj['recordings'] sorting_results = sorting_results + obj['sorting_results'] else: raise Exception('Unable to load: {}'.format(analysis_file)) # ALGORITHMS print('******************************** ASSEMBLING ALGORITHMS...') algorithms_by_processor_name = dict() Algorithms = [] basepath = '../../spikeforest2/sorters/descriptions' repo_base_url = 'https://github.com/flatironinstitute/spikeforest/blob/master' for item in os.listdir(basepath): if item.endswith('.md'): alg = frontmatter.load(basepath + '/' + item).to_dict() alg['markdown_link'] = repo_base_url + '/spikeforest/spikeforestsorters/descriptions/' + item alg['markdown'] = alg['content'] del alg['content'] if 'processor_name' in alg: algorithms_by_processor_name[alg['processor_name']] = alg Algorithms.append(alg) print([alg['label'] for alg in Algorithms]) Studies = [] for study in studies: Studies.append( dict( name=study['name'], studySet=study['study_set'], description=study.get('description', ''), recordings=[] # the following can be obtained from the other collections # numRecordings, sorters, etc... 
)) print([S['name'] for S in Studies]) print('******************************** ASSEMBLING STUDY SETS...') study_sets_by_name = dict() for study_set in study_sets: study_sets_by_name[study_set['name']] = study_set study_set['studies'] = [] studies_by_name = dict() for study in studies: study0 = dict(name=study['name'], studySetName=study['study_set'], recordings=[]) study_sets_by_name[study['study_set']]['studies'].append(study0) studies_by_name[study0['name']] = study0 for recording in recordings: true_units_info = ka.load_object( recording['summary']['true_units_info']) if not true_units_info: print(recording['summary']['true_units_info']) raise Exception( 'Unable to load true_units_info for recording {}'.format( recording['name'])) recording0 = dict( name=recording['name'], studyName=recording['study'], studySetName=studies_by_name[recording['study']]['studySetName'], directory=recording['directory'], firingsTrue=recording['firings_true'], sampleRateHz=recording['summary']['computed_info']['samplerate'], numChannels=recording['summary']['computed_info']['num_channels'], durationSec=recording['summary']['computed_info']['duration_sec'], numTrueUnits=len(true_units_info), spikeSign=-1 # TODO: set this properly ) studies_by_name[recording0['studyName']]['recordings'].append( recording0) StudySets = [] for study_set in study_sets: StudySets.append(study_set) # SORTING RESULTS print('******************************** SORTING RESULTS...') SortingResults = [] for sr in sorting_results: SR = dict( recordingName=sr['recording']['name'], studyName=sr['recording']['study'], sorterName=sr['sorter']['name'], recordingDirectory=sr['recording']['directory'], firingsTrue=sr['recording']['firings_true'], consoleOut=sr['console_out'], container=sr['container'], cpuTimeSec=sr['execution_stats'].get('elapsed_sec', None), returnCode=sr['execution_stats'].get( 'retcode', 0 ), # TODO: in future, the default should not be 0 -- rather it should be a required field of execution_stats timedOut=sr['execution_stats'].get('timed_out', False), startTime=datetime.fromtimestamp( sr['execution_stats'].get('start_time')).isoformat(), endTime=datetime.fromtimestamp( sr['execution_stats'].get('end_time')).isoformat()) if sr.get('firings', None): SR['firings'] = sr['firings'] if not sr.get('comparison_with_truth', None): print( 'Warning: comparison with truth not found for sorting result: {} {}/{}' .format(sr['sorter']['name'], sr['recording']['study'], sr['recording']['name'])) print('Console output is here: ' + sr['console_out']) else: print('Warning: firings not found for sorting result: {} {}/{}'. 
format(sr['sorter']['name'], sr['recording']['study'], sr['recording']['name'])) print('Console output is here: ' + sr['console_out']) SortingResults.append(SR) # print('Num unit results:', len(UnitResults)) # SORTERS print('******************************** ASSEMBLING SORTERS...') sorters_by_name = dict() for sr in sorting_results: sorters_by_name[sr['sorter']['name']] = sr['sorter'] Sorters = [] sorter_names = sorted(list(sorters_by_name.keys())) sorter_names = [sorter_name for sorter_name in sorter_names] for sorter_name in sorter_names: sorter = sorters_by_name[sorter_name] alg = algorithms_by_processor_name.get(sorter['processor_name'], dict()) alg_label = alg.get('label', sorter['processor_name']) Sorters.append( dict( name=sorter['name'], algorithmName=alg_label, processorName=sorter['processor_name'], processorVersion='0', # jfm to provide this sortingParameters=sorter['params'])) print([S['name'] + ':' + S['algorithmName'] for S in Sorters]) # STUDY ANALYSIS RESULTS print( '******************************** ASSEMBLING STUDY ANALYSIS RESULTS...' ) StudyAnalysisResults = [ _assemble_study_analysis_result(study_name=study['name'], study_set_name=study['study_set'], recordings=recordings, sorting_results=sorting_results, sorter_names=sorter_names) for study in studies ] # GENERAL print('******************************** ASSEMBLING GENERAL INFO...') General = [ dict(dateUpdated=datetime.now().isoformat(), packageVersions=dict(spikeforest2=pkg_resources.get_distribution( "spikeforest2").version)) ] obj = dict(mode='spike-front', StudySets=StudySets, SortingResults=SortingResults, Sorters=Sorters, Algorithms=Algorithms, StudyAnalysisResults=StudyAnalysisResults, General=General) print(f'Writing to {args.output}...') with open(args.output, 'w') as f: json.dump(obj, f, indent=4) print('Done.')
def main(): parser = argparse.ArgumentParser( description= "Prepare SpikeForest recordings (i.e., populate this repository)") parser.add_argument('output_dir', help='The output directory (e.g., recordings)') parser.add_argument( '--upload', action='store_true', help= 'Whether to upload the recording objects to kachery (password required)' ) # parser.add_argument('--verbose', action='store_true', help='Turn on verbose output') args = parser.parse_args() output_dir = args.output_dir if args.upload: ka.set_config(fr='default_readwrite', to='default_readwrite') else: ka.set_config(fr='default_readonly', ) # geom_mearec_neuronexus = np.genfromtxt('mearec_neuronexus_geom.csv', delimiter=',').tolist() mearec_neuronexus_geom_fname = 'mearec_neuronexus_geom.csv' # Load a spikeforest analysis object X = ka.load_object( 'sha1://b678d798d67b6faa3c6240aca52f3857c9e4b877/analysis.json') # the output directory on the local machine basedir = output_dir if os.path.exists(basedir): raise Exception('Directory already exists: {}'.format(basedir)) if not os.path.exists(basedir): os.mkdir(basedir) studysets_to_add = ['PAIRED_ENGLISH'] studysets_to_include = [ 'PAIRED_BOYDEN', 'PAIRED_CRCNS_HC1', 'PAIRED_MEA64C_YGER', 'PAIRED_KAMPFF', 'PAIRED_MONOTRODE', 'SYNTH_BIONET', 'SYNTH_MONOTRODE', 'SYNTH_MAGLAND', 'SYNTH_MEAREC_NEURONEXUS', 'SYNTH_MEAREC_TETRODE', 'SYNTH_MONOTRODE', 'SYNTH_VISAPY', 'HYBRID_JANELIA', 'MANUAL_FRANKLAB' ] # studysets_to_include = ['PAIRED_CRCNS_HC1', 'PAIRED_MEA64C_YGER', 'PAIRED_KAMPFF', 'PAIRED_MONOTRODE', 'SYNTH_MONOTRODE', 'SYNTH_MAGLAND', 'SYNTH_MEAREC_NEURONEXUS', 'SYNTH_MEAREC_TETRODE', 'SYNTH_MONOTRODE', 'SYNTH_VISAPY', 'HYBRID_JANELIA', 'MANUAL_FRANKLAB'] listdir_ = lambda _path: [ x for x in os.listdir(_path) if os.path.isdir(os.path.join(_path, x)) ] listfile_ = lambda _path: [ x for x in os.listdir(_path) if os.path.isfile(os.path.join(_path, x)) ] # These are the files to download within each recording fnames = ['geom.csv', 'params.json', 'raw.mda', 'firings_true.mda'] # fnames = ['geom.csv', 'params.json'] for studyset_name in studysets_to_add: studyset = dict(name=studyset_name, info=studyset_name, desciption=studyset_name) print('STUDYSET: {}'.format(studyset_name)) studysetdir_local = os.path.join(basedir, studyset_name) assert os.path.exists(studysetdir_local) list_study = [] list_study_name = listdir_(studysetdir_local) for study_name in list_study_name: study = dict(name=study_name, studySetName=studyset_name) print('STUDY: {}/{}'.format(studyset_name, study_name)) studydir_local = os.path.join(studysetdir_local, study_name) assert os.path.exists(studydir_local) list_recname = listfile_(studydir_local) list_recname = [ x.replace('.json', '') for x in list_recname if (not 'firings_true.json' in x) ] list_recording = [] for recname in list_recname: recording = dict(name=recname, studyName=study_name, studySetName=studyset_name) print('RECORDING: {}/{}/{}'.format(studyset_name, study_name, recname)) with open(os.path.join(studydir_local, recname + '.json'), 'r') as f: recording = json.load(f) recording['directory'] = recdir list_recording.append(recording) study['self_reference'] = ka.store_object( study, basename='{}.json'.format(study_name)) list_study.append(study) with open(os.path.join(studydir_local, study_name + '.json'), 'w') as f: json.dump(study, f, indent=4) studyset['studies'] = list_study studyset['self_reference'] = ka.store_object( studyset, basename='{}.json'.format(studyset_name)) with open(os.path.join(studysetdir_local, studyset_name + '.json'), 'w') as 
f: json.dump(studyset, f, indent=4) # add studysets StudySets_add = [] for studyset_name in studysets_to_add: StudySets_add.append(studyset) StudySets = list.join(X['StudySets'], StudySets_add) studysets_obj = dict(StudySets=X['StudySets']) studysets_path = ka.store_object(studysets_obj, basename='studysets.json') with open(os.path.join(basedir, 'studysets'), 'w') as f: f.write(studysets_path)
#!/usr/bin/env python

from mountaintools import client as mt
import kachery as ka

# Note: download token is required here
mt.configDownloadFrom('spikeforest.kbucket')

ka.set_config(
    fr='default_readwrite',
    to='default_readwrite'
)

X = mt.loadObject(path='sha1://b678d798d67b6faa3c6240aca52f3857c9e4b877/analysis.json')
ka.store_object(X, basename='analysis.json')
X = ka.load_object('sha1://b678d798d67b6faa3c6240aca52f3857c9e4b877/analysis.json')


def get_sha1_part_of_sha1dir(path):
    if path.startswith('sha1dir://'):
        list0 = path.split('/')
        list1 = list0[2].split('.')
        return list1[0]
    else:
        return None


# studysets_to_include = ['PAIRED_BOYDEN', 'PAIRED_CRCNS_HC1', 'PAIRED_MEA64C_YGER', 'PAIRED_KAMPFF', 'PAIRED_MONOTRODE', 'SYNTH_MONOTRODE', 'SYNTH_MAGLAND', 'SYNTH_MEAREC_NEURONEXUS', 'SYNTH_MEAREC_TETRODE', 'SYNTH_MONOTRODE', 'SYNTH_VISAPY', 'HYBRID_JANELIA', 'MANUAL_FRANKLAB']
studysets_to_include = ['SYNTH_BIONET']
fnames = ['geom.csv', 'params.json', 'raw.mda', 'firings_true.mda']
# fnames = ['geom.csv', 'params.json', 'firings_true.mda']
# fnames = ['geom.csv', 'params.json']

for studyset in X['StudySets']:
def _test_store_object(val: dict):
    x = ka.store_object(val)
    assert x
    val2 = ka.load_object(x)
    assert val == val2
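# Example (hedged): exercising the round-trip helper above with a small dict; store_object
# returns a content-addressed URI from which load_object recovers an equal object.
# _test_store_object({'name': 'example', 'values': [1, 2, 3]})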