def test_spikeforest_analysis(tmpdir):
    """End-to-end smoke test of the SpikeForest pipeline on generated toy recordings.

    Generates `num_recordings` toy datasets under *tmpdir*, runs every sorter from
    `_define_sorters()` through sort/summarize/compare, aggregates the results,
    saves them under `output_id`, and finally asserts a minimum average accuracy.

    :param tmpdir: pytest tmpdir fixture; converted to a plain string path below.
    :raises Exception: if any sorter's average accuracy falls below 0.3
        (except 'Yass', which is explicitly excused).
    """
    tmpdir = str(tmpdir)

    # generate toy recordings (one directory per recording, with ground-truth firings)
    delete_recordings = True
    num_recordings = 2
    duration = 15
    for num in range(1, num_recordings+1):
        dirname = tmpdir+'/toy_example{}'.format(num)
        if delete_recordings:
            if os.path.exists(dirname):
                shutil.rmtree(dirname)
        if not os.path.exists(dirname):
            # Synthesize a recording plus its known true sorting, then persist
            # both in MDA format so the pipeline can read them back.
            rx, sx_true = se.example_datasets.toy_example1(
                duration=duration, num_channels=4, samplerate=30000, K=10)
            se.MdaRecordingExtractor.writeRecording(
                recording=rx, save_path=dirname)
            se.MdaSortingExtractor.writeSorting(
                sorting=sx_true, save_path=dirname+'/firings_true.mda')

    # Use this to optionally connect to a kbucket share:
    # ca.autoConfig(collection='spikeforest',key='spikeforest2-readwrite',ask_password=True)

    # for downloading containers if needed
    ca.setRemoteConfig(alternate_share_ids=['69432e9201d0'])

    # Specify the compute resource (None = run locally)
    # compute_resource = 'local-computer'
    compute_resource = None

    # Use this to control whether we force the processing to re-run (by default it uses cached results)
    os.environ['MLPROCESSORS_FORCE_RUN'] = 'FALSE'  # FALSE or TRUE

    # This is the id of the output -- for later retrieval by GUI's, etc
    output_id = 'spikeforest_test0'

    # Grab the recordings for testing (one dict per toy recording on disk)
    recordings = [
        dict(
            recording_name='toy_example{}'.format(num),
            study_name='toy_examples',
            directory=tmpdir+'/toy_example{}'.format(num)
        )
        for num in range(1, num_recordings+1)
    ]
    studies = [
        dict(
            name='toy_examples',
            study_set='toy_examples',
            directory=os.path.abspath('.'),
            description='Toy examples.'
        )
    ]

    # Summarize the recordings (attaches summary info used downstream)
    recordings = sa.summarize_recordings(
        recordings=recordings, compute_resource=compute_resource)

    # Sorters (algs and params) are defined below
    sorters = _define_sorters()

    # We will be assembling the sorting results here
    sorting_results = []
    for sorter in sorters:
        # Sort the recordings
        sortings = sa.sort_recordings(
            sorter=sorter,
            recordings=recordings,
            compute_resource=compute_resource
        )
        # Summarize the sortings
        sortings = sa.summarize_sortings(
            sortings=sortings,
            compute_resource=compute_resource
        )
        # Compare with ground truth
        sortings = sa.compare_sortings_with_truth(
            sortings=sortings,
            compute_resource=compute_resource
        )
        # Append to results
        sorting_results = sorting_results+sortings

    # TODO: collect all the units for aggregated analysis
    aggregated_sorting_results = sa.aggregate_sorting_results(
        studies, recordings, sorting_results)

    # Save the output under a stable key so GUIs can retrieve it later.
    # NOTE: the aggregated results are saved as their own object and referenced
    # by address from within the main output object.
    print('Saving the output')
    ca.saveObject(
        key=dict(
            name='spikeforest_results',
            output_id=output_id
        ),
        object=dict(
            studies=studies,
            recordings=recordings,
            sorting_results=sorting_results,
            aggregated_sorting_results=ca.saveObject(object=aggregated_sorting_results)
        )
    )

    # Per-(study, sorter) accuracy check: accuracy = matches / (matches + FP + FN)
    for sr in aggregated_sorting_results['study_sorting_results']:
        study_name = sr['study']
        sorter_name = sr['sorter']
        n1 = np.array(sr['num_matches'])
        n2 = np.array(sr['num_false_positives'])
        n3 = np.array(sr['num_false_negatives'])
        accuracies = n1/(n1+n2+n3)
        avg_accuracy = np.mean(accuracies)
        txt = 'STUDY: {}, SORTER: {}, AVG ACCURACY: {}'.format(study_name, sorter_name, avg_accuracy)
        print(txt)
        if avg_accuracy < 0.3:
            if sorter_name == 'Yass':
                # Known-weak sorter on this toy data; don't fail the test for it.
                print('Average accuracy is too low, but we are excusing Yass for now.')
            else:
                raise Exception('Average accuracy is too low for test----- '+txt)
def main():
    """Run the SpikeForest pipeline on one recording from a named recording group.

    Connects to the kbucket share, loads the 'mearec_sqmea_test' recording group,
    sorts the first recording with every configured sorter, summarizes and
    compares the results against ground truth, and saves the output under
    'spikeforest_test3'.
    """
    # Connect to the kbucket share; the password may come from the environment.
    ca.autoConfig(collection='spikeforest', key='spikeforest2-readwrite',
                  ask_password=True,
                  password=os.environ.get('SPIKEFOREST_PASSWORD', None))

    # Compute resources (None = run locally); KiloSort may use a dedicated one.
    compute_resource = None
    compute_resource_ks = None

    # Controls whether cached processing results are reused (FALSE) or recomputed (TRUE).
    os.environ['MLPROCESSORS_FORCE_RUN'] = 'FALSE'  # FALSE or TRUE

    # Id of the saved output -- for later retrieval by GUI's, etc
    output_id = 'spikeforest_test3'

    # group_name = 'magland_synth_test'
    group_name = 'mearec_sqmea_test'
    group_obj = ca.loadObject(
        key=dict(name='spikeforest_recording_group', group_name=group_name))
    recordings = group_obj['recordings']
    studies = group_obj['studies']
    recordings = [recordings[0]]  # restrict the run to the first recording

    # Attach recording summaries used by the downstream stages.
    summarized_recordings = sa.summarize_recordings(
        recordings=recordings, compute_resource=compute_resource)

    # Sorter definitions (algorithms and parameters).
    sorters = define_sorters()

    # Accumulate the raw sorting results across all sorters.
    raw_sortings = []
    for sorter in sorters:
        resource = compute_resource_ks if sorter['name'] == 'KiloSort' else compute_resource
        raw_sortings = raw_sortings + sa.sort_recordings(
            sorter=sorter,
            recordings=summarized_recordings,
            compute_resource=resource)

    # Summarize, then compare with ground truth.
    summarized_sortings = sa.summarize_sortings(
        sortings=raw_sortings, compute_resource=compute_resource)
    compared_sortings = sa.compare_sortings_with_truth(
        sortings=summarized_sortings, compute_resource=compute_resource)

    # TODO: collect all the units for aggregated analysis

    # Persist the output for later retrieval.
    print('Saving the output')
    ca.saveObject(
        key=dict(name='spikeforest_results', output_id=output_id),
        object=dict(studies=studies,
                    recordings=summarized_recordings,
                    sorting_results=compared_sortings))
def main():
    """Run the SpikeForest pipeline locally on a generated toy recording.

    Creates (or reuses) a toy example recording under ./recordings/example1,
    duplicates it 10x to exercise parallel workers, runs every configured
    sorter, summarizes and compares against ground truth, and saves the
    results under subkey 'toy_example_local'.
    """
    # generate toy recordings
    if not os.path.exists('recordings'):
        os.mkdir('recordings')
    delete_recordings = False
    recpath = 'recordings/example1'
    if os.path.exists(recpath) and (delete_recordings):
        shutil.rmtree(recpath)
    if not os.path.exists(recpath):
        # Synthesize a recording with known ground truth and persist it in MDA format.
        rx, sx_true = se.example_datasets.toy_example1(duration=60, num_channels=4, samplerate=30000, K=10)
        se.MdaRecordingExtractor.writeRecording(recording=rx, save_path=recpath)
        se.MdaSortingExtractor.writeSorting(sorting=sx_true, save_path=recpath + '/firings_true.mda')

    # for downloading containers if needed
    ca.setRemoteConfig(alternate_share_ids=['69432e9201d0'])

    # Specify the compute resource (None = run locally)
    compute_resource = None
    # BUG FIX: compute_resource_ks was referenced in the KiloSort branch below but
    # was never defined in this function, raising NameError whenever a sorter named
    # 'KiloSort' was configured. Default it to None (local) like compute_resource.
    compute_resource_ks = None
    num_workers = 10

    # Use this to control whether we force the processing to re-run (by default it uses cached results)
    os.environ['MLPROCESSORS_FORCE_RUN'] = 'FALSE'  # FALSE or TRUE

    # This is the id of the output -- for later retrieval by GUI's, etc
    output_id = 'toy_example_local'

    # Grab the recordings for testing (duplicated 10x to exercise the workers)
    recordings = [
        dict(recording_name='example1',
             study_name='toy_examples',
             directory=os.path.abspath('recordings/example1'))
    ]
    recordings = recordings * 10
    studies = [
        dict(name='toy_examples',
             study_set='toy_examples',
             directory=os.path.abspath('recordings'),
             description='Toy examples.')
    ]

    # Sorters (algs and params) are defined below
    sorters = _define_sorters()

    # We will be assembling the sorting results here
    sorting_results = []
    for sorter in sorters:
        # Sort the recordings; KiloSort may run on a dedicated compute resource.
        compute_resource0 = compute_resource
        if sorter['name'] == 'KiloSort':
            compute_resource0 = compute_resource_ks
        sortings = sa.sort_recordings(sorter=sorter,
                                      recordings=recordings,
                                      compute_resource=compute_resource0,
                                      num_workers=num_workers)
        # Append to results
        sorting_results = sorting_results + sortings

    # Summarize the sortings
    sorting_results = sa.summarize_sortings(sortings=sorting_results,
                                            compute_resource=compute_resource)

    # Compare with ground truth
    sorting_results = sa.compare_sortings_with_truth(
        sortings=sorting_results,
        compute_resource=compute_resource,
        num_workers=num_workers)

    # Save the output
    print('Saving the output')
    ca.saveObject(key=dict(name='spikeforest_results'),
                  subkey=output_id,
                  object=dict(studies=studies,
                              recordings=recordings,
                              sorting_results=sorting_results))
def main():
    """Run the SpikeForest pipeline on the 'visapy_mea' recording group.

    Loads the recording group from kbucket, sorts every recording with every
    configured sorter on the 'default' compute resource, compares against
    ground truth, aggregates, saves under subkey 'visapy_mea', and prints a
    per-(study, sorter) average-accuracy summary.
    """
    ca.autoConfig(collection='spikeforest', key='spikeforest2-readwrite',
                  ask_password=True,
                  password=os.environ.get('SPIKEFOREST_PASSWORD', None))

    # for downloading containers if needed
    ca.setRemoteConfig(alternate_share_ids=['69432e9201d0'])

    # Specify the compute resource (see the note above)
    compute_resource = 'default'
    # compute_resource = 'local-computer'
    # compute_resource = 'ccmlin008-default'
    # BUG FIX: compute_resource_ks was referenced in the KiloSort branch below but
    # never assigned (only a commented-out assignment existed), raising NameError
    # whenever a sorter named 'KiloSort' was configured. Fall back to the general
    # compute resource; override here if a dedicated KiloSort resource exists
    # (e.g. 'ccmlin008-kilosort').
    compute_resource_ks = compute_resource

    # Use this to control whether we force the processing to re-run (by default it uses cached results)
    os.environ['MLPROCESSORS_FORCE_RUN'] = 'FALSE'  # FALSE or TRUE

    # This is the id of the output -- for later retrieval by GUI's, etc
    output_id = 'visapy_mea'

    # Grab the recordings for testing
    group_name = 'visapy_mea'
    a = ca.loadObject(
        key=dict(name='spikeforest_recording_group', group_name=group_name))
    recordings = a['recordings']
    studies = a['studies']
    # recordings = [recordings[0]]
    # recordings = recordings[0:3]

    # Summarize the recordings
    recordings = sa.summarize_recordings(recordings=recordings,
                                         compute_resource=compute_resource)

    # Sorters (algs and params) are defined below
    sorters = _define_sorters()

    # We will be assembling the sorting results here
    sorting_results = []
    for sorter in sorters:
        # Sort the recordings; KiloSort may run on a dedicated compute resource.
        compute_resource0 = compute_resource
        if sorter['name'] == 'KiloSort':
            compute_resource0 = compute_resource_ks
        sortings = sa.sort_recordings(sorter=sorter,
                                      recordings=recordings,
                                      compute_resource=compute_resource0)
        # Append to results
        sorting_results = sorting_results + sortings

    # Summarize the sortings
    sorting_results = sa.summarize_sortings(sortings=sorting_results,
                                            compute_resource=compute_resource)

    # Compare with ground truth
    sorting_results = sa.compare_sortings_with_truth(
        sortings=sorting_results, compute_resource=compute_resource)

    # Aggregate the results
    aggregated_sorting_results = sa.aggregate_sorting_results(
        studies, recordings, sorting_results)

    # Save the output; the aggregated results are stored as their own object
    # and referenced by address from within the main output object.
    print('Saving the output')
    ca.saveObject(key=dict(name='spikeforest_results'),
                  subkey=output_id,
                  object=dict(studies=studies,
                              recordings=recordings,
                              sorting_results=sorting_results,
                              aggregated_sorting_results=ca.saveObject(
                                  object=aggregated_sorting_results)))

    # Print a per-(study, sorter) accuracy summary:
    # accuracy = matches / (matches + false positives + false negatives)
    for sr in aggregated_sorting_results['study_sorting_results']:
        study_name = sr['study']
        sorter_name = sr['sorter']
        n1 = np.array(sr['num_matches'])
        n2 = np.array(sr['num_false_positives'])
        n3 = np.array(sr['num_false_negatives'])
        accuracies = n1 / (n1 + n2 + n3)
        avg_accuracy = np.mean(accuracies)
        txt = 'STUDY: {}, SORTER: {}, AVG ACCURACY: {}'.format(
            study_name, sorter_name, avg_accuracy)
        print(txt)
def main():
    """Generate toy recordings in the working directory and run the full
    sort/summarize/compare pipeline over them, saving the results under
    'spikeforest_test0'."""
    # Generate toy recordings (one directory per recording, with ground truth).
    delete_recordings = False
    num_recordings = 1
    for idx in range(1, num_recordings + 1):
        dirname = f'toy_example{idx}'
        if delete_recordings and os.path.exists(dirname):
            shutil.rmtree(dirname)
        if not os.path.exists(dirname):
            # Synthesize a recording and its true sorting; persist as MDA.
            rec, true_sorting = se.example_datasets.toy_example1(
                duration=60, num_channels=4, samplerate=30000, K=10)
            se.MdaRecordingExtractor.writeRecording(recording=rec, save_path=dirname)
            se.MdaSortingExtractor.writeSorting(
                sorting=true_sorting, save_path=dirname + '/firings_true.mda')

    # Use this to optionally connect to a kbucket share:
    # ca.autoConfig(collection='spikeforest',key='spikeforest2-readwrite',ask_password=True)

    # For downloading containers if needed.
    ca.setRemoteConfig(alternate_share_ids=['69432e9201d0'])

    # Compute resource (None = run locally).
    compute_resource = None

    # Controls whether cached processing results are reused (FALSE) or recomputed (TRUE).
    os.environ['MLPROCESSORS_FORCE_RUN'] = 'FALSE'  # FALSE or TRUE

    # Id of the saved output -- for later retrieval by GUI's, etc.
    output_id = 'spikeforest_test0'

    # Recording and study descriptors for the pipeline.
    recordings = []
    for idx in range(1, num_recordings + 1):
        recordings.append(dict(
            recording_name=f'toy_example{idx}',
            study_name='toy_examples',
            directory=os.path.abspath(f'toy_example{idx}')))
    studies = [dict(
        name='toy_examples',
        study_set='toy_examples',
        directory=os.path.abspath('.'),
        description='Toy examples.')]

    # Attach recording summaries used downstream.
    summarized_recordings = sa.summarize_recordings(
        recordings=recordings, compute_resource=compute_resource)

    # Sorter definitions (algorithms and parameters).
    sorters = define_sorters()

    # Accumulate the results across all sorters.
    sorting_results = []
    for sorter in sorters:
        sorted_out = sa.sort_recordings(
            sorter=sorter,
            recordings=summarized_recordings,
            compute_resource=compute_resource)
        summarized_out = sa.summarize_sortings(
            sortings=sorted_out, compute_resource=compute_resource)
        compared_out = sa.compare_sortings_with_truth(
            sortings=summarized_out, compute_resource=compute_resource)
        sorting_results = sorting_results + compared_out

    # TODO: collect all the units for aggregated analysis

    # Persist the output for later retrieval.
    print('Saving the output')
    ca.saveObject(
        key=dict(name='spikeforest_results', output_id=output_id),
        object=dict(studies=studies,
                    recordings=summarized_recordings,
                    sorting_results=sorting_results))
def apply_sorters_to_recordings(*, label, sorters, recordings, studies, study_sets, output_id=None, output_path=None, job_timeout=60 * 20, upload_to=None, skip_failing=None):
    """Run the full SpikeForest pipeline (summarize, sort, compare, aggregate) and save the output.

    :param label: human-readable label interpolated into stage labels/log messages.
    :param sorters: sorter definitions passed to sa.multi_sort_recordings.
    :param recordings: recording descriptors; replaced by their summarized form.
    :param studies: study descriptors included verbatim in the output object.
    :param study_sets: study-set descriptors included verbatim in the output object.
    :param output_id: if set, save the output object under this subkey of the
        'spikeforest_results' key.
    :param output_path: if set, also snapshot the output object to this path.
    :param job_timeout: per-job timeout in seconds for the sorting stage (default 20 min).
    :param upload_to: optional remote destination forwarded to the save/upload calls.
    :param skip_failing: forwarded to sa.multi_sort_recordings; presumably controls
        whether failing sort jobs are skipped rather than fatal -- TODO confirm.
    :raises Exception: if saving the output object or creating the snapshot fails.
    """
    # Summarize the recordings
    mtlogging.sublog('summarize-recordings')
    recordings = sa.summarize_recordings(
        recordings=recordings,
        compute_resource='default',
        label='Summarize recordings ({})'.format(label),
        upload_to=upload_to
    )

    # Run the spike sorting (all sorters x all recordings)
    mtlogging.sublog('sorting')
    sorting_results = sa.multi_sort_recordings(
        sorters=sorters,
        recordings=recordings,
        label='Sort recordings ({})'.format(label),
        job_timeout=job_timeout,
        upload_to=upload_to,
        skip_failing=skip_failing
    )

    # Summarize the sortings
    mtlogging.sublog('summarize-sortings')
    sorting_results = sa.summarize_sortings(
        sortings=sorting_results,
        compute_resource='default',
        label='Summarize sortings ({})'.format(label)
    )

    # Compare with ground truth
    mtlogging.sublog('compare-with-truth')
    sorting_results = sa.compare_sortings_with_truth(
        sortings=sorting_results,
        compute_resource='default',
        label='Compare with truth ({})'.format(label),
        upload_to=upload_to
    )

    # Aggregate the results
    mtlogging.sublog('aggregate')
    aggregated_sorting_results = sa.aggregate_sorting_results(
        studies, recordings, sorting_results)

    # The aggregated results are saved as their own object and referenced by
    # address from within the main output object.
    output_object = dict(
        studies=studies,
        recordings=recordings,
        study_sets=study_sets,
        sorting_results=sorting_results,
        aggregated_sorting_results=mt.saveObject(
            object=aggregated_sorting_results, upload_to=upload_to)
    )

    # Save the output under a stable key for later retrieval (if requested)
    if output_id:
        print('Saving the output')
        mtlogging.sublog('save-output')
        mt.saveObject(
            key=dict(
                name='spikeforest_results'
            ),
            subkey=output_id,
            object=output_object,
            upload_to=upload_to
        )

    # Optionally also snapshot the output object to an explicit path
    if output_path:
        print('Saving the output to {}'.format(output_path))
        mtlogging.sublog('save-output-path')
        address = mt.saveObject(output_object, upload_to=upload_to)
        if not address:
            raise Exception('Problem saving output object.')
        if not mt.createSnapshot(path=address, dest_path=output_path):
            raise Exception('Problem saving output to {}'.format(output_path))

    # Print a per-(study, sorter) accuracy summary:
    # accuracy = matches / (matches + false positives + false negatives)
    mtlogging.sublog('show-output-summary')
    for sr in aggregated_sorting_results['study_sorting_results']:
        study_name = sr['study']
        sorter_name = sr['sorter']
        n1 = np.array(sr['num_matches'])
        n2 = np.array(sr['num_false_positives'])
        n3 = np.array(sr['num_false_negatives'])
        accuracies = n1 / (n1 + n2 + n3)
        avg_accuracy = np.mean(accuracies)
        txt = 'STUDY: {}, SORTER: {}, AVG ACCURACY: {}'.format(
            study_name, sorter_name, avg_accuracy)
        print(txt)
# sorters=[sorter_yass] sorters = [sorter_sc] # %% # compute_resource='jfm-laptop' compute_resource = None recordings_B = sa.summarize_recordings(recordings=recordings, compute_resource=compute_resource) sorting_results = [] for sorter in sorters: sortings_A = sa.sort_recordings(sorter=sorter, recordings=recordings_B, compute_resource=compute_resource) sortings_B = sa.summarize_sortings(sortings=sortings_A, compute_resource=compute_resource) sortings_C = sa.compare_sortings_with_truth( sortings=sortings_B, compute_resource=compute_resource) sorting_results = sorting_results + sortings_C # %% sortings_A # %% summaries = sa.summarize_recordings( recordings=[recording], compute_resource=None) # 'ccmlin008-default') recording['summary'] = summaries[0] # %% sorter_ms4_thr3 = dict(name='MountainSort4-thr3', processor_name='MountainSort4', params=dict(detect_sign=-1, adjacency_radius=50,
def main():
    """Sort two magland_synth test recordings from kbucket on the 'jfm-laptop'
    compute resource and save the results under 'spikeforest_test1'."""
    # Use this to optionally connect to a kbucket share:
    ca.autoConfig(collection='spikeforest', key='spikeforest2-readwrite',
                  ask_password=True)

    # Compute resource for all pipeline stages.
    compute_resource = 'jfm-laptop'

    # Controls whether cached processing results are reused (FALSE) or recomputed (TRUE).
    os.environ['MLPROCESSORS_FORCE_RUN'] = 'FALSE'  # FALSE or TRUE

    # Id of the saved output -- for later retrieval by GUI's, etc.
    output_id = 'spikeforest_test1'

    # Grab a couple of recordings for testing from the kbucket ground-truth share.
    base_dir = 'kbucket://15734439d8cf/groundtruth/magland_synth/datasets_noise10_K10_C4'
    recordings = [
        dict(
            recording_name=rec_name,
            study_name='datasets_noise10_K10_C4-test',
            study_set='magland_synth-test',
            directory=base_dir + '/' + rec_name
        )
        for rec_name in ('001_synth', '002_synth')
    ]

    # Attach recording summaries used downstream.
    summarized_recordings = sa.summarize_recordings(
        recordings=recordings, compute_resource=compute_resource)

    # Sorter definitions (algorithms and parameters).
    sorters = define_sorters()

    # Accumulate the results across all sorters.
    sorting_results = []
    for sorter in sorters:
        sorted_out = sa.sort_recordings(
            sorter=sorter,
            recordings=summarized_recordings,
            compute_resource=compute_resource)
        summarized_out = sa.summarize_sortings(
            sortings=sorted_out, compute_resource=compute_resource)
        compared_out = sa.compare_sortings_with_truth(
            sortings=summarized_out, compute_resource=compute_resource)
        sorting_results = sorting_results + compared_out

    # TODO: collect all the units for aggregated analysis

    # Persist the output for later retrieval.
    print('Saving the output')
    ca.saveObject(
        key=dict(name='spikeforest_results', output_id=output_id),
        object=dict(recordings=summarized_recordings,
                    sorting_results=sorting_results))