def upload_ks2_output():
    """
    Copy ks2 output to a .tar file and upload to flatiron for all past sessions that have
    spike sorting output
    """
    # only bother if there is more than 500 GB of space available on the disk
    usage = _get_volume_usage('/mnt/s0/Data', 'disk')
    if usage['disk_available'] < 500:
        return

    one = ONE()
    for ilog, ks2_out in enumerate(ROOT_PATH.rglob('spike_sorting_ks2.log')):
        # check the space on disk after every 25 extractions and stop if we are running low!
        if np.mod(ilog, 25) == 0:
            usage = _get_volume_usage('/mnt/s0/Data', 'disk')
            if usage['disk_available'] < 500:
                return

        ks2_path = Path(ks2_out).parent
        session_path = alf.io.get_session_path(ks2_out)
        probe = ks2_path.stem
        tar_dir = session_path.joinpath('spike_sorters', 'ks2_matlab', probe)
        tar_dir.mkdir(exist_ok=True, parents=True)

        # If the flag exists it means we have already extracted, no need to repeat
        if tar_dir.joinpath('tar_existed.flag').exists():
            continue

        eid = one.eid_from_path(session_path)
        if eid is None:
            # Skip sessions that don't exist on alyx!
            continue

        # For the latest sessions the tar file is created by the task and registered
        # automatically, so the tar file may already be registered and uploaded without a
        # tar_existed.flag being present
        if tar_dir.joinpath('_kilosort_raw.output.tar').exists():
            # double check that it has indeed been registered for this probe
            dset = one.alyx.rest('datasets', 'list', session=eid, name='_kilosort_raw.output.tar')
            collection = [ds['collection'].rsplit('/', 1)[-1] for ds in dset]
            if probe in collection:
                tar_dir.joinpath('tar_existed.flag').touch()
                continue

        out = spikes.ks2_to_tar(ks2_path, tar_dir)
        register_dataset(out, one=one)
        # Make a flag to indicate the data has already been registered for this session
        tar_dir.joinpath('tar_existed.flag').touch()
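`_get_volume_usage` is called above but not shown in this section. A minimal sketch of what such a helper could look like, built on the standard-library `shutil.disk_usage`, is given below; the function name, dict keys and GB units are inferred from the call sites above, not taken from the original helper.

import shutil


def _get_volume_usage(path, label='disk'):
    # Hypothetical reimplementation: report used/available space (in GB) for the volume
    # containing `path`, keyed so that usage[f'{label}_available'] matches the calls above.
    du = shutil.disk_usage(path)
    gb = 1024 ** 3
    return {f'{label}_used': du.used / gb, f'{label}_available': du.free / gb}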
def test_registration_datasets(self):
    # registers a single file
    ses = one.alyx.rest('sessions', 'create', data=MOCK_SESSION_DICT)
    st_file = self.alf_path.joinpath('spikes.times.npy')
    registration.register_dataset(file_list=st_file, one=one)
    dsets = one.alyx.rest('datasets', 'list', session=ses['url'][-36:])
    self.assertTrue(len(dsets) == 1)

    # registers a list of files
    flist = list(self.alf_path.glob('*.npy'))
    r = registration.register_dataset(file_list=flist, one=one)
    dsets = one.alyx.rest('datasets', 'list', session=ses['url'][-36:])
    self.assertTrue(len(dsets) == 2)
    self.assertTrue(all(not d['revision'] for d in r))
    self.assertTrue(all(d['default'] for d in r))
    self.assertTrue(all(d['collection'] == 'alf' for d in r))

    # simulate that all the datasets exist, re-register and assert that exists is set to True
    # as the files haven't changed
    frs = one.alyx.rest('files', 'list', django=f"dataset__session,{ses['url'][-36:]}")
    for fr in frs:
        one.alyx.rest('files', 'partial_update', id=fr['url'][-36:], data={'exists': True})
    r = registration.register_dataset(file_list=flist, one=one)
    self.assertTrue(all(all(fr['exists'] for fr in rr['file_records']) for rr in r))

    # now that the files have changed, make sure the exists flags are set to False
    np.save(self.alf_path.joinpath('spikes.times.npy'), np.random.random(500))
    np.save(self.alf_path.joinpath('spikes.amps.npy'), np.random.random(500))
    r = registration.register_dataset(file_list=flist, one=one)
    self.assertTrue(all(all(not fr['exists'] for fr in rr['file_records']) for rr in r))

    # Test registering with a revision
    # Test that if we don't have the correct file structure it won't register
    flist = list(self.alf_path.glob('*.npy'))
    with self.assertRaises(HTTPError):
        registration.register_dataset(file_list=flist, one=one, revisions='v1')
    # Check that with the correct folder it registers correctly
    flist = list(self.rev_path.glob('*.npy'))
    r = registration.register_dataset(file_list=flist, one=one, revisions='v1')
    self.assertTrue(all(d['revision'] == 'v1' for d in r))
    self.assertTrue(all(d['default'] for d in r))
    self.assertTrue(all(d['collection'] == 'alf' for d in r))
    dsets = one.alyx.rest('datasets', 'list', session=ses['url'][-36:], revision='v1')

    # Add a protected tag to a dataset and check that re-registering raises
    for d in dsets:
        one.alyx.rest('datasets', 'partial_update', id=d['url'][-36:],
                      data={'tags': ['test_tag']})
    with self.assertRaises(HTTPError):
        registration.register_dataset(file_list=flist, one=one, revisions='v1')
def register_dataset(self, file_list, **kwargs):
    """
    Registers a set of files belonging to a session only on the server

    :param file_list: (list of pathlib.Path)
    :param created_by: (string) name of user in Alyx (defaults to 'root')
    :param repository: optional: (string) name of the server repository in Alyx
    :param versions: optional (list of strings): version tags (defaults to ibllib version)
    :param dry: (bool) False by default
    :return:
    """
    return register_dataset(file_list, one=self.one, server_only=True, **kwargs)
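As a usage illustration only: the instance name `patcher` and the file paths below are hypothetical, chosen to match the argument types the docstring describes. Registering a couple of session files through this wrapper could look like:

from pathlib import Path

# hypothetical files belonging to one session; the wrapper registers them server-side only
file_list = [
    Path('/mnt/s0/Data/Subjects/ZM_1150/2019-05-07/001/alf/spikes.times.npy'),
    Path('/mnt/s0/Data/Subjects/ZM_1150/2019-05-07/001/alf/spikes.amps.npy'),
]
# dry=True builds the registration payload without writing anything to Alyx
patcher.register_dataset(file_list, created_by='root', dry=True)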
def test_registration_datasets(self):
    # registers a single file
    ses = one.alyx.rest('sessions', 'create', data=MOCK_SESSION_DICT)
    st_file = self.alf_path.joinpath('spikes.times.npy')
    registration.register_dataset(file_list=st_file, one=one)
    dsets = one.alyx.rest('datasets', 'list', session=ses['url'][-36:])
    self.assertTrue(len(dsets) == 1)

    # registers a list of files
    flist = list(self.alf_path.glob('*.npy'))
    r = registration.register_dataset(file_list=flist, one=one)
    dsets = one.alyx.rest('datasets', 'list', session=ses['url'][-36:])
    self.assertTrue(len(dsets) == 2)

    # simulate that all the datasets exist, re-register and assert that exists is set to True
    # as the files haven't changed
    frs = one.alyx.rest('files', 'list', django=f"dataset__session,{ses['url'][-36:]}")
    for fr in frs:
        one.alyx.rest('files', 'partial_update', id=fr['url'][-36:], data={'exists': True})
    r = registration.register_dataset(file_list=flist, one=one)
    self.assertTrue(all(all(fr['exists'] for fr in rr['file_records']) for rr in r))

    # now that the files have changed, make sure the exists flags are set to False
    np.save(self.alf_path.joinpath('spikes.times.npy'), np.random.random(500))
    np.save(self.alf_path.joinpath('spikes.amps.npy'), np.random.random(500))
    r = registration.register_dataset(file_list=flist, one=one)
    self.assertTrue(all(all(not fr['exists'] for fr in rr['file_records']) for rr in r))
def register_datasets(self, one=None, **kwargs):
    """
    Register the output datasets from the task to Alyx

    :param one: ONE instance
    :param kwargs: directly passed to the register_dataset function
    :return:
    """
    assert one
    if self.outputs:
        if isinstance(self.outputs, list):
            versions = [self.version for _ in self.outputs]
        else:
            versions = [self.version]
        return register_dataset(self.outputs, one=one, versions=versions, **kwargs)
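In a pipeline run this method would typically be called once the task has produced its outputs. The sketch below shows an assumed calling pattern: the `task` variable and its `run()` method are stand-ins for illustration; only `register_datasets` itself and the `dry` keyword (forwarded to `register_dataset`, see the docstring above) come from the code in this section.

one = ONE()  # same ONE client used elsewhere in this module

# `task` stands for any task instance whose run() populated task.outputs with output file paths
task.run()
if task.outputs:
    # tag every output with the task version and register the files to Alyx
    task.register_datasets(one=one, dry=False)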
def spike_amplitude_patching():
    """
    Patch the datasets that have incorrect spikes.amplitude datasets. While doing so, also look
    for sessions that have spike sorting / alf folders but for some reason haven't been
    registered and uploaded to flatiron (normally because the .cbin file is missing).

    Five different scenarios to consider
    1. Data extracted properly, is on flatiron and has templates.amps - do nothing
    2. Data extracted properly, is on flatiron but doesn't have templates.amps - phy convert
       and register
    3. Data extracted properly with templates.amps, but not on flatiron - phy convert and
       register (don't necessarily need to phy convert but double check in case it was the
       syncing that errored)
    4. Data extracted properly without templates.amps, but not on flatiron - phy convert and
       register
    5. Data spike sorted but not extracted - phy convert and register
    """

    def phy2alf_conversion(session_path, ks2_path, alf_path, probe_label):
        try:
            # Find spikeglx meta data files associated with the session and probe
            files = spikeglx.glob_ephys_files(session_path, ext='meta')
            ap_files = [(ef.get("ap"), ef.get("label")) for ef in files if "ap" in ef.keys()]
            meta_file = next(ap[0] for ap in ap_files if ap[1] == probe_label)
            # The .cbin file doesn't always still exist on the server so point to it from the meta
            ap_file = meta_file.with_suffix('.cbin')
            # Convert to alf format
            spikes.ks2_to_alf(
                ks2_path, bin_path=meta_file.parent, out_path=alf_path, bin_file=None,
                ampfactor=SpikeSorting_KS2_Matlab._sample2v(ap_file))
            # Sync the probes
            out_files, _ = spikes.sync_spike_sorting(ap_file=ap_file, out_path=alf_path)
            return 0, out_files, None
        except BaseException as err:
            _logger.error(f'{session_path} and {probe_label} errored with message: {err}')
            return -1, None, err

    def add_note_to_insertion(eid, probe, one, msg=None):
        insertion = one.alyx.rest('insertions', 'list', session=eid, name=probe)
        if len(insertion) > 0:
            probe_id = insertion[0]['id']
            status_note = {'user': one._par.ALYX_LOGIN,
                           'content_type': 'probeinsertion',
                           'object_id': probe_id,
                           'text': f'amps_patching_local_server2: {msg}'}
            _ = one.alyx.rest('notes', 'create', data=status_note)
        else:
            # If the probe insertion doesn't exist, make a session note instead
            status_note = {'user': one._par.ALYX_LOGIN,
                           'content_type': 'session',
                           'object_id': eid,
                           'text': f'amps_patching_local_server2: {probe}: {msg}'}
            _ = one.alyx.rest('notes', 'create', data=status_note)

    one = ONE()
    for ks2_out in ROOT_PATH.rglob('spike_sorting_ks2.log'):
        ks2_path = Path(ks2_out).parent
        # Clean up old flags if they exist
        if ks2_path.joinpath('amps_patching_local_server.flag').exists():
            ks2_path.joinpath('amps_patching_local_server.flag').unlink()
        # If we already looked at this session previously, no need to try again
        if ks2_path.joinpath('amps_patching_local_server2.flag').exists():
            continue
        # Make the flag the first time we look into the session
        ks2_path.joinpath('amps_patching_local_server2.flag').touch()

        # Now proceed with everything else
        session_path = alf.io.get_session_path(ks2_out)
        eid = one.eid_from_path(session_path)
        if eid is None:
            # Skip sessions that don't exist on alyx!
            continue

        probe = ks2_path.stem
        alf_path = session_path.joinpath('alf', probe)
        alf_path.mkdir(parents=True, exist_ok=True)

        # If a clusters.metrics file exists in the alf_path, delete it: it causes a
        # registration error!
        cluster_metrics = alf_path.joinpath('clusters.metrics.csv')
        if cluster_metrics.exists():
            os.remove(cluster_metrics)

        # The templates.amps file only exists if it was made by the new phy extractor
        templates_file = alf_path.joinpath('templates.amps.npy')
        if templates_file.exists():
            dset = one.alyx.rest('datasets', 'list', session=eid, name='templates.amps.npy')
            # check if it has been registered for this probe specifically
            collection = [ds['collection'].rsplit('/', 1)[-1] for ds in dset]
            if probe in collection:
                continue

        # Otherwise we need to extract the alf files and register the datasets
        status, out, err = phy2alf_conversion(session_path, ks2_path, alf_path, probe)
        if status == 0:
            try:
                cluster_qc = EphysCellsQc(session_path, one=one)
                qc_file, df_units, drift = cluster_qc._compute_cell_qc(alf_path)
                out.append(qc_file)
                cluster_qc._label_probe_qc(alf_path, df_units, drift)
                register_dataset(out, one=one)
                add_note_to_insertion(eid, probe, one, msg='completed')
                _logger.info(f'All good: {session_path} and {probe}')
            except BaseException as err2:
                _logger.info(f'Errored at qc/registration stage: {session_path} and {probe}')
                add_note_to_insertion(eid, probe, one, msg=err2)
        else:
            # Log the error
            add_note_to_insertion(eid, probe, one, msg=err)
            continue
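If these maintenance routines live in a standalone script, an entry point calling them in sequence might look like the following; the `__main__` guard is an assumption about how the script is invoked, not part of the original code.

if __name__ == '__main__':
    # first patch any sessions with bad amplitudes, then upload remaining ks2 tar archives
    spike_amplitude_patching()
    upload_ks2_output()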