Example #1
def upload_ks2_output():
    """
    Copy ks2 output to a .tar file and upload to flatiron for all past sessions that have
    spike sorting output
    """
    # If the available disk space is > 500 GB continue, otherwise don't bother
    usage = _get_volume_usage('/mnt/s0/Data', 'disk')
    if usage['disk_available'] < 500:
        return

    one = ONE()

    for ilog, ks2_out in enumerate(ROOT_PATH.rglob('spike_sorting_ks2.log')):
        # check space on disk after every 25 extractions. stop if we are running low!
        if np.mod(ilog, 25) == 0:
            usage = _get_volume_usage('/mnt/s0/Data', 'disk')
            if usage['disk_available'] < 500:
                return

        ks2_path = Path(ks2_out).parent
        session_path = alf.io.get_session_path(ks2_out)

        probe = ks2_path.stem
        tar_dir = session_path.joinpath('spike_sorters', 'ks2_matlab', probe)
        tar_dir.mkdir(exist_ok=True, parents=True)

        # If the flag exists it means we have already extracted
        if tar_dir.joinpath('tar_existed.flag').exists():
            # We have already done this, no need to repeat!
            continue

        eid = one.eid_from_path(session_path)
        if eid is None:
            # Skip sessions that don't exist on alyx!
            continue

        # For the latest sessions the tar file will be created by the task and automatically
        # registered, so we may have a case where the tar file is already registered and
        # uploaded but no tar_existed.flag exists
        if tar_dir.joinpath('_kilosort_raw.output.tar').exists():
            # double check it indeed has been registered for this probe
            dset = one.alyx.rest('datasets',
                                 'list',
                                 session=eid,
                                 name='_kilosort_raw.output.tar')
            collection = [ds['collection'].rsplit('/', 1)[-1] for ds in dset]
            if probe in collection:
                tar_dir.joinpath('tar_existed.flag').touch()
                continue

        out = spikes.ks2_to_tar(ks2_path, tar_dir)
        register_dataset(out, one=one)
        # Make flag to indicate data already registered for this session
        tar_dir.joinpath('tar_existed.flag').touch()
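
# Note (not part of the original example): the helper `_get_volume_usage` used above is not
# shown here. Below is a minimal stand-in sketch, assuming it reports sizes in gigabytes under
# keys prefixed with the given label (inferred from the `usage['disk_available'] < 500` check);
# the real helper may differ.
import shutil


def _get_volume_usage_sketch(path, label):
    """Hypothetical stand-in: total/used/available space of the volume holding `path`, in GB."""
    total, used, free = shutil.disk_usage(path)
    gb = 1024 ** 3
    return {f'{label}_total': total / gb,
            f'{label}_used': used / gb,
            f'{label}_available': free / gb}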
Example #2
    def test_registration_datasets(self):
        # registers a single file
        ses = one.alyx.rest('sessions', 'create', data=MOCK_SESSION_DICT)
        st_file = self.alf_path.joinpath('spikes.times.npy')
        registration.register_dataset(file_list=st_file, one=one)
        dsets = one.alyx.rest('datasets', 'list', session=ses['url'][-36:])
        self.assertTrue(len(dsets) == 1)
        # registers a list of files
        flist = list(self.alf_path.glob('*.npy'))
        r = registration.register_dataset(file_list=flist, one=one)
        dsets = one.alyx.rest('datasets', 'list', session=ses['url'][-36:])
        self.assertTrue(len(dsets) == 2)
        self.assertTrue(all(not d['revision'] for d in r))
        self.assertTrue(all(d['default'] for d in r))
        self.assertTrue(all(d['collection'] == 'alf' for d in r))

        # simulate that all datasets exist, re-register and assert that exists is set to True
        # as the files haven't changed
        frs = one.alyx.rest('files', 'list', django=f"dataset__session,{ses['url'][-36:]}")
        for fr in frs:
            one.alyx.rest('files', 'partial_update', id=fr['url'][-36:], data={'exists': True})
        r = registration.register_dataset(file_list=flist, one=one)
        self.assertTrue(all([all([fr['exists'] for fr in rr['file_records']]) for rr in r]))
        # now that the files have changed, make sure the exists flags are set to False
        np.save(self.alf_path.joinpath('spikes.times.npy'), np.random.random(500))
        np.save(self.alf_path.joinpath('spikes.amps.npy'), np.random.random(500))
        r = registration.register_dataset(file_list=flist, one=one)
        self.assertTrue(all([all([not fr['exists'] for fr in rr['file_records']]) for rr in r]))

        # Test registering with a revision
        # Test that if we don't have the correct file structure it won't register
        flist = list(self.alf_path.glob('*.npy'))
        with self.assertRaises(HTTPError):
            registration.register_dataset(file_list=flist, one=one, revisions='v1')
        # Check that with the correct folder it registers correctly
        flist = list(self.rev_path.glob('*.npy'))
        r = registration.register_dataset(file_list=flist, one=one, revisions='v1')
        self.assertTrue(all(d['revision'] == 'v1' for d in r))
        self.assertTrue(all(d['default'] for d in r))
        self.assertTrue(all(d['collection'] == 'alf' for d in r))
        dsets = one.alyx.rest('datasets', 'list', session=ses['url'][-36:], revision='v1')

        # Add a protected tag to a dataset
        for d in dsets:
            one.alyx.rest('datasets', 'partial_update', id=d['url'][-36:],
                          data={'tags': ['test_tag']})
        with self.assertRaises(HTTPError):
            registration.register_dataset(file_list=flist, one=one, revisions='v1')
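
# Note on the revision test above (an assumption, not shown in the source): under the ALF/ONE
# convention a revised dataset is expected to live in a '#revision#' subfolder of its
# collection, so `self.rev_path` would plausibly point at a layout like the one sketched here.
from pathlib import Path

rev_path_example = Path('alf').joinpath('#v1#')                 # hypothetical revision folder
expected_file = rev_path_example.joinpath('spikes.times.npy')   # i.e. alf/#v1#/spikes.times.npy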
Example #3
    def register_dataset(self, file_list, **kwargs):
        """
        Registers a set of files belonging to a session, on the server only
        :param file_list: (list of pathlib.Path)
        :param created_by: (string) name of user in Alyx (defaults to 'root')
        :param repository: optional: (string) name of the server repository in Alyx
        :param versions: optional (list of strings): version tags (defaults to ibllib version)
        :param dry: (bool) False by default
        :return:
        """
        return register_dataset(file_list, one=self.one, server_only=True, **kwargs)
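
# Usage sketch (illustrative, not from the source): the wrapper above simply forwards to the
# module-level register_dataset with `server_only=True`. Assuming `one` is a connected ONE
# instance (as in the other examples here), an equivalent direct call over the .npy files of
# a hypothetical session folder could look like:
from pathlib import Path

one = ONE()  # the ONE import/constructor is assumed to be available as in the other examples
files = sorted(Path('/mnt/s0/Data/Subjects/example/2021-01-01/001/alf').glob('*.npy'))
register_dataset(files, one=one, server_only=True, created_by='root', dry=True)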
Example #4
    def test_registration_datasets(self):
        # registers a single file
        ses = one.alyx.rest('sessions', 'create', data=MOCK_SESSION_DICT)
        st_file = self.alf_path.joinpath('spikes.times.npy')
        registration.register_dataset(file_list=st_file, one=one)
        dsets = one.alyx.rest('datasets', 'list', session=ses['url'][-36:])
        self.assertTrue(len(dsets) == 1)
        # registers a list of files
        flist = list(self.alf_path.glob('*.npy'))
        r = registration.register_dataset(file_list=flist, one=one)
        dsets = one.alyx.rest('datasets', 'list', session=ses['url'][-36:])
        self.assertTrue(len(dsets) == 2)
        # simulate that all datasets exist, re-register and assert that exists is set to True
        # as the files haven't changed
        frs = one.alyx.rest('files',
                            'list',
                            django=f"dataset__session,{ses['url'][-36:]}")
        for fr in frs:
            one.alyx.rest('files',
                          'partial_update',
                          id=fr['url'][-36:],
                          data={'exists': True})
        r = registration.register_dataset(file_list=flist, one=one)
        self.assertTrue(
            all([all([fr['exists'] for fr in rr['file_records']])
                 for rr in r]))
        # now that the files have changed, make sure the exists flags are set to False
        np.save(self.alf_path.joinpath('spikes.times.npy'),
                np.random.random(500))
        np.save(self.alf_path.joinpath('spikes.amps.npy'),
                np.random.random(500))
        r = registration.register_dataset(file_list=flist, one=one)
        self.assertTrue(
            all([
                all([not fr['exists'] for fr in rr['file_records']])
                for rr in r
            ]))
Example #5
    def register_datasets(self, one=None, **kwargs):
        """
        Register the output datasets from the task to Alyx
        :param one:
        :param kwargs: directly passed to the register_dataset function
        :return:
        """
        assert one
        if self.outputs:
            if isinstance(self.outputs, list):
                versions = [self.version for _ in self.outputs]
            else:
                versions = [self.version]
            return register_dataset(self.outputs,
                                    one=one,
                                    versions=versions,
                                    **kwargs)
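
# Sketch of the version handling above (illustrative, not from the source): every output file
# gets tagged with the same task version, whether `self.outputs` is a single path or a list.
# The file names and version string below are placeholders.
from pathlib import Path

outputs = [Path('spikes.times.npy'), Path('spikes.amps.npy')]   # hypothetical task outputs
version = '1.0.0'                                               # hypothetical task version
versions = [version for _ in outputs] if isinstance(outputs, list) else [version]
assert len(versions) == len(outputs)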
Example #6
def spike_amplitude_patching():
    """
    Patch the datasets that have incorrect spikes.amplitude datasets. While doing so, also look for
    sessions that have spike sorting / alf folders but for some reason haven't been registered and
    uploaded to flatiron (normally because the .cbin file is missing).

    Five different scenarios to consider
    1. Data extracted properly, is on flatiron and has templates.amps - do nothing
    2. Data extracted properly, is on flatiron but doesn't have templates.amps - phy convert
       and register
    3. Data extracted properly with templates.amps, but not on flatiron - phy convert and
       register (don't necessarily need to phy convert, but double check in case it was the
       syncing that errored)
    4. Data extracted properly without templates.amps, but not on flatiron - phy convert and
       register
    5. Data spike sorted but not extracted - phy convert and register

    """
    def phy2alf_conversion(session_path, ks2_path, alf_path, probe_label):
        try:
            # Find spikeglx meta data files associated with the session and probe
            files = spikeglx.glob_ephys_files(session_path, ext='meta')
            ap_files = [(ef.get("ap"), ef.get("label")) for ef in files
                        if "ap" in ef.keys()]
            meta_file = next(ap[0] for ap in ap_files if ap[1] == probe_label)

            # The .cbin file may no longer exist on the server, so point to it via the meta file
            ap_file = meta_file.with_suffix('.cbin')

            # Convert to alf format
            spikes.ks2_to_alf(
                ks2_path,
                bin_path=meta_file.parent,
                out_path=alf_path,
                bin_file=None,
                ampfactor=SpikeSorting_KS2_Matlab._sample2v(ap_file))

            # Sync the probes
            out_files, _ = spikes.sync_spike_sorting(ap_file=ap_file,
                                                     out_path=alf_path)

            return 0, out_files, None

        except BaseException as err:
            _logger.error(
                f'{session_path} and {probe_label} errored with message: {err}'
            )

            return -1, None, err

    def add_note_to_insertion(eid, probe, one, msg=None):
        insertion = one.alyx.rest('insertions',
                                  'list',
                                  session=eid,
                                  name=probe)

        if len(insertion) > 0:
            probe_id = insertion[0]['id']
            status_note = {
                'user': one._par.ALYX_LOGIN,
                'content_type': 'probeinsertion',
                'object_id': probe_id,
                'text': f'amps_patching_local_server2: {msg}'
            }
            _ = one.alyx.rest('notes', 'create', data=status_note)
        else:
            # If the probe insertion doesn't exist, make a session note
            status_note = {
                'user': one._par.ALYX_LOGIN,
                'content_type': 'session',
                'object_id': eid,
                'text': f'amps_patching_local_server2: {probe}: {msg}'
            }
            _ = one.alyx.rest('notes', 'create', data=status_note)

    one = ONE()

    for ks2_out in ROOT_PATH.rglob('spike_sorting_ks2.log'):
        ks2_path = Path(ks2_out).parent

        # Clean up old flags if they exist
        if ks2_path.joinpath('amps_patching_local_server.flag').exists():
            ks2_path.joinpath('amps_patching_local_server.flag').unlink()

        # If we already looked at this session previously, no need to try again
        if ks2_path.joinpath('amps_patching_local_server2.flag').exists():
            continue

        # Make the flag if it is the first time looking into session
        ks2_path.joinpath('amps_patching_local_server2.flag').touch()

        # Now proceed with everything else
        session_path = alf.io.get_session_path(ks2_out)
        eid = one.eid_from_path(session_path)
        if eid is None:
            # Skip sessions that don't exist on alyx!
            continue
        probe = ks2_path.stem
        alf_path = session_path.joinpath('alf', probe)
        alf_path.mkdir(parents=True, exist_ok=True)

        # If a clusters.metrics file exists in the alf_path, delete it. Causes registration error!
        cluster_metrics = alf_path.joinpath('clusters.metrics.csv')
        if cluster_metrics.exists():
            os.remove(cluster_metrics)

        # The templates.amps file only exists if the new phy extractor was used
        templates_file = alf_path.joinpath('templates.amps.npy')
        if templates_file.exists():
            dset = one.alyx.rest('datasets',
                                 'list',
                                 session=eid,
                                 name='templates.amps.npy')
            # check if it has been registered for this probe specifically
            collection = [ds['collection'].rsplit('/', 1)[-1] for ds in dset]
            if probe in collection:
                continue

        # Otherwise we need to extract alf files and register datasets
        status, out, err = phy2alf_conversion(session_path, ks2_path, alf_path,
                                              probe)
        if status == 0:
            try:
                cluster_qc = EphysCellsQc(session_path, one=one)
                qc_file, df_units, drift = cluster_qc._compute_cell_qc(
                    alf_path)
                out.append(qc_file)
                cluster_qc._label_probe_qc(alf_path, df_units, drift)
                register_dataset(out, one=one)
                add_note_to_insertion(eid, probe, one, msg='completed')
                _logger.info(f'All good: {session_path} and {probe}')
            except BaseException as err2:
                _logger.info(
                    f'Errored at qc/ registration stage: {session_path} and {probe}'
                )
                add_note_to_insertion(eid, probe, one, msg=err2)
        else:
            # Log the error
            add_note_to_insertion(eid, probe, one, msg=err)
            continue
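
# Sketch of the per-probe decision implemented above (illustrative, not from the source): the
# loop effectively collapses the five docstring scenarios into one rule, re-extracting with phy
# and re-registering unless the probe already has templates.amps locally AND that dataset is
# already registered on Alyx for this probe.
def needs_patching(has_templates_amps, registered_for_probe):
    """Hypothetical summary of the decision: True means phy convert + register."""
    return not (has_templates_amps and registered_for_probe)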