Example #1
0
def upload_ks2_output():
    """
    Copy ks2 output to a .tar file and upload to flatiron for all past sessions that have
    spike sorting output
    """
    # If more than 500 GB of disk space is available continue, otherwise don't bother
    usage = _get_volume_usage('/mnt/s0/Data', 'disk')
    if usage['disk_available'] < 500:
        return

    one = ONE()

    for ilog, ks2_out in enumerate(ROOT_PATH.rglob('spike_sorting_ks2.log')):
        # Check disk space after every 25 extractions and stop if we are running low
        if np.mod(ilog, 25) == 0:
            usage = _get_volume_usage('/mnt/s0/Data', 'disk')
            if usage['disk_available'] < 500:
                return

        ks2_path = Path(ks2_out).parent
        session_path = alf.io.get_session_path(ks2_out)

        probe = ks2_path.stem
        tar_dir = session_path.joinpath('spike_sorters', 'ks2_matlab', probe)
        tar_dir.mkdir(exist_ok=True, parents=True)

        # If the flag exists it means we have already extracted
        if tar_dir.joinpath('tar_existed.flag').exists():
            # We have already done this, no need to repeat
            continue

        eid = one.eid_from_path(session_path)
        if eid is None:
            # Skip sessions that don't exist on alyx
            continue

        # For recent sessions the tar file is created by the task and registered automatically,
        # so it may already be registered and uploaded even though tar_existed.flag is missing
        if tar_dir.joinpath('_kilosort_raw.output.tar').exists():
            # Double check that it has indeed been registered for this probe
            dset = one.alyx.rest('datasets',
                                 'list',
                                 session=eid,
                                 name='_kilosort_raw.output.tar')
            collection = [ds['collection'].rsplit('/', 1)[-1] for ds in dset]
            if probe in collection:
                tar_dir.joinpath('tar_existed.flag').touch()
                continue

        out = spikes.ks2_to_tar(ks2_path, tar_dir)
        register_dataset(out, one=one)
        # Make a flag to indicate the data has been registered for this session
        tar_dir.joinpath('tar_existed.flag').touch()
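
# _get_volume_usage is referenced above but its implementation is not shown here. Below is a
# minimal sketch of what such a helper could look like, assuming it reports space in GB under
# keys prefixed by the given label (an illustrative guess, not the actual helper):
import shutil

def _get_volume_usage(path, label):
    """Return total/used/available space (in GB) for the volume containing `path`."""
    usage = shutil.disk_usage(path)  # named tuple (total, used, free), in bytes
    gb = 1024 ** 3
    return {f'{label}_total': usage.total / gb,
            f'{label}_used': usage.used / gb,
            f'{label}_available': usage.free / gb}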
Example #2
0
def register_session_raw_data(session_path,
                              one=None,
                              overwrite=False,
                              dry=False,
                              **kwargs):
    """
    Registers all files corresponding to raw data files to Alyx. It will select files that
    match Alyx registration patterns.
    :param session_path:
    :param one: one instance to work with
    :param overwrite: (False) if set to True, will patch the datasets. It will take very long.
    If set to False (default) will skip all already registered data.
    :param dry: do not register files, returns the list of files to be registered
    :return: list of file to register
    :return: Alyx response: dictionary of registered files
    """
    session_path = Path(session_path)
    one = one or ONE()
    eid = one.eid_from_path(
        session_path, use_cache=False)  # make sure the cache is up to date
    # query the database for existing datasets on the session and allowed dataset types
    dsets = one.alyx.rest('datasets', 'list', session=eid)
    already_registered = [
        session_path.joinpath(
            Path(ds['collection'] or '').joinpath(ds['name'])) for ds in dsets
    ]
    dtypes = one.alyx.rest('dataset-types', 'list')
    registration_patterns = [
        dt['filename_pattern'] for dt in dtypes if dt['filename_pattern']
    ]
    # glob all the files
    glob_patterns = [
        pat for pat in REGISTRATION_GLOB_PATTERNS if pat.startswith('raw')
    ]
    files_2_register = []
    for gp in glob_patterns:
        f2r = list(session_path.glob(gp))
        files_2_register.extend(f2r)
    # filter 1/2: remove datasets that do not match any dataset type
    files_2_register = list(
        filter(lambda f: _check_filename_for_registration(f, registration_patterns),
               files_2_register))
    # filter 2/2: unless overwrite is True, remove datasets that already exist
    if not overwrite:
        files_2_register = list(
            filter(lambda f: f not in already_registered, files_2_register))
    response = register_dataset(files_2_register,
                                one=one,
                                versions=None,
                                dry=dry,
                                **kwargs)
    return files_2_register, response
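
# A hypothetical usage of the function above: dry-run over a single session to see which raw
# files would be registered, without writing anything to Alyx (the session path below is a
# placeholder, not a real session):
files_to_register, _ = register_session_raw_data(
    '/mnt/s0/Data/Subjects/example_subject/2021-01-01/001', dry=True)
for f in files_to_register:
    print(f)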
Example #3
0
def correct_passive_in_wrong_folder():
    """
    Finds the occasions where the data has been transferred manually and the passive folder
    has not been moved into the correct file structure
    """
    one = ONE()
    lab = _get_lab(one)
    if lab[0] == 'wittenlab':

        for flag in ROOT_PATH.rglob('passive_data_for_ephys.flag'):
            passive_data_path = alf.io.get_session_path(flag)
            passive_session = passive_data_path.stem
            passive_folder = passive_data_path.joinpath('raw_behavior_data')
            sessions = os.listdir(passive_data_path.parent)

            # find the session number that isn't the passive session
            data_sess = [sess for sess in sessions if sess != passive_session]
            if len(data_sess) == 1:
                session_path = passive_data_path.parent.joinpath(data_sess[0])
            else:
                # If more than one we register passive to the latest one
                data_sess.sort()
                session_path = passive_data_path.parent.joinpath(data_sess[-1])

            # copy the passive folder into the main session as raw_passive_data
            data_path = session_path.joinpath('raw_passive_data')
            shutil.copytree(passive_folder, data_path)
            _logger.info(f'copied {passive_folder} to {data_path}')

            # remove the passive flag
            flag.unlink()

            # find the tasks for this session and set it to waiting
            eid = one.eid_from_path(session_path)
            if eid:
                tasks = one.alyx.rest('tasks',
                                      'list',
                                      session=eid,
                                      name='TrainingRegisterRaw')
                if len(tasks) > 0:
                    stat = {'status': 'Waiting'}
                    one.alyx.rest('tasks',
                                  'partial_update',
                                  id=tasks[0]['id'],
                                  data=stat)

    else:
        return
Example #4
0
def spike_amplitude_patching():
    """
    Patch the datasets that have incorrect spikes.amplitude datasets. While doing so, also look
    for sessions that have spike sorting / alf folders but for some reason haven't been
    registered and uploaded to flatiron (normally because the .cbin file is missing).

    Five different scenarios to consider:
    1. Data extracted properly, is on flatiron and has templates.amps - do nothing
    2. Data extracted properly, is on flatiron but doesn't have templates.amps - phy convert
       and register
    3. Data extracted properly with templates.amps, but not on flatiron - phy convert and
       register (don't necessarily need to phy convert, but double check in case it was the
       syncing that errored)
    4. Data extracted properly without templates.amps, but not on flatiron - phy convert and
       register
    5. Data spike sorted but not extracted - phy convert and register

    """
    def phy2alf_conversion(session_path, ks2_path, alf_path, probe_label):
        try:
            # Find spikeglx meta data files associated with the session and probe
            files = spikeglx.glob_ephys_files(session_path, ext='meta')
            ap_files = [(ef.get("ap"), ef.get("label")) for ef in files
                        if "ap" in ef.keys()]
            meta_file = next(ap[0] for ap in ap_files if ap[1] == probe_label)

            # The .cbin file doesn't always still exist on the server, so derive its path from
            # the meta file
            ap_file = meta_file.with_suffix('.cbin')

            # Convert to alf format
            spikes.ks2_to_alf(
                ks2_path,
                bin_path=meta_file.parent,
                out_path=alf_path,
                bin_file=None,
                ampfactor=SpikeSorting_KS2_Matlab._sample2v(ap_file))

            # Sync the probes
            out_files, _ = spikes.sync_spike_sorting(ap_file=ap_file,
                                                     out_path=alf_path)

            return 0, out_files, None

        except BaseException as err:
            _logger.error(
                f'{session_path} and {probe_label} errored with message: {err}'
            )

            return -1, None, err

    def add_note_to_insertion(eid, probe, one, msg=None):
        insertion = one.alyx.rest('insertions',
                                  'list',
                                  session=eid,
                                  name=probe)

        if len(insertion) > 0:
            probe_id = insertion[0]['id']
            status_note = {
                'user': one._par.ALYX_LOGIN,
                'content_type': 'probeinsertion',
                'object_id': probe_id,
                'text': f'amps_patching_local_server2: {msg}'
            }
            _ = one.alyx.rest('notes', 'create', data=status_note)
        else:
            # If the probe insertion doesn't exist, make a session note
            status_note = {
                'user': one._par.ALYX_LOGIN,
                'content_type': 'session',
                'object_id': eid,
                'text': f'amps_patching_local_server2: {probe}: {msg}'
            }
            _ = one.alyx.rest('notes', 'create', data=status_note)

    one = ONE()

    for ks2_out in ROOT_PATH.rglob('spike_sorting_ks2.log'):
        ks2_path = Path(ks2_out).parent

        # Clean up old flags if they exist
        if ks2_path.joinpath('amps_patching_local_server.flag').exists():
            ks2_path.joinpath('amps_patching_local_server.flag').unlink()

        # If we already looked at this session previously, no need to try again
        if ks2_path.joinpath('amps_patching_local_server2.flag').exists():
            continue

        # Make the flag since this is the first time looking at this session
        ks2_path.joinpath('amps_patching_local_server2.flag').touch()

        # Now proceed with everything else
        session_path = alf.io.get_session_path(ks2_out)
        eid = one.eid_from_path(session_path)
        if eid is None:
            # Skip sessions that don't exist on alyx!
            continue
        probe = ks2_path.stem
        alf_path = session_path.joinpath('alf', probe)
        alf_path.mkdir(parents=True, exist_ok=True)

        # If a clusters.metrics file exists in the alf_path, delete it as it causes a
        # registration error
        cluster_metrics = alf_path.joinpath('clusters.metrics.csv')
        if cluster_metrics.exists():
            os.remove(cluster_metrics)

        # The templates.amps file only exists if the new phy extractor was used
        templates_file = alf_path.joinpath('templates.amps.npy')
        if templates_file.exists():
            dset = one.alyx.rest('datasets',
                                 'list',
                                 session=eid,
                                 name='templates.amps.npy')
            # check if it has been registered for this probe specifically
            collection = [ds['collection'].rsplit('/', 1)[-1] for ds in dset]
            if probe in collection:
                continue

        # Otherwise we need to extract alf files and register datasets
        status, out, err = phy2alf_conversion(session_path, ks2_path, alf_path,
                                              probe)
        if status == 0:
            try:
                cluster_qc = EphysCellsQc(session_path, one=one)
                qc_file, df_units, drift = cluster_qc._compute_cell_qc(
                    alf_path)
                out.append(qc_file)
                cluster_qc._label_probe_qc(alf_path, df_units, drift)
                register_dataset(out, one=one)
                add_note_to_insertion(eid, probe, one, msg='completed')
                _logger.info(f'All good: {session_path} and {probe}')
            except BaseException as err2:
                _logger.info(
                    f'Errored at qc/ registration stage: {session_path} and {probe}'
                )
                add_note_to_insertion(eid, probe, one, msg=err2)
        else:
            # Log the error
            add_note_to_insertion(eid, probe, one, msg=err)
            continue
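
# Hypothetical driver: the patchers above are one-off maintenance jobs; on a local server they
# could be run in sequence like this (a sketch, not part of the original module):
if __name__ == '__main__':
    upload_ks2_output()
    correct_passive_in_wrong_folder()
    spike_amplitude_patching()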