Example #1
def yuta2nwb(session_path='/Users/bendichter/Desktop/Buzsaki/SenzaiBuzsaki2017/YutaMouse41/YutaMouse41-150903',
             subject_xls=None, include_spike_waveforms=True, stub=True):

    subject_path, session_id = os.path.split(session_path)
    fpath_base = os.path.split(subject_path)[0]
    identifier = session_id
    mouse_number = session_id[9:11]
    if '-' in session_id:
        subject_id, date_text = session_id.split('-')
        b = False
    else:
        subject_id, date_text = session_id.split('b')
        b = True

    if subject_xls is None:
        subject_xls = os.path.join(subject_path, 'YM' + mouse_number + ' exp_sheet.xlsx')
    elif not subject_xls.endswith('.xlsx'):
        subject_xls = os.path.join(subject_xls, 'YM' + mouse_number + ' exp_sheet.xlsx')

    session_start_time = dateparse(date_text, yearfirst=True)

    df = pd.read_excel(subject_xls)

    subject_data = {}
    for key in ['genotype', 'DOB', 'implantation', 'Probe', 'Surgery', 'virus injection', 'mouseID']:
        names = df.iloc[:, 0]
        if key in names.values:
            subject_data[key] = df.iloc[np.argmax(names == key), 1]

    if isinstance(subject_data.get('DOB'), datetime):
        age = session_start_time - subject_data['DOB']
    else:
        age = None

    subject = Subject(subject_id=subject_id, age=str(age) if age is not None else None,
                      genotype=subject_data['genotype'],
                      species='mouse')

    nwbfile = NWBFile(session_description='mouse in open exploration and theta maze',
                      identifier=identifier,
                      session_start_time=session_start_time.astimezone(),
                      file_create_date=datetime.now().astimezone(),
                      experimenter='Yuta Senzai',
                      session_id=session_id,
                      institution='NYU',
                      lab='Buzsaki',
                      subject=subject,
                      related_publications='DOI:10.1016/j.neuron.2016.12.011')

    print('reading and writing raw position data...', end='', flush=True)
    ns.add_position_data(nwbfile, session_path)

    shank_channels = ns.get_shank_channels(session_path)[:8]
    all_shank_channels = np.concatenate(shank_channels)

    print('setting up electrodes...', end='', flush=True)
    hilus_csv_path = os.path.join(fpath_base, 'early_session_hilus_chans.csv')
    lfp_channel = get_reference_elec(subject_xls, hilus_csv_path, session_start_time, session_id, b=b)
    print(lfp_channel)
    custom_column = [{'name': 'theta_reference',
                      'description': 'this electrode was used to calculate LFP canonical bands',
                      'data': all_shank_channels == lfp_channel}]
    ns.write_electrode_table(nwbfile, session_path, custom_columns=custom_column, max_shanks=max_shanks)

    print('reading LFPs...', end='', flush=True)
    lfp_fs, all_channels_data = ns.read_lfp(session_path, stub=stub)

    lfp_data = all_channels_data[:, all_shank_channels]
    print('writing LFPs...', flush=True)
    # lfp_data[:int(len(lfp_data)/4)]
    lfp_ts = ns.write_lfp(nwbfile, lfp_data, lfp_fs, name='lfp',
                          description='lfp signal for all shank electrodes')

    for name, channel in special_electrode_dict.items():
        ts = TimeSeries(name=name, description='environmental electrode recorded inline with neural data',
                        data=all_channels_data[:, channel], rate=lfp_fs, unit='V', conversion=np.nan, resolution=np.nan)
        nwbfile.add_acquisition(ts)

    # compute filtered LFP
    print('filtering LFP...', end='', flush=True)
    all_lfp_phases = []
    for passband in ('theta', 'gamma'):
        lfp_fft = filter_lfp(lfp_data[:, all_shank_channels == lfp_channel].ravel(), lfp_fs, passband=passband)
        lfp_phase, _ = hilbert_lfp(lfp_fft)
        all_lfp_phases.append(lfp_phase[:, np.newaxis])
    data = np.dstack(all_lfp_phases)
    print('done.', flush=True)

    if include_spike_waveforms:
        print('writing waveforms...', end='', flush=True)
        for shankn in np.arange(1, 9, dtype=int):
            ns.write_spike_waveforms(nwbfile, session_path, shankn, stub=stub)
        print('done.', flush=True)

    decomp_series = DecompositionSeries(name='LFPDecompositionSeries',
                                        description='Theta and Gamma phase for reference LFP',
                                        data=data, rate=lfp_fs,
                                        source_timeseries=lfp_ts,
                                        metric='phase', unit='radians')
    decomp_series.add_band(band_name='theta', band_limits=(4, 10))
    decomp_series.add_band(band_name='gamma', band_limits=(30, 80))

    check_module(nwbfile, 'ecephys', 'contains processed extracellular electrophysiology data').add_data_interface(decomp_series)

    for stimulus in ns.get_events(session_path):
        nwbfile.add_stimulus(stimulus)

    # create epochs corresponding to experiments/environments for the mouse

    sleep_state_fpath = os.path.join(session_path, '{}--StatePeriod.mat'.format(session_id))

    exist_pos_data = any(os.path.isfile(os.path.join(session_path, '{}__{}.mat'.format(session_id, task_type['name'])))
                         for task_type in task_types)

    if exist_pos_data:
        nwbfile.add_epoch_column('label', 'name of epoch')

    for task_type in task_types:
        label = task_type['name']

        file = os.path.join(session_path, session_id + '__' + label + '.mat')
        if os.path.isfile(file):
            print('loading position for ' + label + '...', end='', flush=True)

            pos_obj = Position(name=label + '_position')

            matin = loadmat(file)
            tt = matin['twhl_norm'][:, 0]
            exp_times = find_discontinuities(tt)

            if 'conversion' in task_type:
                conversion = task_type['conversion']
            else:
                conversion = np.nan

            for pos_type in ('twhl_norm', 'twhl_linearized'):
                if pos_type in matin:
                    pos_data_norm = matin[pos_type][:, 1:]

                    spatial_series_object = SpatialSeries(
                        name=label + '_{}_spatial_series'.format(pos_type),
                        data=H5DataIO(pos_data_norm, compression='gzip'),
                        reference_frame='unknown', conversion=conversion,
                        resolution=np.nan,
                        timestamps=H5DataIO(tt, compression='gzip'))
                    pos_obj.add_spatial_series(spatial_series_object)

            check_module(nwbfile, 'behavior', 'contains processed behavioral data').add_data_interface(pos_obj)
            for i, window in enumerate(exp_times):
                nwbfile.add_epoch(start_time=window[0], stop_time=window[1],
                                  label=label + '_' + str(i))
            print('done.')

    # there are occasional mismatches between the matlab struct and the neuroscope files
    # regions: 3: 'CA3', 4: 'DG'

    df_unit_features = get_UnitFeatureCell_features(fpath_base, session_id, session_path)

    celltype_names = []
    for celltype_id, region_id in zip(df_unit_features['fineCellType'].values,
                                      df_unit_features['region'].values):
        if celltype_id == 1:
            if region_id == 3:
                celltype_names.append('pyramidal cell')
            elif region_id == 4:
                celltype_names.append('granule cell')
            else:
                raise Exception('unknown type')
        elif not np.isfinite(celltype_id):
            celltype_names.append('missing')
        else:
            celltype_names.append(celltype_dict[celltype_id])

    custom_unit_columns = [
        {
            'name': 'cell_type',
            'description': 'name of cell type',
            'data': celltype_names},
        {
            'name': 'global_id',
            'description': 'global id for cell for entire experiment',
            'data': df_unit_features['unitID'].values},
        {
            'name': 'max_electrode',
            'description': 'electrode that has the maximum amplitude of the waveform',
            'data': get_max_electrodes(nwbfile, session_path),
            'table': nwbfile.electrodes
        }]

    ns.add_units(nwbfile, session_path, custom_unit_columns, max_shanks=max_shanks)

    trialdata_path = os.path.join(session_path, session_id + '__EightMazeRun.mat')
    if os.path.isfile(trialdata_path):
        trials_data = loadmat(trialdata_path)['EightMazeRun']

        trialdatainfo_path = os.path.join(fpath_base, 'EightMazeRunInfo.mat')
        trialdatainfo = [x[0] for x in loadmat(trialdatainfo_path)['EightMazeRunInfo'][0]]

        features = trialdatainfo[:7]
        features[:2] = ['start_time', 'stop_time']
        for feature in features[4:] + ['condition']:
            nwbfile.add_trial_column(feature, 'description')

        for trial_data in trials_data:
            if trial_data[3]:
                cond = 'run_left'
            else:
                cond = 'run_right'
            nwbfile.add_trial(start_time=trial_data[0], stop_time=trial_data[1], condition=cond,
                              error_run=trial_data[4], stim_run=trial_data[5], both_visit=trial_data[6])
    """
    mono_syn_fpath = os.path.join(session_path, session_id+'-MonoSynConvClick.mat')

    matin = loadmat(mono_syn_fpath)
    exc = matin['FinalExcMonoSynID']
    inh = matin['FinalInhMonoSynID']

    #exc_obj = CatCellInfo(name='excitatory_connections',
    #                      indices_values=[], cell_index=exc[:, 0] - 1, indices=exc[:, 1] - 1)
    #module_cellular.add_container(exc_obj)
    #inh_obj = CatCellInfo(name='inhibitory_connections',
    #                      indices_values=[], cell_index=inh[:, 0] - 1, indices=inh[:, 1] - 1)
    #module_cellular.add_container(inh_obj)
    """

    if os.path.isfile(sleep_state_fpath):
        matin = loadmat(sleep_state_fpath)['StatePeriod']

        table = TimeIntervals(name='states', description='sleep states of animal')
        table.add_column(name='label', description='sleep state')

        data = []
        for name in matin.dtype.names:
            for row in matin[name][0][0]:
                data.append({'start_time': row[0], 'stop_time': row[1], 'label': name})
        for row in sorted(data, key=lambda x: x['start_time']):
            table.add_row(**row)

        check_module(nwbfile, 'behavior', 'contains behavioral data').add_data_interface(table)

    if stub:
        out_fname = session_path + '_stub.nwb'
    else:
        out_fname = session_path + '.nwb'

    print('writing NWB file...', end='', flush=True)
    with NWBHDF5IO(out_fname, mode='w') as io:
        io.write(nwbfile)
    print('done.')

    print('testing read...', end='', flush=True)
    # test read
    with NWBHDF5IO(out_fname, mode='r') as io:
        io.read()
    print('done.')
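Note that this snippet leans on several module-level names defined elsewhere in its source file: max_shanks, special_electrode_dict, task_types, and celltype_dict. Plausible stand-ins, reconstructed only from how they are used above; the channel numbers and type names here are illustrative guesses, not the dataset's real values:

max_shanks = 8  # matches the [:8] slice applied to get_shank_channels above
special_electrode_dict = {'ch_wait': 79, 'ch_arm': 78}  # hypothetical name -> raw channel map
celltype_dict = {2: 'mossy cell', 3: 'narrow waveform cell'}  # hypothetical fineCellType id -> name
task_types = [{'name': 'OpenFieldPosition_New', 'conversion': 0.46},
              {'name': 'EightMazePosition', 'conversion': 0.65 / 2}]  # same shape as in Example #6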
Example #2
    def run_conversion(self,
                       nwbfile: NWBFile,
                       metadata_dict: dict,
                       stub_test: bool = False):
        session_path = Path(self.source_data["folder_path"])
        session_id = session_path.stem

        # Stimuli
        for stimulus in get_events(session_path):
            nwbfile.add_stimulus(stimulus)

        # States
        sleep_state_fpath = session_path / f"{session_id}.SleepState.states.mat"
        # label renaming specific to Peyrache
        state_label_names = dict(WAKEstate="Awake",
                                 NREMstate="Non-REM",
                                 REMstate="REM")
        if sleep_state_fpath.is_file():
            matin = loadmat(sleep_state_fpath)["SleepState"]["ints"][0][0]

            table = TimeIntervals(name="states",
                                  description="Sleep states of animal.")
            table.add_column(name="label", description="Sleep state.")

            data = []
            for name in matin.dtype.names:
                for row in matin[name][0][0]:
                    data.append(
                        dict(start_time=row[0],
                             stop_time=row[1],
                             label=state_label_names[name]))
            for row in sorted(data, key=lambda x: x["start_time"]):
                table.add_row(**row)
            check_module(nwbfile, "behavior",
                         "Contains behavioral data.").add(table)

        # Position
        pos_names = ["RedLED", "BlueLED"]
        pos_idx_from = [0, 2]
        pos_idx_to = [2, 4]

        # Raw position
        whlfile_path = session_path / f"{session_id}.whl"
        whl_data = np.loadtxt(whlfile_path)
        for name, idx_from, idx_to in zip(pos_names, pos_idx_from, pos_idx_to):
            nwbfile.add_acquisition(
                peyrache_spatial_series(
                    name=name,
                    description="Raw sensor data. Values of -1 indicate that LED detection failed.",
                    data=whl_data[:, idx_from:idx_to],
                    conversion=np.nan,  # whl file is in arbitrary grid units
                ))

        # Processed position
        posfile_path = session_path / f"{session_id}.pos"
        if posfile_path.is_file():  # at least Mouse32-140820 was missing a .pos file
            try:
                pos_data = np.loadtxt(posfile_path)
                pos_obj = Position(name="SubjectPosition")
                for name, idx_from, idx_to in zip(pos_names, pos_idx_from,
                                                  pos_idx_to):
                    pos_obj.add_spatial_series(
                        peyrache_spatial_series(
                            name=name,
                            description=("(x,y) coordinates tracking subject movement through the maze. "
                                         "Values of -1 indicate that LED detection failed."),
                            data=pos_data[:, idx_from:idx_to],
                            conversion=1e-2,  # from cm to m
                        ))
                check_module(nwbfile, "behavior",
                             "Contains behavioral data.").add(pos_obj)
            except ValueError:  # data issue present in at least Mouse17-170201
                warn(f"Skipping .pos file for session {session_id}!")

        # Epochs - only available for sessions with raw data
        epoch_file = session_path / "raw" / f"{session_id}-raw-info" / f"{session_id}-behaviors.txt"
        if epoch_file.is_file():
            epoch_data = pd.read_csv(epoch_file, header=1)[f"{session_id}:"]
            epoch_dat_inds = []
            epoch_names = []
            for epochs in epoch_data:
                inds, name = epochs.split(": ")
                epoch_dat_inds.append(inds.split(" "))
                epoch_names.append(name)

            epoch_windows = [0]
            for epoch in epoch_dat_inds:
                exp_end_times = []
                for dat_ind in epoch:
                    recording_file = session_path / "raw" / f"{session_id}{dat_ind}.dat"
                    info_extractor = NeuroscopeRecordingExtractor(recording_file)
                    dat_end_time = (info_extractor.get_num_frames()
                                    / info_extractor.get_sampling_frequency())  # seconds
                    exp_end_times.append(dat_end_time)
                epoch_windows.extend([epoch_windows[-1] + sum(exp_end_times)] * 2)
            epoch_windows = np.array(epoch_windows[:-1]).reshape(-1, 2)

            for j, epoch_name in enumerate(epoch_names):
                nwbfile.add_epoch(start_time=epoch_windows[j][0],
                                  stop_time=epoch_windows[j][1],
                                  tags=[epoch_name])
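The converter above assumes a peyrache_spatial_series helper that is not shown. Below is a minimal sketch consistent with how it is called (name, description, data, conversion); the fixed 39.06 Hz video rate is an assumption, not taken from the source:

import numpy as np
from pynwb.behavior import SpatialSeries

def peyrache_spatial_series(name: str, description: str, data, conversion: float,
                            rate: float = 39.06):  # assumed tracking rate in Hz
    """Wrap LED tracking data in a SpatialSeries (illustrative sketch)."""
    return SpatialSeries(
        name=name,
        description=description,
        data=data,
        reference_frame="unknown",  # tracking-frame origin is not documented
        conversion=conversion,
        starting_time=0.0,
        rate=rate,
        resolution=np.nan,
    )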
Example #3
    def convert_data(
        self,
        nwbfile: NWBFile,
        metadata_dict: dict,
        stub_test: bool = False,
        include_spike_waveforms: bool = False,
    ):
        """Convert the behavioral portion of a particular session of the GrosmarkAD dataset."""
        session_path = self.input_args["folder_path"]
        subject_path, session_id = os.path.split(session_path)

        # Stimuli
        for stimulus in get_events(session_path):
            nwbfile.add_stimulus(stimulus)

        # States
        sleep_state_fpath = os.path.join(session_path,
                                         "{}.SleepState.states.mat".format(session_id))
        # label renaming specific to Watson
        state_label_names = dict(WAKEstate="Awake",
                                 NREMstate="Non-REM",
                                 REMstate="REM")
        if os.path.isfile(sleep_state_fpath):
            matin = loadmat(sleep_state_fpath)["SleepState"]["ints"][0][0]

            table = TimeIntervals(name="states",
                                  description="Sleep states of animal.")
            table.add_column(name="label", description="Sleep state.")

            data = []
            for name in matin.dtype.names:
                for row in matin[name][0][0]:
                    data.append(
                        dict(
                            start_time=row[0],
                            stop_time=row[1],
                            label=state_label_names[name],
                        ))
            for row in sorted(data, key=lambda x: x["start_time"]):
                table.add_row(**row)
            check_module(nwbfile, "behavior",
                         "contains behavioral data").add_data_interface(table)

        # Position
        pos_filepath = Path(session_path) / f"{session_id}.position.behavior.mat"
        pos_mat = loadmat(str(pos_filepath.absolute()))
        timestamps = pos_mat["position"]["timestamps"][0][0]
        starting_time = float(timestamps[0])  # confirmed to be a regularly sampled series
        # the .mat stores timestamps in seconds, so the rate passed to pynwb is the
        # inverse of the sampling interval, not the interval itself
        rate = 1.0 / (float(timestamps[1]) - starting_time)
        if pos_mat["position"]["units"][0][0][0] == "m":
            conversion = 1.0
        else:
            warnings.warn(
                f"Spatial units ({pos_mat['position']['units'][0][0][0]}) not listed in meters; "
                "setting conversion to nan.")
            conversion = np.nan
        pos_data = [[x[0], y[0]] for x, y in zip(
            pos_mat["position"]["position"][0][0]["x"][0][0],
            pos_mat["position"]["position"][0][0]["y"][0][0],
        )]
        linearized_data = [[
            lin[0]
        ] for lin in pos_mat["position"]["position"][0][0]["lin"][0][0]]

        label = pos_mat["position"]["behaviorinfo"][0][0]["MazeType"][0][0][0].replace(" ", "")
        pos_obj = Position(name=f"{label}Position")
        spatial_series_object = SpatialSeries(
            name=f"{label}SpatialSeries",
            description="(x,y) coordinates tracking subject movement through the maze.",
            data=H5DataIO(pos_data, compression="gzip"),
            reference_frame="unknown",
            conversion=conversion,
            starting_time=starting_time,
            rate=rate,
            resolution=np.nan,
        )
        pos_obj.add_spatial_series(spatial_series_object)
        check_module(
            nwbfile, "behavior",
            "contains processed behavioral data").add_data_interface(pos_obj)

        lin_pos_obj = Position(name=f"{label}LinearizedPosition")
        lin_spatial_series_object = SpatialSeries(
            name=f"{label}LinearizedTimeSeries",
            description=("Linearized position, defined as starting at the edge of the reward area, "
                         "and increasing clockwise, terminating at the opposing edge of the reward area."),
            data=H5DataIO(linearized_data, compression="gzip"),
            reference_frame="unknown",
            conversion=conversion,
            starting_time=starting_time,
            rate=rate,
            resolution=np.nan,
        )
        lin_pos_obj.add_spatial_series(lin_spatial_series_object)
        check_module(nwbfile, "behavior",
                     "contains processed behavioral data").add_data_interface(
                         lin_pos_obj)

        # Epochs
        epoch_names = list(pos_mat["position"]["Epochs"][0][0].dtype.names)
        epoch_windows = [[float(start), float(stop)]
                         for x in pos_mat["position"]["Epochs"][0][0][0][0]
                         for start, stop in x]
        nwbfile.add_epoch_column("label", "name of epoch")
        for j, epoch_name in enumerate(epoch_names):
            nwbfile.add_epoch(
                start_time=epoch_windows[j][0],
                stop_time=epoch_windows[j][1],
                label=epoch_name,
            )
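The repeated [0][0] chains above come from how scipy.io.loadmat represents MATLAB structs: each struct field arrives wrapped in a 1x1 object array. A small illustration of the access pattern (the file name is hypothetical):

from scipy.io import loadmat

mat = loadmat("YutaMouse41-150903.position.behavior.mat")  # hypothetical path
position = mat["position"]                  # 1x1 MATLAB struct -> numpy object array
timestamps = position["timestamps"][0][0]   # unwrap the 1x1 wrapper to reach the (N, 1) array
first_time = float(timestamps[0][0])        # scalar values carry one more level of indexing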
Example #4
    def convert_data(
        self, nwbfile: NWBFile, metadata_dict: dict, stub_test: bool = False, include_spike_waveforms: bool = False
    ):
        session_path = self.input_args["folder_path"]
        # TODO: check/enforce format?
        task_types = metadata_dict.get("task_types", [])

        subject_path, session_id = os.path.split(session_path)
        fpath_base = os.path.split(subject_path)[0]

        for stimulus in get_events(session_path):
            nwbfile.add_stimulus(stimulus)

        exist_pos_data = any(
            os.path.isfile(os.path.join(session_path, "{}__{}.mat".format(session_id, task_type["name"])))
            for task_type in task_types
        )

        if exist_pos_data:
            nwbfile.add_epoch_column("label", "name of epoch")

        for task_type in task_types:
            label = task_type["name"]

            file = os.path.join(session_path, session_id + "__" + label + ".mat")
            if os.path.isfile(file):
                pos_obj = Position(name=label + "_position")

                matin = loadmat(file)
                tt = matin["twhl_norm"][:, 0]
                exp_times = find_discontinuities(tt)

                if "conversion" in task_type:
                    conversion = task_type["conversion"]
                else:
                    conversion = np.nan

                for pos_type in ("twhl_norm", "twhl_linearized"):
                    if pos_type in matin:
                        pos_data_norm = matin[pos_type][:, 1:]

                        spatial_series_object = SpatialSeries(
                            name=label + "_{}_spatial_series".format(pos_type),
                            data=H5DataIO(pos_data_norm, compression="gzip"),
                            reference_frame="unknown",
                            conversion=conversion,
                            resolution=np.nan,
                            timestamps=H5DataIO(tt, compression="gzip"),
                        )
                        pos_obj.add_spatial_series(spatial_series_object)

                check_module(nwbfile, "behavior", "contains processed behavioral data").add_data_interface(pos_obj)
                for i, window in enumerate(exp_times):
                    nwbfile.add_epoch(start_time=window[0], stop_time=window[1], label=label + "_" + str(i))

        trialdata_path = os.path.join(session_path, session_id + "__EightMazeRun.mat")
        if os.path.isfile(trialdata_path):
            trials_data = loadmat(trialdata_path)["EightMazeRun"]

            trialdatainfo_path = os.path.join(fpath_base, "EightMazeRunInfo.mat")
            trialdatainfo = [x[0] for x in loadmat(trialdatainfo_path)["EightMazeRunInfo"][0]]

            features = trialdatainfo[:7]
            features[:2] = ["start_time", "stop_time"]
            for feature in features[4:] + ["condition"]:
                nwbfile.add_trial_column(feature, "description")

            for trial_data in trials_data:
                if trial_data[3]:
                    cond = "run_left"
                else:
                    cond = "run_right"
                nwbfile.add_trial(
                    start_time=trial_data[0],
                    stop_time=trial_data[1],
                    condition=cond,
                    error_run=trial_data[4],
                    stim_run=trial_data[5],
                    both_visit=trial_data[6],
                )

        sleep_state_fpath = os.path.join(session_path, "{}.SleepState.states.mat".format(session_id))
        # label renaming specific to Watson
        state_label_names = {"WAKEstate": "Awake", "NREMstate": "Non-REM", "REMstate": "REM"}
        if os.path.isfile(sleep_state_fpath):
            matin = loadmat(sleep_state_fpath)["SleepState"]["ints"][0][0]

            table = TimeIntervals(name="states", description="Sleep states of animal.")
            table.add_column(name="label", description="Sleep state.")

            data = []
            for name in matin.dtype.names:
                for row in matin[name][0][0]:
                    data.append({"start_time": row[0], "stop_time": row[1], "label": state_label_names[name]})
            for row in sorted(data, key=lambda x: x["start_time"]):
                table.add_row(**row)

            check_module(nwbfile, "behavior", "contains behavioral data").add_data_interface(table)
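Every example here funnels interfaces through check_module. A sketch of the get-or-create behavior these converters rely on, matching its call signature above (a reconstruction, not the exact library code):

from pynwb import NWBFile

def check_module(nwbfile: NWBFile, name: str, description: str = None):
    """Return the named processing module, creating it if absent."""
    if name in nwbfile.processing:
        return nwbfile.processing[name]
    return nwbfile.create_processing_module(name=name, description=description or name)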
Example #5
    def run_conversion(
        self,
        nwbfile: NWBFile,
        metadata: dict,
        stub_test: bool = False,
    ):
        session_path = Path(self.source_data["folder_path"])
        session_id = session_path.name

        # Stimuli
        for stimulus in get_events(
                session_path=session_path,
                suffixes=[".lrw.evt", ".puf.evt", ".rip.evt", ".rrw.evt"]):
            nwbfile.add_stimulus(stimulus)

        # Epochs
        df = pd.read_csv(session_path / f"{session_id}.cat.evt",
                         sep=" ",
                         names=("time", "begin_or_end", "of", "epoch_name"))
        epoch_starts = []
        for j in range(len(df) // 2):
            epoch_starts.append(df["time"][2 * j])
            nwbfile.add_epoch(start_time=epoch_starts[j],
                              stop_time=df["time"][2 * j + 1],
                              tags=[df["epoch_name"][2 * j][18:]])

        # Trials
        trialdata_path = session_path / f"{session_id}-TrackRunTimes.mat"
        if trialdata_path.is_file():
            trials_data = loadmat(trialdata_path)["trackruntimes"]
            for trial_data in trials_data:
                nwbfile.add_trial(start_time=trial_data[0],
                                  stop_time=trial_data[1])

        # .whl position
        whl_files = []  # placeholder: as written, no .whl files are converted
        for whl_file in whl_files:
            add_position_data(nwbfile=nwbfile,
                              session_path=session_path,
                              whl_file_path=whl_file,
                              starting_time=epoch_starts[-1])  # same as epoch_starts[j] left over from the epoch loop

        # States
        sleep_state_fpath = session_path / f"{session_id}.SleepState.states.mat"
        # label renaming
        state_label_names = dict(WAKEstate="Awake",
                                 NREMstate="Non-REM",
                                 REMstate="REM")
        if sleep_state_fpath.is_file():
            matin = loadmat(sleep_state_fpath)["SleepState"]["ints"][0][0]

            table = TimeIntervals(name="states",
                                  description="Sleep states of animal.")
            table.add_column(name="label", description="Sleep state.")

            data = []
            for name in matin.dtype.names:
                for row in matin[name][0][0]:
                    data.append(
                        dict(start_time=row[0],
                             stop_time=row[1],
                             label=state_label_names[name]))
            for row in sorted(data, key=lambda x: x["start_time"]):
                table.add_row(**row)
            check_module(nwbfile, "behavior",
                         "Contains behavioral data.").add(table)
Example #6
    def run_conversion(self,
                       nwbfile: NWBFile,
                       metadata: dict,
                       stub_test: bool = False):
        session_path = Path(self.source_data["folder_path"])
        task_types = [
            dict(name="OpenFieldPosition_ExtraLarge"),
            dict(name="OpenFieldPosition_New_Curtain", conversion=0.46),
            dict(name="OpenFieldPosition_New", conversion=0.46),
            dict(name="OpenFieldPosition_Old_Curtain", conversion=0.46),
            dict(name="OpenFieldPosition_Old", conversion=0.46),
            dict(name="OpenFieldPosition_Oldlast", conversion=0.46),
            dict(name="EightMazePosition", conversion=0.65 / 2),
        ]

        subject_path = session_path.parent
        session_id = session_path.stem

        for stimulus in get_events(session_path):
            nwbfile.add_stimulus(stimulus)

        sleep_state_fpath = session_path / f"{session_id}--StatePeriod.mat"

        exist_pos_data = any(
            (session_path / f"{session_id}__{task_type['name']}.mat").is_file()
            for task_type in task_types)
        if exist_pos_data:
            nwbfile.add_epoch_column("label", "Name of epoch.")

        # Epoch intervals
        for task_type in task_types:
            label = task_type["name"]

            file = session_path / f"{session_id}__{label}.mat"
            if file.is_file():
                pos_obj = Position(name=f"{label}_position")

                matin = loadmat(file)
                tt = matin["twhl_norm"][:, 0]
                exp_times = find_discontinuities(tt)

                if "conversion" in task_type:
                    conversion = task_type["conversion"]
                else:
                    conversion = np.nan

                for pos_type in ("twhl_norm", "twhl_linearized"):
                    if pos_type in matin:
                        pos_data_norm = matin[pos_type][:, 1:]

                        spatial_series_object = SpatialSeries(
                            name=f"{label}_{pos_type}_spatial_series",
                            data=H5DataIO(pos_data_norm, compression="gzip"),
                            reference_frame="unknown",
                            conversion=conversion,
                            resolution=np.nan,
                            timestamps=H5DataIO(tt, compression="gzip"),
                        )
                        pos_obj.add_spatial_series(spatial_series_object)

                check_module(
                    nwbfile, "behavior",
                    "Contains processed behavioral data.").add_data_interface(
                        pos_obj)
                for i, window in enumerate(exp_times):
                    nwbfile.add_epoch(
                        start_time=window[0],
                        stop_time=window[1],
                        tags=f"{label}_{i}",
                    )

        # Trial intervals
        trialdata_path = session_path / f"{session_id}__EightMazeRun.mat"
        if trialdata_path.is_file():
            trials_data = loadmat(trialdata_path)["EightMazeRun"]

            trialdatainfo_path = subject_path / "EightMazeRunInfo.mat"
            trialdatainfo = [
                x[0]
                for x in loadmat(trialdatainfo_path)["EightMazeRunInfo"][0]
            ]

            features = trialdatainfo[:7]
            features[:2] = ["start_time", "stop_time"]
            for feature in features[4:] + ["condition"]:
                nwbfile.add_trial_column(feature, "description")

            for trial_data in trials_data:
                if trial_data[3]:
                    cond = "run_left"
                else:
                    cond = "run_right"
                nwbfile.add_trial(
                    start_time=trial_data[0],
                    stop_time=trial_data[1],
                    condition=cond,
                    error_run=trial_data[4],
                    stim_run=trial_data[5],
                    both_visit=trial_data[6],
                )

        # Sleep states
        if sleep_state_fpath.is_file():
            matin = loadmat(sleep_state_fpath)["StatePeriod"]
            table = TimeIntervals(name="states",
                                  description="sleep states of animal")
            table.add_column(name="label", description="sleep state")
            data = []
            for name in matin.dtype.names:
                for row in matin[name][0][0]:
                    data.append(
                        dict(start_time=row[0], stop_time=row[1], label=name))
            for row in sorted(data, key=lambda x: x["start_time"]):
                table.add_row(**row)
            check_module(nwbfile, "behavior",
                         "Contains behavioral data.").add_data_interface(table)
Example #7
    def run_conversion(self, nwbfile: NWBFile, metadata: dict):
        session_path = Path(self.source_data["folder_path"])
        session_id = session_path.stem

        # Load the file with behavioral data
        behavior_file_path = Path(session_path) / f"{session_id}.behavior.mat"
        behavior_mat = read_matlab_file(str(behavior_file_path))["behavior"]

        # Add trials
        events = behavior_mat["events"]
        trial_interval_list = events["trialIntervals"]

        data = []
        for start_time, stop_time in trial_interval_list:
            data.append(
                dict(
                    start_time=float(start_time),
                    stop_time=float(stop_time),
                ))
        for row in sorted(data, key=lambda x: x["start_time"]):
            nwbfile.add_trial(**row)

        trial_list = events["trials"]
        direction_list = [trial.get("direction", "") for trial in trial_list]
        trial_type_list = [trial.get("type", "") for trial in trial_list]

        if not all(direction == "" for direction in direction_list):
            nwbfile.add_trial_column(name="direction",
                                     description="direction of the trial",
                                     data=direction_list)

        if not all(trial_type == "" for trial_type in trial_type_list):
            nwbfile.add_trial_column(name="trial_type",
                                     description="type of trial",
                                     data=trial_type_list)

        # Position
        module_name = "behavior"
        module_description = "Contains behavioral data concerning position."
        processing_module = get_module(nwbfile=nwbfile,
                                       name=module_name,
                                       description=module_description)

        timestamps = np.array(behavior_mat["timestamps"])[..., 0]

        position = behavior_mat["position"]
        pos_data = [[x, y, z]
                    for (x, y, z) in zip(position["x"], position["y"], position["z"])]
        pos_data = np.array(pos_data)[..., 0]

        unit = behavior_mat.get("units", "")

        if unit in ["m", "meter", "meters"]:
            conversion = 1.0
        else:
            warnings.warn(f"Spatial units {unit} not listed in meters; "
                          "setting conversion to nan.")
            conversion = np.nan

        description = behavior_mat.get("description",
                                       "generic_position_tracking").replace(
                                           "/", "-")
        rotation_type = behavior_mat.get("rotationType", "non_specified")

        pos_obj = Position(name=f"{description}_task".replace(" ", "_"))

        spatial_series_object = SpatialSeries(
            name="position",
            description="(x,y,z) coordinates tracking subject movement.",
            data=H5DataIO(pos_data, compression="gzip"),
            reference_frame="unknown",
            unit=unit,
            conversion=conversion,
            timestamps=timestamps,
            resolution=np.nan,
        )

        pos_obj.add_spatial_series(spatial_series_object)

        # Add error if available
        errorPerMarker = behavior_mat.get("errorPerMarker", None)
        if errorPerMarker:
            error_data = np.array([error for error in errorPerMarker])[..., 0]

            spatial_series_object = SpatialSeries(
                name="error_per_marker",
                description="Estimated error for marker tracking from optitrack system.",
                data=H5DataIO(error_data, compression="gzip"),
                reference_frame="unknown",
                conversion=conversion,
                timestamps=timestamps,
                resolution=np.nan,
            )
            pos_obj.add_spatial_series(spatial_series_object)

        processing_module.add_data_interface(pos_obj)

        # Compass
        try:
            orientation = behavior_mat["orientation"]
            orientation_data = [[
                x, y, z, w
            ] for (x, y, z, w) in zip(orientation["x"], orientation["y"],
                                      orientation["z"], orientation["w"])]
            orientation_data = np.array(orientation_data)[..., 0]

            compass_obj = CompassDirection(name="allocentric_frame_tracking")

            spatial_series_object = SpatialSeries(
                name="orientation",
                description=f"(x, y, z, w) orientation coordinates, orientation type: {rotation_type}",
                data=H5DataIO(orientation_data, compression="gzip"),
                reference_frame="unknown",
                conversion=conversion,
                timestamps=timestamps,
                resolution=np.nan,
            )
            compass_obj.add_spatial_series(spatial_series_object)
            processing_module.add_data_interface(compass_obj)

        except KeyError:
            warnings.warn("Orientation data not found")

        # States
        module_name = "ecephys"
        module_description = "Contains behavioral data concerning classified states."
        processing_module = get_module(nwbfile=nwbfile,
                                       name=module_name,
                                       description=module_description)

        # Sleep states
        sleep_file_path = session_path / f"{session_id}.SleepState.states.mat"
        if Path(sleep_file_path).exists():
            mat_file = read_matlab_file(sleep_file_path)

            state_label_names = dict(WAKEstate="Awake",
                                     NREMstate="Non-REM",
                                     REMstate="REM",
                                     MAstate="MA")
            sleep_state_dic = mat_file["SleepState"]["ints"]
            table = TimeIntervals(name="sleep_states",
                                  description="Sleep state of the animal.")
            table.add_column(name="label", description="Sleep state.")

            data = []
            for sleep_state in state_label_names:
                values = sleep_state_dic[sleep_state]
                if len(values) != 0 and isinstance(values[0], int):
                    values = [values]
                for start_time, stop_time in values:
                    data.append(
                        dict(
                            start_time=float(start_time),
                            stop_time=float(stop_time),
                            label=state_label_names[sleep_state],
                        ))
            for row in sorted(data, key=lambda x: x["start_time"]):
                table.add_row(**row)
            processing_module.add(table)

        # Add epochs
        lfp_file_path = session_path / f"{session_path.name}.lfp"
        raw_file_path = session_path / f"{session_id}.dat"
        xml_file_path = session_path / f"{session_id}.xml"

        if raw_file_path.is_file():
            recorder = NeuroscopeRecordingExtractor(
                file_path=raw_file_path, xml_file_path=xml_file_path)
        else:
            recorder = NeuroscopeRecordingExtractor(
                file_path=lfp_file_path, xml_file_path=xml_file_path)

        num_frames = recorder.get_num_frames()
        sampling_frequency = recorder.get_sampling_frequency()
        end_of_the_session = num_frames / sampling_frequency

        session_start = 0.0
        start_trials_time = min(interval[0] for interval in trial_interval_list)
        end_trials_time = max(interval[1] for interval in trial_interval_list)

        nwbfile.add_epoch(start_time=session_start,
                          stop_time=start_trials_time,
                          tags="before trials")
        nwbfile.add_epoch(start_time=start_trials_time,
                          stop_time=end_trials_time,
                          tags="during trials")
        nwbfile.add_epoch(start_time=end_trials_time,
                          stop_time=end_of_the_session,
                          tags="after trials")
Example #8
    def convert_data(self,
                     nwbfile: NWBFile,
                     metadata_dict: dict,
                     stub_test: bool = False,
                     include_spike_waveforms: bool = False):
        session_path = self.input_args['folder_path']
        # TODO: check/enforce format?
        task_types = metadata_dict['task_types']

        subject_path, session_id = os.path.split(session_path)
        fpath_base = os.path.split(subject_path)[0]

        for stimulus in get_events(session_path):
            nwbfile.add_stimulus(stimulus)

        sleep_state_fpath = os.path.join(
            session_path, '{}--StatePeriod.mat'.format(session_id))

        exist_pos_data = any(
            os.path.isfile(
                os.path.join(
                    session_path, '{}__{}.mat'.format(session_id,
                                                      task_type['name'])))
            for task_type in task_types)

        if exist_pos_data:
            nwbfile.add_epoch_column('label', 'name of epoch')

        for task_type in task_types:
            label = task_type['name']

            file = os.path.join(session_path,
                                session_id + '__' + label + '.mat')
            if os.path.isfile(file):
                pos_obj = Position(name=label + '_position')

                matin = loadmat(file)
                tt = matin['twhl_norm'][:, 0]
                exp_times = find_discontinuities(tt)

                if 'conversion' in task_type:
                    conversion = task_type['conversion']
                else:
                    conversion = np.nan

                for pos_type in ('twhl_norm', 'twhl_linearized'):
                    if pos_type in matin:
                        pos_data_norm = matin[pos_type][:, 1:]

                        spatial_series_object = SpatialSeries(
                            name=label + '_{}_spatial_series'.format(pos_type),
                            data=H5DataIO(pos_data_norm, compression='gzip'),
                            reference_frame='unknown',
                            conversion=conversion,
                            resolution=np.nan,
                            timestamps=H5DataIO(tt, compression='gzip'))
                        pos_obj.add_spatial_series(spatial_series_object)

                check_module(
                    nwbfile, 'behavior',
                    'contains processed behavioral data').add_data_interface(
                        pos_obj)
                for i, window in enumerate(exp_times):
                    nwbfile.add_epoch(start_time=window[0],
                                      stop_time=window[1],
                                      label=label + '_' + str(i))

        trialdata_path = os.path.join(session_path,
                                      session_id + '__EightMazeRun.mat')
        if os.path.isfile(trialdata_path):
            trials_data = loadmat(trialdata_path)['EightMazeRun']

            trialdatainfo_path = os.path.join(fpath_base,
                                              'EightMazeRunInfo.mat')
            trialdatainfo = [
                x[0]
                for x in loadmat(trialdatainfo_path)['EightMazeRunInfo'][0]
            ]

            features = trialdatainfo[:7]
            features[:2] = ['start_time', 'stop_time']
            for feature in features[4:] + ['condition']:
                nwbfile.add_trial_column(feature, 'description')

            for trial_data in trials_data:
                if trial_data[3]:
                    cond = 'run_left'
                else:
                    cond = 'run_right'
                nwbfile.add_trial(start_time=trial_data[0],
                                  stop_time=trial_data[1],
                                  condition=cond,
                                  error_run=trial_data[4],
                                  stim_run=trial_data[5],
                                  both_visit=trial_data[6])

        if os.path.isfile(sleep_state_fpath):
            matin = loadmat(sleep_state_fpath)['StatePeriod']

            table = TimeIntervals(name='states',
                                  description='sleep states of animal')
            table.add_column(name='label', description='sleep state')

            data = []
            for name in matin.dtype.names:
                for row in matin[name][0][0]:
                    data.append({
                        'start_time': row[0],
                        'stop_time': row[1],
                        'label': name
                    })
            for row in sorted(data, key=lambda x: x['start_time']):
                table.add_row(**row)

            check_module(nwbfile, 'behavior',
                         'contains behavioral data').add_data_interface(table)
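A hypothetical driver for the converter above, reusing the task list hard-coded in Example #6; the converter and nwbfile objects are assumed to already exist:

metadata_dict = {
    'task_types': [
        {'name': 'OpenFieldPosition_New', 'conversion': 0.46},
        {'name': 'EightMazePosition', 'conversion': 0.65 / 2},
    ]
}
# converter.convert_data(nwbfile=nwbfile, metadata_dict=metadata_dict)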
Example #9
    def run_conversion(self, nwbfile: NWBFile, metadata: dict):
        session_path = Path(self.source_data["folder_path"])
        session_id = session_path.stem

        module_name = "Neural states"
        module_description = "Contains behavioral data concerning classified states."
        processing_module = get_module(nwbfile=nwbfile,
                                       name=module_name,
                                       description=module_description)

        # Sleep states
        sleep_file_path = session_path / f"{session_id}.SleepState.states.mat"
        if Path(sleep_file_path).exists():
            mat_file = read_matlab_file(sleep_file_path)

            state_label_names = dict(WAKEstate="Awake",
                                     NREMstate="Non-REM",
                                     REMstate="REM",
                                     MAstate="MA")
            sleep_state_dic = mat_file["SleepState"]["ints"]
            table = TimeIntervals(name="Sleep states",
                                  description="Sleep state of the animal.")
            table.add_column(name="label", description="Sleep state.")

            data = []
            for sleep_state in state_label_names:
                values = sleep_state_dic[sleep_state]
                if len(values) != 0 and isinstance(values[0], int):
                    values = [values]
                for start_time, stop_time in values:
                    data.append(
                        dict(
                            start_time=float(start_time),
                            stop_time=float(stop_time),
                            label=state_label_names[sleep_state],
                        ))
            for row in sorted(data, key=lambda x: x["start_time"]):
                table.add_row(**row)
            processing_module.add(table)

        # Up and down states
        behavioral_file_path = session_path / f"{session_id}.SlowWaves.events.mat"
        behavioral_file = read_matlab_file(behavioral_file_path)
        table = TimeIntervals(
            name="Up-Down states",
            description="Up and down states classified by LFP.")
        table.add_column(name="label", description="state.")

        data = []
        up_and_down_intervals_dic = behavioral_file["SlowWaves"]["ints"]
        for state, values in up_and_down_intervals_dic.items():
            for start_time, stop_time in values:
                data.append(
                    dict(start_time=float(start_time),
                         stop_time=float(stop_time),
                         label=state))
        for row in sorted(data, key=lambda x: x["start_time"]):
            table.add_row(**row)
        processing_module.add(table)

        # Laser diode and visual laser
        laser_details = dict(
            LaserDiode=dict(name="Laser diode",
                            description="Laser pulses for optogenetics."),
            VisualLaser=dict(
                name="Visual laser",
                description="Laser pulses for subject stimulation."),
        )
        for laser_type, laser_detail in laser_details.items():
            laser_file_path = session_path / f"{session_id}_Pulses_{laser_type}.mat"
            if laser_file_path.exists():
                laser_file = read_matlab_file(laser_file_path)
                table = TimeIntervals(name=laser_detail["name"],
                                      description=laser_detail["description"])
                table.add_column(name="amplitude",
                                 description="Amplitude of the laser pulse.")

                data = []
                laser_pulses = laser_file["Pulses"]["periods"]
                amplitudes = laser_file["Pulses"]["amplitude"]
                for interval, amplitude in zip(laser_pulses, amplitudes):
                    data.append(
                        dict(start_time=float(interval[0]),
                             stop_time=float(interval[1]),
                             amplitude=amplitude))
                for row in sorted(data, key=lambda x: x["start_time"]):
                    table.add_row(**row)
                processing_module.add(table)
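The build-sort-add TimeIntervals pattern appears three times in this snippet alone. A small helper that captures it (hypothetical, not part of the source):

from pynwb.epoch import TimeIntervals

def intervals_from_rows(name, description, rows, extra_columns=(("label", "State label."),)):
    """Create a TimeIntervals table from dict rows, inserted in start_time order."""
    table = TimeIntervals(name=name, description=description)
    for column_name, column_description in extra_columns:
        table.add_column(name=column_name, description=column_description)
    for row in sorted(rows, key=lambda r: r["start_time"]):
        table.add_row(**row)
    return table

With it, each block above reduces to building the data list and calling processing_module.add(intervals_from_rows(...)).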
Example #10
    def run_conversion(self, nwbfile: NWBFile, metadata: dict):
        """
        Primary conversion function for the custom Feldman lab behavioral interface.

        Uses the synch information in the nidq_synch_file to set trial times in NWBFile.
        """
        folder_path = Path(self.source_data["folder_path"])

        trial_numbers, stimulus_numbers, trial_times_from_nidq = get_trials_info(
            recording_nidq=SpikeGLXRecordingExtractor(
                file_path=self.source_data["nidq_synch_file"]),
            trial_ongoing_channel=self.source_data["trial_ongoing_channel"],
            event_channel=self.source_data["event_channel"])
        if trial_numbers[0] != 0:
            trial_numbers, stimulus_numbers, trial_times_from_nidq = clip_trials(
                trial_numbers=trial_numbers,
                stimulus_numbers=stimulus_numbers,
                trial_times=trial_times_from_nidq)
        header_segments = [
            x for x in folder_path.iterdir() if "header" in x.name
        ]
        first_header = read_csv(header_segments[0],
                                header=None,
                                sep="\t",
                                index_col=0).T
        nwbfile.trials = TimeIntervals(name="trials",
                                       description=str({
                                           x: y.values[0]
                                           for x, y in first_header.items()
                                       }).replace("'", "\""))

        exclude_columns = {"TrNum", "Segment"}
        trial_csv_column_names = dict(
            StimNum="stimulus_number",
            StimLayout="stimulus_layout",
            StimOnsetTime="stimulus_onset_time",
            StimOrder="stimulus_order",
            Tone="tone",
            TrOutcome="trial_outcome",
            TrType="trial_type",
            RewardTime="reward_time",
            RWStartTime="reward_start_time",
            RWEndTime="reward_end_time",
            NLicks="number_of_licks",
            LickInWindow="licks_in_window",
            Laser="laser_is_on",
            CumVol="cumulative_volume",
            CumNRewards="cumulative_number_of_rewards",
            ISS0Time="ISS0Time",
            Arm0Time="Arm0Time")
        trial_csv_column_descriptions = dict(
            StimNum="The identifier value for stimulus type.",
            StimLayout="The index of the stimulus layout. 1=Std, 2=Trains, 3=IL, 4=Trains+IL, 5=RFMap, 6=2WC, 7=MWS, 8=MWD",
            StimOnsetTime="The time the stimulus was presented.",
            StimOrder="Index of stimulus ordering.",
            Tone="Index of the tone.",
            TrOutcome="The outcome index for each trial.",
            TrType="The type index for each trial.",
            RewardTime="Index of reward timing.",
            RWStartTime="Times when reward began.",
            RWEndTime="Times when reward ended.",
            NLicks="Number of licks.",
            LickInWindow="Number of licks in selected window.",
            Laser="Boolean indicating laser activity.",
            CumVol="Cumulative volume.",
            CumNRewards="Cumulative number of rewards.",
            ISS0Time="Data needed for spikes.mat roundtrip.",
            Arm0Time="Data needed for spikes.mat roundtrip.")
        stimulus_csv_column_names = dict(Time_ms="stimulus_times",
                                         Ampl="stimulus_amplitudes",
                                         Posn="stimulus_ordinalities")
        stimulus_column_description = dict(
            stimulus_elements="Type index of each stimulus element.",
            stimulus_times="Time of occurrence of each stimulus element.",
            stimulus_amplitudes="Amplitudes for each stimulus element. Unknown units.",
            stimulus_ordinalities="Ordinal position of the stimulus element in the train.",
            stimulus_rises="Rises for each stimulus element. Unknown units.",
            stimulus_gngs="GNGs for the stimulus element. Unknown units.",
            stimulus_shapes="Shape index of the stimulus element.",
            stimulus_durations="Duration of the stimulus element in seconds.",
            stimulus_probabilities="Probability that the stimulus was presented; 0 if deterministic.",
            stimulus_piezo_labels="Manually assigned labels to each stimulus element.")
        add_trial_columns(
            nwbfile=nwbfile,
            trial_csv_column_names=trial_csv_column_names,
            trial_csv_column_descriptions=trial_csv_column_descriptions,
            stimulus_column_names=stimulus_column_description.keys(),
            stimulus_column_description=stimulus_column_description,
            exclude_columns=exclude_columns)
        for header_segment in header_segments:
            header_data = read_csv(header_segment,
                                   header=None,
                                   sep="\t",
                                   index_col=0).T
            trial_data = read_csv(str(header_segment).replace("header", "trials"),
                                  header=0, sep="\t")
            if (trial_data["TrStartTime"].iloc[-1] == 0
                    and trial_data["TrEndTime"].iloc[-1] == 0):
                trial_data.drop(trial_data.index[-1], inplace=True)
            stimulus_data = read_csv(str(header_segment).replace("header", "stimuli"),
                                     header=0, sep="\t")

            trial_segment_csv_start_times = np.array(
                trial_data.loc[:, "TrStartTime"])
            for csv_column in trial_data:
                if "Time" in csv_column and not np.all(
                        np.array(trial_data.loc[:, csv_column]) == 0):
                    trial_data.loc[:, csv_column] = (
                        (np.array(trial_data.loc[:, csv_column]) -
                         trial_segment_csv_start_times) / 1e3 +
                        trial_times_from_nidq[trial_data.loc[:, "TrNum"], 0])
            trial_data.loc[:, "Laser"] = trial_data.loc[:,
                                                        "Laser"].astype(bool)
            last_trial = 0
            m = 0
            for j, (trial, offset) in enumerate(
                    zip(stimulus_data.loc[:, "Trial"],
                        stimulus_data.loc[:, "Time_ms"])):
                if trial == last_trial:
                    m += 1
                else:
                    last_trial = trial
                    m = 1
                stimulus_data.loc[j, "Time_ms"] = trial_times_from_nidq[
                    trial, 0] + offset / 1e3 * m

            add_trials(nwbfile=nwbfile,
                       trial_times=trial_times_from_nidq,
                       trial_data=trial_data,
                       stimulus_data=stimulus_data,
                       header_data=header_data,
                       trial_csv_column_names=trial_csv_column_names,
                       stimulus_csv_column_names=stimulus_csv_column_names,
                       exclude_columns=exclude_columns)
Example no. 11
def run_conversion(
        fpath_in='/Volumes/easystore5T/data/Brunton/subj_01_day_4.h5',
        fpath_out='/Volumes/easystore5T/data/Brunton/subj_01_day_4.nwb',
        events_path='C:/Users/micha/Desktop/Brunton Lab Data/event_times.csv',
        r2_path='C:/Users/micha/Desktop/Brunton Lab Data/full_model_r2.npy',
        coarse_events_path='C:/Users/micha/Desktop/Brunton Lab Data/coarse_labels/coarse_labels',
        reach_features_path='C:/Users/micha/Desktop/Brunton Lab Data/behavioral_features.csv',
        elec_loc_labels_path='elec_loc_labels.csv',
        special_chans=SPECIAL_CHANNELS,
        session_description='no description'
):
    print(f"Converting {fpath_in}...")
    fname = os.path.split(os.path.splitext(fpath_in)[0])[1]
    _, subject_id, _, session = fname.split('_')

    file = File(fpath_in, 'r')

    nwbfile = NWBFile(
        session_description=session_description,
        identifier=str(uuid.uuid4()),
        session_start_time=datetime.fromtimestamp(file['start_timestamp'][()]),
        subject=Subject(subject_id=subject_id, species="Homo sapiens"),
        session_id=session
    )

    # extract electrode groups
    file_elec_col_names = file['chan_info']['axis1'][:]
    elec_data = file['chan_info']['block0_values']

    re_exp = re.compile("([ a-zA-Z]+)([0-9]+)")
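    # split labels of the form '<name><number>' into an electrode-group name
    # and a channel number, e.g. a hypothetical b'GRID12' -> ('GRID', 12)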

    channel_labels_dset = file['chan_info']['axis0']

    group_names, group_nums = [], []
    for i, bytes_ in enumerate(channel_labels_dset):
        if bytes_ not in special_chans:
            str_ = bytes_.decode()
            res = re_exp.match(str_).groups()
            group_names.append(res[0])
            group_nums.append(int(res[1]))

    is_elec = ~np.isin(channel_labels_dset, special_chans)

    dset = DatasetView(file['dataset']).lazy_transpose()

    # add special channels
    for kwargs in (
            dict(
                name='EOGL',
                description='Electrooculography for tracking saccades - left',
            ),
            dict(
                name='EOGR',
                description='Electrooculography for tracking saccades - right',
            ),
            dict(
                name='ECGL',
                description='Electrocardiography - left',
            ),
            dict(
                name='ECGR',
                description='Electrocardiography - right',
            )
    ):
        if kwargs['name'].encode() in channel_labels_dset:
            nwbfile.add_acquisition(
                TimeSeries(
                    rate=file['f_sample'][()],
                    conversion=np.nan,
                    unit='V',
                    data=dset[:, list(channel_labels_dset).index(kwargs['name'].encode())],
                    **kwargs
                )
            )

    # add electrode groups
    df = pd.read_csv(elec_loc_labels_path)
    df_subject = df[df['subject_ID'] == 'subj' + subject_id]
    electrode_group_descriptions = {row['label']: row['long_name'] for _, row in df_subject.iterrows()}

    groups_map = dict()
    for group_name, group_description in electrode_group_descriptions.items():
        device = nwbfile.create_device(name=group_name)
        groups_map[group_name] = nwbfile.create_electrode_group(
            name=group_name,
            description=group_description,
            device=device,
            location='unknown'
        )

    # add required cols to electrodes table
    for row, group_name in zip(elec_data[:].T, group_names):
        nwbfile.add_electrode(
            x=row[file_elec_col_names == b'X'][0],
            y=row[file_elec_col_names == b'Y'][0],
            z=row[file_elec_col_names == b'Z'][0],
            imp=np.nan,
            location='unknown',
            filtering='250 Hz lowpass',
            group=groups_map[group_name],
        )

    # load r2 values to input into custom cols in electrodes table
    r2 = np.load(r2_path)
    low_freq_r2 = np.ravel(r2[int(subject_id) - 1, :len(group_names), 0])
    high_freq_r2 = np.ravel(r2[int(subject_id) - 1, :len(group_names), 1])

    # add custom cols to electrodes table
    elecs_dset = file['chan_info']['block0_values']

    def get_data(label):
        # select one per-electrode statistic from chan_info, keeping only
        # the neural (non-special) channels
        return elecs_dset[file_elec_col_names == label, :].ravel()[is_elec]

    for kwargs in (
            dict(
                name='standard_deviation',
                description="standard deviation of each electrode's data for the entire recording period",
                data=get_data(b'SD_channels')
            ),
            dict(
                name='kurtosis',
                description="kurtosis of each electrode's data for the entire recording period",
                data=get_data(b'Kurt_channels')
            ),
            dict(
                name='median_deviation',
                description="median absolute deviation estimator for standard deviation for each electrode",
                data=get_data(b'standardizeDenoms')
            ),
            dict(
                name='good',
                description='whether each electrode was marked as good',
                data=get_data(b'goodChanInds').astype(bool)
            ),
            dict(
                name='low_freq_R2',
                description='R^2 for low frequency band on each electrode',
                data=low_freq_r2
            ),
            dict(
                name='high_freq_R2',
                description='R^2 for high frequency band on each electrode',
                data=high_freq_r2
            )
    ):
        nwbfile.add_electrode_column(**kwargs)

    # confirm that electrodes table looks right
    # nwbfile.electrodes.to_dataframe()

    # add ElectricalSeries
    elecs_data = dset.lazy_slice[:, is_elec]
    n_bytes = elecs_data.dtype.itemsize

    nwbfile.add_acquisition(
        ElectricalSeries(
            name='ElectricalSeries',
            data=H5DataIO(
                data=DataChunkIterator(
                    data=elecs_data,
                    maxshape=elecs_data.shape,
                    # number of rows that fit in a ~5 GB buffer
                    buffer_size=int(5000 * 1e6) // (elecs_data.shape[1] * n_bytes)
                ),
                compression='gzip'
            ),
            rate=file['f_sample'][()],
            conversion=1e-6,  # data is in uV
            electrodes=nwbfile.create_electrode_table_region(
                region=list(range(len(nwbfile.electrodes))),
                description='all electrodes'
            )
        )
    )

    # add pose data
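    # pose columns arrive as interleaved x/y pairs; each SpatialSeries is
    # named by stripping the two-character axis suffix from the column label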
    pose_dset = file['pose_data']['block0_values']

    nwbfile.create_processing_module(
        name='behavior',
        description='pose data').add(
        Position(
            spatial_series=[
                SpatialSeries(
                    name=file['pose_data']['axis0'][x_ind][:-2].decode(),
                    data=H5DataIO(
                        data=pose_dset[:, [x_ind, y_ind]],
                        compression='gzip'
                    ),
                    reference_frame='unknown',
                    conversion=np.nan,
                    rate=30.
                ) for x_ind, y_ind in zip(
                    range(0, pose_dset.shape[1], 2),
                    range(1, pose_dset.shape[1], 2))
            ]
        )
    )

    # add events
    events = pd.read_csv(events_path)
    mask = (events['Subject'] == int(subject_id)) & (events['Recording day'] == int(session))
    events = events[mask]
    timestamps = events['Event time'].values
    events = events.reset_index()

    events = Events(
        name='ReachEvents',
        description=events['Event type'][0],  # Specifies which arm was used
        timestamps=timestamps,
        resolution=2e-3,  # resolution of the timestamps, i.e., smallest possible difference between timestamps
    )

    # add the Events type to the processing group of the NWB file
    nwbfile.processing['behavior'].add(events)

    # add coarse behavioral labels
    event_fp = f'sub{subject_id}_fullday_{session}'
    full_fp = os.path.join(coarse_events_path, event_fp + '.npy')
    coarse_events = np.load(full_fp, allow_pickle=True)

    label, data = np.unique(coarse_events, return_inverse=True)
    transition_idx = np.where(np.diff(data) != 0)
    start_t = nwbfile.processing["behavior"].data_interfaces["Position"]['L_Wrist'].starting_time
    rate = nwbfile.processing["behavior"].data_interfaces["Position"]['L_Wrist'].rate
    times = np.divide(transition_idx, rate) + start_t  # 30Hz sampling rate
    max_time = (np.shape(coarse_events)[0] / rate) + start_t
    times = np.hstack([start_t, np.ravel(times), max_time])
    transition_labels = np.hstack([label[data[transition_idx]], label[data[-1]]])

    nwbfile.add_epoch_column(name='labels', description='Coarse behavioral labels')

    for start_time, stop_time, epoch_label in zip(times[:-1], times[1:],
                                                  transition_labels):
        nwbfile.add_epoch(start_time=start_time, stop_time=stop_time,
                          labels=epoch_label)

    # add additional reaching features
    reach_features = pd.read_csv(reach_features_path)
    mask = (reach_features['Subject'] == int(subject_id)) & (reach_features['Recording day'] == int(session))
    reach_features = reach_features[mask]

    reaches = TimeIntervals(name='reaches', description='Features of each reach')
    reaches.add_column(name='Reach_magnitude_px', description='Magnitude of reach in pixels')
    reaches.add_column(name='Reach_angle_degrees', description='Reach angle in degrees')
    reaches.add_column(name='Onset_speed_px_per_sec', description='Onset speed in pixels / second')
    reaches.add_column(name='Speech_ratio', description='rough estimation of whether someone is likely to be speaking '
                                                        'based on a power ratio of audio data; ranges from 0 (no '
                                                        'speech) to 1 (high likelihood of speech)')
    reaches.add_column(name='Bimanual_ratio', description='ratio of ipsilateral wrist reach magnitude to the sum of '
                                                          'ipsilateral and contralateral wrist magnitudes; ranges from '
                                                          '0 (unimanual/contralateral move only) to 1 (only ipsilateral'
                                                          ' arm moving); 0.5 indicates bimanual movement')
    reaches.add_column(name='Bimanual_overlap', description='the amount of ipsilateral and contralateral wrist temporal '
                                                            'overlap as a fraction of the entire contralateral movement '
                                                            'duration')
    reaches.add_column(name='Bimanual_class', description='binary feature that classifies each movement event as '
                                                          'unimanual (0) or bimanual (1) based on how close in time an '
                                                          'ipsilateral wrist movement started relative to each '
                                                          'contralateral wrist movement event')
    for _, row_data in reach_features.iterrows():
        start_time = row_data['Time of day (sec)']
        stop_time = start_time + row_data['Reach duration (sec)']
        reaches.add_row(start_time=start_time,
                        stop_time=stop_time,
                        Reach_magnitude_px=row_data['Reach magnitude (px)'],
                        Reach_angle_degrees=row_data['Reach angle (degrees)'],
                        Onset_speed_px_per_sec=row_data['Onset speed (px/sec)'],
                        Speech_ratio=row_data['Speech ratio'],
                        Bimanual_ratio=row_data['Bimanual ratio'],
                        Bimanual_overlap=row_data['Bimanual overlap (sec)'],
                        Bimanual_class=row_data['Bimanual class']
                        )

    nwbfile.add_time_intervals(reaches)

    with NWBHDF5IO(fpath_out, 'w') as io:
        io.write(nwbfile)
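
# A minimal usage sketch (the paths below are illustrative placeholders, not
# from the source repository):
#
#     run_conversion(fpath_in='subj_01_day_4.h5',
#                    fpath_out='subj_01_day_4.nwb',
#                    events_path='event_times.csv',
#                    r2_path='full_model_r2.npy',
#                    coarse_events_path='coarse_labels',
#                    reach_features_path='behavioral_features.csv')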
Example no. 12
def yuta2nwb(
        session_path='D:/BuzsakiData/SenzaiY/YutaMouse41/YutaMouse41-150903',
        # '/Users/bendichter/Desktop/Buzsaki/SenzaiBuzsaki2017/YutaMouse41/YutaMouse41-150903',
        subject_xls=None,
        include_spike_waveforms=True,
        stub=True,
        cache_spec=True):

    subject_path, session_id = os.path.split(session_path)
    fpath_base = os.path.split(subject_path)[0]
    identifier = session_id
    mouse_number = session_id[9:11]
    if '-' in session_id:
        subject_id, date_text = session_id.split('-')
        b = False
    else:
        subject_id, date_text = session_id.split('b')
        b = True

    if subject_xls is None:
        subject_xls = os.path.join(subject_path,
                                   'YM' + mouse_number + ' exp_sheet.xlsx')
    else:
        if not subject_xls[-4:] == 'xlsx':
            subject_xls = os.path.join(subject_xls,
                                       'YM' + mouse_number + ' exp_sheet.xlsx')

    session_start_time = dateparse(date_text, yearfirst=True)

    df = pd.read_excel(subject_xls)

    subject_data = {}
    for key in [
            'genotype', 'DOB', 'implantation', 'Probe', 'Surgery',
            'virus injection', 'mouseID'
    ]:
        names = df.iloc[:, 0]
        if key in names.values:
            subject_data[key] = df.iloc[np.argmax(names == key), 1]

    if isinstance(subject_data['DOB'], datetime):
        age = session_start_time - subject_data['DOB']
    else:
        age = None

    subject = Subject(subject_id=subject_id,
                      age=str(age),
                      genotype=subject_data['genotype'],
                      species='mouse')

    nwbfile = NWBFile(
        session_description='mouse in open exploration and theta maze',
        identifier=identifier,
        session_start_time=session_start_time.astimezone(),
        file_create_date=datetime.now().astimezone(),
        experimenter='Yuta Senzai',
        session_id=session_id,
        institution='NYU',
        lab='Buzsaki',
        subject=subject,
        related_publications='DOI:10.1016/j.neuron.2016.12.011')

    print('reading and writing raw position data...', end='', flush=True)
    ns.add_position_data(nwbfile, session_path)

    shank_channels = ns.get_shank_channels(session_path)[:8]
    nshanks = len(shank_channels)
    all_shank_channels = np.concatenate(shank_channels)

    print('setting up electrodes...', end='', flush=True)
    hilus_csv_path = os.path.join(fpath_base, 'early_session_hilus_chans.csv')
    lfp_channel = get_reference_elec(subject_xls,
                                     hilus_csv_path,
                                     session_start_time,
                                     session_id,
                                     b=b)

    custom_column = [{
        'name': 'theta_reference',
        'description': 'this electrode was used to calculate LFP canonical bands',
        'data': all_shank_channels == lfp_channel
    }]
    ns.write_electrode_table(nwbfile,
                             session_path,
                             custom_columns=custom_column,
                             max_shanks=max_shanks)

    print('reading raw electrode data...', end='', flush=True)
    if stub:
        # example recording extractor for fast testing
        xml_filepath = os.path.join(session_path, session_id + '.xml')
        xml_root = et.parse(xml_filepath).getroot()
        acq_sampling_frequency = float(
            xml_root.find('acquisitionSystem').find('samplingRate').text)
        num_channels = 4
        num_frames = 10000
        X = np.random.normal(0, 1, (num_channels, num_frames))
        geom = np.random.normal(0, 1, (num_channels, 2))
        X = (X * 100).astype(int)
        sre = se.NumpyRecordingExtractor(
            timeseries=X, sampling_frequency=acq_sampling_frequency, geom=geom)
    else:
        nre = se.NeuroscopeRecordingExtractor('{}/{}.dat'.format(
            session_path, session_id))
        sre = se.SubRecordingExtractor(nre, channel_ids=all_shank_channels)

    print('writing raw electrode data...', end='', flush=True)
    se.NwbRecordingExtractor.add_electrical_series(sre, nwbfile)
    print('done.')

    print('reading spiking units...', end='', flush=True)
    if stub:
        spike_times = [200, 300, 400]
        num_frames = 10000
        allshanks = []
        for k in range(nshanks):
            SX = se.NumpySortingExtractor()
            for j in range(len(spike_times)):
                SX.add_unit(unit_id=j + 1,
                            times=np.sort(
                                np.random.uniform(0, num_frames,
                                                  spike_times[j])))
            allshanks.append(SX)
        se_allshanks = se.MultiSortingExtractor(allshanks)
        se_allshanks.set_sampling_frequency(acq_sampling_frequency)
    else:
        se_allshanks = se.NeuroscopeMultiSortingExtractor(session_path,
                                                          keep_mua_units=False)

    # one electrode-group entry per unit, in shank order
    electrode_group = []
    for shankn in np.arange(1, nshanks + 1, dtype=int):
        for _ in se_allshanks.sortings[shankn - 1].get_unit_ids():
            electrode_group.append(
                nwbfile.electrode_groups['shank' + str(shankn)])

    df_unit_features = get_UnitFeatureCell_features(fpath_base, session_id,
                                                    session_path)

    celltype_names = []
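    # map fineCellType codes to names; code 1 is split by region
    # (3 = CA3 -> pyramidal, 4 = DG -> granule), NaN -> 'missing',
    # everything else goes through celltype_dict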
    for celltype_id, region_id in zip(df_unit_features['fineCellType'].values,
                                      df_unit_features['region'].values):
        if celltype_id == 1:
            if region_id == 3:
                celltype_names.append('pyramidal cell')
            elif region_id == 4:
                celltype_names.append('granule cell')
            else:
                raise Exception('unknown type')
        elif not np.isfinite(celltype_id):
            celltype_names.append('missing')
        else:
            celltype_names.append(celltype_dict[celltype_id])

    # Add custom column data into the SortingExtractor so it can be written by the converter
    # Note there is currently a hidden assumption that the way in which the NeuroscopeSortingExtractor
    # merges the cluster IDs matches one-to-one with the get_UnitFeatureCell_features extraction
    property_descriptions = {
        'cell_type': 'name of cell type',
        'global_id': 'global id for cell for entire experiment',
        'shank_id': '0-indexed id of cluster of shank',
        'electrode_group': 'the electrode group that each spike unit came from'
    }
    property_values = {
        'cell_type': celltype_names,
        'global_id': df_unit_features['unitID'].values,
        'shank_id': [x - 2 for x in df_unit_features['unitIDshank'].values],
        # - 2 b/c the get_UnitFeatureCell_features removes 0 and 1 IDs from each shank
        'electrode_group': electrode_group
    }
    for unit_id in se_allshanks.get_unit_ids():
        for property_name in property_descriptions.keys():
            se_allshanks.set_unit_property(
                unit_id, property_name,
                property_values[property_name][unit_id])

    se.NwbSortingExtractor.write_sorting(
        se_allshanks,
        nwbfile=nwbfile,
        property_descriptions=property_descriptions)
    print('done.')

    # Read and write LFP's
    print('reading LFPs...', end='', flush=True)
    lfp_fs, all_channels_lfp_data = ns.read_lfp(session_path, stub=stub)

    lfp_data = all_channels_lfp_data[:, all_shank_channels]
    print('writing LFPs...', flush=True)
    # lfp_data[:int(len(lfp_data)/4)]
    lfp_ts = ns.write_lfp(nwbfile,
                          lfp_data,
                          lfp_fs,
                          name='lfp',
                          description='lfp signal for all shank electrodes')

    # Read and add special environmental electrodes
    for name, channel in special_electrode_dict.items():
        ts = TimeSeries(
            name=name,
            description='environmental electrode recorded inline with neural data',
            data=all_channels_lfp_data[:, channel],
            rate=lfp_fs,
            unit='V',
            # conversion=np.nan,
            resolution=np.nan)
        nwbfile.add_acquisition(ts)

    # compute filtered LFP
    print('filtering LFP...', end='', flush=True)
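    # band-pass the reference channel in each passband, take the Hilbert
    # phase, and stack the results to shape (time, 1 channel, 2 bands)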
    all_lfp_phases = []
    for passband in ('theta', 'gamma'):
        lfp_fft = filter_lfp(
            lfp_data[:, all_shank_channels == lfp_channel].ravel(),
            lfp_fs,
            passband=passband)
        lfp_phase, _ = hilbert_lfp(lfp_fft)
        all_lfp_phases.append(lfp_phase[:, np.newaxis])
    data = np.dstack(all_lfp_phases)
    print('done.', flush=True)

    if include_spike_waveforms:
        print('writing waveforms...', end='', flush=True)
        nshanks = min((max_shanks, len(ns.get_shank_channels(session_path))))

        for shankn in np.arange(nshanks, dtype=int) + 1:
            # Get spike activty from .spk file on a per-shank and per-sample basis
            ns.write_spike_waveforms(nwbfile, session_path, shankn, stub=stub)
        print('done.', flush=True)

    # Get the LFP Decomposition Series
    decomp_series = DecompositionSeries(
        name='LFPDecompositionSeries',
        description='Theta and Gamma phase for reference LFP',
        data=data,
        rate=lfp_fs,
        source_timeseries=lfp_ts,
        metric='phase',
        unit='radians')
    decomp_series.add_band(band_name='theta', band_limits=(4, 10))
    decomp_series.add_band(band_name='gamma', band_limits=(30, 80))

    check_module(nwbfile, 'ecephys',
                 'contains processed extracellular electrophysiology data'
                 ).add_data_interface(decomp_series)

    for event in ns.get_events(session_path):
        nwbfile.add_stimulus(event)

    # create epochs corresponding to experiments/environments for the mouse

    sleep_state_fpath = os.path.join(session_path,
                                     '{}--StatePeriod.mat'.format(session_id))

    exist_pos_data = any(
        os.path.isfile(
            os.path.join(session_path, '{}__{}.mat'.format(
                session_id, task_type['name']))) for task_type in task_types)

    if exist_pos_data:
        nwbfile.add_epoch_column('label', 'name of epoch')

    for task_type in task_types:
        label = task_type['name']

        file = os.path.join(session_path, session_id + '__' + label + '.mat')
        if os.path.isfile(file):
            print('loading position for ' + label + '...', end='', flush=True)

            pos_obj = Position(name=label + '_position')

            matin = loadmat(file)
            tt = matin['twhl_norm'][:, 0]
            exp_times = find_discontinuities(tt)
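            # exp_times holds the contiguous (start, stop) windows of the
            # timestamp vector; each window becomes one labeled epoch below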

            if 'conversion' in task_type:
                conversion = task_type['conversion']
            else:
                conversion = np.nan

            for pos_type in ('twhl_norm', 'twhl_linearized'):
                if pos_type in matin:
                    pos_data_norm = matin[pos_type][:, 1:]

                    spatial_series_object = SpatialSeries(
                        name=label + '_{}_spatial_series'.format(pos_type),
                        data=H5DataIO(pos_data_norm, compression='gzip'),
                        reference_frame='unknown',
                        conversion=conversion,
                        resolution=np.nan,
                        timestamps=H5DataIO(tt, compression='gzip'))
                    pos_obj.add_spatial_series(spatial_series_object)

            check_module(
                nwbfile, 'behavior',
                'contains processed behavioral data').add_data_interface(
                    pos_obj)
            for i, window in enumerate(exp_times):
                nwbfile.add_epoch(start_time=window[0],
                                  stop_time=window[1],
                                  label=label + '_' + str(i))
            print('done.')

    # there are occasional mismatches between the matlab struct and the neuroscope files
    # regions: 3: 'CA3', 4: 'DG'

    trialdata_path = os.path.join(session_path,
                                  session_id + '__EightMazeRun.mat')
    if os.path.isfile(trialdata_path):
        trials_data = loadmat(trialdata_path)['EightMazeRun']

        trialdatainfo_path = os.path.join(fpath_base, 'EightMazeRunInfo.mat')
        trialdatainfo = [
            x[0] for x in loadmat(trialdatainfo_path)['EightMazeRunInfo'][0]
        ]

        features = trialdatainfo[:7]
        features[:2] = 'start_time', 'stop_time'
        for x in features[4:] + ['condition']:
            nwbfile.add_trial_column(x, 'description')

        for trial_data in trials_data:
            if trial_data[3]:
                cond = 'run_left'
            else:
                cond = 'run_right'
            nwbfile.add_trial(start_time=trial_data[0],
                              stop_time=trial_data[1],
                              condition=cond,
                              error_run=trial_data[4],
                              stim_run=trial_data[5],
                              both_visit=trial_data[6])
    """
    mono_syn_fpath = os.path.join(session_path, session_id+'-MonoSynConvClick.mat')

    matin = loadmat(mono_syn_fpath)
    exc = matin['FinalExcMonoSynID']
    inh = matin['FinalInhMonoSynID']

    #exc_obj = CatCellInfo(name='excitatory_connections',
    #                      indices_values=[], cell_index=exc[:, 0] - 1, indices=exc[:, 1] - 1)
    #module_cellular.add_container(exc_obj)
    #inh_obj = CatCellInfo(name='inhibitory_connections',
    #                      indices_values=[], cell_index=inh[:, 0] - 1, indices=inh[:, 1] - 1)
    #module_cellular.add_container(inh_obj)
    """

    if os.path.isfile(sleep_state_fpath):
        matin = loadmat(sleep_state_fpath)['StatePeriod']

        table = TimeIntervals(name='states',
                              description='sleep states of animal')
        table.add_column(name='label', description='sleep state')

        data = []
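        # StatePeriod is a MATLAB struct with one field per sleep state;
        # each field holds rows of (start_time, stop_time)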
        for name in matin.dtype.names:
            for row in matin[name][0][0]:
                data.append({
                    'start_time': row[0],
                    'stop_time': row[1],
                    'label': name
                })
        for row in sorted(data, key=lambda x: x['start_time']):
            table.add_row(**row)

        check_module(nwbfile, 'behavior',
                     'contains behavioral data').add_data_interface(table)

    print('writing NWB file...', end='', flush=True)
    if stub:
        out_fname = session_path + '_stub.nwb'
    else:
        out_fname = session_path + '.nwb'

    with NWBHDF5IO(out_fname, mode='w') as io:
        io.write(nwbfile, cache_spec=cache_spec)
    print('done.')

    print('testing read...', end='', flush=True)
    # test read
    with NWBHDF5IO(out_fname, mode='r') as io:
        io.read()
    print('done.')
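
# A minimal usage sketch (the session path is an illustrative placeholder):
#
#     yuta2nwb(session_path='YutaMouse41/YutaMouse41-150903', stub=True)
#
# With stub=True the converter substitutes small randomly generated
# recordings for the raw data and writes '<session_path>_stub.nwb', which is
# convenient for a fast end-to-end test.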