Пример #1
0
def yuta2nwb(session_path='/Users/bendichter/Desktop/Buzsaki/SenzaiBuzsaki2017/YutaMouse41/YutaMouse41-150903',
             subject_xls=None, include_spike_waveforms=True, stub=True):
    """Build an NWB file for one recording session and write it to disk.

    The file is written to ``session_path + '_stub.nwb'`` when ``stub`` is true and
    to ``session_path + '.nwb'`` otherwise, and is then re-opened once as a read test.

    Parameters
    ----------
    session_path : str
        Path of the session folder. Its last component is used as the session id
        and NWB identifier; its parent is treated as the subject folder.
    subject_xls : str, optional
        Path of the subject's experiment sheet (ending in ``xlsx``), or of a folder
        containing ``'YM<NN> exp_sheet.xlsx'``. When omitted, that file name is
        looked up inside the subject folder.
    include_spike_waveforms : bool
        When true, spike waveforms are written for shanks 1 through 8.
    stub : bool
        Forwarded to ``ns.read_lfp`` and ``ns.write_spike_waveforms``; also selects
        the output file name.

    Notes
    -----
    Depends on names defined outside this function (``max_shanks``,
    ``special_electrode_dict``, ``task_types``, ``celltype_dict`` and several helpers).
    """
    subject_path, session_id = os.path.split(session_path)
    fpath_base = os.path.split(subject_path)[0]
    identifier = session_id
    # e.g. 'YutaMouse41-150903'[9:11] == '41'
    mouse_number = session_id[9:11]
    if '-' in session_id:
        subject_id, date_text = session_id.split('-')
        b = False
    else:
        subject_id, date_text = session_id.split('b')
        b = True

    sheet_name = 'YM' + mouse_number + ' exp_sheet.xlsx'
    if subject_xls is None:
        subject_xls = os.path.join(subject_path, sheet_name)
    elif not subject_xls.endswith('xlsx'):
        # a folder was given: look for the sheet inside it
        subject_xls = os.path.join(subject_xls, sheet_name)

    session_start_time = dateparse(date_text, yearfirst=True)

    df = pd.read_excel(subject_xls)

    # The first column holds the field names, the second column their values.
    names = df.iloc[:, 0]
    subject_data = {}
    for key in ['genotype', 'DOB', 'implantation', 'Probe', 'Surgery', 'virus injection', 'mouseID']:
        if key in names.values:
            subject_data[key] = df.iloc[np.argmax(names == key), 1]

    # Rows are optional in the sheet, so look them up without raising KeyError.
    dob = subject_data.get('DOB')
    age = session_start_time - dob if isinstance(dob, datetime) else None

    # Leave the age unset when it is unknown instead of storing the string 'None'.
    subject = Subject(subject_id=subject_id,
                      age=None if age is None else str(age),
                      genotype=subject_data.get('genotype'),
                      species='mouse')

    nwbfile = NWBFile(session_description='mouse in open exploration and theta maze',
                      identifier=identifier,
                      session_start_time=session_start_time.astimezone(),
                      file_create_date=datetime.now().astimezone(),
                      experimenter='Yuta Senzai',
                      session_id=session_id,
                      institution='NYU',
                      lab='Buzsaki',
                      subject=subject,
                      related_publications='DOI:10.1016/j.neuron.2016.12.011')

    print('reading and writing raw position data...', end='', flush=True)
    ns.add_position_data(nwbfile, session_path)

    shank_channels = ns.get_shank_channels(session_path)[:8]
    all_shank_channels = np.concatenate(shank_channels)

    print('setting up electrodes...', end='', flush=True)
    hilus_csv_path = os.path.join(fpath_base, 'early_session_hilus_chans.csv')
    lfp_channel = get_reference_elec(subject_xls, hilus_csv_path, session_start_time, session_id, b=b)
    print(lfp_channel)
    custom_column = [{'name': 'theta_reference',
                      'description': 'this electrode was used to calculate LFP canonical bands',
                      'data': all_shank_channels == lfp_channel}]
    ns.write_electrode_table(nwbfile, session_path, custom_columns=custom_column, max_shanks=max_shanks)

    print('reading LFPs...', end='', flush=True)
    lfp_fs, all_channels_data = ns.read_lfp(session_path, stub=stub)

    # channels are on axis 1 of all_channels_data
    lfp_data = all_channels_data[:, all_shank_channels]
    print('writing LFPs...', flush=True)
    lfp_ts = ns.write_lfp(nwbfile, lfp_data, lfp_fs, name='lfp',
                          description='lfp signal for all shank electrodes')

    for name, channel in special_electrode_dict.items():
        # Select the channel's column; indexing axis 0 would pick a single time sample instead.
        ts = TimeSeries(name=name, description='environmental electrode recorded inline with neural data',
                        data=all_channels_data[:, channel], rate=lfp_fs, unit='V',
                        conversion=np.nan, resolution=np.nan)
        nwbfile.add_acquisition(ts)

    # compute filtered LFP phase of the reference channel, one layer per band
    print('filtering LFP...', end='', flush=True)
    reference_lfp = lfp_data[:, all_shank_channels == lfp_channel].ravel()
    all_lfp_phases = []
    for passband in ('theta', 'gamma'):
        lfp_fft = filter_lfp(reference_lfp, lfp_fs, passband=passband)
        lfp_phase, _ = hilbert_lfp(lfp_fft)
        all_lfp_phases.append(lfp_phase[:, np.newaxis])
    data = np.dstack(all_lfp_phases)
    print('done.', flush=True)

    if include_spike_waveforms:
        print('writing waveforms...', end='', flush=True)
        for shankn in np.arange(1, 9, dtype=int):
            ns.write_spike_waveforms(nwbfile, session_path, shankn, stub=stub)
        print('done.', flush=True)

    decomp_series = DecompositionSeries(name='LFPDecompositionSeries',
                                        description='Theta and Gamma phase for reference LFP',
                                        data=data, rate=lfp_fs,
                                        source_timeseries=lfp_ts,
                                        metric='phase', unit='radians')
    decomp_series.add_band(band_name='theta', band_limits=(4, 10))
    decomp_series.add_band(band_name='gamma', band_limits=(30, 80))

    check_module(nwbfile, 'ecephys', 'contains processed extracellular electrophysiology data').add_data_interface(decomp_series)

    for event_series in ns.get_events(session_path):
        nwbfile.add_stimulus(event_series)

    # create epochs corresponding to experiments/environments for the mouse

    sleep_state_fpath = os.path.join(session_path, '{}--StatePeriod.mat'.format(session_id))

    exist_pos_data = any(os.path.isfile(os.path.join(session_path, '{}__{}.mat'.format(session_id, task_type['name'])))
                         for task_type in task_types)

    # the epoch 'label' column is only needed when at least one position file exists
    if exist_pos_data:
        nwbfile.add_epoch_column('label', 'name of epoch')

    for task_type in task_types:
        label = task_type['name']

        file = os.path.join(session_path, session_id + '__' + label + '.mat')
        if not os.path.isfile(file):
            continue

        print('loading position for ' + label + '...', end='', flush=True)

        pos_obj = Position(name=label + '_position')

        matin = loadmat(file)
        # column 0 of 'twhl_norm' is used as the timestamps, the remaining columns as data
        tt = matin['twhl_norm'][:, 0]
        exp_times = find_discontinuities(tt)

        conversion = task_type.get('conversion', np.nan)

        for pos_type in ('twhl_norm', 'twhl_linearized'):
            if pos_type in matin:
                pos_data_norm = matin[pos_type][:, 1:]

                spatial_series_object = SpatialSeries(
                    name=label + '_{}_spatial_series'.format(pos_type),
                    data=H5DataIO(pos_data_norm, compression='gzip'),
                    reference_frame='unknown', conversion=conversion,
                    resolution=np.nan,
                    timestamps=H5DataIO(tt, compression='gzip'))
                pos_obj.add_spatial_series(spatial_series_object)

        check_module(nwbfile, 'behavior', 'contains processed behavioral data').add_data_interface(pos_obj)
        for i, window in enumerate(exp_times):
            nwbfile.add_epoch(start_time=window[0], stop_time=window[1],
                              label=label + '_' + str(i))
        print('done.')

    # there are occasional mismatches between the matlab struct and the neuroscope files
    # regions: 3: 'CA3', 4: 'DG'

    df_unit_features = get_UnitFeatureCell_features(fpath_base, session_id, session_path)

    celltype_names = []
    for celltype_id, region_id in zip(df_unit_features['fineCellType'].values,
                                      df_unit_features['region'].values):
        if celltype_id == 1:
            # cell type 1 is named according to the recording region
            if region_id == 3:
                celltype_names.append('pyramidal cell')
            elif region_id == 4:
                celltype_names.append('granule cell')
            else:
                raise Exception('unknown type')
        elif not np.isfinite(celltype_id):
            celltype_names.append('missing')
        else:
            celltype_names.append(celltype_dict[celltype_id])

    custom_unit_columns = [
        {
            'name': 'cell_type',
            'description': 'name of cell type',
            'data': celltype_names},
        {
            'name': 'global_id',
            'description': 'global id for cell for entire experiment',
            'data': df_unit_features['unitID'].values},
        {
            'name': 'max_electrode',
            'description': 'electrode that has the maximum amplitude of the waveform',
            'data': get_max_electrodes(nwbfile, session_path),
            'table': nwbfile.electrodes
        }]

    ns.add_units(nwbfile, session_path, custom_unit_columns, max_shanks=max_shanks)

    trialdata_path = os.path.join(session_path, session_id + '__EightMazeRun.mat')
    if os.path.isfile(trialdata_path):
        trials_data = loadmat(trialdata_path)['EightMazeRun']

        trialdatainfo_path = os.path.join(fpath_base, 'EightMazeRunInfo.mat')
        trialdatainfo = [x[0] for x in loadmat(trialdatainfo_path)['EightMazeRunInfo'][0]]

        # info entries 4..6 name the extra trial columns; 'condition' is derived below
        for column_name in trialdatainfo[4:7] + ['condition']:
            nwbfile.add_trial_column(column_name, 'description')

        for trial_data in trials_data:
            cond = 'run_left' if trial_data[3] else 'run_right'
            nwbfile.add_trial(start_time=trial_data[0], stop_time=trial_data[1], condition=cond,
                              error_run=trial_data[4], stim_run=trial_data[5], both_visit=trial_data[6])

    # NOTE: export of the excitatory/inhibitory mono-synaptic connections stored in
    # '<session_id>-MonoSynConvClick.mat' was sketched here but is not implemented.

    if os.path.isfile(sleep_state_fpath):
        matin = loadmat(sleep_state_fpath)['StatePeriod']

        table = TimeIntervals(name='states', description='sleep states of animal')
        table.add_column(name='label', description='sleep state')

        state_rows = []
        for name in matin.dtype.names:
            for row in matin[name][0][0]:
                state_rows.append({'start_time': row[0], 'stop_time': row[1], 'label': name})
        # rows are added in chronological order across all states
        for state_row in sorted(state_rows, key=lambda x: x['start_time']):
            table.add_row(**state_row)

        check_module(nwbfile, 'behavior', 'contains behavioral data').add_data_interface(table)

    out_fname = session_path + ('_stub.nwb' if stub else '.nwb')

    print('writing NWB file...', end='', flush=True)
    with NWBHDF5IO(out_fname, mode='w') as io:
        io.write(nwbfile)
    print('done.')

    print('testing read...', end='', flush=True)
    # test read
    with NWBHDF5IO(out_fname, mode='r') as io:
        io.read()
    print('done.')
Пример #2
0
    def write_all_blocks(self, blocks, **kwargs):
        """
        Write list of blocks to the file

        File-level metadata is collected for every name in ``GLOBAL_ANNOTATIONS``:
        a value given in ``kwargs`` takes precedence, otherwise the values found in
        the blocks' annotations are used and must all agree.

        :param blocks: list of blocks to write; each is passed to ``self.write_block``.
        :param kwargs: optional values for the names in ``GLOBAL_ANNOTATIONS``.
        :raises NotImplementedError: if the blocks carry different values for the
            same global annotation.
        :raises Exception: if no 'session_start_time' annotation is available, or
            if validation of the written file reports errors.
        """
        # todo: allow metadata in NWBFile constructor to be taken from kwargs
        annotations = defaultdict(set)
        for annotation_name in GLOBAL_ANNOTATIONS:
            if annotation_name in kwargs:
                annotations[annotation_name] = kwargs[annotation_name]
                continue
            for block in blocks:
                if annotation_name not in block.annotations:
                    continue
                value = block.annotations[annotation_name]
                try:
                    annotations[annotation_name].add(value)
                except TypeError:
                    # unhashable value: store its JSON encoding if the field allows it
                    if annotation_name in POSSIBLE_JSON_FIELDS:
                        annotations[annotation_name].add(json.dumps(value))
                    else:
                        raise
            if annotation_name in annotations:
                if len(annotations[annotation_name]) > 1:
                    raise NotImplementedError(
                        "We don't yet support multiple values for {}".
                        format(annotation_name))
                # take single value from set
                annotations[annotation_name], = annotations[annotation_name]
        if "identifier" not in annotations:
            annotations["identifier"] = self.filename
        if "session_description" not in annotations:
            # an empty block list has no description to fall back on
            first_description = blocks[0].description if blocks else None
            annotations["session_description"] = first_description or self.filename
            # todo: concatenate descriptions of multiple blocks if different
        if "session_start_time" not in annotations:
            raise Exception(
                "Writing to NWB requires an annotation 'session_start_time'")
        # todo: handle subject
        # todo: store additional Neo annotations somewhere in NWB file
        nwbfile = NWBFile(**annotations)

        assert self.nwb_file_mode in ('w',
                                      )  # possibly expand to 'a'ppend later
        if self.nwb_file_mode == "w" and os.path.exists(self.filename):
            os.remove(self.filename)

        # The context manager closes the file even if writing a block fails.
        with pynwb.NWBHDF5IO(self.filename, mode=self.nwb_file_mode) as io_nwb:
            # custom columns are only declared when there is something to store in them
            if sum(statistics(block)["SpikeTrain"]["count"]
                   for block in blocks) > 0:
                nwbfile.add_unit_column(
                    '_name', 'the name attribute of the SpikeTrain')
                nwbfile.add_unit_column(
                    'segment',
                    'the name of the Neo Segment to which the SpikeTrain belongs')
                nwbfile.add_unit_column(
                    'block',
                    'the name of the Neo Block to which the SpikeTrain belongs')

            if sum(statistics(block)["Epoch"]["count"]
                   for block in blocks) > 0:
                nwbfile.add_epoch_column(
                    '_name', 'the name attribute of the Epoch')
                nwbfile.add_epoch_column(
                    'segment',
                    'the name of the Neo Segment to which the Epoch belongs')
                nwbfile.add_epoch_column(
                    'block',
                    'the name of the Neo Block to which the Epoch belongs')

            for block in blocks:
                self.write_block(nwbfile, block)
            io_nwb.write(nwbfile)

        # re-open the written file and validate it against the core namespace
        with pynwb.NWBHDF5IO(self.filename, "r") as io_validate:
            errors = pynwb.validate(io_validate, namespace="core")
            if errors:
                raise Exception(
                    f"Errors found when validating {self.filename}")
Пример #3
0
def nwb_copy_file(old_file, new_file, cp_objs=None):
    """
    Copy fields listed in 'cp_objs' from an existing NWB file to a new NWB file.

    The new file gets the old file's session description, an empty identifier
    and the current local time as session start time.

    Parameters
    ----------
    old_file : str, path
        String such as '/path/to/old_file.nwb'.
    new_file : str, path
        String such as '/path/to/new_file.nwb'.
    cp_objs : dict, optional
        Name:Value pairs (Group:Children) listing the groups and respective
        children from the current NWB file to be copied. Children can be:
        - Boolean, indicating an attribute (e.g. for institution, lab)
        - List of strings, containing several children names
        Recognised keys: 'institution', 'lab', 'session', 'devices',
        'electrode_groups', 'electrodes', 'epochs', 'invalid_times',
        'trials', 'intervals', 'stimulus', 'ecephys', 'acquisition',
        'subject'. Only 'ecephys' and 'acquisition' look at the value;
        for every other key its mere presence triggers the copy.
        Example:
        {'institution':True,
         'lab':True,
         'acquisition':['microphone'],
         'ecephys':['LFP','DecompositionSeries']}
        Defaults to an empty dict (nothing optional is copied).
    """
    if cp_objs is None:
        cp_objs = {}

    manager = get_manager()

    # Open original signal file
    with NWBHDF5IO(old_file, 'r', manager=manager,
                   load_namespaces=True) as io1:
        nwb_old = io1.read()

        # Creates new file
        nwb_new = NWBFile(session_description=str(nwb_old.session_description),
                          identifier='',
                          session_start_time=datetime.now(tzlocal()))
        with NWBHDF5IO(new_file, mode='w', manager=manager,
                       load_namespaces=False) as io2:
            # Institution name ------------------------------------------------
            if 'institution' in cp_objs:
                nwb_new.institution = str(nwb_old.institution)

            # Lab name --------------------------------------------------------
            if 'lab' in cp_objs:
                nwb_new.lab = str(nwb_old.lab)

            # Session id (note: the key is 'session') --------------------------
            if 'session' in cp_objs:
                nwb_new.session_id = nwb_old.session_id

            # Devices ---------------------------------------------------------
            if 'devices' in cp_objs:
                for dev_name in list(nwb_old.devices.keys()):
                    dev = Device(nwb_old.devices[dev_name].name)
                    nwb_new.add_device(dev)

            # Electrode groups ------------------------------------------------
            if 'electrode_groups' in cp_objs:
                for group_key in list(nwb_old.electrode_groups.keys()):
                    group_old = nwb_old.electrode_groups[group_key]
                    # the device is looked up in the new file by name
                    nwb_new.create_electrode_group(
                        name=str(group_old.name),
                        description=str(group_old.description),
                        location=str(group_old.location),
                        device=nwb_new.get_device(group_old.device.name)
                    )

            # Electrodes ------------------------------------------------------
            if 'electrodes' in cp_objs:
                default_vars = ['x', 'y', 'z', 'imp', 'location', 'filtering',
                                'group', 'group_name']
                nElec = len(nwb_old.electrodes['x'].data[:])
                for aux in np.arange(nElec):
                    nwb_new.add_electrode(
                        x=nwb_old.electrodes['x'][aux],
                        y=nwb_old.electrodes['y'][aux],
                        z=nwb_old.electrodes['z'][aux],
                        imp=nwb_old.electrodes['imp'][aux],
                        location=str(nwb_old.electrodes['location'][aux]),
                        filtering=str(nwb_old.electrodes['filtering'][aux]),
                        group=nwb_new.get_electrode_group(
                            nwb_old.electrodes['group'][aux].name),
                        group_name=str(nwb_old.electrodes['group_name'][aux])
                    )
                # if there are custom variables
                new_vars = [var for var in nwb_old.electrodes.colnames
                            if var not in default_vars]
                for var in new_vars:
                    if var == 'label':
                        var_data = [str(elem) for elem in
                                    nwb_old.electrodes[var].data[:]]
                    else:
                        var_data = np.array(nwb_old.electrodes[var].data[:])

                    nwb_new.add_electrode_column(
                        name=str(var),
                        description=str(nwb_old.electrodes[var].description),
                        data=var_data)

            # Epochs ----------------------------------------------------------
            if 'epochs' in cp_objs:
                default_vars = ['start_time', 'stop_time', 'tags',
                                'timeseries']
                nEpochs = len(nwb_old.epochs['start_time'].data[:])
                for i in np.arange(nEpochs):
                    nwb_new.add_epoch(
                        start_time=nwb_old.epochs['start_time'].data[i],
                        stop_time=nwb_old.epochs['stop_time'].data[i])
                # if there are custom variables
                new_vars = [var for var in nwb_old.epochs.colnames
                            if var not in default_vars]
                for var in new_vars:
                    nwb_new.add_epoch_column(
                        name=var,
                        description=nwb_old.epochs[var].description,
                        data=nwb_old.epochs[var].data[:])

            # Invalid times ---------------------------------------------------
            if 'invalid_times' in cp_objs:
                nInvalid = len(nwb_old.invalid_times['start_time'][:])
                for aux in np.arange(nInvalid):
                    nwb_new.add_invalid_time_interval(
                        start_time=nwb_old.invalid_times['start_time'][aux],
                        stop_time=nwb_old.invalid_times['stop_time'][aux])

            # Trials ----------------------------------------------------------
            if 'trials' in cp_objs:
                default_vars = ['start_time', 'stop_time']
                nTrials = len(nwb_old.trials['start_time'])
                for aux in np.arange(nTrials):
                    nwb_new.add_trial(
                        start_time=nwb_old.trials['start_time'][aux],
                        stop_time=nwb_old.trials['stop_time'][aux])
                # if there are custom variables
                new_vars = [var for var in nwb_old.trials.colnames
                            if var not in default_vars]
                for var in new_vars:
                    nwb_new.add_trial_column(
                        name=var,
                        description=nwb_old.trials[var].description,
                        data=nwb_old.trials[var].data[:])

            # Intervals -------------------------------------------------------
            if 'intervals' in cp_objs:
                for obj_name in list(nwb_old.intervals.keys()):
                    obj_old = nwb_old.intervals[obj_name]
                    # create and add TimeIntervals
                    # (only start and stop times are copied, no custom columns)
                    obj = TimeIntervals(name=obj_old.name,
                                        description=obj_old.description)
                    nInt = len(obj_old['start_time'])
                    for ind in np.arange(nInt):
                        obj.add_interval(start_time=obj_old['start_time'][ind],
                                         stop_time=obj_old['stop_time'][ind])
                    # Add to file
                    nwb_new.add_time_intervals(obj)

            # Stimulus --------------------------------------------------------
            if 'stimulus' in cp_objs:
                for obj_name in list(nwb_old.stimulus.keys()):
                    obj_old = nwb_old.stimulus[obj_name]
                    obj = TimeSeries(name=obj_old.name,
                                     description=obj_old.description,
                                     data=obj_old.data[:],
                                     rate=obj_old.rate,
                                     resolution=obj_old.resolution,
                                     conversion=obj_old.conversion,
                                     starting_time=obj_old.starting_time,
                                     unit=obj_old.unit)
                    nwb_new.add_stimulus(obj)

            # Processing modules ----------------------------------------------
            if 'ecephys' in cp_objs:
                old_interfaces = nwb_old.processing['ecephys'].data_interfaces
                if cp_objs['ecephys'] is True:
                    # copy_obj needs the interface objects, not their names
                    interfaces = list(old_interfaces.values())
                else:  # list of items
                    interfaces = [old_interfaces[key]
                                  for key in cp_objs['ecephys']]
                # Add ecephys module to NWB file
                ecephys_module = ProcessingModule(
                    name='ecephys',
                    description='Extracellular electrophysiology data.'
                )
                nwb_new.add_processing_module(ecephys_module)
                for interface_old in interfaces:
                    obj = copy_obj(interface_old, nwb_old, nwb_new)
                    if obj is not None:
                        ecephys_module.add_data_interface(obj)

            # Acquisition -----------------------------------------------------
            if 'acquisition' in cp_objs:
                if cp_objs['acquisition'] is True:
                    all_acq_names = list(nwb_old.acquisition.keys())
                else:  # list of items
                    all_acq_names = cp_objs['acquisition']
                for acq_name in all_acq_names:
                    obj_old = nwb_old.acquisition[acq_name]
                    obj = copy_obj(obj_old, nwb_old, nwb_new)
                    if obj is not None:
                        nwb_new.add_acquisition(obj)

            # Subject ---------------------------------------------------------
            if 'subject' in cp_objs:
                subject_old = nwb_old.subject
                try:
                    # first try to copy as an ECoGSubject with cortical surfaces
                    cortical_surfaces = CorticalSurfaces()
                    surfaces = subject_old.cortical_surfaces.surfaces
                    for sfc in list(surfaces.keys()):
                        cortical_surfaces.create_surface(
                            name=surfaces[sfc].name,
                            faces=surfaces[sfc].faces,
                            vertices=surfaces[sfc].vertices)
                    nwb_new.subject = ECoGSubject(
                        cortical_surfaces=cortical_surfaces,
                        subject_id=subject_old.subject_id,
                        age=subject_old.age,
                        description=subject_old.description,
                        genotype=subject_old.genotype,
                        sex=subject_old.sex,
                        species=subject_old.species,
                        weight=subject_old.weight,
                        date_of_birth=subject_old.date_of_birth)
                except Exception:
                    # fall back to a plain Subject; KeyboardInterrupt and
                    # SystemExit are deliberately no longer swallowed here
                    nwb_new.subject = Subject(
                        age=subject_old.age,
                        description=subject_old.description,
                        genotype=subject_old.genotype,
                        sex=subject_old.sex,
                        species=subject_old.species,
                        subject_id=subject_old.subject_id,
                        weight=subject_old.weight,
                        date_of_birth=subject_old.date_of_birth)

            # Write new file with copied fields
            io2.write(nwb_new, link_data=False)
Пример #4
0
    def convert_data(
        self,
        nwbfile: NWBFile,
        metadata_dict: dict,
        stub_test: bool = False,
        include_spike_waveforms: bool = False,
    ):
        """Convert the behavioral portion of a particular session of the GrosmarkAD dataset.

        Adds to ``nwbfile``: the stimulus events of the session, a 'states' table
        of sleep states (only if the sleep-state file exists), the 2D and
        linearized position series in the 'behavior' module, and one labelled
        epoch per entry of the position file's ``Epochs`` struct.

        The session folder is read from ``self.input_args["folder_path"]``.
        ``metadata_dict``, ``stub_test`` and ``include_spike_waveforms`` are
        accepted but not used by this method.
        """
        session_path = self.input_args["folder_path"]
        _, session_id = os.path.split(session_path)

        # Stimuli
        for event_series in get_events(session_path):
            nwbfile.add_stimulus(event_series)

        # States
        # The file name must be an f-string: without the prefix the literal text
        # '{session_id}' was looked up and the states were never loaded.
        sleep_state_fpath = os.path.join(
            session_path, f"{session_id}.SleepState.states.mat")
        # label renaming specific to Watson
        state_label_names = dict(WAKEstate="Awake",
                                 NREMstate="Non-REM",
                                 REMstate="REM")
        if os.path.isfile(sleep_state_fpath):
            matin = loadmat(sleep_state_fpath)["SleepState"]["ints"][0][0]

            table = TimeIntervals(name="states",
                                  description="Sleep states of animal.")
            table.add_column(name="label", description="Sleep state.")

            state_rows = []
            for name in matin.dtype.names:
                for row in matin[name][0][0]:
                    state_rows.append(
                        dict(
                            start_time=row[0],
                            stop_time=row[1],
                            label=state_label_names[name],
                        ))
            # rows are added in chronological order across all states
            for state_row in sorted(state_rows,
                                    key=lambda x: x["start_time"]):
                table.add_row(**state_row)
            check_module(nwbfile, "behavior",
                         "contains behavioral data").add_data_interface(table)

        # Position
        pos_filepath = Path(
            session_path) / f"{session_id}.position.behavior.mat"
        pos_mat = loadmat(str(pos_filepath.absolute()))
        position = pos_mat["position"]
        timestamps = position["timestamps"][0][0]
        starting_time = float(
            timestamps[0])  # confirmed to be a regularly sampled series
        # 'rate' is a sampling rate in Hz, i.e. the reciprocal of the interval
        # between two consecutive timestamps (not the interval itself).
        sample_interval = float(timestamps[1]) - starting_time
        rate = 1.0 / sample_interval
        if position["units"][0][0][0] == "m":
            conversion = 1.0
        else:
            warnings.warn(
                f"Spatial units ({pos_mat['position']['units'][0][0][0]}) not listed in meters; "
                "setting conversion to nan.")
            conversion = np.nan
        coordinates = position["position"][0][0]
        pos_data = [[x[0], y[0]] for x, y in zip(
            coordinates["x"][0][0],
            coordinates["y"][0][0],
        )]
        linearized_data = [[lin[0]] for lin in coordinates["lin"][0][0]]

        # the maze type (spaces removed) prefixes every object name below
        label = position["behaviorinfo"][0][0]["MazeType"][0][0][
            0].replace(" ", "")
        pos_obj = Position(name=f"{label}Position")
        spatial_series_object = SpatialSeries(
            name=f"{label}SpatialSeries",
            description=
            "(x,y) coordinates tracking subject movement through the maze.",
            data=H5DataIO(pos_data, compression="gzip"),
            reference_frame="unknown",
            conversion=conversion,
            starting_time=starting_time,
            rate=rate,
            resolution=np.nan,
        )
        pos_obj.add_spatial_series(spatial_series_object)
        check_module(
            nwbfile, "behavior",
            "contains processed behavioral data").add_data_interface(pos_obj)

        lin_pos_obj = Position(name=f"{label}LinearizedPosition")
        lin_spatial_series_object = SpatialSeries(
            name=f"{label}LinearizedTimeSeries",
            description=
            "Linearized position, defined as starting at the edge of reward area, "
            "and increasing clockwise, terminating at the opposing edge of the reward area.",
            data=H5DataIO(linearized_data, compression="gzip"),
            reference_frame="unknown",
            conversion=conversion,
            starting_time=starting_time,
            rate=rate,
            resolution=np.nan,
        )
        lin_pos_obj.add_spatial_series(lin_spatial_series_object)
        check_module(nwbfile, "behavior",
                     "contains processed behavioral data").add_data_interface(
                         lin_pos_obj)

        # Epochs
        # field names of the Epochs struct are the labels; each field is
        # flattened into [start, stop] windows, matched to the names by index
        epochs_struct = position["Epochs"][0][0]
        epoch_names = list(epochs_struct.dtype.names)
        epoch_windows = [[float(start), float(stop)]
                         for x in epochs_struct[0][0]
                         for start, stop in x]
        nwbfile.add_epoch_column("label", "name of epoch")
        for j, epoch_name in enumerate(epoch_names):
            nwbfile.add_epoch(
                start_time=epoch_windows[j][0],
                stop_time=epoch_windows[j][1],
                label=epoch_name,
            )
Пример #5
0
    def run_conversion(self,
                       nwbfile: NWBFile,
                       metadata: dict,
                       stub_test: bool = False):
        """Add the behavioral data found in the session folder to ``nwbfile``.

        The session folder is ``self.source_data["folder_path"]`` and its stem
        is used as the session id. Every input file is optional and is skipped
        when absent. The following are added, in this order:

        * every object returned by ``get_events`` as a stimulus;
        * for each task type that has a ``<session_id>__<task>.mat`` file, a
          ``Position`` interface in the "behavior" processing module holding
          one ``SpatialSeries`` per available ``twhl_norm`` /
          ``twhl_linearized`` array, plus one epoch per window returned by
          ``find_discontinuities``;
        * trials read from ``<session_id>__EightMazeRun.mat``;
        * a "states" ``TimeIntervals`` table built from
          ``<session_id>--StatePeriod.mat``.

        Parameters
        ----------
        nwbfile : NWBFile
            File object that is populated in place.
        metadata : dict
            Not read by this method.
        stub_test : bool
            Not read by this method.
        """
        session_path = Path(self.source_data["folder_path"])
        task_types = [
            dict(name="OpenFieldPosition_ExtraLarge"),
            dict(name="OpenFieldPosition_New_Curtain", conversion=0.46),
            dict(name="OpenFieldPosition_New", conversion=0.46),
            dict(name="OpenFieldPosition_Old_Curtain", conversion=0.46),
            dict(name="OpenFieldPosition_Old", conversion=0.46),
            dict(name="OpenFieldPosition_Oldlast", conversion=0.46),
            dict(name="EightMazePosition", conversion=0.65 / 2),
        ]

        subject_path = session_path.parent
        session_id = session_path.stem

        for event in get_events(session_path):
            nwbfile.add_stimulus(event)

        sleep_state_fpath = session_path / f"{session_id}--StatePeriod.mat"

        # Register the custom "label" epoch column up front, but only when at
        # least one position file exists (i.e. when epochs will be added).
        # The path must be an f-string: without the prefix the placeholders
        # are taken literally and the check can never succeed.
        exist_pos_data = any(
            (session_path / f"{session_id}__{task_type['name']}.mat").is_file()
            for task_type in task_types)
        if exist_pos_data:
            nwbfile.add_epoch_column("label", "Name of epoch.")

        # Epoch intervals
        for task_type in task_types:
            label = task_type["name"]

            file = session_path / f"{session_id}__{label}.mat"
            if not file.is_file():
                continue

            pos_obj = Position(name=f"{label}_position")

            matin = loadmat(file)
            # Column 0 is used as the timestamps of every series below
            tt = matin["twhl_norm"][:, 0]
            exp_times = find_discontinuities(tt)

            # Task types without a known scale factor are stored with NaN
            conversion = task_type.get("conversion", np.nan)

            for pos_type in ("twhl_norm", "twhl_linearized"):
                if pos_type not in matin:
                    continue
                # Everything after the first column is the position data
                pos_data_norm = matin[pos_type][:, 1:]

                spatial_series_object = SpatialSeries(
                    name=f"{label}_{pos_type}_spatial_series",
                    data=H5DataIO(pos_data_norm, compression="gzip"),
                    reference_frame="unknown",
                    conversion=conversion,
                    resolution=np.nan,
                    timestamps=H5DataIO(tt, compression="gzip"),
                )
                pos_obj.add_spatial_series(spatial_series_object)

            check_module(
                nwbfile, "behavior",
                "Contains processed behavioral data.").add_data_interface(
                    pos_obj)

            for i, window in enumerate(exp_times):
                epoch_label = f"{label}_{i}"
                nwbfile.add_epoch(
                    start_time=window[0],
                    stop_time=window[1],
                    # Every row has to fill the "label" column registered
                    # above; "tags" is kept so existing readers still work.
                    label=epoch_label,
                    tags=epoch_label,
                )

        # Trial intervals
        trialdata_path = session_path / f"{session_id}__EightMazeRun.mat"
        if trialdata_path.is_file():
            trials_data = loadmat(trialdata_path)["EightMazeRun"]

            trialdatainfo_path = subject_path / "EightMazeRunInfo.mat"
            trialdatainfo = [
                x[0]
                for x in loadmat(trialdatainfo_path)["EightMazeRunInfo"][0]
            ]

            features = trialdatainfo[:7]
            features[:2] = (
                "start_time",
                "stop_time",
            )
            # Only entries 4..6 become custom columns; "condition" is derived
            # from entry 3 of each trial row below.
            for column_name in features[4:] + ["condition"]:
                nwbfile.add_trial_column(column_name, "description")

            for trial_data in trials_data:
                cond = "run_left" if trial_data[3] else "run_right"
                nwbfile.add_trial(
                    start_time=trial_data[0],
                    stop_time=trial_data[1],
                    condition=cond,
                    error_run=trial_data[4],
                    stim_run=trial_data[5],
                    both_visit=trial_data[6],
                )

        # Sleep states
        if sleep_state_fpath.is_file():
            matin = loadmat(sleep_state_fpath)["StatePeriod"]
            table = TimeIntervals(name="states",
                                  description="sleep states of animal")
            table.add_column(name="label", description="sleep state")
            data = [
                dict(start_time=row[0], stop_time=row[1], label=name)
                for name in matin.dtype.names
                for row in matin[name][0][0]
            ]
            # Rows are inserted in order of increasing start time
            for row in sorted(data, key=lambda x: x["start_time"]):
                table.add_row(**row)
            check_module(nwbfile, "behavior",
                         "Contains behavioral data.").add_data_interface(table)
Пример #6
0
    def convert_data(
        self, nwbfile: NWBFile, metadata_dict: dict, stub_test: bool = False, include_spike_waveforms: bool = False
    ):
        """Populate ``nwbfile`` with the behavioral data of one session folder.

        The folder is ``self.input_args["folder_path"]``; its last path
        component is the session id. Stimuli from ``get_events`` are added
        first, followed by position series and epochs for each entry of
        ``metadata_dict["task_types"]`` whose ``.mat`` file exists, trials
        from ``<session_id>__EightMazeRun.mat`` and sleep states from
        ``<session_id>.SleepState.states.mat``. Missing files are skipped.

        ``stub_test`` and ``include_spike_waveforms`` are not read here.
        """
        session_path = self.input_args["folder_path"]
        # TODO: check/enforce format?
        task_types = metadata_dict.get("task_types", [])

        subject_path, session_id = os.path.split(session_path)
        fpath_base = os.path.dirname(subject_path)

        for event in get_events(session_path):
            nwbfile.add_stimulus(event)

        # The custom epoch column is registered once, as soon as the first
        # existing position file is found.
        for task_type in task_types:
            candidate = os.path.join(session_path, f"{session_id}__{task_type['name']}.mat")
            if os.path.isfile(candidate):
                nwbfile.add_epoch_column("label", "name of epoch")
                break

        for task_type in task_types:
            label = task_type["name"]

            file = os.path.join(session_path, "__".join((session_id, label)) + ".mat")
            if not os.path.isfile(file):
                continue

            pos_obj = Position(name=f"{label}_position")

            matin = loadmat(file)
            # First column of "twhl_norm" provides the timestamps
            tt = matin["twhl_norm"][:, 0]
            exp_times = find_discontinuities(tt)

            conversion = task_type.get("conversion", np.nan)

            for pos_type in ("twhl_norm", "twhl_linearized"):
                if pos_type not in matin:
                    continue
                pos_data_norm = matin[pos_type][:, 1:]
                pos_obj.add_spatial_series(
                    SpatialSeries(
                        name=f"{label}_{pos_type}_spatial_series",
                        data=H5DataIO(pos_data_norm, compression="gzip"),
                        reference_frame="unknown",
                        conversion=conversion,
                        resolution=np.nan,
                        timestamps=H5DataIO(tt, compression="gzip"),
                    )
                )

            behavior_module = check_module(nwbfile, "behavior", "contains processed behavioral data")
            behavior_module.add_data_interface(pos_obj)

            for i, window in enumerate(exp_times):
                nwbfile.add_epoch(start_time=window[0], stop_time=window[1], label=f"{label}_{i}")

        trialdata_path = os.path.join(session_path, f"{session_id}__EightMazeRun.mat")
        if os.path.isfile(trialdata_path):
            trials_data = loadmat(trialdata_path)["EightMazeRun"]

            trialdatainfo_path = os.path.join(fpath_base, "EightMazeRunInfo.mat")
            trialdatainfo = [entry[0] for entry in loadmat(trialdatainfo_path)["EightMazeRunInfo"][0]]

            column_names = trialdatainfo[:7]
            column_names[:2] = ["start_time", "stop_time"]
            # Entries 4 onward become custom trial columns, plus "condition"
            for column_name in column_names[4:] + ["condition"]:
                nwbfile.add_trial_column(column_name, "description")

            for trial_data in trials_data:
                nwbfile.add_trial(
                    start_time=trial_data[0],
                    stop_time=trial_data[1],
                    condition="run_left" if trial_data[3] else "run_right",
                    error_run=trial_data[4],
                    stim_run=trial_data[5],
                    both_visit=trial_data[6],
                )

        sleep_state_fpath = os.path.join(session_path, f"{session_id}.SleepState.states.mat")
        # label renaming specific to Watson
        state_label_names = dict(WAKEstate="Awake", NREMstate="Non-REM", REMstate="REM")
        if os.path.isfile(sleep_state_fpath):
            matin = loadmat(sleep_state_fpath)["SleepState"]["ints"][0][0]

            table = TimeIntervals(name="states", description="Sleep states of animal.")
            table.add_column(name="label", description="Sleep state.")

            data = [
                {"start_time": row[0], "stop_time": row[1], "label": state_label_names[name]}
                for name in matin.dtype.names
                for row in matin[name][0][0]
            ]
            # Rows go into the table ordered by start time
            for row in sorted(data, key=lambda x: x["start_time"]):
                table.add_row(**row)

            behavior_module = check_module(nwbfile, "behavior", "contains behavioral data")
            behavior_module.add_data_interface(table)
Пример #7
0
    def convert_data(self,
                     nwbfile: NWBFile,
                     metadata_dict: dict,
                     stub_test: bool = False,
                     include_spike_waveforms: bool = False):
        """Write the behavioral data of one session folder into ``nwbfile``.

        Uses ``self.input_args['folder_path']`` as the session folder and
        ``metadata_dict['task_types']`` as the list of tasks to look for.
        Adds, in order: stimuli from ``get_events``; per task with an existing
        ``<session_id>__<task>.mat`` file a ``Position`` interface and its
        epochs; trials from ``<session_id>__EightMazeRun.mat``; and a 'states'
        table from ``<session_id>--StatePeriod.mat``. Absent files are
        skipped.

        ``stub_test`` and ``include_spike_waveforms`` are not read here.
        """
        session_path = self.input_args['folder_path']
        # TODO: check/enforce format?
        task_types = metadata_dict['task_types']

        subject_path, session_id = os.path.split(session_path)
        fpath_base, _ = os.path.split(subject_path)

        for stimulus in get_events(session_path):
            nwbfile.add_stimulus(stimulus)

        sleep_state_fpath = os.path.join(session_path,
                                         f'{session_id}--StatePeriod.mat')

        # The custom epoch column is only needed when some position file
        # exists, because only then are epochs added below.
        position_file_found = any(
            os.path.isfile(
                os.path.join(session_path,
                             f"{session_id}__{task_type['name']}.mat"))
            for task_type in task_types)
        if position_file_found:
            nwbfile.add_epoch_column('label', 'name of epoch')

        for task_type in task_types:
            label = task_type['name']

            file = os.path.join(session_path,
                                ''.join((session_id, '__', label, '.mat')))
            if os.path.isfile(file):
                pos_obj = Position(name=f'{label}_position')

                matin = loadmat(file)
                # Timestamps come from the first column of 'twhl_norm'
                tt = matin['twhl_norm'][:, 0]
                exp_times = find_discontinuities(tt)

                conversion = (task_type['conversion']
                              if 'conversion' in task_type else np.nan)

                available_types = [
                    pos_type for pos_type in ('twhl_norm', 'twhl_linearized')
                    if pos_type in matin
                ]
                for pos_type in available_types:
                    pos_data_norm = matin[pos_type][:, 1:]
                    series_name = f'{label}_{pos_type}_spatial_series'
                    pos_obj.add_spatial_series(
                        SpatialSeries(
                            name=series_name,
                            data=H5DataIO(pos_data_norm, compression='gzip'),
                            reference_frame='unknown',
                            conversion=conversion,
                            resolution=np.nan,
                            timestamps=H5DataIO(tt, compression='gzip')))

                behavior = check_module(nwbfile, 'behavior',
                                        'contains processed behavioral data')
                behavior.add_data_interface(pos_obj)

                for i, window in enumerate(exp_times):
                    nwbfile.add_epoch(start_time=window[0],
                                      stop_time=window[1],
                                      label=f'{label}_{i}')

        trialdata_path = os.path.join(session_path,
                                      f'{session_id}__EightMazeRun.mat')
        if os.path.isfile(trialdata_path):
            trials_data = loadmat(trialdata_path)['EightMazeRun']

            trialdatainfo_path = os.path.join(fpath_base,
                                              'EightMazeRunInfo.mat')
            info_rows = loadmat(trialdatainfo_path)['EightMazeRunInfo'][0]
            trialdatainfo = [info_row[0] for info_row in info_rows]

            # Of the first seven entries only 4..6 become custom columns;
            # entries 0 and 1 are the start and stop times.
            for column_name in trialdatainfo[4:7] + ['condition']:
                nwbfile.add_trial_column(column_name, 'description')

            for trial_data in trials_data:
                cond = 'run_left' if trial_data[3] else 'run_right'
                nwbfile.add_trial(start_time=trial_data[0],
                                  stop_time=trial_data[1],
                                  condition=cond,
                                  error_run=trial_data[4],
                                  stim_run=trial_data[5],
                                  both_visit=trial_data[6])

        if os.path.isfile(sleep_state_fpath):
            matin = loadmat(sleep_state_fpath)['StatePeriod']

            table = TimeIntervals(name='states',
                                  description='sleep states of animal')
            table.add_column(name='label', description='sleep state')

            intervals = []
            for state_name in matin.dtype.names:
                intervals.extend(
                    dict(start_time=row[0], stop_time=row[1], label=state_name)
                    for row in matin[state_name][0][0])
            # Insert rows ordered by start time
            intervals.sort(key=lambda interval: interval['start_time'])
            for interval in intervals:
                table.add_row(**interval)

            behavior = check_module(nwbfile, 'behavior',
                                    'contains behavioral data')
            behavior.add_data_interface(table)
Пример #8
0
def chang2nwb(blockpath,
              outpath=None,
              session_start_time=None,
              session_description=None,
              identifier=None,
              anin4=False,
              ecog_format='auto',
              external_subject=True,
              include_pitch=False,
              include_intensity=False,
              speakers=True,
              mic=False,
              mini=False,
              hilb=False,
              verbose=False,
              imaging_path=None,
              parse_transcript=False,
              include_cortical_surfaces=True,
              include_electrodes=True,
              include_ekg=True,
              subject_image_list=None,
              rest_period=None,
              load_warped=False,
              **kwargs):
    """Convert one recording block to an NWB file and write it to disk.

    After writing, the file is opened and read once as a sanity check.

    Parameters
    ----------
    blockpath: str
        Path of the block folder. Its last component is the block name.
    outpath: None | str
        if None, output = [blockpath].nwb
    session_start_time: datetime.datetime
        default: datetime(1900, 1, 1) converted to UTC
    session_description: str
        default: blockname
    identifier: str
        default: blockname. Also used as the session_id.
    anin4: False | str
        Whether or not to convert ANIN4. ANIN4 is used as an extra channel for
        things like button presses, and is usually unused. If a string is
        supplied, that is used as the name of the timeseries.
    ecog_format: str
        'auto' (default), 'htk', 'mat' or 'raw'
    external_subject: bool (optional)
        True: (default) the subject is stored in [subject_id].nwb next to
            the output file (created if it does not exist yet) and the
            subject read from that file is attached. This is useful if you
            have multiple sessions for a single subject.
        False: the subject is saved normally
    include_pitch: bool (optional)
        add pitch data. Default: False
    include_intensity: bool (optional)
        add intensity data. Default: False
    speakers: bool (optional)
        add the speaker channels (analog 2 and 3) as stimuli. Default: True
    mic: bool (optional)
        add the microphone channel (analog 1). Default: False
    mini: bool (optional)
        only save a data stub (the first 2000 entries of the ECoG data).
        Used for testing. Default: False
    hilb: bool
        include Hilbert Transform data. Default: False
    verbose: bool (optional)
    imaging_path: str (optional)
        None: use IMAGING_PATH/[subject_id]
        'local': use [parent folder of blockpath]/imaging
        else: use [supplied string]/[subject_id]
    parse_transcript: False | str (optional)
        One of 'CV', 'singing', 'emphasis', 'MOCHA'. Default: False
    include_cortical_surfaces: bool (optional)
    include_electrodes: bool (optional)
        False: 256 placeholder electrodes with NaN coordinates are added
    include_ekg: bool (optional)
    subject_image_list: list (optional)
        List of paths of images to include
    rest_period: None | array-like
        (start, stop) added as an epoch labelled 'rest_period'
    load_warped: bool (optional)
        passed to add_electrodes. Default: False
    kwargs: dict
        passed to pynwb.NWBFile

    Returns
    -------
    None

    Raises
    ------
    ValueError
        if ecog_format is not one of the recognized values
    """

    behav_module = None

    basepath, blockname = os.path.split(blockpath)
    subject_id = get_subject_id(blockname)
    if identifier is None:
        identifier = blockname

    if session_description is None:
        session_description = blockname

    if outpath is None:
        outpath = blockpath + '.nwb'
    out_base_path = os.path.split(outpath)[0]

    if session_start_time is None:
        session_start_time = datetime(1900, 1, 1).astimezone(timezone('UTC'))

    if imaging_path is None:
        subj_imaging_path = path.join(IMAGING_PATH, subject_id)
    elif imaging_path == 'local':
        subj_imaging_path = path.join(basepath, 'imaging')
    else:
        subj_imaging_path = os.path.join(imaging_path, subject_id)

    # file paths
    bad_time_file = path.join(blockpath, 'Artifacts', 'badTimeSegments.mat')
    ecog_path = path.join(blockpath, 'RawHTK')
    ecog400_path = path.join(blockpath, 'ecog400', 'ecog.mat')
    elec_metadata_file = path.join(subj_imaging_path, 'elecs',
                                   'TDT_elecs_all.mat')
    mesh_path = path.join(subj_imaging_path, 'Meshes')
    pial_files = glob.glob(path.join(mesh_path, '*pial.mat'))

    # Create the NWB file object
    nwbfile = NWBFile(session_description,
                      identifier,
                      session_start_time,
                      datetime.now().astimezone(),
                      session_id=identifier,
                      institution='University of California, San Francisco',
                      lab='Chang Lab',
                      **kwargs)

    nwbfile.add_electrode_column('bad', 'electrode identified as too noisy')

    bad_elecs_inds = get_bad_elecs(blockpath)

    if include_electrodes:
        add_electrodes(nwbfile,
                       elec_metadata_file,
                       bad_elecs_inds,
                       load_warped=load_warped)
    else:
        # No electrode metadata requested: register 256 placeholders so the
        # electrical series still has an electrode table region to point to.
        device = nwbfile.create_device('256Grid')
        electrode_group = nwbfile.create_electrode_group(
            name='256Grid electrodes',
            description='auto_group',
            location='location',
            device=device)

        for elec_counter in range(256):
            bad = elec_counter in bad_elecs_inds
            nwbfile.add_electrode(id=elec_counter + 1,
                                  x=np.nan,
                                  y=np.nan,
                                  z=np.nan,
                                  imp=np.nan,
                                  location=' ',
                                  filtering='none',
                                  group=electrode_group,
                                  bad=bad)
    ecog_elecs = list(range(len(nwbfile.electrodes)))
    ecog_elecs_region = nwbfile.create_electrode_table_region(
        ecog_elecs, 'ECoG electrodes on brain')

    # Read electrophysiology data from HTK files and add them to NWB file
    if ecog_format == 'auto':
        # NOTE(review): `verbose` is not forwarded here (always False) --
        # confirm whether that is intentional.
        ecog_rate, data, ecog_path = auto_ecog(blockpath,
                                               ecog_elecs,
                                               verbose=False)
    elif ecog_format == 'htk':
        if verbose:
            print('reading htk acquisition...', flush=True)
        ecog_rate, data = readhtks(ecog_path, ecog_elecs)
        data = data.squeeze()
        if verbose:
            print('done', flush=True)

    elif ecog_format == 'mat':
        with File(ecog400_path, 'r') as f:
            data = f['ecogDS']['data'][:, ecog_elecs]
            ecog_rate = f['ecogDS']['sampFreq'][:].ravel()[0]
        ecog_path = ecog400_path

    elif ecog_format == 'raw':
        ecog_path = os.path.join(tdt_data_path, subject_id, blockname,
                                 'raw.mat')
        ecog_rate, data = load_wavs(ecog_path)

    else:
        raise ValueError('unrecognized argument: ecog_format')

    ts_desc = "all Wav data"

    if mini:
        data = data[:2000]

    ecog_ts = ElectricalSeries(name='ElectricalSeries',
                               data=H5DataIO(data, compression='gzip'),
                               electrodes=ecog_elecs_region,
                               rate=ecog_rate,
                               description=ts_desc,
                               conversion=0.001)
    nwbfile.add_acquisition(ecog_ts)

    if include_ekg:
        ekg_elecs = find_ekg_elecs(elec_metadata_file)
        if len(ekg_elecs):
            add_ekg(nwbfile, ecog_path, ekg_elecs)

    if mic:
        # Add microphone recording from room
        fs, data = get_analog(blockpath, 1)
        nwbfile.add_acquisition(
            TimeSeries('microphone',
                       data,
                       'audio unit',
                       rate=fs,
                       description="audio recording from microphone in room"))
    if speakers:
        fs, data = get_analog(blockpath, 2)
        # Add audio stimulus 1
        nwbfile.add_stimulus(
            TimeSeries('speaker 1',
                       data,
                       'NA',
                       rate=fs,
                       description="audio stimulus 1"))

        # Add audio stimulus 2
        fs, data = get_analog(blockpath, 3)
        if fs is not None:
            nwbfile.add_stimulus(
                TimeSeries('speaker 2',
                           data,
                           'NA',
                           rate=fs,
                           description='the second stimulus source'))

    if anin4:
        fs, data = get_analog(blockpath, 4)
        nwbfile.add_acquisition(
            TimeSeries(anin4,
                       data,
                       'aux unit',
                       rate=fs,
                       description="aux analog recording"))

    # Add bad time segments
    if os.path.exists(bad_time_file) and os.stat(bad_time_file).st_size:
        bad_time = sio.loadmat(bad_time_file)['badTimeSegments']
        for row in bad_time:
            nwbfile.add_invalid_time_interval(start_time=row[0],
                                              stop_time=row[1],
                                              tags=('ECoG artifact', ),
                                              timeseries=ecog_ts)

    if rest_period is not None:
        nwbfile.add_epoch_column(name='label', description='label')
        nwbfile.add_epoch(start_time=rest_period[0],
                          stop_time=rest_period[1],
                          label='rest_period')

    # Both handles below have to stay open until the output file is written:
    # the Hilbert data is streamed during the write, and the subject is read
    # from the external subject file. The try/finally guarantees they are
    # closed even when a later step raises.
    hilb_file = None
    subj_read_io = None
    try:
        if hilb:
            block_hilb_path = os.path.join(hilb_dir, subject_id, blockname,
                                           blockname + '_AA.h5')
            hilb_file = File(block_hilb_path, 'r')

            data = transpose_iter(
                hilb_file['X'])  # transposes data during iterative write
            filter_center = hilb_file['filter_center'][:]
            filter_sigma = hilb_file['filter_sigma'][:]

            data = H5DataIO(DataChunkIterator(tqdm(data,
                                                   desc='writing hilbert data'),
                                              buffer_size=400 * 20),
                            compression='gzip')

            decomp_series = DecompositionSeries(
                name='LFPDecompositionSeries',
                description='Gaussian band Hilbert transform',
                data=data,
                rate=400.,
                source_timeseries=ecog_ts,
                metric='amplitude')

            for band_mean, band_stdev in zip(filter_center, filter_sigma):
                decomp_series.add_band(band_mean=band_mean,
                                       band_stdev=band_stdev)

            hilb_mod = nwbfile.create_processing_module(
                name='ecephys', description='holds hilbert analysis results')
            hilb_mod.add_container(decomp_series)

        if include_cortical_surfaces:
            subject = ECoGSubject(subject_id=subject_id)
            subject.cortical_surfaces = create_cortical_surfaces(
                pial_files, subject_id)
        else:
            # The literal had been mangled to 'H**o sapiens'
            subject = Subject(subject_id=subject_id, species='Homo sapiens')

        if subject_image_list is not None:
            subject = add_images_to_subject(subject, subject_image_list)

        if external_subject:
            subj_fpath = path.join(out_base_path, subject_id + '.nwb')
            if not os.path.isfile(subj_fpath):
                subj_nwbfile = NWBFile(session_description=subject_id,
                                       identifier=subject_id,
                                       subject=subject,
                                       session_start_time=datetime(
                                           1900, 1,
                                           1).astimezone(timezone('UTC')))
                with NWBHDF5IO(subj_fpath, manager=manager,
                               mode='w') as subj_io:
                    subj_io.write(subj_nwbfile)
            subj_read_io = NWBHDF5IO(subj_fpath, manager=manager, mode='r')
            subj_nwbfile = subj_read_io.read()
            subject = subj_nwbfile.subject

        nwbfile.subject = subject

        if parse_transcript:
            if parse_transcript == 'CV':
                parseout = parse(blockpath, blockname)
                df = make_df(parseout, 0, subject_id, align_pos=1)
                nwbfile.add_trial_column('cv_transition_time',
                                         'time of CV transition in seconds')
                nwbfile.add_trial_column(
                    'speak',
                    'if True, subject is speaking. If False, subject is listening'
                )
                nwbfile.add_trial_column('condition', 'syllable spoken')
                for _, row in df.iterrows():
                    nwbfile.add_trial(start_time=row['start'],
                                      stop_time=row['stop'],
                                      cv_transition_time=row['align'],
                                      speak=row['mode'] == 'speak',
                                      condition=row['label'])
            elif parse_transcript == 'singing':
                parseout = parse(blockpath, blockname)
                df = make_df(parseout, 0, subject_id, align_pos=0)
                if not len(df):
                    df = pd.DataFrame(parseout)
                    df['mode'] = 'speak'

                # handle empty labels
                df = df.loc[df['label'].astype('bool'), :]
                nwbfile.add_trial_column(
                    'speak',
                    'if True, subject is speaking. If False, subject is listening'
                )
                nwbfile.add_trial_column('condition', 'syllable spoken')
                for _, row in df.iterrows():
                    nwbfile.add_trial(start_time=row['start'],
                                      stop_time=row['stop'],
                                      speak=row['mode'] == 'speak',
                                      condition=row['label'])
            elif parse_transcript == 'emphasis':
                parseout = parse(blockpath, blockname)
                try:
                    df = make_df(parseout, 0, subject_id, align_pos=0)
                except Exception:
                    # Best-effort fallback to the raw parse output. Narrowed
                    # from a bare `except:` so KeyboardInterrupt/SystemExit
                    # are no longer swallowed.
                    df = pd.DataFrame(parseout)
                if not len(df):
                    df = pd.DataFrame(parseout)
                # handle empty labels
                df = df.loc[df['label'].astype('bool'), :]
                nwbfile.add_trial_column('condition', 'word emphasized')
                nwbfile.add_trial_column(
                    'speak',
                    'if True, subject is speaking. If False, subject is listening'
                )
                for _, row in df.iterrows():
                    nwbfile.add_trial(start_time=row['start'],
                                      stop_time=row['stop'],
                                      speak=True,
                                      condition=row['label'])
            elif parse_transcript == 'MOCHA':
                nwbfile = create_transcription(nwbfile, transcript_path,
                                               blockname)

        # behavior
        if include_pitch:
            if behav_module is None:
                behav_module = nwbfile.create_processing_module(
                    'behavior', 'processing about behavior')
            if os.path.isfile(
                    os.path.join(blockpath, 'pitch_' + blockname + '.mat')):
                fs, data = load_pitch(blockpath)
                pitch_ts = TimeSeries(
                    data=data,
                    rate=fs,
                    unit='Hz',
                    name='pitch',
                    description=
                    'Pitch as extracted from Praat. NaNs mark unvoiced regions.'
                )
                behav_module.add_container(
                    BehavioralTimeSeries(name='pitch', time_series=pitch_ts))
            else:
                print('No pitch file for ' + blockname)

        if include_intensity:
            if behav_module is None:
                behav_module = nwbfile.create_processing_module(
                    'behavior', 'processing about behavior')
            if os.path.isfile(
                    os.path.join(blockpath,
                                 'intensity_' + blockname + '.mat')):
                # NOTE(review): this loads via `load_pitch` although the file
                # checked above is the intensity file -- confirm whether an
                # intensity loader should be used instead.
                fs, data = load_pitch(blockpath)
                intensity_ts = TimeSeries(
                    data=data,
                    rate=fs,
                    unit='dB',
                    name='intensity',
                    description=
                    'Intensity of speech in dB extracted from Praat.')
                behav_module.add_container(
                    BehavioralTimeSeries(name='intensity',
                                         time_series=intensity_ts))
            else:
                print('No intensity file for ' + blockname)

        # Export the NWB file
        with NWBHDF5IO(outpath, manager=manager, mode='w') as io:
            io.write(nwbfile)
    finally:
        if subj_read_io is not None:
            subj_read_io.close()
        if hilb_file is not None:
            hilb_file.close()

    # read check
    with NWBHDF5IO(outpath, manager=manager, mode='r') as io:
        io.read()
Пример #9
0
def nwb_copy_file(old_file, new_file, cp_objs=None, save_to_file=True):
    """
    Copy the fields selected in 'cp_objs' from an existing NWB file to a new
    NWB file.

    Parameters
    ----------
    old_file : str, path, nwbfile
        String or path to nwb file '/path/to/old_file.nwb'. Alternatively, the
        nwbfile object.
    new_file : str, path
        String such as '/path/to/new_file.nwb'.
    cp_objs : dict, optional
        Name:Value pairs (Group:Children) listing the groups and respective
        children from the current NWB file to be copied. Children can be:
        - Boolean, indicating an attribute (e.g. for institution, lab)
        - List of strings, containing several children names
        Example:
        {'institution':True,
         'lab':True,
         'acquisition':['microphone'],
         'ecephys':['LFP','DecompositionSeries']}
        Recognised keys are 'institution', 'lab', 'session', 'devices',
        'electrode_groups', 'electrodes', 'epochs', 'invalid_times',
        'trials', 'intervals', 'stimulus', 'ecephys', 'behavior',
        'acquisition', 'surveys' and 'subject'. For most keys only their
        presence is tested; the value is used for 'ecephys', 'behavior' and
        'acquisition' (iterated as child names) and for 'surveys'
        (truthiness). None (the default) is treated as an empty dict.
    save_to_file: Boolean
        If True, saves directly to new_file.nwb. If False, only returns
        nwb_new and new_file is neither created nor overwritten.

    Returns
    -------
    nwb_new : nwbfile object
        NOTE(review): when old_file was given as a path, the reader is closed
        before returning, so data that was only lazily referenced from the
        old file may no longer be readable through nwb_new - confirm.
    """
    if cp_objs is None:
        cp_objs = {}

    manager = get_manager()

    io1 = None
    try:
        # Get from nwbfile object in memory or from file
        if isinstance(old_file, NWBFile):
            nwb_old = old_file
        else:
            io1 = NWBHDF5IO(str(old_file),
                            'r',
                            manager=manager,
                            load_namespaces=True)
            nwb_old = io1.read()

        # Creates new file
        nwb_new = NWBFile(
            session_description=str(nwb_old.session_description),
            identifier=id_generator(),
            session_start_time=nwb_old.session_start_time,
        )

        # Institution name ------------------------------------------------
        if 'institution' in cp_objs:
            nwb_new.institution = str(nwb_old.institution)

        # Lab name --------------------------------------------------------
        if 'lab' in cp_objs:
            nwb_new.lab = str(nwb_old.lab)

        # Session id ------------------------------------------------------
        if 'session' in cp_objs:
            nwb_new.session_id = nwb_old.session_id

        # Devices ---------------------------------------------------------
        if 'devices' in cp_objs:
            for aux in list(nwb_old.devices.keys()):
                dev = Device(nwb_old.devices[aux].name)
                nwb_new.add_device(dev)

        # Electrode groups ------------------------------------------------
        if 'electrode_groups' in cp_objs and nwb_old.electrode_groups is not None:
            for aux in list(nwb_old.electrode_groups.keys()):
                group_old = nwb_old.electrode_groups[aux]
                # The device is looked up in the new file, so 'devices' must
                # have been copied for the lookup to succeed.
                nwb_new.create_electrode_group(
                    name=str(group_old.name),
                    description=str(group_old.description),
                    location=str(group_old.location),
                    device=nwb_new.get_device(group_old.device.name))

        # Electrodes ------------------------------------------------------
        if 'electrodes' in cp_objs and nwb_old.electrodes is not None:
            nElec = len(nwb_old.electrodes['x'].data[:])
            for aux in np.arange(nElec):
                nwb_new.add_electrode(
                    x=nwb_old.electrodes['x'][aux],
                    y=nwb_old.electrodes['y'][aux],
                    z=nwb_old.electrodes['z'][aux],
                    imp=nwb_old.electrodes['imp'][aux],
                    location=str(nwb_old.electrodes['location'][aux]),
                    filtering=str(nwb_old.electrodes['filtering'][aux]),
                    group=nwb_new.get_electrode_group(
                        nwb_old.electrodes['group'][aux].name),
                    group_name=str(nwb_old.electrodes['group_name'][aux]))
            # if there are custom variables
            default_vars = [
                'x', 'y', 'z', 'imp', 'location', 'filtering', 'group',
                'group_name'
            ]
            # Filtering (rather than list.remove) tolerates tables that lack
            # one of the default columns.
            new_vars = [
                var for var in nwb_old.electrodes.colnames
                if var not in default_vars
            ]
            for var in new_vars:
                if var == 'label':
                    var_data = [
                        str(elem) for elem in nwb_old.electrodes[var].data[:]
                    ]
                else:
                    var_data = np.array(nwb_old.electrodes[var].data[:])

                nwb_new.add_electrode_column(
                    name=str(var),
                    description=str(nwb_old.electrodes[var].description),
                    data=var_data)

            # If Bipolar scheme for electrodes
            for v in nwb_old.lab_meta_data.values():
                if isinstance(v, EcephysExt) and hasattr(
                        v, 'bipolar_scheme_table'):
                    bst_old = v.bipolar_scheme_table
                    # Only name and description are carried over; the new
                    # table is created without rows.
                    bst_new = BipolarSchemeTable(
                        name=bst_old.name, description=bst_old.description)
                    ecephys_ext = EcephysExt(name=v.name)
                    ecephys_ext.bipolar_scheme_table = bst_new
                    nwb_new.add_lab_meta_data(ecephys_ext)

        # Epochs ----------------------------------------------------------
        if 'epochs' in cp_objs and nwb_old.epochs is not None:
            nEpochs = len(nwb_old.epochs['start_time'].data[:])
            for i in np.arange(nEpochs):
                nwb_new.add_epoch(
                    start_time=nwb_old.epochs['start_time'].data[i],
                    stop_time=nwb_old.epochs['stop_time'].data[i])
            # if there are custom variables
            default_vars = ['start_time', 'stop_time', 'tags', 'timeseries']
            new_vars = [
                var for var in nwb_old.epochs.colnames
                if var not in default_vars
            ]
            for var in new_vars:
                nwb_new.add_epoch_column(
                    name=var,
                    description=nwb_old.epochs[var].description,
                    data=nwb_old.epochs[var].data[:])

        # Invalid times ---------------------------------------------------
        if 'invalid_times' in cp_objs and nwb_old.invalid_times is not None:
            nInvalid = len(nwb_old.invalid_times['start_time'][:])
            for aux in np.arange(nInvalid):
                nwb_new.add_invalid_time_interval(
                    start_time=nwb_old.invalid_times['start_time'][aux],
                    stop_time=nwb_old.invalid_times['stop_time'][aux])

        # Trials ----------------------------------------------------------
        if 'trials' in cp_objs and nwb_old.trials is not None:
            nTrials = len(nwb_old.trials['start_time'])
            for aux in np.arange(nTrials):
                nwb_new.add_trial(start_time=nwb_old.trials['start_time'][aux],
                                  stop_time=nwb_old.trials['stop_time'][aux])
            # if there are custom variables
            default_vars = ['start_time', 'stop_time']
            new_vars = [
                var for var in nwb_old.trials.colnames
                if var not in default_vars
            ]
            for var in new_vars:
                nwb_new.add_trial_column(
                    name=var,
                    description=nwb_old.trials[var].description,
                    data=nwb_old.trials[var].data[:])

        # Intervals -------------------------------------------------------
        if 'intervals' in cp_objs and nwb_old.intervals is not None:
            for obj_name in list(nwb_old.intervals.keys()):
                obj_old = nwb_old.intervals[obj_name]
                # create and add TimeIntervals
                obj = TimeIntervals(name=obj_old.name,
                                    description=obj_old.description)
                nInt = len(obj_old['start_time'])
                for ind in np.arange(nInt):
                    obj.add_interval(start_time=obj_old['start_time'][ind],
                                     stop_time=obj_old['stop_time'][ind])
                # Add to file
                nwb_new.add_time_intervals(obj)

        # Stimulus --------------------------------------------------------
        if 'stimulus' in cp_objs:
            for obj_name in list(nwb_old.stimulus.keys()):
                obj_old = nwb_old.stimulus[obj_name]
                obj = TimeSeries(name=obj_old.name,
                                 description=obj_old.description,
                                 data=obj_old.data[:],
                                 rate=obj_old.rate,
                                 resolution=obj_old.resolution,
                                 conversion=obj_old.conversion,
                                 starting_time=obj_old.starting_time,
                                 unit=obj_old.unit)
                nwb_new.add_stimulus(obj)

        # Processing modules ----------------------------------------------
        if 'ecephys' in cp_objs:
            interfaces = [
                nwb_old.processing['ecephys'].data_interfaces[key]
                for key in cp_objs['ecephys']
            ]
            # Add ecephys module to NWB file
            ecephys_module = ProcessingModule(
                name='ecephys',
                description='Extracellular electrophysiology data.')
            nwb_new.add_processing_module(ecephys_module)
            for interface_old in interfaces:
                obj = copy_obj(interface_old, nwb_old, nwb_new)
                if obj is not None:
                    ecephys_module.add_data_interface(obj)

        if 'behavior' in cp_objs:
            interfaces = [
                nwb_old.processing['behavior'].data_interfaces[key]
                for key in cp_objs['behavior']
            ]
            if 'behavior' in nwb_new.processing:
                # Reuse an existing module instead of leaving the name unbound
                behavior_module = nwb_new.processing['behavior']
            else:
                # Add behavior module to NWB file
                behavior_module = ProcessingModule(
                    name='behavior', description='behavioral data.')
                nwb_new.add_processing_module(behavior_module)
            for interface_old in interfaces:
                obj = copy_obj(interface_old, nwb_old, nwb_new)
                if obj is not None:
                    behavior_module.add_data_interface(obj)

        # Acquisition -----------------------------------------------------
        # Can get raw ElectricalSeries and Mic recording
        if 'acquisition' in cp_objs:
            for acq_name in cp_objs['acquisition']:
                obj_old = nwb_old.acquisition[acq_name]
                acq = copy_obj(obj_old, nwb_old, nwb_new)
                nwb_new.add_acquisition(acq)

        # Surveys ---------------------------------------------------------
        if 'surveys' in cp_objs and 'behavior' in nwb_old.processing:
            surveys_list = [
                v for v in
                nwb_old.processing['behavior'].data_interfaces.values()
                if v.neurodata_type == 'SurveyTable'
            ]
            if cp_objs['surveys'] and len(surveys_list) > 0:
                if 'behavior' in nwb_new.processing:
                    behavior_module = nwb_new.processing['behavior']
                else:
                    # Add behavior module to NWB file
                    behavior_module = ProcessingModule(
                        name='behavior', description='behavioral data.')
                    nwb_new.add_processing_module(behavior_module)
                for obj_old in surveys_list:
                    srv = copy_obj(obj_old, nwb_old, nwb_new)
                    behavior_module.add_data_interface(srv)

        # Subject ---------------------------------------------------------
        if nwb_old.subject is not None and 'subject' in cp_objs:
            try:
                cortical_surfaces = CorticalSurfaces()
                surfaces = nwb_old.subject.cortical_surfaces.surfaces
                for sfc in list(surfaces.keys()):
                    cortical_surfaces.create_surface(
                        name=surfaces[sfc].name,
                        faces=surfaces[sfc].faces,
                        vertices=surfaces[sfc].vertices)
                nwb_new.subject = ECoGSubject(
                    cortical_surfaces=cortical_surfaces,
                    subject_id=nwb_old.subject.subject_id,
                    age=nwb_old.subject.age,
                    description=nwb_old.subject.description,
                    genotype=nwb_old.subject.genotype,
                    sex=nwb_old.subject.sex,
                    species=nwb_old.subject.species,
                    weight=nwb_old.subject.weight,
                    date_of_birth=nwb_old.subject.date_of_birth)
            except Exception:
                # Best-effort fallback to a plain Subject when the ECoG
                # subject cannot be rebuilt (e.g. no cortical surfaces).
                # KeyboardInterrupt / SystemExit are no longer swallowed.
                nwb_new.subject = Subject(**nwb_old.subject.fields)

        # Write new file with copied fields. The writer is opened only when
        # requested so that save_to_file=False never touches new_file, and
        # while the old file is still open so its data can be copied.
        if save_to_file:
            with NWBHDF5IO(new_file, mode='w', manager=manager,
                           load_namespaces=False) as io2:
                io2.write(nwb_new, link_data=False)
    finally:
        # Close old file (if this function opened it), even on failure
        if io1 is not None:
            io1.close()

    return nwb_new
Пример #10
0
def run_conversion(
        fpath_in='/Volumes/easystore5T/data/Brunton/subj_01_day_4.h5',
        fpath_out='/Volumes/easystore5T/data/Brunton/subj_01_day_4.nwb',
        events_path='C:/Users/micha/Desktop/Brunton Lab Data/event_times.csv',
        r2_path='C:/Users/micha/Desktop/Brunton Lab Data/full_model_r2.npy',
        coarse_events_path='C:/Users/micha/Desktop/Brunton Lab Data/coarse_labels/coarse_labels',
        reach_features_path='C:/Users/micha/Desktop/Brunton Lab Data/behavioral_features.csv',
        elec_loc_labels_path='elec_loc_labels.csv',
        special_chans=SPECIAL_CHANNELS,
        session_description='no description'
):
    """Convert one subject/day HDF5 recording plus side-car files to NWB.

    Parameters
    ----------
    fpath_in : str
        Path to the source HDF5 file. The file name (without extension) must
        split on '_' into exactly four parts; the second is used as the
        subject id and the fourth as the session (recording day).
    fpath_out : str
        Path of the NWB file to write.
    events_path : str
        CSV with columns 'Subject', 'Recording day', 'Event time' and
        'Event type'.
    r2_path : str
        .npy file indexed as [subject - 1, electrode, band], where band 0 is
        stored as 'low_freq_R2' and band 1 as 'high_freq_R2'.
    coarse_events_path : str
        Directory containing 'sub{subject_id}_fullday_{session}.npy'.
    reach_features_path : str
        CSV with columns 'Subject', 'Recording day' and the per-reach
        features copied into the 'reaches' intervals table.
    elec_loc_labels_path : str
        CSV with columns 'subject_ID', 'label' and 'long_name' describing
        the electrode groups.
    special_chans : collection of bytes
        Channel labels that are not added to the electrodes table.
    session_description : str
        Stored as the NWB session description.
    """
    print(f"Converting {fpath_in}...")
    fname = os.path.split(os.path.splitext(fpath_in)[0])[1]
    _, subject_id, _, session = fname.split('_')

    # The source file has to stay open until the NWB file is written because
    # the electrical data is read lazily from it during the write; the
    # context manager then guarantees it is closed, also on errors.
    with File(fpath_in, 'r') as file:

        nwbfile = NWBFile(
            session_description=session_description,
            identifier=str(uuid.uuid4()),
            session_start_time=datetime.fromtimestamp(file['start_timestamp'][()]),
            subject=Subject(subject_id=subject_id, species="Homo sapiens"),
            session_id=session
        )

        # extract electrode groups
        file_elec_col_names = file['chan_info']['axis1'][:]
        elec_data = file['chan_info']['block0_values']

        re_exp = re.compile("([ a-zA-Z]+)([0-9]+)")

        channel_labels_dset = file['chan_info']['axis0']
        # Materialise the labels once instead of once per lookup below
        channel_labels = list(channel_labels_dset)

        group_names, group_nums = [], []
        for bytes_ in channel_labels:
            if bytes_ not in special_chans:
                str_ = bytes_.decode()
                res = re_exp.match(str_).groups()
                group_names.append(res[0])
                group_nums.append(int(res[1]))

        is_elec = ~np.isin(channel_labels_dset, special_chans)

        dset = DatasetView(file['dataset']).lazy_transpose()

        # add special channels
        for kwargs in (
                dict(
                    name='EOGL',
                    description='Electrooculography for tracking saccades - left',
                ),
                dict(
                    name='EOGR',
                    description='Electrooculography for tracking saccades - right',
                ),
                dict(
                    name='ECGL',
                    description='Electrocardiography - left',
                ),
                dict(
                    name='ECGR',
                    description='Electrocardiography - right',
                )
        ):
            channel_key = kwargs['name'].encode()
            if channel_key in channel_labels:
                nwbfile.add_acquisition(
                    TimeSeries(
                        rate=file['f_sample'][()],
                        conversion=np.nan,
                        unit='V',
                        data=dset[:, channel_labels.index(channel_key)],
                        **kwargs
                    )
                )

        # add electrode groups
        df = pd.read_csv(elec_loc_labels_path)
        df_subject = df[df['subject_ID'] == 'subj' + subject_id]
        electrode_group_descriptions = {row['label']: row['long_name'] for _, row in df_subject.iterrows()}

        groups_map = dict()
        for group_name, group_description in electrode_group_descriptions.items():
            device = nwbfile.create_device(name=group_name)
            groups_map[group_name] = nwbfile.create_electrode_group(
                name=group_name,
                description=group_description,
                device=device,
                location='unknown'
            )

        # add required cols to electrodes table
        # Only the non-special channels are iterated so that each row lines up
        # with its entry in group_names even if a special channel is not last.
        for row, group_name in zip(elec_data[:].T[is_elec], group_names):
            nwbfile.add_electrode(
                x=row[file_elec_col_names == b'X'][0],
                y=row[file_elec_col_names == b'Y'][0],
                z=row[file_elec_col_names == b'Z'][0],
                imp=np.nan,
                location='unknown',
                filtering='250 Hz lowpass',
                group=groups_map[group_name],
            )

        # load r2 values to input into custom cols in electrodes table
        r2 = np.load(r2_path)
        low_freq_r2 = np.ravel(r2[int(subject_id)-1, :len(group_names), 0])

        high_freq_r2 = np.ravel(r2[int(subject_id)-1, :len(group_names), 1])

        # add custom cols to electrodes table
        elecs_dset = file['chan_info']['block0_values']

        def get_data(label):
            # One per-channel feature row, restricted to real electrodes
            return elecs_dset[file_elec_col_names == label, :].ravel()[is_elec]

        for kwargs in (
            dict(
                name='standard_deviation',
                description="standard deviation of each electrode's data for the entire recording period",
                data=get_data(b'SD_channels')
            ),
            dict(
                name='kurtosis',
                description="kurtosis of each electrode's data for the entire recording period",
                data=get_data(b'Kurt_channels')
            ),
            dict(
                name='median_deviation',
                description="median absolute deviation estimator for standard deviation for each electrode",
                data=get_data(b'standardizeDenoms')
            ),
            dict(
                name='good',
                description='good electrodes',
                data=get_data(b'goodChanInds').astype(bool)

            ),
            dict(
                name='low_freq_R2',
                description='R^2 for low frequency band on each electrode',
                data=low_freq_r2
            ),
            dict(
                name='high_freq_R2',
                description='R^2 for high frequency band on each electrode',
                data=high_freq_r2
            )
        ):
            nwbfile.add_electrode_column(**kwargs)

        # confirm that electrodes table looks right
        # nwbfile.electrodes.to_dataframe()

        # add ElectricalSeries
        elecs_data = dset.lazy_slice[:, is_elec]
        n_bytes = np.dtype(elecs_data).itemsize

        nwbfile.add_acquisition(
            ElectricalSeries(
                name='ElectricalSeries',
                data=H5DataIO(
                    data=DataChunkIterator(
                        data=elecs_data,
                        maxshape=elecs_data.shape,
                        # rows per buffer = byte budget // bytes per row; the
                        # parentheses are required because // and * share
                        # precedence and associate left to right
                        buffer_size=int(5000 * 1e6) // (elecs_data.shape[1] * n_bytes)
                    ),
                    compression='gzip'
                ),
                rate=file['f_sample'][()],
                conversion=1e-6,  # data is in uV
                electrodes=nwbfile.create_electrode_table_region(
                    region=list(range(len(nwbfile.electrodes))),
                    description='all electrodes'
                )
            )
        )

        # add pose data
        pose_dset = file['pose_data']['block0_values']

        # Columns are consumed in consecutive (even, odd) pairs; the series
        # name is the even column's label with its last two characters removed.
        nwbfile.create_processing_module(
            name='behavior',
            description='pose data').add(
            Position(
                spatial_series=[
                    SpatialSeries(
                        name=file['pose_data']['axis0'][x_ind][:-2].decode(),
                        data=H5DataIO(
                            data=pose_dset[:, [x_ind, y_ind]],
                            compression='gzip'
                        ),
                        reference_frame='unknown',
                        conversion=np.nan,
                        rate=30.
                    ) for x_ind, y_ind in zip(
                        range(0, pose_dset.shape[1], 2),
                        range(1, pose_dset.shape[1], 2))
                ]
            )
        )

        # add events
        events = pd.read_csv(events_path)
        mask = (events['Subject'] == int(subject_id)) & (events['Recording day'] == int(session))
        events = events[mask]
        timestamps = events['Event time'].values
        events = events.reset_index()

        events = Events(
            name='ReachEvents',
            description=events['Event type'][0],  # Specifies which arm was used
            timestamps=timestamps,
            resolution=2e-3,  # resolution of the timestamps, i.e., smallest possible difference between timestamps
        )

        # add the Events type to the processing group of the NWB file
        nwbfile.processing['behavior'].add(events)

        # add coarse behavioral labels
        event_fp = f'sub{subject_id}_fullday_{session}'
        full_fp = os.path.join(coarse_events_path, event_fp + '.npy')
        # NOTE(review): allow_pickle=True can execute arbitrary code when the
        # file is untrusted; only load label files from a trusted source.
        coarse_events = np.load(full_fp, allow_pickle=True)

        label, data = np.unique(coarse_events, return_inverse=True)
        transition_idx = np.where(np.diff(data) != 0)
        start_t = nwbfile.processing["behavior"].data_interfaces["Position"]['L_Wrist'].starting_time
        rate = nwbfile.processing["behavior"].data_interfaces["Position"]['L_Wrist'].rate
        times = np.divide(transition_idx, rate) + start_t  # 30Hz sampling rate
        max_time = (np.shape(coarse_events)[0] / rate) + start_t
        times = np.hstack([start_t, np.ravel(times), max_time])
        transition_labels = np.hstack([label[data[transition_idx]], label[data[-1]]])

        nwbfile.add_epoch_column(name='labels', description='Coarse behavioral labels')

        for start_time, stop_time, epoch_label in zip(times[:-1], times[1:], transition_labels):
            nwbfile.add_epoch(start_time=start_time, stop_time=stop_time, labels=epoch_label)

        # add additional reaching features
        reach_features = pd.read_csv(reach_features_path)
        mask = (reach_features['Subject'] == int(subject_id)) & (reach_features['Recording day'] == int(session))
        reach_features = reach_features[mask]

        reaches = TimeIntervals(name='reaches', description='Features of each reach')
        reaches.add_column(name='Reach_magnitude_px', description='Magnitude of reach in pixels')
        reaches.add_column(name='Reach_angle_degrees', description='Reach angle in degrees')
        reaches.add_column(name='Onset_speed_px_per_sec', description='Onset speed in pixels / second)')
        reaches.add_column(name='Speech_ratio', description='rough estimation of whether someone is likely to be speaking '
                                                            'based on a power ratio of audio data; ranges from 0 (no '
                                                            'speech) to 1 (high likelihood of speech)h')
        reaches.add_column(name='Bimanual_ratio', description='ratio of ipsilateral wrist reach magnitude to the sum of '
                                                              'ipsilateral and contralateral wrist magnitudes; ranges from '
                                                              '0 (unimanual/contralateral move only) to 1 (only ipsilateral'
                                                              ' arm moving); 0.5 indicates bimanual movement')
        reaches.add_column(name='Bimanual_overlap', description='The amount of ipsilateral and contralateral wrist temporal'
                                                                'overlap as a fraction of the entire contralateral movement'
                                                                ' duration')
        reaches.add_column(name='Bimanual_class', description='binary feature that classifies each movement event as '
                                                              'unimanual (0) or bimanual (1) based on how close in time a '
                                                              'ipsilateral wrist movement started relative to each '
                                                              'contralateral wrist movement events')
        for _, row_data in reach_features.iterrows():
            start_time = row_data['Time of day (sec)']
            stop_time = start_time + row_data['Reach duration (sec)']
            reaches.add_row(start_time=start_time,
                            stop_time=stop_time,
                            Reach_magnitude_px=row_data['Reach magnitude (px)'],
                            Reach_angle_degrees=row_data['Reach angle (degrees)'],
                            Onset_speed_px_per_sec=row_data['Onset speed (px/sec)'],
                            Speech_ratio=row_data['Speech ratio'],
                            Bimanual_ratio=row_data['Bimanual ratio'],
                            Bimanual_overlap=row_data['Bimanual overlap (sec)'],
                            Bimanual_class=row_data['Bimanual class']
                            )

        nwbfile.add_time_intervals(reaches)

        with NWBHDF5IO(fpath_out, 'w') as io:
            io.write(nwbfile)
Пример #11
0
def yuta2nwb(
        session_path='D:/BuzsakiData/SenzaiY/YutaMouse41/YutaMouse41-150903',
        # '/Users/bendichter/Desktop/Buzsaki/SenzaiBuzsaki2017/YutaMouse41/YutaMouse41-150903',
        subject_xls=None,
        include_spike_waveforms=True,
        stub=True,
        cache_spec=True):

    subject_path, session_id = os.path.split(session_path)
    fpath_base = os.path.split(subject_path)[0]
    identifier = session_id
    mouse_number = session_id[9:11]
    if '-' in session_id:
        subject_id, date_text = session_id.split('-')
        b = False
    else:
        subject_id, date_text = session_id.split('b')
        b = True

    if subject_xls is None:
        subject_xls = os.path.join(subject_path,
                                   'YM' + mouse_number + ' exp_sheet.xlsx')
    else:
        if not subject_xls[-4:] == 'xlsx':
            subject_xls = os.path.join(subject_xls,
                                       'YM' + mouse_number + ' exp_sheet.xlsx')

    session_start_time = dateparse(date_text, yearfirst=True)

    df = pd.read_excel(subject_xls)

    subject_data = {}
    for key in [
            'genotype', 'DOB', 'implantation', 'Probe', 'Surgery',
            'virus injection', 'mouseID'
    ]:
        names = df.iloc[:, 0]
        if key in names.values:
            subject_data[key] = df.iloc[np.argmax(names == key), 1]

    if isinstance(subject_data['DOB'], datetime):
        age = session_start_time - subject_data['DOB']
    else:
        age = None

    subject = Subject(subject_id=subject_id,
                      age=str(age),
                      genotype=subject_data['genotype'],
                      species='mouse')

    nwbfile = NWBFile(
        session_description='mouse in open exploration and theta maze',
        identifier=identifier,
        session_start_time=session_start_time.astimezone(),
        file_create_date=datetime.now().astimezone(),
        experimenter='Yuta Senzai',
        session_id=session_id,
        institution='NYU',
        lab='Buzsaki',
        subject=subject,
        related_publications='DOI:10.1016/j.neuron.2016.12.011')

    print('reading and writing raw position data...', end='', flush=True)
    ns.add_position_data(nwbfile, session_path)

    shank_channels = ns.get_shank_channels(session_path)[:8]
    nshanks = len(shank_channels)
    all_shank_channels = np.concatenate(shank_channels)

    print('setting up electrodes...', end='', flush=True)
    hilus_csv_path = os.path.join(fpath_base, 'early_session_hilus_chans.csv')
    lfp_channel = get_reference_elec(subject_xls,
                                     hilus_csv_path,
                                     session_start_time,
                                     session_id,
                                     b=b)

    custom_column = [{
        'name': 'theta_reference',
        'description':
        'this electrode was used to calculate LFP canonical bands',
        'data': all_shank_channels == lfp_channel
    }]
    ns.write_electrode_table(nwbfile,
                             session_path,
                             custom_columns=custom_column,
                             max_shanks=max_shanks)

    print('reading raw electrode data...', end='', flush=True)
    if stub:
        # example recording extractor for fast testing
        xml_filepath = os.path.join(session_path, session_id + '.xml')
        xml_root = et.parse(xml_filepath).getroot()
        acq_sampling_frequency = float(
            xml_root.find('acquisitionSystem').find('samplingRate').text)
        num_channels = 4
        num_frames = 10000
        X = np.random.normal(0, 1, (num_channels, num_frames))
        geom = np.random.normal(0, 1, (num_channels, 2))
        X = (X * 100).astype(int)
        sre = se.NumpyRecordingExtractor(
            timeseries=X, sampling_frequency=acq_sampling_frequency, geom=geom)
    else:
        nre = se.NeuroscopeRecordingExtractor('{}/{}.dat'.format(
            session_path, session_id))
        sre = se.SubRecordingExtractor(nre, channel_ids=all_shank_channels)

    print('writing raw electrode data...', end='', flush=True)
    se.NwbRecordingExtractor.add_electrical_series(sre, nwbfile)
    print('done.')

    print('reading spiking units...', end='', flush=True)
    if stub:
        spike_times = [200, 300, 400]
        num_frames = 10000
        allshanks = []
        for k in range(nshanks):
            SX = se.NumpySortingExtractor()
            for j in range(len(spike_times)):
                SX.add_unit(unit_id=j + 1,
                            times=np.sort(
                                np.random.uniform(0, num_frames,
                                                  spike_times[j])))
            allshanks.append(SX)
        se_allshanks = se.MultiSortingExtractor(allshanks)
        se_allshanks.set_sampling_frequency(acq_sampling_frequency)
    else:
        se_allshanks = se.NeuroscopeMultiSortingExtractor(session_path,
                                                          keep_mua_units=False)

    electrode_group = []
    for shankn in np.arange(1, nshanks + 1, dtype=int):
        for id in se_allshanks.sortings[shankn - 1].get_unit_ids():
            electrode_group.append(nwbfile.electrode_groups['shank' +
                                                            str(shankn)])

    df_unit_features = get_UnitFeatureCell_features(fpath_base, session_id,
                                                    session_path)

    celltype_names = []
    for celltype_id, region_id in zip(df_unit_features['fineCellType'].values,
                                      df_unit_features['region'].values):
        if celltype_id == 1:
            if region_id == 3:
                celltype_names.append('pyramidal cell')
            elif region_id == 4:
                celltype_names.append('granule cell')
            else:
                raise Exception('unknown type')
        elif not np.isfinite(celltype_id):
            celltype_names.append('missing')
        else:
            celltype_names.append(celltype_dict[celltype_id])

    # Add custom column data into the SortingExtractor so it can be written by the converter
    # Note there is currently a hidden assumption that the way in which the NeuroscopeSortingExtractor
    # merges the cluster IDs matches one-to-one with the get_UnitFeatureCell_features extraction
    property_descriptions = {
        'cell_type': 'name of cell type',
        'global_id': 'global id for cell for entire experiment',
        'shank_id': '0-indexed id of cluster of shank',
        'electrode_group': 'the electrode group that each spike unit came from'
    }
    property_values = {
        'cell_type': celltype_names,
        'global_id': df_unit_features['unitID'].values,
        'shank_id': [x - 2 for x in df_unit_features['unitIDshank'].values],
        # - 2 b/c the get_UnitFeatureCell_features removes 0 and 1 IDs from each shank
        'electrode_group': electrode_group
    }
    for unit_id in se_allshanks.get_unit_ids():
        for property_name in property_descriptions.keys():
            se_allshanks.set_unit_property(
                unit_id, property_name,
                property_values[property_name][unit_id])

    se.NwbSortingExtractor.write_sorting(
        se_allshanks,
        nwbfile=nwbfile,
        property_descriptions=property_descriptions)
    print('done.')

    # Read and write LFPs
    print('reading LFPs...', end='', flush=True)
    lfp_fs, all_channels_lfp_data = ns.read_lfp(session_path, stub=stub)

    lfp_data = all_channels_lfp_data[:, all_shank_channels]
    print('writing LFPs...', flush=True)
    # lfp_data[:int(len(lfp_data)/4)]
    lfp_ts = ns.write_lfp(nwbfile,
                          lfp_data,
                          lfp_fs,
                          name='lfp',
                          description='lfp signal for all shank electrodes')

    # Read and add special environmental electrodes
    for name, channel in special_electrode_dict.items():
        ts = TimeSeries(
            name=name,
            description=
            'environmental electrode recorded inline with neural data',
            data=all_channels_lfp_data[:, channel],
            rate=lfp_fs,
            unit='V',
            #conversion=np.nan,
            resolution=np.nan)
        nwbfile.add_acquisition(ts)

    # compute filtered LFP
    print('filtering LFP...', end='', flush=True)
    all_lfp_phases = []
    for passband in ('theta', 'gamma'):
        lfp_fft = filter_lfp(
            lfp_data[:, all_shank_channels == lfp_channel].ravel(),
            lfp_fs,
            passband=passband)
        lfp_phase, _ = hilbert_lfp(lfp_fft)
        all_lfp_phases.append(lfp_phase[:, np.newaxis])
    data = np.dstack(all_lfp_phases)
    print('done.', flush=True)

    if include_spike_waveforms:
        print('writing waveforms...', end='', flush=True)
        nshanks = min((max_shanks, len(ns.get_shank_channels(session_path))))

        for shankn in np.arange(nshanks, dtype=int) + 1:
            # Get spike activity from .spk file on a per-shank and per-sample basis
            ns.write_spike_waveforms(nwbfile, session_path, shankn, stub=stub)
        print('done.', flush=True)

    # Get the LFP Decomposition Series
    decomp_series = DecompositionSeries(
        name='LFPDecompositionSeries',
        description='Theta and Gamma phase for reference LFP',
        data=data,
        rate=lfp_fs,
        source_timeseries=lfp_ts,
        metric='phase',
        unit='radians')
    decomp_series.add_band(band_name='theta', band_limits=(4, 10))
    decomp_series.add_band(band_name='gamma', band_limits=(30, 80))

    check_module(nwbfile, 'ecephys',
                 'contains processed extracellular electrophysiology data'
                 ).add_data_interface(decomp_series)

    [nwbfile.add_stimulus(x) for x in ns.get_events(session_path)]

    # create epochs corresponding to experiments/environments for the mouse

    sleep_state_fpath = os.path.join(session_path,
                                     '{}--StatePeriod.mat'.format(session_id))

    exist_pos_data = any(
        os.path.isfile(
            os.path.join(session_path, '{}__{}.mat'.format(
                session_id, task_type['name']))) for task_type in task_types)

    if exist_pos_data:
        nwbfile.add_epoch_column('label', 'name of epoch')

    for task_type in task_types:
        label = task_type['name']

        file = os.path.join(session_path, session_id + '__' + label + '.mat')
        if os.path.isfile(file):
            print('loading position for ' + label + '...', end='', flush=True)

            pos_obj = Position(name=label + '_position')

            matin = loadmat(file)
            tt = matin['twhl_norm'][:, 0]
            exp_times = find_discontinuities(tt)

            if 'conversion' in task_type:
                conversion = task_type['conversion']
            else:
                conversion = np.nan

            for pos_type in ('twhl_norm', 'twhl_linearized'):
                if pos_type in matin:
                    pos_data_norm = matin[pos_type][:, 1:]

                    spatial_series_object = SpatialSeries(
                        name=label + '_{}_spatial_series'.format(pos_type),
                        data=H5DataIO(pos_data_norm, compression='gzip'),
                        reference_frame='unknown',
                        conversion=conversion,
                        resolution=np.nan,
                        timestamps=H5DataIO(tt, compression='gzip'))
                    pos_obj.add_spatial_series(spatial_series_object)

            check_module(
                nwbfile, 'behavior',
                'contains processed behavioral data').add_data_interface(
                    pos_obj)
            for i, window in enumerate(exp_times):
                nwbfile.add_epoch(start_time=window[0],
                                  stop_time=window[1],
                                  label=label + '_' + str(i))
            print('done.')

    # there are occasional mismatches between the matlab struct and the neuroscope files
    # regions: 3: 'CA3', 4: 'DG'

    trialdata_path = os.path.join(session_path,
                                  session_id + '__EightMazeRun.mat')
    if os.path.isfile(trialdata_path):
        trials_data = loadmat(trialdata_path)['EightMazeRun']

        trialdatainfo_path = os.path.join(fpath_base, 'EightMazeRunInfo.mat')
        trialdatainfo = [
            x[0] for x in loadmat(trialdatainfo_path)['EightMazeRunInfo'][0]
        ]

        features = trialdatainfo[:7]
        features[:2] = 'start_time', 'stop_time',
        [
            nwbfile.add_trial_column(x, 'description')
            for x in features[4:] + ['condition']
        ]

        for trial_data in trials_data:
            if trial_data[3]:
                cond = 'run_left'
            else:
                cond = 'run_right'
            nwbfile.add_trial(start_time=trial_data[0],
                              stop_time=trial_data[1],
                              condition=cond,
                              error_run=trial_data[4],
                              stim_run=trial_data[5],
                              both_visit=trial_data[6])
    """
    mono_syn_fpath = os.path.join(session_path, session_id+'-MonoSynConvClick.mat')

    matin = loadmat(mono_syn_fpath)
    exc = matin['FinalExcMonoSynID']
    inh = matin['FinalInhMonoSynID']

    #exc_obj = CatCellInfo(name='excitatory_connections',
    #                      indices_values=[], cell_index=exc[:, 0] - 1, indices=exc[:, 1] - 1)
    #module_cellular.add_container(exc_obj)
    #inh_obj = CatCellInfo(name='inhibitory_connections',
    #                      indices_values=[], cell_index=inh[:, 0] - 1, indices=inh[:, 1] - 1)
    #module_cellular.add_container(inh_obj)
    """

    if os.path.isfile(sleep_state_fpath):
        matin = loadmat(sleep_state_fpath)['StatePeriod']

        table = TimeIntervals(name='states',
                              description='sleep states of animal')
        table.add_column(name='label', description='sleep state')

        data = []
        for name in matin.dtype.names:
            for row in matin[name][0][0]:
                data.append({
                    'start_time': row[0],
                    'stop_time': row[1],
                    'label': name
                })
        [
            table.add_row(**row)
            for row in sorted(data, key=lambda x: x['start_time'])
        ]

        check_module(nwbfile, 'behavior',
                     'contains behavioral data').add_data_interface(table)

    print('writing NWB file...', end='', flush=True)
    if stub:
        out_fname = session_path + '_stub.nwb'
    else:
        out_fname = session_path + '.nwb'

    with NWBHDF5IO(out_fname, mode='w') as io:
        io.write(nwbfile, cache_spec=cache_spec)
    print('done.')

    print('testing read...', end='', flush=True)
    # test read
    with NWBHDF5IO(out_fname, mode='r') as io:
        io.read()
    print('done.')