Example #1
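This example builds a word-level timing table from Praat TextGrid annotation files. The snippet depends on module-level names defined elsewhere in the package; a minimal sketch of the assumed setup follows (the data root path is hypothetical, and the TextGrid parser is inferred from how .load and .simple_transcript are used):

# Assumed surrounding setup, not part of the original snippet:
from collections import defaultdict
from pathlib import Path
from xarray import DataArray

# the TextGrid class must expose .load(), per-tier .nameid, and .simple_transcript;
# gather_indexes, merge_data_arrays, and stories_meta come from the same package
# (stories_meta is constructed in example #5)
neural_data_dir = Path('/path/to/neural_data')  # hypothetical data root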
def load_time_meta():
    data_dir = neural_data_dir / 'StoriesData_Dec2018' / 'stories_textgridsbyJeanne'
    files = data_dir.glob("*TextGrid*")
    time_to_words = []
    for file in files:
        textgrid = TextGrid.load(file)
        words = [tier for tier in textgrid.tiers if tier.nameid == 'words'][0]  # tier holding word alignments
        rows = defaultdict(list)
        for (time_start, time_end, word) in words.simple_transcript:
            rows['time_start'].append(float(time_start))
            rows['time_end'].append(float(time_end))
            rows['word'].append(word)
        story_index = int(file.stem)  # the file name stem encodes the story number
        story = stories_meta.sel(number=story_index).values
        story = next(iter(set(story)))  # Boar was read twice
        rows = DataArray(rows['word'],
                         coords={
                             'filepath':
                             ('time_bin', [file.name] * len(rows['word'])),
                             'story':
                             ('time_bin', [story] * len(rows['word'])),
                             'time_start': ('time_bin', rows['time_start']),
                             'time_end': ('time_bin', rows['time_end']),
                         },
                         dims=['time_bin'])
        gather_indexes(rows)
        time_to_words.append(rows)
    time_to_words = merge_data_arrays(time_to_words)
    return time_to_words
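A hedged usage sketch (it assumes gather_indexes registers the story and time coordinates as an index on time_bin so that .sel works on them; 'Boar' is a story named in the snippet above):

time_meta = load_time_meta()  # words along a single 'time_bin' dimension
boar = time_meta.sel(story='Boar')
print(list(zip(boar['time_start'].values[:3], boar.values[:3])))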
Example #2
def _merge_voxel_meta(data, meta, bold_shift_seconds):
    data_missing = set(meta['story'].values) - set(data['story'].values)
    if data_missing:
        warnings.warn(f"Stories missing from the data: {data_missing}")
    meta_missing = set(data['story'].values) - set(meta['story'].values)
    if meta_missing:
        warnings.warn(f"Stories missing from the meta: {meta_missing}")

    ignored_words = [None, '', '<s>', '</s>', '<s']  # sentence markers; '<s' presumably a truncated '<s>'
    annotated_data = []
    for story in tqdm(ordered_set(data['story'].values), desc='merge meta'):
        if story not in meta['story'].values:
            continue
        story_meta = meta.sel(story=story)
        story_meta = story_meta.sortby('time_end')

        story_data = data.sel(story=story).stack(timepoint=['timepoint_value'])
        story_data = story_data.sortby('timepoint_value')
        timepoints = story_data['timepoint_value'].values.tolist()
        assert is_sorted(timepoints)
        # shift timepoints back so the bins align with stimulus (not BOLD) time
        timepoints = [timepoint - bold_shift_seconds for timepoint in timepoints]
        sentences = []
        last_timepoint = -np.inf
        for timepoint in timepoints:
            if last_timepoint >= max(story_meta['time_end'].values):
                break  # every word has already been assigned to an earlier bin
            if timepoint <= 0:
                sentences.append(None)
                continue  # ignore fixation period
            # select words whose end time falls inside (last_timepoint, timepoint]
            timebin_meta = [
                last_timepoint < end <= timepoint
                for end in story_meta['time_end'].values
            ]
            timebin_meta = story_meta[{'time_bin': timebin_meta}]
            sentence = ' '.join(word.strip() for word in timebin_meta.values
                                if word not in ignored_words)
            sentence = sentence.lower().strip()
            # quick-fixes
            if story == 'Boar' and sentence == 'interactions the the':  # Boar duplicate
                sentence = 'interactions the'
            if story == 'KingOfBirds' and sentence == 'the fact that the larger':  # missing word in TextGrid
                sentence = 'earth ' + sentence
            if story == 'MrSticky' and sentence == 'worry don\'t worry i went extra slowly since it\'s':
                sentence = 'don\'t worry i went extra slowly since it\'s'
            sentences.append(sentence)
            last_timepoint = timebin_meta['time_end'].values[-1]
        # keep only bins that produced a non-empty sentence
        sentence_index = [i for i, sentence in enumerate(sentences) if sentence]
        sentences = np.array(sentences)[sentence_index]
        if story not in ['Boar', 'KingOfBirds',
                         'MrSticky']:  # ignore quick-fixes
            annotated_sentence = ' '.join(sentences)
            meta_sentence = ' '.join(word.strip() for word in story_meta.values if word not in ignored_words) \
                .lower().strip()
            assert annotated_sentence == meta_sentence
        # re-interpret timepoints as stimuli
        coords = {}
        for coord_name, dims, coord_value in walk_coords(story_data):
            dims = [
                dim if not dim.startswith('timepoint') else 'presentation'
                for dim in dims
            ]
            # discard the timepoints for which the stimulus did not change (empty word)
            if array_is_element(dims, 'presentation'):
                coord_value = coord_value[sentence_index]
            coords[coord_name] = dims, coord_value
        coords = {
            **coords,
            **{
                'stimulus_sentence': ('presentation', sentences)
            }
        }
        story_data = story_data[{
            dim: slice(None) if dim != 'timepoint' else sentence_index
            for dim in story_data.dims
        }]
        dims = [
            dim if not dim.startswith('timepoint') else 'presentation'
            for dim in story_data.dims
        ]
        story_data = xr.DataArray(story_data.values, coords=coords, dims=dims)
        story_data['story'] = 'presentation', [story] * len(
            story_data['presentation'])
        gather_indexes(story_data)
        annotated_data.append(story_data)
    annotated_data = merge_data_arrays(annotated_data)
    return annotated_data
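The core of the merge is the binning rule: each BOLD timepoint is first shifted back by bold_shift_seconds to undo hemodynamic lag, then collects every word whose time_end falls in the half-open interval (last_timepoint, timepoint]. A toy sketch of just that rule, with invented numbers:

# invented word offsets and (already shifted) scanner timepoints
words = ['the', 'boar', 'was', 'here']
word_ends = [0.5, 1.8, 3.2, 5.9]
last = float('-inf')
for t in [2.0, 4.0, 6.0]:
    in_bin = [word for word, end in zip(words, word_ends) if last < end <= t]
    if in_bin:
        last = max(end for end in word_ends if end <= t)
    print(t, ' '.join(in_bin))  # prints 'the boar', then 'was', then 'here'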
Example #3
def load_voxel_data(bold_shift_seconds=4):
    data = load_filtered_voxel_timepoints()
    gather_indexes(data)
    meta = load_time_meta()
    annotated_data = _merge_voxel_meta(data, meta, bold_shift_seconds)
    return annotated_data
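A hedged usage sketch; the default of 4 seconds is a common approximation of the hemodynamic peak delay, not a value this snippet derives:

data = load_voxel_data(bold_shift_seconds=4)
# after the merge, rows are stimulus presentations rather than raw timepoints
print(data['stimulus_sentence'].values[:3])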
Example #4
def load_filtered_voxel_timepoints():
    data = load_voxel_timepoints()
    data = data.sel(threshold='from90to100')  # 'from90to100' presumably the top voxel-selection percentile band
    gather_indexes(data)  # register coords as indexes so the next .sel works
    data = data.sel(subject_nStories=8)  # keep only subjects with data for all 8 stories
    return data
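A short usage sketch; the coordinate meanings above are inferred from their names (a voxel-selection threshold band and a per-subject story count):

data = load_filtered_voxel_timepoints()
print(data.dims)  # only top-band voxels and 8-story subjects remain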
Example #5
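# (snippet begins mid-definition: stories_meta is an xarray DataArray over a
#  'story' dimension whose earlier coords, such as 'story_name', 'reader', and
#  the 'number' used by load_time_meta in example #1, are cut off above)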
        'time_with_fixation':
        ('story', [338, 318, 394, 396, 302, 410, 348, 394, 388, 422, 338]),
        'time_without_fixation': ('story', [
            5 * 60 + 6, 4 * 60 + 46, 6 * 60 + 2, 6 * 60 + 4, 4 * 60 + 30,
            6 * 60 + 18, 5 * 60 + 16, 6 * 60 + 2, 5 * 60 + 56, 6 * 60 + 30,
            5 * 60 + 6
        ]),
        # one timepoint per 2-second TR: recording_timepoints == time_with_fixation / 2
        'recording_timepoints':
        ('story', [169, 159, 197, 198, 151, 205, 174, 197, 194, 211, 169])
    },
    dims=['story'])
stories_meta['story_index'] = 'story', [
    ".".join([str(value) for value in values]) for values in zip(
        *[stories_meta[coord].values for coord in ['story_name', 'reader']])
]
gather_indexes(stories_meta)
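The story_index coordinate joins story_name and reader per story. A tiny sketch of the same zip-and-join idiom on invented values:

names = ['Boar', 'KingOfBirds']
readers = ['readerA', 'readerB']  # hypothetical reader labels
print([".".join(str(v) for v in vals) for vals in zip(names, readers)])
# ['Boar.readerA', 'KingOfBirds.readerB']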