def test_prior():
    """Compare EventSegment.model_prior against the analytic expectation."""
    n_events = 10
    n_timepoints = 100

    model = EventSegment(n_events)
    prior = model.model_prior(n_timepoints)[0]

    # See supplementary material of Neuron paper
    # https://doi.org/10.1016/j.neuron.2017.06.041
    normalizer = comb(n_timepoints - 1, n_events - 1)
    boundary_prob = np.zeros((n_timepoints, n_events - 1))
    for t in range(n_timepoints - 1):
        for k in range(n_events - 1):
            before = comb(t, k)
            after = comb(n_timepoints - t - 2, n_events - k - 2)
            boundary_prob[t + 1, k] = before * after / normalizer
    boundary_cdf = np.cumsum(boundary_prob, axis=0)

    # First event, interior events, then last event.
    expected = np.zeros((n_timepoints, n_events))
    expected[:, 0] = 1 - boundary_cdf[:, 0]
    for k in range(1, n_events - 1):
        expected[:, k] = boundary_cdf[:, k - 1] - boundary_cdf[:, k]
    expected[:, n_events - 1] = boundary_cdf[:, n_events - 2]

    assert np.all(np.isclose(prior, expected)), \
        "Prior does not match analytic solution"
def test_prior():
    """Check that the model prior over events matches the analytic formula."""
    num_events, num_steps = 10, 100

    segmenter = EventSegment(num_events)
    model_prior = segmenter.model_prior(num_steps)[0]

    # See supplementary material of Neuron paper
    # https://doi.org/10.1016/j.neuron.2017.06.041
    total = comb(num_steps - 1, num_events - 1)
    p_boundary = np.zeros((num_steps, num_events - 1))
    for step in range(num_steps - 1):
        for event in range(num_events - 1):
            p_boundary[step + 1, event] = (
                comb(step, event)
                * comb(num_steps - step - 2, num_events - event - 2)
                / total
            )
    cumulative = np.cumsum(p_boundary, axis=0)

    # Column 0 and the last column are special-cased; every interior column
    # is the difference between two adjacent cumulative boundary columns.
    analytic = np.zeros((num_steps, num_events))
    analytic[:, 0] = 1 - cumulative[:, 0]
    analytic[:, 1:-1] = cumulative[:, :-1] - cumulative[:, 1:]
    analytic[:, -1] = cumulative[:, -1]

    assert np.all(np.isclose(model_prior, analytic)), \
        "Prior does not match analytic solution"
def test_chains():
    """Find events with an EventSegment configured with two event chains."""
    chain_labels = np.array(['A', 'A', 'B', 'B', 'B'])
    patterns = np.array([[1, 1, 0, 0, 0], [0, 0, 1, 1, 1]])

    model = EventSegment(5, event_chains=chain_labels)
    model.set_event_patterns(patterns)

    sample_data = np.array([[0, 0, 0], [1, 1, 1]])
    segmentation = model.find_events(sample_data.T, 0.1)[0]

    # Column indices of the entries whose probability exceeds 0.99.
    confident_events = np.nonzero(segmentation > 0.99)[1]
    assert np.array_equal(confident_events, [2, 3, 4]), \
        "Failed to fit with multiple chains"
def test_fit_shapes():
    """Check output shapes/normalization of fit() and find_events().

    Also checks that invalid sizes (fewer timepoints or fewer event patterns
    than K) raise ValueError.
    """
    K = 5
    V = 3
    T = 10
    es = EventSegment(K, n_iter=2)
    sample_data = np.random.rand(V, T)
    es.fit(sample_data.T)

    assert es.segments_[0].shape == (T, K), \
        "Segmentation from fit has incorrect shape"
    assert np.isclose(np.sum(es.segments_[0], axis=1), np.ones(T)).all(), \
        "Segmentation from learn_events not correctly normalized"

    T2 = 15
    sample_data2 = np.random.rand(V, T2)
    test_segments, test_ll = es.find_events(sample_data2.T)

    assert test_segments.shape == (T2, K), \
        "Segmentation from find_events has incorrect shape"
    assert np.isclose(np.sum(test_segments, axis=1), np.ones(T2)).all(), \
        "Segmentation from find_events not correctly normalized"

    es_invalid = EventSegment(K)

    # pytest.raises() no longer accepts ``message=`` (removed in pytest 5.0);
    # calling pytest.fail() after the statement that should raise keeps the
    # custom failure message.
    with pytest.raises(ValueError):
        es_invalid.model_prior(K - 1)
        pytest.fail("T < K should cause error")
    with pytest.raises(ValueError):
        es_invalid.set_event_patterns(np.zeros((V, K - 1)))
        pytest.fail("#Events < K should cause error")
def test_sym():
    """Segmentation of data whose rows are all identical should be symmetric."""
    n_events = 4
    ramp = np.arange(10)

    model = EventSegment(n_events)
    # Every event gets the same pattern (one column per event).
    model.set_event_patterns(np.repeat(ramp.reshape(-1, 1), n_events, axis=1))

    # 20 timepoints, each equal to that same pattern.
    data = np.repeat(ramp.reshape(1, -1), 20, axis=0)
    seg = model.find_events(data, var=1)[0]

    # Check that events 1-4 and 2-3 are symmetric
    first_half = seg[:, :2]
    second_half_reversed = seg[:, 2:][::-1, ::-1]
    assert np.all(np.isclose(first_half, second_half_reversed)), \
        "Fit with constant data is not symmetric"
def test_simple_boundary():
    """Fit two events to noisy two-voxel data and check both interfaces."""
    model = EventSegment(2)
    rng = np.random.RandomState(0)

    clean = np.array([[1, 1, 1, 0, 0, 0, 0],
                      [0, 0, 0, 1, 1, 1, 1]])
    sample_data = clean + rng.rand(2, 7) * 10
    expected_labels = [0, 0, 0, 1, 1, 1, 1]

    model.fit(sample_data.T)
    fitted_labels = np.argmax(model.segments_[0], axis=1)
    assert np.array_equal(fitted_labels, expected_labels), \
        "Failed to correctly segment two events"

    predicted_labels = model.predict(sample_data.T)
    assert np.array_equal(predicted_labels, expected_labels), \
        "Error in predict interface"
def tj_fit(data, n_events=7):
    """Jointly fits HMM to multiple trials (repetitions)

    Voxels (last axis) that contain a NaN in any repetition or TR are
    removed before fitting. The input is not modified.

    Parameters
    ----------
    data : ndarray
        Data dimensions: Repetition x TR x Voxels

    n_events : int
        Number of events to fit

    Returns
    -------
    list of ndarrays
        Resulting segmentations from model fit
    """
    d = np.asarray(data)

    # np.delete always returns a new array, so no defensive copy of the
    # input is needed even when no voxel is removed.
    nan_voxels = np.isnan(d).any(axis=(0, 1))
    d = np.delete(d, np.where(nan_voxels)[0], axis=2)

    # One TR x Voxels array per repetition.
    ev_obj = EventSegment(n_events).fit(list(d))
    return ev_obj.segments_
def test_split_merge():
    """Split/merge fitting should recover events of very unequal length."""
    true_events = np.array([
        0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
    ])
    n_voxels = 10
    rng = np.random.RandomState(0)
    patterns = rng.rand(5, n_voxels)

    # Each timepoint is its event's pattern plus a small amount of noise.
    data = np.zeros((len(true_events), n_voxels))
    for t, label in enumerate(true_events):
        data[t, :] = patterns[label, :] + 0.1 * rng.rand(n_voxels)

    model = EventSegment(5, split_merge=True, split_merge_proposals=2)
    model.fit(data)
    recovered = np.argmax(model.segments_[0], axis=1)

    assert np.all(true_events == recovered), \
        "Merge/split fails to find highly uneven events"
def compute_fits_hmm(data: np.ndarray, k: int, mindist: int, type='HMM',
                     y=None, t1=None, ind1=None, zs=False):
    """Fit an HMM with ``k`` events and compute fit metrics.

    Parameters
    ----------
    data : ndarray
        Training data passed to ``HMM.fit`` (z-scored along axis 0 first
        when ``zs`` is set).
    k : int
        Number of events.
    mindist : int
        Diagonal offset passed to ``np.triu`` when building the timepoint
        pair mask (only used when ``t1`` and ``ind1`` are both None).
    type : {'HMM', 'HMMsplit'}
        'HMMsplit' enables ``split_merge=True``.
    y : ndarray, optional
        Test data; when None the training data is used for evaluation.
    t1, ind1 : optional
        Precomputed pair values and mask, reused instead of recomputing
        them. NOTE(review): the pair is only recomputed when *both* are
        None; callers presumably pass both together - confirm.
    zs : bool
        Z-score ``data`` (and ``y``) before use.

    Returns
    -------
    tuple
        ``(LL_HMM, WAC_HMM, tdist_HMM, hmm_bounds, t, ind)``.

    Raises
    ------
    ValueError
        If ``type`` is not one of the supported model names.
    """
    if type == 'HMM':
        hmm = HMM(k)
    elif type == 'HMMsplit':
        hmm = HMM(k, split_merge=True)
    else:
        # Previously this fell through and crashed later with an
        # UnboundLocalError on ``hmm``.
        raise ValueError(
            "Unknown type {!r}; expected 'HMM' or 'HMMsplit'".format(type))

    if zs:
        data = zscore(data, axis=0, ddof=1)
    hmm.fit(data)

    if y is None:
        tdata = data
    else:
        if zs:
            y = zscore(y, axis=0, ddof=1)
        tdata = y

    _, LL_HMM = hmm.find_events(tdata)

    # Boundaries are taken from the segmentation of the *training* fit:
    # nonzero wherever the most likely event changes between timepoints.
    hmm_bounds = np.insert(
        np.diff(np.argmax(hmm.segments_[0], axis=1)), 0, 0).astype(int)

    if t1 is None and ind1 is None:
        ind = np.triu(np.ones(tdata.shape[0], bool), mindist)
        z = GSBS._zscore(tdata)
        t = np.cov(z)[ind]
    else:
        ind = ind1
        t = t1

    # City-block distance between state labels for each selected pair:
    # 0 = same state, 1 = neighbouring states, >0 = any different state.
    stateseq = deltas_states(deltas=hmm_bounds)[:, None]
    state_dist = cdist(stateseq, stateseq, "cityblock")[ind]
    diff = state_dist == 1
    same = state_dist == 0
    alldiff = state_dist > 0

    WAC_HMM = np.mean(t[same]) - np.mean(t[alldiff])
    tdist_HMM = 0 if sum(same) < 2 else ttest_ind(
        t[same], t[diff], equal_var=False)[0]

    return LL_HMM, WAC_HMM, tdist_HMM, hmm_bounds, t, ind
def test_sym_ll():
    """Log-likelihood should be the same for time-reversed data and events."""
    labels = np.array([0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2])
    n_voxels = 10
    rng = np.random.RandomState(0)
    patterns = rng.rand(3, n_voxels)

    # One noisy copy of the event pattern per timepoint. Drawing the noise
    # as a single (T, V) block consumes the random stream in the same
    # row-by-row order as drawing one row at a time.
    noise = rng.rand(len(labels), n_voxels)
    D_forward = patterns[labels, :] + 0.1 * noise
    D_backward = np.flip(D_forward, axis=0)

    forward_model = EventSegment(3)
    forward_model.set_event_patterns(patterns.T)
    _, ll_forward = forward_model.find_events(D_forward, var=1)

    # Reverse the event order to match the reversed time axis.
    backward_model = EventSegment(3)
    backward_model.set_event_patterns(np.flip(patterns.T, axis=1))
    _, ll_backward = backward_model.find_events(D_backward, var=1)

    assert (ll_forward == ll_backward), \
        "Log-likelihood not symmetric forward/backward"
def run_simulation_computation_time(self, nstates, rep):
    """Time GSBS and HMM fits on simulated data for 2..nstates-1 states.

    Returns a dict of per-state-count durations (index = number of states;
    indices 0 and 1 stay zero) plus cumulative sums of the HMM durations
    under the ``*_estK`` keys.
    """
    _bounds, subData, _ = self.generate_simulated_data_HRF(rep=rep)
    first_subject = subData[0, :, :]

    timings = dict()
    for key in ('duration_GSBS', 'duration_HMM_fixK', 'duration_HMMsm_fixK'):
        timings[key] = np.zeros([nstates])

    hmm_variants = (('duration_HMM_fixK', False),
                    ('duration_HMMsm_fixK', True))

    for k in range(2, nstates):
        print(rep, k)

        # GSBS: construction is outside the timed region, only fit() counts.
        gsbs_model = gsbs_extra.GSBS(x=first_subject, kmax=k)
        started = timeit.default_timer()
        gsbs_model.fit()
        timings['duration_GSBS'][k] = timeit.default_timer() - started

        # HMM: construction and fit are both inside the timed region.
        for key, use_split_merge in hmm_variants:
            started = timeit.default_timer()
            hmm_model = HMM(k, split_merge=use_split_merge)
            hmm_model.fit(first_subject)
            timings[key][k] = timeit.default_timer() - started

    timings['duration_HMM_estK'] = np.cumsum(timings['duration_HMM_fixK'])
    timings['duration_HMMsm_estK'] = np.cumsum(timings['duration_HMMsm_fixK'])
    return timings
def test_weighted_var():
    """Check calc_weighted_event_var with binary and fractional weights."""
    model = EventSegment(2)

    # Rows 0-3 share one pattern and rows 4-7 share another.
    first_pattern = (1 / np.sqrt(4 / 3)) * np.array([-1, -1, 1, 1])
    second_pattern = (1 / np.sqrt(4 / 3)) * np.array([1, 1, -1, -1])
    data = np.zeros((8, 4))
    data[:4, :] = first_pattern
    data[4:, :] = second_pattern
    mean_pat = data[[0, 4], :].T

    # Each timepoint assigned entirely to its own event.
    weights = np.zeros((8, 2))
    weights[:, 0] = [1] * 4 + [0] * 4
    weights[:, 1] = [0] * 4 + [1] * 4
    binary_var = model.calc_weighted_event_var(data, weights, mean_pat)
    assert np.array_equal(binary_var, [0, 0]), \
        "Failed to compute variance with 0/1 weights"

    # Timepoints from the other event now contribute with weight 0.5.
    weights[:, 0] = [1] * 4 + [0.5] * 4
    weights[:, 1] = [0.5] * 4 + [1] * 4
    true_var = (4 * 0.5 * 12) / (6 - 5 / 6) * np.ones(2) / 4
    fractional_var = model.calc_weighted_event_var(data, weights, mean_pat)
    assert np.allclose(fractional_var, true_var), \
        "Failed to compute variance with fractional weights"
def test_weighted_var():
    """Weighted event variance: exact for 0/1 weights, close for fractional."""
    segmenter = EventSegment(2)
    scale = 1 / np.sqrt(4 / 3)

    # Two blocks of four identical rows each.
    D = np.zeros((8, 4))
    for row in range(8):
        signs = [-1, -1, 1, 1] if row < 4 else [1, 1, -1, -1]
        D[row, :] = scale * np.array(signs)
    mean_pat = D[[0, 4], :].T

    w = np.zeros((8, 2))

    # Hard assignment of every row to its own event.
    w[:, 0] = [1, 1, 1, 1, 0, 0, 0, 0]
    w[:, 1] = [0, 0, 0, 0, 1, 1, 1, 1]
    hard_result = segmenter.calc_weighted_event_var(D, w, mean_pat)
    assert np.array_equal(hard_result, [0, 0]), \
        "Failed to compute variance with 0/1 weights"

    # Soft assignment: rows of the other block get weight 0.5.
    w[:, 0] = [1, 1, 1, 1, 0.5, 0.5, 0.5, 0.5]
    w[:, 1] = [0.5, 0.5, 0.5, 0.5, 1, 1, 1, 1]
    true_var = (4 * 0.5 * 12) / (6 - 5 / 6) * np.ones(2) / 4
    soft_result = segmenter.calc_weighted_event_var(D, w, mean_pat)
    assert np.allclose(soft_result, true_var), \
        "Failed to compute variance with fractional weights"
def test_event_transfer():
    """Apply manually-set event patterns to new data via find_events().

    Also checks that find_events() raises NotFittedError while the variance
    or the event patterns are still missing.
    """
    es = EventSegment(2)
    sample_data = np.asarray([[1, 1, 1, 0, 0, 0, 0], [0, 0, 0, 1, 1, 1, 1]])

    # pytest.raises() no longer accepts ``message=`` (removed in pytest 5.0);
    # calling pytest.fail() after the statement that should raise keeps the
    # custom failure message.
    with pytest.raises(NotFittedError):
        es.find_events(sample_data.T)
        pytest.fail("Should need to set variance")

    with pytest.raises(NotFittedError):
        es.find_events(sample_data.T, np.asarray([1, 1]))
        pytest.fail("Should need to set patterns")

    es.set_event_patterns(np.asarray([[1, 0], [0, 1]]))
    seg = es.find_events(sample_data.T, np.asarray([1, 1]))[0]

    events = np.argmax(seg, axis=1)
    assert np.array_equal(events, [0, 0, 0, 1, 1, 1, 1]), \
        "Failed to correctly transfer two events to new data"
def test_chains():
    """fit() must be rejected with event chains; find_events() must work."""
    chain_labels = np.array(['A', 'A', 'B', 'B', 'B'])
    model = EventSegment(5, event_chains=chain_labels)

    sample_data = np.array([[0, 0, 0], [1, 1, 1]])

    # fit() is expected to raise; pytest.fail() only runs if it did not.
    with pytest.raises(RuntimeError):
        model.fit(sample_data.T)[0]
        pytest.fail("Can't use fit() with event chains")

    patterns = np.array([[1, 1, 0, 0, 0], [0, 0, 1, 1, 1]])
    model.set_event_patterns(patterns)
    segmentation = model.find_events(sample_data.T, 0.1)[0]

    # Column indices of the entries whose probability exceeds 0.99.
    confident_events = np.nonzero(segmentation > 0.99)[1]
    assert np.array_equal(confident_events, [2, 3, 4]), \
        "Failed to fit with multiple chains"
def heldout_ll(data, n_events, split):
    """Compute log-likelihood on heldout subjects

    Fits an event segmentation model with n_events to half of the subjects,
    then measures the log-likelihood of this model on the other half. The
    returned log-likelihood averages across both choices of which half is
    used for training and which is used for testing. The boolean array split
    defines which subjects are in each half.

    Voxels (last axis) that contain a NaN for any subject or TR are removed
    first. The input is not modified.

    Parameters
    ----------
    data : ndarray
        subj x TR x Voxels data array

    n_events : int
        Number of events for event segmentation model

    split : ndarray
        Boolean vector, subj in one group are True and in the other are False

    Returns
    -------
    float
        Average of log-likelihoods on testing groups
    """
    d = np.asarray(data)

    # Remove nan voxels. np.delete always returns a new array, so no
    # defensive copy of the input is needed.
    nan_voxels = np.isnan(d).any(axis=(0, 1))
    d = np.delete(d, np.where(nan_voxels)[0], axis=2)

    # Coerce to a boolean array so that ``~split`` is a logical negation
    # even when a plain list of bools is passed.
    split = np.asarray(split, dtype=bool)

    # Train and test event segmentation across groups
    group1 = d[split].mean(0)
    group2 = d[~split].mean(0)

    es = EventSegment(n_events).fit(group1)
    _, ll12 = es.find_events(group2)

    es = EventSegment(n_events).fit(group2)
    _, ll21 = es.find_events(group1)

    return (ll12 + ll21) / 2
#### 1.0 Warm-up: Event structure in activity patterns Before applying any model, a good first step is to plot the correlation between activity patterns for each pair of timepoints during the movie. In this dataset, this shows blocks along the diagonal, which indicates that activity patterns are remaining stable for periods of tens of timepoints. This is the kind of structure that the HMM and GSBS models will be looking for. plt.figure(figsize=(10,8)) plt.imshow(np.corrcoef(movie_group)) plt.xlabel('Timepoint') plt.ylabel('Timepoint') plt.colorbar() plt.title('Spatial pattern correlation'); #### 1.1 Fitting the HMM To use an HMM to find both the event timings and the patterns corresponding to each event, we can use the EventSegment class from the brainiak toolbox. We need to specify the number of events, which here we set to 29 (corresponding to the number of boundaries typically annotated by human subjects). movie_HMM = EventSegment(n_events = 29) movie_HMM.fit(movie_group); This fit produces: * The log-likelihood (measuring overall model fit) over training. (Note that the log-likelihood on held-out test data is often a better measure of model quality - see below). * The mean voxel pattern for each event. Here we show only 1% of the voxels since the ROI is large. * A matrix showing the probability of being in each event at each timepoint. We can use this to derive the most likely timepoints where boundaries occur, and plot these on top of the timepoint similarity matrix for comparison. # Plotting the log-likelihood (measuring overall model fit) plt.figure(figsize = (12, 4)) plt.plot(movie_HMM.ll_) plt.title('Log likelihood during training') plt.xlabel('Model fitting steps') # Plotting mean activity in each event for some example voxels plt.figure(figsize = (12, 4))
def test_create_event_segmentation():
    """Smoke test: an EventSegment can be constructed and is truthy."""
    segmenter = EventSegment(5)
    assert segmenter, "Invalid EventSegment instance"
def run_simulation_evlength(self, length_std, nstates_list, run_HMM, rep,
                            TRfactor=1, finetune=1):
    """Simulate data per (length_std, nstates) pair and score the fits.

    For each combination, data is generated with
    ``generate_simulated_data_HRF``, a GSBS model is fitted and scored with
    ``fit_metrics_simulation``; when ``run_HMM is True`` a plain and a
    split-merge HMM are fitted and scored as well. Returns a dict of result
    arrays indexed by (length_std index, nstates index, ...).
    """
    n_lengths = np.shape(length_std)[0]
    n_settings = np.shape(nstates_list)[0]

    def hmm_boundaries(n_states, x, split_merge):
        # Fit an HMM and mark timepoints where the most likely event changes.
        model = HMM(n_states, split_merge=split_merge)
        model.fit(x)
        labels = np.argmax(model.segments_[0], axis=1)
        return np.insert(np.diff(labels), 0, 0).astype(int)

    res = dict()
    for key in ('dists_GS', 'dists_HMM', 'dists_HMMsplit'):
        res[key] = np.zeros([n_lengths, n_settings, nstates_list[-1]])
    for key in ('sim_GS', 'sim_HMM', 'sim_HMMsplit',
                'simz_GS', 'simz_HMM', 'simz_HMMsplit'):
        res[key] = np.zeros([n_lengths, n_settings])
    for key in ('statesreal', 'bounds', 'bounds_HMMsplit'):
        res[key] = np.zeros([n_lengths, n_settings, self.ntime])

    for idxl, std in enumerate(length_std):
        for idxn, n in enumerate(nstates_list):
            print(rep, std)
            bounds, subData, _ = self.generate_simulated_data_HRF(
                length_std=std, nstates=n, TRfactor=TRfactor, rep=rep)
            res['statesreal'][idxl, idxn, :] = deltas_states(bounds)

            states = gsbs_extra.GSBS(kmax=n, x=subData[0, :, :],
                                     finetune=finetune)
            states.fit()
            sim, simz, dists = fit_metrics_simulation(
                bounds, np.double(states.get_bounds(k=n) > 0))
            res['sim_GS'][idxl, idxn] = sim
            res['simz_GS'][idxl, idxn] = simz
            res['dists_GS'][idxl, idxn, 0:n] = dists
            res['bounds'][idxl, idxn, :] = states.bounds

            if run_HMM is True:
                # Both HMMs are fitted before either one is scored.
                hmm_bounds = hmm_boundaries(n, subData[0, :, :], False)
                hmm_bounds_split = hmm_boundaries(n, subData[0, :, :], True)

                sim, simz, dists = fit_metrics_simulation(bounds, hmm_bounds)
                res['sim_HMM'][idxl, idxn] = sim
                res['simz_HMM'][idxl, idxn] = simz
                res['dists_HMM'][idxl, idxn, 0:n] = dists

                sim, simz, dists = fit_metrics_simulation(
                    bounds, hmm_bounds_split)
                res['sim_HMMsplit'][idxl, idxn] = sim
                res['simz_HMMsplit'][idxl, idxn] = simz
                res['dists_HMMsplit'][idxl, idxn, 0:n] = dists
                res['bounds_HMMsplit'][idxl, idxn, :] = hmm_bounds_split

    return res
def test_fit_shapes():
    """Validate segmentation shapes and row normalization.

    Covers the segmentation stored by fit(), the one returned by
    find_events() on data of a different length, and the ValueError raised
    for inputs smaller than the number of events.
    """
    K = 5
    V = 3
    T = 10
    es = EventSegment(K, n_iter=2)
    sample_data = np.random.rand(V, T)
    es.fit(sample_data.T)

    fit_segments = es.segments_[0]
    assert fit_segments.shape == (T, K), \
        "Segmentation from fit has incorrect shape"
    assert np.isclose(np.sum(fit_segments, axis=1), np.ones(T)).all(), \
        "Segmentation from learn_events not correctly normalized"

    T2 = 15
    sample_data2 = np.random.rand(V, T2)
    test_segments, test_ll = es.find_events(sample_data2.T)

    assert test_segments.shape == (T2, K), \
        "Segmentation from find_events has incorrect shape"
    assert np.isclose(np.sum(test_segments, axis=1), np.ones(T2)).all(), \
        "Segmentation from find_events not correctly normalized"

    es_invalid = EventSegment(K)

    # The ``message=`` argument of pytest.raises() was removed in pytest 5.0.
    # Placing pytest.fail() after the call that should raise reports the
    # same text when no exception occurs.
    with pytest.raises(ValueError):
        es_invalid.model_prior(K - 1)
        pytest.fail("T < K should cause error")
    with pytest.raises(ValueError):
        es_invalid.set_event_patterns(np.zeros((V, K - 1)))
        pytest.fail("#Events < K should cause error")
#other examples of HMM boundaries - this part of the script is very Brainiak tutorial-y V = D.shape[0] # number of voxels K = maxedges # number of possessions T = bold_roi[task_name].shape[0] # Time points bounds_subj_fixk=np.zeros((nS,T)) bounds_subj_smooth_fixk=np.zeros((nS,T)) print(V) for subj in range(nS): D = bold_roi[task_name][:,zzmat,subj].T zmat=D[:,1]!=0 D=D[zmat,:] # Find the events in this dataset if splitm: hmm_sim = EventSegment(K,split_merge=True) else: hmm_sim = EventSegment(K) hmm_sim.fit(D.T) pred_seg = hmm_sim.segments_[0] # extract the boundaries bs=np.where(np.diff(np.argmax(pred_seg, axis=1)))[0] bounds_subj_fixk[subj,bs] = 1 # mark the boundaries in the continuous space bounds_subj_smooth_fixk[subj,:]=running_mean(bounds_subj_fixk[subj,:],smoothf,smoothshift) if ipynby==1: if subj==0: # plot the data for sample subject f, ax = plt.subplots(1,1, figsize=(6, 2)) ax.imshow(D, interpolation='nearest', cmap='viridis', aspect='auto') ax.set_ylabel('Voxels') ax.set_xlabel('TRs')