def _perform_mcmc(self, sustainData, seq_init, f_init, n_iterations, seq_sigma, f_sigma):
    """Take MCMC samples of the uncertainty in the SuStaIn model parameters.

    Sample 0 is ``(seq_init, f_init)``.  Every later iteration:

    1. visits the subtypes in random order and, for each, moves one randomly
       chosen event to a new position.  The position is drawn from a
       normal-shaped weight centred on the event's current position and
       restricted so the event stays between the neighbouring z-score events
       of the same biomarker;
    2. perturbs the subtype fractions with one shared standard-normal draw
       scaled by ``f_sigma`` and renormalises their absolute values to sum
       to 1;
    3. scores the proposal with ``self._calculate_likelihood`` and keeps it
       only if ``exp(new - previous) >= u`` for a uniform draw ``u``;
       otherwise the previous sample is copied forward.

    Args:
        sustainData: passed unchanged to ``self._calculate_likelihood``.
        seq_init: initial sequences, one row per subtype; written into
            ``samples_sequence[:, :, 0]`` (shape ``(N_S, N)``).
        f_init: initial subtype fractions, written into ``samples_f[:, 0]``.
        n_iterations: number of samples to store (including the initial one).
        seq_sigma: width of the position proposal.  Either a scalar (int,
            float or NumPy scalar) shared by all events, or an array indexed
            as ``seq_sigma[subtype, event]``.
        f_sigma: scale of the fraction perturbation (float or array).

    Returns:
        Tuple ``(ml_sequence, ml_f, ml_likelihood, samples_sequence,
        samples_f, samples_likelihood)``.  ``samples_sequence`` is
        ``(N_S, N, n_iterations)``, ``samples_f`` is ``(N_S, n_iterations)``
        and ``samples_likelihood`` is ``(n_iterations, 1)``.
        ``ml_likelihood`` is the one-element row holding the maximum stored
        likelihood; ``ml_sequence`` / ``ml_f`` keep a trailing axis with one
        entry for every iteration that attains that maximum (rejected
        proposals repeat the previous value, so there can be several).
    """
    N = self.stage_zscore.shape[1]
    N_S = seq_init.shape[0]

    if isinstance(f_sigma, float):  # FIXME: hack to enable multiplication
        f_sigma = np.array([f_sigma])

    # Any 0-d value (int, float, NumPy scalar) is a single shared width;
    # testing only for ``int`` sent floats down the array-indexing path.
    seq_sigma_is_scalar = np.ndim(seq_sigma) == 0

    samples_sequence = np.zeros((N_S, N, n_iterations))
    samples_f = np.zeros((N_S, n_iterations))
    samples_likelihood = np.zeros((n_iterations, 1))
    samples_sequence[:, :, 0] = seq_init  # don't need to copy as we don't write to 0 index
    samples_f[:, 0] = f_init

    events = np.arange(N)

    # Reduce frequency of tqdm update to 0.1% of total for larger iteration numbers
    tqdm_update_iters = int(n_iterations / 1000) if n_iterations > 100000 else None

    for i in tqdm(range(n_iterations), "MCMC Iteration", n_iterations, miniters=tqdm_update_iters):
        if i > 0:
            # this function returns different random numbers to Matlab
            seq_order = self.global_rng.permutation(N_S)
            for s in seq_order:
                move_event_from = int(np.ceil(N * self.global_rng.random())) - 1
                current_sequence = samples_sequence[s, :, i - 1]

                # current_location[e] is the position of event e in the sequence
                current_location = np.zeros(N, dtype=int)
                current_location[current_sequence.astype(int)] = events

                selected_event = int(current_sequence[move_event_from])
                this_stage_zscore = self.stage_zscore[0, selected_event]
                selected_biomarker = self.stage_biomarker_index[0, selected_event]
                possible_zscores_biomarker = self.stage_zscore[
                    self.stage_biomarker_index == selected_biomarker]

                # slightly different conditional check to matlab version to
                # protect python from calling min,max on an empty array
                min_filter = possible_zscores_biomarker < this_stage_zscore
                max_filter = possible_zscores_biomarker > this_stage_zscore
                same_biomarker = self.stage_biomarker_index[0] == selected_biomarker

                # Lower bound: just after the next-lower z-score event of the
                # same biomarker.  ``.item()`` yields a plain int and still
                # fails loudly if the match is not unique.
                if np.any(min_filter):
                    min_zscore_bound = np.max(possible_zscores_biomarker[min_filter])
                    min_zscore_bound_event = events[
                        (self.stage_zscore[0] == min_zscore_bound) & same_biomarker]
                    move_event_to_lower_bound = current_location[min_zscore_bound_event].item() + 1
                else:
                    move_event_to_lower_bound = 0

                # Upper bound: the position of the next-higher z-score event
                # of the same biomarker (exclusive).
                if np.any(max_filter):
                    max_zscore_bound = np.min(possible_zscores_biomarker[max_filter])
                    max_zscore_bound_event = events[
                        (self.stage_zscore[0] == max_zscore_bound) & same_biomarker]
                    move_event_to_upper_bound = current_location[max_zscore_bound_event].item()
                else:
                    move_event_to_upper_bound = N

                # FIXME: hack because python won't produce an array in range
                # (N,N), while matlab will produce an array (N)... urgh
                if move_event_to_lower_bound == move_event_to_upper_bound:
                    possible_positions = np.array([0])
                else:
                    possible_positions = np.arange(move_event_to_lower_bound,
                                                   move_event_to_upper_bound)

                distance = possible_positions - move_event_from

                if seq_sigma_is_scalar:
                    this_seq_sigma = seq_sigma
                else:
                    this_seq_sigma = seq_sigma[s, selected_event]

                # use own normal PDF because stats.norm is slow
                weight = AbstractSustain.calc_coeff(this_seq_sigma) * AbstractSustain.calc_exp(
                    distance, 0., this_seq_sigma)
                weight = weight / np.sum(weight)

                # FIXME: difficult to check this because random.choice is
                # different to Matlab randsample
                index = self.global_rng.choice(range(len(possible_positions)), 1,
                                               replace=True, p=weight)
                # choice(..., 1) returns a 1-element array; take the scalar so
                # the slices below get a plain int.
                move_event_to = int(possible_positions[index[0]])

                # Remove the event, then re-insert it at its new position.
                remaining = np.delete(current_sequence, move_event_from, 0)
                new_sequence = np.concatenate([
                    remaining[:move_event_to],
                    [selected_event],
                    remaining[move_event_to:],
                ])
                samples_sequence[s, :, i] = new_sequence

            new_f = samples_f[:, i - 1] + f_sigma * self.global_rng.standard_normal()
            new_f = np.fabs(new_f) / np.sum(np.fabs(new_f))
            samples_f[:, i] = new_f

        S = samples_sequence[:, :, i]
        f = samples_f[:, i]
        likelihood_sample, _, _, _, _ = self._calculate_likelihood(sustainData, S, f)
        samples_likelihood[i] = likelihood_sample

        if i > 0:
            ratio = np.exp(samples_likelihood[i] - samples_likelihood[i - 1])
            if ratio < self.global_rng.random():
                # Proposal rejected: carry the previous sample forward.
                samples_likelihood[i] = samples_likelihood[i - 1]
                samples_sequence[:, :, i] = samples_sequence[:, :, i - 1]
                samples_f[:, i] = samples_f[:, i - 1]

    ml_likelihood = max(samples_likelihood)
    # Every iteration that attains the maximum (ties are kept on purpose so
    # the returned shapes are unchanged).
    perm_index = np.where(samples_likelihood == ml_likelihood)[0]
    ml_sequence = samples_sequence[:, :, perm_index]
    ml_f = samples_f[:, perm_index]

    return ml_sequence, ml_f, ml_likelihood, samples_sequence, samples_f, samples_likelihood
def _perform_mcmc(self, sustainData, seq_init, f_init, n_iterations, seq_sigma, f_sigma):
    """Take MCMC samples of the uncertainty in the SuStaIn model parameters.

    Sample 0 is ``(seq_init, f_init)``.  Every later iteration:

    1. for all subtypes at once, picks one event per subtype and swaps it
       with the event at a position drawn from a normal-shaped weight
       centred on the picked event's current position;
    2. perturbs the subtype fractions with one shared standard-normal draw
       scaled by ``f_sigma`` and renormalises their absolute values to sum
       to 1;
    3. scores the proposal as ``sum(log(p_subject + 1e-250))`` built from
       ``self._calculate_likelihood_stage`` and keeps it only if
       ``exp(new - previous) >= u`` for a uniform draw ``u``; otherwise the
       previous sample is copied forward.

    Args:
        sustainData: object providing ``getNumSamples()`` and
            ``getNumStages()``; also passed to
            ``self._calculate_likelihood_stage``.
        seq_init: initial sequences, one row per subtype; written into
            ``samples_sequence[:, :, 0]`` (shape ``(N_S, N)``).
        f_init: initial subtype fractions, written into ``samples_f[:, 0]``.
        n_iterations: number of samples to store (including the initial one).
        seq_sigma: width of the position proposal.  A scalar is shared by
            all events; an ``(N_S, N)`` array is read as
            ``seq_sigma[subtype, event]``.  Any other shape is passed to the
            weight calculation unchanged.
        f_sigma: scale of the fraction perturbation (float or array).

    Returns:
        Tuple ``(ml_sequence, ml_f, ml_likelihood, samples_sequence,
        samples_f, samples_likelihood)``.  ``samples_sequence`` is
        ``(N_S, N, n_iterations)``, ``samples_f`` is ``(N_S, n_iterations)``
        and ``samples_likelihood`` is ``(n_iterations, 1)``.  The ``ml_*``
        values belong to the first iteration attaining the maximum stored
        likelihood.
    """
    M = sustainData.getNumSamples()
    N = sustainData.getNumStages()
    N_S = seq_init.shape[0]

    if isinstance(f_sigma, float):  # FIXME: hack to enable multiplication
        f_sigma = np.array([f_sigma])

    # An (N_S, N) seq_sigma holds one width per (subtype, event).
    per_event_sigma = np.shape(seq_sigma) == (N_S, N)
    seq_sigma_table = np.asarray(seq_sigma) if per_event_sigma else None

    samples_sequence = np.zeros((N_S, N, n_iterations))
    samples_f = np.zeros((N_S, n_iterations))
    samples_likelihood = np.zeros((n_iterations, 1))
    samples_sequence[:, :, 0] = seq_init  # don't need to copy as we don't write to 0 index
    samples_f[:, 0] = f_init

    rows = np.arange(N_S)
    positions = np.arange(N)

    # Reduce frequency of tqdm update to 0.1% of total for larger iteration numbers
    tqdm_update_iters = int(n_iterations / 1000) if n_iterations > 100000 else None

    for i in tqdm(range(n_iterations), "MCMC Iteration", n_iterations, miniters=tqdm_update_iters):
        if i > 0:
            # this function returns different random numbers to Matlab
            seq_order = self.global_rng.permutation(N_S)

            # All subtypes are handled in one vectorised step; row r of the
            # arrays below belongs to subtype seq_order[r].
            move_event_from = np.ceil(
                N * self.global_rng.random(len(seq_order))).astype(int) - 1
            current_sequence = samples_sequence[seq_order, :, i - 1]
            selected_event = current_sequence[rows, move_event_from]

            distance = (positions - move_event_from[:, np.newaxis]).astype(float)

            if per_event_sigma:
                # Look the width up by (subtype, selected event).  Rows of
                # ``distance`` follow seq_order and its columns are
                # positions, so the table cannot be applied element-wise.
                this_seq_sigma = seq_sigma_table[
                    seq_order, selected_event.astype(int)][:, np.newaxis]
            else:
                this_seq_sigma = seq_sigma

            weight = AbstractSustain.calc_coeff(this_seq_sigma) * AbstractSustain.calc_exp(
                distance, 0., this_seq_sigma)
            weight = np.divide(weight, weight.sum(1)[:, None])

            # One draw per subtype, in row order.
            index = [
                self.global_rng.choice(np.arange(len(row)), 1, replace=True, p=row)[0]
                for row in weight
            ]
            move_event_to = positions[index]

            # Swap the selected event with the event at the drawn position.
            new_seq = current_sequence.copy()
            new_seq[rows, move_event_from] = new_seq[rows, move_event_to]
            new_seq[rows, move_event_to] = selected_event
            samples_sequence[seq_order, :, i] = new_seq

            new_f = samples_f[:, i - 1] + f_sigma * self.global_rng.standard_normal()
            new_f = np.fabs(new_f) / np.sum(np.fabs(new_f))
            samples_f[:, i] = new_f

        S = samples_sequence[:, :, i]

        p_perm_k = np.zeros((M, N + 1, N_S))
        for s in range(N_S):
            p_perm_k[:, :, s] = self._calculate_likelihood_stage(sustainData, S[s, :])

        # Weight each subtype slice by its fraction; broadcasting over the
        # last axis gives the same product as tiling f to (M, N + 1, N_S).
        total_prob_stage = np.sum(p_perm_k * samples_f[:, i], 2)
        total_prob_subj = np.sum(total_prob_stage, 1)
        # 1e-250 keeps log() finite when a subject's probability is zero.
        likelihood_sample = np.sum(np.log(total_prob_subj + 1e-250))
        samples_likelihood[i] = likelihood_sample

        if i > 0:
            ratio = np.exp(samples_likelihood[i] - samples_likelihood[i - 1])
            if ratio < self.global_rng.random():
                # Proposal rejected: carry the previous sample forward.
                samples_likelihood[i] = samples_likelihood[i - 1]
                samples_sequence[:, :, i] = samples_sequence[:, :, i - 1]
                samples_f[:, i] = samples_f[:, i - 1]

    ml_likelihood = np.max(samples_likelihood)
    # First iteration that attains the maximum.
    perm_index = np.where(samples_likelihood == ml_likelihood)[0][0]
    ml_sequence = samples_sequence[:, :, perm_index]
    ml_f = samples_f[:, perm_index]

    return ml_sequence, ml_f, ml_likelihood, samples_sequence, samples_f, samples_likelihood
def _perform_mcmc(self, sustainData, seq_init, f_init, n_iterations, seq_sigma, f_sigma):
    """Take MCMC samples of the uncertainty in the SuStaIn model parameters.

    Sample 0 is ``(seq_init, f_init)``.  Every later iteration:

    1. visits the subtypes in the order given by
       ``MixtureSustain.randperm_local`` and, for each, removes one randomly
       chosen event and re-inserts it at a position drawn from a
       normal-shaped weight centred on its current position;
    2. perturbs the subtype fractions with one shared standard-normal draw
       scaled by ``f_sigma`` and renormalises their absolute values to sum
       to 1;
    3. scores the proposal as ``sum(log(p_subject + 1e-250))`` built from
       ``self._calculate_likelihood_stage`` and keeps it only if
       ``exp(new - previous) >= u`` for a uniform draw ``u``; otherwise the
       previous sample is copied forward.

    Random numbers come from the global ``np.random`` state.  Progress is
    printed to stdout.

    Args:
        sustainData: object providing ``getNumSamples()`` and
            ``getNumStages()``; also passed to
            ``self._calculate_likelihood_stage``.
        seq_init: initial sequences, one row per subtype; written into
            ``samples_sequence[:, :, 0]`` (shape ``(N_S, N)``).
        f_init: initial subtype fractions, written into ``samples_f[:, 0]``.
        n_iterations: number of samples to store (including the initial one).
        seq_sigma: width of the position proposal.  Either a scalar (int,
            float or NumPy scalar) shared by all events, or an array indexed
            as ``seq_sigma[subtype, event]``.
        f_sigma: scale of the fraction perturbation (float or array).

    Returns:
        Tuple ``(ml_sequence, ml_f, ml_likelihood, samples_sequence,
        samples_f, samples_likelihood)``.  ``samples_sequence`` is
        ``(N_S, N, n_iterations)``, ``samples_f`` is ``(N_S, n_iterations)``
        and ``samples_likelihood`` is ``(n_iterations, 1)``.
        ``ml_sequence`` / ``ml_f`` belong to the first iteration attaining
        the maximum stored likelihood; ``ml_likelihood`` is the one-element
        row holding that maximum.
    """
    M = sustainData.getNumSamples()
    N = sustainData.getNumStages()
    N_S = seq_init.shape[0]

    if isinstance(f_sigma, float):  # FIXME: hack to enable multiplication
        f_sigma = np.array([f_sigma])

    # Any 0-d value (int, float, NumPy scalar) is a single shared width;
    # testing only for ``int`` sent floats down the array-indexing path.
    seq_sigma_is_scalar = np.ndim(seq_sigma) == 0

    samples_sequence = np.zeros((N_S, N, n_iterations))
    samples_f = np.zeros((N_S, n_iterations))
    samples_likelihood = np.zeros((n_iterations, 1))
    samples_sequence[:, :, 0] = seq_init  # don't need to copy as we don't write to 0 index
    samples_f[:, 0] = f_init

    # Every position is a candidate in this variant.
    possible_positions = np.arange(N)

    for i in range(n_iterations):
        # Float modulo: fires only when i is an exact multiple of
        # n_iterations / 10.
        if i % (n_iterations / 10) == 0:
            print('Iteration', i, 'of', n_iterations, ',', int(float(i) / float(n_iterations) * 100.), '% complete')

        if i > 0:
            # this function returns different random numbers to Matlab
            seq_order = MixtureSustain.randperm_local(N_S)
            for s in seq_order:
                move_event_from = int(np.ceil(N * np.random.rand())) - 1
                current_sequence = samples_sequence[s, :, i - 1]

                # select an event in the sequence to move
                selected_event = int(current_sequence[move_event_from])

                distance = possible_positions - move_event_from

                if seq_sigma_is_scalar:
                    this_seq_sigma = seq_sigma
                else:
                    this_seq_sigma = seq_sigma[s, selected_event]

                # use own normal PDF because stats.norm is slow
                weight = AbstractSustain.calc_coeff(this_seq_sigma) * AbstractSustain.calc_exp(
                    distance, 0., this_seq_sigma)
                weight = weight / np.sum(weight)

                # FIXME: difficult to check this because random.choice is
                # different to Matlab randsample
                index = np.random.choice(range(len(possible_positions)), 1,
                                         replace=True, p=weight)
                # choice(..., 1) returns a 1-element array; take the scalar so
                # the slices below get a plain int.
                move_event_to = int(possible_positions[index[0]])

                # Remove the event, then re-insert it at its new position.
                remaining = np.delete(current_sequence, move_event_from, 0)
                new_sequence = np.concatenate([
                    remaining[:move_event_to],
                    [selected_event],
                    remaining[move_event_to:],
                ])
                samples_sequence[s, :, i] = new_sequence

            new_f = samples_f[:, i - 1] + f_sigma * np.random.randn()
            new_f = np.fabs(new_f) / np.sum(np.fabs(new_f))
            samples_f[:, i] = new_f

        S = samples_sequence[:, :, i]

        p_perm_k = np.zeros((M, N + 1, N_S))
        for s in range(N_S):
            p_perm_k[:, :, s] = self._calculate_likelihood_stage(sustainData, S[s, :])

        # Weight each subtype slice by its fraction; broadcasting over the
        # last axis gives the same product as tiling f to (M, N + 1, N_S).
        total_prob_stage = np.sum(p_perm_k * samples_f[:, i], 2)
        total_prob_subj = np.sum(total_prob_stage, 1)
        # 1e-250 keeps log() finite when a subject's probability is zero.
        likelihood_sample = sum(np.log(total_prob_subj + 1e-250))
        samples_likelihood[i] = likelihood_sample

        if i > 0:
            ratio = np.exp(samples_likelihood[i] - samples_likelihood[i - 1])
            if ratio < np.random.rand():
                # Proposal rejected: carry the previous sample forward.
                samples_likelihood[i] = samples_likelihood[i - 1]
                samples_sequence[:, :, i] = samples_sequence[:, :, i - 1]
                samples_f[:, i] = samples_f[:, i - 1]

    ml_likelihood = max(samples_likelihood)
    # First iteration that attains the maximum.
    perm_index = np.where(samples_likelihood == ml_likelihood)[0][0]
    ml_sequence = samples_sequence[:, :, perm_index]
    ml_f = samples_f[:, perm_index]

    return ml_sequence, ml_f, ml_likelihood, samples_sequence, samples_f, samples_likelihood