def evaluate(self, train, val, test, dim, area):
    """Evaluate memory mixed with NMF and with hierarchical-Bayes NMF.

    Per-individual mixing weights are learned on `val` from models fit on
    `train` alone; the models are then refit on `train + val` and the two
    mixtures are scored on `test` with expected rank.
    """
    log.info('Learning Memory, NMF and hb NMF mfs on train only for mixing weights optimization')
    scores = self._train_mfs(['nmf', 'hbnmf', 'memory'], train, dim, area)
    nmf_scores, hb_nmf_scores, mem_scores = scores
    mem_mult = normalize_mat_row(mem_scores)

    log.info('Learning mix for MEM and NMF')
    # Tiny flat prior so no location gets exactly zero probability under NMF.
    nmf_mult = normalize_mat_row(nmf_scores + 0.001)
    pis_mem_nmf = learn_mix_mult_on_individual(1.1, mem_mult, nmf_mult, val)

    log.info('Learning mix for MEM and hb NMF')
    hb_nmf_mult = normalize_mat_row(hb_nmf_scores + 0.001)  # same flat prior
    pis_mem_hb_nmf = learn_mix_mult_on_individual(1.1, mem_mult, hb_nmf_mult, val)

    log.info('Learning Memory NMF and hier NMF mfs on train+val for evaluation')
    combined = train + val
    nmf_scores, hb_nmf_scores, mem_scores = self._train_mfs(['nmf', 'hbnmf', 'memory'], combined, dim, area)

    # Ranking is invariant to the flat prior, so it is not re-added here.
    log.info('Evaluating memory with NMF')
    mem_nmf_erank = self._compute_erank(test, mem_scores, nmf_scores, pis_mem_nmf)
    log.info('Evaluating memory with hb_NMF')
    mem_hb_nmf_erank = self._compute_erank(test, mem_scores, hb_nmf_scores, pis_mem_hb_nmf)

    results = {'mem_nmf': mem_nmf_erank, 'mem_hb_nmf': mem_hb_nmf_erank}
    self.pretty_print(results)
    return results
def evaluate(self, train, val, test, dim, area):
    """Grid-search the memory/popularity mixing weight by log likelihood.

    Runs the identical alpha sweep twice: first with models fit on `train`
    only, then with models fit on `train + val`. Log likelihoods on both
    `val` and `test` are printed for every candidate alpha.
    """
    ALPHA = [0, 0.001, 0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99, 0.999, 1]

    def sweep(mem_mult, popularity_mult):
        # Score every candidate mixing weight on validation and test data.
        val_scores = dict()
        test_scores = dict()
        for alpha in ALPHA:
            log.info('Ranking when alpha is %.2f' % alpha)
            mixed = alpha * mem_mult + (1 - alpha) * popularity_mult
            val_scores['%.2f' % alpha] = self._compute_logp(val, mixed)
            test_scores['%.2f' % alpha] = self._compute_logp(test, mixed)
        log.info('Log likelihood on validation data')
        self.pretty_print(val_scores)
        log.info('Log likelihood on test data')
        self.pretty_print(test_scores)

    # Pass 1: models learned from training data only.
    mem_mult = normalize_mat_row(self._train_mfs(['memory'], train, dim, area)[0])
    popularity_mult = normalize_mat_row(self._train_mfs(['popularity'], train, dim, area)[0])
    log.info('Mem and popularity learnt from training data; searching alpha')
    sweep(mem_mult, popularity_mult)

    # Pass 2: models learned from training plus validation data.
    combined = train + val
    mem_mult = normalize_mat_row(self._train_mfs(['memory'], combined, dim, area)[0])
    popularity_mult = normalize_mat_row(self._train_mfs(['popularity'], combined, dim, area)[0])
    log.info('Mem and popularity learnt from training and val data; searching alpha')
    sweep(mem_mult, popularity_mult)
def evaluate(self, train, val, test, dim, area):
    """Mix memory with popularity via per-individual EM weights; report erank."""
    mem_mult = normalize_mat_row(self._train_mfs(['memory'], train, dim, area)[0])
    # Small flat prior keeps popularity strictly positive so the EM mix is
    # well defined (no location with zero probability under both models).
    popularity_mult = normalize_mat_row(self._train_mfs(['popularity'], train, dim, area)[0] + 0.001)
    pi_mem_pop = learn_mix_mult_on_individual(1.1, mem_mult, popularity_mult, val)

    # The flat prior won't change the ranking so there's no need to add it here.
    log.info('Evaluating memory with popularity')
    mem_pop_erank = self._compute_erank(test, mem_mult, popularity_mult, pi_mem_pop)

    results = {'MEMORY+POPULARITY': mem_pop_erank}
    self.pretty_print(results)
    return results
def evaluate(self, train, val, test, dim, area):
    """Grid-search the memory/popularity mixing weight by expected rank.

    Runs the identical alpha sweep twice: first with models fit on `train`
    only, then with models fit on `train + val`. Expected ranks on both
    `val` and `test` are printed for every candidate alpha.
    """
    ALPHA = [0, 0.001, 0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99, 0.999, 1]

    def sweep(mem_mult, popularity_mult):
        # Score every candidate mixing weight on validation and test data.
        val_ranks = dict()
        test_ranks = dict()
        for alpha in ALPHA:
            log.info('Ranking when alpha is %.2f' % alpha)
            mixed = alpha * mem_mult + (1 - alpha) * popularity_mult
            val_ranks['%.2f' % alpha] = self._compute_erank(val, mixed)
            test_ranks['%.2f' % alpha] = self._compute_erank(test, mixed)
        log.info('Erank on validation data')
        self.pretty_print(val_ranks)
        log.info('Erank on test data')
        self.pretty_print(test_ranks)

    # Pass 1: models learned from training data only.
    mem_mult = normalize_mat_row(self._train_mfs(['memory'], train, dim, area)[0])
    popularity_mult = normalize_mat_row(self._train_mfs(['popularity'], train, dim, area)[0])
    log.info('Mem and popularity learnt from training data; searching alpha')
    sweep(mem_mult, popularity_mult)

    # Pass 2: models learned from training plus validation data.
    combined = train + val
    mem_mult = normalize_mat_row(self._train_mfs(['memory'], combined, dim, area)[0])
    popularity_mult = normalize_mat_row(self._train_mfs(['popularity'], combined, dim, area)[0])
    log.info('Mem and popularity learnt from training and val data; searching alpha')
    sweep(mem_mult, popularity_mult)
def evaluate(self, train, val, test, dim, area):
    """Mix memory with popularity via a single global EM weight; report erank.

    Models are fit on `train`, the global mixing weight pair is learned on
    `val`, and the mixture is scored on `test` with expected rank.
    """
    mem_scores = self._train_mfs(['memory'], train, dim, area)[0]
    popularity_scores = self._train_mfs(['popularity'], train, dim, area)[0]
    mem_mult = normalize_mat_row(mem_scores)
    # Small flat prior so popularity assigns no location zero probability.
    popularity_mult = normalize_mat_row(popularity_scores + 0.001)
    pi_mem_pop = learn_mix_mult_global(1.1, mem_mult, popularity_mult, val)
    log.info('Global mixing weight is %f and %f' % (pi_mem_pop[0], pi_mem_pop[1]))
    # Sanity check that the mixing weights sum to 1. (This was a bare
    # Python 2 `print` statement -- a syntax error under Python 3.)
    log.debug('Mixing weights sum to %f' % sum(pi_mem_pop.astype(float)))
    # The flat prior won't change the ranking so there's no need to add it here.
    log.info('Evaluating memory with popularity')
    mem_pop_erank = self._compute_erank(test, mem_mult, popularity_mult, pi_mem_pop)
    results = {'MEMORY+POPULARITY': mem_pop_erank}
    self.pretty_print(results)
    return results
def logP(score_mat, test):
    """Compute per-observation and per-individual log probabilities of `test`.

    Args:
        score_mat: (I, L) nonnegative score matrix; rows are individuals,
            columns are locations.
        test: (I, L) matrix of held-out counts. NOTE(review): the indexing
            `test.sum(axis=1)[i][0]` assumes np.matrix-style semantics
            (a plain ndarray would fail) -- confirm against callers.

    Returns:
        (logp_p, logp_indiv):
            logp_p: (sum(test),) log probability of each observation under
                the globally normalized score matrix (one slot per count).
            logp_indiv: (I,) per-individual average log probability under the
                row-normalized score matrix.
    """
    n_indiv = test.shape[0]
    logp_p = np.zeros(int(test.sum()))
    logp_indiv = np.zeros(n_indiv)
    test_data = coo_matrix(test)

    # Globally normalized scores: one probability value per observed count.
    temp = score_mat / np.sum(score_mat)
    idx = 0
    for i, j, v in zip(test_data.row, test_data.col, test_data.data):
        logp_p[int(idx):int(idx + v)] = np.log(temp[i, j])
        idx += v

    # Row-normalized scores: per-individual multinomial log likelihood.
    temp = normalize_mat_row(score_mat)
    for i, j, v in zip(test_data.row, test_data.col, test_data.data):
        logp_indiv[i] += v * np.log(temp[i, j])

    # BUG FIX: the original iterated `range(I)` with `I` undefined in this
    # scope (NameError); the number of individuals is test.shape[0].
    n_obs = np.array([int(test.sum(axis=1)[i][0]) for i in range(n_indiv)])
    logp_indiv /= n_obs
    return logp_p, logp_indiv
def _learn_mix_mult(alpha, mem_mult, mf_mult, val_data, num_em_iter=100, tol=0.00001):
    r"""
    Learning the mixing weights for mixture of two multinomials. Each observation is considered
    as a data point and the mixing weights (\pi) are learned using all the points.

    NOTE: In order for the algorithm to work, there can be no location that can get 0 probability
    by both the mem_mult and the mf_mult. In my runs, I use MPE to estimate the mf_mult while
    using MLE for the mem_mult. That way the mf_mult has no 0 values.

    INPUT:
    -------
        1. alpha:       <float / (2, ) ndarray> Dirichlet prior for the pi learning. If <float>
                        is given it is treated as a flat prior. Has to be bigger than 1.
        2. mem_mult:    <(I, L) ndarray> each row is the multinomial parameter according to the
                        "self" data
        3. mf_mult:     <(I, L) ndarray> each row is the multinomial parameter according to the
                        matrix factorization
        4. val_data:    <(N, 3) ndarray> each row is [ind_id, loc_id, counts]
        5. num_em_iter: <int> number of em iterations
        6. tol:         <float> convergence threshold

    OUTPUT:
    --------
        1. pi: <(2, ) ndarray> mixing weights.

    RAISE:
    -------
        1. ValueError:
            a. alphas are not bigger than 1
            b. the multinomial's rows don't sum to 1
            c. There is a location with both mults 0 (see NOTE)
    """
    if np.any(alpha <= 1):
        raise ValueError('alpha values have to be bigger than 1')
    if np.any(np.abs(np.sum(mem_mult, axis=1) - 1) > 0.001):
        raise ValueError('mem_mult param is not a multinomial -- all rows must sum to 1')
    if np.any(np.abs(np.sum(mf_mult, axis=1) - 1) > 0.001):
        raise ValueError('mf_mult param is not a multinomial -- all rows must sum to 1')

    if type(alpha) == float or type(alpha) == int:
        alpha = np.array([alpha, alpha])

    # Track posterior log likelihood across iterations for convergence checks.
    log_like_tracker = [-np.inf]
    pi = np.array([0.5, 0.5])
    # Loop-invariant index arrays: hoisted out of the EM loop.
    rows = val_data[:, 0].astype(int)
    cols = val_data[:, 1].astype(int)
    counts = val_data[:, 2]
    start = time.time()
    for em_iter in range(1, num_em_iter + 1):
        # Every 5 iterations compute the posterior log probability to test convergence.
        if em_iter % 5 == 0:
            point_likelihood = pi[0] * mem_mult[rows, cols] + pi[1] * mf_mult[rows, cols]
            # The data likelihood was computed for each location, but it should be in the power
            # of the number of observations there, or a product in the log space.
            data_likelihood = np.log(point_likelihood) * counts
            prior_probability = dirch.logpdf(pi, alpha=alpha)
            log_likelihood = np.mean(data_likelihood + prior_probability)
            if np.abs(log_likelihood - log_like_tracker[-1]) < tol:
                break
            log_like_tracker.append(log_likelihood)

        # E-Step: responsibilities of each component for every observation.
        # BUG FIX: the original evaluated `np.all(resp == 0)` while `resp` was
        # still a Python list, which compares to the scalar False and never
        # triggers; the check must run on the ndarray.
        resp = np.array([pi[0] * mem_mult[rows, cols],
                         pi[1] * mf_mult[rows, cols]]).T
        if np.all(resp == 0):
            raise ValueError('0 mix probability')
        resp = normalize_mat_row(resp)

        # M-Step. Only on the \pi with Dirichlet prior alpha > 1
        pi = np.sum(resp * col_vector(counts), axis=0)
        pi += alpha - 1
        pi /= np.sum(pi)

    total_time = time.time() - start
    log.debug('Finished EM. Total time = %d secs -- %.3f per iteration' % (total_time, total_time / em_iter))
    return pi
def evaluate(self, train, val, test, dim, area):
    """Compare six memory-smoothing strategies on held-out data.

    Strategies: EM with a global mixing weight, EM with per-individual
    weights, linear score smoothing (S_mem), a Dirichlet prior on counts,
    and two mutual-information-based "statistical translation" models
    (Jelinek-Mercer and Dirichlet smoothed). Hyper-parameters are chosen
    by grid search on `val`; results are reported on `test`.

    Returns:
        (logP_p, logP_indiv, mix_alpha): DataFrames with one column per
        strategy -- per-observation log probabilities, per-individual
        average log probabilities, and the effective per-individual
        memory weight of each strategy.
    """
    def logP(score_mat, test):
        # Per-observation and per-individual log probabilities of `test`
        # under `score_mat`. Note: `I` is taken from the enclosing scope
        # (set below via `I, L = train.shape`), so this closure must only
        # be called after that assignment.
        logp_p = np.zeros(int(test.sum()))
        logp_indiv = np.zeros(test.shape[0])
        test_data = coo_matrix(test)
        # Globally normalized scores -> one probability slot per count.
        temp = score_mat / np.sum(score_mat)
        idx = 0
        for i, j, v in zip(test_data.row, test_data.col, test_data.data):
            logp_p[int(idx):int(idx + v)] = np.log(temp[i, j])
            idx += v
        # Row-normalized scores -> per-individual multinomial likelihood.
        temp = normalize_mat_row(score_mat)
        for i, j, v in zip(test_data.row, test_data.col, test_data.data):
            logp_indiv[i] += v * np.log(temp[i, j])
        n_train = np.array([int(test.sum(axis=1)[i][0]) for i in range(I)])
        logp_indiv /= n_train
        return logp_p, logp_indiv

    # Candidate memory weights for the S_mem and Dirichlet grid searches.
    ALPHA = np.arange(0.1, 1.1, 0.1)
    mem_scores = self._train_mfs(['memory'], train, dim, area)[0]
    # Small additive constant keeps popularity scores strictly positive.
    popularity_scores = self._train_mfs(['popularity'], train, dim, area)[0] + 0.0001
    mem_mult = normalize_mat_row(mem_scores)
    popularity_mult = normalize_mat_row(popularity_scores)
    N = int(np.sum(mem_scores))  # total number of training observations
    I, L = train.shape  # individuals x locations
    n_train = np.array([int(train.sum(axis=1)[i][0]) for i in range(I)])
    results = dict()
    headers = [
        'EM global', 'EM indiv', 'S_mem', 'Dirichlet', 'Translation_JM',
        'Translation_Dirichlet'
    ]
    logP_p = DataFrame(np.zeros((int(test.sum()), 6)), columns=headers)
    logP_indiv = DataFrame(np.zeros((I, 6)), columns=headers)
    mix_alpha = DataFrame(np.zeros((I, 6)), columns=headers)

    log.info('#####learning statistical translation model#######')
    log.info('computing sparse mutual information')
    # PPMI between locations from binarized co-occurrence counts.
    binary = (train > 0) * 1  # I*L
    count_1d = binary.sum(axis=0)  # 1*L
    count_2d = np.dot(binary.T, binary)  # L*L
    P_1d = count_1d / I  # exists zeros
    P_2d = count_2d / I
    temp = P_2d / np.outer(P_1d, P_1d)
    temp[~np.isfinite(temp)] = 1  # zero / zero = zero
    temp[temp == 0] = 1  # avoid log_zero
    PPMI = np.log2(temp)
    PPMI[PPMI < 0] = 0
    # Keep only the top-k PPMI neighbors of each location.
    k = 50
    idx = np.array([[
        j for j in np.asarray(PPMI[i].argsort().T).reshape(-1)[-k:][::-1]
        if PPMI[i, j] > 0
    ] for i in range(L)])
    # Ensure every location is its own neighbor (self transition).
    for u in range(L):
        if u not in idx[u]:
            idx[u].append(u)
    binary = (np.array(train.toarray()) > 0) * 1  # I*L
    # Exact mutual information, computed only for the selected neighbor pairs.
    MI = np.zeros((L, L))
    from sklearn import metrics
    for u in range(L):
        for w in idx[u]:
            if MI[u, w] == 0:
                MI[u, w] = metrics.mutual_info_score(
                    None, None,
                    contingency=np.histogram2d(binary[:, u], binary[:, w])[0])
                MI[w, u] = MI[u, w]
    MI = normalize_mat_row(MI)
    MI[~np.isfinite(MI)] = 1 / L  # rows with no neighbors -> uniform

    ##########and self transition probability########
    log.info(
        'gridsearching on validation set (can be optimized) with JM smoothing'
    )
    val_result = dict()
    for alpha in [0.5, 0.6, 0.7, 0.8, 0.9, 1.0]:
        for mu in [0, 0.1, 0.2, 0.3, 0.4, 0.5]:
            trans = MI * (1 - alpha) + np.identity(L) * alpha
            pref = np.dot(
                mem_mult, trans)  # consider each trans[i] as a base vector
            temp = pref * mu + popularity_mult * (1 - mu)
            val_result[(alpha, mu)] = self._compute_logp_point(val, temp)
    #####choose alpha and mu that achieves best avg. point logP
    alpha, mu = max(val_result, key=val_result.get)
    trans = MI * (1 - alpha) + np.identity(L) * alpha
    pref = np.dot(mem_mult, trans)
    stm_scores = pref * mu + popularity_mult * (1 - mu)
    log.info('Evaluating MI based translation model with JM smoothing')
    stm_result = self._compute_erank_logp(test, stm_scores)
    results['Translation_JM'] = stm_result
    log.info("self transition weight and popularity weight: %f, %f" %
             (alpha, 1 - mu))
    #####record results and mixture parameters########
    logP_p['Translation_JM'], logP_indiv['Translation_JM'] = logP(
        stm_scores, test)
    mix_alpha['Translation_JM'] = np.zeros(I) + mu * alpha

    ##########and self transition probability########
    log.info(
        'gridsearching on validation set (can be optimized) with Dirichlet prior'
    )
    val_result = dict()
    for alpha in [0.5, 0.6, 0.7, 0.8, 0.9, 1.0]:
        for mu in [0, 0.1, 0.2, 0.3, 0.4, 0.5]:
            trans = MI * (1 - alpha) + np.identity(L) * alpha
            pref = np.dot(
                mem_scores, trans)  # consider each trans[i] as a base vector
            temp = pref + popularity_mult * mu * N / I
            val_result[(alpha, mu)] = self._compute_logp_point(val, temp)
    #####choose alpha and mu that achieves best avg. point logP
    alpha, mu = max(val_result, key=val_result.get)
    trans = MI * (1 - alpha) + np.identity(L) * alpha
    pref = np.dot(mem_scores, trans)
    stm_scores = pref + popularity_mult * mu * N / I
    log.info('Evaluating MI based translation model with Dirichlet prior')
    stm_result = self._compute_erank_logp(test, stm_scores)
    results['Translation_Dirichlet'] = stm_result
    log.info("self transition weight and prior strength: %f, %f" %
             (alpha, mu * N / I))
    #####record results and mixture parameters########
    logP_p['Translation_Dirichlet'], logP_indiv[
        'Translation_Dirichlet'] = logP(stm_scores, test)
    mix_alpha['Translation_Dirichlet'] = n_train * alpha / (n_train +
                                                            mu * N / I)

    log.info('#############learning EM global#################')
    pi_mem_pop = learn_mix_mult_global(1.1, mem_mult, popularity_mult, val)
    log.info('Global mixing weight is %f and %f' %
             (pi_mem_pop[0], pi_mem_pop[1]))
    log.info('Evaluating EM global')
    em_global_scores = pi_mem_pop[0] * mem_mult + pi_mem_pop[
        1] * popularity_mult
    EM_global_result = self._compute_erank_logp(test, em_global_scores)
    results['EM global'] = EM_global_result
    logP_p['EM global'], logP_indiv['EM global'] = logP(
        em_global_scores, test)
    mix_alpha['EM global'] = pi_mem_pop[0] + np.zeros(I)

    log.info('#############learning EM individual##############')
    pi_mem_pop = learn_mix_mult_on_individual(1.1, mem_mult,
                                              popularity_mult, val)
    log.info('Evaluating EM indiv')
    em_indiv_scores = col_vector(pi_mem_pop[:, 0]) * mem_mult + col_vector(
        pi_mem_pop[:, 1]) * popularity_mult
    # NOTE(review): this call passes (test, mem_mult, popularity_mult, pi)
    # while every other strategy passes (test, scores) -- confirm that
    # _compute_erank_logp supports both signatures.
    EM_indiv_result = self._compute_erank_logp(test, mem_mult,
                                               popularity_mult, pi_mem_pop)
    results['EM indiv'] = EM_indiv_result
    logP_p['EM indiv'], logP_indiv['EM indiv'] = logP(
        em_indiv_scores, test)
    mix_alpha['EM indiv'] = pi_mem_pop[:, 0]

    log.info('#############learning S_memory###################')
    log.info('gridsearching on validation set')
    val_result = dict()
    for alpha in ALPHA:
        temp = mem_scores * alpha + popularity_scores * (1 - alpha)
        val_result[alpha] = self._compute_logp_point(val, temp)
    #####choose alpha that achieves best avg. point logP
    alpha = max(val_result, key=val_result.get)
    print('alpha:', alpha)
    s_mem_scores = mem_scores * alpha + popularity_scores * (1 - alpha)
    log.info('Evaluating smoothed memory')
    s_mem_result = self._compute_erank_logp(test, s_mem_scores)
    # NOTE(review): key 'S_Mem' does not match the 'S_mem' spelling used in
    # `headers` and in the DataFrame columns below -- likely unintended.
    results['S_Mem'] = s_mem_result
    n_train = np.array([int(train.sum(axis=1)[i][0]) for i in range(I)])
    temp = n_train.mean()
    logP_p['S_mem'], logP_indiv['S_mem'] = logP(s_mem_scores, test)
    mix_alpha['S_mem'] = alpha * n_train / (alpha * n_train +
                                            (1 - alpha) * temp)

    log.info('############learning with Dirichlet prior#############')
    log.info('gridsearching on validation set')
    val_result = dict()
    for alpha in ALPHA:
        temp = mem_scores + popularity_mult * alpha * N / I
        val_result[alpha] = self._compute_logp_point(val, temp)
    #####choose alpha that achieves best avg. point logP
    alpha = max(val_result, key=val_result.get)
    print('alpha:', alpha)
    dirichlet_scores = mem_scores + popularity_mult * alpha * N / I
    log.info('Evaluating with Dirichlet prior')
    dirichlet_result = self._compute_erank_logp(test, dirichlet_scores)
    results['Dirichlet'] = dirichlet_result
    logP_p['Dirichlet'], logP_indiv['Dirichlet'] = logP(
        dirichlet_scores, test)
    mix_alpha['Dirichlet'] = n_train / (n_train + alpha * N / I)

    self.pretty_print(results)
    return logP_p, logP_indiv, mix_alpha