def PerformIca(X,Y,num_components,random_state):
    """Fit FastICA once on X and build reduced models from its top-scoring components.

    A single FastICA model (max_iter=800) is fitted on ``X``. Every component
    is scored with ``abs(kt(component))`` (``kt`` is presumably
    ``scipy.stats.kurtosis`` -- confirm against the file's imports). For each
    ``n`` in ``num_components`` the ``n`` highest-scoring components are kept
    and used to project and reconstruct ``X``.

    Args:
        X: data passed to ``FastICA.fit`` / ``transform``.
        Y: unused; kept so the signature stays compatible with callers.
        num_components: iterable of component counts to evaluate.
        random_state: forwarded to ``FastICA``.

    Returns:
        dict with ``"ica_kt_all"`` (absolute score of every component) and,
        for each ``n``, the keys ``"ica_<n>_mm"`` (mixing matrix),
        ``"ica_<n>_umm"`` (unmixing matrix), ``"ica_<n>_data"`` (projected
        data) and ``"ica_<n>_reconstruction_error"``.
    """
    result = {}
    algo = FastICA(random_state=random_state, max_iter=800)
    algo.fit(X)
    full_mixing_matrix = algo.mixing_
    full_unmixing_matrix = algo.components_

    # Rank components by absolute score, largest first.
    kt_value = np.abs(kt(algo.transform(X)))
    largest_kt_values_idx = np.argsort(kt_value)[::-1]
    result["ica_kt_all"] = kt_value

    for n in num_components:
        prefix = "ica_" + str(n) + "_"
        component_idx_to_select = largest_kt_values_idx[:n]
        # Components are columns of the mixing matrix and rows of the
        # unmixing matrix, so select along the matching axis of each.
        mixing_matrix = full_mixing_matrix[:, component_idx_to_select]
        unmixing_matrix = full_unmixing_matrix[component_idx_to_select, :]

        # Overwrite the fitted attributes so transform / inverse_transform
        # operate on the selected components only. `algo` is local to this
        # function, so the mutation is not visible to callers.
        algo.components_ = unmixing_matrix
        algo.mixing_ = mixing_matrix

        result[prefix + "mm"] = mixing_matrix
        result[prefix + "umm"] = unmixing_matrix

        projected = algo.transform(X)
        result[prefix + "data"] = projected
        X_recons = algo.inverse_transform(projected)
        result[prefix + "reconstruction_error"] = ComputeReconstructionSSE(X, X_recons)
    return result
Пример #2
0
	def rank_loss_kt(self,test_fn,test_rows = []):
		"""Return the mean per-user rank correlation between predictions and ratings.

		For every user id in ``range(self.nu)`` the rows of ``self.test_data``
		whose column 0 equals that id are selected, and ``kt`` (presumably
		``scipy.stats.kendalltau`` -- confirm against the file's imports) is
		computed between the user's predictions and column 2 of those rows.

		Per-user handling:
		  * no rows               -> user skipped
		  * exactly one row       -> prints a message and the whole call returns 0
		  * all ratings identical -> user skipped
		  * all predictions identical -> contributes a correlation of 0

		Args:
			test_fn: forwarded to ``self.preds``.
			test_rows: unused by this method.

		Returns:
			Mean of the collected per-user correlations, or 0 if any user has
			exactly one rating.
		"""
		preds = self.preds(test_fn)

		taus = []
		for u in range(self.nu):
			u_mask = self.test_data[:,0] == u
			u_ratings = self.test_data[u_mask,:]
			u_preds = preds[u_mask]
			nrat = u_ratings.shape[0]
			if nrat == 0:
				continue
			if nrat == 1:
				print("Insufficient ratings per user")
				return 0
			if np.all(u_ratings[:,2] == u_ratings[0,2]):
				continue
			if np.all(u_preds == u_preds[0]):
				# Constant predictions carry no ranking information. Record 0
				# and move on: falling through would append a second (NaN)
				# value for the same user and turn the final mean into NaN.
				taus.append(0)
				continue

			taus.append(kt(u_preds,u_ratings[:,2])[0])

		print(len(taus))
		tau = np.mean(np.array(taus))

		return tau
Пример #3
0
def get_kt(rsm1, rsm2):
    '''Return Kendall's tau between two square (dis)similarity matrices.

    Each matrix is converted to its condensed vector form with
    scipy.spatial.distance.squareform, which expects a symmetric matrix with
    a zero diagonal, and the two vectors are then correlated with kt.

    NOTE: if kt is scipy.stats.kendalltau (presumably -- confirm against the
    file's imports), its default variant is tau-b, not tau-a.
    '''
    vec_rsm1 = scipy.spatial.distance.squareform(rsm1)
    vec_rsm2 = scipy.spatial.distance.squareform(rsm2)
    return kt(vec_rsm1, vec_rsm2).correlation
Пример #4
0
 def get_kendall_tau_measures(self, rankings, competitors):
     """Return the mean and standard deviation of per-query correlations.

     For every query in ``rankings`` the ranked list is converted with
     ``self.transition_to_rank_vector`` and compared, via ``kt``, against the
     reference order ``1..params.number_of_competitors``. Only the first
     element of each ``kt`` result (the correlation) is kept.

     Returns:
         ``(mean, std)`` of the collected correlations, computed with NumPy.
     """
     reference_order = range(1, params.number_of_competitors + 1)

     taus = []
     for query in rankings:
         as_rank_vector = self.transition_to_rank_vector(
             query, competitors[query], rankings[query])
         tau, _ = kt(reference_order, as_rank_vector)
         taus.append(tau)

     return np.mean(taus), np.std(taus)
Пример #5
0
def get_kt(rsm1,rsm2):
    '''Return Kendall's tau between two square matrices after zeroing their diagonals.

    The diagonals are set to 0, each matrix is flattened with vectorize
    (defined elsewhere in this file), and the two vectors are correlated
    with kt.

    The inputs are copied first: np.fill_diagonal writes in place, so without
    the copy the caller's matrices would be silently modified.

    NOTE: if kt is scipy.stats.kendalltau (presumably -- confirm against the
    file's imports), its default variant is tau-b, not tau-a.
    '''
    rsm1 = np.array(rsm1, copy=True, subok=True)
    rsm2 = np.array(rsm2, copy=True, subok=True)
    np.fill_diagonal(rsm1, 0)
    np.fill_diagonal(rsm2, 0)

    vec_rsm1 = vectorize(rsm1)
    vec_rsm2 = vectorize(rsm2)
    return kt(vec_rsm1, vec_rsm2).correlation
def PerformIca2(X,Y,num_components,random_state):
    """Fit one FastICA model per requested component count and report on each.

    For every ``n`` in ``num_components`` a fresh ``FastICA(n_components=n)``
    is fitted on ``X``. The fitted model and the reconstruction error of
    ``X`` are stored, and the error, the mean absolute ``kt`` score and the
    sorted absolute scores are printed.

    Args:
        X: data to fit and transform.
        Y: unused.
        num_components: iterable of component counts.
        random_state: forwarded to ``FastICA``.

    Returns:
        dict with ``"ica_<n>_algo"`` and ``"ica_<n>_reconstruction_error"``
        for each ``n``.
    """
    result = {}
    for n in num_components:
        key_base = "ica_" + str(n) + "_"
        error_key = key_base + "reconstruction_error"

        model = FastICA(n_components=n, random_state=random_state)
        model.fit(X)
        result[key_base + "algo"] = model

        sources = model.transform(X)
        rebuilt = model.inverse_transform(sources)
        result[error_key] = ComputeReconstructionSSE(X, rebuilt)

        abs_scores = np.abs(kt(sources))
        mean_score = abs_scores.mean()
        report = "ICA num dim {0} : reconstruction error {1} avg kt {2}".format(
            str(n), str(result[error_key]), str(mean_score))
        print(report)
        print(np.sort(abs_scores))
    return result
Пример #7
0
 def test_kt(self):
     """kt(..., fisher=False) must agree with get_kurtosis on self.data within 1e-6."""
     observed = kt(self.data, fisher=False)
     reference = get_kurtosis(self.data)
     self.assertAlmostEqual(
         observed,
         reference,
         delta=1e-6,
         msg="Kurtosis not within bounds",
     )
Пример #8
0
    def competition(self, cost_model):
        """Run the iterative competition and collect per-iteration statistics.

        For ``self.num_of_iterations`` rounds the competitors' feature vectors
        are updated through ``self.budget_creator`` / ``self.update_competitors``,
        new rankings are computed, and the new ranking of every query is
        compared (via ``kt``) with the previous round's ranking and with the
        initial ranking.

        Args:
            cost_model: not referenced anywhere in this method body.

        Returns:
            A dict with a single key, ``self.budget_creator.model``, whose
            value is a results dict. The keys ``"kendall"``, ``"cos"``,
            ``"winner"``, ``"orig"``, ``"win_rank"`` and ``"avg_f"`` each hold
            an ``(x_axis, values)`` tuple with one entry per iteration;
            ``"whoisthewinner"`` and ``"originalwinnerrank"`` hold count dicts
            that are filled only during the last iteration.

        Raises:
            ZeroDivisionError: if, in some iteration, no query yields a
                non-NaN correlation between old and new rank (``denominator``
                stays 0).

        Note:
            The ``print`` statements use Python 2 syntax.
        """
        results = {}
        # NOTE(review): query_tagged is never read in this method.
        query_tagged = {}
        competitors = self.budget_creator.get_competitors_for_query(
            self.score_file, self.number_of_competitors)
        # dumps/loads round trip through `cp` (presumably pickle/cPickle --
        # confirm) to obtain a copy that later reassignment of `competitors`
        # does not affect; it serves as the initial-ranking reference.
        reference_of_indexes = cp.loads(cp.dumps(competitors, 1))
        document_feature_index = self.budget_creator.index_features_for_competitors(
            competitors, self.data_set_location, True)
        # Copy of the initial feature vectors, taken before any update.
        original_vectors = cp.loads(cp.dumps(document_feature_index, -1))
        model_weights_per_fold_index = self.budget_creator.get_chosen_model_weights_for_fold(
            self.chosen_models)
        # Per-iteration series; each gets exactly one append per iteration.
        x_axis = []
        y_axis = []
        changed_winner_averages = []
        average_distances = []
        original_reference = []
        average_winner_rank = []
        average_feature_number = []
        # Count dicts populated only in the final iteration (see below).
        last_winner_original_rank = {}
        original_winner_final_rank = {}
        for iteration in range(0, self.num_of_iterations):

            print "iteration number ", iteration + 1
            sum_of_kendalltau = 0
            average_distance = self.budget_creator.create_budget_per_query(
                self.fraction, document_feature_index)
            cost_index, value_for_change = self.budget_creator.create_items_for_knapsack(
                competitors, document_feature_index,
                model_weights_per_fold_index, self.query_per_fold,
                original_vectors)
            print "getting features to change"
            features_to_change, avg_feature_num = self.get_features_to_change(
                competitors, cost_index, value_for_change,
                document_feature_index, original_vectors)
            print "got features to change"
            average_feature_number.append(avg_feature_num)
            print "updating competitors"
            # update_competitors receives a copy of the current feature index;
            # its return value becomes the new document_feature_index.
            document_feature_index = self.update_competitors(
                features_to_change,
                cp.loads(cp.dumps(document_feature_index, -1)),
                value_for_change)
            print "update complete"
            print "getting new rankings"
            competitors_new = self.get_new_rankings(
                reference_of_indexes, document_feature_index,
                model_weights_per_fold_index, self.query_per_fold)
            print "finished new rankings"
            number_of_time_winner_changed = 0
            # Counts queries whose old-vs-new correlation is not NaN; every
            # per-iteration average below is divided by this value.
            denominator = 0
            sum_of_original_kt = 0
            sum_rank_of_winner = 0
            for query in competitors_new:
                # Rank vectors for the previous round, this round, and the
                # initial ranking, all built by transition_to_rank_vector.
                old_rank = self.transition_to_rank_vector(
                    query, reference_of_indexes, competitors[query])
                new_rank = self.transition_to_rank_vector(
                    query, reference_of_indexes, competitors_new[query])
                orig_rank = self.transition_to_rank_vector(
                    query, reference_of_indexes, reference_of_indexes[query])
                if iteration + 1 == self.num_of_iterations:
                    # Last iteration only: histogram of the 1-based position
                    # at which the value 1 occurs in new_rank (presumably the
                    # initial position of the final winner -- confirm against
                    # transition_to_rank_vector).
                    if not last_winner_original_rank.get(
                            new_rank.index(1) + 1, False):
                        last_winner_original_rank[new_rank.index(1) + 1] = 0
                    last_winner_original_rank[new_rank.index(1) + 1] += 1
                    # Histogram of new_rank[0] (presumably the final rank of
                    # the initially top-ranked document -- confirm).
                    if not original_winner_final_rank.get(new_rank[0], False):
                        original_winner_final_rank[new_rank[0]] = 0
                    original_winner_final_rank[new_rank[0]] += 1
                kendall_tau, p_value = kt(old_rank, new_rank)

                # Queries with a NaN correlation are excluded from the
                # kendall, winner-changed and winner-rank statistics.
                if not math.isnan(kendall_tau):
                    sum_of_kendalltau += kendall_tau
                    denominator += 1
                    if old_rank.index(1) != new_rank.index(1):
                        number_of_time_winner_changed += 1
                    sum_rank_of_winner += new_rank[0]

                original_kt, p_val = kt(new_rank, orig_rank)
                # NOTE(review): this sum has its own NaN filter but is later
                # divided by `denominator`, which counts the old-vs-new
                # non-NaN cases; the two counts may differ -- confirm intent.
                if not math.isnan(original_kt):
                    sum_of_original_kt += original_kt
            print "number of times winner changed ", number_of_time_winner_changed

            # NOTE(review): raises ZeroDivisionError when denominator == 0.
            average = sum_of_kendalltau / denominator
            average_distances.append(average_distance)
            changed_winner_averages.append(
                float(number_of_time_winner_changed) / denominator)
            average_winner_rank.append(float(sum_rank_of_winner) / denominator)
            x_axis.append(iteration + 1)
            y_axis.append(average)
            original_reference.append(float(sum_of_original_kt) / denominator)
            # This round's rankings become the "old" rankings of the next one.
            competitors = cp.loads(cp.dumps(competitors_new, -1))

        results["kendall"] = (x_axis, y_axis)
        results["cos"] = (x_axis, average_distances)
        results["winner"] = (x_axis, changed_winner_averages)
        results["orig"] = (x_axis, original_reference)
        results["win_rank"] = (x_axis, average_winner_rank)
        results["whoisthewinner"] = last_winner_original_rank
        results["originalwinnerrank"] = original_winner_final_rank
        results["avg_f"] = (x_axis, average_feature_number)
        # Wrap the results under the model identifier exposed by budget_creator.
        meta_results = {}
        meta_results[self.budget_creator.model] = results

        return meta_results
Пример #9
0
# Build three parallel lists: a binary label per sample (x) and the model
# score of that sample in log space (y_log) and exponentiated (y_exp).
x = []
y_exp = []
y_log = []

# Label 1: permutations of `sent` whose entry in `gt` equals 1.
for i, perm in enumerate(itertools.permutations(sent)):
    if gt[i] == 1:
        x.append(1)
        score = get_total_prob(model, perm)
        y_log.append(score)
        y_exp.append(math.exp(score))

# Label 0: one sample per line of bnc_grammatical.txt. The context manager
# closes the file handle deterministically.
with open('bnc_grammatical.txt') as bnc_file:
    lines = bnc_file.read().split('\n')

for line in lines:
    # Compute both values before touching the lists, so a failure in either
    # step cannot leave x / y_log / y_exp with different lengths.
    try:
        score = get_total_prob(model, line.strip().lower().split())
        exp_score = math.exp(score)
    except Exception:
        # Best effort: skip lines the model cannot score. Unlike a bare
        # `except:`, KeyboardInterrupt and SystemExit still propagate.
        print('error occur!')
    else:
        x.append(0)
        y_log.append(score)
        y_exp.append(exp_score)

# pb and kt are imported elsewhere (presumably scipy.stats.pointbiserialr and
# scipy.stats.kendalltau -- confirm); both relate the labels to the scores.
print('LOG', pb(x, y_log), kt(x, y_log))
print('EXP', pb(x, y_exp), kt(x, y_exp))