def true_log_liklihood_per_example(example, var_in_clique, true_parameters,
                                   cardinalities):
    """Sum the log10 probabilities that ``true_parameters`` assign to ``example``.

    One term is added per clique. For a clique with several variables the
    table entry is located with ``helper.get_index_given_truth_values``; for a
    single-variable clique the entry is selected by the variable's own value.

    An entry that is exactly 0 or 1 is shifted by 1e-5 before its logarithm is
    taken, and the entry of the tuple whose last value is flipped (0 <-> 1) is
    shifted the opposite way. These shifts are written back into
    ``true_parameters``, so the caller's tables are modified.

    :param example: values of all variables; it is indexed with a list of
        variable ids, so presumably a NumPy array (TODO confirm with callers)
    :param var_in_clique: for every clique, the ids of its variables
    :param true_parameters: per-clique probability tables, updated in place
    :param cardinalities: per-variable cardinalities, indexed like ``example``
    :return: accumulated log10-likelihood of ``example``
    """
    total = 0
    for clique_no in range(len(var_in_clique)):
        members = list(var_in_clique[clique_no])
        member_cardinalities = cardinalities[members]
        observed = example[members]

        if len(members) <= 1:
            # Single-variable clique: only the first entry is inspected and
            # entry 1 is moved in the opposite direction.
            first_entry = true_parameters[clique_no][0]
            if first_entry == 0:
                true_parameters[clique_no][0] += pow(10, -5)
                true_parameters[clique_no][1] -= pow(10, -5)
            elif first_entry == 1:
                true_parameters[clique_no][0] -= pow(10, -5)
                true_parameters[clique_no][1] += pow(10, -5)
            total += log10(true_parameters[clique_no][observed[0]])
            continue

        index = helper.get_index_given_truth_values(
            members, observed, member_cardinalities)
        entry = true_parameters[clique_no][index]
        if entry == 0 or entry == 1:
            # Move a degenerate entry away from 0/1 and compensate on the
            # entry of the tuple that differs only in its last value.
            shift = pow(10, -5) if entry == 0 else -pow(10, -5)
            true_parameters[clique_no][index] += shift
            if observed[-1] == 0:
                observed[-1] = 1
            elif observed[-1] == 1:
                observed[-1] = 0
            sibling = helper.get_index_given_truth_values(
                members, observed, member_cardinalities)
            true_parameters[clique_no][sibling] -= shift
        total += log10(true_parameters[clique_no][index])
    return total
def get_parameters_given_weighted_example(examples, weights, var_in_clique, cardinalities_of_var, number_of_k):
    """Estimate per-component parameters and mixing weights from weighted examples.

    The last column of ``examples`` is read as the mixture component of each
    row. For every component ``k`` and every clique in ``var_in_clique[k]``
    the weighted fraction of rows (restricted to component ``k``) whose last
    clique variable equals 0 is computed:

    * cliques with several variables yield one value per parent
      configuration, indexed by ``get_index_given_truth_values`` applied to
      all but the last variable;
    * single-variable cliques yield one scalar.

    Estimates equal to 0 (including 0/0, which is mapped to 0) become 1e-5 and
    estimates equal to 1 become 1 - 1e-5.

    Cliques are read directly from the nested ``var_in_clique`` sequence
    instead of converting the whole structure with ``array(...)``: cliques of
    different sizes form a ragged structure, which recent NumPy versions
    refuse to turn into an array.

    :param examples: 2-D data, one row per example, component id in the last column
    :param weights: one weight per example
    :param var_in_clique: ``var_in_clique[k]`` lists the cliques (variable ids)
        of component ``k``
    :param cardinalities_of_var: cardinality of every variable
    :param number_of_k: number of mixture components
    :return: ``(parameters_for_network, parameters_for_k)`` where
        ``parameters_for_network[k]`` holds one entry per clique of component
        ``k`` and ``parameters_for_k`` is the weight-normalised component
        distribution
    """
    epsilon = 1e-5
    cardinalities_of_var = array(cardinalities_of_var)
    examples = array(examples)
    # Component assignment of every row; identical for all cliques.
    val_of_k = examples[:, -1]

    parameters_for_network = [[] for _ in range(number_of_k)]
    for each_val_of_k in range(number_of_k):
        for clique in var_in_clique[each_val_of_k]:
            var_in_this_clique = list(clique)
            vals_for_this_clique = examples[:, var_in_this_clique]
            if len(var_in_this_clique) > 1:
                # Parameters are indexed by the parent variables only.
                parent_vars = var_in_this_clique[:-1]
                parent_cardinalities = cardinalities_of_var[parent_vars]
                number_of_parameters = int(parent_cardinalities.prod())
                nums = zeros([number_of_parameters])
                denom = zeros([number_of_parameters])
                for row, weight, row_k in zip(vals_for_this_clique, weights, val_of_k):
                    if row_k != each_val_of_k:
                        continue
                    index_val_without_child = get_index_given_truth_values(
                        parent_vars, row[:-1], parent_cardinalities)
                    denom[index_val_without_child] += weight
                    if row[-1] == 0:
                        nums[index_val_without_child] += weight
                # 0/0 for unseen parent configurations becomes NaN -> 0 -> epsilon.
                parameter_val = nan_to_num(divide(nums, denom))
                parameter_val[parameter_val == 0] = epsilon
                parameter_val[parameter_val == 1] = 1 - epsilon
                parameters_for_network[each_val_of_k].append(parameter_val)
            else:
                count_of_zero = 0
                total_count = 0
                for row, weight, row_k in zip(vals_for_this_clique, weights, val_of_k):
                    if row_k != each_val_of_k:
                        continue
                    total_count += weight
                    if row[0] == 0:
                        count_of_zero += weight
                parameter = nan_to_num(divide(count_of_zero, total_count))
                if parameter == 0:
                    parameter = epsilon
                elif parameter == 1:
                    parameter = 1 - epsilon
                parameters_for_network[each_val_of_k].append(parameter)

    # Mixing weights: total weight per component over total weight.
    nums_for_k = zeros([number_of_k])
    denom_for_k = 0
    for row_k, weight_for_k in zip(val_of_k, weights):
        nums_for_k[row_k] += weight_for_k
        denom_for_k += weight_for_k
    parameters_for_k = divide(nums_for_k, denom_for_k)
    return parameters_for_network, parameters_for_k
def train(examples, var_in_clique, markov, cardinalities_of_var):
    """
    Estimate one parameter entry per clique by counting value tuples.

    Nothing is learned when ``markov`` is truthy; an empty list is returned.

    For a clique with several variables, the entry is a list with one value
    per parent configuration (all but the last variable, indexed through
    ``helper.get_index_given_truth_values``): the add-one smoothed fraction
    of examples whose last variable equals 0. For a single-variable clique
    the entry is the plain (unsmoothed) fraction of examples in which the
    variable equals 0.

    Cliques are read straight from ``var_in_clique`` and ``(row, count)``
    pairs are unpacked directly; wrapping either in ``array(...)`` builds a
    ragged array, which recent NumPy versions reject.

    :param examples: 2-D data, one row per example
    :param var_in_clique: for every clique, the ids of its variables
    :param markov: when truthy, no parameters are estimated
    :param cardinalities_of_var: cardinality of every variable
    :return: list with one entry per clique (empty when ``markov`` is truthy)
    """
    parameters = []
    if markov:
        return parameters

    cardinalities_of_var = array(cardinalities_of_var)
    examples = array(examples)
    for clique in var_in_clique:
        var_in_this_clique = list(clique)
        vals_for_this_clique = examples[:, var_in_this_clique]
        unique_vals, counts = unique(vals_for_this_clique,
                                     return_counts=True,
                                     axis=0)
        if len(var_in_this_clique) > 1:
            # Indexing of parameters is done on the parent variables only.
            parent_vars = var_in_this_clique[:-1]
            parent_cardinalities = cardinalities_of_var[parent_vars]
            child_cardinality = cardinalities_of_var[var_in_this_clique[-1]]
            number_of_parameters = int(parent_cardinalities.prod())
            # Add-one (Laplace) smoothing: +1 on the numerator and
            # +cardinality of the last variable on the denominator.
            nums = zeros([number_of_parameters]) + 1
            denom = zeros([number_of_parameters]) + child_cardinality
            for values, count in zip(unique_vals, counts):
                index_val_without_child = helper.get_index_given_truth_values(
                    parent_vars, values[:-1], parent_cardinalities)
                denom[index_val_without_child] += count
                if values[-1] == 0:
                    nums[index_val_without_child] += count
            parameter_val = nan_to_num(divide(nums, denom))
            parameters.append(list(parameter_val))
        else:
            count_of_zero = 0
            total_count = 0
            for values, count in zip(unique_vals, counts):
                total_count += count
                if int(values[0]) == 0:
                    count_of_zero += count
            parameters.append(
                nan_to_num(divide(count_of_zero, total_count)))
    return parameters
def complete_data(examples, cardinalities_of_var, parameters_for_network, parameters_for_k, var_in_clique):
    """Expand examples containing -1 into weighted completions.

    An example without any -1 is kept as is with weight 1. An example that
    contains -1 is copied once per possible value of the last variable; that
    value is written into the last position of the copy. Each copy is
    weighted by the product of the clique parameters of the corresponding
    component (the parameter itself when the last clique variable is 0, one
    minus it when it is 1) times ``parameters_for_k`` of that component. The
    weights of the copies of one example are normalised to sum to 1.

    :param examples: iterable of examples; they are passed to ``put`` and
        indexed with lists, so presumably NumPy arrays (TODO confirm)
    :param cardinalities_of_var: cardinality of every variable
    :param parameters_for_network: per component, one parameter entry per clique
    :param parameters_for_k: probability of every component
    :param var_in_clique: per component, the cliques as lists of variable ids
    :return: ``(final_examples, final_weights)``
    """
    final_examples = []
    final_weights = []
    for each_example in examples:
        if -1 not in each_example:
            # Fully observed: keep untouched with unit weight.
            final_examples.append(each_example)
            final_weights.append(1)
            continue

        cardinalities_of_var = array(cardinalities_of_var)
        # The value that gets filled in is always the last variable.
        var_not_present = len(cardinalities_of_var) - 1
        candidates = []
        raw_weights = []
        for completion in range(cardinalities_of_var[var_not_present]):
            filled = deepcopy(each_example)
            put(filled, var_not_present, completion)
            likelihood = 1
            component = filled[-1]
            # Every completion selects its own network structure.
            for clique_no, clique in enumerate(var_in_clique[completion]):
                clique_values = filled[clique]
                arity = len(clique)
                if arity > 1:
                    parent_row = get_index_given_truth_values(
                        clique[:-1],
                        clique_values[:-1],
                        cardinalities_of_var[clique[:-1]])
                    child = clique_values[-1]
                    if child == 0:
                        likelihood *= parameters_for_network[component][clique_no][parent_row]
                    elif child == 1:
                        likelihood *= (1 - parameters_for_network[component][clique_no][parent_row])
                    else:
                        print("Error")
                elif arity == 1:
                    single = clique_values[0]
                    if single == 0:
                        likelihood *= parameters_for_network[component][clique_no]
                    elif single == 1:
                        likelihood *= (1 - parameters_for_network[component][clique_no])
                    else:
                        print("Error")
            likelihood *= parameters_for_k[component]
            raw_weights.append(likelihood)
            candidates.append(filled)

        normaliser = sum(raw_weights)
        normalised = divide(raw_weights, normaliser)
        for weight, candidate in zip(normalised, candidates):
            final_examples.append(candidate)
            final_weights.append(float(weight))
    return final_examples, final_weights
def find_log_liklihood_for_one_example(example, var_in_clique, parameters,
                                       cardinalities):
    """Return the log10-likelihood of ``example`` under per-clique parameters.

    Every parameter is read as the probability that the last variable of its
    clique equals 0: ``log10(p)`` is added when that variable is 0 and
    ``log10(1 - p)`` when it is 1. For cliques with several variables ``p``
    is looked up by parent configuration through
    ``helper.get_index_given_truth_values``.

    A parameter that is exactly 0 or 1 is moved by 1e-5 before use. This
    correction is written back into ``parameters``, so the caller's
    structure is modified.

    Cliques are iterated directly rather than through
    ``array(var_in_clique)``: cliques of different sizes form a ragged
    structure, which recent NumPy versions refuse to convert.

    :param example: values of all variables
    :param var_in_clique: for every clique, the ids of its variables
    :param parameters: one entry per clique (scalar for single-variable
        cliques, indexable by parent configuration otherwise); updated in place
    :param cardinalities: cardinality of every variable
    :return: accumulated log10-likelihood; unexpected values only print a
        message and contribute nothing
    """
    epsilon = 1e-5
    log_liklihood = 0
    cardinalities = array(cardinalities)
    example = array(example)
    for each_clique, clique in enumerate(var_in_clique):
        var_in_this_clique = list(clique)
        if not var_in_this_clique:
            print("Error line ")
            continue

        values_of_tuple = example[var_in_this_clique]
        if len(var_in_this_clique) > 1:
            parents_var_in_this_clique = var_in_this_clique[:-1]
            index_for_parents = helper.get_index_given_truth_values(
                parents_var_in_this_clique, values_of_tuple[:-1],
                cardinalities[parents_var_in_this_clique])
            # Keep the probability strictly inside (0, 1) so both logs are finite.
            if parameters[each_clique][index_for_parents] == 0:
                parameters[each_clique][index_for_parents] += epsilon
            elif parameters[each_clique][index_for_parents] == 1:
                parameters[each_clique][index_for_parents] -= epsilon
            probability_of_zero = parameters[each_clique][index_for_parents]
            child = values_of_tuple[-1]
            if child == 0:
                log_liklihood += log10(probability_of_zero)
            elif child == 1:
                log_liklihood += log10(1 - probability_of_zero)
            else:
                print("Error line 37")
        else:
            if parameters[each_clique] == 0:
                parameters[each_clique] += epsilon
            elif parameters[each_clique] == 1:
                parameters[each_clique] -= epsilon
            probability_of_zero = parameters[each_clique]
            if values_of_tuple[0] == 0:
                log_liklihood += log10(probability_of_zero)
            elif values_of_tuple[0] == 1:
                log_liklihood += log10(1 - probability_of_zero)
            else:
                print("Error line 48")
    return log_liklihood
def get_parameters_given_weighted_example(examples, weights, var_in_clique, cardinalities_of_var):
    """Estimate one parameter entry per clique from weighted examples.

    For every clique the weighted fraction of examples whose last clique
    variable equals 0 is computed:

    * cliques with several variables yield an array with one value per
      parent configuration, indexed by
      ``helper.get_index_given_truth_values`` applied to all but the last
      variable;
    * single-variable cliques yield one scalar.

    Estimates equal to 0 (including 0/0, which is mapped to 0) are replaced
    by 1e-5. Estimates equal to 1 are returned unchanged.

    Cliques are read directly from ``var_in_clique`` instead of converting
    the whole structure with ``array(...)``: cliques of different sizes form
    a ragged structure, which recent NumPy versions refuse to convert.

    :param examples: 2-D data, one row per example
    :param weights: one weight per example
    :param var_in_clique: for every clique, the ids of its variables
    :param cardinalities_of_var: cardinality of every variable
    :return: list with one entry per clique
    """
    epsilon = 1e-5
    parameters = []
    cardinalities_of_var = array(cardinalities_of_var)
    examples = array(examples)
    for clique in var_in_clique:
        var_in_this_clique = list(clique)
        vals_for_this_clique = examples[:, var_in_this_clique]
        if len(var_in_this_clique) > 1:
            # Indexing of parameters is done on the parent variables only.
            parent_vars = var_in_this_clique[:-1]
            parent_cardinalities = cardinalities_of_var[parent_vars]
            number_of_parameters = int(parent_cardinalities.prod())
            nums = zeros([number_of_parameters])
            denom = zeros([number_of_parameters])
            for row, weight in zip(vals_for_this_clique, weights):
                index_val_without_child = helper.get_index_given_truth_values(
                    parent_vars, row[:-1], parent_cardinalities)
                denom[index_val_without_child] += weight
                if row[-1] == 0:
                    nums[index_val_without_child] += weight
            # 0/0 for unseen parent configurations becomes NaN -> 0 -> epsilon.
            parameter_val = nan_to_num(divide(nums, denom))
            parameter_val[parameter_val == 0] = epsilon
            parameters.append(parameter_val)
        else:
            count_of_zero = 0
            total_count = 0
            for row, weight in zip(vals_for_this_clique, weights):
                total_count += weight
                if row[0] == 0:
                    count_of_zero += weight
            parameter = nan_to_num(divide(count_of_zero, total_count))
            if parameter == 0:
                parameter += epsilon
            parameters.append(parameter)
    return parameters
def find_log_liklihood_for_one_example_mixture_model(example, var_in_clique,
                                                     parameters,
                                                     parameters_for_k,
                                                     cardinalities):
    """Return the log-likelihood of ``example`` under a mixture of networks.

    For every component ``k`` the log10 probabilities of all cliques in
    ``var_in_clique[k]`` are summed together with
    ``log10(parameters_for_k[k])``. The per-component totals are then
    combined with ``helper.log_sum_exp``.

    Every parameter is read as the probability that the last variable of its
    clique equals 0. A parameter that is exactly 0 or 1 is moved by 1e-5
    before use, and this correction is written back into ``parameters``.

    The per-component accumulator starts at 0. It previously started at 1,
    which added a constant +1 to every component's log-likelihood.

    Cliques are iterated directly rather than through
    ``array(var_in_clique)``: cliques of different sizes form a ragged
    structure, which recent NumPy versions refuse to convert.

    NOTE(review): the terms are base-10 logarithms; whether
    ``helper.log_sum_exp`` expects base-10 or natural logs cannot be seen
    from here - confirm.

    :param example: values of all variables
    :param var_in_clique: per component, the cliques as lists of variable ids
    :param parameters: per component, one parameter entry per clique;
        updated in place
    :param parameters_for_k: probability of every component
    :param cardinalities: cardinality of every variable
    :return: value of ``helper.log_sum_exp`` over the per-component totals
    """
    epsilon = 1e-5
    cardinalities = array(cardinalities)
    example = array(example)
    log_liklihood = []
    for each_val_of_k in range(len(parameters_for_k)):
        component_parameters = parameters[each_val_of_k]
        log_liklihood_for_this_val_of_k = 0
        for each_clique, clique in enumerate(var_in_clique[each_val_of_k]):
            var_in_this_clique = list(clique)
            if not var_in_this_clique:
                print("Error line ")
                continue

            values_of_tuple = example[var_in_this_clique]
            if len(var_in_this_clique) > 1:
                parents_var_in_this_clique = var_in_this_clique[:-1]
                index_for_parents = helper.get_index_given_truth_values(
                    parents_var_in_this_clique, values_of_tuple[:-1],
                    cardinalities[parents_var_in_this_clique])
                # Keep the probability strictly inside (0, 1) so both logs
                # are finite.
                if component_parameters[each_clique][index_for_parents] == 0:
                    component_parameters[each_clique][index_for_parents] += epsilon
                elif component_parameters[each_clique][index_for_parents] == 1:
                    component_parameters[each_clique][index_for_parents] -= epsilon
                probability_of_zero = component_parameters[each_clique][index_for_parents]
                child = values_of_tuple[-1]
                if child == 0:
                    log_liklihood_for_this_val_of_k += log10(probability_of_zero)
                elif child == 1:
                    log_liklihood_for_this_val_of_k += log10(
                        1 - probability_of_zero)
                else:
                    print("Error line 37")
            else:
                if component_parameters[each_clique] == 0:
                    component_parameters[each_clique] += epsilon
                elif component_parameters[each_clique] == 1:
                    component_parameters[each_clique] -= epsilon
                probability_of_zero = component_parameters[each_clique]
                if values_of_tuple[0] == 0:
                    log_liklihood_for_this_val_of_k += log10(probability_of_zero)
                elif values_of_tuple[0] == 1:
                    log_liklihood_for_this_val_of_k += log10(
                        1 - probability_of_zero)
                else:
                    print("Error line 48")
        log_liklihood_for_this_val_of_k += log10(
            parameters_for_k[each_val_of_k])
        log_liklihood.append(log_liklihood_for_this_val_of_k)
    return helper.log_sum_exp(log_liklihood)
# Example #8
# 0
def sampling_VE(num_of_var, cardinalities, num_of_cliques,
                num_of_var_in_clique, var_in_clique, distribution_array,
                w_cutset_bound, num_samples):
    """
    Estimate Z (or a probability) by sampling the w-cutset and running
    variable elimination on the rest.

    For every sample, each cutset variable gets a value drawn with
    ``randint``, the model is instantiated on that evidence, and the result
    of variable elimination is divided by the proposal value ``Q`` of the
    sample to give its weight. The mean of the weights is returned.

    ``Q`` starts as a constant. When the cutset is non-empty it is refreshed
    after every 100th sample (except the first) to the normalised
    accumulated weight of each assignment seen so far.

    Changes from the earlier revision:
      * the cutset check used ``distribution_array_X is list``, which
        compares against the type object and is always False; an explicit
        flag is used instead, so the refresh is skipped when the cutset is
        empty;
      * the final ``(weights[1:] == weights[:-1]).all`` test never called
        ``all`` and was therefore always truthy, so the sum of weights was
        always used; that effective behaviour is kept and the unreachable
        ``helper.logsumexp`` branch is removed.

    @param num_of_var: The number of variables in the PGM
    @param cardinalities: The cardinalities of the PGM
    @param num_of_cliques: The number of cliques in the PGM
    @param num_of_var_in_clique: The number of variables in each clique
    @param var_in_clique: The variables in each clique
    @param distribution_array: The distribution array for given PGM
    @param w_cutset_bound: The cutset bound for the PGM
    @param num_samples: The number of samples for the algorithm
    @return: The predicted value of Z or probability
    """
    X = w_cutset(num_of_var, var_in_clique, w_cutset_bound)
    cardinalities = np.array(cardinalities)
    cardinalities_of_x = cardinalities[X]
    has_cutset = len(X) != 0
    if has_cutset:
        # NOTE(review): a uniform distribution over the cutset assignments
        # would be 1 / prod(cardinalities); the original formula
        # 1 / sum(log10(cardinalities)) is kept unchanged - confirm intent.
        uniform_dist = 1 / np.sum(np.log10(cardinalities_of_x))
    else:
        uniform_dist = 1

    # Proposal values per assignment, keyed by str(list_of_values); empty
    # until the first refresh, so uniform_dist is used until then.
    adaptive_proposal = {}
    num = {}
    denom = 0
    weights = []
    for each_N in range(num_samples):
        distribution_array1 = distribution_array.copy()
        var_in_clique1 = var_in_clique.copy()
        num_of_var_in_clique1 = num_of_var_in_clique.copy()
        # NOTE(review): assumes randint includes both bounds (random.randint)
        # - confirm against the file's imports.
        evidence = [(variable, randint(0, cardinality - 1))
                    for variable, cardinality in zip(X, cardinalities_of_x)]
        var_in_clique1, distribution_array1 = helper.instantiate(
            num_of_var, evidence, cardinalities, var_in_clique1,
            distribution_array1)
        var_elem_sol = variable_elimination.variable_elimination(
            num_of_var, cardinalities, num_of_cliques, num_of_var_in_clique1,
            var_in_clique1, distribution_array1, evidence)

        sample_string = str([one_value for (_, one_value) in evidence])
        Q = adaptive_proposal.get(sample_string, uniform_dist)
        weight = var_elem_sol / Q
        weights.append(weight)
        denom += weight
        num[sample_string] = num.get(sample_string, 0) + weight

        if has_cutset and each_N % 100 == 0 and each_N != 0:
            adaptive_proposal = {
                assignment: accumulated / denom
                for assignment, accumulated in num.items()
            }

    z = np.sum(np.array(weights))
    return z / num_samples