def true_log_liklihood_per_example(example, var_in_clique, true_parameters, cardinalities):
    """Return the base-10 log-likelihood of one example under the given tables.

    Every clique contributes ``log10`` of the table entry selected by the
    example's values for that clique.  Before the logarithm is taken, an entry
    that is exactly 0 or 1 is moved by 1e-5 and a companion entry is moved by
    the same amount in the opposite direction.  These adjustments are written
    into ``true_parameters``, so the argument is modified in place.

    :param example: assignment of all variables; must support indexing with a
        list of variable ids
    :param var_in_clique: for each clique, the ids of the variables in it
    :param true_parameters: one indexable table per clique (mutated in place)
    :param cardinalities: per-variable cardinalities; must support indexing
        with a list of variable ids
    :return: the sum of the per-clique ``log10`` values
    """
    epsilon = pow(10, -5)
    total = 0
    for clique_id, clique in enumerate(var_in_clique):
        clique_vars = list(clique)
        clique_cards = cardinalities[clique_vars]
        assignment = example[clique_vars]

        if len(clique_vars) > 1:
            index = helper.get_index_given_truth_values(
                clique_vars, assignment, clique_cards)
            table = true_parameters[clique_id]
            entry = table[index]
            if entry == 0 or entry == 1:
                # Push the entry away from the boundary ...
                shift = epsilon if entry == 0 else -epsilon
                table[index] += shift
                # ... and compensate on the entry whose last variable is
                # flipped between 0 and 1 (other values are left untouched).
                flipped = assignment
                last_value = flipped[-1]
                if last_value == 0:
                    flipped[-1] = 1
                elif last_value == 1:
                    flipped[-1] = 0
                other_index = helper.get_index_given_truth_values(
                    clique_vars, flipped, clique_cards)
                table[other_index] -= shift
            total += log10(table[index])
        else:
            table = true_parameters[clique_id]
            first_entry = table[0]
            if first_entry == 0:
                table[0] += epsilon
                table[1] -= epsilon
            elif first_entry == 1:
                table[0] -= epsilon
                table[1] += epsilon
            total += log10(table[assignment[0]])
    return total
def get_parameters_given_weighted_example(examples, weights, var_in_clique, cardinalities_of_var, number_of_k):
    """Estimate per-component parameters and mixing weights from weighted examples.

    The last column of ``examples`` is read as the component id ``k``.  For
    every component and every clique of that component, the stored parameter
    is the weighted fraction of that component's examples in which the
    clique's last variable equals 0.  Cliques with more than one variable get
    one such fraction per configuration of the remaining (parent) variables,
    indexed by ``get_index_given_truth_values``.  Fractions that come out as
    exactly 0 or 1 are replaced by 1e-5 and 1 - 1e-5.

    NOTE(review): a second function with this same name and a four-parameter
    signature appears later in the visible source; if both live in one module
    the later definition shadows this one — confirm.

    :param examples: 2-D data, one row per example, component id in the last column
    :param weights: one weight per example
    :param var_in_clique: for each component, a list of cliques (variable ids);
        cliques may have different sizes
    :param cardinalities_of_var: cardinality of every variable
    :param number_of_k: number of mixture components
    :return: ``(parameters_for_network, parameters_for_k)`` where the first is
        a list (per component) of per-clique parameters and the second is the
        array of normalized component weights
    """
    # todo change the use of var_in_clique for each val of k
    epsilon = pow(10, -5)
    cardinalities_of_var = array(cardinalities_of_var)
    examples = array(examples)
    val_of_k = examples[:, -1]
    parameters_for_network = [[] for _ in range(number_of_k)]

    for each_val_of_k in range(number_of_k):
        for clique in var_in_clique[each_val_of_k]:
            # Convert one clique at a time: converting the whole nested list
            # at once fails on current NumPy when clique sizes differ.
            var_in_this_clique = array(clique)
            vals_for_this_clique = examples[:, var_in_this_clique]
            cardinalities_of_var_in_this_clique = cardinalities_of_var[var_in_this_clique]

            if len(var_in_this_clique) > 1:
                # One parameter per configuration of the parent variables.
                number_of_parameters = int(cardinalities_of_var_in_this_clique[:-1].prod())
                nums = zeros([number_of_parameters, ])
                denom = zeros([number_of_parameters, ])
                for clique_vals, example_weight, example_k in zip(
                        vals_for_this_clique, weights, val_of_k):
                    if example_k != each_val_of_k:
                        continue
                    # indexing of parameters_for_network is done on the basis of parent variables only
                    index_val_without_child = get_index_given_truth_values(
                        var_in_this_clique[:-1],
                        clique_vals[:-1],
                        cardinalities_of_var_in_this_clique[:-1])
                    denom[index_val_without_child] += example_weight
                    if clique_vals[-1] == 0:
                        nums[index_val_without_child] += example_weight
                # Parent configurations never seen give 0/0 -> nan -> 0,
                # which the clamp below turns into epsilon.
                parameter_val = nan_to_num(divide(nums, denom))
                parameter_val[parameter_val == 0] = epsilon
                parameter_val[parameter_val == 1] = 1 - epsilon
                parameters_for_network[each_val_of_k].append(parameter_val)
            else:
                count_of_zero = 0
                total_count = 0
                for clique_vals, example_weight, example_k in zip(
                        vals_for_this_clique, weights, val_of_k):
                    if example_k != each_val_of_k:
                        continue
                    total_count += example_weight
                    if clique_vals[0] == 0:
                        count_of_zero += example_weight
                parameter = nan_to_num(divide(count_of_zero, total_count))
                if parameter == 0:
                    parameter = epsilon
                elif parameter == 1:
                    parameter = 1 - epsilon
                parameters_for_network[each_val_of_k].append(parameter)

    # Mixing weights: total weight per component divided by the total weight.
    nums_for_k = zeros([number_of_k, ])
    denom_for_k = 0
    for each_val_of_k, weight_for_k in zip(val_of_k, weights):
        nums_for_k[each_val_of_k] += weight_for_k
        denom_for_k += weight_for_k
    parameters_for_k = divide(nums_for_k, denom_for_k)
    return parameters_for_network, parameters_for_k
def train(examples, var_in_clique, markov, cardinalities_of_var):
    """Learn clique parameters from fully observed examples by counting.

    Nothing is learned when ``markov`` is truthy; an empty list is returned.
    Otherwise each clique yields one entry in the result:

    * more than one variable: a list with, for every configuration of all but
      the last variable, the 1-Laplace-smoothed fraction of examples in which
      the last variable equals 0;
    * exactly one variable: the plain (unsmoothed) fraction of examples in
      which that variable equals 0.

    :param examples: 2-D data, one row per example, one column per variable
    :param var_in_clique: list of cliques (variable ids); cliques may have
        different sizes
    :param markov: when truthy, skip learning and return ``[]``
    :param cardinalities_of_var: cardinality of every variable
    :return: parameters in double form, one entry per clique
    """
    parameters = []
    if markov:
        return parameters

    cardinalities_of_var = array(cardinalities_of_var)
    examples = array(examples)
    for clique in var_in_clique:
        # Convert one clique at a time: converting the whole nested list at
        # once fails on current NumPy when clique sizes differ.
        var_in_this_clique = array(clique)
        vals_for_this_clique = examples[:, var_in_this_clique]
        cardinalities_of_var_in_this_clique = cardinalities_of_var[var_in_this_clique]
        unique_vals, counts = unique(vals_for_this_clique, return_counts=True, axis=0)

        if len(var_in_this_clique) > 1:
            # indexing of parameters is done on the basis of parent variables only
            number_of_parameters = int(cardinalities_of_var_in_this_clique[:-1].prod())
            # Using 1-Laplace Smoothing
            nums = zeros([number_of_parameters, ]) + 1
            denom = zeros([number_of_parameters, ]) + cardinalities_of_var_in_this_clique[-1]
            # todo need to use parent only for parameters
            for tuple_values, tuple_count in zip(unique_vals, counts):
                index_val_without_child = helper.get_index_given_truth_values(
                    var_in_this_clique[:-1],
                    tuple_values[:-1],
                    cardinalities_of_var_in_this_clique[:-1])
                denom[index_val_without_child] += tuple_count
                if tuple_values[-1] == 0:
                    nums[index_val_without_child] += tuple_count
            parameters.append(list(nan_to_num(divide(nums, denom))))
        else:
            count_of_zero = 0
            total_count = 0
            # Each unique row holds a single value; read it directly instead
            # of packing (row, count) into an array, which is inhomogeneous.
            for tuple_values, tuple_count in zip(unique_vals, counts):
                total_count += tuple_count
                if int(tuple_values[0]) == 0:
                    count_of_zero += tuple_count
            parameters.append(nan_to_num(divide(count_of_zero, total_count)))
    return parameters
def complete_data(examples, cardinalities_of_var, parameters_for_network, parameters_for_k, var_in_clique):
    """Expand examples containing -1 into weighted completions.

    An example without any -1 is passed through with weight 1.  For an example
    that contains -1, one copy is produced for every value of the last
    variable (the value is written into the last position).  Each copy is
    weighted by the product of its clique factors under the network of that
    value, times ``parameters_for_k`` for that value; the weights of the
    copies of one example are then normalized to sum to 1.

    A clique factor is the stored parameter when the clique's last variable is
    0 and one minus the parameter when it is 1.  Any other value prints
    "Error" and leaves the weight unchanged for that clique.

    NOTE(review): only the last position is filled in; a -1 anywhere else is
    left as is — presumably only the last variable can be missing, confirm
    against the caller.

    :param examples: iterable of 1-D NumPy arrays
    :param cardinalities_of_var: cardinality of every variable
    :param parameters_for_network: per component, per clique parameters
    :param parameters_for_k: per-component weights
    :param var_in_clique: per component, the list of cliques (variable ids)
    :return: ``(final_examples, final_weights)`` as two parallel lists
    """
    cardinalities_of_var = array(cardinalities_of_var)
    var_not_present = len(cardinalities_of_var) - 1

    final_examples = []
    final_weights = []
    for each_example in examples:
        if -1 not in each_example:
            final_examples.append(each_example)
            final_weights.append(1)
            continue

        weights_for_completed_examples = []
        completed_examples = []
        for each_tuple_missing_var in range(cardinalities_of_var[var_not_present]):
            temp_example = deepcopy(each_example)
            put(temp_example, var_not_present, each_tuple_missing_var)
            weight_for_current_example = 1
            value_of_k = temp_example[-1]
            # for each tuple we have diff network thus weight will be different
            for each_clique, clique in enumerate(var_in_clique[each_tuple_missing_var]):
                values_for_current_clique = temp_example[clique]
                if len(clique) > 1:
                    index_for_current_example = get_index_given_truth_values(
                        clique[:-1],
                        values_for_current_clique[:-1],
                        cardinalities_of_var[clique[:-1]])
                    if values_for_current_clique[-1] == 0:
                        weight_for_current_example *= \
                            parameters_for_network[value_of_k][each_clique][index_for_current_example]
                    elif values_for_current_clique[-1] == 1:
                        weight_for_current_example *= (
                            1 - parameters_for_network[value_of_k][each_clique][index_for_current_example])
                    else:
                        print("Error")
                elif len(clique) == 1:
                    if values_for_current_clique[0] == 0:
                        weight_for_current_example *= parameters_for_network[value_of_k][each_clique]
                    elif values_for_current_clique[0] == 1:
                        weight_for_current_example *= (1 - parameters_for_network[value_of_k][each_clique])
                    else:
                        print("Error")
            weight_for_current_example *= parameters_for_k[value_of_k]
            weights_for_completed_examples.append(weight_for_current_example)
            completed_examples.append(temp_example)

        # Normalize so the completions of one example share a total weight of 1.
        denom = sum(weights_for_completed_examples)
        weights_for_completed_examples = divide(weights_for_completed_examples, denom)
        for weight, example in zip(weights_for_completed_examples, completed_examples):
            final_examples.append(example)
            final_weights.append(float(weight))
    return final_examples, final_weights
def find_log_liklihood_for_one_example(example, var_in_clique, parameters, cardinalities):
    """Return the base-10 log-likelihood of one example under learned parameters.

    For each clique the stored parameter is used as the probability that the
    clique's last variable is 0 (one minus it when the variable is 1).  With
    more than one variable the parameter is looked up by the configuration of
    all but the last variable via ``helper.get_index_given_truth_values``.

    A parameter that is exactly 0 or 1 is moved by 1e-5 before the logarithm
    is taken; the adjusted value is written back into ``parameters``, so the
    argument is modified in place.  Unsupported values or empty cliques only
    print a message and contribute nothing.

    :param example: values of all variables for one example
    :param var_in_clique: list of cliques (variable ids); cliques may have
        different sizes
    :param parameters: per-clique parameters (mutated in place when clamped)
    :param cardinalities: cardinality of every variable
    :return: the summed ``log10`` probability
    """
    epsilon = pow(10, -5)
    log_liklihood = 0
    cardinalities = array(cardinalities)
    example = array(example)
    # Cliques are read straight from the input: converting the whole nested
    # list to one array fails on current NumPy when clique sizes differ.
    for each_clique, clique in enumerate(var_in_clique):
        var_in_this_clique = list(clique)
        cardinalities_of_var_in_this_clique = cardinalities[var_in_this_clique]
        values_of_tuple = example[var_in_this_clique]

        if len(var_in_this_clique) > 1:
            child = values_of_tuple[-1]
            index_for_parents = helper.get_index_given_truth_values(
                var_in_this_clique[:-1],
                values_of_tuple[:-1],
                cardinalities_of_var_in_this_clique[:-1])
            if parameters[each_clique][index_for_parents] == 0:
                parameters[each_clique][index_for_parents] += epsilon
            elif parameters[each_clique][index_for_parents] == 1:
                parameters[each_clique][index_for_parents] -= epsilon
            if child == 0:
                log_liklihood += log10(parameters[each_clique][index_for_parents])
            elif child == 1:
                log_liklihood += log10(1 - parameters[each_clique][index_for_parents])
            else:
                print("Error line 37")
        elif len(var_in_this_clique) == 1:
            if parameters[each_clique] == 0:
                parameters[each_clique] += epsilon
            elif parameters[each_clique] == 1:
                parameters[each_clique] -= epsilon
            if values_of_tuple[0] == 0:
                log_liklihood += log10(parameters[each_clique])
            elif values_of_tuple[0] == 1:
                log_liklihood += log10(1 - parameters[each_clique])
            else:
                print("Error line 48")
        else:
            print("Error line ")
    return log_liklihood
def get_parameters_given_weighted_example(examples, weights, var_in_clique, cardinalities_of_var):
    """Estimate clique parameters of a single network from weighted examples.

    For each clique the stored parameter is the weighted fraction of examples
    in which the clique's last variable equals 0.  Cliques with more than one
    variable get one fraction per configuration of the remaining (parent)
    variables, indexed by ``helper.get_index_given_truth_values``.  A fraction
    that comes out as exactly 0 is replaced by 1e-5; values equal to 1 are
    left untouched.

    :param examples: 2-D data, one row per example, one column per variable
    :param weights: one weight per example
    :param var_in_clique: list of cliques (variable ids); cliques may have
        different sizes
    :param cardinalities_of_var: cardinality of every variable
    :return: list with one entry per clique: an array of fractions for
        multi-variable cliques, a scalar for single-variable cliques
    """
    epsilon = pow(10, -5)
    parameters = []
    cardinalities_of_var = array(cardinalities_of_var)
    examples = array(examples)
    for clique in var_in_clique:
        # Convert one clique at a time: converting the whole nested list at
        # once fails on current NumPy when clique sizes differ.
        var_in_this_clique = array(clique)
        vals_for_this_clique = examples[:, var_in_this_clique]
        cardinalities_of_var_in_this_clique = cardinalities_of_var[var_in_this_clique]

        if len(var_in_this_clique) > 1:
            # One parameter per configuration of the parent variables.
            number_of_parameters = int(cardinalities_of_var_in_this_clique[:-1].prod())
            nums = zeros([number_of_parameters, ])
            denom = zeros([number_of_parameters, ])
            for clique_vals, example_weight in zip(vals_for_this_clique, weights):
                # indexing of parameters is done on the basis of parent variables only
                index_val_without_child = helper.get_index_given_truth_values(
                    var_in_this_clique[:-1],
                    clique_vals[:-1],
                    cardinalities_of_var_in_this_clique[:-1])
                denom[index_val_without_child] += example_weight
                if clique_vals[-1] == 0:
                    nums[index_val_without_child] += example_weight
            # Unseen parent configurations give 0/0 -> nan -> 0 -> epsilon.
            parameter_val = nan_to_num(divide(nums, denom))
            parameter_val[parameter_val == 0] = epsilon
            parameters.append(parameter_val)
        else:
            count_of_zero = 0
            total_count = 0
            for clique_vals, example_weight in zip(vals_for_this_clique, weights):
                total_count += example_weight
                if clique_vals[0] == 0:
                    count_of_zero += example_weight
            parameter = nan_to_num(divide(count_of_zero, total_count))
            if parameter == 0:
                parameter += epsilon
            parameters.append(parameter)
    return parameters
def find_log_liklihood_for_one_example_mixture_model(example, var_in_clique, parameters, parameters_for_k, cardinalities):
    """Return the log-likelihood of one example under a mixture of networks.

    For every component ``k`` the per-clique ``log10`` probabilities of the
    example are summed (starting from 0) and ``log10(parameters_for_k[k])`` is
    added.  The per-component values are then combined with
    ``helper.log_sum_exp``.

    Within a clique the stored parameter is used as the probability that the
    clique's last variable is 0 (one minus it when the variable is 1).  A
    parameter that is exactly 0 or 1 is moved by 1e-5 first, and the adjusted
    value is written back into ``parameters`` (mutated in place).  Unsupported
    values or empty cliques only print a message and contribute nothing.

    NOTE(review): the per-component terms are base-10 logarithms; confirm that
    ``helper.log_sum_exp`` works in the same base.

    :param example: values of all variables for one example
    :param var_in_clique: per component, the list of cliques (variable ids);
        cliques may have different sizes
    :param parameters: per component, per clique parameters (mutated in place)
    :param parameters_for_k: per-component weights
    :param cardinalities: cardinality of every variable
    :return: whatever ``helper.log_sum_exp`` returns for the component values
    """
    epsilon = pow(10, -5)
    cardinalities = array(cardinalities)
    example = array(example)
    log_liklihood = []
    for each_val_of_k in range(len(parameters_for_k)):
        # A sum of logs starts at 0 (a product of probabilities starts at 1).
        log_liklihood_for_this_val_of_k = 0
        # Cliques are read straight from the input: converting the whole
        # nested list to one array fails on current NumPy when sizes differ.
        for each_clique, clique in enumerate(var_in_clique[each_val_of_k]):
            var_in_this_clique = list(clique)
            cardinalities_of_var_in_this_clique = cardinalities[var_in_this_clique]
            values_of_tuple = example[var_in_this_clique]
            network_parameters = parameters[each_val_of_k]

            if len(var_in_this_clique) > 1:
                child = values_of_tuple[-1]
                index_for_parents = helper.get_index_given_truth_values(
                    var_in_this_clique[:-1],
                    values_of_tuple[:-1],
                    cardinalities_of_var_in_this_clique[:-1])
                if network_parameters[each_clique][index_for_parents] == 0:
                    network_parameters[each_clique][index_for_parents] += epsilon
                elif network_parameters[each_clique][index_for_parents] == 1:
                    network_parameters[each_clique][index_for_parents] -= epsilon
                if child == 0:
                    log_liklihood_for_this_val_of_k += log10(
                        network_parameters[each_clique][index_for_parents])
                elif child == 1:
                    log_liklihood_for_this_val_of_k += log10(
                        1 - network_parameters[each_clique][index_for_parents])
                else:
                    print("Error line 37")
            elif len(var_in_this_clique) == 1:
                if network_parameters[each_clique] == 0:
                    network_parameters[each_clique] += epsilon
                elif network_parameters[each_clique] == 1:
                    network_parameters[each_clique] -= epsilon
                if values_of_tuple[0] == 0:
                    log_liklihood_for_this_val_of_k += log10(network_parameters[each_clique])
                elif values_of_tuple[0] == 1:
                    log_liklihood_for_this_val_of_k += log10(1 - network_parameters[each_clique])
                else:
                    print("Error line 48")
            else:
                print("Error line ")
        log_liklihood_for_this_val_of_k += log10(parameters_for_k[each_val_of_k])
        log_liklihood.append(log_liklihood_for_this_val_of_k)
    log_liklihood_output = helper.log_sum_exp(log_liklihood)
    return log_liklihood_output
def sampling_VE(num_of_var, cardinalities, num_of_cliques, num_of_var_in_clique, var_in_clique, distribution_array, w_cutset_bound, num_samples):
    """
    This is the main function used to do sampling VE.

    A cutset ``X`` is obtained from ``w_cutset``.  For every sample, each
    cutset variable is assigned a value with ``randint``, the model is
    instantiated on that evidence, and variable elimination is run on the
    result.  The value returned by variable elimination divided by the
    proposal value ``Q`` is the sample weight; the mean weight is returned.

    ``Q`` starts as a fixed value and, when the cutset is not empty, is
    replaced at every 100th sample by the normalized accumulated weight of
    each cutset assignment seen so far (unseen assignments keep the fixed
    value).

    @param num_of_var: The number of variables in the PGM
    @param cardinalities: The cardinalities of the PGM
    @param num_of_cliques: The number of cliques in the PGM
    @param num_of_var_in_clique: The number of variables in each clique
    @param var_in_clique: The variables in each clique
    @param distribution_array: The distribution array for given PGM
    @param w_cutset_bound: The cutset bound for the PGM
    @param num_samples: The number of samples for the algorithm
    @return: The predicted value of Z or probability
    """
    X = w_cutset(num_of_var, var_in_clique, w_cutset_bound)
    cardinalities = np.array(cardinalities)
    num_of_var_in_x = len(X)
    cardinalities_of_x = cardinalities[X]

    if num_of_var_in_x != 0:
        # NOTE(review): a uniform proposal over the cutset would be
        # 1 / prod(cardinalities_of_x); this uses 1 / sum(log10(...)).
        # Kept as is because changing it alters the estimate — confirm intent.
        uniform_dist = 1 / np.sum(np.log10(cardinalities_of_x))
        distribution_array_X = {"FIRST": [uniform_dist]}
    else:
        distribution_array_X = [1]
        uniform_dist = 1
    # An empty cutset has a single (empty) assignment, so Q stays fixed.
    proposal_is_fixed = isinstance(distribution_array_X, list)

    num = {}
    denom = 0
    weights = []
    for each_N in range(num_samples):
        distribution_array1 = distribution_array.copy()
        var_in_clique1 = var_in_clique.copy()
        num_of_var_in_clique1 = num_of_var_in_clique.copy()

        # NOTE(review): the upper bound "cardinality - 1" is inclusive for
        # random.randint but exclusive for numpy.random.randint — confirm
        # which one this module imports.
        evidence = [
            (each_var, randint(0, each_cardinality - 1))
            for each_var, each_cardinality in zip(X, cardinalities_of_x)
        ]
        var_in_clique1, distribution_array1 = helper.instantiate(
            num_of_var, evidence, cardinalities, var_in_clique1,
            distribution_array1)
        var_elem_sol = variable_elimination.variable_elimination(
            num_of_var, cardinalities, num_of_cliques,
            num_of_var_in_clique1, var_in_clique1, distribution_array1,
            evidence)

        sample = [one_value for (_, one_value) in evidence]
        sample_string = str(sample)
        if proposal_is_fixed:
            Q = uniform_dist
        else:
            Q = distribution_array_X.get(sample_string, uniform_dist)

        weight = var_elem_sol / Q
        weights.append(weight)
        denom += weight
        num[sample_string] = num.get(sample_string, 0) + weight

        if each_N % 100 == 0 and each_N != 0 and not proposal_is_fixed:
            distribution_array_X = {
                each: accumulated / denom for each, accumulated in num.items()
            }

    # The estimate is the plain mean of the weights.  The earlier guard
    # "(weights[1:] == weights[:-1]).all" never called .all, so it was always
    # truthy and the alternative helper.threshold/logsumexp path never ran;
    # that effective behavior is kept here.
    z = np.sum(np.array(weights))
    return z / num_samples