Example #1
def factor_marginalize(factor, var):
    """Sums over a list of variables.

    Args:
        factor (Factor): Input factor
        var (List): Variables to marginalize out

    Returns:
        out: Factor with variables in 'var' marginalized out.
    """
    out = Factor()
    """ YOUR CODE HERE
    Marginalize out the variables given in var
    """

    # Output variables: those of the input factor that are not marginalized out.
    out.var = np.setxor1d(factor.var, np.array(var))

    # Copy the cardinality of each remaining variable, preserving its order in out.var.
    for out_var in out.var:
        index = np.where(factor.var == out_var)
        out.card = np.append(out.card, factor.card[index])

    # Drop the marginalized columns from the full assignment table.
    assignment = factor.get_all_assignments()
    index = []
    for single_var in var:
        index.append(np.where(factor.var == single_var))

    assignment = np.delete(assignment, index, axis=1)

    # For each distinct remaining assignment, sum the values of all input rows
    # that reduce to it.
    out.val = np.zeros(np.prod(out.card))
    for i in np.unique(assignment, axis=0):
        index_set = np.array(np.where(np.all(i == assignment, axis=1)))
        single_assignment = assignment_to_index(i, out.card)
        out.val[single_assignment] = np.sum(factor.val[index_set])
    return out
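
A minimal usage sketch (illustrative, not from the lab's tests); it assumes the lab's Factor class, its assignment ordering, and factor_marginalize are already in scope:

# Hypothetical factor phi(X0, X1) with cardinalities [2, 2]; values are laid out
# according to the lab's index_to_assignment convention.
phi = Factor()
phi.var = np.array([0, 1])
phi.card = np.array([2, 2])
phi.val = np.array([0.1, 0.2, 0.3, 0.4])
marg = factor_marginalize(phi, [1])
# marg.var == [0]; each entry of marg.val is the sum of phi.val over the values of X1.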
Example #2
def factor_sum(A, B):
    """Same as factor_product, but sums instead of multiplies
    """
    if A.is_empty():
        return B
    if B.is_empty():
        return A

    # Create output factor. Variables should be the union of the
    # variables contained in the two input factors
    out = Factor()
    out.var = np.union1d(A.var, B.var)

    # Compute the mapping between the variable ordering of each input factor
    # and the output, and use it to set the cardinality
    out.card = np.zeros(len(out.var), np.int64)
    mapA = np.argmax(out.var[None, :] == A.var[:, None], axis=-1)
    mapB = np.argmax(out.var[None, :] == B.var[:, None], axis=-1)
    out.card[mapA] = A.card
    out.card[mapB] = B.card

    # For each assignment in the output, compute which row of the input factors
    # it comes from
    out.val = np.zeros(np.prod(out.card))
    assignments = out.get_all_assignments()
    idxA = assignment_to_index(assignments[:, mapA], A.card)
    idxB = assignment_to_index(assignments[:, mapB], B.card)
    """ YOUR CODE HERE
    You should populate the .val field with the factor sum. The code for this
    should be very similar to the factor_product().
    """
    out.val = A.val[idxA] + B.val[idxB]

    return out
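
factor_sum mirrors factor_product but adds the values, which is most useful when .val stores log-probabilities (an assumption about intended usage, not stated in the lab code). A hedged sketch under the same in-scope assumptions as above:

# Hypothetical log-space factors over X0 and (X0, X1).
A = Factor()
A.var = np.array([0])
A.card = np.array([2])
A.val = np.log(np.array([0.6, 0.4]))
B = Factor()
B.var = np.array([0, 1])
B.card = np.array([2, 2])
B.val = np.log(np.array([0.5, 0.5, 0.2, 0.8]))
C = factor_sum(A, B)
# np.exp(C.val) matches factor_product(A, B).val computed in probability space.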
Example #3
def _get_conditional_probability(nodes, edges, factors, evidence, initial_samples, num_iterations, num_burn_in):
    """
    Returns the conditional probability p(Xf | Xe) where Xe is the set of observed nodes and Xf are the query nodes
    i.e. the unobserved nodes. The conditional probability is approximated using Gibbs sampling.

    Args:
        nodes: numpy array of nodes e.g. [x1, x2, ...].
        edges: numpy array of edges e.g. [i, j] implies that nodes[i] is the parent of nodes[j].
        factors: dictionary of Factors e.g. factors[x1] returns the conditional probability of x1 given all other nodes.
        evidence: dictionary of evidence e.g. evidence[x4] returns the provided evidence for x4.
        initial_samples: dictionary of initial samples to initialize Gibbs sampling.
        num_iterations: number of sampling iterations
        num_burn_in: number of burn-in iterations

    Returns:
        returns Factor of conditional probability.
    """
    assert num_iterations > num_burn_in
    conditional_prob = Factor()

    """ YOUR CODE HERE """
    for factor_index in factors:
        factors[factor_index] = factor_evidence(factors[factor_index],evidence)
    remove_nodes = list(evidence.keys())
    for node in remove_nodes:
        initial_samples.pop(node)
        index = np.argwhere(nodes == node)
        nodes = np.delete(nodes,index)

    total_run = num_burn_in + num_iterations
    sample_result = np.zeros([total_run, len(nodes)])
    for i in range(total_run):
        initial_samples = _sample_step(nodes, factors, initial_samples)
        sample_result[i] = np.array(list(initial_samples.values()))

    # Count how often each joint assignment of the query nodes occurs after burn-in.
    freq = {}
    for i in range(len(factors[0].val)):
        freq[i] = 0
    card = factors[0].card
    for i in range(num_burn_in, total_run):
        index = assignment_to_index(sample_result[i], card)
        freq[index] += 1

    # Normalize the counts into a probability table over the query nodes.
    freq_arr = np.array(list(freq.values()))
    freq_arr = freq_arr / np.sum(freq_arr)
    conditional_prob.var = factors[0].var
    conditional_prob.card = card
    conditional_prob.val = freq_arr
    """ END YOUR CODE HERE """

    return conditional_prob
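
The final block is simply a relative-frequency estimate over the post-burn-in samples; a tiny self-contained sketch of that normalization step (counts are made up):

import numpy as np
# Suppose the kept samples hit the four joint assignments of the query nodes this often.
counts = np.array([12.0, 30.0, 40.0, 18.0])
estimate = counts / counts.sum()   # approximate p(Xf | Xe), one entry per assignment index
print(estimate)                    # [0.12 0.3  0.4  0.18]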
Example #4
def factor_max_marginalize(factor, var):
    """Marginalize over a list of variables by taking the max.

    Args:
        factor (Factor): Input factor
        var (List): Variables to marginalize out.

    Returns:
        out: Factor with variables in 'var' marginalized out. The factor's
          .val_argmax field should be a list of dictionaries that keeps track
          of the maximizing values of the marginalized variables.
          e.g. when out.val_argmax[i][j] = k, this means that
            when assignments of out is index_to_assignment[i],
            variable j has a maximizing value of k.
          See test_lab1.py::test_factor_max_marginalize() for an example.
    """
    out = Factor()
    """ YOUR CODE HERE
    Marginalize out the variables given in var. 
    You should make use of val_argmax to keep track of the location with the
    maximum probability.
    """
    # Output variables: those of the input factor that are not marginalized out.
    out.var = np.setxor1d(factor.var, np.array(var))

    # Copy the cardinality of each remaining variable.
    for out_var in out.var:
        index = np.where(factor.var == out_var)
        out.card = np.append(out.card, factor.card[index])

    # Drop the marginalized columns from the full assignment table.
    assignment = factor.get_all_assignments()
    index = []
    out.val_argmax = []
    for single_var in var:
        index.append(np.where(factor.var == single_var))

    delete_assignment = np.delete(assignment, index, axis=1)

    # For each distinct remaining assignment, keep the maximum value among the
    # matching input rows and record the maximizing values of the marginalized variables.
    out.val = np.zeros(np.prod(out.card))
    for i in np.unique(delete_assignment, axis=0):
        index_set = np.array(np.where(np.all(i == delete_assignment, axis=1)))
        index_set_max_index = np.argmax(factor.val[index_set])
        max_index_assignment = index_set[:, index_set_max_index][0]
        single_assignment = assignment_to_index(i, out.card)
        out.val[single_assignment] = factor.val[max_index_assignment]
        temp_dict = {}
        for single_var in var:
            index = np.argwhere(factor.var == single_var)[0][0]
            temp_dict[single_var] = assignment[max_index_assignment][index]
        out.val_argmax.append(temp_dict)
    return out
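
A minimal usage sketch under the same assumptions as the earlier ones (Factor, factor_max_marginalize, and the assignment ordering all come from the lab scaffold):

# Hypothetical factor phi(X0, X1) with cardinalities [2, 2].
phi = Factor()
phi.var = np.array([0, 1])
phi.card = np.array([2, 2])
phi.val = np.array([0.1, 0.4, 0.3, 0.2])
m = factor_max_marginalize(phi, [1])
# m.val[i] is the maximum of phi.val over X1 for the i-th assignment of X0, and
# m.val_argmax[i] is a dict {1: k} recording the maximizing value k of X1.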
Example #5
def factor_product(A, B):
    """Compute product of two factors.

    Suppose A = phi(X_1, X_2), B = phi(X_2, X_3), the function should return
    phi(X_1, X_2, X_3)
    """
    if A.is_empty():
        return B
    if B.is_empty():
        return A

    # Create output factor. Variables should be the union of the
    # variables contained in the two input factors
    out = Factor()
    out.var = np.union1d(A.var, B.var)

    # Compute the mapping between the variable ordering of each input factor
    # and the output, and use it to set the cardinality
    out.card = np.zeros(len(out.var), np.int64)
    mapA = np.argmax(out.var[None, :] == A.var[:, None], axis=-1)
    mapB = np.argmax(out.var[None, :] == B.var[:, None], axis=-1)
    out.card[mapA] = A.card
    out.card[mapB] = B.card

    # For each assignment in the output, compute which row of the input factors
    # it comes from
    out.val = np.zeros(np.prod(out.card))
    assignments = out.get_all_assignments()
    idxA = assignment_to_index(assignments[:, mapA], A.card)
    idxB = assignment_to_index(assignments[:, mapB], B.card)

    """ YOUR CODE HERE
    You should populate the .val field with the factor product
    Hint: The code for this function should be very short (~1 line). Try to
      understand what the above lines are doing, in order to implement
      subsequent parts.
    """
    out.val = A.val[idxA] * B.val[idxB]
    return out
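
The mapA/mapB lines are a vectorized lookup of each input variable's position in out.var; a small standalone illustration with made-up arrays:

import numpy as np
out_var = np.array([1, 2, 3])   # union of the two factors' variables
A_var = np.array([2, 3])
# Broadcasting produces a (len(A_var), len(out_var)) boolean table; argmax over the
# last axis gives, for each variable of A, its column index in out_var.
mapA = np.argmax(out_var[None, :] == A_var[:, None], axis=-1)
print(mapA)                     # [1 2]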
Example #6
def factor_product(A, B):
    """
    Computes the factor product of A and B e.g. A = f(x1, x2); B = f(x1, x3); out=f(x1, x2, x3) = f(x1, x2)f(x1, x3)
    Args:
        A: first Factor
        B: second Factor
    Returns:
        Returns the factor product of A and B
    """
    out = Factor()
    """ YOUR CODE HERE,     HINT: copy from lab2 part 1! """
    if A.is_empty():
        return B
    if B.is_empty():
        return A

    # Set the variables of the output
    out.var = np.union1d(A.var, B.var)

    # Set the cardinality of the output
    out.card = np.zeros(len(out.var), np.int64)
    mapA = np.argmax(out.var[None, :] == A.var[:, None], axis=-1)
    mapB = np.argmax(out.var[None, :] == B.var[:, None], axis=-1)
    out.card[mapA] = A.card
    out.card[mapB] = B.card

    # Initialize the factor values to zero
    out.val = np.zeros(np.prod(out.card))
    assignments = out.get_all_assignments()
    idxA = assignment_to_index(assignments[:, mapA], A.card)
    idxB = assignment_to_index(assignments[:, mapB], B.card)

    # Populate the factor values
    out.val = A.val[idxA] * B.val[idxB]
    """ END YOUR CODE HERE """
    return out
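
Because factor_product returns the non-empty argument when one side is empty, it folds cleanly over a collection of factors; a hedged sketch (factor_list is a hypothetical iterable of Factor objects, and a freshly constructed Factor() is assumed to report is_empty()):

from functools import reduce
# Build a joint factor from a list of per-node conditionals, e.g. a Bayes net.
joint = reduce(factor_product, factor_list, Factor())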
Example #7
def _get_conditional_probability(target_factors, proposal_factors, evidence,
                                 num_iterations):
    """
    Performs multiple iterations of importance sampling and returns the conditional distribution p(Xf | Xe) where
    Xe are the evidence nodes and Xf are the query nodes (unobserved).

    Args:
        target_factors: dictionary of node:Factor pair where Factor is the target distribution of the node.
                        Other nodes in the Factor are parent nodes of the node. The product of the target
                        distribution gives our joint target distribution.
        proposal_factors: dictionary of node:Factor pair where Factor is the proposal distribution to sample node
                        observations. Other nodes in the Factor are parent nodes of the node
        evidence: dictionary of node:val pair where node is an evidence node while val is the evidence for the node.
        num_iterations: number of importance sampling iterations

    Returns:
        Approximate conditional distribution of p(Xf | Xe) where Xf is the set of query nodes (not observed) and
        Xe is the set of evidence nodes. Return result as a Factor
    """
    out = Factor()
    """ YOUR CODE HERE """
    # If evidence == {}, the Monte Carlo part is unnecessary: the joint proposal
    # distribution could be returned directly, as in the commented-out shortcut below.
    # if evidence == {}:
    #     out = cal_joint_dist(proposal_factors)
    #     return out

    nodes = find_topo_order(proposal_factors)

    # Calculate the proposal distribution for the query nodes.
    evi_nodes = list(evidence.keys())
    for n in evi_nodes:
        nodes.remove(n)
    q_dist_factor = cal_prop_dist(nodes, proposal_factors)
    q_dist_factor = factor_evidence(q_dist_factor, evidence)
    # Calculate the target (joint) distribution.
    p_dist_factor = cal_joint_dist(target_factors)

    proposal_factors = after_evidence(evidence, proposal_factors)

    # r_i = p/q takes a fixed value for each joint assignment of the query nodes;
    # freq counts how often each assignment is sampled.
    r_i = {}
    freq = {}
    all_assignments = q_dist_factor.get_all_assignments()
    for i in range(len(all_assignments)):
        index = assignment_to_index(all_assignments[i], q_dist_factor.card)
        q_prob = q_dist_factor.val[index]

        tmp_dict = {}
        for j in range(len(q_dist_factor.var)):
            tmp_dict[q_dist_factor.var[j]] = all_assignments[i][j]
        tmp_dict.update(evidence)
        p_prob = get_prob(tmp_dict, p_dist_factor)
        r_i[index] = p_prob / q_prob
        freq[index] = 0

    for _ in range(num_iterations):
        samples = _sample_step(nodes, proposal_factors)
        sample_res = sorted(samples.items(), key=lambda d: d[0])
        sorted_samples_results = [value for key, value in sample_res]
        index = assignment_to_index(sorted_samples_results, q_dist_factor.card)
        freq[index] += 1

    # Self-normalize the importance ratios using the observed sample counts.
    weights = [0] * len(all_assignments)
    sum_denominator = 0
    for i in range(len(weights)):
        sum_denominator += r_i[i] * freq[i]
    for i in range(len(weights)):
        weights[i] = r_i[i] / sum_denominator

    # Weight each assignment's sample count and renormalize to obtain the estimate.
    out_value = [0] * len(all_assignments)
    sum_denominator = 0
    for i in range(len(weights)):
        sum_denominator += weights[i] * freq[i]
    for i in range(len(weights)):
        out_value[i] = weights[i] * freq[i] / sum_denominator

    out.var = q_dist_factor.var
    out.card = q_dist_factor.card
    out.val = np.array(out_value)
    """ END YOUR CODE HERE """

    return out
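
The weights/out_value bookkeeping above reduces to the standard self-normalized importance-sampling estimate, r_i * n_i / sum_j(r_j * n_j), where r_i = p/q and n_i is the sample count for assignment i; a tiny standalone check with made-up numbers:

import numpy as np
r = np.array([2.0, 0.5, 1.0])      # importance ratios p/q per joint assignment
n = np.array([10, 40, 50])         # how often each assignment was sampled
estimate = r * n / np.sum(r * n)   # same value the weights/out_value loops produce
print(estimate)                    # [0.22222222 0.22222222 0.55555556]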