Exemplo n.º 1
0
def epsilon_greedy(q, s, eps=0.5):
    """Choose an action for state ``s`` using an epsilon-greedy rule.

    A uniform distribution over ``q.actions`` is built first.  With
    probability ``eps`` an action is drawn from that distribution; otherwise
    the result of ``greedy(q, s)`` is returned.
    """
    action_count = len(q.actions)
    # Every action receives the same weight, 1 / number of actions.
    uniform = DDist(dict.fromkeys(q.actions, 1 / action_count))
    explore = random.random() < eps  # True with probability eps
    return uniform.draw() if explore else greedy(q, s)
Exemplo n.º 2
0
def tinyTrans2(s, a):
    """Deterministic transition model over the states 0 through 4.

    States 0-3 each lead to the next state with probability 1.0 and state 4
    leads back to itself.  The action ``a`` is not consulted.  Any other
    state yields ``None``.
    """
    # (state, successor) pairs, checked in the same order as the state numbers.
    for state, successor in ((0, 1), (1, 2), (2, 3), (3, 4), (4, 4)):
        if s == state:
            return DDist({successor: 1.0})
    return None
 def generate_dists(self, alphabet_file, letter_file, matrix_file):
     """Populate the alphabet, letter distribution and transition matrix.

     All three inputs are read with ``np.genfromtxt`` using a comma
     delimiter; the alphabet is read as unicode strings.  The results are
     stored on ``self.alphabet``, ``self.letter_to_index``,
     ``self.letter_dist`` and ``self.transition_matrix``.
     """
     letters = np.genfromtxt(alphabet_file, delimiter=",", dtype="unicode")
     frequencies = np.genfromtxt(letter_file, delimiter=",")
     transitions = np.genfromtxt(matrix_file, delimiter=",")

     self.alphabet = letters
     # Reverse lookup: letter -> its position in the alphabet array.
     self.letter_to_index = {
         symbol: position
         for position, symbol in enumerate(letters)
     }
     # The i-th value of the letter file belongs to the i-th alphabet letter.
     self.letter_dist = DDist({
         symbol: frequencies[position]
         for position, symbol in enumerate(letters)
     })
     self.transition_matrix = transitions
 def conditional_dist(self, x):
     """Return a ``DDist`` over the alphabet built from the column for ``x``.

     The column of ``self.transition_matrix`` at the index of ``x`` is
     paired, entry by entry, with the letters of ``self.alphabet``.
     """
     column_index = self.letter_to_index[x]
     column = self.transition_matrix[:, column_index]
     return DDist({
         letter: column[row]
         for row, letter in enumerate(self.alphabet)
     })
Exemplo n.º 5
0
def syntactic_bootstrapping_test(verbose=False):
    """Test the syntactic bootstrapping results against those from the paper.

    Six observation sequences are run through ``syntactic_probabilities``
    with a uniform prior over the hypotheses '0', '1' and '*'.  The resulting
    posterior dictionaries are handed to ``run_test`` together with the
    paper's expected values and the observation sequences.

    Args:
        verbose: forwarded unchanged to ``run_test``.
    """
    hypothesis_space = ['0', '1', '*']

    # Joint table keyed by (observation symbol, hypothesis) pairs.
    p_c_v_table = {
        ('0', '0'): 0.22, ('0', '1'): 0.01, ('0', '*'): 0.11,
        ('1', '0'): 0.01, ('1', '1'): 0.22, ('1', '*'): 0.11,
        ('-', '0'): 0.11, ('-', '1'): 0.11, ('-', '*'): 0.12,
    }
    # The values from the paper don't sum to 1, so normalize before use.
    p_c_v_table = DDist.normalize_probability_table(p_c_v_table)
    p_c_v_dist = DDist(p_c_v_table)

    priors_table = {'0': 1.0 / 3.0, '1': 1.0 / 3.0, '*': 1.0 / 3.0}
    priors_dist = DDist(priors_table)

    all_observations = [
        ['0', '0', '0', '0'],
        ['-', '-', '-', '-'],
        ['0', '0', '1', '1'],
        ['-', '-', '0', '0'],
        ['0'] * 23 + ['-'] * 10,
        ['0'] * 23 + ['-'] * 10 + ['1'] * 5,
    ]

    # Expected posteriors, in the same order as all_observations.
    all_paper_results = [
        {'0': 0.941, '1': 0.000, '*': 0.059},
        {'0': 0.292, '1': 0.292, '*': 0.416},
        {'0': 0.032, '1': 0.032, '*': 0.936},
        {'0': 0.769, '1': 0.000, '*': 0.230},
        {'0': 1.000, '1': 0.000, '*': 0.000},
        {'0': 0.960, '1': 0.000, '*': 0.040},
    ]

    all_our_results = [
        syntactic_probabilities(hypothesis_space, obs, p_c_v_dist,
                                priors_dist).dictionary
        for obs in all_observations
    ]

    # Call form with a single string prints identically on Python 2 and 3;
    # the former print statement was a SyntaxError on Python 3.
    print("Testing the syntactic bootstrapping results against those from the paper.")
    run_test(all_our_results, all_paper_results, all_observations, verbose)
Exemplo n.º 6
0
def syntactic_probabilities(hypothesis_space, observations, p_c_v_joint,
                            priors):
    """Compute normalized posteriors over a syntactic hypothesis space.

    Args:
        hypothesis_space: iterable of hypotheses to score.
        observations: iterable of observed symbols.
        p_c_v_joint: joint ``DDist``; it is converted with
            ``DDist.make_conditional_from_joint`` and then queried with
            ``(observation, hypothesis)`` tuples.
        priors: ``DDist`` queried with ``priors.prob(hypothesis)``.

    Returns:
        A ``DDist`` built from the scores after
        ``DDist.normalize_probability_table``.
    """
    p_c_given_v_dist = DDist.make_conditional_from_joint(p_c_v_joint)
    posteriors_table = {}
    for hypothesis in hypothesis_space:
        # The prior does not depend on the observation, so look it up once.
        prob_h = priors.prob(hypothesis)
        posterior = 1
        for oi in observations:
            prob_x_given_h = p_c_given_v_dist.prob((oi, hypothesis))
            # NOTE(review): the prior is multiplied in once per observation,
            # i.e. the score is prior**n * product(likelihoods).  This is
            # indistinguishable from standard Bayes for a uniform prior after
            # normalization, but not for a non-uniform one -- confirm intent
            # before changing, behavior is deliberately left as it was.
            posterior *= prob_h * prob_x_given_h
        posteriors_table[hypothesis] = posterior

    posteriors_table = DDist.normalize_probability_table(posteriors_table)
    return DDist(posteriors_table)
Exemplo n.º 7
0
def integrated_probabilities(hypothesis_space, observations, priors_table):
    """Compute normalized posteriors combining utterance and scene evidence.

    Args:
        hypothesis_space: mapping from hypothesis key to a feature sequence;
            element 0 of the features is used for the utterance likelihood.
        observations: iterable of mappings.  Each must provide ``"u"`` (an
            indexable sequence of symbols) and may provide ``"s"`` (indexed
            by attention label; ``"G"`` and ``"W"`` are used when the
            attention is ``'-'``).
        priors_table: mapping from hypothesis key to prior probability.

    Returns:
        A ``DDist`` built from the scores after
        ``DDist.normalize_probability_table``.
    """
    p_c_v_joint_table = {
        ('0', '0'): 0.22, ('0', '1'): 0.01, ('0', '*'): 0.11,
        ('1', '0'): 0.01, ('1', '1'): 0.22, ('1', '*'): 0.11,
        ('*', '0'): 0.11, ('*', '1'): 0.11, ('*', '*'): 0.12,
    }
    p_u_given_h_dist = DDist.make_conditional_from_joint(
        DDist(DDist.normalize_probability_table(p_c_v_joint_table)))

    posteriors_table = {}
    for hypothesis_key, hypothesis_features in hypothesis_space.items():
        posterior = 1
        for oi in observations:
            s_j = oi.get("s")
            u_j = oi.get("u")
            a_j = get_attention_from_u_j(u_j)
            hypothesis_fig = hypothesis_features[0]

            # Utterance likelihood: product over every symbol of u_j.
            p_u_given_h = multiply([
                p_u_given_h_dist.prob((hypothesis_fig, u_j[k]))
                for k in range(len(u_j))
            ])

            if s_j is None:
                # No scene information for this observation.
                p_s_given_h = 1.0 / 3.0
            elif a_j == '-':
                # Attention unknown: average over both attention labels.
                p_s_given_h = average([
                    compute_p_s_given_h(hypothesis_features, s_j[a])
                    for a in ["G", "W"]
                ])
            else:
                p_s_given_h = compute_p_s_given_h(hypothesis_features,
                                                  s_j[a_j])

            prob = p_u_given_h * p_s_given_h * priors_table[hypothesis_key]
            posterior *= prob
        posteriors_table[hypothesis_key] = posterior

    posteriors_table = DDist.normalize_probability_table(posteriors_table)
    return DDist(posteriors_table)
Exemplo n.º 8
0
def semantic_probabilities(hypothesis_space, observations):
    """Return a DDist of normalized posteriors over ``hypothesis_space``.

    Each hypothesis starts with a score of 1.  For every observation the
    score is multiplied by ``likelihood * prior``, where the prior is
    ``1 / 3**M`` (``M`` is the length of the first hypothesis plus one) and
    the likelihood is ``1 / 2**q`` when the observation is in the hypothesis
    (``q`` comes from ``q_from_hypothesis``) and 0 otherwise.  Hypotheses
    whose score ends at zero are left out before normalization.
    """
    prior_exponent = len(hypothesis_space[0]) + 1
    scores = {}
    for candidate in hypothesis_space:
        size_exponent = q_from_hypothesis(candidate)
        score = 1
        for observation in observations:
            prior = 1.0 / pow(3.0, prior_exponent)
            if observation_is_in_hypothesis(observation, candidate):
                likelihood = 1.0 / (pow(2.0, size_exponent))
            else:
                likelihood = 0.0
            score *= likelihood * prior
        if score != 0:
            scores[candidate] = score

    # Rescale the surviving scores so they sum to 1.
    return DDist(DDist.normalize_probability_table(scores))
Exemplo n.º 9
0
def tinyTrans(s, a):
    """Stochastic transition model over the states 0 through 4.

    Only state 0 looks at the action: ``'a'`` favours state 1, any other
    action favours state 2.  State 4 always stays in state 4.  A state
    outside 0-4 yields ``None``.
    """
    if s == 0:
        to_one, to_two = (0.9, 0.1) if a == 'a' else (0.1, 0.9)
        return DDist({1: to_one, 2: to_two})
    if s == 1:
        return DDist({1: 0.1, 0: 0.9})
    if s == 2:
        return DDist({2: 0.1, 3: 0.9})
    if s == 3:
        return DDist({3: 0.1, 0: 0.5, 4: 0.4})
    if s == 4:
        return DDist({4: 1.0})
    return None
class AlphabetDist:
    """Letter and letter-transition model over an alphabet.

    Holds a distribution over single letters, a transition matrix indexed as
    ``[next_letter_index, previous_letter_index]``, and helpers that score an
    encoded sequence under a given encoding table.
    """

    def __init__(self, floor=-10, letter_dist=None, transition_matrix=None):
        """Store the model components.

        Args:
            floor: value substituted for a log-probability of ``-inf``.
            letter_dist: distribution over letters; when truthy its
                ``alphabet()`` is used to build the alphabet and index table.
            transition_matrix: 2-D array indexed by letter positions.
        """
        self.letter_dist = letter_dist
        # Fixed: this was stored under the misspelled name
        # ``tranisition_matrix``, so conditional_prob/conditional_dist raised
        # AttributeError unless generate_dists had been called first.
        self.transition_matrix = transition_matrix
        self.alphabet = letter_dist.alphabet() if letter_dist else None
        self.letter_to_index = {
            letter: i
            for i, letter in enumerate(self.alphabet)
        } if letter_dist else None
        self.floor = floor

    def handle_inf(self, x):
        """Return ``x``, or ``self.floor`` when ``x`` is negative infinity."""
        return x if x != -np.inf else self.floor

    def generate_dists(self, alphabet_file, letter_file, matrix_file):
        """Load the alphabet, letter distribution and matrix from CSV input.

        All three inputs are read with ``np.genfromtxt`` using a comma
        delimiter; the alphabet is read as unicode strings.
        """
        alphabet = np.genfromtxt(alphabet_file, delimiter=",", dtype="unicode")
        letter_prob = np.genfromtxt(letter_file, delimiter=",")
        matrix = np.genfromtxt(matrix_file, delimiter=",")
        self.alphabet = alphabet
        self.letter_to_index = {
            letter: i
            for i, letter in enumerate(alphabet)
        }
        # The i-th value of the letter file belongs to the i-th letter.
        self.letter_dist = DDist({
            letter: letter_prob[i]
            for i, letter in enumerate(alphabet)
        })
        self.transition_matrix = matrix

    def prob_letter(self, x):
        """Return the log-probability of letter ``x``, floored at ``floor``."""
        return self.handle_inf(np.log(self.letter_dist.prob(x)))

    def conditional_prob(self, x, x_prime):
        """Return the log-probability of ``x_prime`` following ``x``.

        The matrix is read at row ``x_prime``, column ``x``; a result of
        ``-inf`` is replaced by ``floor``.
        """
        return self.handle_inf(
            np.log(self.transition_matrix[self.letter_to_index[x_prime],
                                          self.letter_to_index[x]]))

    def conditional_dist(self, x):
        """Return a ``DDist`` over the next letter given the letter ``x``.

        Uses the matrix column for ``x``; these are plain probabilities,
        not logs.
        """
        index = self.letter_to_index[x]
        probabilities = self.transition_matrix[:, index]
        return DDist({
            letter: probabilities[i]
            for i, letter in enumerate(self.alphabet)
        })

    def log_likelihood(self, encoded_seq, f):
        """Return the log-likelihood of ``encoded_seq`` under encoding ``f``.

        The sequence is decoded through ``invert_mapping(f, self.alphabet)``;
        the result is the log-probability of the first decoded letter plus
        the transition log-probability of every consecutive pair.
        ``encoded_seq`` must be non-empty (the first element is indexed
        directly).
        """
        f_inv = invert_mapping(f, self.alphabet)
        decoded_seq = [f_inv[x] for x in encoded_seq]
        output = self.prob_letter(decoded_seq[0])
        for i in range(1, len(encoded_seq)):
            output = output + self.conditional_prob(decoded_seq[i - 1],
                                                    decoded_seq[i])
        return output

    def frequency_analysis(self, ciphertext):
        """Return a mapping obtained by frequency analysis of ``ciphertext``.

        Letters of the model distribution and letters of the alphabet (by
        their relative frequency in ``ciphertext``) are each sorted in
        ascending order; the i-th model letter maps to the i-th ciphertext
        letter.

        Raises:
            KeyError: if ``ciphertext`` contains a letter not in the alphabet.
            ZeroDivisionError: if ``ciphertext`` is empty and the alphabet
                is not.
        """
        n = len(ciphertext)
        empirical_counts = {letter: 0 for letter in self.alphabet}
        for letter in ciphertext:
            empirical_counts[letter] += 1
        empirical_distribution = sorted(
            {
                letter: float(empirical_counts[letter]) / n
                for letter in self.alphabet
            }.items(),
            key=lambda item: item[1])
        true_distribution = sorted(
            self.letter_dist.to_dictionary().items(),
            key=lambda item: item[1])
        frequency_mapping = {}
        for rank, (model_letter, _) in enumerate(true_distribution):
            frequency_mapping[model_letter] = empirical_distribution[rank][0]
        return frequency_mapping