예제 #1
0
def main():
    """Run the blocked-EM experiment on simulated HMM data and write a CSV.

    For every combination of simulated state sequence, emission draw,
    unshuffled/shuffled ordering, block count and gamma seed, a mixture of
    three ``Normal`` components is fitted with ``em``.  Each run records its
    settings, run time, iteration count, convergence status, the values
    returned by ``max_error_mean`` / ``mean_error_mean`` and two log
    likelihoods.  One row per run is written to ``outfile.csv`` in the
    current working directory.
    """
    run_data = {}
    run_id = 0

    scale = 0.5
    emissions_normal = {1: Normal(0, 2.0 * scale),
                        2: Normal(3.5, 3.0 * scale),
                        3: Normal(6.5, 1.0 * scale)}
    # Alternative emission family; assign it to emission_spec to use it.
    emissions_laplace = {1: Laplace(0, 2.0 * scale),
                         2: Laplace(3.5, 3.0 * scale),
                         3: Laplace(6.5, 1.0 * scale)}
    emission_spec = emissions_normal
    num_state_reps = 50
    num_emission_reps = 4
    num_gamma_init_reps = 4
    num_blocks = [1, 2, 5, 10, 20, 50]
    verbose = False
    graphics_on = False

    # The factor 2 accounts for the unshuffled and the shuffled pass.
    total_work = (num_state_reps * num_emission_reps *
                  2 * num_gamma_init_reps * len(num_blocks))

    work = 0
    for state_rep in range(num_state_reps):
        print('State repetition %d' % state_rep)

        # Generate HMM states; resimulate until the sequence length lies
        # strictly between 100 and 5000.
        while True:
            model = HMM([('Start', (1,), (1.0,)),
                         (1, (1, 2, 3), (0.98, 0.02, 0.0)),
                         (2, (1, 2, 3), (0.02, 0.95, 0.03)),
                         (3, (1, 2, 3, 'End'), (0.03, 0.03, 0.93, 0.01))],
                        emission_spec)
            model.simulate()
            num_data = len(model.state_vec)
            if 100 < num_data < 5000:
                break

        counts = {}
        for state in model.state_vec:
            counts[state] = counts.get(state, 0) + 1
        if verbose:
            print('Counts: %s' % str(counts))

        # One permutation per state sequence, reused for every emission
        # draw so that shuffling is repeatable.
        shuffling = np.arange(num_data)
        np.random.shuffle(shuffling)

        for emission_rep in range(num_emission_reps):
            if verbose:
                print('Emission repetition %d' % emission_rep)
            model.emit()

            for shuffled in [False, True]:
                if verbose:
                    print('Shuffling HMM run: %s' % str(shuffled))
                states = np.array(model.state_vec)
                emissions = np.array(model.emission_vec)
                if shuffled:
                    states = states[shuffling]
                    emissions = emissions[shuffling]

                for num_block in num_blocks:
                    if verbose:
                        print('Blocks: %d' % num_block)

                    blocks = np.array_split(np.arange(num_data), num_block)

                    for gamma_rep in range(num_gamma_init_reps):
                        if verbose:
                            print('Initial gamma seed: %d' % gamma_rep)

                        # State labels are 1..3; shift them to start at 0.
                        init_gamma = np.array(states) - 1

                        # Build untrained components for every run.  The
                        # fitted components are kept under a separate name
                        # so one run's result never seeds the next run.
                        init_dists = [Normal(max_sigma=6.0)
                                      for _ in range(3)]

                        run_id += 1
                        this_run = {
                            'num data': num_data,
                            'state rep': state_rep,
                            'emission rep': emission_rep,
                            'shuffled': shuffled,
                            'blocks': num_block,
                            'gamma init rep': gamma_rep,
                        }

                        start_time = time.clock()
                        results = em(emissions,
                                     init_dists,
                                     blocks=blocks,
                                     gamma_seed=gamma_rep,
                                     init_gamma=init_gamma,
                                     count_restart=0.0)
                        pi = results['pi']
                        fitted = results['dists']
                        reps = results['reps']
                        conv = results['converged']
                        run_time = time.clock() - start_time
                        this_run['run time'] = run_time
                        this_run['reps'] = reps

                        if conv:
                            conv_status = 'converged'
                        else:
                            conv_status = 'not converged'
                        this_run['convergence'] = conv_status

                        print('Reps: %d (%s)' % (reps, conv_status))
                        print('Time elapsed: %.2f' % run_time)
                        if verbose:
                            print_mixture(pi, fitted)

                        if graphics_on:
                            display_densities(emissions, fitted)
                            display_hist(emissions, fitted)

                        act = emission_spec.values()
                        this_run['err mean max'] = max_error_mean(fitted, act)
                        this_run['err mean mean'] = mean_error_mean(fitted,
                                                                    act)

                        # Mixture log likelihood of the observed emissions
                        # under the block-averaged weights.  It must be
                        # evaluated on the emissions, not on the hidden
                        # state labels.
                        like = np.zeros(num_data)
                        pi_overall = np.mean(pi, 0)
                        for p, dist in zip(pi_overall, fitted):
                            like += p * dist.density(emissions)
                        this_run['log likelihood'] = np.sum(np.log(like))

                        # Same quantity, using each block's own weights.
                        like = np.zeros(num_data)
                        for i, block in enumerate(blocks):
                            for p, dist in zip(pi[i], fitted):
                                comp = p * dist.density(emissions[block])
                                like[block] += comp
                        this_run['log likelihood local'] = \
                            np.sum(np.log(like))

                        run_data[run_id] = this_run

                        work += 1
                        print('Finished run %d/%d' % (work, total_work))

    # Output data to CSV with a deterministic column and row order.
    cols = set()
    for rid in run_data:
        cols.update(run_data[rid])
    cols = sorted(cols)
    # Binary mode is what the Python 2 csv module expects.
    with open('outfile.csv', 'wb') as f:
        writer = csv.writer(f)
        writer.writerow(cols)
        writer.writerows([[run_data[rid][c] for c in cols]
                          for rid in sorted(run_data)])
예제 #2
0
# Fit the mixture with EM, using one fixed-mean normal per integer mean 0..255
results = em(
    noisy_emissions,
    [NormalFixedMean(m, max_sigma=max_sigma) for m in range(256)],
    blocks=blocks,
    count_restart=count_restart,
)
pi = results['pi']
dists = results['dists']
print('Iterations: %(reps)d' % results)

# After the transpose, axis 1 of gamma indexes the mixture components
gamma = np.transpose(results['gamma'])
means = np.array([d.mean() for d in dists])
sds = np.array([d.sd() for d in dists])

# Display summary figures
display_densities(real_emissions, dists)

# Reconstruction by argmax: every entry takes the mean of the component
# with the largest gamma
reconstruct_argmax = means[gamma.argmax(axis=1)]
im_argmax = Image.new('L', (width, height))
im_argmax.putdata(reconstruct_argmax)
summary.paste(im_argmax, (10, height + 40))

# Reconstruction by weighted average: component means weighted by gamma
reconstruct_avg = []
for g in gamma:
    reconstruct_avg.append(np.average(means, weights=g, axis=0))
im_avg = Image.new('L', (width, height))
im_avg.putdata(reconstruct_avg)
summary.paste(im_avg, (width + 30, height + 40))

# Show summary image
summary.show()
예제 #3
0
                         model,
                         count_restart = count_restart,
                         blocks = blocks,
                         init_gamma = init_gamma,
                         pi_max = pi_max)
            print 'Iterations: %d (%s)' % (results['reps'], block_strategy)
            dists = results['dists']
            pi = results['pi']

            # Display results
            if show_each:
                for p, d in zip(np.transpose(pi), dists):
                    print '%s: %s' % (p, d.display())
                print
            if graphics:
                display_densities(data.reshape((dim*dim,)), dists)
                display_hist(data.reshape((dim*dim,)), dists)

            # Compute errors
            errors[mu][block_strategy].append(rmse(dists, mu))

# Summarize runs: one list of mean errors (and one of confidence widths)
# per block strategy, filled in the order of mus
errs = {}
confs = {}
for block_strategy in block_strategies:
    errs[block_strategy] = []
    confs[block_strategy] = []
for mu in mus:
    print('mu = %.2f' % mu)
    for block_strategy in block_strategies:
        error = np.array(errors[mu][block_strategy])
        # Mean error and twice the standard error over the repetitions
        err = error.mean()
        conf = 2.0 * error.std() / np.sqrt(reps)
        errs[block_strategy].append(err)
예제 #4
0
                 init_reps = em_steps,
                 max_reps = em_steps,
                 pi_max = pi_max,
                 trace = True)
    if show_each:
        print 'Iterations: %(reps)d' % results
    dists, dists_trace = results['dists'], results['dists_trace']
    pi, pi_trace = results['pi'], results['pi_trace']

    # Display results
    if show_each:
        for p, d in zip(np.transpose(pi), dists):
            print '%s: %s' % (p, d.display())
        print
    if graphics:
        display_densities(data, dists)
        display_hist(data, dists)
    if plot_trace:
        pi_trace = np.array(pi_trace)
        v = np.transpose(pi_trace[:,:,0])

        u = np.empty((em_steps+1))
        w = np.empty((num_blocks, (em_steps+1)))
        m = np.empty((2, em_steps+1))
        for t in range((em_steps+1)):
            d = dists_trace[t]
            m1, m2 = d[0].mean(), d[1].mean()
            u[t] = m1 - m2
            m[0,t] = m1
            m[1,t] = m2
            for b in range(num_blocks):
예제 #5
0
                         model,
                         count_restart=count_restart,
                         blocks=blocks,
                         init_gamma=init_gamma,
                         pi_max=pi_max)
            print 'Iterations: %d (%s)' % (results['reps'], block_strategy)
            dists = results['dists']
            pi = results['pi']

            # Display results
            if show_each:
                for p, d in zip(np.transpose(pi), dists):
                    print '%s: %s' % (p, d.display())
                print
            if graphics:
                display_densities(data.reshape((dim * dim, )), dists)
                display_hist(data.reshape((dim * dim, )), dists)

            # Compute errors
            errors[mu][block_strategy].append(rmse(dists, mu))

# Summarize runs: one list of mean errors (and one of confidence widths)
# per block strategy, filled in the order of mus
errs = {}
confs = {}
for block_strategy in block_strategies:
    errs[block_strategy] = []
    confs[block_strategy] = []
for mu in mus:
    print('mu = %.2f' % mu)
    for block_strategy in block_strategies:
        error = np.array(errors[mu][block_strategy])
        # Mean error and twice the standard error over the repetitions
        err = error.mean()
        conf = 2.0 * error.std() / np.sqrt(reps)
        errs[block_strategy].append(err)
예제 #6
0
# Fit the mixture with EM, using one fixed-mean normal per integer mean 0..255
results = em(
    noisy_emissions,
    [NormalFixedMean(m, max_sigma=max_sigma) for m in range(256)],
    blocks=blocks,
    count_restart=count_restart,
)
pi = results['pi']
dists = results['dists']
print('Iterations: %(reps)d' % results)

# After the transpose, axis 1 of gamma indexes the mixture components
gamma = np.transpose(results['gamma'])
means = np.array([d.mean() for d in dists])
sds = np.array([d.sd() for d in dists])

# Display summary figures
display_densities(real_emissions, dists)

# Reconstruction by argmax: every entry takes the mean of the component
# with the largest gamma
reconstruct_argmax = means[gamma.argmax(axis=1)]
im_argmax = Image.new('L', (width, height))
im_argmax.putdata(reconstruct_argmax)
summary.paste(im_argmax, (10, height + 40))

# Reconstruction by weighted average: component means weighted by gamma
reconstruct_avg = []
for g in gamma:
    reconstruct_avg.append(np.average(means, weights=g, axis=0))
im_avg = Image.new('L', (width, height))
im_avg.putdata(reconstruct_avg)
summary.paste(im_avg, (width + 30, height + 40))

# Show summary image
summary.show()
예제 #7
0
                 init_reps=em_steps,
                 max_reps=em_steps,
                 pi_max=pi_max,
                 trace=True)
    if show_each:
        print 'Iterations: %(reps)d' % results
    dists, dists_trace = results['dists'], results['dists_trace']
    pi, pi_trace = results['pi'], results['pi_trace']

    # Display results
    if show_each:
        for p, d in zip(np.transpose(pi), dists):
            print '%s: %s' % (p, d.display())
        print
    if graphics:
        display_densities(data, dists)
        display_hist(data, dists)
    if plot_trace:
        pi_trace = np.array(pi_trace)
        v = np.transpose(pi_trace[:, :, 0])

        u = np.empty((em_steps + 1))
        w = np.empty((num_blocks, (em_steps + 1)))
        m = np.empty((2, em_steps + 1))
        for t in range((em_steps + 1)):
            d = dists_trace[t]
            m1, m2 = d[0].mean(), d[1].mean()
            u[t] = m1 - m2
            m[0, t] = m1
            m[1, t] = m2
            for b in range(num_blocks):