Example #1
def dHdt(ps,Mut=None,dt=10**-6):
    """Return instantaneous rate of change of entropy"""
    L = int(log(len(ps),4))
    if Mut is None:
        Mut = mutation_matrix_ref(mu=1,w=L)
    psp = mutate(ps,dt,Mut)
    return (h(psp)-h(ps))/dt
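Here h, mutate, and mutation_matrix_ref are helpers from the host module. A self-contained sketch of the same finite-difference pattern, with a stand-in uniform mutation kernel (all names below are illustrative, not the module's):

import numpy as np

def h_bits(ps):
    # Shannon entropy in bits, ignoring zero entries
    ps = np.asarray(ps, dtype=float)
    nz = ps[ps > 0]
    return -np.sum(nz * np.log2(nz))

def dHdt_demo(ps, dt=10**-6):
    # finite-difference estimate of dH/dt under a uniform mutation kernel
    K = len(ps)
    psp = ps + dt * (np.full(K, ps.sum() / K) - ps)  # drift toward uniform
    return (h_bits(psp) - h_bits(ps)) / dt

ps = np.array([0.7, 0.1, 0.1, 0.1])
print(dHdt_demo(ps))  # positive: mutation pushes ps toward uniform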
Example #2
def flow_to_h(hf,p0,tol=10**-2,eta=10**-6):
    """Given initial point p0, pursue gradient flow until reaching final entropy hf"""
    p = np.copy(p0)
    hp = h(p0)
    iterations = 0
    while abs(hp-hf) > tol:
        g = grad(p)
        p += g*(hf-hp) * eta
        hp = h(p)
        # if iterations % 1000 == 0:
        #     print "p:",p,"g:",g*eta,hp,np.linalg.norm(g*eta)
        iterations += 1
        if np.any(np.isnan(p)):
            return None
    return p
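flow_to_h assumes a grad function returning the entropy gradient projected onto the simplex. A minimal self-contained analogue with an analytic gradient (natural-log entropy; illustrative only, and not guaranteed to converge from every starting point):

import numpy as np

def h_nats(p):
    return -np.sum(p * np.log(p))

def grad_h(p):
    g = -(np.log(p) + 1.0)
    return g - g.mean()   # project onto the sum-zero tangent plane

def flow_to_h_demo(hf, p0, tol=10**-3, eta=10**-3):
    p = np.copy(p0)
    while abs(h_nats(p) - hf) > tol:
        p += grad_h(p) * (hf - h_nats(p)) * eta
        if np.any(p <= 0):   # stepped off the simplex; give up
            return None
    return p

p = flow_to_h_demo(1.0, np.array([0.4, 0.3, 0.2, 0.1]))
print(p, h_nats(p))   # final entropy lands within tol of 1.0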
Example #4
def plot_h_vs_ic(L, sigmas=interpolate(0.1,10,100), max_h=None, M=None, trials=1000):
    if max_h is None:
        print("generating samples")
        pss = [simplexify_sample(4**L, sigma=sigma)
               for sigma in tqdm(sigmas)]
    else:
        pss = []
        while len(pss) < trials:
            ps = sample(L)
            if h(ps) < max_h:
                pss.append(ps)
                print(len(pss))
    print("computing M")
    if M is None:
        M = marginalization_matrix(L)
    icq_s = [ic(ps, M) for ps in tqdm(pss)]
    print("computing entropy")
    icp_s = [2*L - h_np(ps) for ps in tqdm(pss)]
    plt.scatter(icp_s, icq_s)
    plt.plot([0, 2*L], [0, 2*L])
    plt.plot(*pl(lambda icp: L*icp + 2*(L - L**2), [2*(L-1), 2*L]), color='b')
    plt.xlabel("Distribution IC")
    plt.ylabel("PSFM IC")
    plt.title("Distribution vs. Columnwise IC, Length=%s" % L)
Example #5
    def _perform_error_estimation(self):
        """Receiver side: compare a random sample of sifted-key bits with
        the sender's copy, estimate the error rate, and check that enough
        min-entropy remains to extract an n-bit key."""
        # print('Performing error estimation...', end='\r')

        error_estimation_indices = communication.receive_list(self.cqc, self.sender_pkey)
        sender_key_part = communication.receive_binary_list(self.cqc, self.sender_pkey)

        num_errors = 0.0
        key_part = []
        for i in range(0, len(error_estimation_indices)):
            key_part.append(self.sifted_key[error_estimation_indices[i]])
            if sender_key_part[i] != key_part[i]:
                num_errors += 1.0

        communication.send_binary_list(self.cqc, self.sender, self.skey, key_part)

        self.error_estimation = num_errors / len(key_part)
        # print('B Performing error estimation... Done!')
        # print('B Error rate = {}'.format(self.error_estimation))

        error_estimation_indices.sort()
        self.sifted_key = utils.remove_indices(self.sifted_key, error_estimation_indices)
        remaining_bits = len(self.sifted_key)
        min_entropy = remaining_bits * (1 - utils.h(self.error_estimation))
        max_key = min_entropy - 2 * utils.log(1/self.security_param, 2) - 1

        return self.n <= max_key
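The final check is the usual leftover-hash bound: from m remaining bits and error rate e, the extractable key length is roughly m*(1 - h(e)) - 2*log2(1/eps) - 1, where h is the binary entropy. A quick numeric check (values illustrative):

from math import log2

def h2(p):
    return 0.0 if p in (0.0, 1.0) else -p*log2(p) - (1-p)*log2(1-p)

remaining_bits = 1000       # sifted bits left after error estimation
error_rate = 0.05
security_param = 10**-6

min_entropy = remaining_bits * (1 - h2(error_rate))
max_key = min_entropy - 2 * log2(1/security_param) - 1
print(max_key)              # ~673 bits are extractable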
Example #6
def hdist(desired_ent,n):
    """Return a distribution on n outcomes with entropy <= desired_ent (bits)"""
    ent = n  # h(ps) <= log2(n) <= n, so the loop always runs at least once
    while(ent > desired_ent):
        ps = simplex_sample(n)
        ent = h(ps)
    return ps
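simplex_sample draws uniformly from the probability simplex, which a Dirichlet(1,...,1) draw reproduces. A self-contained numpy version (hdist_demo is my name; rejection gets slow once desired_ent falls far below the typical entropy of a uniform draw):

import numpy as np

def h_bits(ps):
    nz = ps[ps > 0]
    return -np.sum(nz * np.log2(nz))

def hdist_demo(desired_ent, n, rng=np.random.default_rng()):
    # rejection-sample until the entropy constraint is met
    while True:
        ps = rng.dirichlet(np.ones(n))   # uniform over the simplex
        if h_bits(ps) <= desired_ent:
            return ps

print(hdist_demo(1.5, 4))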
Example #7
    def _perform_error_estimation(self):
        """Sender side: publish a random sample of sifted-key bits, compare
        it with the receiver's copy, estimate the error rate, and check
        that enough min-entropy remains to extract an n-bit key."""
        print('Performing error estimation...', end='\r')
        error_estimation_indices = []
        key_part = []
        for i in range(0, self.n):
            r = random.randint(0, len(self.sifted_key) - 1)
            while r in error_estimation_indices:
                r = random.randint(0, len(self.sifted_key) - 1)
            error_estimation_indices.append(r)
            key_part.append(self.sifted_key[r])

        communication.send_message(self.cqc, self.receiver, self.skey,
                                   error_estimation_indices)
        communication.send_binary_list(self.cqc, self.receiver, self.skey,
                                       key_part)
        receiver_key_part = communication.receive_binary_list(
            self.cqc, self.receiver_pkey)

        num_errors = 0.0
        for i in range(0, len(key_part)):
            if receiver_key_part[i] != key_part[i]:
                num_errors += 1.0

        self.error_estimation = num_errors / len(key_part)
        print('Performing error estimation... Done!')
        print('Error rate = {}'.format(self.error_estimation))

        error_estimation_indices.sort()
        self.sifted_key = utils.remove_indices(self.sifted_key,
                                               error_estimation_indices)
        remaining_bits = len(self.sifted_key)
        min_entropy = remaining_bits * (1 - utils.h(self.error_estimation))
        max_key = min_entropy - 2 * utils.log(1 / self.security_param, 2) - 1

        return self.n <= max_key
def stationary_stat_neglect_fg(matrix,n,Ne,T,samples=1000):
    # self-normalized importance sampling of the statistic T under the
    # stationary distribution; G must be defined in the enclosing scope
    acc = 0
    Z = 0
    ws = []
    for sample in trange(samples):
        motif = sample_motif_neglect_fg(matrix,n,Ne)
        log_f = log_fitness(matrix,motif,G)
        log_q = dsample_motif_neglect_fg(matrix,motif,Ne)
        t = T(motif)
        w = exp(log_f - log_q)
        acc += t * w
        Z += w
        print(acc/Z)
        ws.append(w)
    print("entropy of samples:", h(normalize(ws)), log(samples) - h(normalize(ws)))
    return acc/Z
def entropy_from_ps(ps, N):
    K = len(ps)
    ns = [0] * K
    xs = range(K)
    for i in range(N):
        j = inverse_cdf_sample(xs, ps)
        ns[j] += 1
    return h([n/float(N) for n in ns])
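This is the plug-in entropy estimate from N draws (biased low for small N). An equivalent numpy sketch, assuming the same bit-based h:

import numpy as np

def entropy_from_ps_demo(ps, N, rng=np.random.default_rng()):
    counts = np.bincount(rng.choice(len(ps), size=N, p=ps), minlength=len(ps))
    freqs = counts[counts > 0] / N
    return -np.sum(freqs * np.log2(freqs))

print(entropy_from_ps_demo([0.5, 0.25, 0.25], 10000))   # ~1.5 bits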
def rvector(beta, K=4):
    #trials = 0
    while True:
        #trials += 1
        p = simplex_sample(K)
        if random.random() < exp(-beta*h(p)):
            #print "acceptance rate:",1/float(trials)
            return p
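Since h >= 0, exp(-beta*h(p)) is a valid acceptance probability for beta >= 0, so this loop samples the simplex with density proportional to exp(-beta*H(p)), favoring low-entropy corners as beta grows. A self-contained check (names mine):

import numpy as np

def rvector_demo(beta, K=4, rng=np.random.default_rng()):
    while True:
        p = rng.dirichlet(np.ones(K))
        nz = p[p > 0]
        hp = -np.sum(nz * np.log2(nz))
        if rng.random() < np.exp(-beta * hp):
            return p

print(np.round(rvector_demo(3.0), 3))   # tends to sit far from uniform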
Example #12
def mean_ic_from_eps(cs,n,L):
    """Compute approximate information content of a motif with cs[i] mismatches in site i"""
    # Should depend only on permutations, not on substitutions
    # cs counts number of MISMATCHES in each site
    p_mismatch = 1.0/(n*L)*sum(cs)
    p_match = 1 - p_mismatch
    col_ent = h([p_match,p_mismatch/3.0,p_mismatch/3.0,p_mismatch/3.0])
    return L*(2 - col_ent)
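For instance (values illustrative), with n = 20 sites of length L = 10 and one mismatch per site, p_mismatch = 20/(20*10) = 0.1; the per-column entropy is h([0.9, 0.1/3, 0.1/3, 0.1/3]) ≈ 0.63 bits, giving an approximate motif IC of 10*(2 - 0.63) ≈ 13.7 bits.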
Example #14
def astar(draw, grid, start, end):
    count = 0
    frontier = PriorityQueue()
    frontier.put((0, count, start))
    frontier_hash = {start}
    g_score = {spot: float("inf") for row in grid for spot in row}
    g_score[start] = 0
    f_score = {spot: float("inf") for row in grid for spot in row}
    f_score[start] = utils.h(start.get_pos(), end.get_pos())
    came_from = {}

    while not frontier.empty():
        for event in pygame.event.get():
            if utils.is_quit(event):
                pygame.quit()

        current = frontier.get()[2]
        frontier_hash.remove(current)

        if current == end:
            utils.reconstruct_path(came_from, end, draw)
            start.make_start()
            end.make_end()
            return True

        for neighbor in current.neighbors:
            temp_g_score = g_score[current] + 1

            if temp_g_score < g_score[neighbor]:
                came_from[neighbor] = current
                g_score[neighbor] = temp_g_score
                f_score[neighbor] = temp_g_score + utils.h(
                    neighbor.get_pos(), end.get_pos())
                if neighbor not in frontier_hash:
                    count += 1
                    frontier.put((f_score[neighbor], count, neighbor))
                    frontier_hash.add(neighbor)
                    neighbor.make_visited()

        draw()

        if current != start:
            current.make_expanded()

    return False
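Note that utils.h here is the A* heuristic (estimated distance to the goal), not an entropy. For 4-way grid movement, Manhattan distance is the standard admissible choice; a minimal sketch of such a helper (an assumption, not necessarily this repo's implementation):

def manhattan_h(p1, p2):
    # admissible heuristic for 4-way grid movement
    (x1, y1), (x2, y2) = p1, p2
    return abs(x1 - x2) + abs(y1 - y2)

print(manhattan_h((0, 0), (3, 4)))   # 7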
Example #16
def minimize_dHdt_test():
    L = 4
    K = int(4**L)
    for i in range(10):
        ps = np.array(simplex_sample(K))
        print("marginalizing")
        qs = qs_from_psfm(marginalize(ps))
        print("sampling with given entropy")
        rs = sample_with_given_entropy(K,h(qs),tol_factor=10**-6)
        print("minimizing")
        qsp = minimize_dHdt(qs)
        rsp = minimize_dHdt(rs)
        print("qs:", dHdt(qs), dHdt(qsp))
        print("rs:", dHdt(rs), dHdt(rsp))
Example #17
def minimize_dHdt(ps):
    converged = False
    eta = 10**-10
    dt = 10**-6
    while not converged:
        print(dHdt(ps), sum(ps), h(ps), min(ps))
        bvs = entropic_isocontour(ps)
        dHp = dHdt(ps)
        grad = [(dHdt(ps + bv*dt)-dHp)/dt for bv in bvs]
        dp = sum(bv*g for (bv,g) in zip(bvs,grad))
        print("sum dp:", sum(dp))
        ps = ps - dp*eta
        if abs(sum(dp)) < 10**-100:
            converged = True
    return ps
def plot_h_vs_ic(num_cols, trials, max_h=None):
    if max_h is None:
        pss = [sample(num_cols) for i in tqdm(range(trials))]
    else:
        pss = []
        while len(pss) < trials:
            ps = sample(num_cols)
            if h(ps) < max_h:
                pss.append(ps)
                print(len(pss))
    ics = [ic(ps) for ps in tqdm(pss)]
    hs = [h(ps) for ps in tqdm(pss)]
    plt.scatter(hs, ics)
    plt.plot([0, 2*num_cols], [2*num_cols, 0])
    print(pearsonr(ics, hs))
    plt.xlabel("Entropy")
    plt.ylabel("IC")
Example #19
def predict_ic(matrix, mu, Ne, N=100):
    nu = Ne - 1
    ep_min, ep_max, L = sum(map(min, matrix)), sum(map(max,
                                                       matrix)), len(matrix)
    site_sigma = site_sigma_from_matrix(matrix)
    density = lambda ep: (1 / (1 + exp(ep - mu)))**(Ne - 1) * dnorm(
        ep, 0, site_sigma) * (ep_min <= ep <= ep_max)
    d_density = lambda ep: ep / site_sigma**2 + nu / (1 + exp(mu - ep))
    mode = bisect_interval(d_density, -100, 100)
    if mode < ep_min:
        mode = ep_min
    dmode = density(mode)
    # calculate mean epsilon via rejection sampling
    eps = []
    while len(eps) < N:
        ep = random.random() * (ep_max - ep_min) + ep_min
        if random.random() < density(ep) / dmode:
            eps.append(ep)
    #return eps
    des_mean_ep = mean(eps)
    des_mean_ep_analytic = integrate.quad(lambda ep: ep * density(ep), ep_min,
                                          ep_max)

    # print "des_means:", des_mean_ep, des_mean_ep_analytic
    # print "min ep: %s max_ep: %s des_mean_ep: %s" % (ep_min, ep_max, des_mean_ep)
    def mean_ep(lamb):
        try:
            psfm = psfm_from_matrix(matrix, lamb=lamb)
            return sum([
                ep * p for (mat_row, psfm_row) in zip(matrix, psfm)
                for (ep, p) in zip(mat_row, psfm_row)
            ])
        except Exception:
            print(matrix, lamb)
            raise

    try:
        lamb = bisect_interval(lambda l: mean_ep(l) - des_mean_ep, -20, 20)
    except Exception:
        print(matrix, mu, Ne)
        raise
    tilted_psfm = psfm_from_matrix(matrix, lamb)
    return sum([2 - h(col) for col in tilted_psfm])
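predict_ic leans on bisect_interval twice: to locate the density mode and to solve for the tilting parameter lamb. Assuming it is plain bisection on a sign-changing function, a minimal version would be:

def bisect_interval_demo(f, lo, hi, tol=10**-10):
    # find a root of f in [lo, hi]; f(lo) and f(hi) must differ in sign
    assert f(lo) * f(hi) <= 0
    while hi - lo > tol:
        mid = (lo + hi) / 2.0
        if f(lo) * f(mid) <= 0:
            hi = mid
        else:
            lo = mid
    return (lo + hi) / 2.0

print(bisect_interval_demo(lambda x: x**2 - 2, 0, 2))   # ~1.414214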
Example #20
def plot_ic_vs_pairwise_mi(L, sigmas=interpolate(0.01,10,100), max_h=None, M=None, trials=1000):
    if max_h is None:
        print("generating samples")
        pss = [simplexify_sample(4**L, sigma=sigma)
               for sigma in sigmas]
    else:
        pss = []
        while len(pss) < trials:
            ps = sample(L)
            if h(ps) < max_h:
                pss.append(ps)
                print(len(pss))
    print("computing M")
    if M is None:
        M = marginalization_matrix(L)
    print("computing ic")
    total_ics = [total_ic(ps) for ps in tqdm(pss)]
    psfm_ics = [2*L - psfm_entropy(ps, M) for ps in tqdm(pss)]
    print("computing pairwise mi")
    pair_mis = [pairwise_mi_ref(ps, M) for ps in tqdm(pss)]
    print("computing total mi")
    total_mis = [total_mi(ps, M) for ps in tqdm(pss)]
    plt.scatter(psfm_ics, pair_mis, color='g', label='pair MI')
    plt.scatter(psfm_ics, total_mis, label='total MI')
    plt.scatter(psfm_ics, total_ics, color='r', label='Total IC')
    plt.xlabel("PSFM IC")
    plt.ylabel("Bits")
    plt.title("Length=%s" % L)
    plt.legend()
Example #21
def entropy_hessian_experiment():
    L = 3
    K = int(4**L)
    eps = 10**-6
    ps = np.array(simplex_sample(K))
    qs = qs_from_psfm(marginalize(ps))
    rs = sample_with_given_entropy(K,h(qs),tol_factor=10**-2)
    bvs_p = entropic_isocontour(ps)
    bvs_q = entropic_isocontour(qs)
    bvs_r = entropic_isocontour(rs)
    dHp = dHdt(ps)
    dHps = [dHdt(normalize1(ps + bvp*eps)) for bvp in bvs_p]
    dHq = dHdt(qs)
    dHqs = [dHdt(normalize1(qs + bvq*eps)) for bvq in bvs_q]
    dHr = dHdt(rs)
    dHrs = [dHdt(normalize1(rs + bvr*eps)) for bvr in bvs_r]
    plt.plot(dHps,color='r')
    plt.plot([dHp]*(K-2),color='r',linestyle='--')
    plt.plot(dHqs,color='g')
    plt.plot([dHq]*(K-2),color='g',linestyle='--')
    plt.plot(dHrs,color='b')
    plt.plot([dHr]*(K-2),color='b',linestyle='--')
    plt.show()
def f(beta):
    # K and L are taken from the enclosing scope
    return (2 - (h(rvector(beta)) + K))*L
Example #24
def resample_pop():
    # bs, sample, and N are taken from the enclosing scope;
    # returns the perplexity (2**entropy) of a bootstrap resample
    re_obs = Counter(bs(sample)).values()
    return 2**h([v/N for v in re_obs])
Example #25
def ic(ps,M=None):
    psfm = marginalize(ps,M)
    return sum(2-h(col) for col in psfm)
Example #26
def psfm_entropy(ps,M=None):
    psfm = marginalize(ps,M)
    return sum(h(col) for col in psfm)
def ic(ps):
    psfm = marginalize(ps)
    return sum(2-h(col) for col in psfm)
Example #28
def rec_h(rec):
    p = np.sum(rec)/float(len(rec))
    return h([p,1-p])
Example #29
def dHdt2(ps):
    L = num_cols_from_vector(ps)
    fs = ps - norm_lap(ps)
    term1 = cross_h(fs,ps)
    term2 = h(ps)
    return 3*L*(term1 - term2)
Example #30
def back_propagate(theta1,
                   theta2,
                   train_images,
                   train_labels,
                   nclass,
                   alpha=0.001,
                   lambdaa=0.0007,
                   max_iter=50,
                   act='sig',
                   batch_size=32,
                   logging=1):
    """
    Method of updating the weights in NN Model
    by taking gradients of theta using cost function
    thata = theta - f('theta)

    Mini-batch gradient descent, applied to get the 
    gradient of the theta.
    Here updation of weights use momentum factor(gamma)
    so as to approach global minima faster
    Core of ANN, BackProp..

    @ Parameters:
    -------------
    test_images: np.array
        Contains the test_images whose labels need
        to be predicted
    test_labels: np.array
        Contains the labels(1/0)
        corresponding to selected images
    theta1: np.array
        Contains the trained theta weights
        corresponding to input->hidden layer
    theta2: np.array
        Contains the trained theta weights
        corresponding to hidden->output layer
    train_images: np.array
        Contains the train_images used to learn
        the weights of networks
    train_labels: np.array
        Contains the labels(1/0)
        corresponding to train_images
    nclass: int
        No of unique class present in the 
        training dataset
    alpha: float
        Learning rate, rate at which each gradient
        update take place
    lambdaa: float
        Regularization term which penalizes
        the cost function
    max_iter: int
        No of epochs to be performed on 
        data to learn the weights
    act: str
        Activation function which is applied to 
        the neurons in forward propagation
    batch_size: int
        No of images,labels to be fetched from
        overall data at each iterations for
        updation of weights   
    logging: int
        Steps at which logs are displayed
        or recorded

    @ Returns:
    ----------
    parameters: dict
        trained theta1,theta2 
        and per epoch Loss values

    """

    # Used to store theta1 & theta2
    parameters = {}
    # Momentum Factor
    gamma = 0.9
    # Initial dtheta values used for
    # momentum
    dtheta1, dtheta2 = 0.0, 0.0
    # One-Hot labelling the labels of data
    one_hot = output_encoding(train_labels, nclass)

    # Used to store best theta1 and theta2 values
    # whose error was least in whole epochs
    best_theta1, best_theta2 = (np.zeros((theta1.shape[0], theta1.shape[1])),
                                np.zeros((theta2.shape[0], theta2.shape[1])))

    # Store the value of cost in each epochs
    cost_list = []

    # Global Min Error term
    err = 100.0

    for epoch in np.arange(0, max_iter):
        # batch-iteration counter for the log summary
        k = 0
        print('\nOverall Min. Error rate : ' + str(err))
        # Softmax in final layer
        for batchX, batchY in get_batch(train_images, one_hot, batch_size):
            m, n = batchX.shape
            a2 = h(theta1, batchX, act)
            a2 = np.insert(a2, 0, 1, axis=0)
            a3 = h(theta2, a2.T, func='softmax')
            eps = alpha / float(m)

            # Error in Hidden and Output Layer
            delta3 = (a3 - batchY) * derivative(a3, 'none')
            delta2 = ((theta2.T).dot(delta3)) * derivative(a2, act)

            # Gradient of Theta Matrices
            ktheta1 = np.dot(delta2[1:, :], batchX)
            ktheta2 = np.dot(delta3, a2.T)

            # Momentum part to accelerate the learning
            dtheta1 = eps * (ktheta1 + lambdaa * theta1) + gamma * dtheta1
            dtheta2 = eps * (ktheta2 + lambdaa * theta2) + gamma * dtheta2
            theta1 = theta1 - dtheta1
            theta2 = theta2 - dtheta2

            # Cost Per Batch iteration
            cost_epoch = cost(a3, batchY, {
                'Theta1': theta1,
                'Theta2': theta2
            }, lambdaa)
            cost_list.append(cost_epoch)

            # Summary of backprop; LOGGING_STEPS is a module-level constant
            if k % LOGGING_STEPS == 0:
                accuracy = model_score({
                    'Theta1': theta1,
                    'Theta2': theta2
                }, train_images, train_labels, act)
                error = 100.0 - accuracy

                # Update whenever a lower error is discovered
                if (error < err):
                    err = error
                    # Store the best theta of least error
                    best_theta1 = theta1
                    best_theta2 = theta2

                # Log the learning progress of the NN
                print("Epoch " + str(epoch + 1) + " in " + str(k + 1) +
                      " iter" + " | "
                      "Train Error rate: " + str(error) + "%" +
                      " | Batch loss: " + str(cost_epoch))

            k = k + 1

    parameters = {
        'Theta1': best_theta1,
        'Theta2': best_theta2,
        'Loss': cost_list
    }
    return parameters
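The inner update is the classical heavy-ball (momentum) rule: v <- gamma*v + lr*(grad + lambda*theta), then theta <- theta - v. Stripped to its essentials (illustrative):

import numpy as np

def momentum_step(theta, grad, velocity, lr=0.001, gamma=0.9, weight_decay=0.0007):
    # one heavy-ball update; returns the new (theta, velocity) pair
    velocity = gamma * velocity + lr * (grad + weight_decay * theta)
    return theta - velocity, velocity

theta, v = np.ones(3), np.zeros(3)
for _ in range(100):
    theta, v = momentum_step(theta, 2 * theta, v)   # gradient of ||theta||^2
print(theta)   # decays toward the minimum at 0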
def entropy_from_partition(part):
    Z = float(sum(part))
    ps = [c/Z for c in part]
    return h(ps)
Example #33
def mh_sample(K,iterations=50000):
    p0 = np.array(simplex_sample(K))
    f = lambda p:1/h(p)**K
    proposal = lambda p:propose(p,sigma=1)
    return mh(f,proposal,p0,iterations=iterations)
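Here f is an unnormalized density on the simplex and mh is a generic Metropolis-Hastings driver: accept a symmetric proposal q with probability min(1, f(q)/f(p)). A minimal sketch of such a driver (names illustrative; assumes the proposal is symmetric):

import numpy as np

def mh_demo(f, proposal, p0, iterations=50000, rng=np.random.default_rng()):
    chain, p, fp = [p0], p0, f(p0)
    for _ in range(iterations):
        q = proposal(p)
        fq = f(q)
        if rng.random() * fp < fq:   # accept with prob min(1, fq/fp)
            p, fp = q, fq
        chain.append(p)
    return chain

rng = np.random.default_rng()
chain = mh_demo(lambda x: np.exp(-x*x/2),
                lambda x: x + rng.normal(scale=0.5), 0.0, iterations=5000)
print(np.mean(chain), np.var(chain))   # ~0 and ~1 for a standard normal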
Example #34
def entropy_from_counts(counts):
    N = float(sum(counts))
    ps = [c / N for c in counts]
    return h(ps)
Example #35
def ic_from_matrix(matrix):
    psfm = psfm_from_matrix(matrix)
    return sum(2 - h(col) for col in psfm)