Exemplo n.º 1
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    for a user by performing least-squares linear regression using feature_fn
    on the items in restaurants. Also, return the R^2 value of this model.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants; each one must have a review by
                   user, otherwise the rating lookup raises KeyError
    feature_fn -- A function that takes a restaurant and returns a number

    Returns a pair (predictor, r_squared), where predictor(restaurant)
    evaluates the fitted line b * feature_fn(restaurant) + a.

    With plain Python numbers, ZeroDivisionError is raised when every feature
    value is identical (s_xx == 0) or every rating is identical (s_yy == 0),
    because the slope and R^2 are then undefined.
    """
    # Map restaurant name -> the rating this user gave it.
    reviews_by_user = {review_restaurant_name(review): review_rating(review)
                       for review in user_reviews(user).values()}

    xs = [feature_fn(r) for r in restaurants]
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    # BEGIN Question 7
    ave_xs = mean(xs)
    ave_ys = mean(ys)
    # Sums of squared deviations and of cross deviations from the means.
    s_xx = sum([(x - ave_xs) ** 2 for x in xs])
    s_yy = sum([(y - ave_ys) ** 2 for y in ys])
    s_xy = sum([(x - ave_xs) * (y - ave_ys) for x, y in zip(xs, ys)])
    # Slope of the fitted line and its coefficient of determination.
    b, r_squared = s_xy / s_xx, s_xy ** 2 / (s_xx * s_yy)
    # Intercept chosen so the line passes through (ave_xs, ave_ys).
    a = ave_ys - b * ave_xs
    # END Question 7

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
def experiment3(trials=10):
    """Scatter score spread against a chain-derived fitness summary.

    Samples `trials` codes (sample_code) and `trials` matrices (sample_matrix),
    measures the standard deviation of their scores over 10000 random sites,
    and for each model runs `mh` with the model's phat as target and its
    occupancy as the captured state. Both series are drawn on the current
    matplotlib axes ('apw' and 'linear'); nothing is returned.

    NOTE(review): mh is presumably a Metropolis-Hastings sampler returning the
    captured states in order -- confirm against its definition.
    NOTE(review): the summary applies exp() to a mean of log10 values, so it
    is not a geometric mean in either base -- confirm this is intended.
    """
    mu = -10
    Ne = 5
    L = 10
    sigma = 1
    codes = [sample_code(L, sigma) for i in range(trials)]
    pssms = [sample_matrix(L, sigma) for i in range(trials)]
    sites = [random_site(L) for i in xrange(10000)]

    # Spread of scores over the shared random sites, one value per model.
    apw_site_sigmas = []
    for code in codes:
        apw_site_sigmas.append(sd([score(code, site) for site in sites]))
    linear_site_sigmas = []
    for pssm in pssms:
        linear_site_sigmas.append(sd([score_seq(pssm, site) for site in sites]))

    def boltzmann(ep):
        # Shared denominator of the occupancy and phat expressions.
        return 1+exp(ep-mu)

    def apw_phat(code, site):
        return 1/boltzmann(score(code, site))**(Ne-1)

    def apw_occ(code, site):
        return 1/boltzmann(score(code, site))

    def linear_phat(pssm, site):
        return 1/boltzmann(score_seq(pssm, site))**(Ne-1)

    def linear_occ(pssm, site):
        return 1/boltzmann(score_seq(pssm, site))

    def chain_summary(target, occupancy):
        # Run one chain from a fresh random site and summarise every captured
        # value except the first.
        start = random_site(L)
        trace = mh(target, proposal=mutate_site, x0=start,
                   capture_state=occupancy)
        return exp(mean(map(log10, trace[1:])))

    apw_mean_fits = []
    for code in tqdm(codes):
        apw_mean_fits.append(chain_summary(lambda s: apw_phat(code, s),
                                           lambda s: apw_occ(code, s)))
    linear_mean_fits = []
    for pssm in tqdm(pssms):
        linear_mean_fits.append(chain_summary(lambda s: linear_phat(pssm, s),
                                              lambda s: linear_occ(pssm, s)))

    plt.scatter(apw_site_sigmas, apw_mean_fits, label='apw')
    plt.scatter(linear_site_sigmas, linear_mean_fits, color='g',label='linear')
    plt.legend(loc='lower right')
def plot_results_dict_gini_qq(results_dict,filename=None):
    """Label a Gini-coefficient comparison plot and print Pearson correlations.

    results_dict -- mapping whose keys split on "_" into exactly three parts
                    (two genome parts and a TF name); any other number of
                    underscores makes the unpacking below raise ValueError
    filename -- not used by the code visible here

    NOTE(review): bios, maxents and uniforms are never appended to in this
    block, so min()/max() below raise ValueError on an empty list. The
    statements that filled them and drew the points appear to be missing --
    restore them before relying on this function.
    """
    bios = []
    maxents = []
    uniforms = []
    for i,k in enumerate(results_dict):
        # Key layout: "<g1>_<g2>_<tf>"; the genome id is the first two parts.
        g1,g2,tf = k.split("_")
        genome = g1 + "_" + g2
        bio_motif = extract_tfdf_sites(genome,tf)
        bio_ic = motif_ic(bio_motif)  # computed but unused in the visible code
        bio_gini = motif_gini(bio_motif)  # computed but never stored in bios
        d = results_dict[k]  # per-key results; not read in the visible code
    # Overall value range across the three series (unused afterwards here).
    minval = min(bios+maxents+uniforms)
    maxval = max(bios+maxents+uniforms)
    plt.xlabel("Observed Gini Coefficient")
    plt.ylabel("Mean Sampled Gini Coefficient")
    plt.legend(loc='upper left')
    print "bio vs maxent:",pearsonr(bios,maxents)
    print "bio vs uniform:",pearsonr(bios,uniforms)
def sigma_Ne_contour_plot(filename=None):
    sigmas = np.linspace(0,5,20)
    Nes = np.linspace(1,20,20)
    L = 10
    n = 50
    copies = 10*n
    trials = 100
    motifss = [[[(sample_motif(sigma, Ne, L, copies, n))
               for i in range(trials)]
          for sigma in sigmas] for Ne in tqdm(Nes)]
    occ_M = [[expected_occupancy(sigma, Ne, L, copies)
          for sigma in sigmas] for Ne in tqdm(Nes)]
    print "ic_M"
    ic_M = mmap(lambda ms:mean(map(motif_ic,ms)),motifss)
    print "gini_M"
    gini_M = mmap(lambda ms:mean(map(motif_gini,ms)),motifss)
    print "mi_M"
    mi_M = mmap(lambda ms:mean(map(total_motif_mi,ms)),tqdm(motifss))
# Run a birth/death population simulation for `turns` steps over a population
# of N (species, fitness) pairs. Each turn one individual is chosen to
# reproduce, its mutated offspring is scored with fitness(), and the offspring
# overwrites a uniformly chosen slot (which may be the parent's own).
#
# Returns `pop` alone if the birth sampler yields None, otherwise the tuple
# (pop, hist).
#
# NOTE(review): the parameter list below is cut off after `mutate=mutate,` --
# there is no closing "):" and the body uses pop, hist_modulus, print_modulus,
# fitness, n, L and K, none of which are declared in the visible text. Restore
# the full signature from the original file.
# NOTE(review): hist is never appended to in the visible code, so the second
# element of the normal return value is always an empty list.
# NOTE(review): the two return statements have different shapes (pop vs.
# (pop, hist)); callers must handle both.
def moran_process(N=1000,turns=10000,mean_site_muts=1,mean_rec_muts=1,init=sample_species,mutate=mutate,
    #ringer = (np.array([1]+[0]*(K-1)),sample_eps())
    if pop is None:
        # Build the initial population: N fresh species, each paired with its fitness.
        pop = [(lambda spec:(spec,fitness(spec)))(init())
               for _ in trange(N)]
    # ringer = make_ringer()
    # pop[0] = (ringer,fitness(ringer))
    #pop = [(ringer,fitness(ringer)) for _ in xrange(N)]
    # Mutation rates scaled by n*L and by K respectively, capped at 1.
    site_mu = min(1/float(n*L) * mean_site_muts,1)
    rec_mu = min(1/float(K) * mean_rec_muts,1)
    hist = []
    for turn in xrange(turns):
        fits = [f for (s,f) in pop]
        #print fits
        # presumably samples an index with probability proportional to its
        # (unnormalized) fitness -- confirm against inverse_cdf_sample
        birth_idx = inverse_cdf_sample(range(N),fits,normalized=False)
        if birth_idx is None:
            # Sampler produced no index; stop early and return only pop.
            return pop
        death_idx = random.randrange(N)
        #print birth_idx,death_idx
        mother,f = pop[birth_idx]
        daughter = mutate(mother,site_mu,rec_mu)
        #print "mutated"
        pop[death_idx] = (daughter,fitness(daughter))
        # Mean of the fitnesses collected before this turn's replacement.
        mean_fits = mean(fits)
        if turn % hist_modulus == 0:
            # Population-level summaries, computed only every hist_modulus turns.
            mean_dna_ic = mean([motif_ic(sites,correct=False) for ((sites,eps),_) in pop])
            mean_rec = mean([recognizer_promiscuity(x) for (x,f) in pop])
            mean_recced = mean([sites_recognized((dna,rec)) for ((dna,rec),_) in pop])
            # Printing is nested here, so it happens only on turns that are
            # multiples of both hist_modulus and print_modulus.
            if turn % print_modulus == 0:
                print turn,"sel_fit:",f,"mean_fit:",mean_fits,"mean_dna_ic:",mean_dna_ic,"mean_rec_prom:",mean_rec
    return pop,hist
 def test_mean(self):
     """Test calculating arithmetic mean."""
     # An empty input is expected to yield 0 rather than raise.
     self.assertEqual(0, utils.mean([]))
     self.assertEqual(2.5, utils.mean([5, 0]))
     # (sqrt(8) + 1) / 2 is irrational, so compare to 5 decimal places.
     self.assertAlmostEqual(1.914213, utils.mean([8**0.5, 1]), places=5)
def evo_sim_experiment(filename=None):
    """Compare biological motifs to on-off evolutionary simulations.

    Loads the motif collection via extract_motif_object_from_tfdf, runs
    spoof_motif (num_motifs=100, Ne_tol=10**-4) on every biological motif,
    averages motif_ic, motif_gini and total_motif_mi over each motif's
    simulations, and sets titles and axis labels on the current matplotlib
    axes. Returns the list of spoof_motif results, one per biological motif.

    NOTE(review): filename and the averaged statistics are unused in the code
    visible here, and no points are drawn -- the plotting calls between the
    label groups appear to be missing.
    """
    tfdf = extract_motif_object_from_tfdf()

    bio_motifs = []
    for tf in tfdf.tfs:
        bio_motifs.append(getattr(tfdf, tf))

    evosims = []
    for motif in tqdm(bio_motifs):
        evosims.append(spoof_motif(motif, num_motifs=100, Ne_tol=10**-4))

    def simulated_means(stat):
        # Mean of `stat` over the simulated motifs of each biological motif.
        return [mean(map(stat, sims)) for sims in tqdm(evosims)]

    evo_ics = simulated_means(motif_ic)
    evo_ginis = simulated_means(motif_gini)
    evo_mis = simulated_means(total_motif_mi)

    # Information-content labels.
    plt.title("Motif IC (bits)")
    plt.xlabel("Biological Value")
    plt.ylabel("Simulated Value")

    # Gini-coefficient labels.
    plt.title("Motif Gini Coefficient")
    plt.xlabel("Biological Value")
    plt.ylabel("Simulated Value")

    # Mutual-information labels (title is set last in this group).
    plt.xlabel("Biological Value")
    plt.ylabel("Simulated Value")
    plt.title("Pairwise Motif MI (bits)")

    return evosims
