Пример #1
0
def find_predictor(user, restaurants, feature_fn):
    """Fit a least-squares line to a user's ratings and return it.

    The fitted model is rating = a + b * feature_fn(restaurant), computed
    over the given restaurants.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants; each one is looked up by name
                   among the user's reviews
    feature_fn -- A function that takes a restaurant and returns a number

    Returns a pair: the predictor (a function from a restaurant to a
    predicted rating) and the R^2 value of the fit.
    """
    rating_of = dict(
        (review_restaurant_name(rev), review_rating(rev))
        for rev in user_reviews(user).values()
    )

    features = [feature_fn(place) for place in restaurants]
    ratings = [rating_of[restaurant_name(place)] for place in restaurants]

    # BEGIN Question 7
    feature_mean = mean(features)
    rating_mean = mean(ratings)
    # Deviations of every observation from its mean.
    feature_devs = [value - feature_mean for value in features]
    rating_devs = [value - rating_mean for value in ratings]
    s_xx = sum([dev ** 2 for dev in feature_devs])
    s_yy = sum([dev ** 2 for dev in rating_devs])
    s_xy = sum([dx * dy for dx, dy in zip(feature_devs, rating_devs)])
    slope = s_xy / s_xx
    r_squared = s_xy ** 2 / (s_xx * s_yy)
    intercept = rating_mean - slope * feature_mean
    # END Question 7

    def predictor(restaurant):
        return slope * feature_fn(restaurant) + intercept

    return predictor, r_squared
Пример #2
0
def find_centroid(cluster):
    """Return the centroid of the restaurant locations in cluster.

    The result is a two-element list: the mean of element 0 of every
    location followed by the mean of element 1.
    """
    points = [restaurant_location(place) for place in cluster]
    first_coords = []
    for point in points:
        first_coords.append(point[0])
    second_coords = []
    for point in points:
        second_coords.append(point[1])
    return [mean(first_coords), mean(second_coords)]
Пример #3
0
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    for USER by performing least-squares linear regression using FEATURE_FN
    on the items in RESTAURANTS. Also, return the R^2 value of this model.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants; each is looked up by name in
                   the user's reviews (a missing name raises KeyError)
    feature_fn -- A function that takes a restaurant and returns a number

    Returns:
    A pair (predictor, r_squared), where predictor(restaurant) evaluates
    b * feature_fn(restaurant) + a for the fitted slope b and intercept a.

    Division by zero occurs when the feature values have zero spread
    (sxx == 0) or, for r_squared, when the ratings do (syy == 0).
    """
    reviews_by_user = {review_restaurant_name(review): review_rating(review)
                       for review in user_reviews(user).values()}

    xs = [feature_fn(r) for r in restaurants]
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    mean_x = mean(xs)
    mean_y = mean(ys)

    # Reuse xs instead of calling feature_fn again for every restaurant.
    dev_x = [x - mean_x for x in xs]
    dev_y = [y - mean_y for y in ys]

    sxx = sum([pow(dx, 2) for dx in dev_x])
    syy = sum([pow(dy, 2) for dy in dev_y])
    sxy = sum([dx * dy for dx, dy in zip(dev_x, dev_y)])

    b = sxy / sxx
    a = mean_y - b * mean_x
    r_squared = sxy ** 2 / (sxx * syy)

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
def experiment3(trials=10):
    """Scatter-plot score spread against a chain-derived fitness summary for
    two kinds of randomly sampled models.

    `trials` models are drawn with sample_code ("apw") and `trials` with
    sample_matrix ("linear").  For each model the x value is the standard
    deviation (sd) of its score over 10000 random sites; the y value is
    derived from the output of mh run with that model's phat function.

    Python 2 code (uses xrange).
    """
    mu = -10
    Ne = 5
    L = 10
    sigma = 1
    codes = [sample_code(L, sigma) for i in range(trials)]
    pssms = [sample_matrix(L, sigma) for i in range(trials)]
    # Shared pool of random sites used to measure each model's score spread.
    sites = [random_site(L) for i in xrange(10000)]
    apw_site_sigmas = [sd([score(code,site) for site in sites]) for code in codes]
    linear_site_sigmas = [sd([score_seq(pssm,site) for site in sites]) for pssm in pssms]
    # *_occ is a logistic function of (score - mu); *_phat is the same
    # quantity raised to the power Ne - 1.
    def apw_phat(code, site):
        ep = score(code, site)
        return 1/(1+exp(ep-mu))**(Ne-1)
    def apw_occ(code, site):
        ep = score(code, site)
        return 1/(1+exp(ep-mu))
    def linear_phat(pssm, site):
        ep = score_seq(pssm, site)
        return 1/(1+exp(ep-mu))**(Ne-1)
    def linear_occ(pssm, site):
        ep = score_seq(pssm, site)
        return 1/(1+exp(ep-mu))
    # mh is presumably a Metropolis-Hastings sampler that returns the values
    # of capture_state along the chain -- TODO confirm.  The first returned
    # element is dropped before averaging.
    # NOTE(review): log10 is paired with exp rather than 10**; confirm the
    # mismatch of bases is intended.
    apw_mean_fits = [exp(mean(map(log10, mh(lambda s:apw_phat(code, s), proposal=mutate_site, x0=random_site(L),
                                          capture_state = lambda s:apw_occ(code, s))[1:])))
                         for code in tqdm(codes)]
    linear_mean_fits = [exp(mean(map(log10, mh(lambda s:linear_phat(pssm, s), proposal=mutate_site, x0=random_site(L),
                                             capture_state = lambda s:linear_occ(pssm, s))[1:])))
                        for pssm in tqdm(pssms)]
    plt.scatter(apw_site_sigmas, apw_mean_fits, label='apw')
    plt.scatter(linear_site_sigmas, linear_mean_fits, color='g',label='linear')
    plt.semilogy()
    plt.legend(loc='lower right')
Пример #5
0
def parametrize_approx(site,eta=1,tol=10**-2,mono=True,iterations=100000):
    L = len(sites[0])
    mono_fs = [lambda site,i=i,b=b:site[i]==b for i in range(L) for b in bases]
    di_fs = [lambda site,i=i,b1=b1,b2=b2:(site[i]==b1 and site[i+1]==b2)
             for i in range(L-1)
             for b1 in bases
             for b2 in bases]
    if mono:
        fs = mono_fs
    else:
        fs = di_fs
    ys = [mean(f(site) for site in sites) for f in fs]
    lambs = [1 for y in ys]
    err = 1
    while err > tol:
        site_chain = sample_dist(fs,lambs,iterations=iterations)
        yhats = [mean(fi(site) for site in site_chain)
                 for fi in fs]
        lambs_new = [lamb + (yhat - y)*eta for lamb,y,yhat in zip(lambs,ys,yhats)]
        for y,yhat,lamb,lamb_new in zip(ys,yhats,lambs,lambs_new):
            print y,"vs.",yhat,":",lamb,"->",lamb_new
        err = sum((y-yhat)**2 for y,yhat in zip(ys,yhats))
        print "err:",err
        lambs = lambs_new
    return lambs
Пример #6
0
def find_centroid(restaurants):
    """Return the centroid of the locations of RESTAURANTS.

    The centroid is a two-element list: the mean of element 0 of every
    restaurant location, then the mean of element 1.
    """
    locations = [restaurant_location(restaurant) for restaurant in restaurants]
    latitudes = [location[0] for location in locations]
    longitudes = [location[1] for location in locations]
    return [mean(latitudes), mean(longitudes)]
Пример #7
0
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    for `user` by performing least-squares linear regression using `feature_fn`
    on the items in `restaurants`. Also, return the R^2 value of this model.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants; each is looked up by name in
                   the user's reviews (a missing name raises KeyError)
    feature_fn -- A function that takes a restaurant and returns a number

    Returns:
    A pair (predictor, r_squared); predictor(restaurant) evaluates
    b * feature_fn(restaurant) + a for the fitted slope b and intercept a.

    Division by zero occurs when s_xx == 0 or, for r_squared, s_yy == 0.
    """
    reviews_by_user = {review_restaurant_name(review): review_rating(review)
                       for review in user_reviews(user).values()}

    xs = [feature_fn(r) for r in restaurants]
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    # BEGIN Question 7
    x_mean = mean(xs)
    y_mean = mean(ys)
    s_xx = sum([pow(x - x_mean, 2) for x in xs])
    s_yy = sum([pow(y - y_mean, 2) for y in ys])
    s_xy = sum([(x - x_mean) * (y - y_mean) for x, y in zip(xs, ys)])
    # Slope is computed once and reused for the intercept.
    b = s_xy / s_xx
    a = y_mean - b * x_mean
    r_squared = pow(s_xy, 2) / (s_xx * s_yy)
    # END Question 7

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
Пример #8
0
def find_centroid(restaurants):
    """Return the centroid of the locations of RESTAURANTS.

    The result is [latitude, longitude]: the means of element 0 and of
    element 1 of the restaurant locations.
    """
    location_list = [restaurant_location(restaurant) for restaurant in restaurants]
    latitude = mean([location[0] for location in location_list])
    longitude = mean([location[1] for location in location_list])
    return [latitude, longitude]
def plot_results_dict_gini_qq(results_dict,filename=None):
    bios = []
    maxents = []
    uniforms = []
    for i,k in enumerate(results_dict):
        g1,g2,tf = k.split("_")
        genome = g1 + "_" + g2
        bio_motif = extract_tfdf_sites(genome,tf)
        bio_ic = motif_ic(bio_motif)
        bio_gini = motif_gini(bio_motif)
        d = results_dict[k]
        bios.append(bio_gini)
        maxents.append(mean(d['maxent']['motif_gini']))
        uniforms.append(mean(d['uniform']['motif_gini']))
    plt.scatter(bios,maxents,label='ME')
    plt.scatter(bios,uniforms,label='TURS',color='g')
    minval = min(bios+maxents+uniforms)
    maxval = max(bios+maxents+uniforms)
    plt.plot([minval,maxval],[minval,maxval],linestyle='--')
    plt.xlabel("Observed Gini Coefficient")
    plt.ylabel("Mean Sampled Gini Coefficient")
    plt.legend(loc='upper left')
    print "bio vs maxent:",pearsonr(bios,maxents)
    print "bio vs uniform:",pearsonr(bios,uniforms)
    maybesave(filename)
Пример #10
0
def find_predictor(user, restaurants, feature_fn):
    """Build a linear rating predictor for a user by least squares.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants, each looked up by name in the
                   user's reviews
    feature_fn -- A function that takes a restaurant and returns a number

    Returns the predictor (restaurant -> predicted rating) together with
    the R^2 value of the fitted model.
    """
    rating_by_name = {review_restaurant_name(entry): review_rating(entry)
                      for entry in user_reviews(user).values()}

    xs = [feature_fn(place) for place in restaurants]
    ys = [rating_by_name[restaurant_name(place)] for place in restaurants]

    x_bar, y_bar = mean(xs), mean(ys)
    # Accumulate all three sums of squares / products in one pass.
    S_xx = S_yy = S_xy = 0
    for x_val, y_val in zip(xs, ys):
        dx = x_val - x_bar
        dy = y_val - y_bar
        S_xx += dx**2
        S_yy += dy**2
        S_xy += dx * dy
    slope = S_xy / S_xx
    intercept = y_bar - slope * x_bar
    r_squared = S_xy**2 / (S_xx * S_yy)

    def predictor(restaurant):
        return slope * feature_fn(restaurant) + intercept

    return predictor, r_squared
def sigma_Ne_contour_plot(filename=None):
    sigmas = np.linspace(0,5,20)
    Nes = np.linspace(1,20,20)
    L = 10
    n = 50
    copies = 10*n
    trials = 100
    motifss = [[[(sample_motif(sigma, Ne, L, copies, n))
               for i in range(trials)]
          for sigma in sigmas] for Ne in tqdm(Nes)]
    occ_M = [[expected_occupancy(sigma, Ne, L, copies)
          for sigma in sigmas] for Ne in tqdm(Nes)]
    print "ic_M"
    ic_M = mmap(lambda ms:mean(map(motif_ic,ms)),motifss)
    print "gini_M"
    gini_M = mmap(lambda ms:mean(map(motif_gini,ms)),motifss)
    print "mi_M"
    mi_M = mmap(lambda ms:mean(map(total_motif_mi,ms)),tqdm(motifss))
    plt.subplot(2,2,1)
    plt.contourf(sigmas,Nes,occ_M,cmap='jet')
    plt.colorbar()
    plt.subplot(2,2,2)
    plt.contourf(sigmas,Nes,ic_M,cmap='jet')
    plt.colorbar()
    plt.subplot(2,2,3)
    plt.contourf(sigmas,Nes,gini_M,cmap='jet')
    plt.colorbar()
    plt.subplot(2,2,4)
    plt.contourf(sigmas,Nes,mi_M,cmap='jet')
    plt.colorbar()
    maybesave(filename)
Пример #12
0
def find_centroid(cluster):
    """Compute the centroid of the restaurant locations in cluster.

    Returns a two-element list holding the mean latitude (element 0 of each
    location) and the mean longitude (element 1).
    """
    # BEGIN Question 5
    points = [restaurant_location(place) for place in cluster]
    latitudes = [point[0] for point in points]
    longitudes = [point[1] for point in points]
    centroid = [mean(latitudes), mean(longitudes)]
    return centroid
Пример #13
0
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    for a user by performing least-squares linear regression using feature_fn
    on the items in restaurants. Also, return the R^2 value of this model.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants; each is looked up by name in
                   the user's reviews (a missing name raises KeyError)
    feature_fn -- A function that takes a restaurant and returns a number

    Returns:
    A pair (predictor, r_squared); predictor(restaurant) evaluates
    b * feature_fn(restaurant) + a for the fitted slope b and intercept a.

    Division by zero occurs when S_xx == 0 or, for r_squared, S_yy == 0.
    """
    reviews_by_user = {review_restaurant_name(review): review_rating(review)
                       for review in user_reviews(user).values()}

    xs = [feature_fn(restaurant) for restaurant in restaurants]
    ys = [reviews_by_user[restaurant_name(restaurant)] for restaurant in restaurants]

    # BEGIN Question 7
    # Means are computed once; the helper no longer shadows the builtin sum
    # nor recomputes both means on every iteration.
    mean_x = mean(xs)
    mean_y = mean(ys)

    def centered_product_sum(s1, mean1, s2, mean2):
        """Sum of (u - mean1) * (v - mean2) over paired elements."""
        total = 0
        for u, v in zip(s1, s2):
            total += (u - mean1) * (v - mean2)
        return total

    S_xx = centered_product_sum(xs, mean_x, xs, mean_x)
    S_yy = centered_product_sum(ys, mean_y, ys, mean_y)
    S_xy = centered_product_sum(xs, mean_x, ys, mean_y)

    b = S_xy / S_xx
    a = mean_y - b * mean_x
    r_squared = (S_xy * S_xy) / (S_xx * S_yy)
    # END Question 7

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
Пример #14
0
def find_predictor(user, restaurants, feature_fn):
    """Fit rating = a + b * feature_fn(restaurant) for `user` by least
    squares over `restaurants` and return the fitted predictor with its R^2.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants, each looked up by name in the
                   user's reviews
    feature_fn -- A function that takes a restaurant and returns a number
    """
    ratings_by_name = {review_restaurant_name(item): review_rating(item)
                       for item in user_reviews(user).values()}

    xs = [feature_fn(place) for place in restaurants]
    ys = [ratings_by_name[restaurant_name(place)] for place in restaurants]

    # BEGIN Question 7
    x_center = mean(xs)
    y_center = mean(ys)
    x_offsets = [value - x_center for value in xs]
    y_offsets = [value - y_center for value in ys]
    S_xx = 0
    S_yy = 0
    S_xy = 0
    for x_off, y_off in zip(x_offsets, y_offsets):
        S_xx += x_off ** 2
        S_yy += y_off ** 2
        S_xy += x_off * y_off
    slope = S_xy / S_xx
    intercept = y_center - slope * x_center
    r_squared = S_xy ** 2 / (S_xx * S_yy)
    # END Question 7

    def predictor(restaurant):
        return slope * feature_fn(restaurant) + intercept

    return predictor, r_squared
Пример #15
0
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    for USER by performing least-squares linear regression using FEATURE_FN
    on the items in RESTAURANTS. Also, return the R^2 value of this model.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants; each is looked up by name in
                   the user's reviews (a missing name raises KeyError)
    feature_fn -- A function that takes a restaurant and returns a number

    Returns:
    A pair (predictor, r_squared); predictor(restaurant) evaluates
    b * feature_fn(restaurant) + a for the fitted slope b and intercept a.

    Division by zero occurs when s_xx == 0 or, for r_squared, s_yy == 0.
    """
    reviews_by_user = {review_restaurant_name(review): review_rating(review)
                       for review in user_reviews(user).values()}

    xs = [feature_fn(r) for r in restaurants]
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    mean_x = mean(xs)
    mean_y = mean(ys)
    s_xx = sum([pow(x - mean_x, 2) for x in xs])
    s_yy = sum([pow(y - mean_y, 2) for y in ys])
    s_xy = sum([(x - mean_x) * (y - mean_y) for x, y in zip(xs, ys)])
    b = s_xy / s_xx
    a = mean_y - b * mean_x
    r_squared = pow(s_xy, 2) / (s_xx * s_yy)

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
Пример #16
0
def find_centroid(cluster):
    """Return the centroid of the restaurant locations in cluster as a
    two-element list: the mean of element 0 of each location, then the mean
    of element 1.
    """
    # BEGIN Question 5
    points = [restaurant_location(place) for place in cluster]
    first_components = [point[0] for point in points]
    second_components = [point[1] for point in points]
    first_mean = mean(first_components)
    second_mean = mean(second_components)
    return [first_mean, second_mean]
Пример #17
0
def find_centroid(cluster):
    """Average the locations of the restaurants in cluster.

    Returns [mean latitude, mean longitude], where latitude is element 0
    and longitude element 1 of each restaurant location.
    """
    # BEGIN Question 5
    latitudes, longitudes = [], []
    spots = [restaurant_location(restaurant) for restaurant in cluster]
    for spot in spots:
        latitudes.append(spot[0])
    for spot in spots:
        longitudes.append(spot[1])
    return [mean(latitudes), mean(longitudes)]
Пример #18
0
def find_predictor(user, restaurants, feature_fn):
    """Least-squares fit of USER's ratings against FEATURE_FN over
    RESTAURANTS.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants, each looked up by name in the
                   user's reviews
    feature_fn -- A function that takes a restaurant and returns a number

    Returns the predictor function (restaurant -> predicted rating) and the
    R^2 value of the model.
    """
    rating_lookup = {review_restaurant_name(entry): review_rating(entry)
                     for entry in user_reviews(user).values()}

    xs = [feature_fn(place) for place in restaurants]
    ys = [rating_lookup[restaurant_name(place)] for place in restaurants]
    x_mean = mean(xs)
    y_mean = mean(ys)

    s_xx = sum([(x - x_mean) ** 2 for x in xs])
    s_yy = sum([(y - y_mean) ** 2 for y in ys])
    # Centre each series first, then multiply the paired deviations.
    x_centered = [x - x_mean for x in xs]
    y_centered = [y - y_mean for y in ys]
    s_xy = sum([dx * dy for dx, dy in zip(x_centered, y_centered)])

    slope = s_xy / s_xx
    intercept = y_mean - slope * x_mean
    r_squared = s_xy ** 2 / (s_xx * s_yy)

    def predictor(restaurant):
        return slope * feature_fn(restaurant) + intercept

    return predictor, r_squared
def moran_process(N=1000,turns=10000,mean_site_muts=1,mean_rec_muts=1,init=sample_species,mutate=mutate,
                  fitness=fitness,pop=None,print_modulus=100,hist_modulus=10):
    #ringer = (np.array([1]+[0]*(K-1)),sample_eps())
    if pop is None:
        pop = [(lambda spec:(spec,fitness(spec)))(init())
               for _ in trange(N)]
    # ringer = make_ringer()
    # pop[0] = (ringer,fitness(ringer))
    #pop = [(ringer,fitness(ringer)) for _ in xrange(N)]
    site_mu = min(1/float(n*L) * mean_site_muts,1)
    rec_mu = min(1/float(K) * mean_rec_muts,1)
    hist = []
    for turn in xrange(turns):
        fits = [f for (s,f) in pop]
        #print fits
        birth_idx = inverse_cdf_sample(range(N),fits,normalized=False)
        if birth_idx is None:
            return pop
        death_idx = random.randrange(N)
        #print birth_idx,death_idx
        mother,f = pop[birth_idx]
        daughter = mutate(mother,site_mu,rec_mu)
        #print "mutated"
        pop[death_idx] = (daughter,fitness(daughter))
        mean_fits = mean(fits)
        #hist.append((f,mean_fits))
        if turn % hist_modulus == 0:
            mean_dna_ic = mean([motif_ic(sites,correct=False) for ((sites,eps),_) in pop])
            mean_rec = mean([recognizer_promiscuity(x) for (x,f) in pop])
            mean_recced = mean([sites_recognized((dna,rec)) for ((dna,rec),_) in pop])
            hist.append((turn,f,mean_fits,mean_dna_ic,mean_rec,mean_recced))
            if turn % print_modulus == 0:
                print turn,"sel_fit:",f,"mean_fit:",mean_fits,"mean_dna_ic:",mean_dna_ic,"mean_rec_prom:",mean_rec
    return pop,hist
Пример #20
0
def find_centroid(restaurants):
    """Return the centroid of the locations of RESTAURANTS.

    The centroid is [mean latitude, mean longitude], where latitude is
    element 0 and longitude element 1 of each restaurant location.
    """
    locations = [restaurant_location(restaurant) for restaurant in restaurants]
    latitude = [location[0] for location in locations]
    longitude = [location[1] for location in locations]
    return [mean(latitude), mean(longitude)]
Пример #21
0
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    for USER by performing least-squares linear regression using FEATURE_FN
    on the items in RESTAURANTS. Also, return the R^2 value of this model.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants; each is looked up by name in
                   the user's reviews (a missing name raises KeyError)
    feature_fn -- A function that takes a restaurant and returns a number

    Returns:
    A pair (predictor, r_squared); predictor(restaurant) evaluates
    b * feature_fn(restaurant) + a for the fitted slope b and intercept a.

    Division by zero occurs when Sxx == 0 or, for r_squared, Syy == 0.
    """
    reviews_by_user = {review_restaurant_name(review): review_rating(review)
                       for review in user_reviews(user).values()}

    xs = [feature_fn(r) for r in restaurants]
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    xs_mean = mean(xs)
    xs_minus_mean = [x - xs_mean for x in xs]
    ys_mean = mean(ys)
    ys_minus_mean = [y - ys_mean for y in ys]
    Sxx = sum([pow(x, 2) for x in xs_minus_mean])
    Syy = sum([pow(y, 2) for y in ys_minus_mean])
    Sxy = sum([x * y for x, y in zip(xs_minus_mean, ys_minus_mean)])
    b = Sxy / Sxx
    a = ys_mean - b * xs_mean
    r_squared = pow(Sxy, 2) / (Sxx * Syy)

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
 def test_mean(self):
     """
     Check utils.mean on an empty list, on integers and on a float input.
     """
     check_equal = self.assertEqual
     # An empty list is expected to yield 0.
     check_equal(0, utils.mean([]))
     check_equal(2.5, utils.mean([5, 0]))
     root_of_eight = 8**0.5
     self.assertAlmostEqual(1.914213, utils.mean([root_of_eight, 1]), places=5)
Пример #23
0
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    for a user by performing least-squares linear regression using feature_fn
    on the items in restaurants. Also, return the R^2 value of this model.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants; each is looked up by name in
                   the user's reviews (a missing name raises KeyError)
    feature_fn -- A function that takes a restaurant and returns a number

    Returns:
    A pair (predictor, r_squared); predictor(restaurant) evaluates
    b * feature_fn(restaurant) + a for the fitted slope b and intercept a.

    Division by zero occurs when sxx == 0 or, for r_squared, syy == 0.
    """
    reviews_by_user = {review_restaurant_name(review): review_rating(review)
                       for review in user_reviews(user).values()}

    xs = [feature_fn(r) for r in restaurants]
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    # BEGIN Question 7
    # Compute each mean once rather than once per element.
    mean_x = mean(xs)
    mean_y = mean(ys)
    xx = [elem - mean_x for elem in xs]
    yy = [elem - mean_y for elem in ys]

    # Sums of squared deviations of x and of y, and of their products.
    sxx, syy = sum(x**2 for x in xx), sum(y**2 for y in yy)
    sxy = 0
    for dx, dy in zip(xx, yy):
        sxy += dx*dy

    # Slope and intercept of the fitted line.
    b = sxy/sxx
    a = mean_y - b*mean_x
    r_squared = sxy**2/(sxx*syy)
    # END Question 7

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
Пример #24
0
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    for a user by performing least-squares linear regression using feature_fn
    on the items in restaurants. Also, return the R^2 value of this model.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants; each is looked up by name in
                   the user's reviews (a missing name raises KeyError)
    feature_fn -- A function that takes a restaurant and returns a number

    Returns:
    A pair (predictor, r_squared); predictor(restaurant) evaluates
    b * feature_fn(restaurant) + a for the fitted slope b and intercept a.

    Division by zero occurs when sxx == 0 or, for r_squared, syy == 0.
    """
    reviews_by_user = {review_restaurant_name(review): review_rating(review)
                       for review in user_reviews(user).values()}

    xs = [feature_fn(r) for r in restaurants]
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    # BEGIN Question 7
    mean_x = mean(xs)
    mean_y = mean(ys)
    sxx = sum([(x - mean_x) * (x - mean_x) for x in xs])
    syy = sum([(y - mean_y) * (y - mean_y) for y in ys])
    sxy = sum([(x - mean_x) * (y - mean_y) for x, y in zip(xs, ys)])
    b = sxy / sxx
    a = mean_y - b * mean_x
    r_squared = sxy * sxy / (sxx * syy)
    # END Question 7

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
Пример #25
0
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    for a user by performing least-squares linear regression using feature_fn
    on the items in restaurants. Also, return the R^2 value of this model.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants; each is looked up by name in
                   the user's reviews (a missing name raises KeyError)
    feature_fn -- A function that takes a restaurant and returns a number

    Returns:
    A pair (predictor, r_squared); predictor(restaurant) evaluates
    b * feature_fn(restaurant) + a for the fitted slope b and intercept a.

    Division by zero occurs when S_xx == 0 or, for r_squared, S_yy == 0.
    """
    reviews_by_user = {review_restaurant_name(review): review_rating(review)
                       for review in user_reviews(user).values()}

    xs = [feature_fn(r) for r in restaurants]
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    # BEGIN Question 7
    mean_x, mean_y = mean(xs), mean(ys)
    # Deviations of each observation from its mean.
    xx_list = [x - mean_x for x in xs]
    yy_list = [y - mean_y for y in ys]
    S_xx = sum([xx ** 2 for xx in xx_list])
    S_yy = sum([yy ** 2 for yy in yy_list])
    S_xy = sum([xx * yy for xx, yy in zip(xx_list, yy_list)])
    b = S_xy / S_xx
    a = mean_y - b * mean_x
    r_squared = S_xy ** 2 / (S_xx * S_yy)
    # END Question 7

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
Пример #26
0
def find_predictor(user, restaurants, feature_fn):
    """Fit a straight line through (feature, rating) points for a user.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants, each looked up by name in the
                   user's reviews
    feature_fn -- A function that takes a restaurant and returns a number

    Returns the least-squares predictor (restaurant -> predicted rating)
    and the R^2 value of the model.
    """
    known_ratings = {review_restaurant_name(rev): review_rating(rev)
                     for rev in user_reviews(user).values()}

    xs = [feature_fn(place) for place in restaurants]
    ys = [known_ratings[restaurant_name(place)] for place in restaurants]

    x_center = mean(xs)
    y_center = mean(ys)
    Sxx = sum([(x_val - x_center)**2 for x_val in xs])
    Syy = sum([(y_val - y_center)**2 for y_val in ys])
    Sxy = sum([(x_val - x_center)*(y_val - y_center)
               for x_val, y_val in zip(xs, ys)])

    slope = Sxy/Sxx
    r_squared = Sxy**2/(Sxx*Syy)
    intercept = y_center - slope*x_center

    def predictor(restaurant):
        return slope * feature_fn(restaurant) + intercept

    return predictor, r_squared
Пример #27
0
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    for a user by performing least-squares linear regression using feature_fn
    on the items in restaurants. Also, return the R^2 value of this model.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants; each is looked up by name in
                   the user's reviews (a missing name raises KeyError)
    feature_fn -- A function that takes a restaurant and returns a number

    Returns:
    A pair (predictor, r_squared); predictor(restaurant) evaluates
    b * feature_fn(restaurant) + a for the fitted slope b and intercept a.

    Division by zero occurs when sxx == 0 or, for r_squared, syy == 0.
    """
    reviews_by_user = {review_restaurant_name(review): review_rating(review)
                       for review in user_reviews(user).values()}

    xs = [feature_fn(r) for r in restaurants]
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    # BEGIN Question 7
    mean_xs = mean(xs)
    mean_ys = mean(ys)
    # Deviations of each observation from its mean.
    list_x = [x - mean_xs for x in xs]
    list_y = [y - mean_ys for y in ys]
    sxx = sum([dx * dx for dx in list_x])
    syy = sum([dy * dy for dy in list_y])
    sxy = sum([dx * dy for dx, dy in zip(list_x, list_y)])

    b = sxy / sxx
    a = mean_ys - b * mean_xs
    r_squared = (sxy ** 2) / (sxx * syy)
    # END Question 7

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
def evo_sim_experiment(filename=None):
    """Compare biological motifs to simulated ("spoofed") motifs.

    For every motif obtained from extract_motif_object_from_tfdf, 100
    simulated motifs are generated with spoof_motif.  Three panels then
    scatter the biological value of a statistic (motif IC, motif Gini,
    total pairwise MI) against the mean of that statistic over the
    simulated motifs.

    Arguments:
    filename -- where to save the figure; when None (the default) the
                figure is not saved

    Returns the list of simulated motif collections, one per biological
    motif.
    """
    tfdf = extract_motif_object_from_tfdf()
    bio_motifs = [getattr(tfdf,tf) for tf in tfdf.tfs]
    evosims = [spoof_motif(motif,num_motifs=100,Ne_tol=10**-4)
               for motif in tqdm(bio_motifs)]
    evo_ics = [mean(map(motif_ic,sm)) for sm in tqdm(evosims)]
    evo_ginis = [mean(map(motif_gini,sm)) for sm in tqdm(evosims)]
    evo_mis = [mean(map(total_motif_mi,sm)) for sm in tqdm(evosims)]
    plt.subplot(1,3,1)
    scatter(map(motif_ic,bio_motifs),evo_ics)
    plt.title("Motif IC (bits)")
    plt.xlabel("Biological Value")
    plt.ylabel("Simulated Value")
    plt.subplot(1,3,2)
    scatter(map(motif_gini,bio_motifs),
            evo_ginis)
    plt.title("Motif Gini Coefficient")
    plt.xlabel("Biological Value")
    plt.ylabel("Simulated Value")
    plt.subplot(1,3,3)
    scatter(map(total_motif_mi,bio_motifs),
            evo_mis)
    plt.xlabel("Biological Value")
    plt.ylabel("Simulated Value")
    plt.title("Pairwise Motif MI (bits)")
    # Log-log axes are applied to the current (third) panel only.
    plt.loglog()
    plt.tight_layout()
    # Saving with the default filename=None used to fail here, after all the
    # simulations had run, so the results were never returned.
    if filename is not None:
        plt.savefig(filename)
    return evosims
Пример #29
0
def find_predictor(user, restaurants, feature_fn):
    """Compute the least-squares rating predictor for a user.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants, each looked up by name in the
                   user's reviews
    feature_fn -- A function that takes a restaurant and returns a number

    Returns a pair: a function mapping a restaurant to its predicted
    rating, and the R^2 value of the regression.
    """
    rating_by_name = {review_restaurant_name(rev): review_rating(rev)
                      for rev in user_reviews(user).values()}

    xs = [feature_fn(place) for place in restaurants]
    ys = [rating_by_name[restaurant_name(place)] for place in restaurants]
    points = zip(xs, ys)
    # BEGIN Question 7
    x_bar = mean(xs)
    y_bar = mean(ys)
    sxx = sum([(x_val - x_bar)**2 for x_val in xs])
    syy = sum([(y_val - y_bar)**2 for y_val in ys])
    sxy = sum([(x_val - x_bar)*(y_val - y_bar) for x_val, y_val in points])

    slope = sxy / sxx
    intercept = y_bar - slope * x_bar
    r_squared = sxy**2 / (sxx * syy)
    # END Question 7

    def predictor(restaurant):
        return slope * feature_fn(restaurant) + intercept

    return predictor, r_squared
Пример #30
0
def find_centroid(cluster):
    """Return the centroid of the restaurant locations in cluster: a list
    with the mean of element 0 and the mean of element 1 of the locations.
    """
    # BEGIN Question 5
    # Locations of all restaurants in the cluster.
    spots = [restaurant_location(place) for place in cluster]
    first_mean = mean([spot[0] for spot in spots])
    second_mean = mean([spot[1] for spot in spots])
    return [first_mean, second_mean]
Пример #31
0
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    for a user by performing least-squares linear regression using feature_fn
    on the items in restaurants. Also, return the R^2 value of this model.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants; each is looked up by name in
                   the user's reviews (a missing name raises KeyError)
    feature_fn -- A function that takes a restaurant and returns a number

    Returns:
    A pair (predictor, r_squared); predictor(restaurant) evaluates
    b * feature_fn(restaurant) + a for the fitted slope b and intercept a.

    Division by zero occurs when sxx == 0 or, for r_squared, syy == 0.
    """
    reviews_by_user = {
        review_restaurant_name(review): review_rating(review)
        for review in user_reviews(user).values()
    }
    xs = [feature_fn(r) for r in restaurants]
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    # Hoisted: the means used to be recomputed on every loop iteration.
    mean_x = mean(xs)
    mean_y = mean(ys)

    sxx = 0
    for x in xs:
        sxx += (x - mean_x)**2
    syy = 0
    for y in ys:
        syy += (y - mean_y)**2
    sxy = 0
    for x, y in zip(xs, ys):
        sxy += (x - mean_x) * (y - mean_y)
    b = sxy / sxx
    a = mean_y - b * mean_x
    r_squared = (sxy * sxy) / (sxx * syy)

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
Пример #32
0
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    for a user by performing least-squares linear regression using feature_fn
    on the items in restaurants. Also, return the R^2 value of this model.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants; each is looked up by name in
                   the user's reviews (a missing name raises KeyError)
    feature_fn -- A function that takes a restaurant and returns a number

    Returns:
    A pair (predictor, r_squared); predictor(restaurant) evaluates
    b * feature_fn(restaurant) + a for the fitted slope b and intercept a.

    Division by zero occurs when s_xx == 0 or, for r_squared, s_yy == 0.
    """
    reviews_by_user = {review_restaurant_name(review): review_rating(review)
                       for review in user_reviews(user).values()}

    xs = [feature_fn(r) for r in restaurants]
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    # BEGIN Question 7
    # Means are loop invariants; compute them once.
    mean_x = mean(xs)
    mean_y = mean(ys)
    s_xx = 0
    s_yy = 0
    s_xy = 0
    for x, y in zip(xs, ys):
        dx = x - mean_x
        dy = y - mean_y
        s_xx += dx ** 2
        s_yy += dy ** 2
        s_xy += dx * dy
    # Slope, intercept and coefficient of determination.
    b = s_xy / s_xx
    a = mean_y - b * mean_x
    r_squared = s_xy**2 / (s_xx * s_yy)
    # END Question 7

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
Пример #33
0
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    for USER by performing least-squares linear regression using FEATURE_FN
    on the items in RESTAURANTS. Also, return the R^2 value of this model.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants; each is looked up by name in
                   the user's reviews (a missing name raises KeyError)
    feature_fn -- A function that takes a restaurant and returns a number

    Returns:
    A pair (predictor, r_squared); predictor(restaurant) evaluates
    b * feature_fn(restaurant) + a for the fitted slope b and intercept a.

    Division by zero occurs when S_xx == 0 or, for r_squared, S_yy == 0.
    """
    reviews_by_user = {review_restaurant_name(review): review_rating(review)
                       for review in user_reviews(user).values()}

    xs = [feature_fn(r) for r in restaurants]
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    # Hoisted: the means used to be recomputed on every loop iteration.
    mean_x = mean(xs)
    mean_y = mean(ys)

    S_xx, S_yy, S_xy = 0, 0, 0
    for x in xs:
        S_xx += pow(x - mean_x, 2)
    for y in ys:
        S_yy += pow(y - mean_y, 2)
    for x, y in zip(xs, ys):
        S_xy += (x - mean_x) * (y - mean_y)

    b = S_xy / S_xx
    a = mean_y - b * mean_x
    r_squared = pow(S_xy, 2) / (S_xx * S_yy)

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
Пример #34
0
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    for a user by performing least-squares linear regression using feature_fn
    on the items in restaurants. Also, return the R^2 value of this model.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants
    feature_fn -- A function that takes a restaurant and returns a number

    Ratings are looked up by restaurant name among the user's reviews; a
    restaurant the user has not reviewed raises KeyError.
    """
    reviews_by_user = {
        review_restaurant_name(review): review_rating(review)
        for review in user_reviews(user).values()
    }

    list_of_x_values = [feature_fn(r) for r in restaurants]
    list_of_y_values = [
        reviews_by_user[restaurant_name(r)] for r in restaurants
    ]

    # BEGIN Question 7
    mean_x = mean(list_of_x_values)
    mean_y = mean(list_of_y_values)

    # Sums of squared deviations (s_xx, s_yy) and of cross deviations (s_xy).
    s_xx = sum((x - mean_x) ** 2 for x in list_of_x_values)
    s_yy = sum((y - mean_y) ** 2 for y in list_of_y_values)
    s_xy = sum((x - mean_x) * (y - mean_y)
               for x, y in zip(list_of_x_values, list_of_y_values))

    # Slope, intercept and goodness of fit of the line y = a + b * x.
    b = s_xy / s_xx
    a = mean_y - b * mean_x
    r_squared = s_xy ** 2 / (s_xx * s_yy)
    # END Question 7

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
Пример #35
0
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    for a user by performing least-squares linear regression using feature_fn
    on the items in restaurants. Also, return the R^2 value of this model.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants
    feature_fn -- A function that takes a restaurant and returns a number
    """
    xs = [feature_fn(r) for r in restaurants]
    ys = [user_rating(user, restaurant_name(r)) for r in restaurants]

    # BEGIN Question 7
    # Hoisted out of the accumulation loop: the means do not change while
    # iterating, so there is no reason to call mean() per element.
    x_bar = mean(xs)
    y_bar = mean(ys)

    sxx, syy, sxy = 0, 0, 0
    for feature, rating in zip(xs, ys):
        dx = feature - x_bar
        dy = rating - y_bar
        sxx += dx ** 2
        syy += dy ** 2
        sxy += dx * dy

    b = sxy / sxx
    a = y_bar - b * x_bar
    r_squared = sxy ** 2 / (sxx * syy)
    # END Question 7

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
Пример #36
0
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    for a user by performing least-squares linear regression using feature_fn
    on the items in restaurants. Also, return the R^2 value of this model.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants
    feature_fn -- A function that takes a restaurant and returns a number

    Raises KeyError if a restaurant has no review by the user, because its
    rating is fetched from a name -> rating dictionary.
    """
    reviews_by_user = {
        review_restaurant_name(review): review_rating(review)
        for review in user_reviews(user).values()
    }

    xs = [feature_fn(r) for r in restaurants]
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    # BEGIN Question 7
    # Compute each mean a single time; previously mean() ran once per element
    # inside every comprehension.
    avg_x = mean(xs)
    avg_y = mean(ys)

    # Deviations from the mean, reused by all three sums.
    dev_x = [x_i - avg_x for x_i in xs]
    dev_y = [y_i - avg_y for y_i in ys]

    s_xx = sum([d ** 2 for d in dev_x])
    s_yy = sum([d ** 2 for d in dev_y])
    s_xy = sum([d_x * d_y for d_x, d_y in zip(dev_x, dev_y)])

    # b is the slope; it is computed once and reused for the intercept a.
    b = s_xy / s_xx
    a = avg_y - b * avg_x
    r_squared = s_xy ** 2 / (s_xx * s_yy)
    # END Question 7

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
Пример #37
0
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    for a user by performing least-squares linear regression using feature_fn
    on the items in restaurants. Also, return the R^2 value of this model.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants
    feature_fn -- A function that takes a restaurant and returns a number

    The user's rating for each restaurant is read from a dictionary keyed by
    restaurant name; an unreviewed restaurant raises KeyError.
    """
    reviews_by_user = {review_restaurant_name(review): review_rating(review)
                       for review in user_reviews(user).values()}

    xs = [feature_fn(r) for r in restaurants]
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    # BEGIN Question 7
    # mean() is called once per list here, not four times per loop iteration.
    x_mean, y_mean = mean(xs), mean(ys)

    s_xx, s_yy, s_xy = 0, 0, 0
    for x, y in zip(xs, ys):
        x_off = x - x_mean
        y_off = y - y_mean
        s_xx += x_off ** 2
        s_yy += y_off ** 2
        s_xy += x_off * y_off

    # Least-squares line y = a + b * x and its coefficient of determination.
    b = s_xy / s_xx
    a = y_mean - b * x_mean
    r_squared = (s_xy ** 2) / (s_xx * s_yy)
    # END Question 7

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
Пример #38
0
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    predictor f(restaurants) -> ratings
    for a user by performing least-squares linear regression using feature_fn
    on the items in restaurants. Also, return the R^2 value of this model.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants
    feature_fn -- A function that takes a restaurant and returns a number
    """
    # Maps restaurant name -> this user's rating, e.g. {"Soda": 5, ...}.
    reviews_by_user = {
        review_restaurant_name(review): review_rating(review)
        for review in user_reviews(user).values()
    }

    xs = [feature_fn(r) for r in restaurants]
    # Raises KeyError for a restaurant the user has not reviewed.
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    # BEGIN Question 7
    # The means are fixed for the whole computation, so call mean() only once
    # per list and iterate over the values directly instead of by index.
    mu_x = mean(xs)
    mu_y = mean(ys)

    sxx = sum([(x - mu_x) ** 2 for x in xs])
    syy = sum([(y - mu_y) ** 2 for y in ys])
    sxy = sum([(x - mu_x) * (y - mu_y) for x, y in zip(xs, ys)])

    b = sxy / sxx
    a = mu_y - b * mu_x
    r_squared = sxy ** 2 / (sxx * syy)
    # END Question 7

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
Пример #39
0
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    for a user by performing least-squares linear regression using feature_fn
    on the items in restaurants. Also, return the R^2 value of this model.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants
    feature_fn -- A function that takes a restaurant and returns a number

    A restaurant missing from the user's reviews raises KeyError.
    """
    reviews_by_user = {
        review_restaurant_name(review): review_rating(review)
        for review in user_reviews(user).values()
    }

    xs = [feature_fn(r) for r in restaurants]
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    # Evaluate the means once; they are constant across every term below.
    centre_x = mean(xs)
    centre_y = mean(ys)

    # Per-point deviations from the mean, computed a single time and shared
    # by s_xx, s_yy and s_xy.
    x_devs = [x - centre_x for x in xs]
    y_devs = [y - centre_y for y in ys]

    s_xx = sum([d ** 2 for d in x_devs])
    s_yy = sum([d ** 2 for d in y_devs])
    # Pair the i-th x deviation with the i-th y deviation and sum the products.
    s_xy = sum([dx * dy for dx, dy in zip(x_devs, y_devs)])

    b = s_xy / s_xx
    a = centre_y - b * centre_x
    r_squared = (s_xy ** 2) / (s_xx * s_yy)

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
Пример #40
0
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    for a user by performing least-squares linear regression using feature_fn
    on the items in restaurants. Also, return the R^2 value of this model.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants
    feature_fn -- A function that takes a restaurant and returns a number
    """
    xs = [feature_fn(r) for r in restaurants]
    ys = [user_rating(user, restaurant_name(r)) for r in restaurants]

    # BEGIN Question 7
    # Call mean() once per list; the value is identical for every term.
    mean_of_xs = mean(xs)
    mean_of_ys = mean(ys)

    sum_x_squares = sum([(x - mean_of_xs) ** 2 for x in xs])
    sum_y_squares = sum([(y - mean_of_ys) ** 2 for y in ys])
    sum_both = sum([(x - mean_of_xs) * (y - mean_of_ys)
                    for x, y in zip(xs, ys)])

    # Slope b, intercept a, and R^2 of the regression line y = a + b * x.
    b = sum_both / sum_x_squares
    a = mean_of_ys - b * mean_of_xs
    r_squared = (sum_both ** 2) / (sum_x_squares * sum_y_squares)
    # END Question 7

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
Пример #41
0
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    for a user by performing least-squares linear regression using feature_fn
    on the items in restaurants. Also, return the R^2 value of this model.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants
    feature_fn -- A function that takes a restaurant and returns a number

    Raises KeyError when a restaurant has not been reviewed by the user.
    """
    reviews_by_user = {
        review_restaurant_name(review): review_rating(review)
        for review in user_reviews(user).values()
    }

    xs = [feature_fn(r) for r in restaurants]
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    # BEGIN Question 7
    # Calculate the sums of squares, using the mean and zip functions. The
    # means are computed up front so mean() is not re-run for every element.
    x_avg, y_avg = mean(xs), mean(ys)

    sxx = syy = sxy = 0
    for x, y in zip(xs, ys):
        x_delta, y_delta = x - x_avg, y - y_avg
        sxx += x_delta ** 2
        syy += y_delta ** 2
        sxy += x_delta * y_delta

    b = sxy / sxx
    a = y_avg - b * x_avg
    r_squared = sxy ** 2 / (sxx * syy)
    # END Question 7

    def predictor(restaurant):
        return a + b * feature_fn(restaurant)

    return predictor, r_squared
Пример #42
0
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    for a user by performing least-squares linear regression using feature_fn
    on the items in restaurants. Also, return the R^2 value of this model.

    Inputs:
        user: A str user
        restaurants: A list of restaurants
        feature_fn: A function that takes a restaurant and returns a number

    Raises:
        KeyError: if a restaurant has no rating among the user's reviews.
    """
    # Restaurant name -> rating given by this user.
    reviews_by_user = {
        review_restaurant_name(review): review_rating(review)
        for review in user_reviews(user).values()
    }

    # Feature values (xs) and the matching ratings (ys).
    xs = [feature_fn(r) for r in restaurants]
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    # The means never change below, so compute them exactly once.
    xs_mean = mean(xs)
    ys_mean = mean(ys)

    # Sums of squared and cross deviations.
    s_xx = sum([(x - xs_mean) ** 2 for x in xs])
    s_yy = sum([(y - ys_mean) ** 2 for y in ys])
    s_xy = sum([(x - xs_mean) * (y - ys_mean) for x, y in zip(xs, ys)])

    # Regression coefficients, one per line; the slope is computed once and
    # reused for the intercept.
    b = s_xy / s_xx
    a = ys_mean - b * xs_mean
    r_squared = s_xy ** 2 / (s_xx * s_yy)

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
Пример #43
0
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    for a user by performing least-squares linear regression using feature_fn
    on the items in restaurants. Also, return the R^2 value of this model.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants
    feature_fn -- A function that takes a restaurant and returns a number

    Ratings come from a name -> rating dictionary of the user's reviews, so
    an unreviewed restaurant raises KeyError.
    """
    reviews_by_user = {review_restaurant_name(review): review_rating(review)
                       for review in user_reviews(user).values()}

    xs = [feature_fn(r) for r in restaurants]
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    # BEGIN Question 7
    # Evaluate mean() once per list instead of once per term.
    x_centre = mean(xs)
    y_centre = mean(ys)

    sxx = sum((x - x_centre) ** 2 for x in xs)
    syy = sum((y - y_centre) ** 2 for y in ys)
    # The pairs are zipped right where they are consumed (zip yields a
    # one-shot iterator), and the loop variables no longer reuse the names
    # of the coefficients a and b defined below.
    sxy = sum((x - x_centre) * (y - y_centre) for x, y in zip(xs, ys))

    b = sxy / sxx
    a = y_centre - b * x_centre
    r_squared = sxy ** 2 / (sxx * syy)
    # END Question 7

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
Пример #44
0
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    for USER by performing least-squares linear regression using FEATURE_FN
    on the items in RESTAURANTS. Also, return the R^2 value of this model.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants
    feature_fn -- A function that takes a restaurant and returns a number

    Raises KeyError if USER has no review for one of RESTAURANTS.
    """
    reviews_by_user = {
        review_restaurant_name(review): review_rating(review)
        for review in user_reviews(user).values()
    }

    xs = [feature_fn(r) for r in restaurants]
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    # Means are computed once up front; they are invariant across all terms.
    avg_x, avg_y = mean(xs), mean(ys)

    # Deviations of every point from the mean, shared by the three sums.
    x_offsets = [x - avg_x for x in xs]
    y_offsets = [y - avg_y for y in ys]

    sxx = sum([off ** 2 for off in x_offsets])
    syy = sum([off ** 2 for off in y_offsets])
    sxy = sum([x_off * y_off for x_off, y_off in zip(x_offsets, y_offsets)])

    b = sxy / sxx
    a = avg_y - b * avg_x
    r_squared = sxy ** 2 / (sxx * syy)

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
Пример #45
0
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    for a user by performing least-squares linear regression using feature_fn
    on the items in restaurants. Also, return the R^2 value of this model.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants
    feature_fn -- A function that takes a restaurant and returns a number
    """
    xs = [feature_fn(r) for r in restaurants]
    ys = [user_rating(user, restaurant_name(r)) for r in restaurants]

    # BEGIN Question 7
    # This part does the sum calculations from the question. The two means
    # are evaluated once here rather than inside every comprehension term.
    x_bar, y_bar = mean(xs), mean(ys)

    s_xx = sum([(x - x_bar) ** 2 for x in xs])
    s_yy = sum([(y - y_bar) ** 2 for y in ys])
    s_xy = sum([(x - x_bar) * (y - y_bar) for x, y in zip(xs, ys)])

    b = s_xy / s_xx
    a = y_bar - (b * x_bar)
    r_squared = (s_xy ** 2) / (s_xx * s_yy)
    # END Question 7

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
Пример #46
0
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    for a user by performing least-squares linear regression using feature_fn
    on the items in restaurants. Also, return the R^2 value of this model.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants
    feature_fn -- A function that takes a restaurant and returns a number
    """
    xs = [feature_fn(r) for r in restaurants]
    ys = [user_rating(user, restaurant_name(r)) for r in restaurants]

    # BEGIN Question 7
    # Loop-invariant means, computed a single time.
    m_x = mean(xs)
    m_y = mean(ys)

    s_xx = s_yy = s_xy = 0
    for x_val, y_val in zip(xs, ys):
        gap_x = x_val - m_x
        gap_y = y_val - m_y
        s_xx += gap_x ** 2
        s_yy += gap_y ** 2
        s_xy += gap_x * gap_y

    # Coefficients of y = a + b * x and the R^2 of the fit.
    b = s_xy / s_xx
    a = m_y - b * m_x
    r_squared = s_xy ** 2 / (s_xx * s_yy)
    # END Question 7

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
Пример #47
0
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    for a user by performing least-squares linear regression using feature_fn
    on the items in restaurants. Also, return the R^2 value of this model.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants
    feature_fn -- A function that takes a restaurant and returns a number

    Raises KeyError for a restaurant that the user has not reviewed.
    """
    reviews_by_user = {
        review_restaurant_name(review): review_rating(review)
        for review in user_reviews(user).values()
    }

    xs = [feature_fn(r) for r in restaurants]
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    # BEGIN Question 7
    # Compute each mean once, then each deviation once; the deviation lists
    # feed all three sums.
    x_mean = mean(xs)
    y_mean = mean(ys)
    x_diffs = [x - x_mean for x in xs]
    y_diffs = [y - y_mean for y in ys]

    s_xx = sum(d ** 2 for d in x_diffs)
    s_yy = sum(d ** 2 for d in y_diffs)
    s_xy = sum(dx * dy for dx, dy in zip(x_diffs, y_diffs))

    b = s_xy / s_xx
    a = y_mean - (b * x_mean)
    r_squared = (s_xy ** 2) / (s_xx * s_yy)
    # END Question 7

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
Пример #48
0
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    for a user by performing least-squares linear regression using feature_fn
    on the items in restaurants. Also, return the R^2 value of this model.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants
    feature_fn -- A function that takes a restaurant and returns a number
    """
    xs = [feature_fn(r) for r in restaurants]
    ys = [user_rating(user, restaurant_name(r)) for r in restaurants]

    # BEGIN Question 7
    # mean() used to be called inside each loop body; the values are constant,
    # so they are computed once here.
    xs_avg, ys_avg = mean(xs), mean(ys)

    s_xx = sum((x - xs_avg) ** 2 for x in xs)
    s_yy = sum((y - ys_avg) ** 2 for y in ys)
    # zip pairs the i-th feature with the i-th rating, replacing index access.
    s_xy = sum((x - xs_avg) * (y - ys_avg) for x, y in zip(xs, ys))

    b = s_xy / s_xx
    a = ys_avg - b * xs_avg
    r_squared = s_xy ** 2 / (s_xx * s_yy)
    # END Question 7

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
Пример #49
0
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    for a user by performing least-squares linear regression using feature_fn
    on the items in restaurants. Also, return the R^2 value of this model.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants
    feature_fn -- A function that takes a restaurant and returns a number
    """
    xs = [feature_fn(r) for r in restaurants]
    ys = [user_rating(user, restaurant_name(r)) for r in restaurants]

    # One mean() call per list; every term below reuses the result.
    mid_x = mean(xs)
    mid_y = mean(ys)

    # Accumulate the three sums in a single pass over the paired data.
    s_xx, s_yy, s_xy = 0, 0, 0
    for x, y in zip(xs, ys):
        from_mid_x = x - mid_x
        from_mid_y = y - mid_y
        s_xx += from_mid_x ** 2
        s_yy += from_mid_y ** 2
        s_xy += from_mid_x * from_mid_y

    b = s_xy / s_xx
    a = mid_y - b * mid_x
    r_squared = (s_xy ** 2) / (s_xx * s_yy)

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
Пример #50
0
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    for a user by performing least-squares linear regression using feature_fn
    on the items in restaurants. Also, return the R^2 value of this model.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants
    feature_fn -- A function that takes a restaurant and returns a number
    """
    xs = [feature_fn(r) for r in restaurants]
    ys = [user_rating(user, restaurant_name(r)) for r in restaurants]

    # BEGIN Question 7
    # Means first (once each), then the deviations (once each); everything
    # after that is derived from these two lists.
    mean_x = mean(xs)
    mean_y = mean(ys)
    x_dev = [x - mean_x for x in xs]
    y_dev = [y - mean_y for y in ys]

    s_xx = sum([d ** 2 for d in x_dev])
    s_yy = sum([d ** 2 for d in y_dev])
    s_xy = sum(dx * dy for dx, dy in zip(x_dev, y_dev))

    b = s_xy / s_xx
    a = mean_y - b * mean_x
    r_squared = s_xy ** 2 / (s_xx * s_yy)
    # END Question 7

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
Пример #51
0
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    for a user by performing least-squares linear regression using feature_fn
    on the items in restaurants. Also, return the R^2 value of this model.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants
    feature_fn -- A function that takes a restaurant and returns a number

    The rating of each restaurant is read from the user's reviews by name;
    a missing review raises KeyError.
    """
    reviews_by_user = {review_restaurant_name(review): review_rating(review)
                       for review in user_reviews(user).values()}

    xs = [feature_fn(r) for r in restaurants]
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    # BEGIN Question 7
    # Hoist the means: they were previously recomputed for every list element.
    x_mu, y_mu = mean(xs), mean(ys)

    s_xx = sum([(x - x_mu) ** 2 for x in xs])
    s_yy = sum([(y - y_mu) ** 2 for y in ys])
    s_xy = sum([(x - x_mu) * (y - y_mu) for x, y in zip(xs, ys)])

    # Slope and intercept of the fitted line, then its R^2.
    b = s_xy / s_xx
    a = y_mu - b * x_mu
    r_squared = s_xy ** 2 / (s_xx * s_yy)
    # END Question 7

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
Пример #52
0
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    for a user by performing least-squares linear regression using feature_fn
    on the items in restaurants. Also, return the R^2 value of this model.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants
    feature_fn -- A function that takes a restaurant and returns a number

    Raises KeyError if any restaurant is absent from the user's reviews.
    """
    reviews_by_user = {
        review_restaurant_name(review): review_rating(review)
        for review in user_reviews(user).values()
    }

    xs = [feature_fn(r) for r in restaurants]
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    # BEGIN Question 7
    # Means are constant for the whole computation; evaluate them once.
    x_center = mean(xs)
    y_center = mean(ys)

    # Single pass over the (x, y) pairs; zip is consumed where it is created
    # because it yields a one-shot iterator.
    s_xx, s_yy, s_xy = 0, 0, 0
    for x, y in zip(xs, ys):
        x_shift = x - x_center
        y_shift = y - y_center
        s_xx += x_shift ** 2
        s_yy += y_shift ** 2
        s_xy += x_shift * y_shift

    b = s_xy / s_xx
    a = y_center - b * x_center
    r_squared = s_xy ** 2 / (s_xx * s_yy)
    # END Question 7

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
Пример #53
0
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    for a user by performing least-squares linear regression using feature_fn
    on the items in restaurants. Also, return the R^2 value of this model.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants
    feature_fn -- A function that takes a restaurant and returns a number

    Raises KeyError when the user has not reviewed one of the restaurants.
    """
    reviews_by_user = {
        review_restaurant_name(review): review_rating(review)
        for review in user_reviews(user).values()
    }

    xs = [feature_fn(r) for r in restaurants]
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    # BEGIN Question 7
    # mean() is evaluated exactly twice in total, not once per term.
    feature_mean = mean(xs)
    rating_mean = mean(ys)

    s_xx = sum([(x - feature_mean) ** 2 for x in xs])
    s_yy = sum([(y - rating_mean) ** 2 for y in ys])
    # Unpack each (x, y) pair instead of indexing into the zipped tuple.
    s_xy = sum([(x - feature_mean) * (y - rating_mean)
                for x, y in zip(xs, ys)])

    b = s_xy / s_xx
    a = rating_mean - b * feature_mean
    r_squared = (s_xy ** 2) / (s_xx * s_yy)
    # END Question 7

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
Пример #54
0
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    for USER by performing least-squares linear regression using FEATURE_FN
    on the items in RESTAURANTS. Also, return the R^2 value of this model.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants
    feature_fn -- A function that takes a restaurant and returns a number

    Raises KeyError if USER has not reviewed one of RESTAURANTS.
    """
    # Dictionary of (name: rating) pairs for a SINGLE user.
    reviews_by_user = {
        review_restaurant_name(review): review_rating(review)
        for review in user_reviews(user).values()
    }

    xs = [feature_fn(r) for r in restaurants]
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    # Compute the means once; they were previously re-evaluated for every
    # element of the deviation lists.
    x_mean = mean(xs)
    y_mean = mean(ys)

    # Deviations of each data point from its mean.
    x_deviations = [x - x_mean for x in xs]
    y_deviations = [y - y_mean for y in ys]

    # sxx = sum_i (x_i - mean(x))^2
    sxx = sum([dx ** 2 for dx in x_deviations])
    # syy = sum_i (y_i - mean(y))^2
    syy = sum([dy ** 2 for dy in y_deviations])
    # sxy = sum_i (x_i - mean(x)) * (y_i - mean(y))
    sxy = sum([dx * dy for dx, dy in zip(x_deviations, y_deviations)])

    # Fitted line: y = a + b * x.
    b = sxy / sxx
    a = y_mean - b * x_mean
    # R^2 measures how accurately this line describes the original data.
    r_squared = sxy ** 2 / (sxx * syy)

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
Пример #55
0
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    for a user by performing least-squares linear regression using feature_fn
    on the items in restaurants. Also, return the R^2 value of this model.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants
    feature_fn -- A function that takes a restaurant and returns a number
    """
    xs = [feature_fn(r) for r in restaurants]
    ys = [user_rating(user, restaurant_name(r)) for r in restaurants]

    # BEGIN Question 7
    # Evaluate each mean once and reuse it in every term.
    x_average = mean(xs)
    y_average = mean(ys)

    sxx = syy = sxy = 0
    for x, y in zip(xs, ys):
        x_resid = x - x_average
        y_resid = y - y_average
        sxx += x_resid ** 2
        syy += y_resid ** 2
        sxy += x_resid * y_resid

    b = sxy / sxx
    a = y_average - b * x_average
    r_squared = sxy ** 2 / (sxx * syy)
    # END Question 7

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
Пример #56
0
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    for a user by performing least-squares linear regression using feature_fn
    on the items in restaurants. Also, return the R^2 value of this model.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants
    feature_fn -- A function that takes a restaurant and returns a number

    Raises KeyError for a restaurant without a review from the user.
    """
    reviews_by_user = {review_restaurant_name(review): review_rating(review)
                       for review in user_reviews(user).values()}

    xs = [feature_fn(r) for r in restaurants]
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    # BEGIN Question 7
    # The means are the same for every term, so compute them only once.
    x_mean, y_mean = mean(xs), mean(ys)

    # Sxx = sum_i (xi - mean(x))^2
    s_xx = sum([(xi - x_mean) ** 2 for xi in xs])
    # Syy = sum_i (yi - mean(y))^2
    s_yy = sum([(yi - y_mean) ** 2 for yi in ys])
    # Sxy = sum_i (xi - mean(x)) * (yi - mean(y))
    s_xy = sum([(xi - x_mean) * (yi - y_mean) for xi, yi in zip(xs, ys)])

    # b = Sxy / Sxx;  a = mean(y) - b * mean(x);  R^2 = Sxy^2 / (Sxx * Syy)
    b = s_xy / s_xx
    a = y_mean - b * x_mean
    r_squared = s_xy ** 2 / (s_xx * s_yy)
    # END Question 7

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
Пример #57
0
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    for a user by performing least-squares linear regression using feature_fn
    on the items in restaurants. Also, return the R^2 value of this model.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants
    feature_fn -- A function that takes a restaurant and returns a number

    Raises KeyError if the user has no review for one of the restaurants.
    """
    reviews_by_user = {
        review_restaurant_name(review): review_rating(review)
        for review in user_reviews(user).values()
    }

    xs = [feature_fn(r) for r in restaurants]
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    # BEGIN Question 7
    # Means once, deviations once; the three sums are built from these.
    x_avg = mean(xs)
    y_avg = mean(ys)
    x_gaps = [x - x_avg for x in xs]
    y_gaps = [y - y_avg for y in ys]

    sxx = sum([gap ** 2 for gap in x_gaps])
    syy = sum([gap ** 2 for gap in y_gaps])
    sxy = sum(x_gap * y_gap for x_gap, y_gap in zip(x_gaps, y_gaps))

    b = sxy / sxx
    a = y_avg - b * x_avg
    r_squared = sxy ** 2 / (sxx * syy)
    # END Question 7

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
Пример #58
0
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    for a user by performing least-squares linear regression using feature_fn
    on the items in restaurants. Also, return the R^2 value of this model.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants
    feature_fn -- A function that takes a restaurant and returns a number

    Raises KeyError when a restaurant is not among the user's reviews.
    """
    reviews_by_user = {
        review_restaurant_name(review): review_rating(review)
        for review in user_reviews(user).values()
    }

    xs = [feature_fn(r) for r in restaurants]
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    # BEGIN Question 7
    # Create the individual components of a, b and r_squared. The means are
    # loop invariants and are therefore computed before the sums.
    item_mean = mean(xs)
    review_mean = mean(ys)

    sxx = sum([(item - item_mean) ** 2 for item in xs])
    syy = sum([(review - review_mean) ** 2 for review in ys])
    # zip walks both lists in lockstep, replacing range(len(...)) indexing.
    sxy = sum([(item - item_mean) * (review - review_mean)
               for item, review in zip(xs, ys)])

    b = sxy / sxx
    a = review_mean - b * item_mean
    r_squared = (sxy ** 2) / (sxx * syy)
    # END Question 7

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
Пример #59
0
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    for USER by performing least-squares linear regression using FEATURE_FN
    on the items in RESTAURANTS. Also, return the R^2 value of this model.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants
    feature_fn -- A function that takes a restaurant and returns a number

    Raises KeyError if one of RESTAURANTS was never reviewed by USER.
    """
    reviews_by_user = {
        review_restaurant_name(review): review_rating(review)
        for review in user_reviews(user).values()
    }

    xs = [feature_fn(r) for r in restaurants]
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    # Hoisted means: one mean() call per list instead of one per term.
    x_bar = mean(xs)
    y_bar = mean(ys)

    # Accumulate all three sums while walking the paired data once.
    s_xx, s_yy, s_xy = 0, 0, 0
    for xi, yi in zip(xs, ys):
        xi_dev = xi - x_bar
        yi_dev = yi - y_bar
        s_xx += xi_dev ** 2
        s_yy += yi_dev ** 2
        s_xy += xi_dev * yi_dev

    b = s_xy / s_xx
    a = y_bar - b * x_bar
    r_squared = s_xy ** 2 / (s_xx * s_yy)

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
Пример #60
0
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    for a user by performing least-squares linear regression using feature_fn
    on the items in restaurants. Also, return the R^2 value of this model.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants
    feature_fn -- A function that takes a restaurant and returns a number
    """
    xs = [feature_fn(r) for r in restaurants]
    ys = [user_rating(user, restaurant_name(r)) for r in restaurants]

    # BEGIN Question 7
    # Compute the means once. Previously mean() ran twice per loop iteration
    # and the deviation lists were grown by repeated list concatenation.
    x_mean = mean(xs)
    y_mean = mean(ys)

    x_devs = [x - x_mean for x in xs]
    y_devs = [y - y_mean for y in ys]

    s_xx = sum(dev ** 2 for dev in x_devs)
    s_yy = sum(dev ** 2 for dev in y_devs)
    s_xy = sum(y_dev * x_dev for y_dev, x_dev in zip(y_devs, x_devs))

    # Slope, intercept and R^2 of the least-squares line y = a + b * x.
    b = s_xy / s_xx
    a = y_mean - b * x_mean
    r_squared = (s_xy ** 2) / (s_xx * s_yy)
    # END Question 7

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared