def find_predictor(user, restaurants, feature_fn):
    """Fit a least-squares line to a user's ratings and return it with R^2.

    The x values are feature_fn applied to each restaurant; the y values are
    the ratings the user gave to those same restaurants.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants
    feature_fn -- A function that takes a restaurant and returns a number

    Returns a pair (predictor, r_squared), where predictor maps a restaurant
    to b * feature_fn(restaurant) + a.
    """
    ratings_by_name = {}
    for review in user_reviews(user).values():
        ratings_by_name[review_restaurant_name(review)] = review_rating(review)

    xs = [feature_fn(r) for r in restaurants]
    ys = [ratings_by_name[restaurant_name(r)] for r in restaurants]

    # BEGIN Question 7
    x_bar = mean(xs)
    y_bar = mean(ys)

    # Sums of squared deviations and of cross deviations.
    s_xx = sum((x - x_bar) ** 2 for x in xs)
    s_yy = sum((y - y_bar) ** 2 for y in ys)
    s_xy = sum((x - x_bar) * (y - y_bar) for x, y in zip(xs, ys))

    b = s_xy / s_xx
    r_squared = s_xy ** 2 / (s_xx * s_yy)
    a = y_bar - b * x_bar
    # END Question 7

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
def find_centroid(cluster):
    """Return [mean latitude, mean longitude] of the restaurants in cluster.

    Each restaurant's location is read with restaurant_location; element 0 is
    averaged into the first coordinate and element 1 into the second.
    """
    points = [restaurant_location(restaurant) for restaurant in cluster]
    # Average coordinate 0 first, then coordinate 1.
    return [mean([point[axis] for point in points]) for axis in (0, 1)]
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    for USER by performing least-squares linear regression using FEATURE_FN
    on the items in RESTAURANTS. Also, return the R^2 value of this model.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants
    feature_fn -- A function that takes a restaurant and returns a number

    Returns a pair (predictor, r_squared); predictor(restaurant) evaluates
    b * feature_fn(restaurant) + a for the fitted slope b and intercept a.
    """
    reviews_by_user = {review_restaurant_name(review): review_rating(review)
                       for review in user_reviews(user).values()}

    xs = [feature_fn(r) for r in restaurants]
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    mean_x = mean(xs)
    mean_y = mean(ys)

    # xs already holds feature_fn(r) for every restaurant, so the deviations
    # are derived from it instead of calling feature_fn again.
    dev_x = [x - mean_x for x in xs]
    dev_y = [y - mean_y for y in ys]

    sxx = sum([pow(dx, 2) for dx in dev_x])
    syy = sum([pow(dy, 2) for dy in dev_y])
    sxy = sum([dx * dy for dx, dy in zip(dev_x, dev_y)])

    b = sxy / sxx
    a = mean_y - b * mean_x
    r_squared = sxy ** 2 / (sxx * syy)

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
def experiment3(trials=10):
    """Scatter-plot site-score spread against a chain-based fitness summary.

    Samples `trials` models with sample_code and `trials` with sample_matrix,
    measures the standard deviation (sd) of their scores over 10000 random
    sites, runs mh once per model, and plots one point per model on a
    semilog-y scatter ('apw' for codes, 'linear' for matrices).
    """
    mu, Ne, L, sigma = -10, 5, 10, 1

    codes = [sample_code(L, sigma) for _ in range(trials)]
    pssms = [sample_matrix(L, sigma) for _ in range(trials)]
    sites = [random_site(L) for _ in xrange(10000)]

    def site_score_sd(scorer, model):
        # Spread of the model's scores across the shared random sites.
        return sd([scorer(model, site) for site in sites])

    apw_site_sigmas = [site_score_sd(score, code) for code in codes]
    linear_site_sigmas = [site_score_sd(score_seq, pssm) for pssm in pssms]

    def occupancy(scorer, model, site):
        return 1/(1+exp(scorer(model, site)-mu))

    def phat(scorer, model, site):
        return 1/(1+exp(scorer(model, site)-mu))**(Ne-1)

    def mean_fit(scorer, model):
        chain = mh(lambda s: phat(scorer, model, s),
                   proposal=mutate_site,
                   x0=random_site(L),
                   capture_state=lambda s: occupancy(scorer, model, s))
        # The first element of the mh result is dropped before averaging.
        # NOTE(review): log10 values are averaged but inverted with exp, not
        # 10**x -- confirm this is intended.
        return exp(mean(map(log10, chain[1:])))

    apw_mean_fits = [mean_fit(score, code) for code in tqdm(codes)]
    linear_mean_fits = [mean_fit(score_seq, pssm) for pssm in tqdm(pssms)]

    plt.scatter(apw_site_sigmas, apw_mean_fits, label='apw')
    plt.scatter(linear_site_sigmas, linear_mean_fits, color='g', label='linear')
    plt.semilogy()
    plt.legend(loc='lower right')
def parametrize_approx(site,eta=1,tol=10**-2,mono=True,iterations=100000):
    """Iteratively fit one weight per feature until sampled means match.

    Arguments:
    site -- the collection of sites to fit; every site is indexed like the
            first one (the name is kept for backward compatibility)
    eta -- step size applied to each (sampled mean - observed mean) gap
    tol -- stop once the summed squared gap is no longer greater than tol
    mono -- if true use per-position base indicators, otherwise indicators
            for adjacent base pairs
    iterations -- passed through to sample_dist

    Returns the list of weights in use when the loop stopped.
    """
    # The original body read an undefined name `sites` and never used the
    # `site` parameter; bind the parameter to the name the body expects.
    sites = site
    L = len(sites[0])

    # i / b are bound as lambda defaults so each indicator keeps its own
    # position and base.
    if mono:
        fs = [lambda s, i=i, b=b: s[i] == b
              for i in range(L) for b in bases]
    else:
        fs = [lambda s, i=i, b1=b1, b2=b2: (s[i] == b1 and s[i+1] == b2)
              for i in range(L-1) for b1 in bases for b2 in bases]

    ys = [mean(f(s) for s in sites) for f in fs]
    lambs = [1 for _ in ys]
    err = 1
    while err > tol:
        # presumably sample_dist draws sites under the current weights --
        # TODO confirm against its definition
        site_chain = sample_dist(fs, lambs, iterations=iterations)
        yhats = [mean(fi(s) for s in site_chain) for fi in fs]
        lambs_new = [lamb + (yhat - y)*eta
                     for lamb, y, yhat in zip(lambs, ys, yhats)]
        for target, estimate, old, new in zip(ys, yhats, lambs, lambs_new):
            # Single-argument print keeps the output identical on Python 2
            # and also parses on Python 3.
            print(" ".join(str(item) for item in
                           (target, "vs.", estimate, ":", old, "->", new)))
        err = sum((y - yhat)**2 for y, yhat in zip(ys, yhats))
        print(" ".join(str(item) for item in ("err:", err)))
        lambs = lambs_new
    return lambs
def find_centroid(restaurants):
    """Return the centroid of the locations of RESTAURANTS.

    The result is a two-element list: the mean of element 0 and the mean of
    element 1 of each restaurant_location value.
    """
    # The leftover scaffold string statement was removed; it had no effect.
    locations = [restaurant_location(restaurant) for restaurant in restaurants]
    latitudes = [location[0] for location in locations]
    longitudes = [location[1] for location in locations]
    return [mean(latitudes), mean(longitudes)]
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    for `user` by performing least-squares linear regression using
    `feature_fn` on the items in `restaurants`. Also, return the R^2 value of
    this model.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants
    feature_fn -- A function that takes a restaurant and returns a number

    Returns a pair (predictor, r_squared); predictor(restaurant) evaluates
    b * feature_fn(restaurant) + a.
    """
    reviews_by_user = {review_restaurant_name(review): review_rating(review)
                       for review in user_reviews(user).values()}

    xs = [feature_fn(r) for r in restaurants]
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    # BEGIN Question 7
    x_mean = mean(xs)
    y_mean = mean(ys)
    s_xx = sum([pow(x - x_mean, 2) for x in xs])
    s_yy = sum([pow(y - y_mean, 2) for y in ys])
    s_xy = sum([(x - x_mean) * (y - y_mean) for x, y in zip(xs, ys)])

    # Slope is computed once and reused for the intercept.
    b = s_xy / s_xx
    a = y_mean - b * x_mean
    r_squared = pow(s_xy, 2) / (s_xx * s_yy)
    # END Question 7

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
def find_centroid(restaurants):
    """Return the centroid of the locations of RESTAURANTS.

    The centroid is [mean of location[0], mean of location[1]] over the
    restaurant_location of every restaurant.
    """
    # The leftover scaffold string statement was removed; it had no effect.
    location_list = [restaurant_location(r) for r in restaurants]
    latitude = mean([location[0] for location in location_list])
    longitude = mean([location[1] for location in location_list])
    return [latitude, longitude]
def plot_results_dict_gini_qq(results_dict,filename=None):
    """Plot observed vs. mean sampled motif Gini coefficients.

    Arguments:
    results_dict -- mapping whose keys split on "_" into exactly three parts
                    (two genome parts and a TF name) and whose values hold
                    ['maxent']['motif_gini'] and ['uniform']['motif_gini']
    filename -- forwarded unchanged to maybesave

    Draws one 'ME' and one 'TURS' point per key plus a dashed identity line,
    prints pearsonr of observed vs. each sampled series, then calls
    maybesave(filename).
    """
    bios = []
    maxents = []
    uniforms = []
    for key, entry in results_dict.items():
        g1, g2, tf = key.split("_")
        genome = g1 + "_" + g2
        bio_motif = extract_tfdf_sites(genome, tf)
        # The unused motif_ic value and enumerate index were dropped.
        bios.append(motif_gini(bio_motif))
        maxents.append(mean(entry['maxent']['motif_gini']))
        uniforms.append(mean(entry['uniform']['motif_gini']))

    plt.scatter(bios, maxents, label='ME')
    plt.scatter(bios, uniforms, label='TURS', color='g')

    # Dashed y = x reference line spanning every plotted value.
    everything = bios + maxents + uniforms
    minval = min(everything)
    maxval = max(everything)
    plt.plot([minval, maxval], [minval, maxval], linestyle='--')

    plt.xlabel("Observed Gini Coefficient")
    plt.ylabel("Mean Sampled Gini Coefficient")
    plt.legend(loc='upper left')

    # Single-argument print keeps Python 2 output and parses on Python 3.
    print(" ".join(("bio vs maxent:", str(pearsonr(bios, maxents)))))
    print(" ".join(("bio vs uniform:", str(pearsonr(bios, uniforms)))))
    maybesave(filename)
def find_predictor(user, restaurants, feature_fn):
    """Build a linear rating predictor for a user and report its R^2.

    Regresses the user's ratings on feature_fn(restaurant) by least squares
    over the given restaurants.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants
    feature_fn -- A function that takes a restaurant and returns a number

    Returns (predictor, r_squared); predictor(restaurant) is
    b * feature_fn(restaurant) + a.
    """
    rating_of = {review_restaurant_name(rev): review_rating(rev)
                 for rev in user_reviews(user).values()}

    xs = [feature_fn(r) for r in restaurants]
    ys = [rating_of[restaurant_name(r)] for r in restaurants]

    x_bar, y_bar = mean(xs), mean(ys)

    # Accumulate all three sums in one pass; xs and ys are the same length
    # because both are built from the same restaurants.
    sum_xx = sum_yy = sum_xy = 0
    for x, y in zip(xs, ys):
        sum_xx += (x - x_bar)**2
        sum_yy += (y - y_bar)**2
        sum_xy += (x - x_bar) * (y - y_bar)

    b = sum_xy / sum_xx
    a = y_bar - b * x_bar
    r_squared = sum_xy**2 / (sum_xx * sum_yy)

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
def sigma_Ne_contour_plot(filename=None):
    """Draw a 2x2 grid of filled contour plots over a sigma / Ne grid.

    For 20 sigma values in [0, 5] and 20 Ne values in [1, 20], samples 100
    motifs per grid cell with sample_motif, then plots expected occupancy,
    mean motif_ic, mean motif_gini and mean total_motif_mi, each with a
    colorbar, and finally calls maybesave(filename).
    """
    sigmas = np.linspace(0, 5, 20)
    Nes = np.linspace(1, 20, 20)
    L = 10
    n = 50
    copies = 10 * n
    trials = 100

    # motifss[row][col] holds the sampled motifs for (Nes[row], sigmas[col]).
    motifss = []
    for Ne in tqdm(Nes):
        row = []
        for sigma in sigmas:
            row.append([sample_motif(sigma, Ne, L, copies, n)
                        for _ in range(trials)])
        motifss.append(row)

    occ_M = [[expected_occupancy(sigma, Ne, L, copies) for sigma in sigmas]
             for Ne in tqdm(Nes)]

    print("ic_M")
    ic_M = mmap(lambda ms: mean(map(motif_ic, ms)), motifss)
    print("gini_M")
    gini_M = mmap(lambda ms: mean(map(motif_gini, ms)), motifss)
    print("mi_M")
    mi_M = mmap(lambda ms: mean(map(total_motif_mi, ms)), tqdm(motifss))

    for panel, matrix in enumerate((occ_M, ic_M, gini_M, mi_M), 1):
        plt.subplot(2, 2, panel)
        plt.contourf(sigmas, Nes, matrix, cmap='jet')
        plt.colorbar()

    maybesave(filename)
def find_centroid(cluster):
    """Return the mean location of the restaurants in cluster.

    The result is [mean of coordinate 0, mean of coordinate 1] taken over
    restaurant_location of each restaurant.
    """
    # BEGIN Question 5
    spots = [restaurant_location(place) for place in cluster]

    def coordinate_mean(index):
        return mean([spot[index] for spot in spots])

    return [coordinate_mean(0), coordinate_mean(1)]
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    for a user by performing least-squares linear regression using feature_fn
    on the items in restaurants. Also, return the R^2 value of this model.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants
    feature_fn -- A function that takes a restaurant and returns a number

    Returns a pair (predictor, r_squared); predictor(restaurant) evaluates
    b * feature_fn(restaurant) + a.
    """
    reviews_by_user = {review_restaurant_name(review): review_rating(review)
                       for review in user_reviews(user).values()}

    xs = [feature_fn(restaurant) for restaurant in restaurants]
    ys = [reviews_by_user[restaurant_name(restaurant)] for restaurant in restaurants]

    # BEGIN Question 7
    def centered_product_sum(s1, s2):
        """Return the sum of (u - mean(s1)) * (v - mean(s2)) over pairs."""
        # Named so it no longer shadows the builtin sum; both means are
        # computed once instead of on every iteration.
        m1, m2 = mean(s1), mean(s2)
        result = 0
        for u, v in zip(s1, s2):
            result += (u - m1) * (v - m2)
        return result

    S_xx = centered_product_sum(xs, xs)
    S_yy = centered_product_sum(ys, ys)
    S_xy = centered_product_sum(xs, ys)

    b = S_xy / S_xx
    a = mean(ys) - b * mean(xs)
    r_squared = (S_xy * S_xy) / (S_xx * S_yy)
    # END Question 7

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
def find_predictor(user, restaurants, feature_fn):
    """Least-squares fit of `user`'s ratings against `feature_fn`.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants
    feature_fn -- A function that takes a restaurant and returns a number

    Returns (predictor, r_squared), where predictor is a function from a
    restaurant to b * feature_fn(restaurant) + a.
    """
    rating_lookup = {}
    for review in user_reviews(user).values():
        rating_lookup[review_restaurant_name(review)] = review_rating(review)

    xs = [feature_fn(r) for r in restaurants]
    ys = [rating_lookup[restaurant_name(r)] for r in restaurants]

    # BEGIN Question 7
    x_center, y_center = mean(xs), mean(ys)
    x_devs = [x - x_center for x in xs]
    y_devs = [y - y_center for y in ys]

    ss_x = 0
    for dx in x_devs:
        ss_x += dx ** 2

    ss_y = 0
    for dy in y_devs:
        ss_y += dy ** 2

    ss_xy = 0
    for dx, dy in zip(x_devs, y_devs):
        ss_xy += dx * dy

    b = ss_xy / ss_x
    a = y_center - b * x_center
    r_squared = ss_xy ** 2 / (ss_x * ss_y)
    # END Question 7

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    for USER by performing least-squares linear regression using FEATURE_FN
    on the items in RESTAURANTS. Also, return the R^2 value of this model.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants
    feature_fn -- A function that takes a restaurant and returns a number

    Returns a pair (predictor, r_squared); predictor(restaurant) evaluates
    b * feature_fn(restaurant) + a.
    """
    reviews_by_user = {review_restaurant_name(review): review_rating(review)
                       for review in user_reviews(user).values()}

    xs = [feature_fn(r) for r in restaurants]
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    # The scaffold's placeholder string and its zero-initialisation of
    # b, a, r_squared were dead code and have been removed.
    mean_x = mean(xs)
    mean_y = mean(ys)
    s_xx = sum([pow(x - mean_x, 2) for x in xs])
    s_yy = sum([pow(y - mean_y, 2) for y in ys])
    s_xy = sum([(x - mean_x) * (y - mean_y) for x, y in zip(xs, ys)])

    b = s_xy / s_xx
    a = mean_y - b * mean_x
    r_squared = pow(s_xy, 2) / (s_xx * s_yy)

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
def find_centroid(cluster):
    """Return the centroid of the locations of the restaurants in cluster.

    The result is [mean of location[0], mean of location[1]].
    """
    # BEGIN Question 5
    locations = [restaurant_location(restaurant) for restaurant in cluster]
    # Index 0 is named latitude and index 1 longitude, matching the other
    # find_centroid definitions in this file; the original had the two
    # names swapped (the returned values are unchanged).
    latitudes = [location[0] for location in locations]
    longitudes = [location[1] for location in locations]
    return [mean(latitudes), mean(longitudes)]
def find_centroid(cluster):
    """Return the average location of the restaurants in cluster.

    Collects element 0 and element 1 of every restaurant_location and
    returns their means as a two-element list.
    """
    # BEGIN Question 5
    firsts, seconds = [], []
    for position in [restaurant_location(r) for r in cluster]:
        firsts.append(position[0])
        seconds.append(position[1])
    return [mean(firsts), mean(seconds)]
def find_predictor(user, restaurants, feature_fn):
    """Fit USER's ratings to FEATURE_FN by least squares over RESTAURANTS.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants
    feature_fn -- A function that takes a restaurant and returns a number

    Returns (predictor, r_squared); predictor(restaurant) is
    b * feature_fn(restaurant) + a.
    """
    ratings = {}
    for review in user_reviews(user).values():
        ratings[review_restaurant_name(review)] = review_rating(review)

    xs = [feature_fn(r) for r in restaurants]
    ys = [ratings[restaurant_name(r)] for r in restaurants]

    x_bar = mean(xs)
    y_bar = mean(ys)
    x_devs = [x - x_bar for x in xs]
    y_devs = [y - y_bar for y in ys]

    s_xx = sum(pow(dx, 2) for dx in x_devs)
    s_yy = sum(pow(dy, 2) for dy in y_devs)
    # mul is whatever two-argument product function the file has in scope.
    s_xy = sum(mul(dx, dy) for dx, dy in zip(x_devs, y_devs))

    b = s_xy / s_xx
    a = y_bar - b * x_bar
    r_squared = s_xy ** 2 / mul(s_xx, s_yy)

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
def moran_process(N=1000,turns=10000,mean_site_muts=1,mean_rec_muts=1,init=sample_species,mutate=mutate,
                  fitness=fitness,pop=None,print_modulus=100,hist_modulus=10):
    """Run a birth/death replacement loop on a population of (species, fitness).

    Arguments:
    N -- population size used for sampling indices (and for building pop)
    turns -- number of iterations
    mean_site_muts, mean_rec_muts -- scale factors for the two mutation
        rates handed to `mutate`; each rate is capped at 1
    init -- zero-argument function producing a species when pop is None
    mutate -- function (species, site_mu, rec_mu) returning a new species
    fitness -- function from a species to its fitness
    pop -- optional existing list of (species, fitness) pairs; it is
        modified in place
    print_modulus -- print a progress line when turn % print_modulus == 0
    hist_modulus -- record a history row when turn % hist_modulus == 0

    Returns (pop, hist), where hist rows are
    (turn, selected fitness, mean fitness, mean_dna_ic, mean_rec, mean_recced).
    Returns only pop if inverse_cdf_sample yields None.
    """
    if pop is None:
        pop = []
        for _ in trange(N):
            species = init()
            pop.append((species, fitness(species)))

    # n, L and K are read from module scope.
    site_mu = min(1/float(n*L) * mean_site_muts, 1)
    rec_mu = min(1/float(K) * mean_rec_muts, 1)
    hist = []
    for turn in xrange(turns):
        fits = [fit for (_spec, fit) in pop]
        birth_idx = inverse_cdf_sample(range(N), fits, normalized=False)
        if birth_idx is None:
            # NOTE(review): this early exit returns pop alone, unlike the
            # (pop, hist) pair returned below -- confirm callers expect it.
            return pop
        death_idx = random.randrange(N)
        mother, sel_fit = pop[birth_idx]
        daughter = mutate(mother, site_mu, rec_mu)
        pop[death_idx] = (daughter, fitness(daughter))
        # Mean of the fitnesses as they were before this turn's replacement.
        mean_fits = mean(fits)
        if turn % hist_modulus == 0:
            # The comprehension variables below deliberately avoid the name
            # holding the mother's fitness: under Python 2 list-comprehension
            # variables leak, which previously overwrote it before it was
            # recorded as the selected fitness.
            mean_dna_ic = mean([motif_ic(sites, correct=False)
                                for ((sites, _eps), _fit) in pop])
            mean_rec = mean([recognizer_promiscuity(spec)
                             for (spec, _fit) in pop])
            mean_recced = mean([sites_recognized((dna, rec))
                                for ((dna, rec), _fit) in pop])
            hist.append((turn, sel_fit, mean_fits, mean_dna_ic, mean_rec, mean_recced))
        if turn % print_modulus == 0:
            # mean_dna_ic / mean_rec are refreshed only on hist_modulus turns,
            # so the values printed here come from the latest such turn.
            print(" ".join(str(item) for item in
                           (turn, "sel_fit:", sel_fit, "mean_fit:", mean_fits,
                            "mean_dna_ic:", mean_dna_ic,
                            "mean_rec_prom:", mean_rec)))
    return pop, hist
def find_centroid(restaurants):
    """Return the centroid of the locations of RESTAURANTS.

    The centroid is the list [mean latitude, mean longitude], where latitude
    is element 0 and longitude element 1 of each restaurant_location value.
    """
    # The leftover scaffold string statement was removed; it had no effect.
    locations = [restaurant_location(restaurant) for restaurant in restaurants]
    latitudes = [location[0] for location in locations]
    longitudes = [location[1] for location in locations]
    return [mean(latitudes), mean(longitudes)]
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    for USER by performing least-squares linear regression using FEATURE_FN
    on the items in RESTAURANTS. Also, return the R^2 value of this model.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants
    feature_fn -- A function that takes a restaurant and returns a number

    Returns a pair (predictor, r_squared); predictor(restaurant) evaluates
    b * feature_fn(restaurant) + a.
    """
    reviews_by_user = {review_restaurant_name(review): review_rating(review)
                       for review in user_reviews(user).values()}

    xs = [feature_fn(r) for r in restaurants]
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    # The scaffold's placeholder string and zero-initialisation of
    # b, a, r_squared were dead code and have been removed.
    xs_mean = mean(xs)
    ys_mean = mean(ys)
    xs_minus_mean = [x - xs_mean for x in xs]
    ys_minus_mean = [y - ys_mean for y in ys]

    Sxx = sum([pow(dx, 2) for dx in xs_minus_mean])
    Syy = sum([pow(dy, 2) for dy in ys_minus_mean])
    Sxy = sum([dx * dy for dx, dy in zip(xs_minus_mean, ys_minus_mean)])

    b = Sxy / Sxx
    a = ys_mean - b * xs_mean
    r_squared = pow(Sxy, 2) / (Sxx * Syy)

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
def test_mean(self):
    """Check utils.mean on an empty list, on integers and on a float input."""
    empty_mean = utils.mean([])
    self.assertEqual(0, empty_mean)

    integer_mean = utils.mean([5, 0])
    self.assertEqual(2.5, integer_mean)

    # (sqrt(8) + 1) / 2, compared to five decimal places.
    float_mean = utils.mean([8**0.5, 1])
    self.assertAlmostEqual(1.914213, float_mean, places=5)
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    for a user by performing least-squares linear regression using feature_fn
    on the items in restaurants. Also, return the R^2 value of this model.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants
    feature_fn -- A function that takes a restaurant and returns a number

    Returns a pair (predictor, r_squared); predictor(restaurant) evaluates
    b * feature_fn(restaurant) + a.
    """
    reviews_by_user = {review_restaurant_name(review): review_rating(review)
                       for review in user_reviews(user).values()}

    xs = [feature_fn(r) for r in restaurants]
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    # BEGIN Question 7
    # Means are computed once, not once per element.
    mean_x, mean_y = mean(xs), mean(ys)
    x_devs = [x - mean_x for x in xs]
    y_devs = [y - mean_y for y in ys]

    # Sums of squared deviations (not standard deviations) and the sum of
    # cross deviations.
    sxx = sum(dx**2 for dx in x_devs)
    syy = sum(dy**2 for dy in y_devs)
    sxy = 0
    for dx, dy in zip(x_devs, y_devs):
        sxy += dx*dy

    # Slope and intercept of the fitted line.
    b = sxy/sxx
    a = mean_y - b*mean_x
    r_squared = sxy**2/(sxx*syy)
    # END Question 7

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    for a user by performing least-squares linear regression using feature_fn
    on the items in restaurants. Also, return the R^2 value of this model.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants
    feature_fn -- A function that takes a restaurant and returns a number

    Returns a pair (predictor, r_squared); predictor(restaurant) evaluates
    b * feature_fn(restaurant) + a.
    """
    reviews_by_user = {review_restaurant_name(review): review_rating(review)
                       for review in user_reviews(user).values()}

    xs = [feature_fn(r) for r in restaurants]
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    # BEGIN Question 7
    # Scaffold placeholder string and commented-out stub removed.
    mean_x = mean(xs)
    mean_y = mean(ys)
    sxx = sum([(x - mean_x) * (x - mean_x) for x in xs])
    syy = sum([(y - mean_y) * (y - mean_y) for y in ys])
    sxy = sum([(x - mean_x) * (y - mean_y) for x, y in zip(xs, ys)])

    b = sxy / sxx
    a = mean_y - b * mean_x
    r_squared = sxy * sxy / (sxx * syy)
    # END Question 7

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    for a user by performing least-squares linear regression using feature_fn
    on the items in restaurants. Also, return the R^2 value of this model.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants
    feature_fn -- A function that takes a restaurant and returns a number

    Returns a pair (predictor, r_squared); predictor(restaurant) evaluates
    b * feature_fn(restaurant) + a.
    """
    reviews_by_user = {review_restaurant_name(review): review_rating(review)
                       for review in user_reviews(user).values()}

    xs = [feature_fn(r) for r in restaurants]
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    # BEGIN Question 7
    # Scaffold placeholder string removed; it was a no-op statement.
    mean_x, mean_y = mean(xs), mean(ys)
    x_devs = [x - mean_x for x in xs]
    y_devs = [y - mean_y for y in ys]

    S_xx = sum([dx ** 2 for dx in x_devs])
    S_yy = sum([dy ** 2 for dy in y_devs])
    S_xy = sum([dx * dy for dx, dy in zip(x_devs, y_devs)])

    b = S_xy / S_xx
    a = mean_y - b * mean_x
    r_squared = S_xy ** 2 / (S_xx * S_yy)
    # END Question 7

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
def find_predictor(user, restaurants, feature_fn):
    """Regress a user's ratings on a restaurant feature and report the fit.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants
    feature_fn -- A function that takes a restaurant and returns a number

    Returns (predictor, r_squared): predictor maps a restaurant to
    b * feature_fn(restaurant) + a, and r_squared is the model's R^2.
    """
    user_ratings = {}
    for review in user_reviews(user).values():
        user_ratings[review_restaurant_name(review)] = review_rating(review)

    xs = [feature_fn(r) for r in restaurants]
    ys = [user_ratings[restaurant_name(r)] for r in restaurants]

    x_center = mean(xs)
    y_center = mean(ys)

    s_xx = sum((x - x_center)**2 for x in xs)
    s_yy = sum((y - y_center)**2 for y in ys)
    s_xy = sum((x - x_center)*(y - y_center) for x, y in zip(xs, ys))

    b = s_xy/s_xx
    r_squared = s_xy**2/(s_xx*s_yy)
    a = y_center - b*x_center

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    for a user by performing least-squares linear regression using feature_fn
    on the items in restaurants. Also, return the R^2 value of this model.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants
    feature_fn -- A function that takes a restaurant and returns a number

    Returns a pair (predictor, r_squared); predictor(restaurant) evaluates
    b * feature_fn(restaurant) + a.
    """
    reviews_by_user = {review_restaurant_name(review): review_rating(review)
                       for review in user_reviews(user).values()}

    xs = [feature_fn(r) for r in restaurants]
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    # BEGIN Question 7
    # Scaffold placeholder string removed; it was a no-op statement.
    mean_xs = mean(xs)
    mean_ys = mean(ys)
    x_devs = [x - mean_xs for x in xs]
    y_devs = [y - mean_ys for y in ys]

    sxx = sum(dx * dx for dx in x_devs)
    syy = sum(dy * dy for dy in y_devs)
    sxy = sum(dx * dy for dx, dy in zip(x_devs, y_devs))

    b = sxy / sxx
    a = mean_ys - b * mean_xs
    r_squared = (sxy ** 2) / (sxx * syy)
    # END Question 7

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
def evo_sim_experiment(filename=None):
    """compare bio motifs to on-off evosims

    For every motif exposed by the tfdf object, generates 100 simulated
    motifs with spoof_motif and plots biological vs. mean simulated values
    of motif_ic, motif_gini and total_motif_mi in a 1x3 grid (log-log axes
    are set after the third panel is drawn).

    Arguments:
    filename -- where to save the figure; when None (the default) the figure
                is not saved

    Returns the list of simulated motif collections, one per biological motif.
    """
    tfdf = extract_motif_object_from_tfdf()
    bio_motifs = [getattr(tfdf, tf) for tf in tfdf.tfs]
    evosims = [spoof_motif(motif, num_motifs=100, Ne_tol=10**-4)
               for motif in tqdm(bio_motifs)]
    evo_ics = [mean(map(motif_ic, sm)) for sm in tqdm(evosims)]
    evo_ginis = [mean(map(motif_gini, sm)) for sm in tqdm(evosims)]
    evo_mis = [mean(map(total_motif_mi, sm)) for sm in tqdm(evosims)]

    plt.subplot(1, 3, 1)
    scatter(map(motif_ic, bio_motifs), evo_ics)
    plt.title("Motif IC (bits)")
    plt.xlabel("Biological Value")
    plt.ylabel("Simulated Value")

    plt.subplot(1, 3, 2)
    scatter(map(motif_gini, bio_motifs), evo_ginis)
    plt.title("Motif Gini Coefficient")
    plt.xlabel("Biological Value")
    plt.ylabel("Simulated Value")

    plt.subplot(1, 3, 3)
    scatter(map(total_motif_mi, bio_motifs), evo_mis)
    plt.xlabel("Biological Value")
    plt.ylabel("Simulated Value")
    plt.title("Pairwise Motif MI (bits)")
    plt.loglog()

    plt.tight_layout()
    # savefig needs a real target; with the default filename=None the
    # unconditional call would fail after all the simulation work was done.
    if filename is not None:
        plt.savefig(filename)
    return evosims
def find_predictor(user, restaurants, feature_fn):
    """Produce a linear predictor of a user's ratings together with its R^2.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants
    feature_fn -- A function that takes a restaurant and returns a number

    Returns (predictor, r_squared); predictor(restaurant) is
    b * feature_fn(restaurant) + a.
    """
    rating_by_name = {}
    for review in user_reviews(user).values():
        rating_by_name[review_restaurant_name(review)] = review_rating(review)

    xs = [feature_fn(r) for r in restaurants]
    ys = [rating_by_name[restaurant_name(r)] for r in restaurants]

    # BEGIN Question 7
    x_avg, y_avg = mean(xs), mean(ys)
    sxx = sum((x - x_avg)**2 for x in xs)
    syy = sum((y - y_avg)**2 for y in ys)
    sxy = sum((x - x_avg)*(y - y_avg) for x, y in zip(xs, ys))

    b = sxy / sxx
    a = y_avg - b * x_avg
    r_squared = sxy**2 / (sxx * syy)
    # END Question 7

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
def find_centroid(cluster):
    """Return the centre of the restaurant locations in cluster.

    The centre is [mean of location[0], mean of location[1]] over every
    restaurant's restaurant_location.
    """
    # BEGIN Question 5
    places = [restaurant_location(restaurant) for restaurant in cluster]
    first_coords = [place[0] for place in places]
    second_coords = [place[1] for place in places]
    return [mean(first_coords), mean(second_coords)]
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    for a user by performing least-squares linear regression using feature_fn
    on the items in restaurants. Also, return the R^2 value of this model.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants
    feature_fn -- A function that takes a restaurant and returns a number

    Returns a pair (predictor, r_squared); predictor(restaurant) evaluates
    b * feature_fn(restaurant) + a.
    """
    reviews_by_user = {
        review_restaurant_name(review): review_rating(review)
        for review in user_reviews(user).values()
    }

    xs = [feature_fn(r) for r in restaurants]
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    # The means do not change inside the loops, so compute them once rather
    # than once per element.
    mean_x = mean(xs)
    mean_y = mean(ys)

    sxx = 0
    for x in xs:
        sxx += (x - mean_x)**2

    syy = 0
    for y in ys:
        syy += (y - mean_y)**2

    sxy = 0
    for x, y in zip(xs, ys):
        sxy += (x - mean_x) * (y - mean_y)

    b = sxy / sxx
    a = mean_y - b * mean_x
    r_squared = (sxy * sxy) / (sxx * syy)

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    for a user by performing least-squares linear regression using feature_fn
    on the items in restaurants. Also, return the R^2 value of this model.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants
    feature_fn -- A function that takes a restaurant and returns a number

    Returns a pair (predictor, r_squared); predictor(restaurant) evaluates
    b * feature_fn(restaurant) + a.
    """
    reviews_by_user = {review_restaurant_name(review): review_rating(review)
                       for review in user_reviews(user).values()}

    xs = [feature_fn(r) for r in restaurants]
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    # BEGIN Question 7
    # Means are loop-invariant: compute them once up front.
    mean_x = mean(xs)
    mean_y = mean(ys)

    s_xx = 0
    s_yy = 0
    s_xy = 0

    # Walk the paired values directly instead of indexing both lists.
    for x, y in zip(xs, ys):
        s_xx += (x - mean_x) ** 2
        s_yy += (y - mean_y) ** 2
        s_xy += (x - mean_x) * (y - mean_y)

    # Compute b, a, and r_squared
    b = s_xy / s_xx
    a = mean_y - b * mean_x
    r_squared = s_xy**2 / (s_xx * s_yy)
    # END Question 7

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    for USER by performing least-squares linear regression using FEATURE_FN
    on the items in RESTAURANTS. Also, return the R^2 value of this model.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants
    feature_fn -- A function that takes a restaurant and returns a number

    Returns a pair (predictor, r_squared); predictor(restaurant) evaluates
    b * feature_fn(restaurant) + a.
    """
    reviews_by_user = {review_restaurant_name(review): review_rating(review)
                       for review in user_reviews(user).values()}

    xs = [feature_fn(r) for r in restaurants]
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    # Scaffold placeholder string removed. The means are computed once here
    # instead of being recomputed for every element in the loops below.
    mean_x, mean_y = mean(xs), mean(ys)

    S_xx, S_yy, S_xy = 0, 0, 0
    for x in xs:
        S_xx += pow(x - mean_x, 2)
    for y in ys:
        S_yy += pow(y - mean_y, 2)
    for x, y in zip(xs, ys):
        S_xy += (x - mean_x) * (y - mean_y)

    b = S_xy / S_xx
    a = mean_y - b * mean_x
    r_squared = pow(S_xy, 2) / (S_xx * S_yy)

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    for a user by performing least-squares linear regression using feature_fn
    on the items in restaurants. Also, return the R^2 value of this model.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants
    feature_fn -- A function that takes a restaurant and returns a number

    Returns a pair (predictor, r_squared); predictor(restaurant) evaluates
    b * feature_fn(restaurant) + a.
    """
    reviews_by_user = {
        review_restaurant_name(review): review_rating(review)
        for review in user_reviews(user).values()
    }

    list_of_x_values = [feature_fn(r) for r in restaurants]
    list_of_y_values = [
        reviews_by_user[restaurant_name(r)] for r in restaurants
    ]

    # BEGIN Question 7
    # Scaffold placeholder string and duplicated commented-out stubs removed.
    mean_x = mean(list_of_x_values)
    mean_y = mean(list_of_y_values)

    Sxx = sum((x - mean_x)**2 for x in list_of_x_values)
    Syy = sum((y - mean_y)**2 for y in list_of_y_values)
    Sxy = sum((x - mean_x) * (y - mean_y)
              for x, y in zip(list_of_x_values, list_of_y_values))

    b = Sxy / Sxx
    a = mean_y - b * mean_x
    r_squared = Sxy**2 / (Sxx * Syy)
    # END Question 7

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    for a user by performing least-squares linear regression using feature_fn
    on the items in restaurants. Also, return the R^2 value of this model.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants
    feature_fn -- A function that takes a restaurant and returns a number

    Returns a pair (predictor, r_squared); predictor(restaurant) evaluates
    b * feature_fn(restaurant) + a.
    """
    xs = [feature_fn(r) for r in restaurants]
    ys = [user_rating(user, restaurant_name(r)) for r in restaurants]

    # BEGIN Question 7
    # Hoisted out of the loops: the means are the same for every element.
    mean_x = mean(xs)
    mean_y = mean(ys)

    sxx, syy, sxy = 0, 0, 0
    for feature in xs:
        sxx += (feature - mean_x)**2
    for rating in ys:
        syy += (rating - mean_y)**2
    for feature, rating in zip(xs, ys):
        sxy += (feature - mean_x) * (rating - mean_y)

    b = sxy / sxx
    a = mean_y - b * mean_x
    r_squared = sxy**2 / (sxx * syy)
    # END Question 7

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    for a user by performing least-squares linear regression using feature_fn
    on the items in restaurants. Also, return the R^2 value of this model.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants
    feature_fn -- A function that takes a restaurant and returns a number

    Returns a pair (predictor, r_squared); predictor(restaurant) evaluates
    b * feature_fn(restaurant) + a.
    """
    reviews_by_user = {
        review_restaurant_name(review): review_rating(review)
        for review in user_reviews(user).values()
    }

    xs = [feature_fn(r) for r in restaurants]
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    # BEGIN Question 7
    # Compute each mean once instead of once per element of every sum.
    mean_x = mean(xs)
    mean_y = mean(ys)

    s_xx = sum([(x_i - mean_x)**2 for x_i in xs])
    s_yy = sum([(y_i - mean_y)**2 for y_i in ys])
    s_xy = sum([(x_i - mean_x) * (y_i - mean_y)
                for x_i, y_i in zip(xs, ys)])

    b = s_xy / s_xx
    a = mean_y - b * mean_x
    r_squared = s_xy**2 / (s_xx * s_yy)
    # END Question 7

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    for a user by performing least-squares linear regression using feature_fn
    on the items in restaurants. Also, return the R^2 value of this model.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants
    feature_fn -- A function that takes a restaurant and returns a number

    Returns a pair (predictor, r_squared); predictor(restaurant) evaluates
    b * feature_fn(restaurant) + a.
    """
    reviews_by_user = {review_restaurant_name(review): review_rating(review)
                       for review in user_reviews(user).values()}

    xs = [feature_fn(r) for r in restaurants]
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    # BEGIN Question 7
    # Loop-invariant means, computed once before accumulating.
    mean_x = mean(xs)
    mean_y = mean(ys)

    s_xx, s_yy, s_xy = 0, 0, 0
    for x, y in zip(xs, ys):
        s_xx += (x - mean_x) ** 2
        s_yy += (y - mean_y) ** 2
        s_xy += (x - mean_x) * (y - mean_y)

    b = s_xy / s_xx
    a = mean_y - b * mean_x
    r_squared = (s_xy ** 2) / (s_xx * s_yy)
    # END Question 7

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor and the R^2 value of its linear model.

    The predictor f(restaurant) -> rating is obtained for `user` by
    least-squares linear regression using `feature_fn` on the items in
    `restaurants`.

    Arguments:
    user -- A user
    restaurants -- A non-empty sequence of restaurants reviewed by `user`
    feature_fn -- A function that takes a restaurant and returns a number

    Returns:
    A pair (predictor, r_squared).

    Raises:
    KeyError -- if a restaurant's name is missing from the user's reviews.
    ZeroDivisionError -- if s_xx or s_xx * s_yy is zero (unguarded division).
    """
    # Maps each reviewed restaurant's name to the rating from that review.
    reviews_by_user = {
        review_restaurant_name(review): review_rating(review)
        for review in user_reviews(user).values()
    }

    xs = [feature_fn(r) for r in restaurants]
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    # BEGIN Question 7
    # Compute each mean once; iterate values directly rather than by index.
    mean_x = mean(xs)
    mean_y = mean(ys)

    s_xx = sum([(x - mean_x) ** 2 for x in xs])
    s_yy = sum([(y - mean_y) ** 2 for y in ys])
    s_xy = sum([(x - mean_x) * (y - mean_y) for x, y in zip(xs, ys)])

    b = s_xy / s_xx
    a = mean_y - b * mean_x
    r_squared = s_xy ** 2 / (s_xx * s_yy)
    # END Question 7

    def predictor(restaurant):
        """Evaluate the fitted line at this restaurant's feature value."""
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor and the R^2 value of its linear model.

    The predictor is a function from a restaurant to a predicted rating for
    `user`, found by least-squares linear regression using `feature_fn` on
    the items in `restaurants`.

    Arguments:
    user -- A user
    restaurants -- A non-empty sequence of restaurants reviewed by `user`
    feature_fn -- A function that takes a restaurant and returns a number

    Returns:
    A pair (predictor, r_squared).

    Raises:
    KeyError -- if a restaurant's name is missing from the user's reviews.
    ZeroDivisionError -- if s_xx or s_xx * s_yy is zero (unguarded division).
    """
    reviews_by_user = {
        review_restaurant_name(review): review_rating(review)
        for review in user_reviews(user).values()
    }

    xs = [feature_fn(r) for r in restaurants]
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    # Each mean is computed once; the deviations are then shared by all
    # three sums instead of being recomputed for each.
    mean_x = mean(xs)
    mean_y = mean(ys)
    x_devs = [x - mean_x for x in xs]
    y_devs = [y - mean_y for y in ys]

    s_xx = sum([dx ** 2 for dx in x_devs])
    s_yy = sum([dy ** 2 for dy in y_devs])
    # zip pairs the i-th x deviation with the i-th y deviation.
    s_xy = sum([dx * dy for dx, dy in zip(x_devs, y_devs)])

    b = s_xy / s_xx
    a = mean_y - b * mean_x
    r_squared = (s_xy ** 2) / (s_xx * s_yy)

    def predictor(restaurant):
        """Evaluate the fitted line at this restaurant's feature value."""
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor and the R^2 value of its linear model.

    The predictor is a function from a restaurant to a predicted rating for
    `user`, found by least-squares linear regression using `feature_fn` on
    the items in `restaurants`.

    Arguments:
    user -- A user
    restaurants -- A non-empty sequence of restaurants
    feature_fn -- A function that takes a restaurant and returns a number

    Returns:
    A pair (predictor, r_squared).

    Raises:
    ZeroDivisionError -- if s_xx or s_xx * s_yy is zero (unguarded division).
    """
    xs = [feature_fn(r) for r in restaurants]
    ys = [user_rating(user, restaurant_name(r)) for r in restaurants]

    # BEGIN Question 7
    # Means are loop-invariant, so evaluate them a single time.
    mean_x = mean(xs)
    mean_y = mean(ys)

    s_xx = sum([(x - mean_x) ** 2 for x in xs])
    s_yy = sum([(y - mean_y) ** 2 for y in ys])
    s_xy = sum([(x - mean_x) * (y - mean_y) for x, y in zip(xs, ys)])

    b = s_xy / s_xx
    a = mean_y - b * mean_x
    r_squared = (s_xy ** 2) / (s_xx * s_yy)
    # END Question 7

    def predictor(restaurant):
        """Evaluate the fitted line at this restaurant's feature value."""
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor and the R^2 value of its linear model.

    The predictor is a function from a restaurant to a predicted rating for
    `user`, found by least-squares linear regression using `feature_fn` on
    the items in `restaurants`.

    Arguments:
    user -- A user
    restaurants -- A non-empty sequence of restaurants reviewed by `user`
    feature_fn -- A function that takes a restaurant and returns a number

    Returns:
    A pair (predictor, r_squared).

    Raises:
    KeyError -- if a restaurant's name is missing from the user's reviews.
    ZeroDivisionError -- if sxx or sxx * syy is zero (unguarded division).
    """
    reviews_by_user = {
        review_restaurant_name(review): review_rating(review)
        for review in user_reviews(user).values()
    }

    xs = [feature_fn(r) for r in restaurants]
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    # BEGIN Question 7
    # Sums of squares; both means are evaluated once up front rather than
    # once per element.
    x_bar = mean(xs)
    y_bar = mean(ys)

    sxx = sum([(x - x_bar) ** 2 for x in xs])
    syy = sum([(y - y_bar) ** 2 for y in ys])
    sxy = sum([(x - x_bar) * (y - y_bar) for x, y in zip(xs, ys)])

    b = sxy / sxx
    a = y_bar - b * x_bar
    r_squared = sxy ** 2 / (sxx * syy)

    def predictor(restaurant):
        """Evaluate the fitted line at this restaurant's feature value."""
        return a + b * feature_fn(restaurant)

    return predictor, r_squared
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor and the R^2 value of its linear model.

    The predictor is a function from a restaurant to a predicted rating for
    `user`, found by least-squares linear regression using `feature_fn` on
    the items in `restaurants`.

    Arguments:
    user -- A user
    restaurants -- A non-empty sequence of restaurants reviewed by `user`
    feature_fn -- A function that takes a restaurant and returns a number

    Returns:
    A pair (predictor, r_squared).

    Raises:
    KeyError -- if a restaurant's name is missing from the user's reviews.
    ZeroDivisionError -- if s_xx or s_xx * s_yy is zero (unguarded division).
    """
    # Ratings given by this user, keyed by restaurant name.
    reviews_by_user = {
        review_restaurant_name(review): review_rating(review)
        for review in user_reviews(user).values()
    }

    # Feature values (xs) and the matching ratings (ys).
    xs = [feature_fn(r) for r in restaurants]
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    # Means are computed once instead of inside every comprehension step.
    mean_x = mean(xs)
    mean_y = mean(ys)

    # Sums of squares and cross-products.
    s_xx = sum([(x - mean_x) ** 2 for x in xs])
    s_yy = sum([(y - mean_y) ** 2 for y in ys])
    s_xy = sum([(x - mean_x) * (y - mean_y) for x, y in zip(xs, ys)])

    # Regression coefficients; the slope is computed once and reused.
    b = s_xy / s_xx
    a = mean_y - b * mean_x
    r_squared = s_xy ** 2 / (s_xx * s_yy)

    def predictor(restaurant):
        """Evaluate the fitted line at this restaurant's feature value."""
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor and the R^2 value of its linear model.

    The predictor is a function from a restaurant to a predicted rating for
    `user`, found by least-squares linear regression using `feature_fn` on
    the items in `restaurants`.

    Arguments:
    user -- A user
    restaurants -- A non-empty sequence of restaurants reviewed by `user`
    feature_fn -- A function that takes a restaurant and returns a number

    Returns:
    A pair (predictor, r_squared).

    Raises:
    KeyError -- if a restaurant's name is missing from the user's reviews.
    ZeroDivisionError -- if sxx or sxx * syy is zero (unguarded division).
    """
    reviews_by_user = {
        review_restaurant_name(review): review_rating(review)
        for review in user_reviews(user).values()
    }

    xs = [feature_fn(r) for r in restaurants]
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    # BEGIN Question 7
    # Evaluate each mean once; they do not change between elements.
    mean_x = mean(xs)
    mean_y = mean(ys)

    sxx = sum((x - mean_x) ** 2 for x in xs)
    syy = sum((y - mean_y) ** 2 for y in ys)
    # The pair variables are named x, y so they cannot be confused with the
    # regression coefficients a and b assigned below.
    sxy = sum((x - mean_x) * (y - mean_y) for x, y in zip(xs, ys))

    b = sxy / sxx
    a = mean_y - b * mean_x
    r_squared = sxy ** 2 / (sxx * syy)
    # END Question 7

    def predictor(restaurant):
        """Evaluate the fitted line at this restaurant's feature value."""
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor and the R^2 value of its linear model.

    The predictor is a function from a restaurant to a predicted rating for
    USER, found by least-squares linear regression using FEATURE_FN on the
    items in RESTAURANTS.

    Arguments:
    user -- A user
    restaurants -- A non-empty sequence of restaurants reviewed by `user`
    feature_fn -- A function that takes a restaurant and returns a number

    Returns:
    A pair (predictor, r_squared).

    Raises:
    KeyError -- if a restaurant's name is missing from the user's reviews.
    ZeroDivisionError -- if sxx or sxx * syy is zero (unguarded division).
    """
    reviews_by_user = {
        review_restaurant_name(review): review_rating(review)
        for review in user_reviews(user).values()
    }

    xs = [feature_fn(r) for r in restaurants]
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    # Hoist the means so they are not recomputed for every element.
    mean_x = mean(xs)
    mean_y = mean(ys)

    sxx = sum([pow(x - mean_x, 2) for x in xs])
    syy = sum([pow(y - mean_y, 2) for y in ys])
    sxy = sum([(x - mean_x) * (y - mean_y) for x, y in zip(xs, ys)])

    b = sxy / sxx
    a = mean_y - b * mean_x
    r_squared = pow(sxy, 2) / (sxx * syy)

    def predictor(restaurant):
        """Evaluate the fitted line at this restaurant's feature value."""
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor and the R^2 value of its linear model.

    The predictor is a function from a restaurant to a predicted rating for
    `user`, found by least-squares linear regression using `feature_fn` on
    the items in `restaurants`.

    Arguments:
    user -- A user
    restaurants -- A non-empty sequence of restaurants
    feature_fn -- A function that takes a restaurant and returns a number

    Returns:
    A pair (predictor, r_squared).

    Raises:
    ZeroDivisionError -- if s_xx or s_xx * s_yy is zero (unguarded division).
    """
    xs = [feature_fn(r) for r in restaurants]
    ys = [user_rating(user, restaurant_name(r)) for r in restaurants]

    # BEGIN Question 7
    # Sum calculations from the question; each mean is evaluated once
    # rather than once per element.
    mean_x = mean(xs)
    mean_y = mean(ys)

    s_xx = sum([(x - mean_x) ** 2 for x in xs])
    s_yy = sum([(y - mean_y) ** 2 for y in ys])
    s_xy = sum([(x - mean_x) * (y - mean_y) for x, y in zip(xs, ys)])

    b = s_xy / s_xx
    a = mean_y - (b * mean_x)
    r_squared = (s_xy ** 2) / (s_xx * s_yy)
    # END Question 7

    def predictor(restaurant):
        """Evaluate the fitted line at this restaurant's feature value."""
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor and the R^2 value of its linear model.

    The predictor is a function from a restaurant to a predicted rating for
    `user`, found by least-squares linear regression using `feature_fn` on
    the items in `restaurants`.

    Arguments:
    user -- A user
    restaurants -- A non-empty sequence of restaurants
    feature_fn -- A function that takes a restaurant and returns a number

    Returns:
    A pair (predictor, r_squared).

    Raises:
    ZeroDivisionError -- if s_xx or s_xx * s_yy is zero (unguarded division).
    """
    xs = [feature_fn(r) for r in restaurants]
    ys = [user_rating(user, restaurant_name(r)) for r in restaurants]

    # BEGIN Question 7
    # Loop-invariant means, evaluated once.
    mean_x = mean(xs)
    mean_y = mean(ys)

    s_xx = sum(pow(x - mean_x, 2) for x in xs)
    s_yy = sum(pow(y - mean_y, 2) for y in ys)
    # Unpack each (x, y) pair instead of indexing into the tuple.
    s_xy = sum((x - mean_x) * (y - mean_y) for x, y in zip(xs, ys))

    b = s_xy / s_xx
    a = mean_y - b * mean_x
    r_squared = pow(s_xy, 2) / (s_xx * s_yy)
    # END Question 7

    def predictor(restaurant):
        """Evaluate the fitted line at this restaurant's feature value."""
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor and the R^2 value of its linear model.

    The predictor is a function from a restaurant to a predicted rating for
    `user`, found by least-squares linear regression using `feature_fn` on
    the items in `restaurants`.

    Arguments:
    user -- A user
    restaurants -- A non-empty sequence of restaurants reviewed by `user`
    feature_fn -- A function that takes a restaurant and returns a number

    Returns:
    A pair (predictor, r_squared).

    Raises:
    KeyError -- if a restaurant's name is missing from the user's reviews.
    ZeroDivisionError -- if s_xx or s_xx * s_yy is zero (unguarded division).
    """
    reviews_by_user = {
        review_restaurant_name(review): review_rating(review)
        for review in user_reviews(user).values()
    }

    xs = [feature_fn(r) for r in restaurants]
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    # BEGIN Question 7
    # Compute the means once, then the per-element deviations once; all
    # three sums below reuse those deviations.
    mean_x = mean(xs)
    mean_y = mean(ys)
    x_devs = [x - mean_x for x in xs]
    y_devs = [y - mean_y for y in ys]

    s_xx = sum(dx ** 2 for dx in x_devs)
    s_yy = sum(dy ** 2 for dy in y_devs)
    s_xy = sum(dx * dy for dx, dy in zip(x_devs, y_devs))

    b = s_xy / s_xx
    a = mean_y - (b * mean_x)
    r_squared = (s_xy ** 2) / (s_xx * s_yy)
    # END Question 7

    def predictor(restaurant):
        """Evaluate the fitted line at this restaurant's feature value."""
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor and the R^2 value of its linear model.

    The predictor is a function from a restaurant to a predicted rating for
    `user`, found by least-squares linear regression using `feature_fn` on
    the items in `restaurants`.

    Arguments:
    user -- A user
    restaurants -- A non-empty sequence of restaurants
    feature_fn -- A function that takes a restaurant and returns a number

    Returns:
    A pair (predictor, r_squared).

    Raises:
    ZeroDivisionError -- if s_xx or s_xx * s_yy is zero (unguarded division).
    """
    xs = [feature_fn(r) for r in restaurants]
    ys = [user_rating(user, restaurant_name(r)) for r in restaurants]

    # BEGIN Question 7
    # The original re-evaluated mean(xs)/mean(ys) on every pass of three
    # separate loops; compute them once and let sum() do the accumulation.
    mean_x = mean(xs)
    mean_y = mean(ys)

    s_xx = sum((x - mean_x) ** 2 for x in xs)
    s_yy = sum((y - mean_y) ** 2 for y in ys)
    s_xy = sum((x - mean_x) * (y - mean_y) for x, y in zip(xs, ys))

    b = s_xy / s_xx
    a = mean_y - b * mean_x
    r_squared = s_xy ** 2 / (s_xx * s_yy)
    # END Question 7

    def predictor(restaurant):
        """Evaluate the fitted line at this restaurant's feature value."""
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor and the R^2 value of its linear model.

    The predictor is a function from a restaurant to a predicted rating for
    `user`, found by least-squares linear regression using `feature_fn` on
    the items in `restaurants`.

    Arguments:
    user -- A user
    restaurants -- A non-empty sequence of restaurants
    feature_fn -- A function that takes a restaurant and returns a number

    Returns:
    A pair (predictor, r_squared).

    Raises:
    ZeroDivisionError -- if s_xx or s_xx * s_yy is zero (unguarded division).
    """
    xs = [feature_fn(r) for r in restaurants]
    ys = [user_rating(user, restaurant_name(r)) for r in restaurants]

    # Each mean is constant across elements, so compute it only once.
    mean_x = mean(xs)
    mean_y = mean(ys)

    s_xx = sum([(x - mean_x) ** 2 for x in xs])
    s_yy = sum([(y - mean_y) ** 2 for y in ys])
    # Walk xs and ys in lockstep rather than indexing both by position.
    s_xy = sum([(x - mean_x) * (y - mean_y) for x, y in zip(xs, ys)])

    b = s_xy / s_xx
    a = mean_y - b * mean_x
    r_squared = (s_xy ** 2) / (s_xx * s_yy)

    def predictor(restaurant):
        """Evaluate the fitted line at this restaurant's feature value."""
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor and the R^2 value of its linear model.

    The predictor is a function from a restaurant to a predicted rating for
    `user`, found by least-squares linear regression using `feature_fn` on
    the items in `restaurants`.

    Arguments:
    user -- A user
    restaurants -- A non-empty sequence of restaurants
    feature_fn -- A function that takes a restaurant and returns a number

    Returns:
    A pair (predictor, r_squared).

    Raises:
    ZeroDivisionError -- if s_xx or s_xx * s_yy is zero (unguarded division).
    """
    xs = [feature_fn(r) for r in restaurants]
    ys = [user_rating(user, restaurant_name(r)) for r in restaurants]

    # BEGIN Question 7
    # Means and deviations are each computed a single time and shared by
    # the three sums (the original rebuilt them for every sum and element).
    mean_x = mean(xs)
    mean_y = mean(ys)
    x_devs = [x - mean_x for x in xs]
    y_devs = [y - mean_y for y in ys]

    s_xx = sum(pow(dx, 2) for dx in x_devs)
    s_yy = sum(pow(dy, 2) for dy in y_devs)
    s_xy = sum(dx * dy for dx, dy in zip(x_devs, y_devs))

    b = s_xy / s_xx
    a = mean_y - b * mean_x
    r_squared = pow(s_xy, 2) / (s_xx * s_yy)
    # END Question 7

    def predictor(restaurant):
        """Evaluate the fitted line at this restaurant's feature value."""
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor and the R^2 value of its linear model.

    The predictor is a function from a restaurant to a predicted rating for
    `user`, found by least-squares linear regression using `feature_fn` on
    the items in `restaurants`.

    Arguments:
    user -- A user
    restaurants -- A non-empty sequence of restaurants reviewed by `user`
    feature_fn -- A function that takes a restaurant and returns a number

    Returns:
    A pair (predictor, r_squared).

    Raises:
    KeyError -- if a restaurant's name is missing from the user's reviews.
    ZeroDivisionError -- if s_xx or s_xx * s_yy is zero (unguarded division).
    """
    reviews_by_user = {
        review_restaurant_name(review): review_rating(review)
        for review in user_reviews(user).values()
    }

    xs = [feature_fn(r) for r in restaurants]
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    # BEGIN Question 7
    # One mean() call per list, instead of one per element per sum.
    mean_x = mean(xs)
    mean_y = mean(ys)

    s_xx = sum([pow(x - mean_x, 2) for x in xs])
    s_yy = sum([pow(y - mean_y, 2) for y in ys])
    s_xy = sum([(x - mean_x) * (y - mean_y) for x, y in zip(xs, ys)])

    b = s_xy / s_xx
    a = mean_y - b * mean_x
    r_squared = pow(s_xy, 2) / (s_xx * s_yy)
    # END Question 7

    def predictor(restaurant):
        """Evaluate the fitted line at this restaurant's feature value."""
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor and the R^2 value of its linear model.

    The predictor is a function from a restaurant to a predicted rating for
    `user`, found by least-squares linear regression using `feature_fn` on
    the items in `restaurants`.

    Arguments:
    user -- A user
    restaurants -- A non-empty sequence of restaurants reviewed by `user`
    feature_fn -- A function that takes a restaurant and returns a number

    Returns:
    A pair (predictor, r_squared).

    Raises:
    KeyError -- if a restaurant's name is missing from the user's reviews.
    ZeroDivisionError -- if s_xx or s_xx * s_yy is zero (unguarded division).
    """
    reviews_by_user = {
        review_restaurant_name(review): review_rating(review)
        for review in user_reviews(user).values()
    }

    xs = [feature_fn(r) for r in restaurants]
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    # BEGIN Question 7
    # Means are invariant across elements; evaluate them once.
    mean_x = mean(xs)
    mean_y = mean(ys)

    s_xx = sum([pow(x - mean_x, 2) for x in xs])
    s_yy = sum([pow(y - mean_y, 2) for y in ys])
    # zip is created at its point of use: it is a one-shot iterator.
    s_xy = sum([(x - mean_x) * (y - mean_y) for x, y in zip(xs, ys)])

    b = s_xy / s_xx
    a = mean_y - b * mean_x
    r_squared = pow(s_xy, 2) / (s_xx * s_yy)
    # END Question 7

    def predictor(restaurant):
        """Evaluate the fitted line at this restaurant's feature value."""
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor and the R^2 value of its linear model.

    The predictor is a function from a restaurant to a predicted rating for
    `user`, found by least-squares linear regression using `feature_fn` on
    the items in `restaurants`.

    Arguments:
    user -- A user
    restaurants -- A non-empty sequence of restaurants reviewed by `user`
    feature_fn -- A function that takes a restaurant and returns a number

    Returns:
    A pair (predictor, r_squared).

    Raises:
    KeyError -- if a restaurant's name is missing from the user's reviews.
    ZeroDivisionError -- if s_xx or s_xx * s_yy is zero (unguarded division).
    """
    reviews_by_user = {
        review_restaurant_name(review): review_rating(review)
        for review in user_reviews(user).values()
    }

    xs = [feature_fn(r) for r in restaurants]
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    # BEGIN Question 7
    # Evaluate each mean once rather than per element of each sum.
    mean_x = mean(xs)
    mean_y = mean(ys)

    s_xx = sum([(x - mean_x) ** 2 for x in xs])
    s_yy = sum([(y - mean_y) ** 2 for y in ys])
    # Unpack the (x, y) pair directly instead of indexing pair[0]/pair[1].
    s_xy = sum([(x - mean_x) * (y - mean_y) for x, y in zip(xs, ys)])

    b = s_xy / s_xx
    a = mean_y - b * mean_x
    r_squared = (s_xy ** 2) / (s_xx * s_yy)
    # END Question 7

    def predictor(restaurant):
        """Evaluate the fitted line at this restaurant's feature value."""
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor and the R^2 value of its linear model.

    The predictor is a function from a restaurant to a predicted rating for
    USER, found by least-squares linear regression using FEATURE_FN on the
    items in RESTAURANTS.

    Arguments:
    user -- A user
    restaurants -- A non-empty sequence of restaurants reviewed by `user`
    feature_fn -- A function that takes a restaurant and returns a number

    Returns:
    A pair (predictor, r_squared).

    Raises:
    KeyError -- if a restaurant's name is missing from the user's reviews.
    ZeroDivisionError -- if sxx or sxx * syy is zero (unguarded division).
    """
    # Dictionary of (name: rating) pairs for a single user.
    reviews_by_user = {
        review_restaurant_name(review): review_rating(review)
        for review in user_reviews(user).values()
    }

    xs = [feature_fn(r) for r in restaurants]
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    # Means are computed once; the lists below hold per-element deviations
    # from the mean (not sums of squares).
    mean_x = mean(xs)
    mean_y = mean(ys)
    x_devs = [x - mean_x for x in xs]
    y_devs = [y - mean_y for y in ys]

    # sxx = sum_i (x_i - mean(x))^2
    sxx = sum([pow(dx, 2) for dx in x_devs])
    # syy = sum_i (y_i - mean(y))^2
    syy = sum([pow(dy, 2) for dy in y_devs])
    # sxy = sum_i (x_i - mean(x)) * (y_i - mean(y))
    sxy = sum([dx * dy for dx, dy in zip(x_devs, y_devs)])

    # Fitted line: y = a + b * x
    b = sxy / sxx
    a = mean_y - b * mean_x
    # R^2 as defined by the formula sxy^2 / (sxx * syy).
    r_squared = pow(sxy, 2) / (sxx * syy)

    def predictor(restaurant):
        """Evaluate the fitted line at this restaurant's feature value."""
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor and the R^2 value of its linear model.

    The predictor is a function from a restaurant to a predicted rating for
    `user`, found by least-squares linear regression using `feature_fn` on
    the items in `restaurants`.

    Arguments:
    user -- A user
    restaurants -- A non-empty sequence of restaurants
    feature_fn -- A function that takes a restaurant and returns a number

    Returns:
    A pair (predictor, r_squared).

    Raises:
    ZeroDivisionError -- if sxx or sxx * syy is zero (unguarded division).
    """
    xs = [feature_fn(r) for r in restaurants]
    ys = [user_rating(user, restaurant_name(r)) for r in restaurants]

    # BEGIN Question 7
    # Both means are loop-invariant, so they are evaluated once here.
    x_bar = mean(xs)
    y_bar = mean(ys)

    sxx = sum([(x - x_bar) ** 2 for x in xs])
    syy = sum([(y - y_bar) ** 2 for y in ys])
    sxy = sum([(x - x_bar) * (y - y_bar) for x, y in zip(xs, ys)])

    b = sxy / sxx
    a = y_bar - b * x_bar
    r_squared = sxy ** 2 / (sxx * syy)
    # END Question 7

    def predictor(restaurant):
        """Evaluate the fitted line at this restaurant's feature value."""
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor and the R^2 value of its linear model.

    The predictor is a function from a restaurant to a predicted rating for
    `user`, found by least-squares linear regression using `feature_fn` on
    the items in `restaurants`.

    Arguments:
    user -- A user
    restaurants -- A non-empty sequence of restaurants reviewed by `user`
    feature_fn -- A function that takes a restaurant and returns a number

    Returns:
    A pair (predictor, r_squared).

    Raises:
    KeyError -- if a restaurant's name is missing from the user's reviews.
    ZeroDivisionError -- if s_xx or s_xx * s_yy is zero (unguarded division).
    """
    reviews_by_user = {
        review_restaurant_name(review): review_rating(review)
        for review in user_reviews(user).values()
    }

    xs = [feature_fn(r) for r in restaurants]
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    # BEGIN Question 7
    # Means are evaluated once, not once per element of each sum.
    mean_x = mean(xs)
    mean_y = mean(ys)

    # s_xx = sum_i (x_i - mean(x))^2
    s_xx = sum([pow(x - mean_x, 2) for x in xs])
    # s_yy = sum_i (y_i - mean(y))^2
    s_yy = sum([pow(y - mean_y, 2) for y in ys])
    # s_xy = sum_i (x_i - mean(x)) * (y_i - mean(y))
    s_xy = sum([(x - mean_x) * (y - mean_y) for x, y in zip(xs, ys)])

    # b = s_xy / s_xx;  a = mean(y) - b * mean(x);  R^2 = s_xy^2 / (s_xx s_yy)
    b = s_xy / s_xx
    a = mean_y - b * mean_x
    r_squared = s_xy ** 2 / (s_xx * s_yy)
    # END Question 7

    def predictor(restaurant):
        """Evaluate the fitted line at this restaurant's feature value."""
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor and the R^2 value of its linear model.

    The predictor is a function from a restaurant to a predicted rating for
    `user`, found by least-squares linear regression using `feature_fn` on
    the items in `restaurants`.

    Arguments:
    user -- A user
    restaurants -- A non-empty sequence of restaurants reviewed by `user`
    feature_fn -- A function that takes a restaurant and returns a number

    Returns:
    A pair (predictor, r_squared).

    Raises:
    KeyError -- if a restaurant's name is missing from the user's reviews.
    ZeroDivisionError -- if sxx or sxx * syy is zero (unguarded division).
    """
    reviews_by_user = {
        review_restaurant_name(review): review_rating(review)
        for review in user_reviews(user).values()
    }

    xs = [feature_fn(r) for r in restaurants]
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    # BEGIN Question 7
    # Means and deviations are computed once and reused by every sum. The
    # placeholder "0, 0, 0" assignment was dead code and has been dropped.
    mean_x = mean(xs)
    mean_y = mean(ys)
    x_devs = [x - mean_x for x in xs]
    y_devs = [y - mean_y for y in ys]

    sxx = sum([dx ** 2 for dx in x_devs])
    syy = sum([dy ** 2 for dy in y_devs])
    sxy = sum(dx * dy for dx, dy in zip(x_devs, y_devs))

    b = sxy / sxx
    a = mean_y - b * mean_x
    r_squared = pow(sxy, 2) / (sxx * syy)
    # END Question 7

    def predictor(restaurant):
        """Evaluate the fitted line at this restaurant's feature value."""
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor and the R^2 value of its linear model.

    The predictor is a function from a restaurant to a predicted rating for
    `user`, found by least-squares linear regression using `feature_fn` on
    the items in `restaurants`.

    Arguments:
    user -- A user
    restaurants -- A non-empty sequence of restaurants reviewed by `user`
    feature_fn -- A function that takes a restaurant and returns a number

    Returns:
    A pair (predictor, r_squared).

    Raises:
    KeyError -- if a restaurant's name is missing from the user's reviews.
    ZeroDivisionError -- if sxx or sxx * syy is zero (unguarded division).
    """
    reviews_by_user = {
        review_restaurant_name(review): review_rating(review)
        for review in user_reviews(user).values()
    }

    xs = [feature_fn(r) for r in restaurants]
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    # BEGIN Question 7
    # Individual components of a, b and r_squared. The means are hoisted so
    # they are evaluated once rather than for every element.
    mean_x = mean(xs)
    mean_y = mean(ys)

    sxx = sum([(x - mean_x) ** 2 for x in xs])
    syy = sum([(y - mean_y) ** 2 for y in ys])
    # Pair values with zip instead of indexing both lists by position.
    sxy = sum([(x - mean_x) * (y - mean_y) for x, y in zip(xs, ys)])

    b = sxy / sxx
    a = mean_y - b * mean_x
    r_squared = (sxy ** 2) / (sxx * syy)
    # END Question 7

    def predictor(restaurant):
        """Evaluate the fitted line at this restaurant's feature value."""
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor and the R^2 value of its linear model.

    The predictor is a function from a restaurant to a predicted rating for
    USER, found by least-squares linear regression using FEATURE_FN on the
    items in RESTAURANTS.

    Arguments:
    user -- A user
    restaurants -- A non-empty sequence of restaurants reviewed by `user`
    feature_fn -- A function that takes a restaurant and returns a number

    Returns:
    A pair (predictor, r_squared).

    Raises:
    KeyError -- if a restaurant's name is missing from the user's reviews.
    ZeroDivisionError -- if s_xx or s_xx * s_yy is zero (unguarded division).
    """
    reviews_by_user = {
        review_restaurant_name(review): review_rating(review)
        for review in user_reviews(user).values()
    }

    xs = [feature_fn(r) for r in restaurants]
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    # Means are loop-invariant; compute them once. (The scaffold's
    # placeholder string and "0, 0, 0" assignment were dead code.)
    mean_x = mean(xs)
    mean_y = mean(ys)

    s_xx = sum([pow(x - mean_x, 2) for x in xs])
    s_yy = sum([pow(y - mean_y, 2) for y in ys])
    s_xy = sum([(x - mean_x) * (y - mean_y) for x, y in zip(xs, ys)])

    b = s_xy / s_xx
    a = mean_y - b * mean_x
    r_squared = s_xy ** 2 / (s_xx * s_yy)

    def predictor(restaurant):
        """Evaluate the fitted line at this restaurant's feature value."""
        return b * feature_fn(restaurant) + a

    return predictor, r_squared
def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor and the R^2 value of its linear model.

    The predictor is a function from a restaurant to a predicted rating for
    `user`, found by least-squares linear regression using `feature_fn` on
    the items in `restaurants`.

    Arguments:
    user -- A user
    restaurants -- A non-empty sequence of restaurants
    feature_fn -- A function that takes a restaurant and returns a number

    Returns:
    A pair (predictor, r_squared).

    Raises:
    ZeroDivisionError -- if s_xx or s_xx * s_yy is zero (unguarded division).
    """
    xs = [feature_fn(r) for r in restaurants]
    ys = [user_rating(user, restaurant_name(r)) for r in restaurants]

    # BEGIN Question 7
    # The original called mean() on every iteration and grew its deviation
    # lists with `lst = lst + [...]`, which copies the list each time.
    # Compute the means once and build each deviation list in one pass.
    mean_x = mean(xs)
    mean_y = mean(ys)
    x_devs = [x - mean_x for x in xs]
    y_devs = [y - mean_y for y in ys]

    s_xx = sum(dx ** 2 for dx in x_devs)
    s_yy = sum(dy ** 2 for dy in y_devs)
    s_xy = sum(dy * dx for dy, dx in zip(y_devs, x_devs))

    b = s_xy / s_xx
    a = mean_y - b * mean_x
    r_squared = (s_xy ** 2) / (s_xx * s_yy)
    # END Question 7

    def predictor(restaurant):
        """Evaluate the fitted line at this restaurant's feature value."""
        return b * feature_fn(restaurant) + a

    return predictor, r_squared