def fit(
    real,
    known,
    latent_d=1,
    ret_pmf=False,
    subtract_mean=False,
    sig_u=1e10,
    sig_v=1e10,
    sig=1,
    do_bayes=False,
    burnin=10,
    samps=200,
    stop_thresh=1e-10,
    min_learning_rate=1e-20,
):
    """Fit a PMF model to the observed entries of ``real`` and predict.

    The entries of ``real`` selected by the nonzero positions of ``known``
    are packed into an ``(n_observed, 3)`` array of ``[row, col, value]``
    triples and handed to ``ProbabilisticMatrixFactorization``.

    Args:
        real: 2-D indexable of values; only ``real[i, j]`` at observed
            positions is read.
        known: 2-D mask; its nonzero entries mark the observed positions.
        latent_d: Latent dimensionality passed to the model constructors.
        ret_pmf: If true, return ``(model, prediction)`` instead of just
            the prediction.
        subtract_mean: Forwarded as the third constructor argument of
            ``ProbabilisticMatrixFactorization``.
        sig_u: Assigned to the model's ``sigma_u_sq`` attribute.
        sig_v: Assigned to the model's ``sigma_v_sq`` attribute.
        sig: Assigned to the model's ``sigma_sq`` attribute.
        do_bayes: If true, continue from the fitted model with a
            ``BayesianPMF`` sampler instead of returning the point estimate.
        burnin: Number of initial sampler draws to discard.
        samps: Number of sampler draws passed to ``BayesianPMF.predict``.
        stop_thresh: Assigned to the model's ``stop_thresh`` attribute.
        min_learning_rate: Assigned to the model's ``min_learning_rate``
            attribute.

    Returns:
        The prediction (``pmf.predicted_matrix()`` or ``bpmf.predict(...)``),
        or a ``(model, prediction)`` tuple when ``ret_pmf`` is true.
    """
    # Size the rating table from the index list we actually iterate over,
    # so a mask whose nonzero values are not exactly 1 cannot produce a
    # table that is too short (IndexError) or padded with bogus zero rows.
    observed = np.transpose(known.nonzero())
    ratings = np.zeros((len(observed), 3))
    for idx, (i, j) in enumerate(observed):
        ratings[idx] = [i, j, real[i, j]]

    pmf = ProbabilisticMatrixFactorization(ratings, latent_d, subtract_mean)
    pmf.sigma_sq = sig
    pmf.sigma_u_sq = sig_u
    pmf.sigma_v_sq = sig_v
    pmf.stop_thresh = stop_thresh
    pmf.min_learning_rate = min_learning_rate
    pmf.fit()

    if not do_bayes:
        pred = pmf.predicted_matrix()
        return (pmf, pred) if ret_pmf else pred

    # Build the Bayesian model with the caller's latent dimension (this was
    # previously hard-coded to 1), then seed it with the fitted PMF state.
    bpmf = BayesianPMF(ratings, latent_d)
    bpmf.__setstate__(pmf.__getstate__())

    sampler = bpmf.samples()
    # Burn-in: advance the sampler by ``burnin`` draws without keeping them
    # (the itertools "consume" recipe).
    next(islice(sampler, burnin, burnin), None)

    pred = bpmf.predict(islice(sampler, samps))
    return (bpmf, pred) if ret_pmf else pred
def fit(real, known, latent_d=1, ret_pmf=False, subtract_mean=False,
        sig_u=1e10, sig_v=1e10, sig=1, do_bayes=False, burnin=10, samps=200,
        stop_thresh=1e-10, min_learning_rate=1e-20):
    """Fit PMF on the known entries of a matrix and return predictions.

    Parameters
    ----------
    real : 2-D indexable
        Source of rating values; read only at the observed positions.
    known : 2-D mask
        Nonzero entries mark which positions of ``real`` are observed.
    latent_d : int
        Latent dimension handed to both model constructors.
    ret_pmf : bool
        When true, the fitted model is returned alongside the prediction.
    subtract_mean : bool
        Passed through to ``ProbabilisticMatrixFactorization``.
    sig_u, sig_v, sig : float
        Stored on the model as ``sigma_u_sq``, ``sigma_v_sq`` and
        ``sigma_sq`` respectively.
    do_bayes : bool
        When true, run the ``BayesianPMF`` sampler starting from the fitted
        model and predict from its samples.
    burnin : int
        Count of leading sampler draws that are thrown away.
    samps : int
        Count of sampler draws given to ``BayesianPMF.predict``.
    stop_thresh, min_learning_rate : float
        Stored on the model under the same attribute names before fitting.

    Returns
    -------
    The prediction, or ``(model, prediction)`` if ``ret_pmf`` is true. The
    model is the ``BayesianPMF`` instance when ``do_bayes`` is true and the
    ``ProbabilisticMatrixFactorization`` instance otherwise.
    """
    # One [row, col, value] triple per observed entry. The table is sized by
    # the number of nonzero positions rather than ``known.sum()``, which only
    # matches that count for boolean / 0-1 masks.
    positions = np.transpose(known.nonzero())
    ratings = np.zeros((len(positions), 3))
    ratings[:, :2] = positions
    ratings[:, 2] = [real[i, j] for i, j in positions]

    model = ProbabilisticMatrixFactorization(ratings, latent_d, subtract_mean)
    model.sigma_sq = sig
    model.sigma_u_sq = sig_u
    model.sigma_v_sq = sig_v
    model.stop_thresh = stop_thresh
    model.min_learning_rate = min_learning_rate
    model.fit()

    if do_bayes:
        # Use the requested latent dimension here; it used to be a literal 1,
        # which disagreed with the fitted model whenever latent_d != 1.
        point_estimate = model
        model = BayesianPMF(ratings, latent_d)
        model.__setstate__(point_estimate.__getstate__())

        draws = model.samples()
        # Discard the first ``burnin`` draws; the empty islice consumes them
        # and ``next`` falls back to None.
        next(islice(draws, burnin, burnin), None)
        pred = model.predict(islice(draws, samps))
    else:
        pred = model.predicted_matrix()

    if ret_pmf:
        return (model, pred)
    return pred
ratings[idx, :] = i, j, data[i, j] else: ratings = data['_ratings'] bpmf = BayesianPMF(ratings, args.latent_d) print("Doing initial MAP fit") for train, valid in bpmf.fit_minibatches_until_validation( args.batch_size, args.validation_size, do_yield=True, stop_thresh=args.stop_thresh): print("\t{} {:.5} {:.5}".format(datetime.datetime.now().time(), train, valid)) print("Saving model") with open(args.out + '_model.pkl', 'wb') as f: pickle.dump(bpmf, f) print("Getting MCMC samples") num_samps = 2000 u_samps = np.empty((num_samps, bpmf.num_users, bpmf.latent_d)); u_samps.fill(np.nan) v_samps = np.empty((num_samps, bpmf.num_items, bpmf.latent_d)); v_samps.fill(np.nan) for idx, (u, v) in enumerate(itertools.islice(bpmf.samples(), num_samps)): if idx % 10 == 0: print(datetime.datetime.now().time(), idx) u_samps[idx,:,:] = u v_samps[idx,:,:] = v print("Saving u samples") np.save(args.out + '_u_samps.npy', u_samps) print("Saving v samples") np.save(args.out + '_v_samps.npy', v_samps)