def generate_X(I,J,K,target):
    """Draw a random factorisation X ~ U^T V + noise and store it in `target`.

    U is a K x I array and V is a K x J array; every entry of row k is drawn
    from a zero-mean normal whose variance is alpha[k] (for U) or beta[k]
    (for V).  Each entry of X (I x J) is then drawn from a normal centred on
    the matching entry of U^T V with variance tau.

    Arguments:
        I      -- number of rows of X (columns of U).
        J      -- number of columns of X (columns of V).
        K      -- number of rows of U and V (the inner dimension of U^T V).
        target -- passed unchanged to store_X_U_V as the output location.

    Returns None; the generated X, U and V are handed to store_X_U_V.
    """
    # The variances are indexed by the latent index k, so they need exactly K
    # entries.  Sizing them by I and J (as before) raised an IndexError
    # whenever K exceeded I or J.
    alpha = numpy.ones(K)
    beta = numpy.ones(K)
    tau = 1

    # random.normalvariate takes a standard deviation, hence the square roots.
    sd_U = [math.sqrt(alpha[k]) for k in range(0,K)]
    sd_V = [math.sqrt(beta[k]) for k in range(0,K)]
    sd_X = math.sqrt(tau)

    # Draw order (all of U, then all of V, then X row by row) is kept so that
    # a seeded `random` module produces the same matrices as before.
    U = numpy.array([
        [random.normalvariate(0,sd_U[k]) for i in range(0,I)]
        for k in range(0,K)
    ])
    V = numpy.array([
        [random.normalvariate(0,sd_V[k]) for j in range(0,J)]
        for k in range(0,K)
    ])
    X = numpy.array([
        [random.normalvariate(element,sd_X) for element in row]
        for row in numpy.dot(U.transpose(),V)
    ])

    # Write to file
    store_X_U_V(target,X,U,V)
    return
def recover(M,source,target,iterations,calc_predictions=False,M_inv=None):
    """Fit a VariationalPMF model to the matrix stored in `source` and save the result.

    Loads (I,J,K,X,U,V) from `source` via load_X_U_V, builds
    VariationalPMF(X,M,K), runs it, and writes the model's predicted_X, U and
    V to `target` via store_X_U_V.

    Arguments:
        M                -- passed to VariationalPMF as its second argument.
        source           -- location handed to load_X_U_V.
        target           -- location handed to store_X_U_V.
        iterations       -- forwarded to PMF.run as `iterations`.
        calc_predictions -- forwarded to PMF.run as `calc_predictions`.
        M_inv            -- forwarded to PMF.run as `M_inv`; when omitted (or
                            None) a fresh empty list is passed.

    Returns None.
    """
    # A literal [] default would be one list object shared by every call (and
    # by whatever PMF.run does with it), so create a new one per call instead.
    if M_inv is None:
        M_inv = []

    # Only K and X are needed here; the 6-way unpack is kept so a malformed
    # load result still fails in the same place.
    (_,_,K,X,_,_) = load_X_U_V(source)

    PMF = VariationalPMF(X,M,K)
    # NOTE(review): updates=10 is forwarded as-is; its meaning is defined by
    # VariationalPMF.run, which is not visible here.
    PMF.run(iterations=iterations,updates=10,calc_predictions=calc_predictions,M_inv=M_inv)

    # Write predicted_X, U, V to output file
    store_X_U_V(target,PMF.predicted_X,PMF.U,PMF.V)
    return
# Script fragment: fit a VariationalPMF model to X, store the result, then
# report RMSE values and start a figure for the predictions.
# NOTE(review): I, J, fraction_unknown and X are not assigned in this
# fragment -- presumably defined earlier in the file; confirm.

# presumably M marks which entries of X are treated as known and M_inv is its
# complement -- verify against generate_M / calc_inverse_M.
M = generate_M(I,J,fraction_unknown)
M_inv = calc_inverse_M(M)
# K is passed as the third argument of VariationalPMF below.
K = 3
outputfile = "recovered_matrices.txt"
iterations = 10
PMF = VariationalPMF(X,M,K)
PMF.run(iterations=iterations,updates=1,calc_predictions=True,M_inv=M_inv)
# Results are read back from attributes of the model object after the run.
predicted_U = PMF.U
predicted_V = PMF.V
predicted_X = PMF.predicted_X
# Store the predicted matrix X together with U and V
store_X_U_V(outputfile,predicted_X,predicted_U,predicted_V)
# Now we plot the predictions vs the true values
actual_vs_predicted = recover_predictions(M,X,predicted_X)
# Unzip into two parallel tuples; assumes recover_predictions returns a
# sequence of (actual, predicted) pairs -- TODO confirm.
(actual,predicted) = zip(*actual_vs_predicted)
RMSE_predictions = compute_RMSE(actual_vs_predicted)
RMSE_training = PMF.RMSE
print "RMSE of predictions: %s" % RMSE_predictions
print "RMSE of training data: %s" % RMSE_training
# Start figure 1 and label it (assumes plt is matplotlib.pyplot -- confirm).
# Nothing is drawn within this fragment.
bins_predictions = plt.figure(1)
plt.title('Histogram of values - true values, predictions')
plt.xlabel('values - actual vs predicted')
plt.ylabel('no.')