import utility
import numpy

# Evaluate NMF reconstruction quality for several latent-dimension choices.
# R is the ratings matrix and W its observation mask (1 = rating observed);
# for each k, factor R into U.V and report the least-squared error computed
# over the observed entries only.
r, w = utility.get_R()
for k in [10, 50, 100]:
    U, V = utility.nmf(r, k, w)
    uv = numpy.dot(U, V)
    # Masked LSE, vectorized: only entries with w[i, j] == 1 contribute.
    # Replaces the original O(rows*cols) Python double loop with one
    # numpy boolean-mask reduction — identical result, native speed.
    lse = numpy.sum(((r - uv) ** 2)[w == 1])
    # Parenthesized so this works as a print statement (Python 2, the
    # file's dialect) and as a print() call (Python 3) alike.
    print('For %d latent terms, LSE: %f' % (k, lse))
# Precision/recall sweep over decision thresholds for NMF rating predictions
# under 10-fold cross-validation.
# NOTE(review): this fragment references test_index, truePos, K_VALUE and
# KFold, none of which are defined in the visible portion of the file —
# presumably set up earlier; confirm against the full script.
falsePos = 0; falseNeg = 0; precisionArray = []
recallArray = []
# Candidate thresholds 0.0, 0.5, ..., 4.5 — presumably a 0-5 rating scale.
threshold_ranges = [x/10.0 for x in range(0, 50, 5)]
#threshold_ranges = [3, 3.5]
# 10 shuffled folds over 100000 ratings (sklearn-style KFold signature).
kf = KFold(100000, 10, True)
for train, test in kf:
    for threshold in threshold_ranges:
        test_index += 1
        # local_error is initialized here but not used in the visible part
        # of this loop — likely consumed further down in the full file.
        local_error = 0
        # Rebuild R/W with the test fold's rows skipped, then factor the
        # remaining 90% (see utility.r_skiplist for the exact semantics).
        r, w, test_rows = utility.r_skiplist(test)
        u, v = utility.nmf(r, K_VALUE, w)
        uv = numpy.dot(u, v)  # UV here is the 90% trained set. Comparison next
        for row in test_rows:
            # Rows look like (user_id, movie_id, ...) with 1-based ids;
            # convert to 0-based matrix indices.
            ui = row[0]-1
            mi = row[1]-1
            # A rating >= threshold counts as a "positive" (liked) item,
            # both for the prediction (uv) and the ground truth (r).
            if(uv[ui, mi] >= threshold):
                if(r[ui, mi] >= threshold):
                    truePos += 1
                else:
                    falsePos += 1
            else:
                if(r[ui, mi] >= threshold): #if(uv[ui, mi] < threshold):
                # NOTE(review): the body of this branch (presumably
                # falseNeg += 1) is truncated out of this view of the file.
# # Test for empty rows. # for row in w: # sum = 0 # for element in row: # sum += element # if sum == 0: # print sum # # print "Switching to columns now" # for column in w.T: # sum = 0 # for element in column: # sum += element # if sum == 0: # print sum u, v = utility.nmf(r, K_VALUE, w) uv = numpy.dot(u, v) # UV here is the 90% trained set. Comparison next for row in test_rows: ui = row[0] - 1 mi = row[1] - 1 local_error += numpy.abs(r[ui, mi] - uv[ui, mi]) test_error.append(local_error / 10000) print test_error print max(test_error) print min(test_error)