def run(name, dataset, config, all_users, all_movies, tests, initial_v, sep):
    """Train a CFRBM on the ratings in ``dataset`` and score it after every epoch.

    Args:
        name: Suffix used in the name of the JSON results file.
        dataset: Path of a text file holding one
            ``uid<sep>mid<sep>rating<sep>timestamp`` record per line.
        config: Mapping providing ``name``, ``number_hidden``, ``epochs``,
            ``ks``, ``momentums``, ``l_w``, ``l_v``, ``l_h``, ``decay`` and
            ``batch_size``.  ``ks``, ``momentums`` and the ``l_*`` entries are
            sequences used as per-epoch schedules.
        all_users: Not used by this function; kept for interface compatibility.
        all_movies: Sequence of movie ids.  The position of a movie selects
            the five consecutive mask entries that belong to it.
        tests: Mapping of user id to an iterable of ``(movie, rating)`` pairs
            that are scored after each epoch.
        initial_v: Not used by this function; kept for interface compatibility.
        sep: Field separator of ``dataset``.

    Raises:
        ValueError: If a dataset line does not have exactly four fields, or a
            rating in ``dataset`` refers to a movie missing from ``all_movies``.

    Side effects:
        Prints progress to stdout, rewrites
        ``experiments/<config name>_<name>.json`` after every epoch and prints
        the third value returned by ``rbm.get_weights()`` at the end.
    """
    config_name = config['name']
    number_hidden = config['number_hidden']
    epochs = config['epochs']
    ks = config['ks']
    momentums = config['momentums']
    l_w = config['l_w']
    l_v = config['l_v']
    l_h = config['l_h']
    decay = config['decay']
    batch_size = config['batch_size']

    config_result = config.copy()
    config_result['results'] = []

    rating_levels = 5
    n_movies = len(all_movies)
    n_visible = n_movies * rating_levels

    vis = T.matrix()
    vmasks = T.matrix()
    rbm = CFRBM(n_visible, number_hidden)

    # Movie positions are resolved once so each lookup is O(1) instead of a
    # list scan per rating.  setdefault keeps the first occurrence, which is
    # what list.index() returns.
    movie_pos = {}
    for pos, movie in enumerate(all_movies):
        movie_pos.setdefault(movie, pos)

    profiles = defaultdict(list)
    with open(dataset, 'rt') as data:
        for line in data:
            uid, mid, rat, _timestamp = line.strip().split(sep)
            profiles[uid].append((mid, float(rat)))

    print("Users and ratings loaded")

    def scheduled(schedule, epoch):
        """Return the schedule entry that applies to ``epoch``.

        The epochs are cut into ``len(schedule)`` equal slices (rounded
        down); epochs past the last slice use the final entry.
        """
        # Floor division keeps the index an int on Python 3; max(1, ...)
        # avoids dividing by zero when there are fewer epochs than entries.
        step = max(1, epochs // len(schedule))
        position = epoch // step
        return schedule[position] if position < len(schedule) else schedule[-1]

    def encode_batch(user_ids):
        """Build the (inputs, masks) float32 matrices for ``user_ids``."""
        size = len(user_ids)
        rows = []
        mask = np.zeros((size, n_visible), dtype='float32')
        for row, user_id in enumerate(user_ids):
            user_profile = [0.] * n_movies
            # .get() instead of [] so that encoding a test-only user does not
            # insert an empty profile into the training data.
            for movie_id, rat in profiles.get(user_id, ()):
                try:
                    pos = movie_pos[movie_id]
                except KeyError:
                    raise ValueError(
                        '{!r} is not in all_movies'.format(movie_id))
                user_profile[pos] = rat
                mask[row, rating_levels * pos:rating_levels * (pos + 1)] = 1
            rows.append(expand(np.array([user_profile])).astype('float32'))
        return np.array(rows).reshape(size, n_visible), mask

    for j in range(epochs):
        k = scheduled(ks, j)
        momentum = scheduled(momentums, j)
        current_l_w = scheduled(l_w, j)
        current_l_v = scheduled(l_v, j)
        current_l_h = scheduled(l_h, j)

        train = rbm.cdk_fun(vis,
                            vmasks,
                            k=k,
                            w_lr=current_l_w,
                            v_lr=current_l_v,
                            h_lr=current_l_h,
                            decay=decay,
                            momentum=momentum)
        predict = rbm.predict(vis)

        # list(...) so chunker receives a real sequence on Python 3 as well.
        for batch in chunker(list(profiles.keys()), batch_size):
            train_batch, train_masks = encode_batch(batch)
            train(train_batch, train_masks)
            sys.stdout.write('.')
            sys.stdout.flush()

        ratings = []
        predictions = []
        for batch in chunker(list(tests.keys()), batch_size):
            test_batch, _ = encode_batch(batch)
            user_preds = revert_expected_value(predict(test_batch))
            for row, profile_id in enumerate(batch):
                for entry in tests[profile_id]:
                    try:
                        movie, rating = entry
                        predicted = user_preds[row][movie_pos[movie]]
                        rating = float(rating)
                    except (KeyError, ValueError, TypeError, IndexError):
                        # Best effort: skip only the entry that cannot be
                        # scored, not the rest of this user's test ratings.
                        continue
                    ratings.append(rating)
                    predictions.append(predicted)

        distances = np.array(ratings) - np.array(predictions)
        # np.abs is vectorised and, unlike np.vectorize(abs), accepts an
        # empty array (the metrics are then NaN).
        mae = np.abs(distances).mean()
        rmse = sqrt((distances ** 2).mean())

        iteration_result = {
            'iteration': j,
            'k': k,
            'momentum': momentum,
            'mae': mae,
            'rmse': rmse,
            'lrate': current_l_w
        }
        config_result['results'].append(iteration_result)

        print(iteration_str.format(j, k, current_l_w, momentum, mae, rmse))

        # Rewritten every epoch with all results gathered so far.
        with open('experiments/{}_{}.json'.format(config_name, name), 'wt') as res_output:
            res_output.write(json.dumps(config_result, indent=4))

    W, V, H = rbm.get_weights()
    print(H)
def run(name, dataset, config, all_users, all_movies, tests, initial_v, sep):
    """Train a CFRBM on user rating profiles and evaluate it each epoch.

    Args:
        name: Suffix used in the name of the JSON results file.
        dataset: Path of a text file with one
            ``uid<sep>mid<sep>rating<sep>timestamp`` record per line.
        config: Mapping providing ``name``, ``number_hidden``, ``epochs``,
            ``ks``, ``momentums``, ``l_w``, ``l_v``, ``l_h``, ``decay`` and
            ``batch_size``.  ``ks``, ``momentums`` and the ``l_*`` entries are
            sequences indexed by training progress.
        all_users: Not used by this function; kept for interface compatibility.
        all_movies: Sequence of movie ids; a movie at position ``p`` owns
            mask entries ``5 * p`` to ``5 * p + 4``.
        tests: Mapping of user id to an iterable of ``(movie, rating)`` pairs
            scored after each epoch.
        initial_v: Not used by this function; kept for interface compatibility.
        sep: Field separator of ``dataset``.

    Raises:
        ValueError: If a dataset line does not have exactly four fields, or a
            rating in ``dataset`` refers to a movie missing from ``all_movies``.

    Side effects:
        Prints progress to stdout and rewrites ``<config name>_<name>.json``
        in the working directory after every epoch.
    """
    config_name = config['name']
    number_hidden = config['number_hidden']
    epochs = config['epochs']
    ks = config['ks']
    momentums = config['momentums']
    l_w = config['l_w']
    l_v = config['l_v']
    l_h = config['l_h']
    decay = config['decay']
    batch_size = config['batch_size']

    config_result = config.copy()
    config_result['results'] = []

    levels = 5
    movie_count = len(all_movies)
    visible_count = movie_count * levels

    vis = T.matrix()
    vmasks = T.matrix()
    rbm = CFRBM(visible_count, number_hidden)

    # One-off index of movie id -> position; replaces a list scan per rating.
    # The first occurrence wins, as with list.index().
    column_of = {}
    for column, movie in enumerate(all_movies):
        column_of.setdefault(movie, column)

    profiles = defaultdict(list)
    with open(dataset, 'rt') as data:
        for line in data:
            uid, mid, rat, _timestamp = line.strip().split(sep)
            profiles[uid].append((mid, float(rat)))

    print("Users and ratings loaded")

    def pick(schedule, epoch):
        """Return the entry of ``schedule`` in force during ``epoch``.

        Epochs are split into ``len(schedule)`` equal slices (rounded down);
        any epoch beyond the last slice gets the final entry.
        """
        # '//' keeps the index integral on Python 3; max(1, ...) prevents a
        # division by zero when the schedule is longer than the run.
        slice_len = max(1, epochs // len(schedule))
        idx = epoch // slice_len
        if idx < len(schedule):
            return schedule[idx]
        return schedule[-1]

    def build_batch(user_ids):
        """Return float32 (inputs, masks) matrices, one row per user id."""
        count = len(user_ids)
        mask = np.zeros((count, visible_count), dtype='float32')
        encoded = []
        for row, user_id in enumerate(user_ids):
            user_profile = [0.] * movie_count
            # profiles.get() avoids the defaultdict side effect of adding an
            # empty training profile for users that only appear in tests.
            for movie_id, rat in profiles.get(user_id, ()):
                try:
                    column = column_of[movie_id]
                except KeyError:
                    raise ValueError(
                        '{!r} is not in all_movies'.format(movie_id))
                user_profile[column] = rat
                mask[row, levels * column:levels * (column + 1)] = 1
            encoded.append(expand(np.array([user_profile])).astype('float32'))
        return np.array(encoded).reshape(count, visible_count), mask

    for j in range(epochs):
        k = pick(ks, j)
        momentum = pick(momentums, j)
        current_l_w = pick(l_w, j)
        current_l_v = pick(l_v, j)
        current_l_h = pick(l_h, j)

        train = rbm.cdk_fun(vis,
                            vmasks,
                            k=k,
                            w_lr=current_l_w,
                            v_lr=current_l_v,
                            h_lr=current_l_h,
                            decay=decay,
                            momentum=momentum)
        predict = rbm.predict(vis)

        # Materialise the keys: a dict view is not sliceable on Python 3.
        for batch in chunker(list(profiles.keys()), batch_size):
            train_batch, train_masks = build_batch(batch)
            train(train_batch, train_masks)
            sys.stdout.write('.')
            sys.stdout.flush()

        ratings = []
        predictions = []
        for batch in chunker(list(tests.keys()), batch_size):
            test_batch, _ = build_batch(batch)
            user_preds = revert_expected_value(predict(test_batch))
            for row, profile_id in enumerate(batch):
                for entry in tests[profile_id]:
                    try:
                        movie, rating = entry
                        predicted = user_preds[row][column_of[movie]]
                        rating = float(rating)
                    except (KeyError, ValueError, TypeError, IndexError):
                        # An entry that cannot be scored is skipped on its
                        # own; the user's remaining entries are still used.
                        continue
                    ratings.append(rating)
                    predictions.append(predicted)

        distances = np.array(ratings) - np.array(predictions)
        # Vectorised absolute value; also tolerates an empty array, where
        # np.vectorize(abs) would raise.
        mae = np.abs(distances).mean()
        rmse = sqrt((distances ** 2).mean())

        iteration_result = {
            'iteration': j,
            'k': k,
            'momentum': momentum,
            'mae': mae,
            'rmse': rmse,
            'lrate': current_l_w
        }
        config_result['results'].append(iteration_result)

        print(iteration_str.format(j, k, current_l_w, momentum, mae, rmse))

        # The full result list so far is written again after each epoch.
        with open('{}_{}.json'.format(config_name, name), 'wt') as res_output:
            res_output.write(json.dumps(config_result, indent=4))
def run(name, dataset, user_info, config, all_users, all_movies, all_occupations, all_sex, all_ages, tests, initial_v, sep):
    """Train the four-input CFRBM (ratings, occupation, sex, age) and evaluate it.

    Args:
        name: Suffix used in the name of the JSON results file.
        dataset: Path of a text file with one
            ``uid<sep>mid<sep>rating<sep>timestamp`` record per line.
        user_info: Path of a ``|``-delimited file.  Column 0 is the user id,
            columns 1-6 are read as age values, column 7 as sex and the
            remaining columns as occupation values (all parsed with ``int``).
        config: Mapping providing ``name``, ``number_hidden``, ``epochs``,
            ``ks``, ``momentums``, ``l_w``, ``l_v``, ``l_h``, ``decay`` and
            ``batch_size``.  ``ks``, ``momentums`` and the ``l_*`` entries are
            per-epoch schedules.
        all_users: Not used by this function; kept for interface compatibility.
        all_movies: Sequence of movie ids; a movie at position ``p`` owns
            rating-mask entries ``5 * p`` to ``5 * p + 4``.
        all_occupations: Sequence whose length sets the occupation input width.
        all_sex: Not used by this function; the sex input has width 1.
        all_ages: Sequence whose length sets the age input width.
        tests: Mapping of user id to an iterable of ``(movie, rating)`` pairs
            scored after each epoch.
        initial_v: Not used by this function; kept for interface compatibility.
        sep: Field separator of ``dataset``.

    Raises:
        ValueError: If a dataset line does not have exactly four fields, or a
            rating in ``dataset`` refers to a movie missing from ``all_movies``.

    Side effects:
        Prints progress and the precision/recall tuple to stdout and rewrites
        ``experiments/<config name>_<name>.json`` after every epoch.
    """
    config_name = config['name']
    number_hidden = config['number_hidden']
    epochs = config['epochs']
    ks = config['ks']
    momentums = config['momentums']
    l_w = config['l_w']
    l_v = config['l_v']
    l_h = config['l_h']
    decay = config['decay']
    batch_size = config['batch_size']

    config_result = config.copy()
    config_result['results'] = []

    rating_levels = 5
    n_movies = len(all_movies)
    n_visible = n_movies * rating_levels
    n_occupations = len(all_occupations)
    n_ages = len(all_ages)

    vis_x = T.matrix()
    vis_o = T.matrix()
    vis_s = T.matrix()
    vis_a = T.matrix()
    vmasks_x = T.matrix()
    vmasks_o = T.matrix()
    vmasks_s = T.matrix()
    vmasks_a = T.matrix()
    rbm = CFRBM(n_visible, n_occupations, 1, n_ages, number_hidden)

    # Resolve movie positions once (first occurrence wins, as with
    # list.index()) instead of scanning the list for every rating.
    movie_pos = {}
    for pos, movie in enumerate(all_movies):
        movie_pos.setdefault(movie, pos)

    profiles = defaultdict(list)
    with open(dataset, 'rt') as data:
        for line in data:
            uid, mid, rat, _timestamp = line.strip().split(sep)
            profiles[uid].append((mid, float(rat)))

    print("Users and ratings loaded")

    user_occ = defaultdict(list)
    user_sex = defaultdict(list)
    user_age = defaultdict(list)
    # Opened in text mode inside a with-block: the handle is closed again and
    # csv.reader also works on Python 3, which rejects binary files.
    with open(user_info, 'r') as info_file:
        for row in csv.reader(info_file, delimiter='|'):
            user_age[row[0]] = [int(x) for x in row[1:7]]
            user_sex[row[0]] = [int(row[7])]
            user_occ[row[0]] = [int(x) for x in row[8:]]
    print("User info loaded")

    # NOTE(review): user_occ / user_sex / user_age are parsed but never fed to
    # the model -- the occupation, sex and age inputs built below are all
    # zeros for every user.  That behaviour is preserved here; confirm whether
    # the parsed vectors were meant to fill those inputs before wiring them in.

    def scheduled(schedule, epoch):
        """Return the schedule entry that applies to ``epoch``.

        The epochs are cut into ``len(schedule)`` equal slices (rounded
        down); epochs past the last slice use the final entry.
        """
        # Floor division keeps the index an int on Python 3; max(1, ...)
        # avoids dividing by zero when there are fewer epochs than entries.
        step = max(1, epochs // len(schedule))
        position = epoch // step
        return schedule[position] if position < len(schedule) else schedule[-1]

    def encode_batch(user_ids):
        """Return ``(inputs, masks)``: two 4-tuples of float32 matrices.

        Both tuples are ordered ratings, occupation, sex, age and have one
        row per entry of ``user_ids``.
        """
        size = len(user_ids)
        rating_rows, occ_rows, sex_rows, age_rows = [], [], [], []
        mask_x = np.zeros((size, n_visible), dtype='float32')
        for row, user_id in enumerate(user_ids):
            user_profile = [0.] * n_movies
            # .get() instead of [] so that encoding a test-only user does not
            # insert an empty profile into the training data.
            for movie_id, rat in profiles.get(user_id, ()):
                try:
                    pos = movie_pos[movie_id]
                except KeyError:
                    raise ValueError(
                        '{!r} is not in all_movies'.format(movie_id))
                user_profile[pos] = rat
                mask_x[row, rating_levels * pos:rating_levels * (pos + 1)] = 1
            rating_rows.append(
                expand(np.array([user_profile])).astype('float32'))
            occ_rows.append(
                expand(np.array([[0.] * n_occupations]), k=1).astype('float32'))
            sex_rows.append(
                expand(np.array([[0.]]), k=1).astype('float32'))
            age_rows.append(
                expand(np.array([[0.] * n_ages]), k=1).astype('float32'))
        inputs = (
            np.array(rating_rows).reshape(size, n_visible),
            np.array(occ_rows).reshape(size, n_occupations),
            np.array(sex_rows).reshape(size, 1),
            np.array(age_rows).reshape(size, n_ages),
        )
        # The demographic masks are all ones.
        masks = (
            mask_x,
            np.ones((size, n_occupations), dtype='float32'),
            np.ones((size, 1), dtype='float32'),
            np.ones((size, n_ages), dtype='float32'),
        )
        return inputs, masks

    for j in range(epochs):
        k = scheduled(ks, j)
        momentum = scheduled(momentums, j)
        current_l_w = scheduled(l_w, j)
        current_l_v = scheduled(l_v, j)
        current_l_h = scheduled(l_h, j)

        train = rbm.cdk_fun(vis_x, vis_o, vis_s, vis_a,
                            vmasks_x, vmasks_o, vmasks_s, vmasks_a,
                            k=k,
                            w_lr=current_l_w,
                            v_lr=current_l_v,
                            h_lr=current_l_h,
                            decay=decay,
                            momentum=momentum)
        predict = rbm.predict(vis_x, vis_o, vis_s, vis_a)

        start_time = time.time()
        # list(...) so chunker receives a real sequence on Python 3 as well.
        for batch in chunker(list(profiles.keys()), batch_size):
            inputs, masks = encode_batch(batch)
            train(*(inputs + masks))
            sys.stdout.write('.')
            sys.stdout.flush()
        train_time = time.time() - start_time

        ratings = []
        predictions = []
        start_time = time.time()
        for batch in chunker(list(tests.keys()), batch_size):
            inputs, _ = encode_batch(batch)
            user_preds = revert_expected_value(predict(*inputs))
            for row, profile_id in enumerate(batch):
                for entry in tests[profile_id]:
                    try:
                        movie, rating = entry
                        predicted = user_preds[row][movie_pos[movie]]
                        rating = float(rating)
                    except (KeyError, ValueError, TypeError, IndexError):
                        # Best effort: skip only the entry that cannot be
                        # scored, not the rest of this user's test ratings.
                        continue
                    ratings.append(rating)
                    predictions.append(predicted)
        test_time = time.time() - start_time

        # Both sides are cast to uint8 and compared against 3 to obtain the
        # binary labels passed to precision_recall_fscore_support.
        true_rat = np.array(ratings, dtype=np.uint8)
        pred_rat = np.array(predictions, dtype=np.uint8)
        prec_rec = precision_recall_fscore_support(true_rat < 3, pred_rat < 3, average='binary')
        print(prec_rec)

        distances = np.array(ratings) - np.array(predictions)
        # np.abs is vectorised; np.vectorize(abs) was a slow Python loop.
        mae = np.abs(distances).mean()
        rmse = sqrt((distances ** 2).mean())

        iteration_result = {
            'iteration': j,
            'k': k,
            'momentum': momentum,
            'mae': mae,
            'rmse': rmse,
            'lrate': current_l_w,
            'train_time': train_time,
            'test_time': test_time,
            'prec_rec': prec_rec
        }
        config_result['results'].append(iteration_result)

        print(iteration_str.format(j, k, current_l_w, momentum, mae, rmse))

        # Rewritten every epoch with all results gathered so far.
        with open('experiments/{}_{}.json'.format(config_name, name), 'wt') as res_output:
            res_output.write(json.dumps(config_result, indent=4))
def run(name, dataset, config, all_users, all_movies, tests, initial_v, sep):
    """Train a 20-level CFRBM with decaying learning rates and save its weights.

    Args:
        name: Suffix used in the name of the JSON results file.
        dataset: Path of a text file with one ``uid<sep>mid<sep>rating``
            record per line (three fields, no timestamp).
        config: Mapping providing ``name``, ``number_hidden``, ``epochs``,
            ``ks``, ``momentums``, ``l_w``, ``l_v``, ``l_h``, ``lr_decay``,
            ``decay`` and ``batch_size``.  ``ks`` and ``momentums`` are
            per-epoch schedules.  Only the first entry of ``l_w``, ``l_v``,
            ``l_h`` and ``lr_decay`` is used: the learning rates start at
            ``l_*[0]`` and are multiplied by ``lr_decay[0]`` at the start of
            every epoch, including the first.
        all_users: Not used by this function; kept for interface compatibility.
        all_movies: Sequence of item ids; an item at position ``p`` owns mask
            entries ``20 * p`` to ``20 * p + 19``.
        tests: Mapping of user id to an iterable of ``(movie, rating)`` pairs
            scored after each epoch.
        initial_v: Not used by this function; kept for interface compatibility.
        sep: Field separator of ``dataset``.

    Raises:
        ValueError: If a dataset line does not have exactly three fields, or a
            rating in ``dataset`` refers to an item missing from ``all_movies``.

    Side effects:
        Prints progress to stdout, rewrites ``<config name>_<name>.json``
        after every epoch and finally stores ``rbm.weights`` with
        ``np.save('weights', ...)``.
    """
    config_name = config['name']
    number_hidden = config['number_hidden']
    epochs = config['epochs']
    ks = config['ks']
    momentums = config['momentums']
    l_w = config['l_w']
    l_v = config['l_v']
    l_h = config['l_h']
    lr_decay = config['lr_decay'][0]
    decay = config['decay']
    batch_size = config['batch_size']

    config_result = config.copy()
    config_result['results'] = []

    rating_levels = 20
    n_movies = len(all_movies)
    n_visible = n_movies * rating_levels

    vis = T.matrix()
    vmasks = T.matrix()
    rbm = CFRBM(n_visible, number_hidden)

    # Resolve item positions once (first occurrence wins, as with
    # list.index()) instead of scanning the list for every rating.
    movie_pos = {}
    for pos, movie in enumerate(all_movies):
        movie_pos.setdefault(movie, pos)

    profiles = defaultdict(list)
    with open(dataset, 'rt') as data:
        for line in data:
            uid, mid, rat = line.strip().split(sep)
            profiles[uid].append((mid, float(rat)))

    current_l_w = l_w[0]
    current_l_v = l_v[0]
    current_l_h = l_h[0]

    print("Users and ratings loaded")

    def scheduled(schedule, epoch):
        """Return the schedule entry that applies to ``epoch``.

        The epochs are cut into ``len(schedule)`` equal slices (rounded
        down); epochs past the last slice use the final entry.
        """
        # Floor division keeps the index an int on Python 3; max(1, ...)
        # avoids dividing by zero when there are fewer epochs than entries.
        step = max(1, epochs // len(schedule))
        position = epoch // step
        return schedule[position] if position < len(schedule) else schedule[-1]

    def encode_batch(user_ids):
        """Build the (inputs, masks) float32 matrices for ``user_ids``."""
        size = len(user_ids)
        rows = []
        mask = np.zeros((size, n_visible), dtype='float32')
        for row, user_id in enumerate(user_ids):
            user_profile = [0.] * n_movies
            # .get() instead of [] so that encoding a test-only user does not
            # insert an empty profile into the training data.
            for movie_id, rat in profiles.get(user_id, ()):
                try:
                    pos = movie_pos[movie_id]
                except KeyError:
                    raise ValueError(
                        '{!r} is not in all_movies'.format(movie_id))
                user_profile[pos] = rat
                mask[row, rating_levels * pos:rating_levels * (pos + 1)] = 1
            rows.append(expand(np.array([user_profile])).astype('float32'))
        return np.array(rows).reshape(size, n_visible), mask

    for j in range(epochs):
        # Same text as the former ``print "epochs: ", j`` statement, but valid
        # on both Python 2 and Python 3.
        print("epochs:  {}".format(j))

        k = scheduled(ks, j)
        momentum = scheduled(momentums, j)

        current_l_w *= lr_decay
        current_l_v *= lr_decay
        current_l_h *= lr_decay

        train = rbm.cdk_fun(vis,
                            vmasks,
                            k=k,
                            w_lr=current_l_w,
                            v_lr=current_l_v,
                            h_lr=current_l_h,
                            decay=decay,
                            momentum=momentum)
        predict = rbm.predict(vis)

        # Pass over the test users before this epoch's training; only a
        # corner of the first batch's predictions is printed.  Every batch is
        # still run through predict(), as before.
        for n_batch, batch in enumerate(chunker(list(tests.keys()), batch_size)):
            test_batch, _ = encode_batch(batch)
            batch_preds = predict(test_batch)
            user_preds = revert_expected_value(batch_preds, do_round=False)
            if n_batch == 0:
                print(user_preds[:4, :5])

        # list(...) so chunker receives a real sequence on Python 3 as well.
        for train_batch_i, batch in enumerate(
                chunker(list(profiles.keys()), batch_size), 1):
            train_batch, train_masks = encode_batch(batch)
            train(train_batch, train_masks)
            # One progress dot per 200 training batches.
            if train_batch_i % 200 == 0:
                sys.stdout.write('.')
                sys.stdout.flush()

        ratings = []
        predictions = []
        for batch in chunker(list(tests.keys()), batch_size):
            test_batch, _ = encode_batch(batch)
            batch_preds = predict(test_batch)
            user_preds = revert_expected_value(batch_preds, do_round=False)
            for row, profile_id in enumerate(batch):
                for entry in tests[profile_id]:
                    try:
                        movie, rating = entry
                        predicted = user_preds[row][movie_pos[movie]]
                        rating = float(rating)
                    except (KeyError, ValueError, TypeError, IndexError):
                        # Best effort: skip only the entry that cannot be
                        # scored, not the rest of this user's test ratings.
                        continue
                    ratings.append(rating)
                    predictions.append(predicted)

        distances = np.array(ratings) - np.array(predictions)
        # np.abs is vectorised and, unlike np.vectorize(abs), accepts an
        # empty array (the metrics are then NaN).
        mae = np.abs(distances).mean()
        rmse = sqrt((distances ** 2).mean())

        iteration_result = {
            'iteration': j,
            'k': k,
            'momentum': momentum,
            'mae': mae,
            'rmse': rmse,
            'lrate': current_l_w
        }
        config_result['results'].append(iteration_result)

        print(iteration_str.format(j, k, current_l_w, momentum, mae, rmse))

        # Rewritten every epoch with all results gathered so far.
        with open('{}_{}.json'.format(config_name, name), 'wt') as res_output:
            res_output.write(json.dumps(config_result, indent=4))

    w = rbm.weights.eval()
    np.save('weights', w)
def run(name, dataset, config, all_users, all_movies, tests, initial_v, sep):
    """Train a CFRBM, timing each phase and reporting precision/recall per epoch.

    Args:
        name: Suffix used in the name of the JSON results file.
        dataset: Path of a text file with one
            ``uid<sep>mid<sep>rating<sep>timestamp`` record per line.
        config: Mapping providing ``name``, ``number_hidden``, ``epochs``,
            ``ks``, ``momentums``, ``l_w``, ``l_v``, ``l_h``, ``decay`` and
            ``batch_size``.  ``ks``, ``momentums`` and the ``l_*`` entries are
            per-epoch schedules.
        all_users: Not used by this function; kept for interface compatibility.
        all_movies: Sequence of movie ids; a movie at position ``p`` owns
            mask entries ``5 * p`` to ``5 * p + 4``.
        tests: Mapping of user id to an iterable of ``(movie, rating)`` pairs
            scored after each epoch.
        initial_v: Not used by this function; kept for interface compatibility.
        sep: Field separator of ``dataset``.

    Raises:
        ValueError: If a dataset line does not have exactly four fields, or a
            rating in ``dataset`` refers to a movie missing from ``all_movies``.

    Side effects:
        Prints progress and the precision/recall tuple to stdout, rewrites
        ``experiments/<config name>_<name>.json`` after every epoch and
        prints the third value returned by ``rbm.get_weights()`` at the end.
    """
    config_name = config['name']
    number_hidden = config['number_hidden']
    epochs = config['epochs']
    ks = config['ks']
    momentums = config['momentums']
    l_w = config['l_w']
    l_v = config['l_v']
    l_h = config['l_h']
    decay = config['decay']
    batch_size = config['batch_size']

    config_result = config.copy()
    config_result['results'] = []

    rating_levels = 5
    n_movies = len(all_movies)
    n_visible = n_movies * rating_levels

    vis = T.matrix()
    vmasks = T.matrix()
    rbm = CFRBM(n_visible, number_hidden)

    # Resolve movie positions once (first occurrence wins, as with
    # list.index()) instead of scanning the list for every rating.
    movie_pos = {}
    for pos, movie in enumerate(all_movies):
        movie_pos.setdefault(movie, pos)

    profiles = defaultdict(list)
    with open(dataset, 'rt') as data:
        for line in data:
            uid, mid, rat, _timestamp = line.strip().split(sep)
            profiles[uid].append((mid, float(rat)))

    print("Users and ratings loaded")

    def scheduled(schedule, epoch):
        """Return the schedule entry that applies to ``epoch``.

        The epochs are cut into ``len(schedule)`` equal slices (rounded
        down); epochs past the last slice use the final entry.
        """
        # Floor division keeps the index an int on Python 3; max(1, ...)
        # avoids dividing by zero when there are fewer epochs than entries.
        step = max(1, epochs // len(schedule))
        position = epoch // step
        return schedule[position] if position < len(schedule) else schedule[-1]

    def encode_batch(user_ids):
        """Build the (inputs, masks) float32 matrices for ``user_ids``."""
        size = len(user_ids)
        rows = []
        mask = np.zeros((size, n_visible), dtype='float32')
        for row, user_id in enumerate(user_ids):
            user_profile = [0.] * n_movies
            # .get() instead of [] so that encoding a test-only user does not
            # insert an empty profile into the training data.
            for movie_id, rat in profiles.get(user_id, ()):
                try:
                    pos = movie_pos[movie_id]
                except KeyError:
                    raise ValueError(
                        '{!r} is not in all_movies'.format(movie_id))
                user_profile[pos] = rat
                mask[row, rating_levels * pos:rating_levels * (pos + 1)] = 1
            rows.append(expand(np.array([user_profile])).astype('float32'))
        return np.array(rows).reshape(size, n_visible), mask

    for j in range(epochs):
        k = scheduled(ks, j)
        momentum = scheduled(momentums, j)
        current_l_w = scheduled(l_w, j)
        current_l_v = scheduled(l_v, j)
        current_l_h = scheduled(l_h, j)

        train = rbm.cdk_fun(vis,
                            vmasks,
                            k=k,
                            w_lr=current_l_w,
                            v_lr=current_l_v,
                            h_lr=current_l_h,
                            decay=decay,
                            momentum=momentum)
        predict = rbm.predict(vis)

        start_time = time.time()
        # list(...) so chunker receives a real sequence on Python 3 as well.
        for batch in chunker(list(profiles.keys()), batch_size):
            train_batch, train_masks = encode_batch(batch)
            train(train_batch, train_masks)
            sys.stdout.write('.')
            sys.stdout.flush()
        train_time = time.time() - start_time

        ratings = []
        predictions = []
        start_time = time.time()
        for batch in chunker(list(tests.keys()), batch_size):
            test_batch, _ = encode_batch(batch)
            user_preds = revert_expected_value(predict(test_batch))
            for row, profile_id in enumerate(batch):
                for entry in tests[profile_id]:
                    try:
                        movie, rating = entry
                        predicted = user_preds[row][movie_pos[movie]]
                        rating = float(rating)
                    except (KeyError, ValueError, TypeError, IndexError):
                        # Best effort: skip only the entry that cannot be
                        # scored, not the rest of this user's test ratings.
                        continue
                    ratings.append(rating)
                    predictions.append(predicted)
        test_time = time.time() - start_time

        distances = np.array(ratings) - np.array(predictions)

        # Both sides are cast to uint8 and compared against 3 to obtain the
        # binary labels passed to precision_recall_fscore_support.
        true_rat = np.array(ratings, dtype=np.uint8)
        pred_rat = np.array(predictions, dtype=np.uint8)
        prec_rec = precision_recall_fscore_support(true_rat < 3, pred_rat < 3, average='binary')
        print(prec_rec)

        # np.abs is vectorised; np.vectorize(abs) was a slow Python loop.
        mae = np.abs(distances).mean()
        rmse = sqrt((distances ** 2).mean())

        iteration_result = {
            'iteration': j,
            'k': k,
            'momentum': momentum,
            'mae': mae,
            'rmse': rmse,
            'lrate': current_l_w,
            'train_time': train_time,
            'test_time': test_time,
            'prec_rec': prec_rec
        }
        config_result['results'].append(iteration_result)

        print(iteration_str.format(j, k, current_l_w, momentum, mae, rmse))

        # Rewritten every epoch with all results gathered so far.
        with open('experiments/{}_{}.json'.format(config_name, name), 'wt') as res_output:
            res_output.write(json.dumps(config_result, indent=4))

    W, V, H = rbm.get_weights()
    print(H)
def run(name, dataset, config, all_users, all_movies, tests, initial_v, sep):
    """Train an item-based CFRBM (one row per movie) and evaluate it each epoch.

    Unlike a user-based run, the profiles here are keyed by movie id and the
    visible layer spans the users.

    Args:
        name: Suffix used in the name of the JSON results file.
        dataset: Path of a text file with one
            ``uid<sep>mid<sep>rating<sep>timestamp`` record per line.
        config: Mapping providing ``name``, ``number_hidden``, ``epochs``,
            ``ks``, ``momentums``, ``l_w``, ``l_v``, ``l_h`` and ``decay``.
            ``ks``, ``momentums`` and the ``l_*`` entries are per-epoch
            schedules.  The batch size is fixed at 10 and not read from
            ``config``.
        all_users: Sequence of user ids; a user at position ``p`` owns mask
            entries ``5 * p`` to ``5 * p + 4``.
        all_movies: Not used by this function; kept for interface
            compatibility.
        tests: Mapping of movie id to an iterable of ``(user, rating)`` pairs
            scored after each epoch.
        initial_v: Not used by this function; kept for interface compatibility.
        sep: Field separator of ``dataset``.

    Raises:
        ValueError: If a dataset line does not have exactly four fields, or a
            rating in ``dataset`` refers to a user missing from ``all_users``.

    Side effects:
        Prints progress to stdout and rewrites ``<config name>_<name>.json``
        in the working directory after every epoch.
    """
    config_name = config["name"]
    number_hidden = config["number_hidden"]
    epochs = config["epochs"]
    ks = config["ks"]
    momentums = config["momentums"]
    l_w = config["l_w"]
    l_v = config["l_v"]
    l_h = config["l_h"]
    decay = config["decay"]

    config_result = config.copy()
    config_result["results"] = []

    batch_size = 10
    rating_levels = 5
    n_users = len(all_users)
    n_visible = n_users * rating_levels

    vis = T.matrix()
    vmasks = T.matrix()
    rbm = CFRBM(n_visible, number_hidden)

    # Resolve user positions once (first occurrence wins, as with
    # list.index()) instead of scanning the list for every rating.
    user_pos = {}
    for pos, user in enumerate(all_users):
        user_pos.setdefault(user, pos)

    profiles = defaultdict(list)
    with open(dataset, "rt") as data:
        for line in data:
            uid, mid, rat, _timestamp = line.strip().split(sep)
            profiles[mid].append((uid, float(rat)))

    print("Users and ratings loaded")

    def scheduled(schedule, epoch):
        """Return the schedule entry that applies to ``epoch``.

        The epochs are cut into ``len(schedule)`` equal slices (rounded
        down); epochs past the last slice use the final entry.
        """
        # Floor division keeps the index an int on Python 3; max(1, ...)
        # avoids dividing by zero when there are fewer epochs than entries.
        step = max(1, epochs // len(schedule))
        position = epoch // step
        return schedule[position] if position < len(schedule) else schedule[-1]

    def encode_batch(movie_ids):
        """Build the (inputs, masks) float32 matrices for ``movie_ids``."""
        size = len(movie_ids)
        rows = []
        mask = np.zeros((size, n_visible), dtype="float32")
        for row, movie_id in enumerate(movie_ids):
            movie_profile = [0.0] * n_users
            # .get() instead of [] so that encoding a test-only movie does not
            # insert an empty profile into the training data.
            for user_id, rat in profiles.get(movie_id, ()):
                try:
                    pos = user_pos[user_id]
                except KeyError:
                    raise ValueError(
                        "{!r} is not in all_users".format(user_id))
                movie_profile[pos] = rat
                mask[row, rating_levels * pos:rating_levels * (pos + 1)] = 1
            rows.append(expand(np.array([movie_profile])).astype("float32"))
        return np.array(rows).reshape(size, n_visible), mask

    for j in range(epochs):
        k = scheduled(ks, j)
        momentum = scheduled(momentums, j)
        current_l_w = scheduled(l_w, j)
        current_l_v = scheduled(l_v, j)
        current_l_h = scheduled(l_h, j)

        train = rbm.cdk_fun(
            vis,
            vmasks,
            k=k,
            w_lr=current_l_w,
            v_lr=current_l_v,
            h_lr=current_l_h,
            decay=decay,
            momentum=momentum,
        )
        predict = rbm.predict(vis)

        # list(...) so chunker receives a real sequence on Python 3 as well.
        for batch in utils.chunker(list(profiles.keys()), batch_size):
            train_batch, train_masks = encode_batch(batch)
            train(train_batch, train_masks)
            sys.stdout.write(".")
            sys.stdout.flush()

        ratings = []
        predictions = []
        for batch in utils.chunker(list(tests.keys()), batch_size):
            test_batch, _ = encode_batch(batch)
            movie_predictions = revert_expected_value(predict(test_batch))
            for row, movie_id in enumerate(batch):
                for entry in tests[movie_id]:
                    try:
                        user, rating = entry
                        predicted = movie_predictions[row][user_pos[user]]
                        rating = float(rating)
                    except (KeyError, ValueError, TypeError, IndexError):
                        # Best effort: skip only the entry that cannot be
                        # scored, not the rest of this movie's test ratings.
                        continue
                    ratings.append(rating)
                    predictions.append(predicted)

        distances = np.array(ratings) - np.array(predictions)
        # np.abs is vectorised and, unlike np.vectorize(abs), accepts an
        # empty array (the metrics are then NaN).
        mae = np.abs(distances).mean()
        rmse = sqrt((distances ** 2).mean())

        iteration_result = {
            "iteration": j,
            "k": k,
            "momentum": momentum,
            "mae": mae,
            "rmse": rmse,
            "lrate": current_l_w,
        }
        config_result["results"].append(iteration_result)

        print(iteration_str.format(j, k, current_l_w, momentum, mae, rmse))

        # Rewritten every epoch with all results gathered so far.
        with open("{}_{}.json".format(config_name, name), "wt") as res_output:
            res_output.write(json.dumps(config_result, indent=4))