# NOTE(review): this fragment was collapsed onto a single physical line by an
# extraction artifact; reformatted one statement per line, tokens unchanged.
# `nb_train`, `train_instances`, `data_dir`, `args`, `real_adj_matrix`,
# `load_param`, `device`, and `model_data_type` are defined earlier, outside
# this view — TODO confirm against the full file.
print(nb_train)

# Read validation and test instances (one instance per line).
validate_data_path = data_dir + 'validate.txt'
validate_instances = utils.read_instances_lines_from_file(validate_data_path)
nb_validate = len(validate_instances)
print(nb_validate)

test_data_path = data_dir + 'test.txt'
test_instances = utils.read_instances_lines_from_file(test_data_path)
nb_test = len(test_instances)
print(nb_test)

### build knowledge ###
print("@Build knowledge")
MAX_SEQ_LENGTH, item_dict, reversed_item_dict, item_probs = utils.build_knowledge(train_instances, validate_instances)

print("#Statistic")
NB_ITEMS = len(item_dict)
print(" + Maximum sequence length: ", MAX_SEQ_LENGTH)
print(" + Total items: ", NB_ITEMS)
# Density of the (sparse) co-occurrence/adjacency matrix C.
print('density of C matrix: %.6f' % (real_adj_matrix.nnz * 1.0 / NB_ITEMS / NB_ITEMS))

batch_size = args.batch_size
# train_loader = data_utils.generate_data_loader(train_instances, load_param['batch_size'], item_dict, MAX_SEQ_LENGTH, is_bseq=True, is_shuffle=True)
# valid_loader = data_utils.generate_data_loader(validate_instances, load_param['batch_size'], item_dict, MAX_SEQ_LENGTH, is_bseq=True, is_shuffle=False)
# Only the test loader is built here; train/valid loaders stay commented out.
test_loader = data_utils.generate_data_loader(test_instances, batch_size, item_dict, MAX_SEQ_LENGTH, is_bseq=True, is_shuffle=False)

# Instantiate the model (adjacency matrix densified for the constructor),
# move it to the target device/dtype, and set up RMSprop.
pre_trained_model = model.RecSysModel(load_param, MAX_SEQ_LENGTH, item_probs, real_adj_matrix.todense(), device, model_data_type)
pre_trained_model.to(device, dtype=model_data_type)
optimizer = torch.optim.RMSprop(pre_trained_model.parameters(), lr=0.001)
# NOTE(review): extraction artifact — this script fragment was collapsed onto a
# single physical line (the inline "# print(...)" comments show the original had
# line breaks).  It reads CLI-style options, loads train/test instance lines,
# builds item dictionaries via utils.build_knowledge over train+test, restores
# `item_probs` from a saved numpy archive, builds a POP model (presumably a
# popularity baseline — verify against the POP class), and samples `nb_predict`
# example instances.  The trailing `for` header is truncated in this view.
# NOTE(review): `np.load(saved_file, item_probs)` passes `item_probs` as
# np.load's `mmap_mode` positional parameter — this looks like a bug; probably
# `np.load(saved_file)` was intended.  TODO confirm against numpy docs and the
# code that wrote `saved_file`.
nb_predict = args.nb_predict topk = args.topk ex_file = args.example_file data_dir = f_dir train_data_path = data_dir + 'train_lines.txt' train_instances = utils.read_instances_lines_from_file(train_data_path) nb_train = len(train_instances) # print(nb_train) test_data_path = data_dir + 'test_lines.txt' test_instances = utils.read_instances_lines_from_file(test_data_path) nb_test = len(test_instances) # print(nb_test) # print("---------------------@Build knowledge-------------------------------") MAX_SEQ_LENGTH, item_dict, reversed_item_dict, item_probs, item_freq_dict, user_dict = utils.build_knowledge( train_instances + test_instances) if not os.path.exists(o_dir): os.makedirs(o_dir) saved_file = os.path.join(o_dir, model_name) # print("Save model in ", saved_file) f = np.load(saved_file, item_probs) item_probs = f['item_probs'] # print(item_probs) pop_model = POP(item_dict, reversed_item_dict, item_probs) if ex_file is not None: ex_instances = utils.read_instances_lines_from_file(ex_file) else: ex_instances = test_instances for i in random.sample(ex_instances, nb_predict):
# NOTE(review): extraction artifact — this fragment of a TensorFlow 1.x
# session-based training script was collapsed onto a single physical line and
# is truncated mid-call at the end (`utils.seq_batch_generator(training_instances,`).
# It loads validation/test instance files, computes per-split batch counts,
# builds the item dictionary from training+validation instances, and — when
# `config.train_mode` is set — opens a tf.Session, creates the network via
# procedure.create_network, and runs tf.global_variables_initializer() (TF1 API,
# as the tf.Session/global_variables_initializer calls show).
# `validate_file`, `testing_file`, `training_instances`, `config`, `gpu_config`,
# and `output_dir` are defined outside this view — TODO confirm in the full file.
validate_instances = utils.read_file_as_lines(validate_file) nb_validate = len(validate_instances) total_validate_batches = utils.compute_total_batches(nb_validate, config.batch_size) print(" + Total validating sequences: ", nb_validate) print(" + #batches in validate ", total_validate_batches) testing_instances = utils.read_file_as_lines(testing_file) nb_test = len(testing_instances) total_test_batches = utils.compute_total_batches(nb_test, config.batch_size) print(" + Total testing sequences: ", nb_test) print(" + #batches in test ", total_test_batches) # Create dictionary print("@Build knowledge") MAX_SEQ_LENGTH, item_dict = utils.build_knowledge(training_instances, validate_instances) print("#Statistic") NB_ITEMS = len(item_dict) print(" + Maximum sequence length: ", MAX_SEQ_LENGTH) print(" + Total items: ", NB_ITEMS) model_dir = output_dir + "/models" if config.train_mode: with tf.Session(config=gpu_config) as sess: # Init the network net = procedure.create_network(sess, MAX_SEQ_LENGTH, NB_ITEMS, config) sess.run(tf.global_variables_initializer()) # Train the network train_generator = utils.seq_batch_generator(training_instances,
# NOTE(review): extraction artifact — this fragment was collapsed onto a single
# physical line and is truncated mid-call at the end (the second
# data_utils.generate_data_loader call is cut off after `is_bseq=True,`).
# It loads validation/test instance lines, builds dictionaries from
# train+validation+test instances, hard-codes batch_size=16 in a local
# config dict, and builds data loaders over sliced subsets
# (train_instances[:1000], valid_instances[:500]) — presumably a quick
# debugging/smoke-test setup rather than a full training run; verify intent.
# `nb_train`, `train_instances`, and `data_dir` are defined outside this view.
print(nb_train) validate_data_path = data_dir + 'validate.txt' valid_instances = utils.read_instances_lines_from_file(validate_data_path) nb_validate = len(valid_instances) print(nb_validate) test_data_path = data_dir + 'test.txt' test_instances = utils.read_instances_lines_from_file(test_data_path) nb_test = len(test_instances) print(nb_test) ### build knowledge ### print("---------------------@Build knowledge-------------------------------") MAX_SEQ_LENGTH, item_dict, reversed_item_dict, item_probs, user_consumption_dict = utils.build_knowledge( train_instances, valid_instances, test_instances) config_param = dict() config_param['batch_size'] = 16 print('---------------------Create data loader--------------------') train_loader = data_utils.generate_data_loader(train_instances[:1000], config_param['batch_size'], item_dict, MAX_SEQ_LENGTH, is_bseq=True, is_shuffle=True) valid_loader = data_utils.generate_data_loader(valid_instances[:500], config_param['batch_size'], item_dict, MAX_SEQ_LENGTH, is_bseq=True,