def main():
    """Load a trained NeuMF checkpoint on CPU and write a ranked
    predictions CSV (movie name, predicted 0-5 rating) for one user.

    The network architecture (MLP layer sizes, embedding dims, user/item
    counts) is reconstructed from the checkpoint tensors themselves, so no
    architecture flags are needed on the command line.
    """
    args = parse_args()

    # map_location forces CPU so inference works on GPU-less machines.
    # NOTE(review): torch.load unpickles arbitrary objects — only load
    # checkpoints from trusted sources.
    state_dict = torch.load(args.model_checkpoint_path,
                            map_location=torch.device("cpu"))

    # First MLP layer's input width, then each layer's output width taken
    # from the bias vectors, in sorted key order.
    # NOTE(review): lexicographic sort misorders "mlp.10.*" vs "mlp.2.*" —
    # presumably the MLP is shallow enough that this never triggers; verify.
    layers = [state_dict["mlp.0.weight"].size()[1]]
    layer_keys = sorted(
        k for k in state_dict.keys()
        if k.startswith("mlp") and k.endswith("bias")
    )
    layers.extend(state_dict[k].size()[0] for k in layer_keys)

    n_items = state_dict["mf_item_embed.weight"].size()[0]
    n_users = state_dict["mf_user_embed.weight"].size()[0]

    model = NeuMF(
        nb_users=n_users,
        nb_items=n_items,
        mf_dim=state_dict["mf_item_embed.weight"].size()[1],
        mlp_layer_sizes=layers,
        dropout=0.5,
    )
    model.load_state_dict(state_dict)
    model.eval()

    # Score every item for the last user id in the embedding table.
    users = torch.LongTensor(np.full(n_items, n_users - 1))
    items = torch.LongTensor(np.arange(n_items, dtype=np.int64))
    predictions = model(users, items, sigmoid=True)
    predictions = predictions.detach().numpy().squeeze()

    # Fix: create the output directory unconditionally instead of comparing
    # against the literal "./" (which misses ".", trailing-slash variants,
    # and absolute paths to the cwd). exist_ok=True makes this a no-op when
    # the directory already exists.
    try:
        os.makedirs(args.output_dir, exist_ok=True)
    except OSError as err:
        print("Failed to create output directory: {}".format(err))
        sys.exit(-1)

    # Movie db lines look like "id,title,genres"; the title may itself
    # contain commas, so strip the id at the first comma and the genre tail
    # at the last one.
    names = []
    with open(args.movie_db_file) as fh:
        for line in fh:
            _, _, name = line.partition(",")
            name, _, _ = name.rpartition(",")
            names.append(name.strip('"'))

    predictions_file = os.path.join(args.output_dir, "predictions.csv")
    with open(predictions_file, "wt") as fh:
        fh.write("Movie, Predicted Rating\n")
        # Highest score first; rescale sigmoid output [0, 1] to a 0-5 rating.
        for idx in np.argsort(predictions)[::-1]:
            fh.write("{}, {:0.2f}\n".format(names[idx], predictions[idx] * 5))
    print("Predictions saved to {}".format(predictions_file))
def main():
    """Benchmark NeuMF GPU inference latency for a single batch size and
    report best/mean throughput and latency via dllogger.
    """
    args = parse_args()

    dllogger.init(backends=[
        dllogger.JSONStreamBackend(verbosity=dllogger.Verbosity.VERBOSE,
                                   filename=args.log_path),
        dllogger.StdOutBackend(verbosity=dllogger.Verbosity.VERBOSE),
    ])
    dllogger.log(data=vars(args), step='PARAMETER')

    model = NeuMF(nb_users=args.n_users,
                  nb_items=args.n_items,
                  mf_dim=args.factors,
                  mlp_layer_sizes=args.layers,
                  dropout=args.dropout).cuda()

    if args.load_checkpoint_path:
        model.load_state_dict(torch.load(args.load_checkpoint_path))

    # Only the O2 mixed-precision level goes through AMP initialization.
    if args.opt_level == "O2":
        model = amp.initialize(model,
                               opt_level=args.opt_level,
                               keep_batchnorm_fp32=False,
                               loss_scale='dynamic')
    model.eval()

    # One fixed random batch of (user, item) ids, reused for every timing run.
    users = torch.cuda.LongTensor(args.batch_size).random_(0, args.n_users)
    items = torch.cuda.LongTensor(args.batch_size).random_(0, args.n_items)

    latencies = []
    for _ in range(args.num_batches):
        # Synchronize on both sides of the forward pass so the wall-clock
        # delta measures GPU work, not just kernel-launch time.
        torch.cuda.synchronize()
        begin = time.time()
        model(users, items, sigmoid=True)
        torch.cuda.synchronize()
        latencies.append(time.time() - begin)

    fastest = min(latencies)
    average = np.mean(latencies)
    dllogger.log(
        data={
            'batch_size': args.batch_size,
            'best_inference_throughput': args.batch_size / fastest,
            'best_inference_latency': fastest,
            'mean_inference_throughput': args.batch_size / average,
            'mean_inference_latency': average,
            'inference_latencies': latencies,
        },
        step=tuple(),
    )
    dllogger.flush()
def main():
    """Benchmark NeuMF GPU inference across a list of batch sizes and log
    mean throughput plus mean/p90/p95/p99 latency per batch size.
    """
    args = parse_args()

    dllogger.init(backends=[
        dllogger.JSONStreamBackend(verbosity=dllogger.Verbosity.VERBOSE,
                                   filename=args.log_path),
        dllogger.StdOutBackend(verbosity=dllogger.Verbosity.VERBOSE),
    ])
    dllogger.log(data=vars(args), step='PARAMETER')

    model = NeuMF(nb_users=args.n_users,
                  nb_items=args.n_items,
                  mf_dim=args.factors,
                  mlp_layer_sizes=args.layers,
                  dropout=args.dropout)
    model = model.cuda()

    if args.load_checkpoint_path:
        state_dict = torch.load(args.load_checkpoint_path)
        model.load_state_dict(state_dict)
    if args.fp16:
        model.half()
    model.eval()

    batch_sizes = [int(s) for s in args.batch_sizes.split(',')]

    result_data = {}
    for batch_size in batch_sizes:
        print('benchmarking batch size: ', batch_size)
        # Fresh random (user, item) id batch per size, reused for all runs.
        users = torch.cuda.LongTensor(batch_size).random_(0, args.n_users)
        items = torch.cuda.LongTensor(batch_size).random_(0, args.n_items)

        latencies = []
        for _ in range(args.num_batches):
            # Synchronize around the forward pass so the wall-clock delta
            # captures GPU execution time, not just kernel launches.
            torch.cuda.synchronize()
            start = time.time()
            _ = model(users, items, sigmoid=True)
            torch.cuda.synchronize()
            latencies.append(time.time() - start)

        # Fix: np.percentile expects q in [0, 100]; the original passed
        # 0.90/0.95/0.99, which reports the ~0.9th-0.99th percentile
        # (essentially the minimum latency) instead of p90/p95/p99.
        result_data[f'batch_{batch_size}_mean_throughput'] = batch_size / np.mean(latencies)
        result_data[f'batch_{batch_size}_mean_latency'] = np.mean(latencies)
        result_data[f'batch_{batch_size}_p90_latency'] = np.percentile(latencies, 90)
        result_data[f'batch_{batch_size}_p95_latency'] = np.percentile(latencies, 95)
        result_data[f'batch_{batch_size}_p99_latency'] = np.percentile(latencies, 99)

    dllogger.log(data=result_data, step=tuple())
    dllogger.flush()
def main():
    """Benchmark NeuMF GPU inference for one batch size and report best
    throughput/latency plus raw per-batch latencies through LOGGER.
    """
    log_hardware()
    args = parse_args()
    log_args(args)

    model = NeuMF(nb_users=args.n_users,
                  nb_items=args.n_items,
                  mf_dim=args.factors,
                  mlp_layer_sizes=args.layers,
                  dropout=args.dropout).cuda()

    if args.load_checkpoint_path:
        model.load_state_dict(torch.load(args.load_checkpoint_path))

    # AMP mixed precision is applied only for the O2 optimization level.
    if args.opt_level == "O2":
        model = amp.initialize(model,
                               opt_level=args.opt_level,
                               keep_batchnorm_fp32=False,
                               loss_scale='dynamic')
    model.eval()

    # A single random batch of (user, item) ids, timed repeatedly.
    users = torch.cuda.LongTensor(args.batch_size).random_(0, args.n_users)
    items = torch.cuda.LongTensor(args.batch_size).random_(0, args.n_items)

    latencies = []
    for _ in range(args.num_batches):
        # Bracket the forward pass with synchronize() so the measured delta
        # covers actual GPU execution, not just the async launch.
        torch.cuda.synchronize()
        tic = time.time()
        model(users, items, sigmoid=True)
        torch.cuda.synchronize()
        latencies.append(time.time() - tic)

    best = min(latencies)
    LOGGER.log(key='batch_size', value=args.batch_size)
    LOGGER.log(key='best_inference_throughput', value=args.batch_size / best)
    LOGGER.log(key='best_inference_latency', value=best)
    LOGGER.log(key='inference_latencies', value=latencies)