# Standard-library and third-party imports used by the tests below.
import json
import os
import pathlib
import shutil
import subprocess

import pytest
import torch
from pytest import approx

# NOTE (assumption): Command, ConfigFactory, SampleConfig, create_command_line, get_name,
# get_cli_dict_args, depends_on_pretrained_train, get_resuming_checkpoint_path,
# update_compression_algo_dict_with_legr_save_load_params,
# update_compression_config_with_legr_save_load_params,
# extract_compression_stage_from_checkpoint, CompressionStage, ExecutionMode,
# NUM_DEVICES, DATASET_PATH, TEST_ROOT and EXTENSIONS_BUILD_FILENAME are provided by the
# NNCF test helpers/fixtures; their exact import paths are not shown in this excerpt.


def test_export_with_pretrained(tmp_path):
    config = SampleConfig()
    config.update({
        "model": "resnet18",
        "dataset": "imagenet",
        "input_info": {
            "sample_size": [2, 3, 299, 299]
        },
        "num_classes": 1000,
        "compression": {"algorithm": "magnitude_sparsity"}
    })
    config_factory = ConfigFactory(config, tmp_path / 'config.json')

    onnx_path = os.path.join(str(tmp_path), "model.onnx")
    args = {
        "--mode": "export",
        "--config": config_factory.serialize(),
        "--pretrained": '',
        "--to-onnx": onnx_path
    }

    if not torch.cuda.is_available():
        args["--cpu-only"] = True

    runner = Command(create_command_line(args, "classification"))
    runner.run()
    assert os.path.exists(onnx_path)

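# Illustration (hypothetical, not the actual NNCF helper): the tests in this file build an `args`
# dict and hand it to `create_command_line`, which assembles the launch command for the sample.
# A minimal sketch of that dict-to-CLI pattern is shown below; the example path and the function
# name `_example_command_line_from_args` are assumptions made for illustration only. Flags whose
# value is '' or True (e.g. "--pretrained", "--cpu-only") are emitted without an argument,
# everything else as "--key value".
def _example_command_line_from_args(args: dict, sample_type: str) -> str:
    # The entry-point path below is illustrative, not taken from the NNCF sources.
    parts = ["python", "examples/torch/{}/main.py".format(sample_type)]
    for key, value in args.items():
        flag = key if key.startswith("--") else "--{}".format(key)
        if value == '' or value is True:
            parts.append(flag)  # boolean-style flag, no argument
        else:
            parts.extend([flag, str(value)])
    return " ".join(parts)
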
def test_trained_model_eval(request, config, tmp_path, multiprocessing_distributed, case_common_dirs):
    depends_on_pretrained_train(request, config["test_case_id"], multiprocessing_distributed)
    config_factory = ConfigFactory(config['nncf_config'], tmp_path / 'config.json')
    config_factory.config = update_compression_algo_dict_with_legr_save_load_params(
        config_factory.config, case_common_dirs['save_coeffs_path'])

    ckpt_path = os.path.join(
        case_common_dirs["checkpoint_save_dir"],
        "distributed" if multiprocessing_distributed else "data_parallel",
        get_name(config_factory.config) + "_last.pth")
    args = {
        "--mode": "test",
        "--data": config["dataset_path"],
        "--config": config_factory.serialize(),
        "--log-dir": tmp_path,
        "--batch-size": config["batch_size"] * NUM_DEVICES,
        "--workers": 0,  # Workaround for the PyTorch MultiProcessingDataLoader issue
        "--weights": ckpt_path,
        "--dist-url": "tcp://127.0.0.1:8987"
    }

    if not torch.cuda.is_available():
        args["--cpu-only"] = True
    elif multiprocessing_distributed:
        args["--multiprocessing-distributed"] = True

    runner = Command(create_command_line(args, config["sample_type"]))
    runner.run()

def test_export_with_resume(request, config, tmp_path, multiprocessing_distributed, case_common_dirs):
    depends_on_pretrained_train(request, config["test_case_id"], multiprocessing_distributed)
    config_factory = ConfigFactory(config['nncf_config'], tmp_path / 'config.json')
    config_factory.config = update_compression_algo_dict_with_legr_save_load_params(
        config_factory.config, case_common_dirs['save_coeffs_path'], False)

    ckpt_path = get_resuming_checkpoint_path(
        config_factory, multiprocessing_distributed, case_common_dirs["checkpoint_save_dir"])

    onnx_path = os.path.join(str(tmp_path), "model.onnx")
    args = {
        "--mode": "export",
        "--config": config_factory.serialize(),
        "--resume": ckpt_path,
        "--to-onnx": onnx_path
    }

    if not torch.cuda.is_available():
        args["--cpu-only"] = True

    runner = Command(create_command_line(args, config["sample_type"]))
    runner.run()
    assert os.path.exists(onnx_path)

def test_compression_train(_params, tmp_path, case_common_dirs):
    p = _params
    args = p['args']
    tc = p['test_config']

    args['config'] = update_compression_config_with_legr_save_load_params(
        args['config'], case_common_dirs["save_coeffs_path"])

    args['mode'] = 'train'
    args['log-dir'] = tmp_path
    args['workers'] = 0  # Workaround for PyTorch MultiprocessingDataLoader issues
    args['seed'] = 1

    # Workaround for PyTorch 1.9.1 Multiprocessing issue related to determinism and asym quantization
    # https://github.com/pytorch/pytorch/issues/61032
    if 'mobilenet_v2_asym_int8.json' in args['config']:
        args.pop('seed')

    runner = Command(create_command_line(get_cli_dict_args(args), tc['sample_type']))
    env_with_cuda_reproducibility = os.environ.copy()
    env_with_cuda_reproducibility['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'
    runner.kwargs.update(env=env_with_cuda_reproducibility)
    runner.run(timeout=tc['timeout'])

    checkpoint_path = os.path.join(args['checkpoint-save-dir'], tc['checkpoint_name'] + '_best.pth')
    assert os.path.exists(checkpoint_path)
    actual_acc = torch.load(checkpoint_path)['best_acc1']
    ref_acc = tc['expected_accuracy']
    better_accuracy_tolerance = 3
    tolerance = tc['absolute_tolerance_train'] if actual_acc < ref_acc else better_accuracy_tolerance
    assert actual_acc == approx(ref_acc, abs=tolerance)

def test_xnli_eval(self, temp_folder):
    com_line = "examples/pytorch/text-classification/run_xnli.py --model_name_or_path {output}" \
               " --language zh --do_eval --learning_rate 5e-5 --max_seq_length 128 --output_dir" \
               " {output} --nncf_config nncf_bert_config_xnli.json --per_gpu_eval_batch_size 24" \
               " --max_eval_samples 10" \
        .format(output=os.path.join(temp_folder["models"], "xnli"))
    runner = Command(create_command_line(com_line, self.VENV_ACTIVATE, self.PYTHON_EXECUTABLE,
                                         self.CUDA_VISIBLE_STRING), self.TRANSFORMERS_REPO_PATH)
    runner.run()

def test_ner_eval(self, temp_folder):
    com_line = "examples/pytorch/token-classification/run_ner.py " \
               " --model_name_or_path {output} --do_eval " \
               " --output_dir {output} --dataset_name conll2003" \
               " --max_eval_samples 10" \
               " --nncf_config nncf_bert_config_conll.json" \
        .format(output=os.path.join(temp_folder["models"], "ner_output"))
    runner = Command(create_command_line(com_line, self.VENV_ACTIVATE, self.PYTHON_EXECUTABLE,
                                         self.CUDA_VISIBLE_STRING), self.TRANSFORMERS_REPO_PATH)
    runner.run()

def test_squad_eval(self, temp_folder):
    com_line = "examples/pytorch/question-answering/run_qa.py --model_name_or_path {output}" \
               " --do_eval --dataset_name squad --learning_rate 3e-5" \
               " --max_seq_length 384 --doc_stride 128 --per_gpu_eval_batch_size=4 --output_dir {output} " \
               " --max_eval_samples 10" \
               " --nncf_config nncf_bert_config_squad.json" \
        .format(output=os.path.join(temp_folder["models"], "squad"))
    runner = Command(create_command_line(com_line, self.VENV_ACTIVATE, self.PYTHON_EXECUTABLE,
                                         self.CUDA_VISIBLE_STRING), self.TRANSFORMERS_REPO_PATH)
    runner.run()

def test_lm_eval(self, temp_folder):
    com_line = "examples/pytorch/language-modeling/run_clm.py " \
               " --model_name_or_path {output} --do_eval " \
               " --output_dir {output} --dataset_name wikitext --dataset_config_name wikitext-2-raw-v1" \
               " --max_eval_samples 10" \
               " --nncf_config nncf_gpt2_config_wikitext_hw_config.json" \
        .format(output=os.path.join(temp_folder["models"], "lm_output"))
    runner = Command(create_command_line(com_line, self.VENV_ACTIVATE, self.PYTHON_EXECUTABLE,
                                         self.CUDA_VISIBLE_STRING), self.TRANSFORMERS_REPO_PATH)
    runner.run()

def test_glue_distilbert_eval(self, temp_folder):
    com_line = "examples/pytorch/text-classification/run_glue.py --model_name_or_path {output}" \
               " --task_name sst2 --do_eval --max_seq_length 128" \
               " --output_dir {output} --validation_file {}/glue/glue_data/SST-2/test.tsv" \
               " --max_eval_samples 10" \
               " --nncf_config nncf_distilbert_config_sst2.json" \
        .format(DATASET_PATH, output=os.path.join(temp_folder["models"], "distilbert_output"))
    runner = Command(create_command_line(com_line, self.VENV_ACTIVATE, self.PYTHON_EXECUTABLE,
                                         self.CUDA_VISIBLE_STRING), self.TRANSFORMERS_REPO_PATH)
    runner.run()

def test_glue_eval(self, temp_folder):
    com_line = "examples/pytorch/text-classification/run_glue.py --model_name_or_path {output}" \
               " --task_name mnli --do_eval --validation_file {}/glue/glue_data/MNLI/dev_matched.tsv " \
               " --learning_rate 2e-5" \
               " --max_seq_length 128 --output_dir {output}" \
               " --max_eval_samples 10" \
               " --nncf_config nncf_roberta_config_mnli.json" \
        .format(DATASET_PATH, output=os.path.join(temp_folder["models"], "roberta_mnli"))
    runner = Command(create_command_line(com_line, self.VENV_ACTIVATE, self.PYTHON_EXECUTABLE,
                                         self.CUDA_VISIBLE_STRING), self.TRANSFORMERS_REPO_PATH)
    runner.run()

def test_squad_train(self, temp_folder):
    com_line = "examples/pytorch/question-answering/run_qa.py --model_name_or_path " \
               "bert-large-uncased-whole-word-masking-finetuned-squad --dataset_name squad --do_train " \
               " --learning_rate 3e-5 --num_train_epochs 0.0001 --max_seq_length 384 --doc_stride 128 " \
               " --output_dir {} --per_gpu_train_batch_size=1 --save_steps=200 --nncf_config" \
               " nncf_bert_config_squad.json".format(os.path.join(temp_folder["models"], "squad"))
    runner = Command(create_command_line(com_line, self.VENV_ACTIVATE, self.PYTHON_EXECUTABLE,
                                         self.CUDA_VISIBLE_STRING), self.TRANSFORMERS_REPO_PATH)
    runner.run()
    assert os.path.exists(os.path.join(temp_folder["models"], "squad", "pytorch_model.bin"))

def test_xnli_train(self, temp_folder):
    com_line = "examples/pytorch/text-classification/run_xnli.py --model_name_or_path bert-base-chinese" \
               " --language zh --train_language zh --do_train --per_gpu_train_batch_size 24" \
               " --learning_rate 5e-5 --num_train_epochs 0.0001 --max_seq_length 128 --output_dir {}" \
               " --save_steps 200 --nncf_config nncf_bert_config_xnli.json" \
        .format(os.path.join(temp_folder["models"], "xnli"))
    runner = Command(create_command_line(com_line, self.VENV_ACTIVATE, self.PYTHON_EXECUTABLE,
                                         self.CUDA_VISIBLE_STRING), self.TRANSFORMERS_REPO_PATH)
    runner.run()
    assert os.path.exists(os.path.join(temp_folder["models"], "xnli", "pytorch_model.bin"))

def test_accuracy_aware_training_pipeline(accuracy_aware_config, tmp_path, multiprocessing_distributed):
    config_factory = ConfigFactory(accuracy_aware_config['nncf_config'], tmp_path / 'config.json')

    args = {
        "--mode": "train",
        "--data": accuracy_aware_config["dataset_path"],
        "--config": config_factory.serialize(),
        "--log-dir": tmp_path,
        "--batch-size": accuracy_aware_config["batch_size"] * NUM_DEVICES,
        "--workers": 0,  # Workaround for the PyTorch MultiProcessingDataLoader issue
        "--epochs": 2,
        "--dist-url": "tcp://127.0.0.1:8989"
    }

    if not torch.cuda.is_available():
        args["--cpu-only"] = True
    elif multiprocessing_distributed:
        args["--multiprocessing-distributed"] = True

    runner = Command(create_command_line(args, accuracy_aware_config["sample_type"]))
    runner.run()

    from glob import glob
    time_dir_1 = glob(os.path.join(tmp_path, get_name(config_factory.config), '*/'))[0].split('/')[-2]
    time_dir_2 = glob(os.path.join(tmp_path, get_name(config_factory.config), time_dir_1,
                                   'accuracy_aware_training', '*/'))[0].split('/')[-2]
    last_checkpoint_path = os.path.join(tmp_path, get_name(config_factory.config), time_dir_1,
                                        'accuracy_aware_training', time_dir_2,
                                        'acc_aware_checkpoint_last.pth')
    assert os.path.exists(last_checkpoint_path)

    if 'compression' in accuracy_aware_config['nncf_config']:
        allowed_compression_stages = (CompressionStage.FULLY_COMPRESSED,
                                      CompressionStage.PARTIALLY_COMPRESSED)
    else:
        allowed_compression_stages = (CompressionStage.UNCOMPRESSED,)
    compression_stage = extract_compression_stage_from_checkpoint(last_checkpoint_path)
    assert compression_stage in allowed_compression_stages

def test_resume(request, config, tmp_path, multiprocessing_distributed, case_common_dirs):
    depends_on_pretrained_train(request, config["test_case_id"], multiprocessing_distributed)
    checkpoint_save_dir = os.path.join(str(tmp_path), "models")
    config_factory = ConfigFactory(config['nncf_config'], tmp_path / 'config.json')
    config_factory.config = update_compression_algo_dict_with_legr_save_load_params(
        config_factory.config, case_common_dirs['save_coeffs_path'], False)

    ckpt_path = get_resuming_checkpoint_path(
        config_factory, multiprocessing_distributed, case_common_dirs["checkpoint_save_dir"])
    if "max_iter" in config_factory.config:
        config_factory.config["max_iter"] += 2
    args = {
        "--mode": "train",
        "--data": config["dataset_path"],
        "--config": config_factory.serialize(),
        "--log-dir": tmp_path,
        "--batch-size": config["batch_size"] * NUM_DEVICES,
        "--workers": 0,  # Workaround for the PyTorch MultiProcessingDataLoader issue
        "--epochs": 3,
        "--checkpoint-save-dir": checkpoint_save_dir,
        "--resume": ckpt_path,
        "--dist-url": "tcp://127.0.0.1:8986"
    }

    if not torch.cuda.is_available():
        args["--cpu-only"] = True
    elif multiprocessing_distributed:
        args["--multiprocessing-distributed"] = True

    runner = Command(create_command_line(args, config["sample_type"]))
    runner.run()

    last_checkpoint_path = os.path.join(checkpoint_save_dir, get_name(config_factory.config) + "_last.pth")
    assert os.path.exists(last_checkpoint_path)
    if 'compression' in config['nncf_config']:
        allowed_compression_stages = (CompressionStage.FULLY_COMPRESSED,
                                      CompressionStage.PARTIALLY_COMPRESSED)
    else:
        allowed_compression_stages = (CompressionStage.UNCOMPRESSED,)
    compression_stage = extract_compression_stage_from_checkpoint(last_checkpoint_path)
    assert compression_stage in allowed_compression_stages

def test_glue_train(self, temp_folder):
    com_line = "examples/pytorch/text-classification/run_glue.py --model_name_or_path" \
               " roberta-large-mnli --task_name mnli --do_train " \
               " --per_gpu_train_batch_size 4 --learning_rate 2e-5 --num_train_epochs 0.001 --max_seq_length 128 " \
               " --output_dir {} --save_steps 200 --nncf_config" \
               " nncf_roberta_config_mnli.json" \
        .format(os.path.join(temp_folder["models"], "roberta_mnli"))
    runner = Command(create_command_line(com_line, self.VENV_ACTIVATE, self.PYTHON_EXECUTABLE,
                                         self.CUDA_VISIBLE_STRING), self.TRANSFORMERS_REPO_PATH)
    runner.run()
    assert os.path.exists(os.path.join(temp_folder["models"], "roberta_mnli", "pytorch_model.bin"))

def test_pretrained_model_train(config, tmp_path, multiprocessing_distributed, case_common_dirs):
    checkpoint_save_dir = os.path.join(
        case_common_dirs["checkpoint_save_dir"],
        "distributed" if multiprocessing_distributed else "data_parallel")
    config_factory = ConfigFactory(config['nncf_config'], tmp_path / 'config.json')
    config_factory.config = update_compression_algo_dict_with_legr_save_load_params(
        config_factory.config, case_common_dirs['save_coeffs_path'])

    args = {
        "--mode": "train",
        "--data": config["dataset_path"],
        "--config": config_factory.serialize(),
        "--log-dir": tmp_path,
        "--batch-size": config["batch_size"] * NUM_DEVICES,
        "--workers": 0,  # Workaround for the PyTorch MultiProcessingDataLoader issue
        "--epochs": 2,
        "--checkpoint-save-dir": checkpoint_save_dir,
        "--dist-url": "tcp://127.0.0.1:8989"
    }

    if not torch.cuda.is_available():
        args["--cpu-only"] = True
    elif multiprocessing_distributed:
        args["--multiprocessing-distributed"] = True
    elif config['nncf_config']["model"] == "inception_v3":
        pytest.skip("InceptionV3 may not be trained in DataParallel mode because it outputs "
                    "a namedtuple, which DP still seems unable to support.")

    runner = Command(create_command_line(args, config["sample_type"]))
    runner.run()

    last_checkpoint_path = os.path.join(checkpoint_save_dir, get_name(config_factory.config) + "_last.pth")
    assert os.path.exists(last_checkpoint_path)
    if 'compression' in config['nncf_config']:
        allowed_compression_stages = (CompressionStage.FULLY_COMPRESSED,
                                      CompressionStage.PARTIALLY_COMPRESSED)
    else:
        allowed_compression_stages = (CompressionStage.UNCOMPRESSED,)
    compression_stage = extract_compression_stage_from_checkpoint(last_checkpoint_path)
    assert compression_stage in allowed_compression_stages

def test_glue_distilbert_train(self, temp_folder):
    com_line = "examples/pytorch/text-classification/run_glue.py --model_name_or_path" \
               " distilbert-base-uncased --train_file {}/glue/glue_data/SST-2/train.tsv" \
               " --task_name sst2 --do_train --max_seq_length 128 --per_gpu_train_batch_size 8" \
               " --learning_rate 5e-5 --num_train_epochs 0.001" \
               " --output_dir {} --save_steps 200 --nncf_config" \
               " nncf_distilbert_config_sst2.json".format(DATASET_PATH,
                                                          os.path.join(temp_folder["models"], "distilbert_output"))
    runner = Command(create_command_line(com_line, self.VENV_ACTIVATE, self.PYTHON_EXECUTABLE,
                                         self.CUDA_VISIBLE_STRING), self.TRANSFORMERS_REPO_PATH)
    runner.run()
    assert os.path.exists(os.path.join(temp_folder["models"], "distilbert_output", "pytorch_model.bin"))

def test_convert_to_onnx(self, temp_folder):
    com_line = "examples/pytorch/question-answering/run_qa.py --model_name_or_path {output} " \
               " --do_eval" \
               " --dataset_name squad " \
               " --max_eval_samples 10" \
               " --output_dir {output}" \
               " --to_onnx {output}/model.onnx" \
               " --nncf_config nncf_bert_config_squad.json".format(output=os.path.join(temp_folder["models"],
                                                                                       "squad"))
    runner = Command(create_command_line(com_line, self.VENV_ACTIVATE, self.PYTHON_EXECUTABLE,
                                         self.CUDA_VISIBLE_STRING), self.TRANSFORMERS_REPO_PATH)
    runner.run()
    assert os.path.exists(os.path.join(temp_folder["models"], "squad", "model.onnx"))

def test_ner_train(self, temp_folder):
    com_line = "examples/pytorch/token-classification/run_ner.py --model_name_or_path bert-base-uncased" \
               " --do_train --per_gpu_train_batch_size 1" \
               " --dataset_name conll2003 " \
               " --max_train_samples 10" \
               " --output_dir {} " \
               " --nncf_config nncf_bert_config_conll.json".format(os.path.join(temp_folder["models"],
                                                                                "ner_output"))
    runner = Command(create_command_line(com_line, self.VENV_ACTIVATE, self.PYTHON_EXECUTABLE,
                                         self.CUDA_VISIBLE_STRING), self.TRANSFORMERS_REPO_PATH)
    runner.run()
    assert os.path.exists(os.path.join(temp_folder["models"], "ner_output", "pytorch_model.bin"))

def test_lm_train(self, temp_folder):
    com_line = "examples/pytorch/language-modeling/run_clm.py --model_name_or_path gpt2" \
               " --do_train --per_gpu_train_batch_size 1" \
               " --dataset_name wikitext --dataset_config_name wikitext-2-raw-v1 " \
               " --num_train_epochs 0.001" \
               " --output_dir {} --nncf_config" \
               " nncf_gpt2_config_wikitext_hw_config.json".format(os.path.join(temp_folder["models"], "lm_output"))
    runner = Command(create_command_line(com_line, self.VENV_ACTIVATE, self.PYTHON_EXECUTABLE,
                                         self.CUDA_VISIBLE_STRING), self.TRANSFORMERS_REPO_PATH)
    runner.run()
    assert os.path.exists(os.path.join(temp_folder["models"], "lm_output", "pytorch_model.bin"))

def test_loaded_model_evals_according_to_saved_acc(_params, tmp_path, dataset_dir):
    p = _params
    config_path = p['sample_config_path']
    checkpoint_path = p['checkpoint_path']
    metrics_path = str(tmp_path.joinpath('metrics.json'))
    tmp_path = str(tmp_path)
    args = {}
    if not dataset_dir:
        dataset_dir = tmp_path
    args['data'] = dataset_dir
    args['dataset'] = p['dataset']
    args['config'] = str(config_path)
    args['mode'] = 'test'
    args['log-dir'] = tmp_path
    args['workers'] = 0  # Workaround for the PyTorch MultiProcessingDataLoader issue
    args['seed'] = 1
    args['resume'] = checkpoint_path
    args['metrics-dump'] = metrics_path

    if p['execution_mode'] == ExecutionMode.MULTIPROCESSING_DISTRIBUTED:
        args['multiprocessing-distributed'] = ''
    else:
        pytest.skip("DataParallel eval takes too long for this test to be run during pre-commit")

    runner = Command(create_command_line(get_cli_dict_args(args), "classification"))
    runner.run()

    with open(metrics_path, encoding='utf8') as metric_file:
        metrics = json.load(metric_file)
        # Accuracy is rounded to hundredths
        assert torch.load(checkpoint_path)['best_acc1'] == pytest.approx(metrics['Accuracy'], abs=1e-2)

def test_compression_eval_trained(_params, tmp_path, case_common_dirs):
    p = _params
    args = p['args']
    tc = p['test_config']

    args['config'] = update_compression_config_with_legr_save_load_params(
        args['config'], case_common_dirs["save_coeffs_path"], False)

    args['mode'] = 'test'
    args['log-dir'] = tmp_path
    args['workers'] = 0  # Workaround for PyTorch MultiprocessingDataLoader issues
    args['seed'] = 1

    # Workaround for PyTorch 1.9.1 Multiprocessing issue related to determinism and asym quantization
    # https://github.com/pytorch/pytorch/issues/61032
    if 'mobilenet_v2_asym_int8.json' in args['config']:
        args.pop('seed')

    checkpoint_path = os.path.join(args['checkpoint-save-dir'], tc['checkpoint_name'] + '_best.pth')
    args['resume'] = checkpoint_path
    METRIC_FILE_PATH = tmp_path / 'metrics.json'
    args['metrics-dump'] = METRIC_FILE_PATH
    if 'weights' in args:
        del args['weights']

    runner = Command(create_command_line(get_cli_dict_args(args), tc['sample_type']))
    env_with_cuda_reproducibility = os.environ.copy()
    env_with_cuda_reproducibility['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'
    runner.kwargs.update(env=env_with_cuda_reproducibility)
    runner.run(timeout=tc['timeout'])

    with open(str(METRIC_FILE_PATH), encoding='utf8') as metric_file:
        metrics = json.load(metric_file)
        acc1 = metrics['Accuracy']
        assert torch.load(checkpoint_path)['best_acc1'] == approx(acc1, abs=tc['absolute_tolerance_eval'])

def test_force_cuda_build(tmp_venv_with_nncf, install_type, tmp_path, package_type):
    """
    Check that the CUDA extensions weren't initially built and
    then, with TORCH_CUDA_ARCH_LIST set, were forced to be built.
    """
    cuda_home = os.environ.get('CUDA_HOME') or os.environ.get('CUDA_PATH')
    if cuda_home is None:
        try:
            # check_output returns bytes; decode it to get a usable path
            nvcc = subprocess.check_output(['which', 'nvcc']).decode().rstrip('\r\n')
            cuda_home = os.path.dirname(os.path.dirname(nvcc))
        except subprocess.CalledProcessError:
            if not cuda_home:
                cuda_home = '/usr/local/cuda'
            if not os.path.exists(cuda_home):
                cuda_home = None
    if not cuda_home and not torch.cuda.is_available():
        pytest.skip('There is no CUDA on the machine. The test will be skipped')

    venv_path = tmp_venv_with_nncf
    torch_build_dir = tmp_path / 'extensions'
    export_env_variables = "export CUDA_VISIBLE_DEVICES='' export TORCH_EXTENSIONS_DIR={}".format(torch_build_dir)

    python_executable_with_venv = ". {0}/bin/activate && {1} && {0}/bin/python".format(venv_path,
                                                                                       export_env_variables)

    run_path = tmp_path / 'run'
    shutil.copy(TEST_ROOT / 'torch' / EXTENSIONS_BUILD_FILENAME, run_path)

    torch_ext_dir = pathlib.Path(torch_build_dir)
    assert not torch_ext_dir.exists()

    # First run: only the CPU extensions should be built
    mode = 'cpu'
    command = Command("{} {}/extensions_build_checks.py {}".format(python_executable_with_venv, run_path, mode),
                      path=run_path)
    command.run()

    cpu_ext_dir = (torch_ext_dir / 'quantized_functions_cpu')
    assert cpu_ext_dir.exists()
    cpu_ext_so = (cpu_ext_dir / 'quantized_functions_cpu.so')
    assert cpu_ext_so.exists()
    cuda_ext_dir = (torch_ext_dir / 'quantized_functions_cuda')
    assert not cuda_ext_dir.exists()
    cuda_ext_so = (cuda_ext_dir / 'quantized_functions_cuda.so')
    assert not cuda_ext_so.exists()
    cpu_ext_dir = (torch_ext_dir / 'binarized_functions_cpu')
    assert cpu_ext_dir.exists()
    cpu_ext_so = (cpu_ext_dir / 'binarized_functions_cpu.so')
    assert cpu_ext_so.exists()
    cuda_ext_dir = (torch_ext_dir / 'binarized_functions_cuda')
    assert not cuda_ext_dir.exists()
    cuda_ext_so = (cuda_ext_dir / 'binarized_functions_cuda.so')
    assert not cuda_ext_so.exists()

    # Second run: the CUDA extensions must now be force-built
    mode = 'cuda'
    command = Command("{} {}/extensions_build_checks.py {}".format(python_executable_with_venv, run_path, mode),
                      path=run_path)
    command.run()

    cuda_ext_dir = (torch_ext_dir / 'quantized_functions_cuda')
    assert cuda_ext_dir.exists()
    cuda_ext_so = (cuda_ext_dir / 'quantized_functions_cuda.so')
    assert cuda_ext_so.exists()
    cuda_ext_dir = (torch_ext_dir / 'binarized_functions_cuda')
    assert cuda_ext_dir.exists()
    cuda_ext_so = (cuda_ext_dir / 'binarized_functions_cuda.so')
    assert cuda_ext_so.exists()