Пример #1
0
def test_export_with_pretrained(tmp_path):
    """Run the classification sample in ``export`` mode and expect an ONNX file.

    A ResNet-18 / ImageNet sample config with the ``magnitude_sparsity``
    compression algorithm is built, handed to ``ConfigFactory`` together with
    ``tmp_path / 'config.json'``, and the sample is launched with the
    ``--pretrained`` flag.  The test passes when the file given via
    ``--to-onnx`` exists afterwards.
    """
    sample_settings = {
        "model": "resnet18",
        "dataset": "imagenet",
        "input_info": {"sample_size": [2, 3, 299, 299]},
        "num_classes": 1000,
        "compression": {"algorithm": "magnitude_sparsity"},
    }
    config = SampleConfig()
    config.update(sample_settings)
    config_factory = ConfigFactory(config, tmp_path / 'config.json')

    onnx_path = os.path.join(str(tmp_path), "model.onnx")
    args = {
        "--mode": "export",
        "--config": config_factory.serialize(),
        "--pretrained": '',
        "--to-onnx": onnx_path,
    }
    # Fall back to CPU execution on machines without a usable CUDA device.
    if not torch.cuda.is_available():
        args["--cpu-only"] = True

    command_line = create_command_line(args, "classification")
    Command(command_line).run()

    assert os.path.exists(onnx_path)
Пример #2
0
def test_trained_model_eval(request, config, tmp_path,
                            multiprocessing_distributed, case_common_dirs):
    """Launch the sample in ``test`` mode against a ``*_last.pth`` checkpoint.

    The checkpoint is looked up under ``case_common_dirs["checkpoint_save_dir"]``
    in the ``distributed`` or ``data_parallel`` sub-directory, depending on
    ``multiprocessing_distributed``.  The test only runs the command; it makes
    no assertion on the evaluation result itself.
    """
    # NOTE(review): presumably ties this test to the outcome of the matching
    # training test -- confirm in depends_on_pretrained_train.
    depends_on_pretrained_train(request, config["test_case_id"],
                                multiprocessing_distributed)

    config_factory = ConfigFactory(config['nncf_config'],
                                   tmp_path / 'config.json')
    patched_config = update_compression_algo_dict_with_legr_save_load_params(
        config_factory.config, case_common_dirs['save_coeffs_path'])
    config_factory.config = patched_config

    checkpoint_root = case_common_dirs["checkpoint_save_dir"]
    if multiprocessing_distributed:
        parallel_subdir = "distributed"
    else:
        parallel_subdir = "data_parallel"
    ckpt_path = os.path.join(checkpoint_root, parallel_subdir,
                             get_name(config_factory.config) + "_last.pth")

    args = {
        "--mode": "test",
        "--data": config["dataset_path"],
        "--config": config_factory.serialize(),
        "--log-dir": tmp_path,
        # The per-device batch size is scaled by the number of devices.
        "--batch-size": config["batch_size"] * NUM_DEVICES,
        # Workaround for the PyTorch MultiProcessingDataLoader issue
        "--workers": 0,
        "--weights": ckpt_path,
        "--dist-url": "tcp://127.0.0.1:8987",
    }

    if not torch.cuda.is_available():
        args["--cpu-only"] = True
    elif multiprocessing_distributed:
        args["--multiprocessing-distributed"] = True

    command_line = create_command_line(args, config["sample_type"])
    Command(command_line).run()
Пример #3
0
def test_export_with_resume(request, config, tmp_path,
                            multiprocessing_distributed, case_common_dirs):
    """Export a model to ONNX starting from a resuming checkpoint.

    The sample is launched in ``export`` mode with ``--resume`` pointing at the
    path returned by ``get_resuming_checkpoint_path``; the test passes when the
    ONNX file requested through ``--to-onnx`` exists afterwards.
    """
    # NOTE(review): presumably ties this test to the outcome of the matching
    # training test -- confirm in depends_on_pretrained_train.
    depends_on_pretrained_train(request, config["test_case_id"],
                                multiprocessing_distributed)

    config_factory = ConfigFactory(config['nncf_config'],
                                   tmp_path / 'config.json')
    patched_config = update_compression_algo_dict_with_legr_save_load_params(
        config_factory.config, case_common_dirs['save_coeffs_path'], False)
    config_factory.config = patched_config

    ckpt_path = get_resuming_checkpoint_path(
        config_factory, multiprocessing_distributed,
        case_common_dirs["checkpoint_save_dir"])
    onnx_path = os.path.join(str(tmp_path), "model.onnx")

    args = {
        "--mode": "export",
        "--config": config_factory.serialize(),
        "--resume": ckpt_path,
        "--to-onnx": onnx_path,
    }
    # Fall back to CPU execution on machines without a usable CUDA device.
    if not torch.cuda.is_available():
        args["--cpu-only"] = True

    command_line = create_command_line(args, config["sample_type"])
    Command(command_line).run()

    assert os.path.exists(onnx_path)
Пример #4
0
def test_compression_train(_params, tmp_path, case_common_dirs):
    """Train a compressed model and compare its best accuracy to a reference.

    ``_params['args']`` is updated in place with the training settings and
    turned into a command line.  After the run, ``best_acc1`` is read from the
    ``<checkpoint_name>_best.pth`` checkpoint and compared with
    ``test_config['expected_accuracy']``: a lower accuracy must stay within
    ``absolute_tolerance_train``, a higher one within 3.
    """
    args = _params['args']
    test_config = _params['test_config']

    args['config'] = update_compression_config_with_legr_save_load_params(
        args['config'], case_common_dirs["save_coeffs_path"])
    args.update({
        'mode': 'train',
        'log-dir': tmp_path,
        # Workaround for PyTorch MultiprocessingDataLoader issues
        'workers': 0,
        'seed': 1,
    })
    # Workaround for PyTorch 1.9.1 Multiprocessing issue related to determinism and asym quantization
    # https://github.com/pytorch/pytorch/issues/61032
    if 'mobilenet_v2_asym_int8.json' in args['config']:
        del args['seed']

    command_line = create_command_line(get_cli_dict_args(args),
                                       test_config['sample_type'])
    runner = Command(command_line)
    # Run the sample with CUBLAS_WORKSPACE_CONFIG set on top of the current
    # environment (the original variable name ties this to CUDA reproducibility).
    env_with_cuda_reproducibility = dict(os.environ,
                                         CUBLAS_WORKSPACE_CONFIG=':4096:8')
    runner.kwargs.update(env=env_with_cuda_reproducibility)
    runner.run(timeout=test_config['timeout'])

    checkpoint_path = os.path.join(
        args['checkpoint-save-dir'],
        test_config['checkpoint_name'] + '_best.pth')
    assert os.path.exists(checkpoint_path)

    actual_acc = torch.load(checkpoint_path)['best_acc1']
    ref_acc = test_config['expected_accuracy']
    # An accuracy above the reference is allowed a fixed, wider margin.
    better_accuracy_tolerance = 3
    if actual_acc < ref_acc:
        tolerance = test_config['absolute_tolerance_train']
    else:
        tolerance = better_accuracy_tolerance
    assert actual_acc == approx(ref_acc, abs=tolerance)
 def test_xnli_eval(self, temp_folder):
     """Run ``run_xnli.py`` with ``--do_eval`` on the model stored in ``<models>/xnli``."""
     output_dir = os.path.join(temp_folder["models"], "xnli")
     com_line = (
         "examples/pytorch/text-classification/run_xnli.py --model_name_or_path {output}"
         " --language zh --do_eval --learning_rate 5e-5 --max_seq_length 128 --output_dir"
         " {output} --nncf_config nncf_bert_config_xnli.json --per_gpu_eval_batch_size 24"
         " --max_eval_samples 10"
     ).format(output=output_dir)
     command_line = create_command_line(com_line, self.VENV_ACTIVATE,
                                        self.PYTHON_EXECUTABLE,
                                        self.CUDA_VISIBLE_STRING)
     # Second Command argument is presumably the working directory -- confirm.
     Command(command_line, self.TRANSFORMERS_REPO_PATH).run()
 def test_ner_eval(self, temp_folder):
     """Run ``run_ner.py`` with ``--do_eval`` on the model stored in ``<models>/ner_output``."""
     output_dir = os.path.join(temp_folder["models"], "ner_output")
     com_line = (
         "examples/pytorch/token-classification/run_ner.py "
         " --model_name_or_path {output} --do_eval "
         " --output_dir {output} --dataset_name conll2003"
         " --max_eval_samples 10"
         " --nncf_config nncf_bert_config_conll.json"
     ).format(output=output_dir)
     command_line = create_command_line(com_line, self.VENV_ACTIVATE,
                                        self.PYTHON_EXECUTABLE,
                                        self.CUDA_VISIBLE_STRING)
     # Second Command argument is presumably the working directory -- confirm.
     Command(command_line, self.TRANSFORMERS_REPO_PATH).run()
 def test_squad_eval(self, temp_folder):
     """Run ``run_qa.py`` with ``--do_eval`` on the model stored in ``<models>/squad``."""
     output_dir = os.path.join(temp_folder["models"], "squad")
     com_line = (
         "examples/pytorch/question-answering/run_qa.py --model_name_or_path {output}"
         " --do_eval --dataset_name squad  --learning_rate 3e-5"
         " --max_seq_length 384 --doc_stride 128 --per_gpu_eval_batch_size=4 --output_dir {output} "
         " --max_eval_samples 10"
         " --nncf_config nncf_bert_config_squad.json"
     ).format(output=output_dir)
     command_line = create_command_line(com_line, self.VENV_ACTIVATE,
                                        self.PYTHON_EXECUTABLE,
                                        self.CUDA_VISIBLE_STRING)
     # Second Command argument is presumably the working directory -- confirm.
     Command(command_line, self.TRANSFORMERS_REPO_PATH).run()
 def test_lm_eval(self, temp_folder):
     """Run ``run_clm.py`` with ``--do_eval`` on the model stored in ``<models>/lm_output``."""
     output_dir = os.path.join(temp_folder["models"], "lm_output")
     com_line = (
         "examples/pytorch/language-modeling/run_clm.py "
         " --model_name_or_path {output} --do_eval "
         " --output_dir {output} --dataset_name wikitext --dataset_config_name wikitext-2-raw-v1"
         " --max_eval_samples 10"
         " --nncf_config nncf_gpt2_config_wikitext_hw_config.json"
     ).format(output=output_dir)
     command_line = create_command_line(com_line, self.VENV_ACTIVATE,
                                        self.PYTHON_EXECUTABLE,
                                        self.CUDA_VISIBLE_STRING)
     # Second Command argument is presumably the working directory -- confirm.
     Command(command_line, self.TRANSFORMERS_REPO_PATH).run()
 def test_glue_distilbert_eval(self, temp_folder):
     """Run ``run_glue.py`` (task ``sst2``) with ``--do_eval`` on ``<models>/distilbert_output``.

     The validation file is taken from ``DATASET_PATH``.
     """
     output_dir = os.path.join(temp_folder["models"], "distilbert_output")
     com_line = (
         "examples/pytorch/text-classification/run_glue.py --model_name_or_path {output}"
         " --task_name sst2 --do_eval --max_seq_length 128"
         " --output_dir {output} --validation_file {}/glue/glue_data/SST-2/test.tsv"
         " --max_eval_samples 10"
         " --nncf_config nncf_distilbert_config_sst2.json"
     ).format(DATASET_PATH, output=output_dir)
     command_line = create_command_line(com_line, self.VENV_ACTIVATE,
                                        self.PYTHON_EXECUTABLE,
                                        self.CUDA_VISIBLE_STRING)
     # Second Command argument is presumably the working directory -- confirm.
     Command(command_line, self.TRANSFORMERS_REPO_PATH).run()
 def test_glue_eval(self, temp_folder):
     """Run ``run_glue.py`` (task ``mnli``) with ``--do_eval`` on ``<models>/roberta_mnli``.

     The validation file is taken from ``DATASET_PATH``.
     """
     output_dir = os.path.join(temp_folder["models"], "roberta_mnli")
     com_line = (
         "examples/pytorch/text-classification/run_glue.py --model_name_or_path {output}"
         " --task_name mnli --do_eval --validation_file {}/glue/glue_data/MNLI/dev_matched.tsv "
         " --learning_rate 2e-5"
         " --max_seq_length 128 --output_dir {output}"
         " --max_eval_samples 10"
         " --nncf_config nncf_roberta_config_mnli.json"
     ).format(DATASET_PATH, output=output_dir)
     command_line = create_command_line(com_line, self.VENV_ACTIVATE,
                                        self.PYTHON_EXECUTABLE,
                                        self.CUDA_VISIBLE_STRING)
     # Second Command argument is presumably the working directory -- confirm.
     Command(command_line, self.TRANSFORMERS_REPO_PATH).run()
 def test_squad_train(self, temp_folder):
     """Run ``run_qa.py`` with ``--do_train`` and expect ``pytorch_model.bin`` in ``<models>/squad``."""
     output_dir = os.path.join(temp_folder["models"], "squad")
     com_line = (
         "examples/pytorch/question-answering/run_qa.py --model_name_or_path "
         "bert-large-uncased-whole-word-masking-finetuned-squad --dataset_name squad --do_train "
         " --learning_rate 3e-5 --num_train_epochs 0.0001 --max_seq_length 384 --doc_stride 128 "
         " --output_dir {} --per_gpu_train_batch_size=1 --save_steps=200 --nncf_config"
         " nncf_bert_config_squad.json"
     ).format(output_dir)
     command_line = create_command_line(com_line, self.VENV_ACTIVATE,
                                        self.PYTHON_EXECUTABLE,
                                        self.CUDA_VISIBLE_STRING)
     # Second Command argument is presumably the working directory -- confirm.
     Command(command_line, self.TRANSFORMERS_REPO_PATH).run()

     model_file = os.path.join(output_dir, "pytorch_model.bin")
     assert os.path.exists(model_file)
 def test_xnli_train(self, temp_folder):
     """Run ``run_xnli.py`` with ``--do_train`` and expect ``pytorch_model.bin`` in ``<models>/xnli``."""
     output_dir = os.path.join(temp_folder["models"], "xnli")
     com_line = (
         "examples/pytorch/text-classification/run_xnli.py --model_name_or_path bert-base-chinese"
         " --language zh --train_language zh --do_train --per_gpu_train_batch_size 24"
         " --learning_rate 5e-5 --num_train_epochs 0.0001 --max_seq_length 128 --output_dir {}"
         " --save_steps 200 --nncf_config nncf_bert_config_xnli.json"
     ).format(output_dir)
     command_line = create_command_line(com_line, self.VENV_ACTIVATE,
                                        self.PYTHON_EXECUTABLE,
                                        self.CUDA_VISIBLE_STRING)
     # Second Command argument is presumably the working directory -- confirm.
     Command(command_line, self.TRANSFORMERS_REPO_PATH).run()

     model_file = os.path.join(output_dir, "pytorch_model.bin")
     assert os.path.exists(model_file)
Пример #13
0
def test_accuracy_aware_training_pipeline(accuracy_aware_config, tmp_path,
                                          multiprocessing_distributed):
    """Run accuracy-aware training and validate the last checkpoint it writes.

    The sample is launched in ``train`` mode for 2 epochs with ``--log-dir``
    set to ``tmp_path``.  The checkpoint is then expected at
    ``<tmp_path>/<config name>/<dir>/accuracy_aware_training/<dir>/acc_aware_checkpoint_last.pth``,
    where both ``<dir>`` levels are discovered with ``glob`` (the first match
    is used at each level).  Finally the compression stage stored in the
    checkpoint is checked: fully or partially compressed when the NNCF config
    has a ``compression`` section, uncompressed otherwise.
    """
    from glob import glob

    config_factory = ConfigFactory(accuracy_aware_config['nncf_config'],
                                   tmp_path / 'config.json')

    args = {
        "--mode": "train",
        "--data": accuracy_aware_config["dataset_path"],
        "--config": config_factory.serialize(),
        "--log-dir": tmp_path,
        "--batch-size": accuracy_aware_config["batch_size"] * NUM_DEVICES,
        # Workaround for the PyTorch MultiProcessingDataLoader issue
        "--workers": 0,
        "--epochs": 2,
        "--dist-url": "tcp://127.0.0.1:8989",
    }

    if not torch.cuda.is_available():
        args["--cpu-only"] = True
    elif multiprocessing_distributed:
        args["--multiprocessing-distributed"] = True

    runner = Command(
        create_command_line(args, accuracy_aware_config["sample_type"]))
    runner.run()

    # Locate the two generated directory levels.  The globbed directories are
    # joined directly instead of being split on a hard-coded '/' separator,
    # and a missing directory fails with a readable assertion message rather
    # than an IndexError.
    log_root = os.path.join(tmp_path, get_name(config_factory.config))
    run_dirs = glob(os.path.join(log_root, '*/'))
    assert run_dirs, 'no run directory found under {}'.format(log_root)

    aa_root = os.path.join(run_dirs[0], 'accuracy_aware_training')
    aa_run_dirs = glob(os.path.join(aa_root, '*/'))
    assert aa_run_dirs, 'no run directory found under {}'.format(aa_root)

    last_checkpoint_path = os.path.join(aa_run_dirs[0],
                                        'acc_aware_checkpoint_last.pth')
    assert os.path.exists(last_checkpoint_path)

    if 'compression' in accuracy_aware_config['nncf_config']:
        allowed_compression_stages = (CompressionStage.FULLY_COMPRESSED,
                                      CompressionStage.PARTIALLY_COMPRESSED)
    else:
        allowed_compression_stages = (CompressionStage.UNCOMPRESSED, )
    compression_stage = extract_compression_stage_from_checkpoint(
        last_checkpoint_path)
    assert compression_stage in allowed_compression_stages
Пример #14
0
def test_resume(request, config, tmp_path, multiprocessing_distributed,
                case_common_dirs):
    """Resume training from a checkpoint and validate the new last checkpoint.

    Training is resumed (``--resume``) for 3 epochs, with checkpoints written
    to ``<tmp_path>/models``.  When the config has a ``max_iter`` entry it is
    increased by 2 before the run.  The resulting ``*_last.pth`` must exist
    and its compression stage must match the presence of a ``compression``
    section in the NNCF config.
    """
    # NOTE(review): presumably ties this test to the outcome of the matching
    # training test -- confirm in depends_on_pretrained_train.
    depends_on_pretrained_train(request, config["test_case_id"],
                                multiprocessing_distributed)

    checkpoint_save_dir = os.path.join(str(tmp_path), "models")
    config_factory = ConfigFactory(config['nncf_config'],
                                   tmp_path / 'config.json')
    patched_config = update_compression_algo_dict_with_legr_save_load_params(
        config_factory.config, case_common_dirs['save_coeffs_path'], False)
    config_factory.config = patched_config

    ckpt_path = get_resuming_checkpoint_path(
        config_factory, multiprocessing_distributed,
        case_common_dirs["checkpoint_save_dir"])
    if "max_iter" in config_factory.config:
        config_factory.config["max_iter"] += 2

    args = {
        "--mode": "train",
        "--data": config["dataset_path"],
        "--config": config_factory.serialize(),
        "--log-dir": tmp_path,
        "--batch-size": config["batch_size"] * NUM_DEVICES,
        # Workaround for the PyTorch MultiProcessingDataLoader issue
        "--workers": 0,
        "--epochs": 3,
        "--checkpoint-save-dir": checkpoint_save_dir,
        "--resume": ckpt_path,
        "--dist-url": "tcp://127.0.0.1:8986",
    }

    if not torch.cuda.is_available():
        args["--cpu-only"] = True
    elif multiprocessing_distributed:
        args["--multiprocessing-distributed"] = True

    command_line = create_command_line(args, config["sample_type"])
    Command(command_line).run()

    last_checkpoint_name = get_name(config_factory.config) + "_last.pth"
    last_checkpoint_path = os.path.join(checkpoint_save_dir,
                                        last_checkpoint_name)
    assert os.path.exists(last_checkpoint_path)

    allowed_compression_stages = (
        (CompressionStage.FULLY_COMPRESSED,
         CompressionStage.PARTIALLY_COMPRESSED)
        if 'compression' in config['nncf_config']
        else (CompressionStage.UNCOMPRESSED, ))
    compression_stage = extract_compression_stage_from_checkpoint(
        last_checkpoint_path)
    assert compression_stage in allowed_compression_stages
 def test_glue_train(self, temp_folder):
     """Run ``run_glue.py`` (task ``mnli``) with ``--do_train``.

     Expects ``pytorch_model.bin`` in ``<models>/roberta_mnli`` afterwards.
     """
     output_dir = os.path.join(temp_folder["models"], "roberta_mnli")
     com_line = (
         "examples/pytorch/text-classification/run_glue.py --model_name_or_path"
         " roberta-large-mnli --task_name mnli --do_train "
         " --per_gpu_train_batch_size 4 --learning_rate 2e-5 --num_train_epochs 0.001 --max_seq_length 128 "
         " --output_dir {} --save_steps 200 --nncf_config"
         " nncf_roberta_config_mnli.json"
     ).format(output_dir)
     command_line = create_command_line(com_line, self.VENV_ACTIVATE,
                                        self.PYTHON_EXECUTABLE,
                                        self.CUDA_VISIBLE_STRING)
     # Second Command argument is presumably the working directory -- confirm.
     Command(command_line, self.TRANSFORMERS_REPO_PATH).run()

     model_file = os.path.join(output_dir, "pytorch_model.bin")
     assert os.path.exists(model_file)
Пример #16
0
def test_pretrained_model_train(config, tmp_path, multiprocessing_distributed,
                                case_common_dirs):
    """Train a sample for 2 epochs and validate the last checkpoint.

    Checkpoints go to the ``distributed`` or ``data_parallel`` sub-directory of
    ``case_common_dirs["checkpoint_save_dir"]``.  With CUDA available and
    without multiprocessing-distributed mode, ``inception_v3`` is skipped.
    After the run, ``*_last.pth`` must exist and its compression stage must
    match the presence of a ``compression`` section in the NNCF config.
    """
    if multiprocessing_distributed:
        parallel_subdir = "distributed"
    else:
        parallel_subdir = "data_parallel"
    checkpoint_save_dir = os.path.join(
        case_common_dirs["checkpoint_save_dir"], parallel_subdir)

    config_factory = ConfigFactory(config['nncf_config'],
                                   tmp_path / 'config.json')
    patched_config = update_compression_algo_dict_with_legr_save_load_params(
        config_factory.config, case_common_dirs['save_coeffs_path'])
    config_factory.config = patched_config

    args = {
        "--mode": "train",
        "--data": config["dataset_path"],
        "--config": config_factory.serialize(),
        "--log-dir": tmp_path,
        "--batch-size": config["batch_size"] * NUM_DEVICES,
        # Workaround for the PyTorch MultiProcessingDataLoader issue
        "--workers": 0,
        "--epochs": 2,
        "--checkpoint-save-dir": checkpoint_save_dir,
        "--dist-url": "tcp://127.0.0.1:8989",
    }

    if not torch.cuda.is_available():
        args["--cpu-only"] = True
    elif multiprocessing_distributed:
        args["--multiprocessing-distributed"] = True
    elif config['nncf_config']["model"] == "inception_v3":
        # Only reached for CUDA runs that are not multiprocessing-distributed.
        pytest.skip(
            "InceptionV3 may not be trained in DataParallel "
            "because it outputs namedtuple, which DP seems to be unable "
            "to support even still.")

    command_line = create_command_line(args, config["sample_type"])
    Command(command_line).run()

    last_checkpoint_name = get_name(config_factory.config) + "_last.pth"
    last_checkpoint_path = os.path.join(checkpoint_save_dir,
                                        last_checkpoint_name)
    assert os.path.exists(last_checkpoint_path)

    allowed_compression_stages = (
        (CompressionStage.FULLY_COMPRESSED,
         CompressionStage.PARTIALLY_COMPRESSED)
        if 'compression' in config['nncf_config']
        else (CompressionStage.UNCOMPRESSED, ))
    compression_stage = extract_compression_stage_from_checkpoint(
        last_checkpoint_path)
    assert compression_stage in allowed_compression_stages
 def test_glue_distilbert_train(self, temp_folder):
     """Run ``run_glue.py`` (task ``sst2``) with ``--do_train`` on DistilBERT.

     The training file is taken from ``DATASET_PATH``; ``pytorch_model.bin``
     is expected in ``<models>/distilbert_output`` afterwards.
     """
     output_dir = os.path.join(temp_folder["models"], "distilbert_output")
     com_line = (
         "examples/pytorch/text-classification/run_glue.py --model_name_or_path"
         " distilbert-base-uncased --train_file {}/glue/glue_data/SST-2/train.tsv"
         " --task_name sst2 --do_train --max_seq_length 128 --per_gpu_train_batch_size 8"
         " --learning_rate 5e-5 --num_train_epochs 0.001"
         " --output_dir {} --save_steps 200 --nncf_config"
         " nncf_distilbert_config_sst2.json"
     ).format(DATASET_PATH, output_dir)
     command_line = create_command_line(com_line, self.VENV_ACTIVATE,
                                        self.PYTHON_EXECUTABLE,
                                        self.CUDA_VISIBLE_STRING)
     # Second Command argument is presumably the working directory -- confirm.
     Command(command_line, self.TRANSFORMERS_REPO_PATH).run()

     model_file = os.path.join(output_dir, "pytorch_model.bin")
     assert os.path.exists(model_file)
 def test_convert_to_onnx(self, temp_folder):
     """Run ``run_qa.py`` with ``--to_onnx`` and expect ``model.onnx`` in ``<models>/squad``."""
     output_dir = os.path.join(temp_folder["models"], "squad")
     com_line = (
         "examples/pytorch/question-answering/run_qa.py --model_name_or_path {output} "
         " --do_eval"
         " --dataset_name squad "
         " --max_eval_samples 10"
         " --output_dir {output}"
         " --to_onnx {output}/model.onnx"
         " --nncf_config nncf_bert_config_squad.json"
     ).format(output=output_dir)
     command_line = create_command_line(com_line, self.VENV_ACTIVATE,
                                        self.PYTHON_EXECUTABLE,
                                        self.CUDA_VISIBLE_STRING)
     # Second Command argument is presumably the working directory -- confirm.
     Command(command_line, self.TRANSFORMERS_REPO_PATH).run()

     onnx_file = os.path.join(output_dir, "model.onnx")
     assert os.path.exists(onnx_file)
 def test_ner_train(self, temp_folder):
     """Run ``run_ner.py`` with ``--do_train`` and expect ``pytorch_model.bin`` in ``<models>/ner_output``."""
     output_dir = os.path.join(temp_folder["models"], "ner_output")
     com_line = (
         "examples/pytorch/token-classification/run_ner.py --model_name_or_path bert-base-uncased"
         " --do_train --per_gpu_train_batch_size 1"
         " --dataset_name conll2003 "
         " --max_train_samples 10"
         " --output_dir {} "
         " --nncf_config nncf_bert_config_conll.json"
     ).format(output_dir)
     command_line = create_command_line(com_line, self.VENV_ACTIVATE,
                                        self.PYTHON_EXECUTABLE,
                                        self.CUDA_VISIBLE_STRING)
     # Second Command argument is presumably the working directory -- confirm.
     Command(command_line, self.TRANSFORMERS_REPO_PATH).run()

     model_file = os.path.join(output_dir, "pytorch_model.bin")
     assert os.path.exists(model_file)
 def test_lm_train(self, temp_folder):
     """Run ``run_clm.py`` with ``--do_train`` and expect ``pytorch_model.bin`` in ``<models>/lm_output``."""
     output_dir = os.path.join(temp_folder["models"], "lm_output")
     com_line = (
         "examples/pytorch/language-modeling/run_clm.py --model_name_or_path gpt2"
         " --do_train --per_gpu_train_batch_size 1"
         " --dataset_name wikitext --dataset_config_name wikitext-2-raw-v1 "
         " --num_train_epochs 0.001"
         " --output_dir {} --nncf_config"
         " nncf_gpt2_config_wikitext_hw_config.json"
     ).format(output_dir)
     command_line = create_command_line(com_line, self.VENV_ACTIVATE,
                                        self.PYTHON_EXECUTABLE,
                                        self.CUDA_VISIBLE_STRING)
     # Second Command argument is presumably the working directory -- confirm.
     Command(command_line, self.TRANSFORMERS_REPO_PATH).run()

     model_file = os.path.join(output_dir, "pytorch_model.bin")
     assert os.path.exists(model_file)
Пример #21
0
def test_loaded_model_evals_according_to_saved_acc(_params, tmp_path,
                                                   dataset_dir):
    """Evaluate a saved checkpoint and compare the result with its stored accuracy.

    The classification sample is run in ``test`` mode with ``--resume`` set to
    the checkpoint and ``--metrics-dump`` set to ``<tmp_path>/metrics.json``.
    The ``Accuracy`` value from that JSON file must match the checkpoint's
    ``best_acc1`` within 0.01.  Only the multiprocessing-distributed execution
    mode is exercised; any other mode is skipped.
    """
    config_path = _params['sample_config_path']
    checkpoint_path = _params['checkpoint_path']

    metrics_path = str(tmp_path.joinpath('metrics.json'))
    log_dir = str(tmp_path)
    args = {
        # Use the temporary directory as the data location when none is given.
        'data': dataset_dir if dataset_dir else log_dir,
        'dataset': _params['dataset'],
        'config': str(config_path),
        'mode': 'test',
        'log-dir': log_dir,
        # Workaround for the PyTorch MultiProcessingDataLoader issue
        'workers': 0,
        'seed': 1,
        'resume': checkpoint_path,
        'metrics-dump': metrics_path,
    }

    if _params['execution_mode'] != ExecutionMode.MULTIPROCESSING_DISTRIBUTED:
        pytest.skip(
            "DataParallel eval takes too long for this test to be run during pre-commit"
        )
    args['multiprocessing-distributed'] = ''

    command_line = create_command_line(get_cli_dict_args(args),
                                       "classification")
    Command(command_line).run()

    with open(metrics_path, encoding='utf8') as metric_file:
        metrics = json.load(metric_file)

    # accuracy is rounded to hundredths
    saved_acc = torch.load(checkpoint_path)['best_acc1']
    assert saved_acc == pytest.approx(metrics['Accuracy'], abs=1e-2)
Пример #22
0
def test_compression_eval_trained(_params, tmp_path, case_common_dirs):
    """Evaluate a trained compressed checkpoint and compare with its stored accuracy.

    ``_params['args']`` is updated in place for a ``test``-mode run that
    resumes from ``<checkpoint-save-dir>/<checkpoint_name>_best.pth`` and dumps
    metrics to ``<tmp_path>/metrics.json``.  The dumped ``Accuracy`` must match
    the checkpoint's ``best_acc1`` within
    ``test_config['absolute_tolerance_eval']``.
    """
    p = _params
    args = p['args']
    tc = p['test_config']

    args['config'] = update_compression_config_with_legr_save_load_params(
        args['config'], case_common_dirs["save_coeffs_path"], False)
    args['mode'] = 'test'
    args['log-dir'] = tmp_path
    args[
        'workers'] = 0  # Workaround for PyTorch MultiprocessingDataLoader issues
    args['seed'] = 1
    # Workaround for PyTorch 1.9.1 Multiprocessing issue related to determinism and asym quantization
    # https://github.com/pytorch/pytorch/issues/61032
    if 'mobilenet_v2_asym_int8.json' in args['config']:
        args.pop('seed')
    checkpoint_path = os.path.join(args['checkpoint-save-dir'],
                                   tc['checkpoint_name'] + '_best.pth')
    args['resume'] = checkpoint_path

    # One path object is used both for the CLI argument and for reading the
    # file back, so the two locations cannot drift apart (no second join of
    # tmp_path onto a path that already contains it).
    metrics_path = tmp_path / 'metrics.json'
    args['metrics-dump'] = metrics_path

    # This run passes the checkpoint via 'resume'; drop any 'weights' entry.
    args.pop('weights', None)

    runner = Command(
        create_command_line(get_cli_dict_args(args), tc['sample_type']))
    # Run the sample with CUBLAS_WORKSPACE_CONFIG set on top of the current
    # environment (the variable name ties this to CUDA reproducibility).
    env_with_cuda_reproducibility = os.environ.copy()
    env_with_cuda_reproducibility['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'
    runner.kwargs.update(env=env_with_cuda_reproducibility)
    runner.run(timeout=tc['timeout'])

    with open(str(metrics_path), encoding='utf8') as metric_file:
        metrics = json.load(metric_file)
    acc1 = metrics['Accuracy']
    assert torch.load(checkpoint_path)['best_acc1'] == approx(
        acc1, abs=tc['absolute_tolerance_eval'])
Пример #23
0
def test_force_cuda_build(tmp_venv_with_nncf, install_type, tmp_path,
                          package_type):
    '''
    Check that only the CPU extensions are built by a ``cpu``-mode run and
    that a subsequent ``cuda``-mode run builds the CUDA extensions as well.

    ``extensions_build_checks.py`` is executed twice inside the virtual
    environment with ``CUDA_VISIBLE_DEVICES`` set to an empty string and
    ``TORCH_EXTENSIONS_DIR`` pointing at ``<tmp_path>/extensions``.  The test
    is skipped when neither ``CUDA_HOME`` nor ``CUDA_PATH`` is set, no CUDA
    location can be found and ``torch.cuda.is_available()`` is false.
    '''
    cuda_home = os.environ.get('CUDA_HOME') or os.environ.get('CUDA_PATH')
    if cuda_home is None:
        try:
            nvcc = subprocess.check_output(['which', 'nvcc'])
        except subprocess.CalledProcessError:
            # nvcc is not on PATH: fall back to the conventional location.
            default_cuda_home = '/usr/local/cuda'
            if os.path.exists(default_cuda_home):
                cuda_home = default_cuda_home
            else:
                cuda_home = None
        else:
            # <cuda_home>/bin/nvcc -> <cuda_home>
            cuda_home = os.path.dirname(os.path.dirname(nvcc))
        if not cuda_home and not torch.cuda.is_available():
            pytest.skip(
                'There is no CUDA on the machine. The test will be skipped')

    venv_path = tmp_venv_with_nncf

    torch_build_dir = tmp_path / 'extensions'
    export_env_variables = "export CUDA_VISIBLE_DEVICES='' export TORCH_EXTENSIONS_DIR={}".format(
        torch_build_dir)
    python_executable_with_venv = ". {0}/bin/activate && {1} && {0}/bin/python".format(
        venv_path, export_env_variables)

    run_path = tmp_path / 'run'
    # NOTE(review): run_path is not created here -- presumably a fixture has
    # already made it a directory; confirm.
    shutil.copy(TEST_ROOT / 'torch' / EXTENSIONS_BUILD_FILENAME, run_path)

    torch_ext_dir = pathlib.Path(torch_build_dir)
    assert not torch_ext_dir.exists()

    def run_build_checks(mode):
        # Execute the build-check script for the given mode from run_path.
        command = Command("{} {}/extensions_build_checks.py {}".format(
            python_executable_with_venv, run_path, mode),
                          path=run_path)
        command.run()

    extension_families = ('quantized_functions', 'binarized_functions')

    run_build_checks('cpu')
    for family in extension_families:
        # The CPU extension directory and shared object must be present ...
        cpu_name = family + '_cpu'
        cpu_ext_dir = torch_ext_dir / cpu_name
        assert cpu_ext_dir.exists()
        cpu_ext_so = cpu_ext_dir / (cpu_name + '.so')
        assert cpu_ext_so.exists()

        # ... while nothing may have been built for CUDA yet.
        cuda_name = family + '_cuda'
        cuda_ext_dir = torch_ext_dir / cuda_name
        assert not cuda_ext_dir.exists()
        cuda_ext_so = cuda_ext_dir / (cuda_name + '.so')
        assert not cuda_ext_so.exists()

    run_build_checks('cuda')
    for family in extension_families:
        # After the cuda-mode run the CUDA artefacts must exist.
        cuda_name = family + '_cuda'
        cuda_ext_dir = torch_ext_dir / cuda_name
        assert cuda_ext_dir.exists()
        cuda_ext_so = cuda_ext_dir / (cuda_name + '.so')
        assert cuda_ext_so.exists()