Example #1
    def test_new_processor_registration(self):
        try:
            AutoConfig.register("custom", CustomConfig)
            AutoFeatureExtractor.register(CustomConfig, CustomFeatureExtractor)
            AutoTokenizer.register(CustomConfig, slow_tokenizer_class=CustomTokenizer)
            AutoProcessor.register(CustomConfig, CustomProcessor)
            # Trying to register something existing in the Transformers library will raise an error
            with self.assertRaises(ValueError):
                AutoProcessor.register(Wav2Vec2Config, Wav2Vec2Processor)

            # Now that the config is registered, it can be used as any other config with the auto-API
            feature_extractor = CustomFeatureExtractor.from_pretrained(SAMPLE_PROCESSOR_CONFIG_DIR)

            with tempfile.TemporaryDirectory() as tmp_dir:
                vocab_file = os.path.join(tmp_dir, "vocab.txt")
                with open(vocab_file, "w", encoding="utf-8") as vocab_writer:
                    vocab_writer.write("".join([x + "\n" for x in self.vocab_tokens]))
                tokenizer = CustomTokenizer(vocab_file)

            processor = CustomProcessor(feature_extractor, tokenizer)

            with tempfile.TemporaryDirectory() as tmp_dir:
                processor.save_pretrained(tmp_dir)
                new_processor = AutoProcessor.from_pretrained(tmp_dir)
                self.assertIsInstance(new_processor, CustomProcessor)

        finally:
            if "custom" in CONFIG_MAPPING._extra_content:
                del CONFIG_MAPPING._extra_content["custom"]
            if CustomConfig in FEATURE_EXTRACTOR_MAPPING._extra_content:
                del FEATURE_EXTRACTOR_MAPPING._extra_content[CustomConfig]
            if CustomConfig in TOKENIZER_MAPPING._extra_content:
                del TOKENIZER_MAPPING._extra_content[CustomConfig]
            if CustomConfig in PROCESSOR_MAPPING._extra_content:
                del PROCESSOR_MAPPING._extra_content[CustomConfig]
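
Outside the try/finally test harness, the registration pattern in Example #1 reduces to two calls. A minimal sketch, assuming CustomConfig and CustomProcessor are defined as in the test fixtures:

from transformers import AutoConfig, AutoProcessor

# Map the "custom" model_type string to the config class, then map that config
# class to the processor class; AutoProcessor.from_pretrained() can now resolve
# checkpoints saved by CustomProcessor.
AutoConfig.register("custom", CustomConfig)
AutoProcessor.register(CustomConfig, CustomProcessor)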
Example #2
    def test_simple_s2t(self):
        model = Speech2TextForConditionalGeneration.from_pretrained(
            "facebook/s2t-small-mustc-en-it-st")
        tokenizer = AutoTokenizer.from_pretrained(
            "facebook/s2t-small-mustc-en-it-st")
        feature_extractor = AutoFeatureExtractor.from_pretrained(
            "facebook/s2t-small-mustc-en-it-st")

        asr = AutomaticSpeechRecognitionPipeline(
            model=model,
            tokenizer=tokenizer,
            feature_extractor=feature_extractor)

        waveform = np.tile(np.arange(1000, dtype=np.float32), 34)

        output = asr(waveform)
        self.assertEqual(output, {"text": "(Applausi)"})

        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy",
                          "clean",
                          split="validation").sort("id")
        filename = ds[40]["file"]
        output = asr(filename)
        self.assertEqual(
            output,
            {"text": "Un uomo disse all'universo: \"Signore, io esisto."})

        filename = ds[40]["file"]
        with open(filename, "rb") as f:
            data = f.read()
        output = asr(data)
        self.assertEqual(
            output,
            {"text": "Un uomo disse all'universo: \"Signore, io esisto."})
Example #3
    def test_feature_extractor_not_found(self):
        with self.assertRaisesRegex(
                EnvironmentError,
                "hf-internal-testing/config-no-model does not appear to have a file named preprocessor_config.json.",
        ):
            _ = AutoFeatureExtractor.from_pretrained(
                "hf-internal-testing/config-no-model")
Example #4
    def test_maskformer(self):
        threshold = 0.8
        model_id = "facebook/maskformer-swin-base-ade"

        model = AutoModelForInstanceSegmentation.from_pretrained(model_id)
        feature_extractor = AutoFeatureExtractor.from_pretrained(model_id)

        image_segmenter = pipeline("image-segmentation", model=model, feature_extractor=feature_extractor)

        image = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
        file = image[0]["file"]
        outputs = image_segmenter(file, threshold=threshold)

        for o in outputs:
            o["mask"] = hashimage(o["mask"])

        self.assertEqual(
            nested_simplify(outputs, decimals=4),
            [
                {"mask": "20d1b9480d1dc1501dbdcfdff483e370", "label": "wall", "score": None},
                {"mask": "0f902fbc66a0ff711ea455b0e4943adf", "label": "house", "score": None},
                {"mask": "4537bdc07d47d84b3f8634b7ada37bd4", "label": "grass", "score": None},
                {"mask": "b7ac77dfae44a904b479a0926a2acaf7", "label": "tree", "score": None},
                {"mask": "e9bedd56bd40650fb263ce03eb621079", "label": "plant", "score": None},
                {"mask": "37a609f8c9c1b8db91fbff269f428b20", "label": "road, route", "score": None},
                {"mask": "0d8cdfd63bae8bf6e4344d460a2fa711", "label": "sky", "score": None},
            ],
        )
Example #5
    def test_revision_not_found(self):
        with self.assertRaisesRegex(
                EnvironmentError,
                r"aaaaaa is not a valid git identifier \(branch name, tag name or commit id\)"
        ):
            _ = AutoFeatureExtractor.from_pretrained(DUMMY_UNKNOWN_IDENTIFIER,
                                                     revision="aaaaaa")
Example #6
    def test_push_to_hub_dynamic_feature_extractor(self):
        CustomFeatureExtractor.register_for_auto_class()
        feature_extractor = CustomFeatureExtractor.from_pretrained(
            SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR)

        with tempfile.TemporaryDirectory() as tmp_dir:
            repo = Repository(
                tmp_dir,
                clone_from=f"{USER}/test-dynamic-feature-extractor",
                use_auth_token=self._token)
            feature_extractor.save_pretrained(tmp_dir)

            # This has added the proper auto_map field to the config
            self.assertDictEqual(
                feature_extractor.auto_map,
                {
                    "AutoFeatureExtractor":
                    "custom_feature_extraction.CustomFeatureExtractor"
                },
            )
            # The code has been copied from fixtures
            self.assertTrue(
                os.path.isfile(
                    os.path.join(tmp_dir, "custom_feature_extraction.py")))

            repo.push_to_hub()

        new_feature_extractor = AutoFeatureExtractor.from_pretrained(
            f"{USER}/test-dynamic-feature-extractor", trust_remote_code=True)
        # Can't make an isinstance check because the new_feature_extractor is from the CustomFeatureExtractor class of a dynamic module
        self.assertEqual(new_feature_extractor.__class__.__name__,
                         "CustomFeatureExtractor")
Example #7
def convert_swinv2_checkpoint(swinv2_name, pytorch_dump_folder_path):
    timm_model = timm.create_model(swinv2_name, pretrained=True)
    timm_model.eval()

    config = get_swinv2_config(swinv2_name)
    model = Swinv2ForImageClassification(config)
    model.eval()

    new_state_dict = convert_state_dict(timm_model.state_dict(), model)
    model.load_state_dict(new_state_dict)

    url = "http://images.cocodataset.org/val2017/000000039769.jpg"

    feature_extractor = AutoFeatureExtractor.from_pretrained("microsoft/{}".format(swinv2_name.replace("_", "-")))
    image = Image.open(requests.get(url, stream=True).raw)
    inputs = feature_extractor(images=image, return_tensors="pt")

    timm_outs = timm_model(inputs["pixel_values"])
    hf_outs = model(**inputs).logits

    assert torch.allclose(timm_outs, hf_outs, atol=1e-3)

    print(f"Saving model {swinv2_name} to {pytorch_dump_folder_path}")
    model.save_pretrained(pytorch_dump_folder_path)

    print(f"Saving feature extractor to {pytorch_dump_folder_path}")
    feature_extractor.save_pretrained(pytorch_dump_folder_path)

    model.push_to_hub(
        repo_path_or_name=Path(pytorch_dump_folder_path, swinv2_name),
        organization="nandwalritik",
        commit_message="Add model",
    )
Example #8
    def test_simple_wav2vec2(self):
        import numpy as np
        from datasets import load_dataset

        model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")
        tokenizer = AutoTokenizer.from_pretrained(
            "facebook/wav2vec2-base-960h")
        feature_extractor = AutoFeatureExtractor.from_pretrained(
            "facebook/wav2vec2-base-960h")

        asr = AutomaticSpeechRecognitionPipeline(
            model=model,
            tokenizer=tokenizer,
            feature_extractor=feature_extractor)

        waveform = np.zeros((34000, ))
        output = asr(waveform)
        self.assertEqual(output, {"text": ""})

        ds = load_dataset("patrickvonplaten/librispeech_asr_dummy",
                          "clean",
                          split="validation")
        filename = ds[0]["file"]
        output = asr(filename)
        self.assertEqual(output,
                         {"text": "A MAN SAID TO THE UNIVERSE SIR I EXIST"})

        filename = ds[0]["file"]
        with open(filename, "rb") as f:
            data = f.read()
        output = asr(data)
        self.assertEqual(output,
                         {"text": "A MAN SAID TO THE UNIVERSE SIR I EXIST"})
Example #9
    def test_for_image_classification(self):
        feature_extractor = AutoFeatureExtractor.from_pretrained(
            "microsoft/dit-base-finetuned-rvlcdip")
        model = AutoModelForImageClassification.from_pretrained(
            "microsoft/dit-base-finetuned-rvlcdip")
        model.to(torch_device)

        from datasets import load_dataset

        dataset = load_dataset("nielsr/rvlcdip-demo")

        image = dataset["train"][0]["image"].convert("RGB")

        inputs = feature_extractor(image, return_tensors="pt").to(torch_device)

        # forward pass
        with torch.no_grad():
            outputs = model(**inputs)
            logits = outputs.logits

        expected_shape = torch.Size((1, 16))
        self.assertEqual(logits.shape, expected_shape)

        expected_slice = torch.tensor(
            [-0.4158, -0.4092, -0.4347],
            device=torch_device,
            dtype=torch.float,
        )
        self.assertTrue(
            torch.allclose(logits[0, :3], expected_slice, atol=1e-4))
Example #10
    def test_simple_wav2vec2(self):
        model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")
        tokenizer = AutoTokenizer.from_pretrained(
            "facebook/wav2vec2-base-960h")
        feature_extractor = AutoFeatureExtractor.from_pretrained(
            "facebook/wav2vec2-base-960h")

        asr = AutomaticSpeechRecognitionPipeline(
            model=model,
            tokenizer=tokenizer,
            feature_extractor=feature_extractor)

        waveform = np.tile(np.arange(1000, dtype=np.float32), 34)
        output = asr(waveform)
        self.assertEqual(output, {"text": ""})

        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy",
                          "clean",
                          split="validation").sort("id")
        filename = ds[40]["file"]
        output = asr(filename)
        self.assertEqual(output,
                         {"text": "A MAN SAID TO THE UNIVERSE SIR I EXIST"})

        filename = ds[40]["file"]
        with open(filename, "rb") as f:
            data = f.read()
        output = asr(data)
        self.assertEqual(output,
                         {"text": "A MAN SAID TO THE UNIVERSE SIR I EXIST"})
Example #11
def convert_weight_and_push(name: str, config: ResNetConfig, save_directory: Path, push_to_hub: bool = True):
    print(f"Converting {name}...")
    with torch.no_grad():
        from_model = timm.create_model(name, pretrained=True).eval()
        our_model = ResNetForImageClassification(config).eval()
        module_transfer = ModuleTransfer(src=from_model, dest=our_model)
        x = torch.randn((1, 3, 224, 224))
        module_transfer(x)

    assert torch.allclose(from_model(x), our_model(x).logits), "The model logits don't match the original one."

    checkpoint_name = f"resnet{'-'.join(name.split('resnet'))}"
    print(checkpoint_name)

    if push_to_hub:
        our_model.push_to_hub(
            repo_path_or_name=save_directory / checkpoint_name,
            commit_message="Add model",
            use_temp_dir=True,
        )

        # We can reuse the ConvNeXt feature extractor for this checkpoint
        feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/convnext-base-224-22k-1k")
        feature_extractor.push_to_hub(
            repo_path_or_name=save_directory / checkpoint_name,
            commit_message="Add feature extractor",
            use_temp_dir=True,
        )

        print(f"Pushed {checkpoint_name}")
Example #12
    def test_chunking(self):
        model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")
        tokenizer = AutoTokenizer.from_pretrained(
            "facebook/wav2vec2-base-960h")
        feature_extractor = AutoFeatureExtractor.from_pretrained(
            "facebook/wav2vec2-base-960h")
        speech_recognizer = pipeline(
            task="automatic-speech-recognition",
            model=model,
            tokenizer=tokenizer,
            feature_extractor=feature_extractor,
            framework="pt",
            chunk_length_s=10.0,
        )

        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy",
                          "clean",
                          split="validation").sort("id")
        audio = ds[40]["audio"]["array"]

        n_repeats = 10
        audio = np.tile(audio, n_repeats)
        output = speech_recognizer([audio], batch_size=2)
        expected_text = "A MAN SAID TO THE UNIVERSE SIR I EXIST " * n_repeats
        expected = [{"text": expected_text.strip()}]
        self.assertEqual(output, expected)
Example #13
    def test_chunk_iterator(self):
        feature_extractor = AutoFeatureExtractor.from_pretrained(
            "facebook/wav2vec2-base-960h")
        inputs = torch.arange(100).long()

        outs = list(chunk_iter(inputs, feature_extractor, 100, 0, 0))
        self.assertEqual(len(outs), 1)
        self.assertEqual([o["stride"] for o in outs], [(100, 0, 0)])
        self.assertEqual([o["input_values"].shape for o in outs], [(1, 100)])
        self.assertEqual([o["is_last"] for o in outs], [True])

        # two chunks no stride
        outs = list(chunk_iter(inputs, feature_extractor, 50, 0, 0))
        self.assertEqual(len(outs), 2)
        self.assertEqual([o["stride"] for o in outs], [(50, 0, 0), (50, 0, 0)])
        self.assertEqual([o["input_values"].shape for o in outs], [(1, 50),
                                                                   (1, 50)])
        self.assertEqual([o["is_last"] for o in outs], [False, True])

        # two chunks incomplete last
        outs = list(chunk_iter(inputs, feature_extractor, 80, 0, 0))
        self.assertEqual(len(outs), 2)
        self.assertEqual([o["stride"] for o in outs], [(80, 0, 0), (20, 0, 0)])
        self.assertEqual([o["input_values"].shape for o in outs], [(1, 80),
                                                                   (1, 20)])
        self.assertEqual([o["is_last"] for o in outs], [False, True])
Example #14
    def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
        r"""
        Instantiate a [`TrOCRProcessor`] from a pretrained TrOCR processor.

        <Tip>

        This class method is simply calling AutoFeatureExtractor's [`~PreTrainedFeatureExtractor.from_pretrained`] and
        TrOCRTokenizer's [`~tokenization_utils_base.PreTrainedTokenizer.from_pretrained`]. Please refer to the
        docstrings of the methods above for more information.

        </Tip>

        Args:
            pretrained_model_name_or_path (`str` or `os.PathLike`):
                This can be either:

                - a string, the *model id* of a pretrained feature_extractor hosted inside a model repo on
                  huggingface.co. Valid model ids can be located at the root-level, like `bert-base-uncased`, or
                  namespaced under a user or organization name, like `dbmdz/bert-base-german-cased`.
                - a path to a *directory* containing a feature extractor file saved using the
                  [`~PreTrainedFeatureExtractor.save_pretrained`] method, e.g., `./my_model_directory/`.
                - a path or url to a saved feature extractor JSON *file*, e.g.,
                  `./my_model_directory/preprocessor_config.json`.
            **kwargs
                Additional keyword arguments passed along to both [`PreTrainedFeatureExtractor`] and
                [`PreTrainedTokenizer`]
        """
        feature_extractor = AutoFeatureExtractor.from_pretrained(
            pretrained_model_name_or_path, **kwargs)
        tokenizer = AutoTokenizer.from_pretrained(
            pretrained_model_name_or_path, **kwargs)

        return cls(feature_extractor=feature_extractor, tokenizer=tokenizer)
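
A minimal usage sketch for this class method, assuming the public microsoft/trocr-base-handwritten checkpoint (any TrOCR repo containing both a preprocessor config and tokenizer files would work):

from transformers import TrOCRProcessor

processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
# The returned processor wraps the feature extractor (for images) and the
# tokenizer (for text) resolved by the two Auto classes above.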
Example #15
    def test_small_model_from_pipeline(self):
        for small_model in self.small_models:

            model = AutoModelForImageClassification.from_pretrained(
                small_model)
            feature_extractor = AutoFeatureExtractor.from_pretrained(
                small_model)
            image_classifier = ImageClassificationPipeline(
                model=model, feature_extractor=feature_extractor)

            for valid_input in self.valid_inputs:
                output = image_classifier(**valid_input)
                top_k = valid_input.get("top_k", 5)

                def assert_valid_pipeline_output(pipeline_output):
                    self.assertTrue(isinstance(pipeline_output, list))
                    self.assertEqual(len(pipeline_output), top_k)
                    for label_result in pipeline_output:
                        self.assertTrue(isinstance(label_result, dict))
                        self.assertIn("label", label_result)
                        self.assertIn("score", label_result)

                if isinstance(valid_input["images"], list):
                    # When images are batched, pipeline output is a list of lists of dictionaries
                    self.assertEqual(len(valid_input["images"]), len(output))
                    for individual_output in output:
                        assert_valid_pipeline_output(individual_output)
                else:
                    # When images are batched, pipeline output is a list of dictionaries
                    assert_valid_pipeline_output(output)
Example #16
def convert_cvt_checkpoint(cvt_file, pytorch_dump_folder):
    """
    Function to convert a Microsoft CvT checkpoint to a Hugging Face checkpoint.
    """
    img_labels_file = "imagenet-1k-id2label.json"
    num_labels = 1000

    repo_id = "datasets/huggingface/label-files"
    num_labels = num_labels
    id2label = json.load(
        open(cached_download(hf_hub_url(repo_id, img_labels_file)), "r"))
    id2label = {int(k): v for k, v in id2label.items()}

    id2label = id2label
    label2id = {v: k for k, v in id2label.items()}

    config = config = CvtConfig(num_labels=num_labels,
                                id2label=id2label,
                                label2id=label2id)

    # For depth size 13 (13 = 1+2+10)
    if cvt_file.rsplit("/", 1)[-1][4:6] == "13":
        config.depth = [1, 2, 10]

    # For depth size 21 (21 = 1+4+16)
    elif cvt_file.rsplit("/", 1)[-1][4:6] == "21":
        config.depth = [1, 4, 16]

    # For wide CvT (similar to Wide-ResNet) depth size 24 (24 = 2 + 2 + 20)
    else:
        config.depth = [2, 2, 20]
        config.num_heads = [3, 12, 16]
        config.embed_dim = [192, 768, 1024]

    model = CvtForImageClassification(config)
    feature_extractor = AutoFeatureExtractor.from_pretrained(
        "facebook/convnext-base-224-22k-1k")
    original_weights = torch.load(cvt_file, map_location=torch.device("cpu"))

    huggingface_weights = OrderedDict()
    list_of_state_dict = []

    for idx in range(config.num_stages):
        if config.cls_token[idx]:
            list_of_state_dict = list_of_state_dict + cls_token(idx)
        list_of_state_dict = list_of_state_dict + embeddings(idx)
        for cnt in range(config.depth[idx]):
            list_of_state_dict = list_of_state_dict + attention(idx, cnt)

    list_of_state_dict = list_of_state_dict + final()
    for gg in list_of_state_dict:
        print(gg)
    for i in range(len(list_of_state_dict)):
        huggingface_weights[list_of_state_dict[i][0]] = original_weights[
            list_of_state_dict[i][1]]

    model.load_state_dict(huggingface_weights)
    model.save_pretrained(pytorch_dump_folder)
    feature_extractor.save_pretrained(pytorch_dump_folder)
Example #17
def get_tiny_feature_extractor_from_checkpoint(checkpoint, tiny_config):
    try:
        feature_extractor = AutoFeatureExtractor.from_pretrained(checkpoint)
    except Exception:
        feature_extractor = None
    if hasattr(tiny_config, "image_size") and feature_extractor:
        feature_extractor = feature_extractor.__class__(size=tiny_config.image_size, crop_size=tiny_config.image_size)
    return feature_extractor
Example #18
    def test_feature_extractor_from_local_directory_from_config(self):
        with tempfile.TemporaryDirectory() as tmpdirname:
            model_config = Wav2Vec2Config()

            # remove feature_extractor_type to make sure config.json alone is enough to load feature processor locally
            config_dict = AutoFeatureExtractor.from_pretrained(
                SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR).to_dict()
            config_dict.pop("feature_extractor_type")
            config = Wav2Vec2FeatureExtractor(**config_dict)

            # save in new folder
            model_config.save_pretrained(tmpdirname)
            config.save_pretrained(tmpdirname)

            config = AutoFeatureExtractor.from_pretrained(tmpdirname)

        self.assertIsInstance(config, Wav2Vec2FeatureExtractor)
Example #19
    def __init__(self, model: str):
        """Create a Hugging Face inference session."""
        self.model = AutoModelForImageClassification.from_pretrained(model)
        self.feature_extractor = AutoFeatureExtractor.from_pretrained(model)
        self.session = pipeline(
            "image-classification",
            model=self.model,
            feature_extractor=self.feature_extractor,
        )
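
A usage sketch for this constructor. The enclosing class is not shown above, so HFSession below is a hypothetical stand-in, and the checkpoint name is illustrative:

# Hypothetical class name and checkpoint; only the call pattern is illustrated.
session = HFSession("google/vit-base-patch16-224")
predictions = session.session("http://images.cocodataset.org/val2017/000000039769.jpg")
print(predictions)  # list of {"label": ..., "score": ...} dicts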
Example #20
    def test_new_feature_extractor_registration(self):
        try:
            AutoConfig.register("custom", CustomConfig)
            AutoFeatureExtractor.register(CustomConfig, CustomFeatureExtractor)
            # Trying to register something existing in the Transformers library will raise an error
            with self.assertRaises(ValueError):
                AutoFeatureExtractor.register(Wav2Vec2Config, Wav2Vec2FeatureExtractor)

            # Now that the config is registered, it can be used as any other config with the auto-API
            feature_extractor = CustomFeatureExtractor.from_pretrained(SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR)
            with tempfile.TemporaryDirectory() as tmp_dir:
                feature_extractor.save_pretrained(tmp_dir)
                new_feature_extractor = AutoFeatureExtractor.from_pretrained(tmp_dir)
                self.assertIsInstance(new_feature_extractor, CustomFeatureExtractor)

        finally:
            if "custom" in CONFIG_MAPPING._extra_content:
                del CONFIG_MAPPING._extra_content["custom"]
            if CustomConfig in FEATURE_EXTRACTOR_MAPPING._extra_content:
                del FEATURE_EXTRACTOR_MAPPING._extra_content[CustomConfig]
Example #21
    def _onnx_export(self,
                     test_name,
                     name,
                     model_name,
                     feature,
                     onnx_config_class_constructor,
                     device="cpu"):
        from transformers.onnx import export

        model_class = FeaturesManager.get_model_class_for_feature(feature)
        config = AutoConfig.from_pretrained(model_name)
        model = model_class.from_config(config)
        onnx_config = onnx_config_class_constructor(model.config)

        if is_torch_available():
            from transformers.utils import torch_version

            if torch_version < onnx_config.torch_onnx_minimum_version:
                pytest.skip(
                    "Skipping due to incompatible PyTorch version. Minimum required is"
                    f" {onnx_config.torch_onnx_minimum_version}, got: {torch_version}"
                )

        # Check the modality of the inputs and instantiate the appropriate preprocessor
        if model.main_input_name == "input_ids":
            preprocessor = AutoTokenizer.from_pretrained(model_name)
            # Useful for causal lm models that do not use pad tokens.
            if not getattr(config, "pad_token_id", None):
                config.pad_token_id = preprocessor.eos_token_id
        elif model.main_input_name == "pixel_values":
            preprocessor = AutoFeatureExtractor.from_pretrained(model_name)
        else:
            raise ValueError(
                f"Unsupported model input name: {model.main_input_name}")

        with NamedTemporaryFile("w") as output:
            try:
                onnx_inputs, onnx_outputs = export(
                    preprocessor,
                    model,
                    onnx_config,
                    onnx_config.default_onnx_opset,
                    Path(output.name),
                    device=device)
                validate_model_outputs(
                    onnx_config,
                    preprocessor,
                    model,
                    Path(output.name),
                    onnx_outputs,
                    onnx_config.atol_for_validation,
                )
            except (RuntimeError, ValueError) as e:
                self.fail(f"{name}, {feature} -> {e}")
Example #22
    def test_feature_extractor_from_local_directory_from_config(self):
        with tempfile.TemporaryDirectory() as tmpdirname:
            model_config = Wav2Vec2Config()

            # remove feature_extractor_type to make sure config.json alone is enough to load feature processor locally
            config_dict = AutoFeatureExtractor.from_pretrained(SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR).to_dict()

            config_dict.pop("feature_extractor_type")
            config = Wav2Vec2FeatureExtractor(**config_dict)

            # save in new folder
            model_config.save_pretrained(tmpdirname)
            config.save_pretrained(tmpdirname)

            config = AutoFeatureExtractor.from_pretrained(tmpdirname)

            # make sure private variable is not incorrectly saved
            dict_as_saved = json.loads(config.to_json_string())
            self.assertTrue("_processor_class" not in dict_as_saved)

        self.assertIsInstance(config, Wav2Vec2FeatureExtractor)
Example #23
def get_tiny_feature_extractor_from_checkpoint(checkpoint, tiny_config):
    try:
        feature_extractor = AutoFeatureExtractor.from_pretrained(checkpoint)
    except Exception:
        feature_extractor = None
    if hasattr(tiny_config, "image_size") and feature_extractor:
        feature_extractor = feature_extractor.__class__(
            size=tiny_config.image_size, crop_size=tiny_config.image_size)

    # Speech2TextModel specific.
    if hasattr(tiny_config, "input_feat_per_channel") and feature_extractor:
        feature_extractor = feature_extractor.__class__(
            feature_size=tiny_config.input_feat_per_channel,
            num_mel_bins=tiny_config.input_feat_per_channel)
    return feature_extractor
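
A short usage sketch under stated assumptions: ViTConfig is one config class that exposes image_size, and the checkpoint name is illustrative:

from transformers import ViTConfig

tiny_config = ViTConfig(image_size=32)
feature_extractor = get_tiny_feature_extractor_from_checkpoint(
    "google/vit-base-patch16-224", tiny_config)
# The extractor keeps its class but is rebuilt for 32x32 inputs, so tests
# that use it run quickly.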
Example #24
    def test_small_model_pt_seq2seq(self):
        model_id = "hf-internal-testing/tiny-random-speech-encoder-decoder"
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        feature_extractor = AutoFeatureExtractor.from_pretrained(model_id)

        speech_recognizer = pipeline(
            task="automatic-speech-recognition",
            model=model_id,
            tokenizer=tokenizer,
            feature_extractor=feature_extractor,
            framework="pt",
        )

        waveform = np.tile(np.arange(1000, dtype=np.float32), 34)
        output = speech_recognizer(waveform)
        self.assertEqual(output, {"text": "あл ش 湯 清 ه ܬ া लᆨしث ल eか u w 全 u"})
Example #25
def convert_weight_and_push(
    name: str,
    config: VanConfig,
    checkpoint: str,
    from_model: nn.Module,
    save_directory: Path,
    push_to_hub: bool = True,
):
    print(f"Downloading weights for {name}...")
    checkpoint_path = cached_download(checkpoint)
    print(f"Converting {name}...")
    from_state_dict = torch.load(checkpoint_path)["state_dict"]
    from_model.load_state_dict(from_state_dict)
    from_model.eval()
    with torch.no_grad():
        our_model = VanForImageClassification(config).eval()
        module_transfer = ModuleTransfer(src=from_model, dest=our_model)
        x = torch.randn((1, 3, 224, 224))
        module_transfer(x)
        our_model = copy_parameters(from_model, our_model)

    assert torch.allclose(
        from_model(x),
        our_model(x).logits), "The model logits don't match the original one."

    checkpoint_name = name
    print(checkpoint_name)

    if push_to_hub:
        our_model.push_to_hub(
            repo_path_or_name=save_directory / checkpoint_name,
            commit_message="Add model",
            use_temp_dir=True,
        )

        # We can reuse the ConvNeXt feature extractor for this checkpoint
        feature_extractor = AutoFeatureExtractor.from_pretrained(
            "facebook/convnext-base-224-22k-1k")
        feature_extractor.push_to_hub(
            repo_path_or_name=save_directory / checkpoint_name,
            commit_message="Add feature extractor",
            use_temp_dir=True,
        )

        print(f"Pushed {checkpoint_name}")
Example #26
    def test_large_model_pt(self):
        model_id = "facebook/detr-resnet-50"

        model = AutoModelForObjectDetection.from_pretrained(model_id)
        feature_extractor = AutoFeatureExtractor.from_pretrained(model_id)
        object_detector = ObjectDetectionPipeline(model=model, feature_extractor=feature_extractor)

        outputs = object_detector("http://images.cocodataset.org/val2017/000000039769.jpg")
        self.assertEqual(
            nested_simplify(outputs, decimals=4),
            [
                {"score": 0.9982, "label": "remote", "box": {"xmin": 40, "ymin": 70, "xmax": 175, "ymax": 117}},
                {"score": 0.9960, "label": "remote", "box": {"xmin": 333, "ymin": 72, "xmax": 368, "ymax": 187}},
                {"score": 0.9955, "label": "couch", "box": {"xmin": 0, "ymin": 1, "xmax": 639, "ymax": 473}},
                {"score": 0.9988, "label": "cat", "box": {"xmin": 13, "ymin": 52, "xmax": 314, "ymax": 470}},
                {"score": 0.9987, "label": "cat", "box": {"xmin": 345, "ymin": 23, "xmax": 640, "ymax": 368}},
            ],
        )

        outputs = object_detector(
            [
                "http://images.cocodataset.org/val2017/000000039769.jpg",
                "http://images.cocodataset.org/val2017/000000039769.jpg",
            ]
        )
        self.assertEqual(
            nested_simplify(outputs, decimals=4),
            [
                [
                    {"score": 0.9982, "label": "remote", "box": {"xmin": 40, "ymin": 70, "xmax": 175, "ymax": 117}},
                    {"score": 0.9960, "label": "remote", "box": {"xmin": 333, "ymin": 72, "xmax": 368, "ymax": 187}},
                    {"score": 0.9955, "label": "couch", "box": {"xmin": 0, "ymin": 1, "xmax": 639, "ymax": 473}},
                    {"score": 0.9988, "label": "cat", "box": {"xmin": 13, "ymin": 52, "xmax": 314, "ymax": 470}},
                    {"score": 0.9987, "label": "cat", "box": {"xmin": 345, "ymin": 23, "xmax": 640, "ymax": 368}},
                ],
                [
                    {"score": 0.9982, "label": "remote", "box": {"xmin": 40, "ymin": 70, "xmax": 175, "ymax": 117}},
                    {"score": 0.9960, "label": "remote", "box": {"xmin": 333, "ymin": 72, "xmax": 368, "ymax": 187}},
                    {"score": 0.9955, "label": "couch", "box": {"xmin": 0, "ymin": 1, "xmax": 639, "ymax": 473}},
                    {"score": 0.9988, "label": "cat", "box": {"xmin": 13, "ymin": 52, "xmax": 314, "ymax": 470}},
                    {"score": 0.9987, "label": "cat", "box": {"xmin": 345, "ymin": 23, "xmax": 640, "ymax": 368}},
                ],
            ],
        )
Example #27
    def test_simple_s2t(self):
        import numpy as np
        from datasets import load_dataset

        model = Speech2TextForConditionalGeneration.from_pretrained(
            "facebook/s2t-small-mustc-en-it-st")
        tokenizer = AutoTokenizer.from_pretrained(
            "facebook/s2t-small-mustc-en-it-st")
        feature_extractor = AutoFeatureExtractor.from_pretrained(
            "facebook/s2t-small-mustc-en-it-st")

        asr = AutomaticSpeechRecognitionPipeline(
            model=model,
            tokenizer=tokenizer,
            feature_extractor=feature_extractor)

        waveform = np.zeros((34000, ))

        output = asr(waveform)
        self.assertEqual(output, {
            "text":
            "E questo è il motivo per cui non ci siamo mai incontrati."
        })

        ds = load_dataset("patrickvonplaten/librispeech_asr_dummy",
                          "clean",
                          split="validation")
        filename = ds[0]["file"]
        output = asr(filename)
        self.assertEqual(
            output,
            {"text": "Un uomo disse all'universo: \"Signore, io esisto."})

        filename = ds[0]["file"]
        with open(filename, "rb") as f:
            data = f.read()
        output = asr(data)
        self.assertEqual(
            output,
            {"text": "Un uomo disse all'universo: \"Signore, io esisto."})
Example #28
def main(args):
    # load dataset
    dataset = load_dataset(args.dataset,
                           args.config,
                           split=args.split,
                           use_auth_token=True)

    # for testing: only process the first two examples as a test
    # dataset = dataset.select(range(10))

    # load processor
    feature_extractor = AutoFeatureExtractor.from_pretrained(args.model_id)
    sampling_rate = feature_extractor.sampling_rate

    # resample audio
    dataset = dataset.cast_column("audio", Audio(sampling_rate=sampling_rate))

    # load eval pipeline
    if args.device is None:
        args.device = 0 if torch.cuda.is_available() else -1
    asr = pipeline("automatic-speech-recognition",
                   model=args.model_id,
                   device=args.device)

    # map function to decode audio
    def map_to_pred(batch):
        prediction = asr(batch["audio"]["array"],
                         chunk_length_s=args.chunk_length_s,
                         stride_length_s=args.stride_length_s)

        batch["prediction"] = prediction["text"]
        batch["target"] = normalize_text(batch["sentence"])
        return batch

    # run inference on all examples
    result = dataset.map(map_to_pred, remove_columns=dataset.column_names)

    # compute and log_results
    # do not change function below
    log_results(result, args)
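
main expects an argparse namespace; a sketch of the command-line wiring, with flag names inferred from the args.* attributes accessed above (an assumption about the original script, not its confirmed interface):

if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    # Flag names mirror the attributes used in main(); they are inferred.
    parser.add_argument("--model_id", type=str, required=True)
    parser.add_argument("--dataset", type=str, required=True)
    parser.add_argument("--config", type=str, required=True)
    parser.add_argument("--split", type=str, default="test")
    parser.add_argument("--chunk_length_s", type=float, default=None)
    parser.add_argument("--stride_length_s", type=float, default=None)
    parser.add_argument("--device", type=int, default=None)
    args = parser.parse_args()
    main(args)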
Example #29
    def test_push_to_hub_dynamic_feature_extractor(self):
        CustomFeatureExtractor.register_for_auto_class()
        feature_extractor = CustomFeatureExtractor.from_pretrained(
            SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR)

        feature_extractor.push_to_hub("test-dynamic-feature-extractor",
                                      use_auth_token=self._token)

        # This has added the proper auto_map field to the config
        self.assertDictEqual(
            feature_extractor.auto_map,
            {
                "AutoFeatureExtractor":
                "custom_feature_extraction.CustomFeatureExtractor"
            },
        )

        new_feature_extractor = AutoFeatureExtractor.from_pretrained(
            f"{USER}/test-dynamic-feature-extractor", trust_remote_code=True)
        # Can't make an isinstance check because the new_feature_extractor is from the CustomFeatureExtractor class of a dynamic module
        self.assertEqual(new_feature_extractor.__class__.__name__,
                         "CustomFeatureExtractor")
Example #30
    def test_small_model_pt(self):
        model_id = "mishig/tiny-detr-mobilenetsv3"

        model = AutoModelForObjectDetection.from_pretrained(model_id)
        feature_extractor = AutoFeatureExtractor.from_pretrained(model_id)
        object_detector = ObjectDetectionPipeline(model=model, feature_extractor=feature_extractor)

        outputs = object_detector("http://images.cocodataset.org/val2017/000000039769.jpg", threshold=0.0)

        self.assertEqual(
            nested_simplify(outputs, decimals=4),
            [
                {"score": 0.3432, "label": "LABEL_0", "box": {"xmin": 160, "ymin": 120, "xmax": 480, "ymax": 359}},
                {"score": 0.3432, "label": "LABEL_0", "box": {"xmin": 160, "ymin": 120, "xmax": 480, "ymax": 359}},
            ],
        )

        outputs = object_detector(
            [
                "http://images.cocodataset.org/val2017/000000039769.jpg",
                "http://images.cocodataset.org/val2017/000000039769.jpg",
            ],
            threshold=0.0,
        )

        self.assertEqual(
            nested_simplify(outputs, decimals=4),
            [
                [
                    {"score": 0.3432, "label": "LABEL_0", "box": {"xmin": 160, "ymin": 120, "xmax": 480, "ymax": 359}},
                    {"score": 0.3432, "label": "LABEL_0", "box": {"xmin": 160, "ymin": 120, "xmax": 480, "ymax": 359}},
                ],
                [
                    {"score": 0.3432, "label": "LABEL_0", "box": {"xmin": 160, "ymin": 120, "xmax": 480, "ymax": 359}},
                    {"score": 0.3432, "label": "LABEL_0", "box": {"xmin": 160, "ymin": 120, "xmax": 480, "ymax": 359}},
                ],
            ],
        )