def test_stage_test_and_valid(tmpdir):
    csv_path = csv_data(tmpdir)
    dm = SpeechRecognitionData.from_csv(
        "file",
        "text",
        train_file=csv_path,
        val_file=csv_path,
        test_file=csv_path,
        batch_size=1,
        num_workers=0,
    )
    batch = next(iter(dm.val_dataloader()))
    assert DefaultDataKeys.INPUT in batch
    assert DefaultDataKeys.TARGET in batch

    batch = next(iter(dm.test_dataloader()))
    assert DefaultDataKeys.INPUT in batch
    assert DefaultDataKeys.TARGET in batch
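# Note: `csv_data(tmpdir)` is a helper defined elsewhere in this test module. A minimal sketch of
# what such a helper might look like (the silent 16 kHz WAV, filenames, and sample transcript are
# assumptions for illustration, not the repository's actual fixture):
import os
import wave


def csv_data(tmpdir):
    # Write a short silent mono WAV so the dataloader has a real audio file to decode.
    audio_path = os.path.join(tmpdir, "sample.wav")
    with wave.open(audio_path, "wb") as wav:
        wav.setnchannels(1)
        wav.setsampwidth(2)
        wav.setframerate(16000)
        wav.writeframes(b"\x00\x00" * 16000)

    # Point the "file" and "text" columns expected by SpeechRecognitionData.from_csv at it.
    csv_path = os.path.join(tmpdir, "data.csv")
    with open(csv_path, "w") as f:
        f.write("file,text\n")
        f.write(f"{audio_path},hello world\n")
    return csv_path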
def test_classification_json(tmpdir):
    json_path = json_data(tmpdir)
    data = SpeechRecognitionData.from_json(
        "file",
        "text",
        train_file=json_path,
        num_workers=0,
        batch_size=2,
    )
    model = SpeechRecognition(backbone=TEST_BACKBONE)
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    trainer.fit(model, datamodule=data)
def from_timit(
    val_split: float = 0.1,
    batch_size: int = 4,
    num_workers: int = 0,
    **input_transform_kwargs,
) -> SpeechRecognitionData:
    """Downloads and loads the TIMIT data set."""
    download_data("https://pl-flash-data.s3.amazonaws.com/timit_data.zip", "./data")
    return SpeechRecognitionData.from_json(
        "file",
        "text",
        train_file="data/timit/train.json",
        test_file="data/timit/test.json",
        val_split=val_split,
        batch_size=batch_size,
        num_workers=num_workers,
        **input_transform_kwargs,
    )
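# A short usage sketch for the `from_timit` helper above (assumes the imports below; the backbone
# name matches the one used in the finetuning example in this repository, and `fast_dev_run` keeps
# the run cheap):
import flash
from flash.audio import SpeechRecognition

if __name__ == "__main__":
    datamodule = from_timit(val_split=0.1, batch_size=4, num_workers=0)
    model = SpeechRecognition(backbone="facebook/wav2vec2-base-960h")
    trainer = flash.Trainer(fast_dev_run=True)
    trainer.finetune(model, datamodule=datamodule, strategy="freeze")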
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import torch

import flash
from flash.audio import SpeechRecognition, SpeechRecognitionData
from flash.core.data.utils import download_data

# 1. Create the DataModule
download_data("https://pl-flash-data.s3.amazonaws.com/timit_data.zip", "./data")

datamodule = SpeechRecognitionData.from_json(
    "file",
    "text",
    train_file="data/timit/train.json",
    test_file="data/timit/test.json",
    batch_size=4,
)

# 2. Build the task
model = SpeechRecognition(backbone="facebook/wav2vec2-base-960h")

# 3. Create the trainer and finetune the model
trainer = flash.Trainer(max_epochs=1, gpus=torch.cuda.device_count())
trainer.finetune(model, datamodule=datamodule, strategy="freeze")

# 4. Predict on audio files!
datamodule = SpeechRecognitionData.from_files(predict_files=["data/timit/example.wav"], batch_size=4)
predictions = trainer.predict(model, datamodule=datamodule)
print(predictions)
def test_audio_module_not_found_error():
    with pytest.raises(ModuleNotFoundError, match="[audio]"):
        SpeechRecognitionData.from_json("file", "text", train_file="", batch_size=1, num_workers=0)
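# Note: the error matched above is Flash's hint that the optional audio dependencies are missing;
# they are typically installed with the "audio" extra, e.g. `pip install 'lightning-flash[audio]'`.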
def test_from_json(tmpdir):
    json_path = json_data(tmpdir)
    dm = SpeechRecognitionData.from_json("file", "text", train_file=json_path, batch_size=1, num_workers=0)
    batch = next(iter(dm.train_dataloader()))
    assert DefaultDataKeys.INPUT in batch
    assert DefaultDataKeys.TARGET in batch
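# As with `csv_data`, `json_data(tmpdir)` is a helper defined elsewhere in the test module. A
# minimal sketch under the same assumptions (one small WAV plus a JSON-lines file with "file" and
# "text" keys, which is the layout `from_json` expects):
import json
import os


def json_data(tmpdir):
    audio_path = os.path.join(tmpdir, "sample.wav")
    # ... write a small WAV here, e.g. as in the `csv_data` sketch above ...
    json_path = os.path.join(tmpdir, "data.json")
    with open(json_path, "w") as f:
        f.write(json.dumps({"file": audio_path, "text": "hello world"}) + "\n")
    return json_path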
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import torch

import flash
from flash.audio import SpeechRecognition, SpeechRecognitionData
from flash.core.data.utils import download_data

# 1. Create the DataModule
download_data("https://pl-flash-data.s3.amazonaws.com/timit_data.zip", "./data")

datamodule = SpeechRecognitionData.from_json(
    input_fields="file",
    target_fields="text",
    train_file="data/timit/train.json",
    test_file="data/timit/test.json",
)

# 2. Build the task
model = SpeechRecognition(backbone="facebook/wav2vec2-base-960h")

# 3. Create the trainer and finetune the model
trainer = flash.Trainer(max_epochs=1, gpus=torch.cuda.device_count())
trainer.finetune(model, datamodule=datamodule, strategy="no_freeze")

# 4. Predict on audio files!
predictions = model.predict(["data/timit/example.wav"])
print(predictions)

# 5. Save the model!
trainer.save_checkpoint("speech_recognition_model.pt")
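# A small follow-up sketch: reloading the checkpoint saved in step 5 for inference. The checkpoint
# path is the one assumed above, and `load_from_checkpoint` is the standard LightningModule API.
from flash.audio import SpeechRecognition

model = SpeechRecognition.load_from_checkpoint("speech_recognition_model.pt")
predictions = model.predict(["data/timit/example.wav"])
print(predictions)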