Python from_huggingface_datasets示例

编程语言: Python

命名空间/包名称: jina.types.document.generators

方法/功能: from_huggingface_datasets

hotexamples.com的示例: 7

Python from_huggingface_datasets - 共找到 7 个示例。这些示例提取自开源项目，是 jina.types.document.generators.from_huggingface_datasets 评价最高的真实 Python 用法示例。您可以为示例评分，以帮助我们提高示例质量。

示例#1

显示文件

文件： test_io.py 项目： paddlelaw/jina

def test_input_huggingface_datasets_with_no_split(dataset_configs):
    """Expect ``ValueError`` when the generator is used without ``split``.

    ``from_huggingface_datasets`` is called with only the dataset path and
    ``name`` from the ``'adversarial'`` entry of ``dataset_configs``; no
    ``split`` argument is supplied. Both the call and the full consumption
    of its result happen inside ``pytest.raises(ValueError)``.
    """
    with pytest.raises(ValueError):
        adversarial = dataset_configs['adversarial']
        docs = from_huggingface_datasets(
            adversarial['dataset_path'], name=adversarial['name']
        )
        # Consume the result inside the ``raises`` block so that an error
        # surfacing during iteration is captured as well.
        for _doc in docs:
            pass

示例#2

显示文件

文件： test_io.py 项目： paddlelaw/jina

def test_input_huggingface_datasets_with_tweet_dataset(dataset_configs):
    """Load the ``'tweet_eval'`` configuration and check the first result.

    The dataset path, ``name`` and ``split`` are all taken from
    ``dataset_configs['tweet_eval']``. The generator output is materialised
    into a list; its first element must be a ``Document`` with truthy
    ``text``.
    """
    tweet_cfg = dataset_configs['tweet_eval']
    docs = from_huggingface_datasets(
        tweet_cfg['dataset_path'],
        name=tweet_cfg['name'],
        split=tweet_cfg['split'],
    )
    result = list(docs)

    first = result[0]
    assert isinstance(first, Document)
    assert first.text

示例#3

显示文件

文件： test_io.py 项目： paddlelaw/jina

def test_input_huggingface_datasets_with_filter_fields_and_no_resolver(
    dataset_configs,
):
    """Expect ``ValueError`` for ``filter_fields=True`` with no resolver.

    ``from_huggingface_datasets`` is called with the ``'adversarial'``
    dataset path and ``name`` plus ``filter_fields=True``; no
    ``field_resolver`` is passed. The call and the consumption of its
    result are both wrapped in ``pytest.raises(ValueError)``.

    NOTE(review): no ``split`` is passed here either, so from this test
    alone it is not evident which missing argument triggers the error --
    confirm against the generator's implementation.
    """
    with pytest.raises(ValueError):
        adversarial = dataset_configs['adversarial']
        docs = from_huggingface_datasets(
            adversarial['dataset_path'],
            name=adversarial['name'],
            filter_fields=True,
        )
        # Iterate inside the ``raises`` block so that an error raised while
        # consuming the result is caught too.
        for _doc in docs:
            pass

示例#4

显示文件

文件： test_io.py 项目： paddlelaw/jina

def test_input_huggingface_datasets_with_field_resolver(dataset_configs):
    """Load the ``'adversarial'`` configuration with a ``field_resolver``.

    The resolver ``{'question': 'text'}`` is passed together with the
    configured ``name`` and ``split``. The first produced item must be a
    ``Document`` with truthy ``text`` and a ``'title'`` key in its ``tags``.

    NOTE(review): presumably the resolver maps the dataset's ``question``
    column onto the document's ``text`` field -- confirm against the
    generator's documentation.
    """
    adversarial = dataset_configs['adversarial']
    docs = from_huggingface_datasets(
        adversarial['dataset_path'],
        field_resolver={'question': 'text'},
        name=adversarial['name'],
        split=adversarial['split'],
    )
    result = list(docs)

    first = result[0]
    assert isinstance(first, Document)
    assert first.text
    assert 'title' in first.tags

示例#5

显示文件

文件： test_io.py 项目： paddlelaw/jina

def test_input_huggingface_datasets_from_csv_file(dataset_configs):
    """Load documents through the ``'csv'`` dataset path from ``docs.csv``.

    The call uses ``data_files='docs.csv'``, ``split='train'`` and the
    resolver ``{'question': 'text'}``. Exactly two items are expected; the
    first must be a ``Document`` whose ``text`` is
    ``'What are the symptoms?'`` and whose ``tags['source']`` is
    ``'testsrc'``.

    The ``dataset_configs`` fixture is requested but not read in the body.

    NOTE(review): ``'docs.csv'`` is a relative path, so it is presumably
    resolved against the current working directory -- confirm how the test
    suite is invoked.
    """
    loader_kwargs = {
        'field_resolver': {'question': 'text'},
        'data_files': 'docs.csv',
        'split': 'train',
    }
    result = list(from_huggingface_datasets('csv', **loader_kwargs))

    # Check the size first so an empty result fails on the length assertion.
    assert len(result) == 2

    first = result[0]
    assert isinstance(first, Document)
    assert first.text == 'What are the symptoms?'
    assert first.tags['source'] == 'testsrc'

示例#6

显示文件

文件： test_client.py 项目： paddlelaw/jina

def test_client_huggingface_datasets(protocol, mocker, func_name):
    """Feed a Hugging Face generator to a ``Flow`` method and check ``on_done``.

    A ``Flow`` is opened with the given ``protocol`` and one ``add()`` call.
    The method named by ``func_name`` is looked up on the flow and invoked
    with a ``from_huggingface_datasets`` generator (``size=2``,
    ``split='test'``) as input and a mock as the ``on_done`` callback. The
    mock must have been called exactly once.
    """
    with Flow(protocol=protocol).add() as flow:
        on_done_mock = mocker.Mock()
        # Resolve the method before building the inputs, mirroring the
        # evaluation order of a single call expression.
        request_method = getattr(flow, f'{func_name}')
        inputs = from_huggingface_datasets(
            dataset_path='adversarial_qa',
            size=2,
            name='adversarialQA',
            split='test',
            field_resolver={'question': 'text'},
        )
        request_method(inputs, on_done=on_done_mock)
        on_done_mock.assert_called_once()

示例#7

显示文件

文件： test_io.py 项目： paddlelaw/jina

def test_input_huggingface_datasets_from_path(
    dataset_configs, size, sampling_rate
):
    """Load the ``'adversarial'`` configuration with ``size``/``sampling_rate``.

    The generator is called with the configured path, ``name`` and
    ``split`` plus the parametrised ``size`` and ``sampling_rate``, and its
    output is materialised into a list. When ``size`` is not ``None`` the
    list length must equal ``size``; in every case the first item must be a
    ``Document``.
    """
    adversarial = dataset_configs['adversarial']
    docs = from_huggingface_datasets(
        adversarial['dataset_path'],
        size=size,
        name=adversarial['name'],
        sampling_rate=sampling_rate,
        split=adversarial['split'],
    )
    result = list(docs)

    # The length is only constrained when an explicit size was requested.
    if size is not None:
        assert len(result) == size

    first = result[0]
    assert isinstance(first, Document)