Exemplo n.º 1
0
def test_from_pandas():
    """Build a Dataset from an in-memory DataFrame and check its columns, values and length."""
    column_a = [1, 2, 3]
    column_b = [4, 5, 6]
    frame = pd.DataFrame({"a": column_a, "b": column_b})

    ds = Dataset.from_pandas(frame)

    # Column order is expected to follow the DataFrame's column order.
    expected_columns = ["a", "b"]
    assert ds.dataset.column_names == expected_columns
    assert ds["a"] == [1, 2, 3]
    assert len(ds) == 3
Exemplo n.º 2
0
def test_from_parquet_file(resources_data_path):
    """Example of loading a parquet file: read it with pandas, then wrap the frame in a Dataset.

    ``resources_data_path`` is joined with ``"test.parquet"`` using ``/``,
    so it is presumably a ``pathlib.Path``-like fixture pointing at the test resources.
    """
    parquet_frame = pd.read_parquet(resources_data_path / "test.parquet")
    dataset = Dataset.from_pandas(parquet_frame)

    columns = dataset.column_names
    assert "reviewerID" in columns
Exemplo n.º 3
0
def test_from_excel_file(resources_data_path):
    """Example of loading an excel file: read it with pandas and apply an explicit feature schema.

    ``resources_data_path`` is joined with ``"test.xlsx"`` using ``/``,
    so it is presumably a ``pathlib.Path``-like fixture pointing at the test resources.
    """
    excel_frame = pd.read_excel(resources_data_path / "test.xlsx")

    # One Value instance per dtype, shared by every column of that dtype.
    as_string = Value("string")
    as_int = Value("int64")
    schema = {
        "Notification": as_int,
        "Type": as_string,
        "Plant": as_int,
        "Serial": as_string,
    }
    features = Features(**schema)

    dataset = Dataset.from_pandas(excel_frame, features=features)

    assert len(dataset) > 0
Exemplo n.º 4
0
def training_dataset() -> Dataset:
    """Return a three-row Dataset with a ``text`` column and an ``entities`` column.

    Each entry of ``entities`` is a list of dicts with ``start``, ``end`` and
    ``label`` keys; the last row has no entities at all.
    """
    texts = [
        "This is a simple NER test",
        "This is a simple NER test with misaligned spans",
        "No NER here",
    ]
    # start/end look like character offsets into the matching text — TODO confirm.
    entities = [
        [{"start": 17, "end": 20, "label": "NER"}],
        [{"start": 17, "end": 22, "label": "NER"}],
        [],
    ]

    frame = pd.DataFrame({"text": texts, "entities": entities})
    return Dataset.from_pandas(frame)