Exemplo n.º 1
0
def test_array_xd_with_none():
    """Check extraction of an ``Array2D`` column that contains a ``None`` row.

    Two layouts are asserted:

    * fixed shape ``(2, 2)``: the column is extracted as a single ``float64``
      array of shape ``(3, 2, 2)`` whose missing row is entirely NaN;
    * dynamic first dimension ``(None, 2)``: the column is extracted as a 1-D
      ``object`` array of length 3 whose missing entry is a single NaN.
    """
    dummy_array = np.array([[1, 2], [3, 4]], dtype="int32")

    # Fixed shape
    features = datasets.Features(
        {"foo": datasets.Array2D(dtype="int32", shape=(2, 2))})
    dataset = datasets.Dataset.from_dict(
        {"foo": [dummy_array, None, dummy_array]}, features=features)
    arr = NumpyArrowExtractor().extract_column(dataset._data)
    # One condition per assert so a failure points at the exact mismatch.
    assert isinstance(arr, np.ndarray)
    assert arr.dtype == np.float64
    assert arr.shape == (3, 2, 2)
    assert np.allclose(arr[0], dummy_array)
    assert np.allclose(arr[2], dummy_array)
    assert np.all(np.isnan(arr[1]))  # broadcasted np.nan - use np.all

    # Dynamic shape
    features = datasets.Features(
        {"foo": datasets.Array2D(dtype="int32", shape=(None, 2))})
    dataset = datasets.Dataset.from_dict(
        {"foo": [dummy_array, None, dummy_array]}, features=features)
    arr = NumpyArrowExtractor().extract_column(dataset._data)
    assert isinstance(arr, np.ndarray)
    # ``np.object`` was only a deprecated alias of the builtin and is absent
    # from current NumPy releases; the builtin ``object`` is the same dtype
    # spec and works on every NumPy version.
    assert arr.dtype == object
    assert arr.shape == (3,)
    np.testing.assert_equal(arr[0], dummy_array)
    np.testing.assert_equal(arr[2], dummy_array)
    assert np.isnan(arr[1])  # a single np.nan value - np.all not needed
Exemplo n.º 2
0
def test_table_to_pandas(dtype, dummy_value):
    """Convert a one-row 2x2 ``Array2D`` table to pandas and compare the values.

    The ``foo`` column must carry ``PandasArrayExtensionDtype`` and, once
    converted to NumPy, must equal the input values in the requested dtype.
    """
    schema = datasets.Features(
        {"foo": datasets.Array2D(dtype=dtype, shape=(2, 2))}
    )
    dataset = datasets.Dataset.from_dict(
        {"foo": [[[dummy_value] * 2] * 2]},
        features=schema,
    )
    frame = dataset._data.to_pandas()

    # Exact type match on the column's extension dtype.
    assert type(frame.foo.dtype) == PandasArrayExtensionDtype

    expected = np.array([[[dummy_value] * 2] * 2], dtype=np.dtype(dtype))
    actual = frame.foo.to_numpy()
    np.testing.assert_equal(actual, expected)
Exemplo n.º 3
0
 def _info(self):
     """Return the ``DatasetInfo``: description, schema, homepage, license, citation."""

     def matrix_3x3():
         # 3x3 float64 matrix feature.
         return datasets.Array2D(shape=(3, 3), dtype="float64")

     def vector_3():
         # Length-3 sequence of float64 values.
         return datasets.Sequence(datasets.Value("float64"), length=3)

     def calibration():
         # The RGB and depth calibration entries share this layout; each call
         # builds fresh feature objects.
         return {
             "intrisic_mat": matrix_3x3(),
             "extrinsic_mat": {
                 "rotation": matrix_3x3(),
                 "translation": vector_3(),
             },
         }

     schema = datasets.Features({
         "sequence_number": datasets.Value("string"),
         "subject_id": datasets.Value("string"),
         "rgb": datasets.Sequence(datasets.Image()),
         "rgb_cal": calibration(),
         "depth": datasets.Sequence(datasets.Value("string")),
         "depth_cal": calibration(),
         "head_pose_gt": datasets.Sequence({
             "center": vector_3(),
             "rotation": matrix_3x3(),
         }),
         "head_template": datasets.Value("string"),
     })

     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         homepage=_HOMEPAGE,
         license=_LICENSE,
         citation=_CITATION,
     )
Exemplo n.º 4
0
def test_array_xd_numpy_arrow_extractor(dtype, dummy_value):
    """Extract a one-row 2x2 ``Array2D`` column and compare it with the input.

    The extracted column must be a NumPy array equal to the input values in
    the requested dtype.
    """
    schema = datasets.Features(
        {"foo": datasets.Array2D(dtype=dtype, shape=(2, 2))}
    )
    dataset = datasets.Dataset.from_dict(
        {"foo": [[[dummy_value] * 2] * 2]},
        features=schema,
    )
    extracted = NumpyArrowExtractor().extract_column(dataset._data)

    assert isinstance(extracted, np.ndarray)
    expected = np.array([[[dummy_value] * 2] * 2], dtype=np.dtype(dtype))
    np.testing.assert_equal(extracted, expected)
Exemplo n.º 5
0
 def _info(self):
     """Return the ``DatasetInfo``: a 28x28 uint8 image and a ten-class digit label."""
     # Class names are the decimal digits "0" through "9", in order.
     digit_names = [str(digit) for digit in range(10)]
     schema = datasets.Features({
         "image": datasets.Array2D(shape=(28, 28), dtype="uint8"),
         "label": datasets.features.ClassLabel(names=digit_names),
     })
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         supervised_keys=("image", "label"),
         homepage="http://yann.lecun.com/exdb/mnist/",
         citation=_CITATION,
     )
Exemplo n.º 6
0
 def _info(self):
     """Return the ``DatasetInfo``: a 28x28 uint8 image and a ten-class label."""
     # Label names in class-index order.
     class_names = [
         "T - shirt / top", "Trouser", "Pullover", "Dress", "Coat",
         "Sandal", "Shirt", "Sneaker", "Bag", "Ankle boot",
     ]
     schema = datasets.Features({
         "image": datasets.Array2D(shape=(28, 28), dtype="uint8"),
         "label": datasets.features.ClassLabel(names=class_names),
     })
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         supervised_keys=("image", "label"),
         homepage="https://github.com/zalandoresearch/fashion-mnist",
         citation=_CITATION,
     )
Exemplo n.º 7
0
USAGE:
``python extracting_data.py -i <img_dir> -o <dataset_file>.datasets <batch_size>``
"""

TEST = False
CONFIG = Config.from_pretrained("unc-nlp/frcnn-vg-finetuned")
DEFAULT_SCHEMA = datasets.Features(
    OrderedDict({
        "attr_ids":
        datasets.Sequence(length=CONFIG.MAX_DETECTIONS,
                          feature=datasets.Value("float32")),
        "attr_probs":
        datasets.Sequence(length=CONFIG.MAX_DETECTIONS,
                          feature=datasets.Value("float32")),
        "boxes":
        datasets.Array2D((CONFIG.MAX_DETECTIONS, 4), dtype="float32"),
        "normalized_boxes":
        datasets.Array2D((CONFIG.MAX_DETECTIONS, 4), dtype="float32"),
        "img_id":
        datasets.Value("string"),
        "obj_ids":
        datasets.Sequence(length=CONFIG.MAX_DETECTIONS,
                          feature=datasets.Value("float32")),
        "obj_probs":
        datasets.Sequence(length=CONFIG.MAX_DETECTIONS,
                          feature=datasets.Value("float32")),
        "roi_features":
        datasets.Array2D((CONFIG.MAX_DETECTIONS, 2048), dtype="float32"),
        "sizes":
        datasets.Sequence(length=2, feature=datasets.Value("float32")),
        "preds_per_image":
Exemplo n.º 8
0
 def _info(self):
     """Return the ``DatasetInfo`` whose schema depends on ``self.config.name``.

     * ``"raw"`` and ``"preprocessed_simplified_drawings"`` share the same
       metadata columns and differ only in the ``drawing`` column;
     * ``"preprocessed_bitmaps"`` exposes ``image``/``label`` and is the only
       configuration that sets an ``ImageClassification`` task template;
     * any other name gets ``word`` plus a variable-length ``(None, 3)``
       int16 ``drawing`` array.
     """
     config_name = self.config.name
     task_templates = None

     def seq_of(value_dtype):
         # Variable-length sequence of scalar values of the given dtype.
         return datasets.Sequence(datasets.Value(value_dtype))

     if config_name in ("raw", "preprocessed_simplified_drawings"):
         if config_name == "raw":
             drawing = datasets.Sequence({
                 "x": seq_of("float32"),
                 "y": seq_of("float32"),
                 "t": seq_of("int32"),
             })
         else:
             drawing = datasets.Sequence({
                 "x": seq_of("uint8"),
                 "y": seq_of("uint8"),
             })
         features = datasets.Features({
             "key_id": datasets.Value("string"),
             "word": datasets.ClassLabel(names=_NAMES),
             "recognized": datasets.Value("bool"),
             "timestamp": datasets.Value("timestamp[us, tz=UTC]"),
             "countrycode": datasets.Value("string"),
             "drawing": drawing,
         })
     elif config_name == "preprocessed_bitmaps":
         features = datasets.Features({
             "image": datasets.Image(),
             "label": datasets.ClassLabel(names=_NAMES),
         })
         task_templates = [
             ImageClassification(image_column="image", label_column="label")
         ]
     else:  # sketch_rnn, sketch_rnn_full
         features = datasets.Features({
             "word": datasets.ClassLabel(names=_NAMES),
             "drawing": datasets.Array2D(shape=(None, 3), dtype="int16"),
         })

     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=features,
         homepage=_HOMEPAGE,
         license=_LICENSE,
         citation=_CITATION,
         task_templates=task_templates,
     )
Exemplo n.º 9
0

"""
USAGE:
``python extracting_data.py -i <img_dir> -o <dataset_file>.datasets <batch_size>``
"""


TEST = False
# Configuration loaded from the "unc-nlp/frcnn-vg-finetuned" identifier; its
# MAX_DETECTIONS value sizes the per-detection columns of the schema below.
CONFIG = Config.from_pretrained("unc-nlp/frcnn-vg-finetuned")
# Default output schema; column order follows the order of the pairs listed here.
DEFAULT_SCHEMA = datasets.Features(
    OrderedDict(
        [
            (
                "attr_ids",
                datasets.Sequence(feature=datasets.Value("float32"), length=CONFIG.MAX_DETECTIONS),
            ),
            (
                "attr_probs",
                datasets.Sequence(feature=datasets.Value("float32"), length=CONFIG.MAX_DETECTIONS),
            ),
            ("boxes", datasets.Array2D((CONFIG.MAX_DETECTIONS, 4), dtype="float32")),
            ("img_id", datasets.Value("int32")),
            (
                "obj_ids",
                datasets.Sequence(feature=datasets.Value("float32"), length=CONFIG.MAX_DETECTIONS),
            ),
            (
                "obj_probs",
                datasets.Sequence(feature=datasets.Value("float32"), length=CONFIG.MAX_DETECTIONS),
            ),
            ("roi_features", datasets.Array2D((CONFIG.MAX_DETECTIONS, 2048), dtype="float32")),
            ("sizes", datasets.Sequence(feature=datasets.Value("float32"), length=2)),
            ("preds_per_image", datasets.Value(dtype="int32")),
        ]
    )
)


class Extract:
    def __init__(self, argv=sys.argv[1:]):
        inputdir = None
        outputfile = None
Exemplo n.º 10
0
    dummy_array = np.array([[1, 2], [3, 4]], dtype="int32")
    dataset = datasets.Dataset.from_dict(
        {"foo": [dummy_array, None, dummy_array]}, features=features)
    arr = NumpyArrowExtractor().extract_column(dataset._data)
    assert isinstance(
        arr, np.ndarray) and arr.dtype == np.object and arr.shape == (3, )
    np.testing.assert_equal(arr[0], dummy_array)
    np.testing.assert_equal(arr[2], dummy_array)
    assert np.isnan(arr[1])  # a single np.nan value - np.all not needed


@pytest.mark.parametrize(
    "data, feature, expected",
    [
        # No explicit feature: the schema is left for the library to infer.
        (
            np.zeros((2, 2)),
            None,
            [[0.0, 0.0], [0.0, 0.0]],
        ),
        (
            np.zeros((2, 3)),
            datasets.Array2D(shape=(2, 3), dtype="float32"),
            [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]],
        ),
        (
            [np.zeros(2)],
            datasets.Array2D(shape=(1, 2), dtype="float32"),
            [[0.0, 0.0]],
        ),
        (
            [np.zeros((2, 3))],
            datasets.Array3D(shape=(1, 2, 3), dtype="float32"),
            [[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]],
        ),
    ],
)
def test_array_xd_with_np(data, feature, expected):
    """Build a one-row dataset from NumPy input and compare the row read back.

    When ``feature`` is falsy no ``Features`` object is passed to
    ``from_dict``; otherwise the column is declared with that feature.
    """
    if feature:
        schema = datasets.Features({"col": feature})
    else:
        schema = None
    dataset = datasets.Dataset.from_dict({"col": [data]}, features=schema)
    first_row = dataset[0]
    assert first_row["col"] == expected