Example #1
    def test_csv_csv_writeall_tensorlist(self):
        # Read and write
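        # The first num_labels (2) columns use integer defaults and are
        # returned as labels; the remaining three float columns are the data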
        dataset1 = spn.CSVFileDataset(self.data_path("data_int1.csv"),
                                      num_vals=[None] * 3,
                                      defaults=[[101], [102], [103.0], [104.0],
                                                [105.0]],
                                      num_epochs=2,
                                      batch_size=4,
                                      shuffle=False,
                                      num_labels=2,
                                      min_after_dequeue=1000,
                                      num_threads=1,
                                      allow_smaller_final_batch=True)
        path = self.out_path(self.cid() + ".csv")
        writer = spn.CSVDataWriter(path)
        data1 = dataset1.read_all()
        dataset1.write_all(writer)

        # Read again
        dataset2 = spn.CSVFileDataset(path,
                                      num_vals=[None] * 3,
                                      defaults=[[201], [202], [203.0], [204.0],
                                                [205.0]],
                                      num_epochs=1,
                                      batch_size=4,
                                      shuffle=False,
                                      num_labels=2,
                                      min_after_dequeue=1000,
                                      num_threads=1,
                                      allow_smaller_final_batch=True)
        data2 = dataset2.read_all()

        # Compare
        np.testing.assert_array_almost_equal(data1[0], data2[0])
        np.testing.assert_array_equal(data1[1], data2[1])
Example #2
 def test_read_all_labeled_csv_file_dataset(self):
     """Test read_all for CSV file with 2 labels."""
     dataset = spn.CSVFileDataset(
         self.data_path(["data_int1.csv", "data_int2.csv"]),
         num_vals=[255] * 3,
         defaults=[[101], [102], [103], [104], [105]],
         num_epochs=2,
         batch_size=3,
         shuffle=False,
         num_labels=2,
         min_after_dequeue=1000,
         num_threads=1,
         allow_smaller_final_batch=True)
     data = dataset.read_all()
     self.assertEqual(len(data), 2)
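     # data[0] holds the samples (last 3 columns, with missing CSV fields
     # replaced by the defaults 103 and 104, repeated for 2 epochs);
     # data[1] holds the labels (first 2 columns)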
     np.testing.assert_array_equal(
         data[0],
         np.array(
             [[3, 4, 5], [8, 9, 10], [103, 14, 15], [18, 19, 20],
              [103, 24, 25], [28, 104, 30], [33, 104, 35], [38, 104, 40],
              [43, 104, 45], [48, 104, 50], [3, 4, 5], [8, 9, 10],
              [103, 14, 15], [18, 19, 20], [103, 24, 25], [28, 104, 30],
              [33, 104, 35], [38, 104, 40], [43, 104, 45], [48, 104, 50]],
             dtype=np.int32))
     np.testing.assert_array_equal(
         data[1],
         np.array([[1, 2], [6, 102], [11, 12], [16, 102], [21, 22],
                   [26, 27], [31, 32], [36, 37], [41, 42], [46, 47], [1, 2],
                   [6, 102], [11, 12], [16, 102], [21, 22], [26, 27],
                   [31, 32], [36, 37], [41, 42], [46, 47]],
                  dtype=np.int32))
Example #3
    def test_image_gray_float_csv_writeall(self):
        # Read and write
        dataset1 = spn.ImageDataset(
            image_files=self.data_path("img_dir1/*-{*}.png"),
            format=spn.ImageFormat.FLOAT,
            num_epochs=1,
            batch_size=2,
            shuffle=False,
            ratio=1,
            crop=0,
            accurate=True,
            allow_smaller_final_batch=True)
        writer = spn.CSVDataWriter(path=self.out_path(self.cid() + ".csv"))

        data1 = dataset1.read_all()
        dataset1.write_all(writer)

        # Re-read
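        # The written CSV holds one (string) label column followed by
        # 25 float pixel columns, hence the 1 + 25 defaults below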
        dataset2 = spn.CSVFileDataset(files=self.out_path(self.cid() + ".csv"),
                                      num_vals=[None] * 25,
                                      defaults=[[b'']] + [[1.0]
                                                          for _ in range(25)],
                                      num_epochs=1,
                                      batch_size=2,
                                      shuffle=False,
                                      num_labels=1,
                                      allow_smaller_final_batch=True)
        data2 = dataset2.read_all()

        # Compare
        np.testing.assert_allclose(data1[0], data2[0])
        np.testing.assert_array_equal(data1[1], data2[1])
Example #4
    def test_csv_data_writer(self):
        # Write
        path = self.out_path(self.cid() + ".csv")
        writer = spn.CSVDataWriter(path)

        arr1 = np.array([1, 2, 3, 4])
        arr2 = np.array([[1 / 1, 1 / 2], [1 / 3, 1 / 4], [1 / 5, 1 / 6],
                         [1 / 7, 1 / 8]])
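        # Each write() appends the given samples (arr2) and labels (arr1),
        # so the file ends up containing both arrays twice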
        writer.write(arr2, arr1)
        writer.write(arr2, arr1)

        # Read
        dataset = spn.CSVFileDataset(path,
                                     num_vals=[None] * 2,
                                     defaults=[[1], [1.0], [1.0]],
                                     num_epochs=1,
                                     batch_size=10,
                                     shuffle=False,
                                     num_labels=1,
                                     min_after_dequeue=1000,
                                     num_threads=1,
                                     allow_smaller_final_batch=True)
        data = dataset.read_all()

        # Compare
        np.testing.assert_array_almost_equal(np.concatenate((arr2, arr2)),
                                             data[0])
        np.testing.assert_array_equal(np.concatenate((arr1, arr1)),
                                      data[1].flatten())
Example #5
    def test_labeled_csv_file_dataset_int(self):
        """Batch generation for CSV file with integer data and 2 labels"""
        # Note: shuffling is NOT tested
        dataset = spn.CSVFileDataset(
            self.data_path(["data_int1.csv", "data_int2.csv"]),
            num_vals=[255] * 3,
            defaults=[[101], [102], [103], [104], [105]],
            num_epochs=2,
            batch_size=3,
            shuffle=False,
            num_labels=2,
            min_after_dequeue=1000,
            num_threads=1,
            allow_smaller_final_batch=True)
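        # Expected batches: 2 epochs over both files with batch_size=3;
        # missing CSV fields are replaced by the defaults (103, 104)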
        batches = [[
            np.array([[1, 2], [6, 102], [11, 12]], dtype=np.int32),
            np.array([[3, 4, 5], [8, 9, 10], [103, 14, 15]], dtype=np.int32)
        ],
                   [
                       np.array([[16, 102], [21, 22], [26, 27]],
                                dtype=np.int32),
                       np.array([[18, 19, 20], [103, 24, 25], [28, 104, 30]],
                                dtype=np.int32)
                   ],
                   [
                       np.array([[31, 32], [36, 37], [41, 42]],
                                dtype=np.int32),
                       np.array([[33, 104, 35], [38, 104, 40], [43, 104, 45]],
                                dtype=np.int32)
                   ],
                   [
                       np.array([[46, 47], [1, 2], [6, 102]], dtype=np.int32),
                       np.array([[48, 104, 50], [3, 4, 5], [8, 9, 10]],
                                dtype=np.int32)
                   ],
                   [
                       np.array([[11, 12], [16, 102], [21, 22]],
                                dtype=np.int32),
                       np.array([[103, 14, 15], [18, 19, 20], [103, 24, 25]],
                                dtype=np.int32)
                   ],
                   [
                       np.array([[26, 27], [31, 32], [36, 37]],
                                dtype=np.int32),
                       np.array([[28, 104, 30], [33, 104, 35], [38, 104, 40]],
                                dtype=np.int32)
                   ],
                   [
                       np.array([[41, 42], [46, 47]], dtype=np.int32),
                       np.array([[43, 104, 45], [48, 104, 50]], dtype=np.int32)
                   ]]
        # Since we changed the order of the data returned by CSVFileDataset,
        # we swap the order in the expected batches accordingly
        for b in batches:
            b[1], b[0] = b[0], b[1]

        self.generic_dataset_test(dataset, batches)
Example #6
    def test_labeled_csv_file_dataset_float(self):
        """Batch generation for CSV file with float data and 2 labels"""
        # Note: shuffling is NOT tested
        dataset = spn.CSVFileDataset(self.data_path("data_mix.csv"),
                                     num_vals=[None] * 3,
                                     defaults=[[101.0], [102.0], [103.0],
                                               [104.0], [105.0]],
                                     num_epochs=2,
                                     batch_size=3,
                                     shuffle=False,
                                     num_labels=2,
                                     min_after_dequeue=1000,
                                     num_threads=1,
                                     allow_smaller_final_batch=True)
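        # 5 rows per epoch, 2 epochs and batch_size=3 give three full
        # batches plus a final batch with a single row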
        batches = [
            [
                np.array([[1., 2.], [6., 102.], [11., 12.]], dtype=np.float32),
                np.array([[3., 4., 5.], [8., 104., 10.], [103., 104., 15.]],
                         dtype=np.float32)
            ],
            [
                np.array([[16., 102.], [21., 22.], [1., 2.]],
                         dtype=np.float32),
                np.array([[18., 19., 20.], [103., 24., 25.], [3., 4., 5.]],
                         dtype=np.float32)
            ],
            [
                np.array([[6., 102.], [11., 12.], [16., 102.]],
                         dtype=np.float32),
                np.array([[8., 104., 10.], [103., 104., 15.], [18., 19., 20.]],
                         dtype=np.float32)
            ],
            [
                np.array([[21., 22.]], dtype=np.float32),
                np.array([[103., 24., 25.]], dtype=np.float32)
            ]
        ]
        # Since we changed the order of the data returned by CSVFileDataset,
        # we swap the order in the expected batches accordingly
        for b in batches:
            b[1], b[0] = b[0], b[1]

        self.generic_dataset_test(dataset, batches)
Example #7
 def test_unlabeled_csv_file_dataset_with_final_batch(self):
     """Batch generation (without smaller final batch) for CSV file
     with labels"""
     # Note: shuffling is NOT tested
     dataset = spn.CSVFileDataset(
         self.data_path(["data_int1.csv", "data_int2.csv"]),
         num_vals=[255] * 5,
         defaults=[[101], [102], [103], [104], [105]],
         num_epochs=2,
         batch_size=3,
         shuffle=False,
         num_labels=0,
         min_after_dequeue=1000,
         num_threads=1,
         allow_smaller_final_batch=True)
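     # With num_labels=0 no label split is performed, so each batch is a
     # single array of full 5-column rows; the final batch holds the
     # 2 leftover rows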
     batches = [
         np.array(
             [[1, 2, 3, 4, 5], [6, 102, 8, 9, 10], [11, 12, 103, 14, 15]],
             dtype=np.int32),
         np.array([[16, 102, 18, 19, 20], [21, 22, 103, 24, 25],
                   [26, 27, 28, 104, 30]],
                  dtype=np.int32),
         np.array([[31, 32, 33, 104, 35], [36, 37, 38, 104, 40],
                   [41, 42, 43, 104, 45]],
                  dtype=np.int32),
         np.array(
             [[46, 47, 48, 104, 50], [1, 2, 3, 4, 5], [6, 102, 8, 9, 10]],
             dtype=np.int32),
         np.array([[11, 12, 103, 14, 15], [16, 102, 18, 19, 20],
                   [21, 22, 103, 24, 25]],
                  dtype=np.int32),
         np.array([[26, 27, 28, 104, 30], [31, 32, 33, 104, 35],
                   [36, 37, 38, 104, 40]],
                  dtype=np.int32),
         np.array([[41, 42, 43, 104, 45], [46, 47, 48, 104, 50]],
                  dtype=np.int32)
     ]
     self.generic_dataset_test(dataset, batches)
Example #8
    def test_labeled_csv_file_dataset_int_onelabel(self):
        """Batch generation for CSV file with integer data and 1 label"""
        # Note: shuffling is NOT tested
        dataset = spn.CSVFileDataset(
            self.data_path(["data_int1.csv", "data_int2.csv"]),
            num_vals=[255] * 4,
            defaults=[[101], [102], [103], [104], [105]],
            num_epochs=2,
            batch_size=3,
            shuffle=False,
            num_labels=1,
            min_after_dequeue=1000,
            num_threads=1,
            allow_smaller_final_batch=True)
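        # With num_labels=1 only the first CSV column is returned as the
        # label; the remaining four columns form the data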
        batches = [
            [
                np.array([[1], [6], [11]], dtype=np.int32),
                np.array([[2, 3, 4, 5], [102, 8, 9, 10], [12, 103, 14, 15]],
                         dtype=np.int32)
            ],
            [
                np.array([[16], [21], [26]], dtype=np.int32),
                np.array(
                    [[102, 18, 19, 20], [22, 103, 24, 25], [27, 28, 104, 30]],
                    dtype=np.int32)
            ],
            [
                np.array([[31], [36], [41]], dtype=np.int32),
                np.array(
                    [[32, 33, 104, 35], [37, 38, 104, 40], [42, 43, 104, 45]],
                    dtype=np.int32)
            ],
            [
                np.array([[46], [1], [6]], dtype=np.int32),
                np.array([[47, 48, 104, 50], [2, 3, 4, 5], [102, 8, 9, 10]],
                         dtype=np.int32)
            ],
            [
                np.array([[11], [16], [21]], dtype=np.int32),
                np.array(
                    [[12, 103, 14, 15], [102, 18, 19, 20], [22, 103, 24, 25]],
                    dtype=np.int32)
            ],
            [
                np.array([[26], [31], [36]], dtype=np.int32),
                np.array(
                    [[27, 28, 104, 30], [32, 33, 104, 35], [37, 38, 104, 40]],
                    dtype=np.int32)
            ],
            [
                np.array([[41], [46]], dtype=np.int32),
                np.array([[42, 43, 104, 45], [47, 48, 104, 50]],
                         dtype=np.int32)
            ]
        ]
        # Since we changed the order of the data returned by CSVFileDataset,
        # we swap the order in the expected batches accordingly
        for b in batches:
            b[1], b[0] = b[0], b[1]

        self.generic_dataset_test(dataset, batches)