Пример #1
0
    def read_database(pool, stream):
        """
        Reads a training database from the specified CSV stream using the
        specified processing pool to improve I/O performance.

        Every line yielded by ``CsvIO.generate_lines`` is handed to
        ``CsvIO.process_line`` through ``pool.map``.  The per-line results
        are then flattened into value, row-index and column-index lists,
        where the row index of a line is its position in the result
        sequence.

        :param pool: The processing pool to use.
        :param stream: The CSV stream to read from.
        :return: A database filled with data to train on.
        """
        processor = partial(CsvIO.process_line, skip_class=False, skip_id=True)
        results = pool.map(processor, CsvIO.generate_lines(stream))

        classes = []
        cols = []
        data = []
        rows = []

        for index, result in enumerate(results):
            classes.append(result.classz)
            cols.extend(result.cols)
            data.extend(result.data)
            # Repeat the row index once per column entry of this line.
            rows.extend([index] * len(result.cols))

        # Every processed line is one row, even when it contributed no
        # column entries; deriving the row count from max(rows) would drop
        # trailing empty lines and disagree with len(classes).  The guard on
        # cols avoids max() failing on an empty sequence.
        shape = (len(classes), max(cols) + 1 if cols else 0)

        # np.asarray instead of np.array(..., copy=False): converting a list
        # always needs a copy, and NumPy >= 2.0 raises ValueError when
        # copy=False cannot be honoured.
        return TrainingDatabase(
            np.asarray(classes, dtype=np.uint8),
            SparseMatrix(np.asarray(data, dtype=np.uint16),
                         np.asarray(rows, dtype=np.uint32),
                         np.asarray(cols, dtype=np.uint32),
                         shape))
Пример #2
0
 def test_init_throws_when_row_dtype_is_not_integral(self):
     # The second array is deliberately floating-point so the constructor
     # is expected to reject it with ValueError.
     # Builtin float/int replace the np.float/np.int aliases, which were
     # removed in NumPy 1.24 and would raise AttributeError before
     # SparseMatrix is ever called.
     with self.assertRaises(ValueError):
         SparseMatrix(np.zeros(10), np.zeros(10, float),
                      np.zeros(10, int), (10, 10))
Пример #3
0
 def test_init_throws_when_row_and_column_dimensions_are_unequal(self):
     # The two index arrays have different lengths (10 vs 8), which the
     # constructor is expected to reject with ValueError.
     # Builtin int replaces the np.int alias, which was removed in
     # NumPy 1.24 and would raise AttributeError before SparseMatrix is
     # ever called.
     with self.assertRaises(ValueError):
         SparseMatrix(np.zeros(10), np.zeros(10, int),
                      np.zeros(8, int), (10, 10))