def test_negate_with_zero(self):
    """Negating an all-zero vector must give a vector equal to zero(4)."""
    zero_vec = SparseVector.zero(4)
    want = SparseVector.zero(4)
    self.assertEqual(-zero_vec, want)
def test_add_with_zero_scalar(self):
    """Adding the scalar 0 must leave the vector equal to its original."""
    values = (1, 0, 2, 0, 3, 0, 4, 5)
    # Each vector gets its own list so the two never share an input object.
    vec = SparseVector.from_list(list(values))
    addend = 0
    want = SparseVector.from_list(list(values))
    self.assertEqual(vec + addend, want)
def test_abs_with_zero(self):
    """abs() of a vector built from zeros must equal that same vector."""
    # randint's upper bound is exclusive, so all 20 samples are 0.
    zeros = np.random.randint(0, 1, 20)
    vec = SparseVector.from_list(zeros)
    want = SparseVector.from_list(zeros)
    self.assertEqual(vec.abs(), want)
def test_add_with_zero_vector(self):
    """Adding a zero vector is expected to produce a vector equal to zero(8).

    NOTE(review): this expectation implies vector addition only keeps
    positions stored in both operands -- confirm that this is intended.
    """
    lhs = SparseVector.from_list([1, 0, 2, 0, 3, 0, 4, 5])
    rhs = SparseVector.zero(8)
    want = SparseVector.zero(8)
    total = lhs + rhs
    self.assertEqual(want, total)
def test_compact_with_zero(self):
    """compact() on a vector storing ten explicit zeros must equal zero(10)."""
    stored = np.zeros(10, dtype=np.uint16)
    positions = np.arange(10, dtype=np.uint32)
    vec = SparseVector(stored, positions, 10)
    vec.compact()
    want = SparseVector.zero(10)
    # The comparison is made against a copy of the compacted vector.
    self.assertEqual(copy(vec), want)
def test_negate_with_random(self):
    """Negation must match np.negative applied to the stored data."""
    values = np.random.randint(0, 100, 20)
    vec = SparseVector.from_list(values)
    # Build an identical vector and negate its stored entries by hand.
    want = SparseVector.from_list(values)
    want.data = np.negative(want.data)
    self.assertEqual(-vec, want)
def test_multiply_with_random_scalar(self):
    """vec * scalar must equal the vector built from the scaled dense array."""
    dense = np.array([1, 0, 2, 0, 3, 0, 4, 5])
    vec = SparseVector.from_list(dense)
    factor = np.random.randint(1, 100)
    want = SparseVector.from_list(dense * factor)
    self.assertEqual(vec * factor, want)
def test_log2_with_zero(self):
    """log2() of a vector built from zeros must match np.log2 of its data."""
    # randint's upper bound is exclusive, so all 20 samples are 0.
    zeros = np.random.randint(0, 1, 20)
    vec = SparseVector.from_list(zeros)
    want = SparseVector.from_list(zeros)
    want.data = np.log2(want.data)
    self.assertEqual(vec.log2(), want)
def test_abs_with_random(self):
    """abs() must match np.abs applied to the stored data."""
    values = np.random.randint(-20, 20, 20)
    vec = SparseVector.from_list(values)
    # Build an identical vector and take the absolute value by hand.
    want = SparseVector.from_list(values)
    want.data = np.abs(want.data)
    self.assertEqual(vec.abs(), want)
def test_power_with_zero(self):
    """power() on a vector built from zeros must match np.power of its data."""
    # randint's upper bound is exclusive, so all 20 samples are 0.
    zeros = np.random.randint(0, 1, 20)
    vec = SparseVector.from_list(zeros)
    exponent = np.random.randint(2, 10)
    want = SparseVector.from_list(zeros)
    want.data = np.power(want.data, exponent)
    self.assertEqual(vec.power(exponent), want)
def test_multiply_with_random_vector(self):
    """a * b must equal the vector built from the element-wise dense product."""
    dense_a = np.array([1, 0, 2, 0, 3, 0, 4, 5])
    dense_b = np.random.randint(0, 100, dense_a.size)
    lhs = SparseVector.from_list(dense_a)
    rhs = SparseVector.from_list(dense_b)
    want = SparseVector.from_list(np.multiply(dense_a, dense_b))
    self.assertEqual(want, lhs * rhs)
def test_divide_with_random_scalar(self):
    """vec / scalar must match np.divide applied to the stored data."""
    dense = np.array([1, 0, 2, 0, 3, 0, 4, 5])
    vec = SparseVector.from_list(dense)
    # The divisor is drawn from [1, 100), so it is never zero.
    divisor = np.random.randint(1, 100)
    want = SparseVector.from_list(dense)
    want.data = np.divide(want.data, divisor)
    self.assertEqual(vec / divisor, want)
def test_add_with_random_vector(self):
    """a + b is checked against a hand-computed expected vector.

    The expected vector holds sums only at positions 0, 4 and 7, the
    positions where both inputs are non-zero.
    """
    dense_a = np.array([1, 0, 2, 0, 3, 0, 4, 5])
    dense_b = np.array([2, 1, 0, 3, 4, 12, 0, 7])
    lhs = SparseVector.from_list(dense_a)
    rhs = SparseVector.from_list(dense_b)
    want = SparseVector.from_list([3, 0, 0, 0, 7, 0, 0, 12])
    self.assertEqual(want, lhs + rhs)
def test_venn_with_zero(self):
    """venn() with a zero vector must return (zero(8), the original vector)."""
    values = (1, 0, 2, 0, 3, 4, 0, 5)
    lhs = SparseVector.from_list(list(values))
    rhs = SparseVector.zero(8)
    want_first = SparseVector.zero(8)
    want_second = SparseVector.from_list(list(values))
    got_first, got_second = lhs.venn(rhs)
    self.assertEqual(got_first, want_first)
    self.assertEqual(got_second, want_second)
def test_get_rows_using_simple_matrix(self):
    """get_rows() must yield exactly the three rows of the matrix, in order.

    Each returned row is compared against the SparseVector built from the
    corresponding dense row, and the number of returned rows is checked.
    """
    mat = SparseMatrix.from_list([[0, 2, 0], [0, 0, 3], [1, 0, 0]])
    expected = [
        SparseVector.from_list([0, 2, 0]),
        SparseVector.from_list([0, 0, 3]),
        SparseVector.from_list([1, 0, 0]),
    ]
    # Materialise the rows so their count can be verified: zip() stops at the
    # shorter input, so without this check missing rows (or no rows at all)
    # would let the test pass vacuously.
    result = list(mat.get_rows())
    self.assertEqual(len(result), len(expected))
    for res, ex in zip(result, expected):
        self.assertEqual(res, ex)
def test_add_with_random_scalar(self):
    """vec + scalar must match adding the scalar to every stored entry.

    The expected vector is compacted after the scalar has been added.
    """
    dense = np.array([1, 0, 2, 0, 3, 0, 4, 5])
    vec = SparseVector.from_list(dense)
    addend = np.random.randint(1, 100)
    want = SparseVector.from_list(dense)
    # One copy of the scalar per stored entry.
    offsets = np.full(want.data.size, addend)
    want.data = np.add(want.data, offsets)
    want.compact()
    self.assertEqual(vec + addend, want)
def test_compact_with_random(self):
    """compact() must equal a vector built without the zero-valued entries."""
    stored = np.random.randint(0, 10, 30, dtype=np.uint16)
    positions = np.arange(30, dtype=np.uint32)
    vec = SparseVector(stored, positions, 30)
    vec.compact()
    # Locations of the zero entries, removed from both arrays below.
    zero_slots = np.where(stored == 0)
    kept_data = np.delete(stored, zero_slots)
    kept_positions = np.delete(positions, zero_slots)
    want = SparseVector(kept_data, kept_positions, 30)
    self.assertEqual(copy(vec), want)
def test_get_row_using_simple_matrix(self):
    """get_row(i) must equal the vector built from the i-th dense row."""
    rows = ((0, 2, 0), (0, 0, 3), (1, 0, 0))
    mat = SparseMatrix.from_list([list(row) for row in rows])
    wanted = [SparseVector.from_list(list(row)) for row in rows]
    # Fetch every row before asserting anything.
    fetched = [mat.get_row(i) for i in range(len(rows))]
    for want, got in zip(wanted, fetched):
        self.assertEqual(want, got)
def test_sum_with_zero(self):
    """sum() of an all-zero vector must be 0."""
    zero_vec = SparseVector.zero(7)
    self.assertEqual(zero_vec.sum(), 0)
def test_value_at_when_value_is_zero(self):
    """value_at() must return 0 at every position of an all-zero vector."""
    zero_vec = SparseVector.zero(7)
    want = [0] * 7
    got = [zero_vec.value_at(pos) for pos in range(zero_vec.size)]
    self.assertEqual(got, want)
def test_to_dense_with_zero(self):
    """to_dense() of an all-zero vector must equal a dense array of 7 zeros."""
    zero_vec = SparseVector.zero(7)
    want = np.zeros(7, dtype=np.uint16)
    dense = zero_vec.to_dense()
    self.assertTrue(np.array_equal(dense, want))
def test_divide_with_random_vector(self):
    """a / b must match a hand-built quotient over the shared indices.

    The expected vector is assembled from the stored entries of ``a`` and
    ``b`` whose index occurs in both operands: its data is the element-wise
    quotient of those entries, its indices are the shared indices taken from
    ``a``, and its size is the size of ``a``.
    """
    array_a = np.random.randint(0, 100, 30)
    array_b = np.random.randint(0, 100, array_a.size)
    a = SparseVector.from_list(array_a)
    b = SparseVector.from_list(array_b)
    # Boolean masks over each operand's stored entries, true where the index
    # is also stored in the other operand. np.isin replaces np.in1d, which is
    # deprecated as of NumPy 2.0; the two agree for 1-D inputs.
    # assumes `indices` is a 1-D array -- TODO confirm
    a_idx = np.isin(a.indices, b.indices)
    b_idx = np.isin(b.indices, a.indices)
    expected = SparseVector.from_list(array_a)
    expected.data = np.divide(a.data[a_idx], b.data[b_idx])
    expected.indices = a.indices[a_idx]
    expected.size = a.size
    result = a / b
    self.assertEqual(result, expected)
def read_set(pool, stream):
    """
    Reads a testing set from the specified CSV stream using the specified
    processing pool to improve I/O performance.

    Every line produced by ``CsvIO.generate_lines`` is processed through
    ``CsvIO.process_line`` (with ``skip_class=True`` and ``skip_id=False``)
    via ``pool.map``; each result becomes one ``Test`` holding the result's
    id and a ``SparseVector`` built from its data and column indices.

    :param pool: The processing pool to use.
    :param stream: The CSV stream to read from.
    :return: A set filled with some data to test with.
    """
    processor = partial(CsvIO.process_line, skip_class=True, skip_id=False)
    results = pool.map(processor, CsvIO.generate_lines(stream))
    # np.asarray reuses the input when it already is an array of the requested
    # dtype and copies otherwise. np.array(..., copy=False) behaved the same
    # on NumPy 1.x but raises ValueError on NumPy 2.x whenever a copy cannot
    # be avoided (e.g. for list input or a dtype change).
    # NOTE(review): 61189 is a hard-coded vector size -- presumably the number
    # of features in the data set; confirm against the data source.
    tests = [
        Test(
            result.id,
            SparseVector(
                data=np.asarray(result.data, dtype=np.uint16),
                indices=np.asarray(result.cols, dtype=np.uint32),
                size=61189))
        for result in results
    ]
    return TestingSet(tests)
def test_vstack_with_single_vector(self):
    """vstack() of one vector must give the matching 1 x 8 matrix."""
    want = SparseMatrix.from_list([[1, 0, 2, 0, 3, 4, 0, 5]])
    only_row = SparseVector.from_list([1, 0, 2, 0, 3, 4, 0, 5])
    stacked = SparseMatrix.vstack([only_row])
    self.assertEqual(stacked.shape, (1, 8))
    self.assertEqual(stacked, want)
def test_value_at_when_value_is_not_zero(self):
    """value_at() must reproduce every element of a vector with no zeros."""
    # Values are drawn from [1, 10), so no element is zero.
    values = np.random.randint(1, 10, 20)
    vec = SparseVector.from_list(values)
    want = values.tolist()
    got = [vec.value_at(pos) for pos in range(vec.size)]
    self.assertEqual(got, want)
def test_sum_with_random(self):
    """sum() must equal np.sum of the dense array the vector was built from."""
    values = np.random.randint(0, 100, 20)
    vec = SparseVector.from_list(values)
    want = np.sum(values)
    self.assertEqual(vec.sum(), want)
def test_vstack_with_multiple_vectors(self):
    """vstack() of 20 vectors must equal the matrix built from the same rows."""
    dense_rows = [np.random.randint(0, 10, 30) for _ in range(20)]
    # One sparse vector per dense row, in the same order.
    vectors = [SparseVector.from_list(row) for row in dense_rows]
    want = SparseMatrix.from_list(dense_rows)
    stacked = SparseMatrix.vstack(vectors)
    self.assertEqual(stacked, want)
def test_to_dense_with_random(self):
    """to_dense() must reproduce the dense array the vector was built from."""
    values = np.random.randint(0, 5, 30, dtype=np.uint16)
    vec = SparseVector.from_list(values)
    # Compare against an independent copy of the source array.
    want = np.copy(values)
    dense = vec.to_dense()
    self.assertTrue(np.array_equal(dense, want))
def test_from_lists_with_no_unique_elements(self):
    """from_lists() with empty inputs must give size 5 and empty arrays."""
    vec = SparseVector.from_lists([], [], 5)
    self.assertEqual(vec.size, 5)
    empty_data = np.array([], dtype=np.uint16)
    self.assertTrue(np.array_equal(vec.data, empty_data))
    empty_indices = np.array([], dtype=np.uint32)
    self.assertTrue(np.array_equal(vec.indices, empty_indices))
def count_words(self, mat):
    """
    Computes the sum of the specified sparse matrix together with the sum of
    each of its columns.

    :param mat: The sparse matrix of word counts to use.
    :return: A tuple containing the result of ``mat.sum()`` and a sparse
        vector built from the per-column sums.
    """
    total = mat.sum()
    column_totals = [column.sum() for column in mat.get_columns()]
    return total, SparseVector.from_list(column_totals, np.uint32)