def test_add_with_zero_scalar(self):
    """Check that adding the scalar 0 to a vector yields an equal vector."""
    operand = SparseVector.from_list([1, 0, 2, 0, 3, 0, 4, 5])
    unchanged = SparseVector.from_list([1, 0, 2, 0, 3, 0, 4, 5])

    self.assertEqual(operand + 0, unchanged)
def test_abs_with_zero(self):
    """Check that abs() of an all-zero vector equals that same vector."""
    # randint's upper bound is exclusive, so every drawn value is 0.
    zeros = np.random.randint(0, 1, 20)
    operand = SparseVector.from_list(zeros)
    unchanged = SparseVector.from_list(zeros)

    self.assertEqual(operand.abs(), unchanged)
def test_multiply_with_random_scalar(self):
    """Check vector * scalar against the dense product for a random factor."""
    dense = np.array([1, 0, 2, 0, 3, 0, 4, 5])
    operand = SparseVector.from_list(dense)
    # The factor is drawn from [1, 100), so it is never zero.
    factor = np.random.randint(1, 100)
    product = SparseVector.from_list(np.multiply(dense, factor))

    self.assertEqual(operand * factor, product)
def test_log2_with_zero(self):
    """Check log2() on an all-zero vector against np.log2 of its data."""
    # randint's upper bound is exclusive, so every drawn value is 0.
    zeros = np.random.randint(0, 1, 20)
    operand = SparseVector.from_list(zeros)
    wanted = SparseVector.from_list(zeros)
    # The expectation is built by transforming the `data` array in place.
    wanted.data = np.log2(wanted.data)

    self.assertEqual(operand.log2(), wanted)
def test_abs_with_random(self):
    """Check abs() on random values from [-20, 20) against np.abs."""
    samples = np.random.randint(-20, 20, 20)
    operand = SparseVector.from_list(samples)
    wanted = SparseVector.from_list(samples)
    # The expectation is built by transforming the `data` array in place.
    wanted.data = np.abs(wanted.data)

    self.assertEqual(operand.abs(), wanted)
def test_negate_with_random(self):
    """Check unary minus on random values from [0, 100) against np.negative."""
    samples = np.random.randint(0, 100, 20)
    operand = SparseVector.from_list(samples)
    wanted = SparseVector.from_list(samples)
    # The expectation is built by transforming the `data` array in place.
    wanted.data = np.negative(wanted.data)

    self.assertEqual(-operand, wanted)
def test_add_with_random_vector(self):
    """Check vector + vector against a fixed expected result.

    The expected vector is non-zero only at positions where both
    operands are non-zero (positions 0, 4 and 7).
    """
    left = SparseVector.from_list(np.array([1, 0, 2, 0, 3, 0, 4, 5]))
    right = SparseVector.from_list(np.array([2, 1, 0, 3, 4, 12, 0, 7]))
    wanted = SparseVector.from_list([3, 0, 0, 0, 7, 0, 0, 12])

    self.assertEqual(wanted, left + right)
def test_multiply_with_random_vector(self):
    """Check vector * vector against the element-wise dense product."""
    dense_left = np.array([1, 0, 2, 0, 3, 0, 4, 5])
    # Same length as the fixed operand, values drawn from [0, 100).
    dense_right = np.random.randint(0, 100, dense_left.size)
    left = SparseVector.from_list(dense_left)
    right = SparseVector.from_list(dense_right)
    wanted = SparseVector.from_list(np.multiply(dense_left, dense_right))

    self.assertEqual(wanted, left * right)
def test_divide_with_random_scalar(self):
    """Check vector / scalar against np.divide for a random non-zero divisor."""
    dense = np.array([1, 0, 2, 0, 3, 0, 4, 5])
    operand = SparseVector.from_list(dense)
    # The divisor is drawn from [1, 100), so it is never zero.
    divisor = np.random.randint(1, 100)
    wanted = SparseVector.from_list(dense)
    wanted.data = np.divide(wanted.data, divisor)

    self.assertEqual(operand / divisor, wanted)
def test_power_with_zero(self):
    """Check power() on an all-zero vector against np.power of its data."""
    # randint's upper bound is exclusive, so every drawn value is 0.
    zeros = np.random.randint(0, 1, 20)
    operand = SparseVector.from_list(zeros)
    # The exponent is drawn from [2, 10).
    exponent = np.random.randint(2, 10)
    wanted = SparseVector.from_list(zeros)
    wanted.data = np.power(wanted.data, exponent)

    self.assertEqual(operand.power(exponent), wanted)
def test_venn_with_zero(self):
    """Check venn() against a zero vector.

    The first returned vector is expected to be all zeros and the
    second is expected to equal the original operand.
    """
    operand = SparseVector.from_list([1, 0, 2, 0, 3, 4, 0, 5])
    nothing = SparseVector.zero(8)
    wanted_first = SparseVector.zero(8)
    wanted_second = SparseVector.from_list([1, 0, 2, 0, 3, 4, 0, 5])

    got_first, got_second = operand.venn(nothing)

    self.assertEqual(got_first, wanted_first)
    self.assertEqual(got_second, wanted_second)
def test_get_rows_using_simple_matrix(self):
    """Check that get_rows() yields every row of a 3x3 matrix, in order."""
    mat = SparseMatrix.from_list([[0, 2, 0], [0, 0, 3], [1, 0, 0]])
    expected = [
        SparseVector.from_list([0, 2, 0]),
        SparseVector.from_list([0, 0, 3]),
        SparseVector.from_list([1, 0, 0]),
    ]

    # Materialise the rows so the count can be checked even if get_rows()
    # produces them lazily.
    result = list(mat.get_rows())

    # zip() stops at the shorter input; without this check a result with
    # missing (or no) rows would pass the loop below vacuously.
    self.assertEqual(len(result), len(expected))
    for ex, res in zip(expected, result):
        self.assertEqual(res, ex)
def test_add_with_random_scalar(self):
    """Check vector + scalar for a random non-zero scalar.

    The expectation adds the scalar to every entry of the `data` array
    and then calls compact() on the expected vector.
    """
    dense = np.array([1, 0, 2, 0, 3, 0, 4, 5])
    operand = SparseVector.from_list(dense)
    # The scalar is drawn from [1, 100), so it is never zero.
    addend = np.random.randint(1, 100)

    wanted = SparseVector.from_list(dense)
    offsets = np.full(wanted.data.size, addend)
    wanted.data = np.add(wanted.data, offsets)
    wanted.compact()

    self.assertEqual(operand + addend, wanted)
def test_get_row_using_simple_matrix(self):
    """Check get_row() for each of the three rows of a 3x3 matrix."""
    matrix = SparseMatrix.from_list([[0, 2, 0], [0, 0, 3], [1, 0, 0]])
    wanted_rows = [
        SparseVector.from_list([0, 2, 0]),
        SparseVector.from_list([0, 0, 3]),
        SparseVector.from_list([1, 0, 0]),
    ]

    # Fetch every row before asserting anything.
    fetched_rows = [matrix.get_row(index) for index in range(3)]

    for wanted, fetched in zip(wanted_rows, fetched_rows):
        self.assertEqual(wanted, fetched)
def test_divide_with_random_vector(self):
    """Check vector / vector for two random vectors.

    The expectation keeps only the indices stored in both operands and
    divides the matching `data` entries of `a` by those of `b`.
    """
    array_a = np.random.randint(0, 100, 30)
    array_b = np.random.randint(0, 100, array_a.size)
    a = SparseVector.from_list(array_a)
    b = SparseVector.from_list(array_b)

    # Boolean masks selecting the entries whose index occurs in both vectors.
    # np.isin replaces np.in1d, which is deprecated as of NumPy 2.0.
    a_idx = np.isin(a.indices, b.indices)
    b_idx = np.isin(b.indices, a.indices)

    expected = SparseVector.from_list(array_a)
    expected.data = np.divide(a.data[a_idx], b.data[b_idx])
    expected.indices = a.indices[a_idx]
    expected.size = a.size

    result = a / b

    self.assertEqual(result, expected)
def test_vstack_with_single_vector(self):
    """Check that stacking one 8-element vector gives a 1x8 matrix."""
    wanted = SparseMatrix.from_list([[1, 0, 2, 0, 3, 4, 0, 5]])
    only_row = SparseVector.from_list([1, 0, 2, 0, 3, 4, 0, 5])

    stacked = SparseMatrix.vstack([only_row])

    self.assertEqual(stacked.shape, (1, 8))
    self.assertEqual(stacked, wanted)
def test_from_list_with_no_unique_elements(self):
    """Check that an all-zero list gives size 10 and empty data/indices."""
    vec = SparseVector.from_list([0] * 10)

    self.assertEqual(vec.size, 10)

    # NOTE: np.array_equal compares shape and values only, so the dtypes
    # given here are not themselves verified.
    no_values = np.array([], dtype=np.uint16)
    self.assertTrue(np.array_equal(vec.data, no_values))

    no_positions = np.array([], dtype=np.uint32)
    self.assertTrue(np.array_equal(vec.indices, no_positions))
def test_to_dense_with_random(self):
    """Check that to_dense() reproduces the random array the vector came from."""
    source = np.random.randint(0, 5, 30, dtype=np.uint16)
    vec = SparseVector.from_list(source)
    # Compare against a copy of the input rather than the input object itself.
    snapshot = np.copy(source)

    dense = vec.to_dense()

    self.assertTrue(np.array_equal(dense, snapshot))
def test_sum_with_random(self):
    """Check that sum() matches np.sum of the originating random array."""
    samples = np.random.randint(0, 100, 20)
    vec = SparseVector.from_list(samples)
    wanted_total = np.sum(samples)

    self.assertEqual(vec.sum(), wanted_total)
def test_value_at_when_value_is_not_zero(self):
    """Check value_at() at every position of a vector with no zero entries."""
    # Values are drawn from [1, 10), so no entry is zero.
    samples = np.random.randint(1, 10, 20)
    vec = SparseVector.from_list(samples)
    wanted = samples.tolist()

    looked_up = list(map(vec.value_at, range(vec.size)))

    self.assertEqual(looked_up, wanted)
def test_vstack_with_multiple_vectors(self):
    """Check that stacking 20 random vectors equals the matrix of the same rows."""
    rows = [np.random.randint(0, 10, 30) for _ in range(20)]
    vectors = [SparseVector.from_list(row) for row in rows]
    wanted = SparseMatrix.from_list(rows)

    self.assertEqual(SparseMatrix.vstack(vectors), wanted)
def test_add_with_zero_vector(self):
    """Check that vector + zero vector is expected to equal the zero vector."""
    operand = SparseVector.from_list([1, 0, 2, 0, 3, 0, 4, 5])
    nothing = SparseVector.zero(8)
    wanted = SparseVector.zero(8)

    self.assertEqual(wanted, operand + nothing)
def test_read_set_with_multiple_rows(self):
    """Check CsvIO.read_set() on three CSV rows of one label plus nine values."""
    src = (
        "0,0,1,0,2,0,3,4,0,5\n"
        "1,6,0,7,0,8,9,0,10,0\n"
        "2,0,11,0,12,0,13,14,0,15\n"
    )
    with Pool(processes=4) as pool, StringIO(src) as stream:
        expected = TestingSet([
            Test(0, SparseVector.from_list([0, 1, 0, 2, 0, 3, 4, 0, 5])),
            Test(1, SparseVector.from_list([6, 0, 7, 0, 8, 9, 0, 10, 0])),
            Test(2, SparseVector.from_list([0, 11, 0, 12, 0, 13, 14, 0, 15])),
        ])
        # Shift every expected index up by one and grow the size by one.
        # NOTE(review): presumably the reader counts the leading label
        # column in the index space - confirm against CsvIO.read_set.
        for row in range(3):
            query = expected[row].query
            query.indices = query.indices + 1
            query.size = query.size + 1

        parsed = CsvIO.read_set(pool, stream)

        self.assertEqual(parsed, expected)
def count_words(self, mat):
    """
    Computes the overall sum of the specified sparse matrix together with
    the sum of each of its columns.

    :param mat: The sparse matrix of word counts to use.
    :return: A tuple containing ``mat.sum()`` and a sparse vector built
        from the per-column sums (created with ``np.uint32`` as the second
        argument to ``SparseVector.from_list``).
    """
    grand_total = mat.sum()
    per_column = []
    for column in mat.get_columns():
        per_column.append(column.sum())
    return grand_total, SparseVector.from_list(per_column, np.uint32)
def test_venn_with_random(self):
    """Check venn() for two random vectors.

    The first result is expected to hold the entries of `a` whose index
    is also stored in `b`, the second the remaining entries of `a`.
    Together they must account for every stored entry of `a`.
    """
    array_a = np.random.randint(0, 100, 30)
    array_b = np.random.randint(0, 100, 30)
    a = SparseVector.from_list(array_a)
    b = SparseVector.from_list(array_b)

    # np.isin replaces np.in1d, which is deprecated as of NumPy 2.0.
    # a_i marks entries of `a` shared with `b`; a_d marks the rest.
    a_i = np.isin(a.indices, b.indices)
    a_d = np.isin(a.indices, np.setdiff1d(a.indices, a.indices[a_i]))

    expected0 = SparseVector(a.data[a_i], a.indices[a_i], a.size)
    expected1 = SparseVector(a.data[a_d], a.indices[a_d], a.size)

    result0, result1 = a.venn(b)

    self.assertEqual(result0, expected0)
    self.assertEqual(result1, expected1)
    # The two parts must partition the stored entries of `a`.
    self.assertEqual(result0.data.size + result1.data.size, a.data.size)
    self.assertEqual(result0.indices.size + result1.indices.size,
                     a.indices.size)
def test_from_list_with_several_unique_elements(self):
    """Check size, data and indices for a list mixing zeros and non-zeros.

    `data` is expected to hold the non-zero values in order and
    `indices` their positions in the input list.
    """
    vec = SparseVector.from_list([0, 1, 2, 0, 3, 4, 0, 5, 6, 0, 7, 8])

    self.assertEqual(vec.size, 12)

    # NOTE: np.array_equal compares shape and values only, so the dtypes
    # given here are not themselves verified.
    wanted_values = np.array([1, 2, 3, 4, 5, 6, 7, 8], dtype=np.uint16)
    self.assertTrue(np.array_equal(wanted_values, vec.data))

    wanted_positions = np.array([1, 2, 4, 5, 7, 8, 10, 11], dtype=np.uint32)
    self.assertTrue(np.array_equal(wanted_positions, vec.indices))
def test_read_set_with_single_row(self):
    """Check CsvIO.read_set() on one CSV row of one label plus nine values."""
    src = "0,0,1,0,2,0,3,4,0,5"
    with Pool(processes=4) as pool, StringIO(src) as stream:
        only_test = Test(0, SparseVector.from_list([0, 1, 0, 2, 0, 3, 4, 0, 5]))
        expected = TestingSet([only_test])
        # Shift the expected indices up by one and grow the size by one.
        # NOTE(review): presumably the reader counts the leading label
        # column in the index space - confirm against CsvIO.read_set.
        query = expected[0].query
        query.indices = query.indices + 1
        query.size = query.size + 1

        parsed = CsvIO.read_set(pool, stream)

        self.assertEqual(parsed, expected)
def test_divide_with_zero_scalar_throws(self):
    """Check that dividing a vector by the scalar 0 raises ZeroDivisionError."""
    with self.assertRaises(ZeroDivisionError):
        numerator = SparseVector.from_list([1, 0, 2, 0, 3, 0, 4, 5])
        _ = numerator / 0