def build_matrices(prescriptions, practices, dates):
    """
    Turn a stream of prescription rows into per-presentation matrices.

    `prescriptions` is an iterable of 7-item rows of the form:

        bnf_code, practice, date, items, quantity, actual_cost, net_cost

    `practices` maps practice codes to row offsets and `dates` maps date
    strings to column offsets. For each group of rows sharing a bnf_code a
    `MatrixRow` is yielded containing:

        bnf_code, items_matrix, quantity_matrix, actual_cost_matrix, net_cost_matrix

    Each matrix holds the prescribed values for that presentation for every
    practice (row) and date (column).

    NOTE(review): assuming `groupby` is `itertools.groupby`, only consecutive
    rows are grouped together, so `prescriptions` needs to arrive ordered by
    bnf_code -- confirm against the caller.
    """
    row_count = max(practices.values()) + 1
    column_count = max(dates.values()) + 1
    shape = (row_count, column_count)
    # One flag per value column, in the order: items, quantity, actual_cost,
    # net_cost. Only quantity is stored as floating point.
    integer_flags = (True, False, True, True)

    def bnf_code_of(row):
        return row[0]

    for bnf_code, rows in groupby(prescriptions, bnf_code_of):
        matrices = [sparse_matrix(shape, integer=flag) for flag in integer_flags]
        for _, practice, date, items, quantity, actual_cost, net_cost in rows:
            cell = (practices[practice], dates[date])
            values = (items, quantity, actual_cost, net_cost)
            for matrix, value in zip(matrices, values):
                matrix[cell] = value
        yield MatrixRow(bnf_code, *[finalise_matrix(matrix) for matrix in matrices])
def build_matrices(practice_statistics, practices, dates):
    """
    Turn a stream of practice statistics into one matrix per statistic.

    `practice_statistics` is an iterable of 4-item rows of the form:

        statistic_name, practice, date, value

    `practices` maps practice codes to row offsets and `dates` maps date
    strings to column offsets. Once all input has been consumed, pairs of the
    form:

        statistic_name, matrix

    are yielded in sorted order of statistic name, where the matrix contains
    the values for that statistic for each practice and date.

    Whether a statistic's matrix is integer or floating point is decided by
    the type of the first value seen for that statistic.
    """
    shape = (max(practices.values()) + 1, max(dates.values()) + 1)
    matrices_by_name = {}
    for statistic_name, practice, date, value in practice_statistics:
        try:
            row = practices[practice]
        except KeyError:
            # Practice statistics are downloaded for every practice in the
            # date range, including practices which did not prescribe at all
            # in that period and so are missing from our list of known
            # practices. Those rows are simply skipped.
            continue
        column = dates[date]
        if statistic_name not in matrices_by_name:
            matrices_by_name[statistic_name] = sparse_matrix(
                shape, integer=isinstance(value, int)
            )
        matrices_by_name[statistic_name][row, column] = value
    logger.info(
        "Writing %s practice statistics matrices to SQLite", len(matrices_by_name)
    )
    for statistic_name in sorted(matrices_by_name):
        yield statistic_name, finalise_matrix(matrices_by_name[statistic_name])
def make_matrix(self, sparse, integer):
    """
    Build a randomly populated, finalised matrix of shape `self.shape`.

    `sparse` chooses the `sample_density` handed to `self._random_coords`:
    0.4 when true, 1 otherwise. `integer` chooses between integer values
    drawn from `randrange(1024)` and floats drawn from `random()`.

    The populated matrix is passed through `finalise_matrix` and the result
    is returned.
    """
    matrix = sparse_matrix(self.shape, integer=integer)
    sample_density = 0.4 if sparse else 1
    for coords in self._random_coords(self.shape, sample_density):
        # Both kinds of value come from `self.random`. Previously floats were
        # drawn from the module-level `random` instead, so they bypassed the
        # test case's own generator.
        # NOTE(review): `self.random` is presumably a seeded `random.Random`
        # created in the test setup -- confirm.
        if integer:
            value = self.random.randrange(1024)
        else:
            value = self.random.random()
        matrix[coords] = value
    return finalise_matrix(matrix)
def make_matrix(self, sparse, integer):
    """
    Build a randomly populated, finalised matrix of shape `self.shape`.

    `sparse` chooses the `sample_density` handed to `self._random_coords`:
    0.1 when true, 1 otherwise. `integer` chooses between integer values
    drawn from `randrange(1024)` and floats drawn from `random()`.

    The populated matrix is passed through `finalise_matrix`, and the result
    is checked to be of the requested kind before being returned.
    """
    matrix = sparse_matrix(self.shape, integer=integer)
    sample_density = 0.1 if sparse else 1
    for coords in self._random_coords(self.shape, sample_density):
        # Both kinds of value come from `self.random`. Previously floats were
        # drawn from the module-level `random` instead, so they bypassed the
        # test case's own generator.
        # NOTE(review): `self.random` is presumably a seeded `random.Random`
        # created in the test setup -- confirm.
        if integer:
            value = self.random.randrange(1024)
        else:
            value = self.random.random()
        matrix[coords] = value
    matrix = finalise_matrix(matrix)
    # Make sure we get back the type of matrix we're expecting: a `todense`
    # attribute is used as the marker of a sparse matrix
    assert hasattr(matrix, "todense") == sparse
    return matrix
def sum_rows(rows):
    """
    Accepts multiple rows of matrices and sums the matrices in each column

    `rows` must be a non-empty, indexable sequence: the first row determines
    how many accumulators are created, along with the shape and integer-ness
    of each one. An empty sequence raises IndexError.

    Returns a list containing one finalised matrix per column.
    """
    first_row = rows[0]
    accumulators = [
        sparse_matrix(matrix.shape, integer=is_integer(matrix)) for matrix in first_row
    ]
    column_indexes = range(len(accumulators))
    for row in rows:
        for index, matrix in zip(column_indexes, row):
            # Store the result back into the list rather than relying on `+=`
            # mutating a loop variable in place: if the accumulator type does
            # not implement in-place addition, `+=` just rebinds the name and
            # the sum would be silently discarded.
            accumulators[index] += matrix
    return [finalise_matrix(matrix) for matrix in accumulators]
def make_matrix(self, random, sparse, integer):
    """
    Build a 4x4 matrix with a single randomly chosen cell set to a random
    value.

    `random` supplies the randomness (it must provide `randrange` and
    `random`). When `sparse` is true the matrix is created with
    `sparse_matrix` and passed through `finalise_matrix` before being
    returned; otherwise a numpy array of zeros is used. `integer` selects
    between integer values drawn from `randrange(128)` and floats drawn from
    `random()`.
    """
    shape = (4, 4)
    if sparse:
        matrix = sparse_matrix(shape, integer=integer)
    else:
        # `numpy.float64` is the type the `numpy.float_` alias used to refer
        # to; the alias itself was removed in NumPy 2.0
        dtype = numpy.int_ if integer else numpy.float64
        matrix = numpy.zeros(shape, dtype=dtype)
    # The index must be a tuple to address one (row, column) cell. A `map`
    # iterator is not a valid index on Python 3, and a list would select
    # whole rows instead of a single cell.
    coords = tuple(map(random.randrange, shape))
    value = random.randrange(128) if integer else random.random()
    matrix[coords] = value
    if sparse:
        matrix = finalise_matrix(matrix)
    return matrix
def make_matrix(self, random, sparse, integer):
    """
    Build a 16x4 matrix with a single randomly chosen cell set to a random
    value.

    `random` supplies the randomness (it must provide `randrange` and
    `random`). When `sparse` is true the matrix is created with
    `sparse_matrix` and passed through `finalise_matrix`; otherwise a numpy
    array of zeros is used. `integer` selects between integer values drawn
    from `randrange(128)` and floats drawn from `random()`.

    Before returning, the result is checked to be of the requested kind.
    """
    shape = (16, 4)
    if sparse:
        matrix = sparse_matrix(shape, integer=integer)
    else:
        # `numpy.float64` is the type the `numpy.float_` alias used to refer
        # to; the alias itself was removed in NumPy 2.0
        dtype = numpy.int_ if integer else numpy.float64
        matrix = numpy.zeros(shape, dtype=dtype)
    # The index must be a tuple to address one (row, column) cell. A list
    # `[r, c]` is treated as a selection of rows `r` and `c`, which fills
    # whole rows and only works while the column count does not exceed the
    # row count.
    coords = tuple(map(random.randrange, shape))
    value = random.randrange(128) if integer else random.random()
    matrix[coords] = value
    if sparse:
        matrix = finalise_matrix(matrix)
    # Check we've got the type of matrix we're expecting: a `todense`
    # attribute is used as the marker of a sparse matrix
    assert hasattr(matrix, "todense") == sparse
    return matrix
def test_integer_matrices_are_converted_to_smallest_type(self):
    # Populate an integer matrix with values between 1 and 127 inclusive and
    # check that finalising it produces a uint8 matrix
    source = sparse_matrix((4, 4), integer=True)
    cells = self._random_coords(source.shape, sample_density=0.5)
    for cell in cells:
        source[cell] = self.random.randint(1, 127)
    self.assertEqual(finalise_matrix(source).dtype, numpy.uint8)
def test_sufficiently_dense_matrices_are_converted_to_ndarrays(self):
    # Populate a matrix using a sample density of 0.8 and check that
    # finalising it hands back a plain numpy array
    source = sparse_matrix((4, 4))
    cells = self._random_coords(source.shape, sample_density=0.8)
    for cell in cells:
        source[cell] = self.random.random()
    self.assertIsInstance(finalise_matrix(source), numpy.ndarray)
def test_sufficiently_sparse_matrices_remain_sparse(self):
    # Populate a matrix using a sample density of 0.1 and check that
    # finalising it still hands back a sparse matrix
    source = sparse_matrix((4, 4))
    cells = self._random_coords(source.shape, sample_density=0.1)
    for cell in cells:
        source[cell] = self.random.random()
    self.assertIsInstance(finalise_matrix(source), SparseMatrixBase)