def update_from_triplets(self, triplets: sparse.Triplets):
    """
    Store a new set of triplets and regenerate the sparse representation.

    The representation is rebuilt from this object's spec and matmul options,
    with the object's name passed through as the debug name.
    """
    self.triplets = triplets
    rebuilt = sparse.representation_from_triplets(
        self.spec,
        *self.triplets,
        self.matmul_options,
        debug_name=self.name,
    )
    self.representation = rebuilt
def update_slots_from_triplets(self, slot_triplets: Mapping[str, sparse.Triplets]):
    """
    Update the host side representation of the sparse slot with a new set of triplets.
    The row and column indices must be identical to those for the sparse weights.
    The on device representation will not be updated until you run the op
    returned from the layer's 'update_sparsity_op()' method.

    Args:
        slot_triplets: Maps each slot name to the new triplets for that slot.
            Each name is looked up in self.sparse_slots.

    Raises:
        RuntimeError: If the non-zero values built for a slot differ in shape
            from that slot's current numpy variable.
    """
    # Convert every slot first: if any conversion fails, no slot has been replaced yet.
    slot_representations = {
        name: sparse.representation_from_triplets(
            self.weights.spec,
            *triplet,
            self.weights.matmul_options,
            debug_name=name + "(slot)")
        for name, triplet in slot_triplets.items()
    }

    for name, representation in slot_representations.items():
        current_slot = self.sparse_slots[name]
        old_shape = current_slot.np_variable.shape
        new_shape = representation.nz_values.shape
        if old_shape != new_shape:
            # Report the numpy variable's shape. The slot record is only ever built
            # with np_variable/tf_variable/placeholder here, so formatting
            # 'current_slot.shape' would presumably raise AttributeError and hide
            # this RuntimeError.
            raise RuntimeError(
                "New slot shape is not compatible. "
                f"Slot {name}: New: {new_shape} != old: {old_shape}"
            )
        # Only the host values are swapped; the TF variable and placeholder are reused.
        self.sparse_slots[name] = SparseSlot(
            np_variable=representation.nz_values,
            tf_variable=current_slot.tf_variable,
            placeholder=current_slot.placeholder)
def update_momentum_from_triplets(self, new_momentum_triplets):
    """
    Replace the stored momentum values with ones derived from new triplets.

    The triplets are converted with this object's spec and the resulting
    non-zero values become the new momentum.

    Raises:
        RuntimeError: If the converted non-zero values do not have the same
            shape as the momentum currently held.
    """
    converted = sparse.representation_from_triplets(
        self.spec, *new_momentum_triplets)

    shapes_match = not (self.sparse_momentum.shape != converted.nz_values.shape)
    if shapes_match:
        self.sparse_momentum = converted.nz_values
        return

    raise RuntimeError(
        "New momentum shape is not compatible. "
        f"New: {converted.nz_values.shape} != old: {self.sparse_momentum.shape}"
    )
def test_device_version_equality_ipu2(self):
    """
    Check that conversions done with ipu_version=0 and ipu_version=2 agree.

    The same triplets are converted to a representation and back under both
    settings; the metainfo, the non-zero values and the recovered triplets
    must all be equal.
    """
    from ipu_sparse_ops import sparse

    block_size = 16
    block_pattern = np.array([[1, 0, 0], [0, 1, 0], [1, 1, 0], [0, 0, 1]])
    # Expand each entry of the block pattern into a block_size x block_size tile.
    element_mask = np.kron(
        block_pattern, np.ones(shape=[block_size, block_size])).astype(int)
    nnz = np.count_nonzero(element_mask)
    # Number the non-zero positions 0..nnz-1.
    dense = np.zeros_like(element_mask)
    dense[np.nonzero(element_mask)] = np.arange(nnz)

    options = {"metaInfoBucketOversizeProportion": 1}
    triplets = sparse.triplets_from_dense(dense)
    spec = sparse.matmul_spec_from_max(
        dense.shape[1], [2, dense.shape[0]],
        max_non_zeros=nnz, block_size=1, dtype=tf.float32)

    def convert_and_recover(version):
        # Triplets -> representation -> triplets, all under one ipu_version.
        representation = sparse.representation_from_triplets(
            spec, *triplets, options, ipu_version=version)
        recovered = sparse.triplets_from_representation(
            spec, representation, options, ipu_version=version)
        return representation, recovered

    # ipu_version=0 is the "from device" case of the original test
    # (presumably: use the attached device -- confirm); 2 is the explicit version.
    device_r, device_t_rt = convert_and_recover(0)
    version_r, version_t_rt = convert_and_recover(2)

    assert_equal(device_r.metainfo_state, version_r.metainfo_state)
    assert_equal(device_r.nz_values, version_r.nz_values)
    assert_equal(device_t_rt, version_t_rt)
def test_representation_round_trip_elements(self):
    """
    Round-trip an element-wise (block_size=1) sparse matrix.

    A dense matrix is converted to triplets, to a representation, back to
    triplets and back to dense; the result must equal the starting matrix.
    """
    from ipu_sparse_ops import sparse

    tile = 16
    block_pattern = np.array([[1, 0, 0], [0, 1, 0], [1, 1, 0], [0, 0, 1]])
    # Expand each entry of the block pattern into a tile x tile patch.
    element_mask = np.kron(block_pattern, np.ones(shape=[tile, tile])).astype(int)
    nnz = np.count_nonzero(element_mask)
    # Number the non-zero positions 0..nnz-1.
    dense = np.zeros_like(element_mask)
    dense[np.nonzero(element_mask)] = np.arange(nnz)

    options = {"metaInfoBucketOversizeProportion": 1}
    triplets = sparse.triplets_from_dense(dense)
    spec = sparse.matmul_spec_from_max(
        dense.shape[1], [2, dense.shape[0]],
        max_non_zeros=nnz, block_size=1, dtype=tf.float32)

    representation = sparse.representation_from_triplets(spec, *triplets, options)
    recovered = sparse.triplets_from_representation(spec, representation, options)
    dense_again = sparse.dense_from_triplets(spec, *recovered)

    assert_equal(dense, dense_again)
def test_representation_round_trip_blocks(self):
    """
    Round-trip block-sparse matrices for block sizes 4, 8 and 16.

    For each block size a dense matrix with random values is converted to
    block triplets, to a representation and back again; both the dense
    matrix and the block triplets must be recovered.
    """
    from ipu_sparse_ops import sparse

    for block_size in (4, 8, 16):
        # Which blocks of the matrix are non-zero:
        block_pattern = np.array([[1, 1, 0], [0, 1, 0], [1, 0, 0], [0, 0, 1]])
        block_count = np.count_nonzero(block_pattern)

        # Kronecker product turns the block pattern into an element-wise mask:
        element_mask = np.kron(
            block_pattern, np.ones(shape=[block_size, block_size])).astype(int)
        nnz = np.count_nonzero(element_mask)

        # Dense matrix with random values at the masked positions:
        dense = np.zeros_like(element_mask, dtype=np.float32)
        dense[np.nonzero(element_mask)] = np.random.rand(nnz)

        # Spec for the sparse matmul (max_non_zeros counts blocks here):
        options = {"metaInfoBucketOversizeProportion": 1}
        spec = sparse.matmul_spec_from_max(
            dense.shape[1], [2, dense.shape[0]],
            max_non_zeros=block_count, block_size=block_size, dtype=tf.float32)

        # The block pattern supplies the triplet indices; the values are the
        # blocks pulled out of the dense matrix (a reshape cannot do this):
        index_triplets = sparse.triplets_from_dense(block_pattern)
        block_values = sparse.blocks_at_indices(
            index_triplets.row_indices, index_triplets.col_indices,
            block_size, dense)
        block_triplets = sparse.Triplets(
            index_triplets.row_indices, index_triplets.col_indices, block_values)

        # To the on device representation and back: the result must be the
        # dense matrix we started with.
        representation = sparse.representation_from_triplets(
            spec, *block_triplets, options)
        recovered = sparse.triplets_from_representation(
            spec, representation, options)
        dense_again = sparse.dense_from_triplets(spec, *recovered)
        assert_equal(dense, dense_again)

        # Triplets rebuilt from the dense result must match the originals:
        rebuilt = sparse.triplets_from_dense(dense_again, block_size)
        for field in ("row_indices", "col_indices", "values"):
            assert_equal(getattr(block_triplets, field), getattr(rebuilt, field))
def __init__(self, spec: sparse.MatmulSpec, triplets: list, bias=False, relu=False, generator=None):
    """
    Build the layer's sparse data from a spec and a set of triplets.

    The triplets are converted to a sparse representation (the conversion is
    timed and logged) and a zero momentum buffer shaped like the non-zero
    values is created alongside it.
    """
    # Plain configuration first; none of it depends on the sparse data.
    self.bias = bias
    self.relu = relu
    self.bias_init = tf.zeros_initializer()
    self.generator = generator
    self.spec = spec

    # The timer spans the conversion of the supplied triplets.
    started = time.perf_counter()
    self.data = sparse.representation_from_triplets(self.spec, *triplets)
    self.triplets = triplets
    elapsed = time.perf_counter() - started
    logger.info(f"Random triplets created in {elapsed:0.3f} seconds")

    # Momentum shares the layout of the non-zero values and starts at zero.
    self.sparse_momentum = np.zeros_like(self.data.nz_values)
    logger.info(
        f"Created sparse values and momentum with shapes {self.data.nz_values.shape} {self.sparse_momentum.shape}"
    )
}) # Check all the results: # Convert the sparse gradient metainfo back to triplets and then use those row and col indices # to index the dense reference weight gradient: sparse_data = sparse.SparseRepresentation(fc.data.metainfo_state, sparse_weight_grad[0]) triplets = sparse.triplets_from_representation(fc.spec, sparse_data) reference_grad_nzvalues = sparse.values_at_indices(triplets[0], triplets[1], reference_weight_grad[0]) # Convert the dense reference weight gradient to a sparse one using the same mask # that we used for the weights so we can compare the nzvalues against the sparse grad: _, _, values = sparse.triplets_from_dense(reference_weight_grad[0]) sparse_data = sparse.representation_from_triplets(fc.spec, *triplets) reference_grad_nzvalues = sparse_data.nz_values # Need to set tolerances for fp32 as numpy is set for doubles by default: rtol = 1e-05 atol = 1e-06 if not np.allclose( reference_result, sparse_result, rtol=rtol, atol=atol, equal_nan=True): print(f"Reference result:\n{reference_result}") print(f"Sparse result:\n{sparse_result}") diff = reference_result - sparse_result print(f"Difference:\n{diff}") diff_triplet = sparse.triplets_from_dense(diff) print( f"Difference triplets:\nrows: {diff_triplet[0]}\ncols: {diff_triplet[1]}\nvalues: {diff_triplet[2]}"
# to index the dense reference weight gradient: sparse_data = sparse.SparseRepresentation(fc.weights.get_metainfo(), sparse_weight_grad[0]) triplets = sparse.triplets_from_representation(fc.weights.spec, sparse_data, fc.weights.matmul_options) if args.block_size == 1: reference_grad_nzvalues = sparse.values_at_indices( triplets[0], triplets[1], reference_weight_grad) else: reference_grad_nzvalues = sparse.blocks_at_indices( triplets[0], triplets[1], args.block_size, reference_weight_grad) # Convert the dense reference weight gradient to a sparse one using the same mask # that we used for the weights so we can compare the nzvalues against the sparse grad: dense_data = sparse.representation_from_triplets(fc.weights.spec, triplets[0], triplets[1], reference_grad_nzvalues, fc.weights.matmul_options) # Set tolerances appropriately as numpy is set for doubles by default: if args.pattern == 'random_sign_ones': rtol = 0 atol = 0 elif args.data_type == 'fp16': rtol = 1e-03 atol = 1e-02 elif args.pattern == 'random_orthogonal': rtol = 1e-07 atol = 1e-06 else: rtol = 1e-05 atol = 1e-06
def update_triplets(self, new_triplets):
    """
    Store new triplets and rebuild the sparse data from them.

    The representation is regenerated from this object's spec and the
    triplets just stored.
    """
    self.triplets = new_triplets
    rebuilt = sparse.representation_from_triplets(self.spec, *self.triplets)
    self.data = rebuilt
sparse_data = sparse.SparseRepresentation( embedding.projection.weights.get_metainfo(), tied_grad_w[0]) triplets = sparse.triplets_from_representation(matmul_spec, sparse_data, matmul_opts) # Reference grad is transposed with respect to popsparse one (third Jacobian is the reduction gradient wrt. weights): ref_grad_reduced = np.transpose(reference_grads_w) if args.block_size == 1: reference_grad_nzvalues = sparse.values_at_indices( triplets[0], triplets[1], ref_grad_reduced) else: reference_grad_nzvalues = sparse.blocks_at_indices( triplets[0], triplets[1], args.block_size, ref_grad_reduced) # Convert the dense reference weight gradient to a sparse one using the same mask # that we used for the weights so we can compare the nzvalues against the sparse grad: dense_data = sparse.representation_from_triplets(matmul_spec, triplets[0], triplets[1], reference_grad_nzvalues, matmul_opts) if logger.level == logging.getLevelName("DEBUG"): print(f"Tied grad-w triplets:\n{triplets}") print( f"Tied grad-w dense:\n{np.transpose(sparse.dense_from_triplets(matmul_spec, *triplets))}" ) print(f"Ref grad-w:\n{ref_grad_reduced}") if not np.allclose(dense_data.nz_values, tied_grad_w, rtol=rtol, atol=atol, equal_nan=True): print(f"Reference weight grad (sparsified):\n{dense_data.nz_values}")