import opveclib as ops
import opveclib as ovl


def op(self, startEdge, fromVertex, toVertex):
    """The definition of the operator function.

    The array toVertex is a flattened list-of-lists structure, where startEdge
    encodes the start indices of the separate lists.

    :param startEdge: Indices into toVertex where edges start.
    :type startEdge: list.
    :param fromVertex: The from-vertex of each edge.
    :type fromVertex: list.
    :param toVertex: The to-vertex of each edge.
    :type toVertex: list.
    :return: Counts of triangles per edge.
    """
    iEdge = ops.position_in(toVertex.shape)[0]
    count = ops.output(toVertex.shape, ops.uint64)
    nTriangle = ops.variable(0, ops.uint64)
    iFromVertex = ops.variable(fromVertex[iEdge], fromVertex.dtype)
    iFromEdge = ops.variable(startEdge[iFromVertex], startEdge.dtype)
    iFromEdgeEnd = ops.variable(startEdge[iFromVertex + 1], startEdge.dtype)
    iiFromVertex = ops.variable(toVertex[iFromEdge], toVertex.dtype)
    iToVertex = ops.variable(toVertex[iEdge], toVertex.dtype)
    iToEdge = ops.variable(startEdge[iToVertex], startEdge.dtype)
    iToEdgeEnd = ops.variable(startEdge[iToVertex + 1], startEdge.dtype)
    iiToVertex = ops.variable(toVertex[iToEdge], toVertex.dtype)
    nMerge = iToEdgeEnd - iToEdge + iFromEdgeEnd - iFromEdge  # Maximum number of merges.

    # This construction is a work-around for simulating the function of a while loop.
    # TODO([email protected]): Replace this construct by a while loop once it is available in ovl.
    for iMerge in ops.arange(nMerge):
        doMerge = ops.logical_and(iFromEdge < iFromEdgeEnd, iToEdge < iToEdgeEnd)
        doMerge = ops.logical_and(doMerge, iiFromVertex < iToVertex)
        with ops.if_(doMerge):
            with ops.if_(iiFromVertex < iiToVertex):
                iFromEdge <<= iFromEdge + 1
                iiFromVertex <<= toVertex[iFromEdge]
            with ops.elif_(iiFromVertex > iiToVertex):
                iToEdge <<= iToEdge + 1
                iiToVertex <<= toVertex[iToEdge]
            with ops.else_():
                nTriangle <<= nTriangle + 1
                iFromEdge <<= iFromEdge + 1
                iToEdge <<= iToEdge + 1
                iiFromVertex <<= toVertex[iFromEdge]
                iiToVertex <<= toVertex[iToEdge]

    # TODO([email protected]): Use a reduction function that computes a partial or complete sum.
    count[iEdge] = nTriangle  # Save the triangles for each edge.
    return count
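# A minimal sketch (not part of the original example) of packing a small
# undirected graph into the flattened (startEdge, fromVertex, toVertex) arrays
# the operator above consumes. The build_triangle_csr helper and its ordering
# convention are assumptions: each undirected edge {u, v} is stored once, owned
# by the larger vertex and pointing to the smaller one, with every neighbor
# list sorted. This is one way to realize the docstring's "enforce an order on
# the vertices" so that each triangle is counted on exactly one edge.
import numpy as np


def build_triangle_csr(n_vertices, edges):
    neighbors = [[] for _ in range(n_vertices)]
    for u, v in edges:
        neighbors[max(u, v)].append(min(u, v))  # store each edge once: larger -> smaller
    for lst in neighbors:
        lst.sort()
    startEdge = np.zeros(n_vertices + 1, dtype=np.uint64)
    startEdge[1:] = np.cumsum([len(lst) for lst in neighbors])
    fromVertex = np.array([u for u, lst in enumerate(neighbors) for _ in lst], dtype=np.uint64)
    toVertex = np.array([w for lst in neighbors for w in lst], dtype=np.uint64)
    return startEdge, fromVertex, toVertex


# A 4-vertex graph containing two triangles, {0, 1, 2} and {0, 1, 3}.
startEdge, fromVertex, toVertex = build_triangle_csr(
    4, [(0, 1), (0, 2), (1, 2), (0, 3), (1, 3)])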
def expm1(x):
    """
    Define the expm1 operator by defining its operator function to be

    .. math::
        out_{i} = exp(x_{i}) - 1.0

    :param x: The input tensor
    :return: Element-wise exp(x) - 1

    :Examples:

    .. doctest::

        >>> import numpy as np
        >>> from opveclib import evaluate
        >>> from opveclib.examples import expm1
        >>> a = np.array([1e-10, -1e-10])
        >>> evaluate(expm1(a))
        array([ 1.00000000e-10, -1.00000000e-10])
        >>> np.expm1(a)
        array([ 1.00000000e-10, -1.00000000e-10])
        >>> ones = np.ones_like(a)
        >>> np.exp(a) - ones
        array([ 1.00000008e-10, -1.00000008e-10])
    """
    output = ovl.output_like(x)
    pos = ovl.position_in(x.shape)
    e = ovl.exp(x[pos])
    # Note: this is an example of the use of the OVL conditional operators.
    with ovl.if_(ovl.logical_and(ovl.isinf(x[pos]), x[pos] > 0.0)):
        output[pos] = x[pos]
    with ovl.elif_(e == 1.0):
        output[pos] = x[pos]
    with ovl.elif_((e - 1.0) == -1.0):
        output[pos] = -1.0
    with ovl.else_():
        output[pos] = (e - 1.0) * x[pos] / ovl.log(e)
    return output
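# A NumPy-only sketch (an assumption, not part of opveclib) that mirrors the
# branch logic above, to make the numerics explicit: when x is tiny, exp(x)
# rounds to 1.0 and the naive exp(x) - 1 loses all significant digits, so the
# result falls back to x itself; otherwise (e - 1) * x / log(e) rescales the
# rounded difference by x / log(e), which cancels the rounding error in e.
# The expm1_reference name is illustrative.
import numpy as np


def expm1_reference(x):
    x = np.asarray(x, dtype=np.float64)
    e = np.exp(x)
    with np.errstate(divide='ignore', invalid='ignore'):
        out = (e - 1.0) * x / np.log(e)            # general correction
    out = np.where(e == 1.0, x, out)               # exp(x) rounded to 1: return x
    out = np.where((e - 1.0) == -1.0, -1.0, out)   # very negative x: saturate at -1
    out = np.where(np.isinf(x) & (x > 0), x, out)  # +inf passes through
    return out


assert np.allclose(expm1_reference([1e-10, -1e-10]), np.expm1([1e-10, -1e-10]))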
def log1p(x):
    """
    Define the log1p operator by defining its operator function to be

    .. math::
        out_{i} = log(1.0 + x_{i})

    :param x: The input tensor
    :return: Element-wise log(1 + x)

    :Examples:

    .. doctest::

        >>> import numpy as np
        >>> from opveclib import evaluate
        >>> from opveclib.examples import log1p
        >>> a = np.array([1e-99, -1e-99])
        >>> evaluate(log1p(a))
        array([ 1.00000000e-99, -1.00000000e-99])
        >>> np.log1p(a)
        array([ 1.00000000e-99, -1.00000000e-99])
        >>> ones = np.ones_like(a)
        >>> np.log(ones + a)
        array([ 0., 0.])
    """
    output = ovl.output_like(x)
    pos = ovl.position_in(x.shape)
    u = 1.0 + x[pos]
    d = u - 1.0
    # Note: this is an example of the use of the OVL conditional operators.
    with ovl.if_(ovl.logical_and(ovl.isinf(x[pos]), x[pos] > 0.0)):
        output[pos] = x[pos]
    with ovl.elif_(d == 0):
        output[pos] = x[pos]
    with ovl.else_():
        output[pos] = ovl.log(u) * x[pos] / d
    return output
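# A NumPy-only sketch (assumed, not from opveclib) of the same trick for log1p:
# for tiny x, 1 + x rounds to exactly 1 and log(1 + x) returns 0, so the branch
# falls back to x; otherwise log(u) * x / d rescales by x / d, where
# d = (1 + x) - 1 is the value that actually entered the log, cancelling the
# rounding error. The log1p_reference name is illustrative.
import numpy as np


def log1p_reference(x):
    x = np.asarray(x, dtype=np.float64)
    u = 1.0 + x
    d = u - 1.0
    with np.errstate(divide='ignore', invalid='ignore'):
        out = np.log(u) * x / d
    out = np.where(d == 0.0, x, out)               # 1 + x rounded to 1: return x
    out = np.where(np.isinf(x) & (x > 0), x, out)  # +inf passes through
    return out


assert np.allclose(log1p_reference([1e-99, -1e-99]), np.log1p([1e-99, -1e-99]))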
def conv_1d(x, v, kernel_orientation='as-is', stride=1, mode='same', data_format='NCE'):
    """
    Define the operator function.

    :param x: An input tensor of shape [num_batches, num_channels, num_elements].
    :param v: A filter/kernel of shape [num_filters, num_channels, kernel_size].
    :param kernel_orientation: The orientation of the kernel to use: 'as-is' or 'flipped'.
        This language is used rather than 'convolution' or 'cross-correlation' since the terms
        have become overloaded and ambiguous across some fields. As defined in
        https://en.wikipedia.org/wiki/Cross-correlation#Properties, 'as-is' yields the
        cross-correlation and 'flipped' yields the convolution.
    :param stride: kernel stride to use.
    :param mode: border mode: 'same', 'valid', or 'full'.
    :param data_format: order of the dimensions in the input: 'NCE', 'NEC', etc.
    :return: an output tensor of shape [num_batches, num_filters, num_elements]
    """
    if kernel_orientation != 'as-is' and kernel_orientation != 'flipped':
        raise ValueError("kernel_orientation must be 'as-is' or 'flipped'")

    # Resolve data layout based on data_format input.
    assert x.rank == 3
    assert len(data_format) == 3
    assert data_format.count('N') == 1
    assert data_format.count('C') == 1
    assert data_format.count('E') == 1
    n_axis = data_format.find('N')
    c_axis = data_format.find('C')
    e_axis = data_format.find('E')

    num_elements = x.shape[e_axis]
    num_channels = x.shape[c_axis]
    num_batches = x.shape[n_axis]

    assert v.rank == 3
    if num_channels != v.shape[c_axis]:
        raise ValueError('Channel axis size of input must match that of the filter.')
    num_filters = v.shape[n_axis]
    filter_size = v.shape[e_axis]
    left_apron = filter_size // 2
    right_apron = filter_size - left_apron - 1

    if not isinstance(stride, int) or stride < 1 or stride > num_elements:
        raise ValueError('Stride must be a positive integer')

    if mode == 'same':
        if filter_size > num_elements:
            raise ValueError('filter size, ' + str(filter_size) +
                             ', cannot be larger than number of elements, ' + str(num_elements))
        starting_element = -left_apron
        ending_element = num_elements - left_apron
    elif mode == 'valid':
        if filter_size > num_elements:
            raise ValueError('filter size, ' + str(filter_size) +
                             ', cannot be larger than number of elements, ' + str(num_elements))
        starting_element = 0
        ending_element = num_elements - (left_apron + right_apron)
    elif mode == 'full':
        starting_element = -(filter_size - 1)
        ending_element = num_elements
    else:
        raise ValueError("mode must be 'same', 'valid', or 'full'.")

    output_elements = ending_element - starting_element
    output_shape = [0, 0, 0]
    output_shape[n_axis] = num_batches
    output_shape[c_axis] = num_filters
    output_shape[e_axis] = output_elements
    output = ovl.output(output_shape, x.dtype)

    filters_per_worker = 1
    filter_workers, filter_remainder = divmod(num_filters, filters_per_worker)
    if filter_remainder > 0:
        filter_workers += 1

    batches_per_worker = 1
    batch_workers, batch_remainder = divmod(num_batches, batches_per_worker)
    if batch_remainder > 0:
        batch_workers += 1

    elements_per_worker = 10
    element_workers, element_remainder = divmod(output_elements, elements_per_worker)
    if element_remainder > 0:
        element_workers += 1

    workgroup_shape = [batch_workers, filter_workers, element_workers]
    ovl.logger.debug(u' workgroup_shape: ' + str(workgroup_shape))
    pos = ovl.position_in(workgroup_shape)
    cur_batch_block = pos[0]
    cur_filter_block = pos[1]
    cur_element_block = pos[2]

    num_block_batches = ovl.variable(batches_per_worker, ovl.uint32)
    if batch_remainder > 0:
        with ovl.if_(cur_batch_block == batch_workers - 1):
            num_block_batches <<= batch_remainder

    num_block_filters = ovl.variable(filters_per_worker, ovl.uint32)
    if filter_remainder > 0:
        with ovl.if_(cur_filter_block == filter_workers - 1):
            num_block_filters <<= filter_remainder

    num_block_elements = ovl.variable(elements_per_worker, ovl.uint32)
    if element_remainder > 0:
        with ovl.if_(cur_element_block == element_workers - 1):
            num_block_elements <<= element_remainder

    accum = ovl.zeros((batches_per_worker, filters_per_worker, elements_per_worker), ovl.float64)  # 4*4
    filter_block = ovl.zeros((filters_per_worker, filter_size), v.dtype)  # 4*10
    input_block = ovl.zeros((batches_per_worker, filter_size), x.dtype)  # 4*10

    for cur_channel in ovl.arange(num_channels):
        # Load all filters for this channel.
        for intra_block_filter in ovl.arange(filters_per_worker):
            for f_pos in ovl.arange(filter_size):
                filter_index = [None, None, None]
                filter_index[c_axis] = cur_channel
                filter_index[n_axis] = ovl.cast(intra_block_filter, ovl.uint32) + \
                    cur_filter_block * filters_per_worker
                if kernel_orientation == 'as-is':
                    filter_index[e_axis] = f_pos
                elif kernel_orientation == 'flipped':
                    filter_index[e_axis] = filter_size - f_pos - 1
                else:
                    raise ValueError("kernel_orientation must be 'as-is' or 'flipped'")
                filter_block[intra_block_filter, f_pos] = v[filter_index]

        # Load initial inputs for this channel.
        buffer_head = ovl.variable(0, ovl.uint32)
        for intra_block_batch in ovl.arange(num_block_batches):
            cur_batch = intra_block_batch + cur_batch_block * batches_per_worker
            for f_pos in ovl.arange(filter_size):
                x_index = [None, None, None]
                x_index[c_axis] = cur_channel
                x_index[n_axis] = cur_batch
                x_elem_index = starting_element + \
                    ovl.cast(cur_element_block * elements_per_worker, ovl.uint64) + \
                    ovl.cast(f_pos, ovl.uint64)
                x_index[e_axis] = x_elem_index
                index_in_bounds = ovl.logical_and(x_elem_index >= 0, x_elem_index < num_elements)
                with ovl.if_(index_in_bounds):
                    input_block[intra_block_batch, f_pos] = x[x_index]
                with ovl.else_():
                    input_block[intra_block_batch, f_pos] = 0

        for intra_block_element in ovl.arange(num_block_elements):
            cur_elem = intra_block_element + cur_element_block * elements_per_worker
            for intra_block_batch in ovl.arange(num_block_batches):
                cur_batch = intra_block_batch + cur_batch_block * batches_per_worker
                for intra_block_filter in ovl.arange(num_block_filters):
                    for f_pos in ovl.arange(filter_size):
                        x_pos = (buffer_head + ovl.cast(f_pos, ovl.uint32)) % filter_size
                        cur_x = ovl.cast(input_block[intra_block_batch, x_pos], ovl.float64)
                        cur_v = ovl.cast(filter_block[intra_block_filter, f_pos], ovl.float64)
                        accum[intra_block_batch, intra_block_filter, intra_block_element] = \
                            accum[intra_block_batch, intra_block_filter, intra_block_element] + cur_x * cur_v

                # Load new element.
                x_index = [None, None, None]
                x_index[c_axis] = cur_channel
                x_index[n_axis] = cur_batch
                x_elem_index = starting_element + cur_elem + filter_size
                x_index[e_axis] = x_elem_index
                index_in_bounds = ovl.logical_and(x_elem_index >= 0, x_elem_index < num_elements)
                with ovl.if_(index_in_bounds):
                    input_block[intra_block_batch, buffer_head] = x[x_index]
                with ovl.else_():
                    input_block[intra_block_batch, buffer_head] = 0
            buffer_head <<= (buffer_head + 1) % filter_size

    for intra_block_batch in ovl.arange(num_block_batches):
        cur_batch = intra_block_batch + cur_batch_block * batches_per_worker
        for intra_block_filter in ovl.arange(num_block_filters):
            cur_filter = intra_block_filter + cur_filter_block * filters_per_worker
            for intra_block_element in ovl.arange(num_block_elements):
                cur_elem = intra_block_element + cur_element_block * elements_per_worker
                output_index = [None, None, None]
                output_index[n_axis] = cur_batch
                output_index[e_axis] = cur_elem
                output_index[c_axis] = cur_filter
                output[output_index] = ovl.cast(
                    accum[intra_block_batch, intra_block_filter, intra_block_element], output.dtype)
    return output
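# A minimal usage sketch (assumed, not from the opveclib examples): run
# conv_1d through opveclib's evaluate() and check mode='same' with
# kernel_orientation='as-is' against a brute-force NumPy cross-correlation.
# It assumes conv_1d is exposed as an OVL operator like expm1 above; the
# shapes, the conv_1d_reference loop, and the tolerance are illustrative
# choices, not part of the original example.
import numpy as np
from opveclib import evaluate


def conv_1d_reference(x, v, left_apron):
    num_batches, num_channels, num_elements = x.shape
    num_filters, _, filter_size = v.shape
    out = np.zeros((num_batches, num_filters, num_elements))
    for n in range(num_batches):
        for f in range(num_filters):
            for e in range(num_elements):
                for k in range(filter_size):
                    src = e - left_apron + k
                    if 0 <= src < num_elements:
                        # Sum over channels; out-of-bounds inputs are treated as zero.
                        out[n, f, e] += np.sum(x[n, :, src] * v[f, :, k])
    return out


x = np.random.rand(2, 3, 25)  # [num_batches, num_channels, num_elements]
v = np.random.rand(4, 3, 5)   # [num_filters, num_channels, kernel_size]
result = evaluate(conv_1d(x, v, mode='same', data_format='NCE'))
assert np.allclose(result, conv_1d_reference(x, v, left_apron=5 // 2))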
def graph_triangle_count(startEdge, fromVertex, toVertex):
    """Counts the triangles in an undirected graph.

    Notice that this method assumes that the graph is given as an adjacency
    list where all lists with vertex neighbors are sorted.

    The parallel algorithm uses the following strategy. We map one thread per
    edge; this is also called the edge-based iterator strategy. The idea behind
    the algorithm is:

    1. Go over all edges (u, v).
    2. The neighboring indices for vertex u are N(u) and for vertex v are N(v).
    3. Increment the triangle counter by | N(u) /\ N(v) |, where /\ is the set
       intersection operator.

    We enforce an order on the vertices that avoids counting the same triangle
    three times; instead, each triangle is counted once.

    Attributes:
        None.

    The array toVertex is a flattened list-of-lists structure, where startEdge
    encodes the start indices of the separate lists.

    :param startEdge: Indices into toVertex where edges start.
    :type startEdge: list.
    :param fromVertex: The from-vertex of each edge.
    :type fromVertex: list.
    :param toVertex: The to-vertex of each edge.
    :type toVertex: list.
    :return: Counts of triangles per edge.
    """
    iEdge = ovl.position_in(toVertex.shape)[0]
    count = ovl.output(toVertex.shape, ovl.uint64)
    nTriangle = ovl.variable(0, ovl.uint64)
    iFromVertex = ovl.variable(fromVertex[iEdge], fromVertex.dtype)
    iFromEdge = ovl.variable(startEdge[iFromVertex], startEdge.dtype)
    iFromEdgeEnd = ovl.variable(startEdge[iFromVertex + 1], startEdge.dtype)
    iiFromVertex = ovl.variable(toVertex[iFromEdge], toVertex.dtype)
    iToVertex = ovl.variable(toVertex[iEdge], toVertex.dtype)
    iToEdge = ovl.variable(startEdge[iToVertex], startEdge.dtype)
    iToEdgeEnd = ovl.variable(startEdge[iToVertex + 1], startEdge.dtype)
    iiToVertex = ovl.variable(toVertex[iToEdge], toVertex.dtype)
    nMerge = iToEdgeEnd - iToEdge + iFromEdgeEnd - iFromEdge  # Maximum number of merges.

    # This construction is a work-around for simulating the function of a while loop.
    # TODO([email protected]): Replace this construct by a while loop once it is available in ovl.
    for iMerge in ovl.arange(nMerge):
        doMerge = ovl.logical_and(iFromEdge < iFromEdgeEnd, iToEdge < iToEdgeEnd)
        doMerge = ovl.logical_and(doMerge, iiFromVertex < iToVertex)
        with ovl.if_(doMerge):
            with ovl.if_(iiFromVertex < iiToVertex):
                iFromEdge <<= iFromEdge + 1
                iiFromVertex <<= toVertex[iFromEdge]
            with ovl.elif_(iiFromVertex > iiToVertex):
                iToEdge <<= iToEdge + 1
                iiToVertex <<= toVertex[iToEdge]
            with ovl.else_():
                nTriangle <<= nTriangle + 1
                iFromEdge <<= iFromEdge + 1
                iToEdge <<= iToEdge + 1
                iiFromVertex <<= toVertex[iFromEdge]
                iiToVertex <<= toVertex[iToEdge]

    # TODO([email protected]): Use a reduction function that computes a partial or complete sum.
    count[iEdge] = nTriangle  # Save the triangles for each edge.
    return count
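# A minimal usage sketch (assumed, not part of the original test code). It
# assumes graph_triangle_count is exposed as an OVL operator like expm1 above,
# and that each undirected edge is stored once, owned by its larger vertex and
# pointing to its smaller one, with neighbor lists sorted (the same assumed
# convention as the build_triangle_csr sketch earlier). Under that convention,
# summing the per-edge counts should give the total number of triangles.
import numpy as np
from opveclib import evaluate

# Edges 1-0, 2-0, 2-1, 3-0, 3-1: two triangles, {0, 1, 2} and {0, 1, 3}.
startEdge = np.array([0, 0, 1, 3, 5], dtype=np.uint64)
fromVertex = np.array([1, 2, 2, 3, 3], dtype=np.uint64)
toVertex = np.array([0, 0, 1, 0, 1], dtype=np.uint64)

per_edge = evaluate(graph_triangle_count(startEdge, fromVertex, toVertex))
print(per_edge)             # one count per entry of toVertex, expected [0, 0, 1, 0, 1]
print(int(per_edge.sum()))  # expected total: 2 triangles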