Example #1
# Note: Node and Config are project-specific types; their import paths are not
# shown in this example.
import numpy as np
from typing import Callable, Dict


def add_cpu(node: Node, alloc_map: Dict[str, np.ndarray],
            config: Config) -> Callable[[], None]:
    """Add Kernel (CPU version)

    This function creates a kernel which adds two vectors on CPU

    Z = X + Y

    Args:
        node (Node): A source node with operator `Add`
        alloc_map (dict): The dictionary of names -> allocations
        config (Config): Kernel build configuration (unused by this kernel)

    Returns:
        fn: A new kernel that computes Z = X + Y
    """

    if node.get_operator() != "Add":
        raise ValueError("Node operator should be add, not {}".format(
            node.get_operator()))

    x_io = node.inputs["A"]
    y_io = node.inputs["B"]
    z_io = node.outputs["C"]

    x = x_io.get_data(alloc_map)
    y = y_io.get_data(alloc_map)
    z = z_io.get_data(alloc_map)

    def fn():
        # Write the elementwise sum into the pre-allocated output buffer.
        np.copyto(z, x + y)

    return fn
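
The returned closure captures the resolved input and output buffers up front, so invoking it later only performs the addition and writes the result in place. Below is a minimal usage sketch with hypothetical stand-in Node/IO objects that expose just the attributes add_cpu reads; the real project classes are not shown in this example.

# Usage sketch (hypothetical stand-ins, not the project's real classes).
import numpy as np


class _StubIO:
    def __init__(self, name):
        self.name = name

    def get_data(self, alloc_map):
        return alloc_map[self.name]


class _StubNode:
    def __init__(self):
        self.inputs = {"A": _StubIO("x"), "B": _StubIO("y")}
        self.outputs = {"C": _StubIO("z")}

    def get_operator(self):
        return "Add"


alloc_map = {
    "x": np.array([1.0, 2.0, 3.0]),
    "y": np.array([10.0, 20.0, 30.0]),
    "z": np.zeros(3),
}

kernel = add_cpu(_StubNode(), alloc_map, config=None)  # config is unused here
kernel()
print(alloc_map["z"])  # -> [11. 22. 33.]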
Example #2
# Note: ops, kernels, Node, and Config are project-specific modules and types;
# their import paths are not shown in this example.
import numpy as np
from typing import Callable, Dict, Optional


def build_kernel(node: Node, alloc_map: Dict[str, np.ndarray],
                 config: Config) -> Optional[Callable[[], None]]:
    """
    Build an executable kernel for a graph node, dispatching to the CPU or GPU
    implementation according to the node's device type. Returns None for
    graph-head nodes, which have no kernel to execute.
    """

    oper = node.get_operator()
    if oper == ops.ADD:
        if node.device_type == "cpu":
            return kernels.add_cpu(node, alloc_map, config)
        else:
            return kernels.add_gpu(node, alloc_map, config)
    if oper == ops.O2P_LOAD:
        return kernels.load_cpu(node, alloc_map, config)
    if oper == ops.O2P_STORE:
        return kernels.store_cpu(node, alloc_map, config)
    if oper == ops.O2P_COPY:
        return kernels.copy(node, alloc_map, config)

    if oper == ops.CONV:
        if node.device_type == "cpu":
            return kernels.conv_cpu(node, alloc_map, config)
        else:
            return kernels.conv_gpu(node, alloc_map, config)
    if oper == ops.BATCH_NORM:
        if node.device_type == "cpu":
            return kernels.batchnorm_cpu(node, alloc_map, config)
        else:
            return kernels.batchnorm_gpu(node, alloc_map, config)
    if oper == ops.RELU:
        if node.device_type == "cpu":
            return kernels.relu_cpu(node, alloc_map, config)
        else:
            return kernels.relu_gpu(node, alloc_map, config)
    if oper == ops.MAXPOOL:
        if node.device_type == "cpu":
            return kernels.maxpool_cpu(node, alloc_map, config)
        else:
            return kernels.maxpool_gpu(node, alloc_map, config)
    if oper == ops.GLOBALAVERAGEPOOL:
        if node.device_type == "cpu":
            return kernels.globalAveragePool_cpu(node, alloc_map, config)
        else:
            return kernels.globalAveragePool_gpu(node, alloc_map, config)

    if oper == ops.AVERAGE_POOL:
        if node.device_type == "cpu":
            return kernels.average_pool_cpu(node, alloc_map, config)
        else:
            return kernels.average_pool_gpu(node, alloc_map, config)

    if oper == ops.PAD:
        if node.device_type == "cpu":
            return kernels.pad_cpu(node, alloc_map, config)
        else:
            raise NotImplementedError()

    if oper == ops.FLATTEN:
        if node.device_type == "cpu":
            return kernels.flatten_cpu(node, alloc_map, config)
        else:
            return kernels.flatten_gpu(node, alloc_map, config)

    if oper == ops.RESHAPE:
        if node.device_type == "cpu":
            return kernels.reshape_cpu(node, alloc_map, config)
        else:
            return kernels.reshape_gpu(node, alloc_map, config)

    if oper == ops.GEMM:
        if node.device_type == "cpu":
            return kernels.gemm_cpu(node, alloc_map, config)
        else:
            return kernels.gemm_gpu(node, alloc_map, config)

    if oper == ops.DROPOUT:
        if node.device_type == "cpu":
            return kernels.dropout_cpu(node, alloc_map, config)
        else:
            return kernels.dropout_gpu(node, alloc_map, config)

    if oper == ops.CLIP:
        if node.device_type == "cpu":
            return kernels.clip_v6_cpu(node, alloc_map, config)
        else:
            return kernels.clip_v6_gpu(node, alloc_map, config)

    if oper == ops.REDUCE_MEAN:
        if node.device_type == "cpu":
            return kernels.reduce_mean_cpu(node, alloc_map, config)
        else:
            return kernels.reduce_mean_gpu(node, alloc_map, config)

    if oper == ops.O2P_GRAPH_HEAD:
        return None

    raise ValueError(f"Operator {oper} not supported")