Пример #1
0
def run_cast_storage_synthetic():
    """Benchmark ``mx.nd.cast_storage`` from dense to sparse storage.

    For each benchmark in ``benchmarks`` (dense to csr, dense to row_sparse)
    a table is printed with one row per matrix size, context and density.
    The number of OMP threads is taken from the module-level ``args``.
    """

    def dense_to_sparse(m, n, density, ctx, repeat, stype):
        """Time the cast of one dense ``m x n`` matrix to ``stype`` and print a row.

        Parameters
        ----------
        m, n : int
            Number of rows / columns of the matrix.
        density : float
            Density passed to ``rand_ndarray`` when generating the data.
        ctx : context
            Context set as default before the data is created.
        repeat : int
            Repeat count forwarded to ``measure_cost``.
        stype : str
            Target storage type handed to ``mx.nd.cast_storage``.
        """
        set_default_context(ctx)
        data_shape = (m, n)
        # Generate the data in the sparse format first, then convert it to
        # default storage so that it can be used as the dense input.
        dns_data = rand_ndarray(data_shape, stype, density).tostype('default')
        dns_data.wait_to_read()

        # do one warm up run, verify correctness
        assert same(
            mx.nd.cast_storage(dns_data, stype).asnumpy(), dns_data.asnumpy())

        # start benchmarking
        cost = measure_cost(repeat, mx.nd.cast_storage, dns_data, stype)
        results = '{:10.1f} {:>10} {:8d} {:8d} {:10.2f}'.format(
            density * 100, str(ctx), m, n, cost * 1000)
        print(results)

    check_call(_LIB.MXSetNumOMPThreads(ctypes.c_int(args.num_omp_threads)))

    # params
    # m           number of rows
    # n           number of columns
    # density     density of the matrix
    # num_repeat  number of benchmark runs to average over
    # contexts    mx.cpu(), mx.gpu()
    #             note: benchmark different contexts separately; to benchmark cpu, compile without CUDA
    # benchmarks  dns_to_csr, dns_to_rsp
    m = [512, 512]
    n = [50000, 100000]
    density = [1.00, 0.80, 0.60, 0.40, 0.20, 0.10, 0.05, 0.02, 0.01]
    num_repeat = 10
    contexts = [mx.gpu()]
    benchmarks = ["dns_to_csr", "dns_to_rsp"]

    # run benchmark
    for b in benchmarks:
        stype = ''
        print("==================================================")
        # Compare by value: `is` tests object identity and only matched here
        # because CPython happens to intern identical string literals.
        if b == "dns_to_csr":
            stype = 'csr'
            print(" cast_storage benchmark: dense to csr, size m x n ")
        elif b == "dns_to_rsp":
            stype = 'row_sparse'
            print(" cast_storage benchmark: dense to rsp, size m x n ")
        else:
            print("invalid benchmark: %s" % b)
            continue
        print("==================================================")
        headline = '{:>10} {:>10} {:>8} {:>8} {:>10}'.format(
            'density(%)', 'context', 'm', 'n', 'time(ms)')
        print(headline)
        # m and n are parallel lists: the i-th entries form one matrix shape.
        for rows, cols in zip(m, n):
            for ctx in contexts:
                for den in density:
                    dense_to_sparse(rows, cols, den, ctx, num_repeat, stype)
            print("")
        print("")
Пример #2
0
def main():
    """Read the command-line options and run one dot benchmark with them."""
    opts = parse_args()

    # Matrix dimensions arrive as strings/numbers; normalise them to int.
    rows, inner, cols = (
        int(opts.lhs_row_dim),
        int(opts.lhs_col_dim),
        int(opts.rhs_col_dim),
    )
    lhs_density = float(opts.density)
    lhs_storage = opts.lhs_stype
    rhs_storage = opts.rhs_stype

    # The rhs density falls back to the lhs density when it is not given.
    rhs_density = float(opts.rhs_density) if opts.rhs_density else lhs_density

    # A csr lhs goes through the sparse dot operator, anything else through
    # the regular one.
    if lhs_storage == "csr":
        dot_func = mx.nd.sparse.dot
    else:
        dot_func = mx.nd.dot

    check_call(_LIB.MXSetNumOMPThreads(ctypes.c_int(opts.num_omp_threads)))
    bench_dot(
        rows,
        inner,
        cols,
        lhs_density,
        rhs_density,
        dot_func,
        False,
        lhs_storage,
        rhs_storage,
        opts.only_storage,
    )
Пример #3
0
def test_dot_synthetic(data_dict):
    """Benchmark sparse MXNet dot and SciPy dot operators with matrices of given density.

    `t_sparse` is the runtime of the invoked sparse dot operator in ms, while `t_dense` is the
    runtime of dot(dns, dns), with the same matrices except that they are in default storage type.

    Parameters
    ----------
    data_dict : dict
        Benchmark configuration. The keys read here are 'batch_size', 'm',
        'feature_dim' and 'density' (sequences of values to sweep),
        'num_repeat', and 'default_index', a dict with the keys 'batch_size',
        'output_dim', 'feature_dim' and 'density' that gives the index used
        for a sequence while another one is being swept.

    The OMP thread count and the choice between GPU and CPU are read from the
    module-level ``ARGS``.
    """

    # Benchmark MXNet and Scipys dot operator
    def bench_dot(lhs_shape,
                  rhs_shape,
                  lhs_stype,
                  rhs_stype,
                  lhs_den,
                  rhs_den,
                  trans_lhs,
                  ctx,
                  num_repeat=10,
                  fw="mxnet",
                  distribution="uniform"):
        """Run one sparse-vs-dense dot measurement and print one result row.

        Parameters
        ----------
        lhs_shape, rhs_shape : tuple of int
            Shapes of the generated lhs and rhs matrices.
        lhs_stype, rhs_stype : str
            Storage types passed to ``rand_ndarray``.
        lhs_den, rhs_den : float
            Densities passed to ``rand_ndarray``.
        trans_lhs : bool
            Whether the lhs is transposed in the product.
        ctx : context
            Context set as default before the data is created.
        num_repeat : int
            Repeat count forwarded to ``measure_cost``.
        fw : str
            Either "mxnet" or "scipy"; selects the operators being timed.
        distribution : str
            Distribution passed to ``rand_ndarray`` for the lhs (and for a
            csr rhs).
        """
        set_default_context(ctx)
        assert fw == "mxnet" or fw == "scipy"
        # Set funcs
        dot_func_sparse = mx.nd.sparse.dot if fw == "mxnet" else sp.spmatrix.dot
        dot_func_dense = mx.nd.dot if fw == "mxnet" else np.dot
        # Create matrix instances
        lhs_nd = rand_ndarray(lhs_shape,
                              lhs_stype,
                              density=lhs_den,
                              distribution=distribution)
        # only uniform distribution supported for rhs
        if rhs_stype == 'csr':
            rhs_nd = rand_ndarray(rhs_shape,
                                  rhs_stype,
                                  density=rhs_den,
                                  distribution=distribution)
        else:
            rhs_nd = rand_ndarray(rhs_shape,
                                  rhs_stype,
                                  density=rhs_den,
                                  distribution="uniform")
        lhs_dns = None
        rhs_dns = None
        dense_cost = None
        sparse_cost = None

        if fw == "mxnet":
            # Dense copies of both operands serve as the dot(dns, dns) baseline.
            lhs_dns = lhs_nd if lhs_stype == 'default' else lhs_nd.tostype(
                'default')
            rhs_dns = rhs_nd if rhs_stype == 'default' else rhs_nd.tostype(
                'default')
            # One warm up run, verify correctness
            out = dot_func_sparse(lhs_nd, rhs_dns, trans_lhs)
            out_expected = dot_func_dense(lhs_dns, rhs_dns, trans_lhs)
            assert_almost_equal(out.asnumpy(),
                                out_expected.asnumpy(),
                                rtol=1e-1,
                                atol=1e-1)
            # NOTE(review): the two boolean arguments after num_repeat are
            # interpreted by measure_cost, which is defined elsewhere.
            sparse_cost = measure_cost(num_repeat, False, False,
                                       dot_func_sparse, lhs_nd, rhs_nd,
                                       trans_lhs)
            dense_cost = measure_cost(num_repeat, False, False, dot_func_dense,
                                      lhs_dns, rhs_dns, trans_lhs)
        else:
            # SciPy path: the dense operands are numpy arrays and the sparse
            # lhs is a scipy csr matrix built from the same values.
            lhs_dns = lhs_nd.asnumpy()
            rhs_dns = rhs_nd.asnumpy()
            lhs_nd = sp.csr_matrix(lhs_nd.asnumpy())
            rhs_nd = rhs_nd.asnumpy()
            # One warm up run, verify correctness
            lhs_nd_copy = sp.spmatrix.transpose(
                lhs_nd) if trans_lhs else lhs_nd
            out = dot_func_sparse(lhs_nd_copy, rhs_dns)
            sparse_cost = measure_cost(num_repeat, trans_lhs, False,
                                       dot_func_sparse, lhs_nd, rhs_nd)
            dense_cost = measure_cost(num_repeat, trans_lhs, True,
                                      dot_func_dense, lhs_dns, rhs_dns)

        speedup = dense_cost / sparse_cost
        # Print results
        m = lhs_shape[0]
        k = lhs_shape[1]
        n = rhs_shape[1]
        result_pattern = '{:15.1f} {:15.1f} {:>10} {:8d} {:8d} {:8d} {:13.2f} {:13.2f} {:8.2f}'
        results = result_pattern.format(lhs_den * 100, rhs_den * 100, str(ctx),
                                        m, k, n, sparse_cost * 1000,
                                        dense_cost * 1000, speedup)
        print(results)

    def print_benchmark_info(lhs, rhs, lhs_trans, fw):
        """Print the banner and the column headline for one benchmark table."""
        trans_str = "^T" if lhs_trans else ""
        print("========================================================")
        # The formatting must happen inside the print() call: print() returns
        # None, so applying `%` to its result raises TypeError on Python 3.
        # NOTE(review): the last placeholder is filled with `rhs`, exactly as
        # in the original argument tuple; confirm that this is intended.
        print("  %s sparse dot benchmark: dot(%s, %s) = %s  " % (fw, lhs, rhs,
                                                                 rhs))
        print("  (matrix multiplication: (m x k)%s * (k x n) = m x n)  " % (
            trans_str))
        print("========================================================")
        headline_pattern = '{:>15} {:>15} {:>10} {:>8} {:>8} {:>8} {:>13} {:>13} {:>8}'
        headline = headline_pattern.format('lhs_density(%)', 'rhs_density(%)',
                                           'context', 'm', 'k', 'n',
                                           't_sparse(ms)', 't_dense(ms)',
                                           'speedup')
        print(headline)

    def run_benchmark(ctx=None,
                      lhs="csr",
                      lhs_trans=False,
                      rhs="dns",
                      fw="mxnet",
                      rhs_density=1,
                      distribution="uniform"):
        """Print one benchmark table, sweeping each dimension of ``data_dict`` in turn.

        Four sweeps are run (output dim, feature dim, batch size, density);
        while one quantity is swept the others stay at their default index.

        Raises
        ------
        ValueError
            If ``rhs_density`` is outside the range [0, 1].
        """

        if rhs_density > 1 or rhs_density < 0:
            raise ValueError("rhs_density has to be between 0 and 1")

        print_benchmark_info(lhs, rhs, lhs_trans, fw)

        if rhs == "csr":
            lhs_stype = "default"
            rhs_stype = "csr"
            # NOTE(review): always true as written, since lhs_stype is
            # assigned two lines above.
            assert (lhs_stype == 'default'), "Only dot(default, csr) supported"
            # Arrange dimensions according to use case. For below csr will have num_rows << num_cols
            # The data_dict entries are deliberately mapped to different
            # roles than in the csr-lhs branch below.
            feature_dim_list = data_dict['batch_size']
            batch_size_list = data_dict['m']
            output_dim_list = data_dict['feature_dim']
            density_list = data_dict['density']
            default_output_index = data_dict['default_index']['feature_dim']
            default_density_index = data_dict['default_index']['density']
            default_feature_index = data_dict['default_index']['batch_size']
            default_batch_size_index = data_dict['default_index']['output_dim']
            num_repeat = data_dict['num_repeat']

        else:
            lhs_stype = "csr"
            rhs_stype = "row_sparse" if rhs == "rsp" else "default"

            feature_dim_list = data_dict['feature_dim']
            output_dim_list = data_dict['m']
            batch_size_list = data_dict['batch_size']
            density_list = data_dict['density']

            default_output_index = data_dict['default_index']['output_dim']
            default_batch_size_index = data_dict['default_index']['batch_size']
            default_feature_index = data_dict['default_index']['feature_dim']
            default_density_index = data_dict['default_index']['density']
            num_repeat = data_dict['num_repeat']

        # In every sweep the rhs row count follows the lhs: the lhs row count
        # when the lhs is transposed, its column count otherwise.

        # Sweep 1: output dimension.
        for output_dim in output_dim_list:
            if lhs_trans:
                output_row_dim = batch_size_list[default_batch_size_index]
            else:
                output_row_dim = feature_dim_list[default_feature_index]
            bench_dot((batch_size_list[default_batch_size_index],
                       feature_dim_list[default_feature_index]),
                      (output_row_dim, output_dim),
                      lhs_stype,
                      rhs_stype,
                      density_list[default_density_index],
                      rhs_density,
                      lhs_trans,
                      ctx,
                      num_repeat=num_repeat,
                      fw=fw,
                      distribution=distribution)

        # Sweep 2: feature dimension.
        for feature_dim in feature_dim_list:
            if lhs_trans:
                output_row_dim = batch_size_list[default_batch_size_index]
            else:
                output_row_dim = feature_dim
            bench_dot((batch_size_list[default_batch_size_index], feature_dim),
                      (output_row_dim, output_dim_list[default_output_index]),
                      lhs_stype,
                      rhs_stype,
                      density_list[default_density_index],
                      rhs_density,
                      lhs_trans,
                      ctx,
                      num_repeat=num_repeat,
                      fw=fw,
                      distribution=distribution)

        # Sweep 3: batch size.
        for batch_size in batch_size_list:
            if lhs_trans:
                output_row_dim = batch_size
            else:
                output_row_dim = feature_dim_list[default_feature_index]
            bench_dot((batch_size, feature_dim_list[default_feature_index]),
                      (output_row_dim, output_dim_list[default_output_index]),
                      lhs_stype,
                      rhs_stype,
                      density_list[default_density_index],
                      rhs_density,
                      lhs_trans,
                      ctx,
                      num_repeat=num_repeat,
                      fw=fw,
                      distribution=distribution)

        # Sweep 4: density. The swept value is used for both lhs and rhs
        # here, so the rhs_density argument is not used in this sweep.
        for density in density_list:
            if lhs_trans:
                output_row_dim = batch_size_list[default_batch_size_index]
            else:
                output_row_dim = feature_dim_list[default_feature_index]
            bench_dot((batch_size_list[default_batch_size_index],
                       feature_dim_list[default_feature_index]),
                      (output_row_dim, output_dim_list[default_output_index]),
                      lhs_stype,
                      rhs_stype,
                      density,
                      density,
                      lhs_trans,
                      ctx,
                      num_repeat=num_repeat,
                      fw=fw,
                      distribution=distribution)

    check_call(_LIB.MXSetNumOMPThreads(ctypes.c_int(ARGS.num_omp_threads)))
    context = mx.gpu() if ARGS.gpu else mx.cpu()
    # TODO(anirudh): make the data dicts to config which can be passed at runtime
    distributions = ["uniform", "powerlaw"]
    for distribution in distributions:
        run_benchmark(context,
                      lhs="csr",
                      rhs="default",
                      lhs_trans=False,
                      fw="mxnet",
                      rhs_density=1,
                      distribution=distribution)
        run_benchmark(context,
                      lhs="csr",
                      rhs="default",
                      lhs_trans=True,
                      fw="mxnet",
                      rhs_density=1,
                      distribution=distribution)
        run_benchmark(context,
                      lhs="csr",
                      rhs="rsp",
                      lhs_trans=False,
                      fw="mxnet",
                      rhs_density=0.05,
                      distribution=distribution)
        run_benchmark(context,
                      lhs="default",
                      rhs="csr",
                      lhs_trans=False,
                      fw="mxnet",
                      rhs_density=0.001,
                      distribution=distribution)
        # The scipy baselines are only run when not benchmarking on GPU.
        if not ARGS.gpu:
            run_benchmark(context,
                          lhs="csr",
                          rhs="default",
                          lhs_trans=False,
                          fw="scipy",
                          rhs_density=1,
                          distribution=distribution)
            run_benchmark(context,
                          lhs="csr",
                          rhs="default",
                          lhs_trans=True,
                          fw="scipy",
                          rhs_density=1,
                          distribution=distribution)
Пример #4
0
def test_dot_synthetic():
    """Benchmark mx.nd.dot(sparse_ndarray, dense_ndarray) with given density.

    `t_sparse` is the time cost of dot(csr, dns), while `t_dense` is the time cost
    of dot(dns, dns), with the same matrix except that it is in default storage type.
    The same products are also timed with numpy (dense) and scipy (csr) as a
    baseline. The OMP thread count is read from the module-level ``args``.
    """
    # One result row: density, context, n, m, k, then ratio / dense / sparse
    # for MXNet followed by ratio / dense / sparse for the baseline.
    row_fmt = "%0.1f\t\t%s\t%d\t%d\t%d\t%0.2f\t\t\t%0.2f\t%0.5f\t\t%0.2f\t\t\t\t%0.6f\t%0.5f"
    header = ('density(%)\tcontext\tn\tm\tk\tt_dense/t_sparse\tt_dense\tt_sparse'
              '\tt_scipy_dense/t_scipy_sparse\tt_scipy_dense\tt_scipy_sparse')

    def measure_cost_forward_baseline(repeat, dot, lhs, rhs):
        """Return the mean wall-clock seconds of ``dot(lhs, rhs)`` over ``repeat`` runs."""
        start = time.time()
        for _ in range(repeat):
            dot(lhs, rhs)
        end = time.time()
        return (end - start) / repeat

    def measure_cost_backward_baseline(repeat, dot, transpose, lhs, rhs):
        """Return the mean wall-clock seconds of ``dot(transpose(lhs), rhs)``.

        The transpose is performed inside the timed loop, so its cost is
        included in the result.
        """
        start = time.time()
        for _ in range(repeat):
            dot(transpose(lhs), rhs)
        end = time.time()
        return (end - start) / repeat

    def print_row(m, k, n, density, ctx, costs, costs_baseline):
        """Print one result row; both cost lists are ordered [dense, sparse]."""
        ratio = costs[0] / costs[1]
        ratio_baseline = costs_baseline[0] / costs_baseline[1]
        print(row_fmt %
              (density * 100, str(ctx), n, m, k, ratio, costs[0], costs[1],
               ratio_baseline, costs_baseline[0], costs_baseline[1]))

    def bench_dot_forward(m, k, n, density, ctx, repeat):
        """Time dot(A, B) for an ``m x k`` lhs A and a dense ``k x n`` rhs B."""
        set_default_device(ctx)
        dns = mx.nd.random.uniform(shape=(k, n)).copyto(ctx)
        data_shape = (m, k)
        csr_data = rand_ndarray(data_shape, 'csr', density)
        dns_data = csr_data.tostype('default')
        rhs_dns_np = dns.asnumpy()
        # Dense numpy copy of the lhs. It is taken from the MXNet array
        # because scipy's csr_matrix has no `tostype` method.
        lhs_dns_np = dns_data.asnumpy()
        lhs_csr_sp = sp.csr_matrix(lhs_dns_np)  # csr in scipy

        costs = []
        for d in (dns_data, csr_data):
            # Make sure the operands are ready before timing starts.
            dns.wait_to_read()
            d.wait_to_read()
            costs.append(measure_cost(repeat, mx.nd.dot, d, dns))

        costs_baseline = [
            measure_cost_forward_baseline(repeat, np.dot, lhs_dns_np,
                                          rhs_dns_np),
            measure_cost_forward_baseline(repeat, sp.spmatrix.dot,
                                          lhs_csr_sp, rhs_dns_np),
        ]
        print_row(m, k, n, density, ctx, costs, costs_baseline)

    def bench_dot_backward(m, k, n, density, ctx, repeat):
        """Time dot(A.T, B) for an ``m x k`` lhs A and a dense ``m x n`` rhs B."""
        set_default_device(ctx)
        dns = mx.nd.random.uniform(shape=(m, n)).copyto(ctx)
        data_shape = (m, k)
        csr_data = rand_ndarray(data_shape, 'csr', density)
        dns_data = csr_data.tostype('default')
        rhs_dns_np = dns.asnumpy()
        # Dense numpy copy of the lhs. It is taken from the MXNet array
        # because scipy's csr_matrix has no `tostype` method.
        lhs_dns_np = dns_data.asnumpy()
        lhs_csr_sp = sp.csr_matrix(lhs_dns_np)

        costs = []
        for d in (dns_data, csr_data):
            # Make sure the operands are ready before timing starts.
            dns.wait_to_read()
            d.wait_to_read()
            costs.append(
                measure_cost(repeat, mx.nd.dot, d, dns, transpose_a=True))

        costs_baseline = [
            measure_cost_backward_baseline(repeat, np.dot, np.transpose,
                                           lhs_dns_np, rhs_dns_np),
            measure_cost_backward_baseline(repeat, sp.spmatrix.dot,
                                           sp.spmatrix.transpose,
                                           lhs_csr_sp, rhs_dns_np),
        ]
        print_row(m, k, n, density, ctx, costs, costs_baseline)

    print("A = sparse NDArray of shape(m, k)")
    print("B = dense NDArray of shape(k, n)")
    print("dot_forward\tdot(csr, dns)")
    print(header)

    check_call(_LIB.MXSetNumOMPThreads(ctypes.c_int(args.num_omp_threads)))
    # TODO(haibin) make these runtime options
    m = 512
    k = [50000, 100000]
    n = [64, 128]
    density = [
        1.00, 0.90, 0.70, 0.50, 0.30, 0.20, 0.10, 0.07, 0.05, 0.02, 0.01,
        0.005, 0.001
    ]
    num_repeat = 10
    # contexts = [mx.cpu(), mx.gpu(0)]
    contexts = [mx.cpu()]
    # k and n are parallel lists: the i-th entries are benchmarked together.
    for k_dim, n_dim in zip(k, n):
        for ctx in contexts:
            for den in density:
                bench_dot_forward(m, k_dim, n_dim, den, ctx, num_repeat)

    print("dot_backward\tdot(csr.T, dns)")
    print(header)
    for k_dim, n_dim in zip(k, n):
        for ctx in contexts:
            for den in density:
                bench_dot_backward(m, k_dim, n_dim, den, ctx, num_repeat)
Пример #5
0
    logger.info('Running model %s for inference', symbol_file)

    acc_m = mx.metric.create('acc')
    mod = mx.mod.Module(symbol=sym,
                        context=ctx,
                        data_names=['csr_data', 'dns_data'],
                        label_names=[
                            label_name,
                        ])
    mod.bind(for_training=False,
             data_shapes=data.provide_data,
             label_shapes=data.provide_label)
    mod.set_params(arg_params, aux_params)

    check_call(_LIB.MXSetNumOMPThreads(ctypes.c_int(args.num_omp_threads)))
    batch_data = []
    nbatch = 0
    while nbatch < args.num_batches:
        for batch in data:
            batch_data.append(batch)
            nbatch += 1
            if nbatch < args.num_batches:
                continue
            else:
                break
        data.hard_reset()
    #for data warmup
    wi = args.num_warmup
    i = 0
    for batch in batch_data: