def test_dot_real(data_dict):
    """Dot operator benchmarking with real datasets.

    Downloads the dataset described by ``data_dict`` if needed, estimates its
    density, then compares sparse vs. dense dot products while sweeping the
    output dimension (at the default batch size) and the batch size (at the
    default output dimension), each in plain / transposed / row-sparse-rhs form.
    """
    data_dir = os.path.join(os.getcwd(), 'data')
    path = os.path.join(data_dir, data_dict['data_name'])
    if not os.path.exists(path):
        get_bz2_data(
            data_dir,
            data_dict['data_name'],
            data_dict['url'],
            data_dict['data_origin_name']
        )
        assert os.path.exists(path)

    k = data_dict['feature_dim']
    m = data_dict['m']
    batch_size_list = data_dict['batch_size']
    default_output_index = data_dict['default_index']['output_dim']
    default_batch_size_index = data_dict['default_index']['batch_size']
    density = estimate_density(path, data_dict['feature_dim'])
    num_batches = data_dict['num_batches']

    assert default_batch_size_index < len(batch_size_list)
    assert default_output_index < len(m)
    if ARGS.verbose:
        # BUG FIX: the original applied '%' to print()'s return value (None),
        # raising TypeError; the formatting belongs inside the call.
        print("Running Benchmarking on %r data" % data_dict['data_mini'])
    print('{:>15} {:>10} {:>10} {:>10} {:>20} {:>15} {:>15} {:>10} {:>10}'.format(
        'density(%)', 'n', 'm', 'k', 't_dense/t_sparse', 't_dense(ms)',
        't_sparse(ms)', 'is_transpose', 'rhs_rsp'))

    # Sweep output_dim at the default batch size.
    for output_dim in m:
        _compare_sparse_dense(data_dir, data_dict['data_name'], data_dict['data_mini'],
                              k, output_dim, density,
                              batch_size_list[default_batch_size_index], num_batches)
        _compare_sparse_dense(data_dir, data_dict['data_name'], data_dict['data_mini'],
                              k, output_dim, density,
                              batch_size_list[default_batch_size_index], num_batches,
                              transpose=True)
        _compare_sparse_dense(data_dir, data_dict['data_name'], data_dict['data_mini'],
                              k, output_dim, density,
                              batch_size_list[default_batch_size_index], num_batches,
                              rsp=True)

    # Sweep batch_size at the default output dimension.
    for batch_size in batch_size_list:
        _compare_sparse_dense(data_dir, data_dict['data_name'], data_dict['data_mini'],
                              k, m[default_output_index], density,
                              batch_size, num_batches)
        _compare_sparse_dense(data_dir, data_dict['data_name'], data_dict['data_mini'],
                              k, m[default_output_index], density,
                              batch_size, num_batches, transpose=True)
        # BUG FIX: the rsp case previously reused 'output_dim' leaked from the
        # previous loop and the default batch size; it must use the swept
        # batch_size and the default output dim like its two siblings above.
        _compare_sparse_dense(data_dir, data_dict['data_name'], data_dict['data_mini'],
                              k, m[default_output_index], density,
                              batch_size, num_batches, rsp=True)
def test_dot_real(data_dict):
    """Dot operator benchmarking with real datasets.

    Fetches the dataset named in ``data_dict`` when absent, estimates its
    density, and runs sparse-vs-dense dot benchmarks over two sweeps:
    every output dimension at the default batch size, and every batch size
    at the default output dimension (plain, transposed, and rsp variants).
    """
    data_dir = os.path.join(os.getcwd(), 'data')
    path = os.path.join(data_dir, data_dict['data_name'])
    if not os.path.exists(path):
        get_bz2_data(data_dir, data_dict['data_name'], data_dict['url'],
                     data_dict['data_origin_name'])
        assert os.path.exists(path)

    k = data_dict['feature_dim']
    m = data_dict['m']
    batch_size_list = data_dict['batch_size']
    default_output_index = data_dict['default_index']['output_dim']
    default_batch_size_index = data_dict['default_index']['batch_size']
    density = estimate_density(path, data_dict['feature_dim'])
    num_batches = data_dict['num_batches']

    assert default_batch_size_index < len(batch_size_list)
    assert default_output_index < len(m)
    if ARGS.verbose:
        # BUG FIX: '%' was applied outside the print() call, i.e. to None,
        # which raises TypeError; format the string inside the call.
        print("Running Benchmarking on %r data" % data_dict['data_mini'])
    print('{:>15} {:>10} {:>10} {:>10} {:>20} {:>15} {:>15} {:>10} {:>10}'.
          format('density(%)', 'n', 'm', 'k', 't_dense/t_sparse',
                 't_dense(ms)', 't_sparse(ms)', 'is_transpose', 'rhs_rsp'))

    # Sweep output dimension at the default batch size.
    for output_dim in m:
        _compare_sparse_dense(data_dir, data_dict['data_name'], data_dict['data_mini'],
                              k, output_dim, density,
                              batch_size_list[default_batch_size_index], num_batches)
        _compare_sparse_dense(data_dir, data_dict['data_name'], data_dict['data_mini'],
                              k, output_dim, density,
                              batch_size_list[default_batch_size_index], num_batches,
                              transpose=True)
        _compare_sparse_dense(data_dir, data_dict['data_name'], data_dict['data_mini'],
                              k, output_dim, density,
                              batch_size_list[default_batch_size_index], num_batches,
                              rsp=True)

    # Sweep batch size at the default output dimension.
    for batch_size in batch_size_list:
        _compare_sparse_dense(data_dir, data_dict['data_name'], data_dict['data_mini'],
                              k, m[default_output_index], density,
                              batch_size, num_batches)
        _compare_sparse_dense(data_dir, data_dict['data_name'], data_dict['data_mini'],
                              k, m[default_output_index], density,
                              batch_size, num_batches, transpose=True)
        # BUG FIX: previously passed 'output_dim' (a stale loop variable from
        # the sweep above) and the default batch size; use the values this
        # loop is actually sweeping, matching the two calls above.
        _compare_sparse_dense(data_dir, data_dict['data_name'], data_dict['data_mini'],
                              k, m[default_output_index], density,
                              batch_size, num_batches, rsp=True)
def test_dot_real(data_dict):
    """Benchmark sparse (CSR) vs. dense dot products on a real dataset.

    Downloads the dataset if missing, carves out a 2000-line mini sample,
    then times mx.nd.dot over CSR and dense copies of each mini-batch for
    every batch size listed in ``data_dict['batch_size']``.
    """

    def make_iter(svm_path, shape, bsize):
        # Fresh LibSVM iterator over the on-disk mini dataset.
        source = mx.io.LibSVMIter(data_libsvm=svm_path,
                                  data_shape=shape,
                                  batch_size=bsize)
        return iter(source)

    data_dir = os.path.join(os.getcwd(), 'data')
    path = os.path.join(data_dir, data_dict['data_name'])
    if not os.path.exists(path):
        get_bz2_data(data_dir, data_dict['data_name'], data_dict['url'],
                     data_dict['data_origin_name'])
        assert os.path.exists(path)

    k = data_dict['feature_dim']
    m = data_dict['m']
    density = estimate_density(path, data_dict['feature_dim'])

    mini_path = os.path.join(data_dir, data_dict['data_mini'])
    if not os.path.exists(mini_path):
        # Sample the first 2000 lines of the full dataset for a quick run.
        os.system("head -n 2000 %r > %r" % (path, mini_path))
        assert os.path.exists(mini_path)

    print("Running Benchmarking on %r data" % data_dict['data_mini'])
    # Iterate through each batch size of choice.
    for batch_size in data_dict['batch_size']:
        print("batch_size is %d" % batch_size)
        # model
        train_iter = make_iter(mini_path, (k, ), batch_size)
        weight = mx.nd.random.uniform(low=0, high=1, shape=(k, m))

        # Materialize every mini-batch once in CSR form and once densified.
        csr_data = []
        dns_data = []
        num_batch = 0
        for _ in train_iter:
            batch_data = train_iter.getdata()
            csr_data.append(batch_data)
            dns_data.append(batch_data.tostype('default'))
            num_batch += 1

        num_repeat = 5
        costs = []
        for storage in [csr_data, dns_data]:
            weight.wait_to_read()
            total = 0.
            count = 0
            for piece in storage:
                piece.wait_to_read()
                total += measure_cost(num_repeat, mx.nd.dot, piece, weight)
                count += 1
            costs.append(total / count)

        t_sparse, t_dense = costs[0], costs[1]
        ratio = t_dense / t_sparse
        print('density(%)\tn\tm\tk\tt_dense/t_sparse\tt_dense\tt_sparse')
        fmt = "%0.4f\t\t%d\t%d\t%d\t%0.2f\t\t\t%0.4f\t%0.6f"
        print(fmt % (density * 100, batch_size, m, k, ratio, t_dense, t_sparse))
def test_dot_real(data_dict):
    """Benchmark sparse vs. dense dot products on a real dataset.

    Downloads the dataset described by ``data_dict`` if absent, builds a
    2000-line mini sample, and for each configured batch size times
    mx.nd.dot over CSR and densified copies of every mini-batch, printing
    the dense/sparse cost ratio.
    """

    def get_iter(path, data_shape, batch_size):
        # Fresh LibSVM iterator over the on-disk dataset.
        data_train = mx.io.LibSVMIter(data_libsvm=path,
                                      data_shape=data_shape,
                                      batch_size=batch_size)
        data_iter = iter(data_train)
        return data_iter

    data_dir = os.path.join(os.getcwd(), 'data')
    path = os.path.join(data_dir, data_dict['data_name'])
    if not os.path.exists(path):
        get_bz2_data(
            data_dir,
            data_dict['data_name'],
            data_dict['url'],
            data_dict['data_origin_name']
        )
        assert os.path.exists(path)

    k = data_dict['feature_dim']
    m = data_dict['m']
    density = estimate_density(path, data_dict['feature_dim'])

    mini_path = os.path.join(data_dir, data_dict['data_mini'])
    if not os.path.exists(mini_path):
        # Take the first 2000 lines of the full dataset as a quick sample.
        os.system("head -n 2000 %r > %r" % (path, mini_path))
        assert os.path.exists(mini_path)

    # BUG FIX: Python 2 'print' statements are syntax errors under Python 3
    # (the rest of this file uses print() calls); converted to function calls.
    print("Running Benchmarking on %r data" % data_dict['data_mini'])
    for batch_size in data_dict['batch_size']:  # iterator through different batch size of choice
        print("batch_size is %d" % batch_size)
        # model
        data_shape = (k, )
        train_iter = get_iter(mini_path, data_shape, batch_size)
        weight = mx.nd.random.uniform(low=0, high=1, shape=(k, m))

        csr_data = []
        dns_data = []
        num_batch = 0
        for batch in train_iter:
            data = train_iter.getdata()
            csr_data.append(data)
            dns_data.append(data.tostype('default'))
            num_batch += 1

        bag_of_data = [csr_data, dns_data]
        num_repeat = 5
        costs = []
        for d in bag_of_data:
            weight.wait_to_read()
            cost = 0.
            count = 0
            for d_batch in d:
                d_batch.wait_to_read()
                cost += measure_cost(num_repeat, mx.nd.dot, d_batch, weight)
                count += 1
            costs.append(cost / count)

        t_sparse = costs[0]
        t_dense = costs[1]
        ratio = t_dense / t_sparse
        print('density(%)\tn\tm\tk\tt_dense/t_sparse\tt_dense\tt_sparse')
        fmt = "%0.4f\t\t%d\t%d\t%d\t%0.2f\t\t\t%0.4f\t%0.6f"
        print(fmt % (density * 100, batch_size, m, k, ratio, t_dense, t_sparse))