def evaluate_qsg(w0, b0, num_levels, bucket_size):
    """Time QSG gradient quantization.

    Configures the shared ``nn_settings`` for the 'qsg' quantizer, builds a
    fresh CifarNetModel initialized with (w0, b0), and measures the wall-clock
    time of ``iter_per_eval`` quantized-gradient computations, repeated for
    ``num_evals`` trials.

    Returns a numpy array of per-trial elapsed times (seconds).
    """
    # configure the shared settings dict for the QSG quantizer
    nn_settings['initial_w'] = w0
    nn_settings['initial_b'] = b0
    nn_settings['quantizer'] = 'qsg'
    nn_settings['bucket_sizes'] = bucket_size
    nn_settings['num_levels'] = num_levels
    nn_settings['H'] = None

    model = CifarNetModel()
    model.create_network(nn_settings)

    elapsed = np.zeros(num_evals)
    for trial in range(num_evals):
        x, y = db_sess.run([db_images, db_labels])
        t0 = time.time()
        for _ in range(iter_per_eval):
            qw, sw, qb, sb = model.quantized_gradients(x, y)
        elapsed[trial] = time.time() - t0
    return elapsed
def evaluate_qcssg(w0, b0, num_levels, H, err_feedback, feedback_beta):
    """Time quantized compressive-sampling gradient quantization.

    Configures ``nn_settings`` for the 'quantized-cs' quantizer (with Hadamard
    matrix ``H`` and optional error feedback), builds a CifarNetModel from
    (w0, b0), and times ``iter_per_eval`` quantized-gradient computations per
    trial over ``num_evals`` trials.

    Returns a numpy array of per-trial elapsed times (seconds).
    """
    # configure the shared settings dict for the quantized-CS quantizer
    nn_settings['initial_w'] = w0
    nn_settings['initial_b'] = b0
    nn_settings['quantizer'] = 'quantized-cs'
    nn_settings['num_levels'] = num_levels
    nn_settings['H'] = H
    nn_settings['error_feedback'] = err_feedback
    nn_settings['feedback_weight'] = feedback_beta

    model = CifarNetModel()
    model.create_network(nn_settings)

    elapsed = np.zeros(num_evals)
    for trial in range(num_evals):
        x, y = db_sess.run([db_images, db_labels])
        t0 = time.time()
        for _ in range(iter_per_eval):
            qw, sw, qb, sb = model.quantized_gradients(x, y)
        elapsed[trial] = time.time() - t0
    return elapsed
def test():
    """Entry point: load/train a base model, then run the timing benchmarks.

    Relies on module-level globals: ``layer_index``, ``output_folder``,
    ``nn_settings``, ``db_sess``, ``db_images``/``db_labels`` and
    ``initializer_op``.
    """
    # per-layer bucket sizes / shapes for the 5 CifarNet layers; only the
    # entry selected by the module-level `layer_index` is used here
    bucket_sizes = [[320, 64], [320, 64], [384, 384], [384, 192], [192, 10]]
    layer_shapes = [[5, 5, 3, 64], [5, 5, 64, 64], [2304, 384], [384, 192], [192, 10]]
    bucket_size = bucket_sizes[layer_index][0]
    layer_shape = layer_shapes[layer_index]
    # load hadamard matrices
    H = load_hadamard_matrix(n=bucket_size)
    # load/train initial model: train once and cache to model.npz, reload after
    model_fname = os.path.join(output_folder, 'model.npz')
    if not os.path.exists(model_fname):
        w0, b0 = train_base_model()
        # savez stores arrays as arr_0, arr_1, ... — weights first, then biases
        np.savez(model_fname, *w0, *b0)
    else:
        data = np.load(model_fname, encoding='latin1')
        # sorted key order recovers the original savez ordering:
        # first half of the keys are weights, second half biases
        keys = np.sort(list(data.keys()))
        num_layers = len(keys) // 2
        w0 = [data[keys[k]] for k in range(num_layers)]
        b0 = [data[keys[k]] for k in range(num_layers, 2 * num_layers)]
        data.close()
    # create the neural network
    nn_settings['initial_w'] = w0
    nn_settings['initial_b'] = b0
    nn = CifarNetModel()
    nn.create_network(nn_settings)
    # report accuracy on one test batch, then switch the pipeline back to train
    db_sess.run(initializer_op['test'])
    x, y = db_sess.run([db_images, db_labels])
    print('Model accuracy: ', nn.accuracy(x, y))
    db_sess.run(initializer_op['train'])
    # evaluate QSG
    # fname = os.path.join(output_folder, 'qsg.mat')
    # evaluate_qsg(nn, bucket_size, fname)
    # evaluate dithered transformed sg
    # fname = os.path.join(output_folder, 'dqtsg.mat')
    # evaluate_dqtsg(nn, H, fname)
    # evaluate quantized compressive sampling
    # NOTE(review): this call does not match the evaluate_qcssg defined in this
    # file, which takes (w0, b0, num_levels, H, err_feedback, feedback_beta)
    # and returns timings rather than writing `fname`. Presumably a stale call
    # or a different overload elsewhere — confirm before running.
    fname = os.path.join(output_folder, 'qcssg.mat')
    evaluate_qcssg(nn, H, fname)
    # evaluate top-k sg
    # NOTE(review): same mismatch — evaluate_topksg here is defined as
    # (w0, b0, K); verify the intended arguments.
    fname = os.path.join(output_folder, 'topk.mat')
    evaluate_topksg(nn, fname)
    # evaluate spectral atomo (evaluate_atomo is not defined in this file)
    fname = os.path.join(output_folder, 'sp_atomo.mat')
    evaluate_atomo(nn, fname)
def evaluate_base_model():
    """Warm-train a fresh model, then time unquantized gradient computation.

    Trains a scratch CifarNetModel for 15 steps (training is done using
    batch-size=256), then times ``iter_per_eval`` calls to ``get_gradients``
    per trial over ``num_evals`` trials.

    Returns (w0, b0, et): weights and biases after the warm-up, and the array
    of per-trial elapsed times (seconds).
    """
    # start from scratch: no initial parameters
    nn_settings['initial_w'] = None
    nn_settings['initial_b'] = None
    model = CifarNetModel()
    model.create_network(nn_settings)

    # short warm-up training phase
    for _ in range(15):
        images, labels = db_sess.run([db_images, db_labels])
        model.train(images, labels)
    w0, b0 = model.get_weights()

    elapsed = np.zeros(num_evals)
    for trial in range(num_evals):
        images, labels = db_sess.run([db_images, db_labels])
        t0 = time.time()
        for _ in range(iter_per_eval):
            gw, gb = model.get_gradients(images, labels)
        elapsed[trial] = time.time() - t0
    return w0, b0, elapsed
def evaluate_topksg(w0, b0, K):
    """Time top-K sparsified gradient quantization.

    Configures ``nn_settings`` for the 'topk' quantizer with sparsity level
    ``K``, builds a CifarNetModel from (w0, b0), and times ``iter_per_eval``
    quantized-gradient computations per trial over ``num_evals`` trials.

    Returns a numpy array of per-trial elapsed times (seconds).
    """
    # configure the shared settings dict for the top-K quantizer
    nn_settings['initial_w'] = w0
    nn_settings['initial_b'] = b0
    nn_settings['quantizer'] = 'topk'
    nn_settings['K'] = K

    model = CifarNetModel()
    model.create_network(nn_settings)

    elapsed = np.zeros(num_evals)
    for trial in range(num_evals):
        x, y = db_sess.run([db_images, db_labels])
        t0 = time.time()
        for _ in range(iter_per_eval):
            qw, sw, qb, sb = model.quantized_gradients(x, y)
        elapsed[trial] = time.time() - t0
    return elapsed
def train_base_model(w0=None, b0=None):
    """Train the base CifarNet model for 150 steps and return its parameters.

    Training is done using batch-size=256. Optionally starts from the given
    initial weights/biases; returns the final (weights, biases) tuple.
    """
    nn_settings['initial_w'] = w0
    nn_settings['initial_b'] = b0
    model = CifarNetModel()
    model.create_network(nn_settings)

    for _ in range(150):
        images, labels = db_sess.run([db_images, db_labels])
        model.train(images, labels)
    return model.get_weights()
def evaluate_1bit(num_workers, nn_params, seed):
    """Simulate distributed training with 1-bit gradient quantization.

    Splits each global batch across ``num_workers`` simulated workers (all
    sharing one underlying CifarNetModel), aggregates their 1-bit-quantized
    gradients on a server node, and tracks test accuracy and the empirical
    entropy of the quantized gradients per iteration.

    Returns (accuracy, entropy, raw_rate, wf, bf).
    """
    # create database
    cifar10_db = Cifar10Dataset(db_settings=cifar10_settings)
    cifar10_db.load_from_file()
    db_graph = tf.Graph()
    with db_graph.as_default():
        tf.random.set_random_seed(seed)
        db_images, db_labels, db_initializer = cifar10_db.create_dataset(
            ['train', 'test'], total_batch_size, 16)
    db_sess = tf.Session(graph=db_graph)
    # fetch one fixed test batch for accuracy tracking, then switch to train
    db_sess.run(db_initializer['test'])
    test_images, test_labels = db_sess.run([db_images, db_labels])
    db_sess.run(db_initializer['train'])
    # create neural network model
    nn_model = CifarNetModel()
    nn_model.create_network(nn_params)
    nn_model.initialize()
    # create workers and server
    workers = [dt_1bit.WorkerNode(nn_model) for _ in range(num_workers)]
    server = dt_1bit.AggregationNode()
    entropy = np.zeros(max_iterations)
    accuracy = np.zeros(max_iterations)
    batch_size = total_batch_size // num_workers  # per-worker slice of the batch
    for n in range(max_iterations):
        x, y = db_sess.run([db_images, db_labels])
        rec_rate = 0
        server.reset_node()
        for k in range(num_workers):
            # 1- get quantized gradients
            x_batch = x[k * batch_size:(k + 1) * batch_size]
            y_batch = y[k * batch_size:(k + 1) * batch_size]
            q_gW, c_gW, q_gb, c_gb = workers[k].get_quantized_gradients(
                x_batch, y_batch)
            # 2- compute entropy
            # rec_rate counts the side-information (reconstruction) values that
            # must be sent alongside the 1-bit signs
            rec_rate += (np.sum([v.size for v in c_gW]) +
                         np.sum(v.size for v in c_gb))
            # binary (base-2) entropy of the quantized weight/bias gradients
            r = np.sum([cmp.compute_entropy(v, 2) for v in q_gW]) + np.sum(
                [cmp.compute_entropy(v, 2) for v in q_gb])
            entropy[n] += r
            # 3- aggregate gradients
            server.receive_gradient(q_gW, c_gW, q_gb, c_gb)
        # apply the gradients to the nn model
        gW, gb = server.get_aggregated_gradients()
        nn_model.apply_gradients(
            gW, gb
        )  # since they all use the same underlying nn model, no need to apply for all
        accuracy[n] = nn_model.accuracy(test_images, test_labels)
        if n % 50 == 0:
            print('{0:03d}: learning rate={1:.4f}, accuracy={2:.2f}'.format(
                n, nn_model.learning_rate(), accuracy[n] * 100))
    wf, bf = nn_model.get_weights()
    # computing raw rates
    # NOTE(review): rec_rate here holds only the LAST iteration's value —
    # presumably the per-iteration side-information size is constant, so one
    # iteration's count suffices; confirm against the worker implementation.
    r = np.sum([v.size for v in wf]) + np.sum(
        v.size
        for v in bf)  # number of parameters, represented by 1 bit
    raw_rate = r * num_workers + 32 * rec_rate
    # add 32 bits per reconstruction value to the entropy-based rate as well
    entropy = entropy + 32 * rec_rate
    return accuracy, entropy, raw_rate, wf, bf
def evaluate_basemodel(nn_params, seed):
    """Train the unquantized baseline model and track its test accuracy.

    Builds the CIFAR-10 pipeline in its own graph (seeded with ``seed``),
    trains a CifarNetModel for ``max_iterations`` steps, and records accuracy
    on one fixed test batch after every step.

    Returns (accuracy, raw_rate, wf, bf) where raw_rate is the parameter count
    times 32 bits.
    """
    # build the CIFAR-10 input pipeline in a dedicated graph/session
    cifar10_db = Cifar10Dataset(db_settings=cifar10_settings)
    cifar10_db.load_from_file()
    db_graph = tf.Graph()
    with db_graph.as_default():
        tf.random.set_random_seed(seed)
        db_images, db_labels, db_initializer = cifar10_db.create_dataset(
            ['train', 'test'], total_batch_size, 16)
    db_sess = tf.Session(graph=db_graph)

    # grab one fixed test batch for accuracy tracking, then switch to train
    db_sess.run(db_initializer['test'])
    test_images, test_labels = db_sess.run([db_images, db_labels])
    db_sess.run(db_initializer['train'])

    model = CifarNetModel()
    model.create_network(nn_params)
    model.initialize()

    accuracy = np.zeros(max_iterations)
    for step in range(max_iterations):
        batch_x, batch_y = db_sess.run([db_images, db_labels])
        model.train(batch_x, batch_y)
        accuracy[step] = model.accuracy(test_images, test_labels)
        if step % 100 == 0:
            print('{0:03d}: learning rate={1:.4f}, accuracy={2:.2f}'.format(
                step, model.learning_rate(), accuracy[step] * 100))

    # final weights of the trained nn; uncompressed rate is 32 bits/parameter
    wf, bf = model.get_weights()
    parameter_size = np.sum([v.size for v in wf]) + np.sum([v.size for v in bf])
    raw_rate = parameter_size * 32
    return accuracy, raw_rate, wf, bf
def evaluate_ndqsg(num_workers, nn_params, ndqsg_params, seed):
    """Simulate distributed training with nested dithered QSG quantization.

    A fraction ``ratio`` of the workers use the fine quantizer
    (``num_levels[0]``), the rest the nested pair ``num_levels[1]``. Workers
    and server share per-worker dither seeds. Tracks test accuracy and the
    entropy of the quantized gradients per iteration.

    Returns (accuracy, entropy, raw_rate, wf, bf).
    """
    # create database
    cifar10_db = Cifar10Dataset(db_settings=cifar10_settings)
    cifar10_db.load_from_file()
    db_graph = tf.Graph()
    with db_graph.as_default():
        tf.random.set_random_seed(seed)
        db_images, db_labels, db_initializer = cifar10_db.create_dataset(
            ['train', 'test'], total_batch_size, 16)
    db_sess = tf.Session(graph=db_graph)
    # fetch one fixed test batch for accuracy tracking, then switch to train
    db_sess.run(db_initializer['test'])
    test_images, test_labels = db_sess.run([db_images, db_labels])
    db_sess.run(db_initializer['train'])
    # create neural network model
    nn_model = CifarNetModel()
    nn_model.create_network(nn_params)
    nn_model.initialize()
    # create workers and server
    ratio = ndqsg_params.get('ratio', 0.5)
    clip_thr = ndqsg_params.get('gradient-clip', None)
    # default: first group uses 3 levels (plain int), second group the nested
    # pair (3, 1). Note (3) is just the int 3, not a 1-tuple.
    num_levels = ndqsg_params.get('num-levels', ((3), (3, 1)))
    bucket_size = ndqsg_params.get('bucket-size', None)
    workers = [dt_ndqsg.WorkerNode(nn_model) for _ in range(num_workers)]
    server = dt_ndqsg.AggregationNode(num_workers)
    alphabet_size = np.zeros(
        num_workers)  # alphabet size of the quantized gradients
    for w_id in range(num_workers):
        # shared dither seed so server can reproduce the worker's dither
        dt_seed = np.random.randint(dt_ndqsg.min_seed, dt_ndqsg.max_seed)
        if w_id < (num_workers * ratio):
            # fine quantizer group: q_levels is a plain int
            q_levels = num_levels[0]
            alphabet_size[w_id] = 2 * q_levels + 1
        else:
            # nested quantizer group: q_levels is an (outer, inner) pair;
            # rho is their ratio
            q_levels = num_levels[1]
            rho = q_levels[0] // q_levels[1]
            alphabet_size[w_id] = 2 * (rho // 2) + 1
        workers[w_id].set_quantizer(dt_seed,
                                    clip_thr,
                                    bucket_size,
                                    q_levels,
                                    alpha=1.0)
        server.set_quantizer(w_id, dt_seed, bucket_size, q_levels, alpha=1.0)
    # average bits/parameter across the heterogeneous worker quantizers
    avg_bits = np.mean(np.log2(alphabet_size))
    entropy = np.zeros(max_iterations)
    accuracy = np.zeros(max_iterations)
    batch_size = total_batch_size // num_workers  # per-worker slice of the batch
    for n in range(max_iterations):
        x, y = db_sess.run([db_images, db_labels])
        rec_rate = 0
        server.reset_node()
        for k in range(num_workers):
            # 1- get quantized gradients
            x_batch = x[k * batch_size:(k + 1) * batch_size]
            y_batch = y[k * batch_size:(k + 1) * batch_size]
            qw, sw, qb, sb = workers[k].get_quantized_gradients(
                x_batch, y_batch)
            # 2- aggregate gradients
            server.receive_gradient(k, qw, sw, qb, sb)
            # 3- compute entropy
            rec_rate += (np.sum([v.size for v in sw]) +
                         np.sum(v.size for v in sb)
                         )  # the reconstruction points
            r = np.sum([cmp.compute_entropy(v) for v in qw]) + np.sum(
                [cmp.compute_entropy(v) for v in qb])
            entropy[n] += r
        gW, gb = server.get_aggregated_gradients()
        nn_model.apply_gradients(gW, gb)
        accuracy[n] = nn_model.accuracy(test_images, test_labels)
        if n % 50 == 0:
            print('{0:03d}: learning rate={1:.4f}, accuracy={2:.2f}'.format(
                n, nn_model.learning_rate(), accuracy[n] * 100))
    wf, bf = nn_model.get_weights()
    # computing raw rates
    # NOTE(review): rec_rate holds only the LAST iteration's value — presumably
    # the per-iteration side-information size is constant; confirm.
    r = np.sum([v.size for v in wf]) + np.sum(
        v.size for v in bf)  # number of parameters
    raw_rate = r * avg_bits * num_workers + rec_rate * 32
    # reconstruction points are sent as 32-bit floats
    entropy = entropy + rec_rate * 32
    return accuracy, entropy, raw_rate, wf, bf