def __init__(self, x, y, l, window, opt, lr, init_emb, dim_emb, dim_hidden, n_vocab,
             L2_reg, unit, sim='cos', n_layers=1, activation=tanh):
    self.tr_inputs = [x, y, l]
    self.pr_inputs = [x, y, l]

    self.x = x  # 1D: batch_size * l * 2, 2D: window; elem=word_id
    self.y = y  # 1D: batch_size; elem=label
    self.l = l  # scalar: elem=sentence length

    batch_size = y.shape[0]
    n_cands = x.shape[0] / batch_size / l

    self.pad = build_shared_zeros((1, dim_emb))
    if init_emb is None:
        self.emb = theano.shared(sample_weights(n_vocab - 1, dim_emb))
    else:
        self.emb = theano.shared(init_emb)
    self.E = T.concatenate([self.pad, self.emb], 0)
    self.W_out = theano.shared(sample_weights(dim_hidden, dim_hidden))
    self.params = [self.emb, self.W_out]

    """ Input Layer """
    e = self.E[x]  # e: 1D: batch_size * l * 2, 2D: window, 3D: dim_emb
    x_in = e.reshape((batch_size * n_cands, l, -1))

    """ Intermediate Layer """
    # h: 1D: n_batch * n_cands, 2D: dim_emb
    h, params = cnn.layers(x_in, window, dim_emb, dim_hidden, n_layers, activation)
    self.params.extend(params)

    """ Output Layer """
    h = h.reshape((batch_size, n_cands, -1))
    h_1 = h[T.arange(batch_size), 0]   # representation of the first element (query)
    h_2 = h[T.arange(batch_size), 1:]  # representations of the remaining candidates
    if sim == 'cos':
        y_score = cosign_similarity(h_1, h_2)  # cosine similarity against each candidate
    else:
        y_score = T.batched_dot(T.dot(h_1, self.W_out), h_2.dimshuffle(0, 2, 1))
    y_score_hat = T.max(y_score, 1)

    """ Objective Function """
    self.nll = max_margin_loss(y_score_hat, y_score[T.arange(batch_size), y])
    self.L2_sqr = regularization(self.params)
    self.cost = self.nll + L2_reg * self.L2_sqr / 2.

    """ Optimization """
    if opt == 'adagrad':
        self.update = ada_grad(cost=self.cost, params=self.params, lr=lr)
    elif opt == 'ada_delta':
        self.update = ada_delta(cost=self.cost, params=self.params)
    elif opt == 'adam':
        self.update = adam(cost=self.cost, params=self.params, lr=lr)
    else:
        self.update = sgd(cost=self.cost, params=self.params, lr=lr)

    """ Predicts """
    y_hat = T.argmax(y_score, 1)

    """ Check Accuracies """
    self.correct = T.eq(y_hat, y)
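# max_margin_loss is a repo helper not shown here. Given how it is called
# (best candidate score vs. gold candidate score), a common hinge formulation
# looks like this numpy sketch; the repo's definition may instead take the max
# over non-gold candidates only:
import numpy as np

def max_margin_loss_sketch(score_best, score_gold, margin=1.0):
    # penalize whenever the gold score does not beat the best score by >= margin
    return np.mean(np.maximum(0.0, margin - score_gold + score_best))

score_best = np.array([0.9, 0.6, 0.5])  # y_score_hat: max over candidates
score_gold = np.array([0.8, 0.7, 0.5])  # y_score[arange(batch), y]
print(max_margin_loss_sketch(score_best, score_gold))  # mean of [1.1, 0.9, 1.0] = 1.0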
def __init__(self, n_in, n_hidden, activation_fn=T.tanh):
    self.W_ih = theano.shared(sample_weights(n_in, n_hidden))      # input-to-hidden weights
    self.W_hh = theano.shared(sample_weights(n_hidden, n_hidden))  # hidden-to-hidden weights
    self.b_h = theano.shared(np.zeros(n_hidden, dtype=dtype))      # hidden bias
    self.h0 = theano.shared(np.zeros((1, n_hidden), dtype=dtype))  # learned initial state
    self.params = [self.W_ih, self.W_hh, self.b_h, self.h0]
    self.activation_fn = activation_fn
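# The parameters above imply the classic Elman recurrence
# h_t = f(x_t W_ih + h_{t-1} W_hh + b_h). A minimal numpy sketch of that step
# (illustrative only; the layer's actual step function is defined elsewhere):
import numpy as np

def rnn_step(x_t, h_prev, W_ih, W_hh, b_h, activation=np.tanh):
    # h_t = f(x_t @ W_ih + h_{t-1} @ W_hh + b_h)
    return activation(x_t @ W_ih + h_prev @ W_hh + b_h)

n_in, n_hidden = 4, 8
rng = np.random.default_rng(0)
W_ih = rng.normal(size=(n_in, n_hidden)) * 0.1
W_hh = rng.normal(size=(n_hidden, n_hidden)) * 0.1
b_h = np.zeros(n_hidden)
h = np.zeros((1, n_hidden))                 # h0
for x_t in rng.normal(size=(5, 1, n_in)):   # 5 time steps, batch of 1
    h = rnn_step(x_t, h, W_ih, W_hh, b_h)
print(h.shape)  # (1, 8)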
def train_dataloader(self):
    transf = self.default_transforms() if self.train_transforms is None else self.train_transforms
    dataset = self.DATASET(self.data_dir, train=True, download=False, transform=transf, **self.extra_args)
    train_length = len(dataset)
    dataset_train, _ = random_split(
        dataset,
        [train_length - self.val_split, self.val_split],
        generator=torch.Generator().manual_seed(self.seed)
    )
    s_weights = utils.sample_weights(
        np.asarray(dataset_train.dataset.targets)[dataset_train.indices])
    if self.accelerator == 'ddp' or self.accelerator == 'ddp2':
        sampler = None  # DistributedSampler(dataset_train)
        shuffle = False
    else:
        # a custom sampler and shuffle=True are mutually exclusive in DataLoader
        sampler = WeightedRandomSampler(s_weights, num_samples=len(s_weights), replacement=True)
        shuffle = False
    loader = DataLoader(
        dataset_train,
        batch_size=self.batch_size,
        shuffle=shuffle,
        sampler=sampler,
        num_workers=self.num_workers,
        drop_last=True,
        pin_memory=True
    )
    return loader
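# utils.sample_weights here turns the training targets into per-example weights
# for WeightedRandomSampler. Its definition is not shown; inverse class frequency
# is the usual choice, so this is a hypothetical sketch under that assumption:
import numpy as np

def sample_weights_sketch(targets):
    # weight each example by 1 / (count of its class), so the sampler
    # draws classes roughly uniformly
    targets = np.asarray(targets)
    classes, counts = np.unique(targets, return_counts=True)
    class_weight = dict(zip(classes, 1.0 / counts))
    return np.array([class_weight[t] for t in targets])

print(sample_weights_sketch([0, 0, 0, 1]))  # [0.333.. 0.333.. 0.333.. 1.]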
def __init__(self, n_in, n_hidden, n_batch=1, init_state_params=True, activation_fn=T.tanh):
    self.W = theano.shared(sample_weights(n_in, 3*n_hidden))
    self.T = theano.shared(sample_weights(n_hidden, 3*n_hidden))
    self.b = theano.shared(np.zeros(3*n_hidden, dtype=dtype))
    self.params = [self.W, self.T, self.b]
    self.h0 = theano.shared(np.zeros((n_batch, n_hidden), dtype=dtype))
    self.c0 = theano.shared(np.zeros((n_batch, n_hidden), dtype=dtype))
    if init_state_params:
        self.params += [self.h0, self.c0]
    self.n_in = n_in
    self.n_hidden = n_hidden
    self.n_batch = n_batch
    self.activation_fn = activation_fn
def layers(x, window, dim_emb, dim_hidden, n_layers, activation=tanh):
    params = []
    zero = T.zeros((1, 1, dim_emb * window), dtype=theano.config.floatX)

    def zero_pad_gate(matrix):
        # 1 where a position holds real input, 0 where it is all-zero padding,
        # so padded positions cannot win the max-pooling below
        return T.neq(T.sum(T.eq(matrix, zero), 2, keepdims=True), dim_emb * window)

    for i in xrange(n_layers):
        if i == 0:
            W = theano.shared(sample_weights(dim_emb * window, dim_hidden))
            h = T.max(zero_pad_gate(x) * relu(T.dot(x, W)), 1)
            # h = T.max(T.dot(x, W), 1)
        else:
            W = theano.shared(sample_weights(dim_hidden, dim_hidden))
            h = activation(T.dot(h, W))
        params.append(W)
    return h, params
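# What zero_pad_gate computes, rendered in numpy under the same convention that
# an all-zero row marks padding (illustrative, not the repo's code):
import numpy as np

def zero_pad_gate_sketch(x):
    # x: (batch, length, dim); 1.0 for real rows, 0.0 for all-zero padding rows
    return (np.sum(x == 0.0, axis=2, keepdims=True) != x.shape[2]).astype(x.dtype)

x = np.array([[[0.5, -0.2], [0.0, 0.0]]])   # second position is padding
print(zero_pad_gate_sketch(x)[..., 0])      # [[1. 0.]]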
def __init__(self, n_filters, stack_size, n_row, n_col, stride=(1,1), border_mode='valid', activation_fn=T.tanh):
    W_init = sample_weights(stack_size*n_row*n_col, n_filters).T
    W_init = W_init.reshape(n_filters, stack_size, n_row, n_col)
    self.W_ih = theano.shared(W_init.astype(dtype))
    W_init = sample_weights(n_filters*n_row*n_col, n_filters).T
    W_init = W_init.reshape(n_filters, n_filters, n_row, n_col)
    self.W_hh = theano.shared(W_init.astype(dtype))
    self.b_h = theano.shared(np.zeros(n_filters, dtype=dtype))
    self.params = [self.W_ih, self.W_hh, self.b_h]
    self.border_mode = border_mode
    self.stride = stride
    self.activation_fn = activation_fn
def __init__(self, n_filters, stack_size, n_row, n_col, stride=(1,1), border_mode='valid'):
    W_init = sample_weights(stack_size*n_row*n_col, n_filters).T
    W_init = W_init.reshape(n_filters, stack_size, n_row, n_col)
    self.W = theano.shared(W_init.astype(dtype))
    self.b = theano.shared(np.zeros(n_filters, dtype=dtype))
    self.params = [self.W, self.b]
    self.border_mode = border_mode
    self.stride = stride
def __init__(self, n_in, n_hidden, n_batch=1, init_state_params=True, activation_fn=T.tanh):
    # candidate-state weights
    self.W = theano.shared(sample_weights(n_in, n_hidden))
    self.U = theano.shared(sample_weights(n_hidden, n_hidden))
    self.b = theano.shared(np.zeros(n_hidden, dtype=dtype))
    # update-gate weights
    self.Wz = theano.shared(sample_weights(n_in, n_hidden))
    self.Uz = theano.shared(sample_weights(n_hidden, n_hidden))
    self.bz = theano.shared(np.zeros(n_hidden, dtype=dtype))
    # reset-gate weights
    self.Wr = theano.shared(sample_weights(n_in, n_hidden))
    self.Ur = theano.shared(sample_weights(n_hidden, n_hidden))
    self.br = theano.shared(np.zeros(n_hidden, dtype=dtype))
    self.params = [self.W, self.U, self.b,
                   self.Wz, self.Uz, self.bz,
                   self.Wr, self.Ur, self.br]
    self.h0 = theano.shared(np.zeros((n_batch, n_hidden), dtype=dtype))
    if init_state_params:
        self.params += [self.h0]
    self.n_batch = n_batch
    self.activation_fn = activation_fn
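# The W/U/b, Wz/Uz/bz, Wr/Ur/br triplets match the standard GRU update. A numpy
# sketch of the step they imply (illustrative; the layer's own step function is
# defined elsewhere):
import numpy as np

def sigmoid(a):
    return 1.0 / (1.0 + np.exp(-a))

def gru_step(x_t, h_prev, p, activation=np.tanh):
    z = sigmoid(x_t @ p['Wz'] + h_prev @ p['Uz'] + p['bz'])              # update gate
    r = sigmoid(x_t @ p['Wr'] + h_prev @ p['Ur'] + p['br'])              # reset gate
    h_tilde = activation(x_t @ p['W'] + (r * h_prev) @ p['U'] + p['b'])  # candidate
    return (1.0 - z) * h_prev + z * h_tilde                              # interpolate

rng = np.random.default_rng(0)
n_in, n_hidden = 3, 5
p = {k: rng.normal(size=(n_in if k.startswith('W') else n_hidden, n_hidden)) * 0.1
     for k in ['W', 'Wz', 'Wr', 'U', 'Uz', 'Ur']}
p.update({b: np.zeros(n_hidden) for b in ['b', 'bz', 'br']})
h = gru_step(rng.normal(size=(1, n_in)), np.zeros((1, n_hidden)), p)
print(h.shape)  # (1, 5)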
def __init__(self, n_filters, stack_size, n_row, n_col, stride=(1, 1), border_mode='valid', activation_fn=T.tanh):
    W_init = sample_weights(stack_size * n_row * n_col, n_filters).T
    W_init = W_init.reshape(n_filters, stack_size, n_row, n_col)
    self.W_ih = theano.shared(W_init.astype(dtype))
    W_init = sample_weights(n_filters * n_row * n_col, n_filters).T
    W_init = W_init.reshape(n_filters, n_filters, n_row, n_col)
    self.W_hh = theano.shared(W_init.astype(dtype))
    self.b_h = theano.shared(np.zeros(n_filters, dtype=dtype))
    self.params = [self.W_ih, self.W_hh, self.b_h]
    self.border_mode = border_mode
    self.stride = stride
    self.activation_fn = activation_fn
def __init__(self, n_in, n_hidden, n_batch=None, activation_fn=T.tanh):
    # fused LSTM weights: input, forget, cell and output blocks side by side
    self.W = theano.shared(sample_weights(n_in, 4*n_hidden))
    self.T = theano.shared(sample_weights(n_hidden, 4*n_hidden))
    self.b = theano.shared(np.zeros(4*n_hidden, dtype=dtype))
    self.params = [self.W, self.T, self.b]
    if n_batch is None:
        self.h0 = theano.shared(np.zeros((1, n_hidden), dtype=dtype))
        self.c0 = theano.shared(np.zeros((1, n_hidden), dtype=dtype))
        self.params += [self.h0, self.c0]
        # `v` must already be bound in the enclosing scope to the (time-major)
        # input tensor, so v.shape[1] is the runtime batch size
        self.h_init = [T.tile(self.h0, (v.shape[1], 1)), T.tile(self.c0, (v.shape[1], 1))]
    else:
        self.h0 = theano.shared(np.zeros((n_batch, n_hidden), dtype=dtype))
        self.c0 = theano.shared(np.zeros((n_batch, n_hidden), dtype=dtype))
        self.h_init = [self.h0, self.c0]
    self.n_in = n_in
    self.n_hidden = n_hidden
    self.activation_fn = activation_fn
def __init__(self, n_filters, stack_size, n_row, n_col, stride=(1, 1), border_mode='valid'):
    W_init = sample_weights(stack_size * n_row * n_col, n_filters).T
    W_init = W_init.reshape(n_filters, stack_size, n_row, n_col)
    self.W = theano.shared(W_init.astype(dtype))
    self.b = theano.shared(np.zeros(n_filters, dtype=dtype))
    self.params = [self.W, self.b]
    self.border_mode = border_mode
    self.stride = stride
def __init__(self, w, d, n_layers, vocab_size, n_in=32, n_h=32, n_words=1000, batch_size=32, activation=tanh):
    self.w = w
    self.d = d

    """model parameters"""
    self.n_layers = n_layers
    self.vocab_size = vocab_size
    self.n_in = n_in
    self.n_h = n_h
    self.n_y = vocab_size
    self.n_words = n_words
    self.batch_size = batch_size
    self.activation = activation

    """embeddings"""
    self.emb = theano.shared(sample_weights(self.vocab_size, self.n_in))

    """initial parameters"""
    self.x = self.emb[self.w]  # x: 1D: n_words * batch_size, 2D: n_in
    self.c0 = theano.shared(np.zeros((self.batch_size, n_h), dtype=theano.config.floatX))
    self.h0 = self.activation(self.c0)

    """layers and parameters"""
    # note: rebinds self.layers from the builder method to the built layer list
    self.layers, self.params, self.layer_output = self.layers(n_layers=n_layers)
    self.y = self.layer_output[-1]  # y: 1D: n_words, 2D: batch_size, 3D: vocab_size
    if n_layers % 2 == 0:
        # layers presumably alternate direction, so flip the output back into time order
        self.y = self.y[::-1]
    self.p_y_given_x = self.y.dimshuffle((1, 0, 2)).reshape((n_words * batch_size, vocab_size))
    self.nll = -T.mean(T.log(self.p_y_given_x)[T.arange(d.shape[0]), d])
    self.y_pred = T.argmax(self.p_y_given_x, axis=1)
    self.errors = T.neq(self.y_pred, d)
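# The nll line gathers each row's log-probability at its target id. The same
# indexing in numpy, with toy numbers:
import numpy as np

p = np.array([[0.7, 0.2, 0.1],
              [0.1, 0.8, 0.1]])      # p_y_given_x: (n_examples, vocab_size)
d = np.array([0, 1])                 # target word ids
nll = -np.mean(np.log(p)[np.arange(d.shape[0]), d])
print(nll)  # -(log 0.7 + log 0.8) / 2 ~= 0.2899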
def __init__(self, n_i=32, n_h=32, activation=tanh):
    self.activation = activation

    """input gate parameters"""
    self.W_xi = theano.shared(sample_weights(n_i, n_h))
    self.W_hi = theano.shared(sample_weights(n_h, n_h))

    """forget gate parameters"""
    self.W_xf = theano.shared(sample_weights(n_i, n_h))
    self.W_hf = theano.shared(sample_weights(n_h, n_h))

    """cell parameters"""
    self.W_xc = theano.shared(sample_weights(n_i, n_h))
    self.W_hc = theano.shared(sample_weights(n_h, n_h))

    """output gate parameters"""
    self.W_xo = theano.shared(sample_weights(n_i, n_h))
    self.W_ho = theano.shared(sample_weights(n_h, n_h))

    self.params = [
        self.W_xi, self.W_hi,
        self.W_xf, self.W_hf,
        self.W_xc, self.W_hc,
        self.W_xo, self.W_ho
    ]
def __init__(self, n_i=32, n_h=32, activation=tanh):
    self.activation = activation

    """input gate parameters"""
    self.W_xi = theano.shared(sample_weights(n_i, n_h))
    self.W_hi = theano.shared(sample_weights(n_h, n_h))

    """forget gate parameters"""
    self.W_xf = theano.shared(sample_weights(n_i, n_h))
    self.W_hf = theano.shared(sample_weights(n_h, n_h))

    """cell parameters"""
    self.W_xc = theano.shared(sample_weights(n_i, n_h))
    self.W_hc = theano.shared(sample_weights(n_h, n_h))

    """output gate parameters"""
    self.W_xo = theano.shared(sample_weights(n_i, n_h))
    self.W_ho = theano.shared(sample_weights(n_h, n_h))

    self.params = [self.W_xi, self.W_hi, self.W_xf, self.W_hf,
                   self.W_xc, self.W_hc, self.W_xo, self.W_ho]
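# Both variants above hold the same bias-free LSTM gate weights. A numpy sketch
# of the step they support (illustrative; the actual scan body lives elsewhere):
import numpy as np

def sigmoid(a):
    return 1.0 / (1.0 + np.exp(-a))

def lstm_step(x_t, h_prev, c_prev, p, activation=np.tanh):
    i = sigmoid(x_t @ p['W_xi'] + h_prev @ p['W_hi'])    # input gate
    f = sigmoid(x_t @ p['W_xf'] + h_prev @ p['W_hf'])    # forget gate
    c = f * c_prev + i * activation(x_t @ p['W_xc'] + h_prev @ p['W_hc'])
    o = sigmoid(x_t @ p['W_xo'] + h_prev @ p['W_ho'])    # output gate
    h = o * activation(c)
    return h, c

rng = np.random.default_rng(0)
n_i, n_h = 3, 4
p = {name: rng.normal(size=(n_i if name[2] == 'x' else n_h, n_h)) * 0.1
     for name in ['W_xi', 'W_hi', 'W_xf', 'W_hf', 'W_xc', 'W_hc', 'W_xo', 'W_ho']}
h, c = lstm_step(rng.normal(size=(1, n_i)), np.zeros((1, n_h)), np.zeros((1, n_h)), p)
print(h.shape, c.shape)  # (1, 4) (1, 4)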
def __init__(self, n_in, n_hidden, activation_fn=T.tanh):
    n_i = n_c = n_o = n_f = n_hidden
    # input gate (with peephole W_ci)
    self.W_xi = theano.shared(sample_weights(n_in, n_i))
    self.W_hi = theano.shared(sample_weights(n_hidden, n_i))
    self.W_ci = theano.shared(sample_weights(n_c, n_i))
    self.b_i = theano.shared(np.zeros(n_i, dtype=dtype))
    # forget gate (with peephole W_cf)
    self.W_xf = theano.shared(sample_weights(n_in, n_f))
    self.W_hf = theano.shared(sample_weights(n_hidden, n_f))
    self.W_cf = theano.shared(sample_weights(n_c, n_f))
    self.b_f = theano.shared(np.zeros(n_f, dtype=dtype))
    # cell candidate
    self.W_xc = theano.shared(sample_weights(n_in, n_c))
    self.W_hc = theano.shared(sample_weights(n_hidden, n_c))
    self.b_c = theano.shared(np.zeros(n_c, dtype=dtype))
    # output gate (with peephole W_co)
    self.W_xo = theano.shared(sample_weights(n_in, n_o))
    self.W_ho = theano.shared(sample_weights(n_hidden, n_o))
    self.W_co = theano.shared(sample_weights(n_c, n_o))
    self.b_o = theano.shared(np.zeros(n_o, dtype=dtype))
    # learned initial states
    self.c0 = theano.shared(np.zeros((1, n_hidden), dtype=dtype))
    self.h0 = theano.shared(np.zeros((1, n_hidden), dtype=dtype))
    self.params = [self.W_xi, self.W_hi, self.W_ci, self.b_i,
                   self.W_xf, self.W_hf, self.W_cf, self.b_f,
                   self.W_xc, self.W_hc, self.b_c,
                   self.W_xo, self.W_ho, self.W_co, self.b_o,
                   self.c0, self.h0]
    self.activation_fn = activation_fn
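# The extra W_c* matrices are full-matrix peephole connections from the cell
# state into each gate. A numpy sketch of the Graves-style step they imply
# (illustrative only):
import numpy as np

def sigmoid(a):
    return 1.0 / (1.0 + np.exp(-a))

def peephole_lstm_step(x_t, h_prev, c_prev, p, act=np.tanh):
    i = sigmoid(x_t @ p['W_xi'] + h_prev @ p['W_hi'] + c_prev @ p['W_ci'] + p['b_i'])
    f = sigmoid(x_t @ p['W_xf'] + h_prev @ p['W_hf'] + c_prev @ p['W_cf'] + p['b_f'])
    c = f * c_prev + i * act(x_t @ p['W_xc'] + h_prev @ p['W_hc'] + p['b_c'])
    # the output gate peeks at the NEW cell state
    o = sigmoid(x_t @ p['W_xo'] + h_prev @ p['W_ho'] + c @ p['W_co'] + p['b_o'])
    h = o * act(c)
    return h, c

rng = np.random.default_rng(0)
n_in = n_h = 4
p = {k: rng.normal(size=(n_in, n_h)) * 0.1
     for k in ['W_xi', 'W_hi', 'W_ci', 'W_xf', 'W_hf', 'W_cf',
               'W_xc', 'W_hc', 'W_xo', 'W_ho', 'W_co']}
p.update({b: np.zeros(n_h) for b in ['b_i', 'b_f', 'b_c', 'b_o']})
h, c = peephole_lstm_step(rng.normal(size=(1, n_in)), np.zeros((1, n_h)), np.zeros((1, n_h)), p)
print(h.shape, c.shape)  # (1, 4) (1, 4)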
def __init__(self, model_settings):
    self.dim_model = model_settings['dim_model']    # 100
    self.dim_world = model_settings['dim_world']    # 78: dim of the raw world input,
                                                    # which is NOT a one-hot vector
    self.dim_lang = model_settings['dim_lang']      # 524
    self.dim_action = model_settings['dim_action']  # 4
    #
    # drop-out related stuff
    self.drop_out_rate = model_settings['drop_out_rate']  # 0.9
    assert self.drop_out_rate <= numpy.float32(1.0)
    self.rnd_gen = RandomStreams(seed=12345)
    # boolean keep-mask of shape (dim_model,): True with probability drop_out_rate
    self.drop_out_layer = self.rnd_gen.uniform((self.dim_model,)) < self.drop_out_rate
    self.drop_out_layer_gen = theano.function([], self.drop_out_layer)
    #
    # print "dim of model, world, lang and action is : ", self.dim_model, self.dim_world, self.dim_lang, self.dim_action
    #
    """ identity matrix of shape [524*524] """
    self.Emb_lang_sparse = theano.shared(
        numpy.identity(self.dim_lang, dtype=dtype), name='Emb_lang_sparse'
    )
    # this is the I-matrix that stands for idx of tokens
    #
    """ Matrix of shape [524*100] """
    self.Emb_enc_forward = theano.shared(
        utils.sample_weights(self.dim_lang, self.dim_model), name='Emb_enc_forward'
    )
    """ Matrix of shape [200*400] """
    self.W_enc_forward = theano.shared(
        utils.sample_weights(2 * self.dim_model, 4 * self.dim_model), name='W_enc_forward'
    )
    """ Vector of shape [400] """
    self.b_enc_forward = theano.shared(
        numpy.zeros((4 * self.dim_model,), dtype=dtype), name='b_enc_forward'
    )
    #
    """ Matrix of shape [524*100] """
    self.Emb_enc_backward = theano.shared(
        utils.sample_weights(self.dim_lang, self.dim_model), name='Emb_enc_backward'
    )
    """ Matrix of shape [200*400] """
    self.W_enc_backward = theano.shared(
        utils.sample_weights(2 * self.dim_model, 4 * self.dim_model), name='W_enc_backward'
    )
    """ Vector of shape [400] """
    self.b_enc_backward = theano.shared(
        numpy.zeros((4 * self.dim_model,), dtype=dtype), name='b_enc_backward'
    )
    #
    """ Matrix of shape [724*100] """
    self.W_att_scope = theano.shared(
        utils.sample_weights(self.dim_lang + 2 * self.dim_model, self.dim_model), name='W_att_scope'
    )
    """ Matrix of shape [100*100] """
    self.W_att_target = theano.shared(
        utils.sample_weights(self.dim_model, self.dim_model), name='W_att_target'
    )
    """ Vector of shape [100] """
    self.b_att = theano.shared(
        numpy.zeros((self.dim_model,), dtype=dtype), name='b_att'
    )
    #
    """ Matrix of shape [78*100] """
    self.Emb_dec = theano.shared(
        utils.sample_weights(self.dim_world, self.dim_model), name='Emb_dec'
    )
    """ Matrix of shape [924*400] """
    self.W_dec = theano.shared(
        utils.sample_weights(self.dim_lang + 4 * self.dim_model, 4 * self.dim_model), name='W_dec'
    )
    """ Vector of shape [400] """
    self.b_dec = theano.shared(
        numpy.zeros((4 * self.dim_model,), dtype=dtype), name='b_dec'
    )
    #
    """ Matrix of shape [824*100] """
    self.W_out_hz = theano.shared(
        utils.sample_weights(self.dim_lang + 3 * self.dim_model, self.dim_model), name='W_out_hz'
    )
    """ Matrix of shape [100*4] """
    self.W_out = theano.shared(
        utils.sample_weights(self.dim_model, self.dim_action), name='W_out'
    )
    #
    """ Vector of shape [100] """
    self.c0 = theano.shared(numpy.zeros((self.dim_model,), dtype=dtype), name='c0')
    """ Vector of shape [100] """
    self.h0 = theano.shared(numpy.zeros((self.dim_model,), dtype=dtype), name='h0')
    #
    self.params = [
        self.Emb_enc_forward, self.W_enc_forward, self.b_enc_forward,
        self.Emb_enc_backward, self.W_enc_backward, self.b_enc_backward,
        self.W_att_scope, self.W_att_target, self.b_att,
        self.Emb_dec, self.W_dec, self.b_dec,
        self.W_out_hz, self.W_out
    ]
    #
    self.cost = None
    self.grad_params = None
    self.log_prob = None
    check_point = cb.ModelCheckpoint(weight_file, save_best_only=True, monitor=args.metrics,
                                     verbose=1)
    early_stop = cb.EarlyStopping(patience=10, restore_best_weights=True, monitor=args.metrics,
                                  verbose=1)
    training = model.fit(train_sample, train_labels,
                         validation_data=(valid_sample, valid_labels),
                         callbacks=[check_point, early_stop],
                         epochs=args.n_epochs, verbose=args.verbose,
                         class_weight=None if args.n_classes == 2 else class_weights(train_labels),
                         sample_weight=sample_weights(train_sample, train_labels, args.n_classes,
                                                      args.weight_type, args.output_dir),
                         batch_size=max(1, n_gpus) * int(args.batch_size))
    model.load_weights(weight_file)
else:
    train_labels = []
    training = None

# RESULTS AND PLOTTING SECTION
if args.cross_valid == 'ON':
    valid_probs = cross_validation(valid_sample, valid_labels, scalars, model,
                                   args.output_dir, args.n_folds)
    print('MERGING ALL FOLDS AND PREDICTING CLASSES ...')
if args.cross_valid == 'OFF':
    print('\nValidation sample', args.n_valid, 'class predictions:')
def __init__(self, n_in):
    self.gamma = theano.shared(np.squeeze(sample_weights(1, n_in)))  # learned scale
    self.beta = theano.shared(np.zeros(n_in, dtype=dtype))           # learned shift
    self.params = [self.gamma, self.beta]
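# gamma and beta are the learned affine pair of batch normalization. A numpy
# sketch of the transform they parameterize (the layer's normalization itself
# is computed elsewhere):
import numpy as np

def batch_norm_forward(x, gamma, beta, eps=1e-5):
    # normalize each feature over the batch, then scale and shift
    mean = x.mean(axis=0)
    var = x.var(axis=0)
    x_hat = (x - mean) / np.sqrt(var + eps)
    return gamma * x_hat + beta

x = np.random.default_rng(0).normal(size=(16, 4))
y = batch_norm_forward(x, gamma=np.ones(4), beta=np.zeros(4))
print(y.mean(axis=0).round(6), y.std(axis=0).round(3))  # ~0 and ~1 per feature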
def __init__(self, n_in, n_output, selection_threshold=1.):
    self.W = theano.shared(sample_weights(n_in, n_output))
    self.params = [self.W]
    self.selection_threshold = selection_threshold
def __init__(self, n_i=32, n_h=45):
    self.W = theano.shared(sample_weights(n_i, n_h))
    self.params = [self.W]
    func_args = (data_file, total_var, args.n_train, args.n_tracks, args.n_classes, args.train_cuts)
    train_sample, train_labels = make_sample(*func_args)
    sample_composition(train_sample)
    if args.resampling == 'ON':
        train_sample, train_labels = balance_sample(train_sample, train_labels)
    if args.scaling and args.model_in != '':
        train_sample = load_scaler(train_sample, scalars, args.output_dir + '/' + args.scaler_in)
    if args.scaling and args.model_in == '':
        scaler_out = args.output_dir + '/' + args.scaler_out
        train_sample, valid_sample = apply_scaler(train_sample, valid_sample, scalars, scaler_out)
    compo_matrix(valid_labels, train_labels=train_labels)
    print()
    model_out = args.output_dir + '/' + args.model_out
    check_point = cb.ModelCheckpoint(model_out, save_best_only=True, monitor=args.metrics,
                                     verbose=1)
    early_stop = cb.EarlyStopping(patience=10, restore_best_weights=True, monitor=args.metrics,
                                  verbose=1)
    training = model.fit(train_sample, train_labels,
                         validation_data=(valid_sample, valid_labels),
                         callbacks=[check_point, early_stop],
                         epochs=args.n_epochs, verbose=args.verbose,
                         class_weight=class_weights(train_labels),
                         sample_weight=sample_weights(train_sample, train_labels, args.n_classes,
                                                      args.weight_type, args.output_dir),
                         batch_size=max(1, n_gpus) * int(args.batch_size))
    model.load_weights(model_out)
else:
    train_labels = []
    training = None

# RESULTS AND PLOTTING SECTION
if args.cross_valid == 'ON':
    valid_probs = cross_validation(valid_sample, valid_labels, scalars, model,
                                   args.output_dir, args.n_folds)
    print('MERGING ALL FOLDS AND PREDICTING CLASSES ...')
if args.cross_valid == 'OFF':
    print('\nValidation sample', args.n_valid, 'class predictions:')
    valid_probs = model.predict(valid_sample, batch_size=20000, verbose=args.verbose)
    print()
valid_results(valid_sample, valid_labels, valid_probs, train_labels, training,
              args.output_dir, args.plotting)
if args.results_out != '':
    print('Saving validation results to:', args.output_dir + '/' + args.results_out, '\n')
    valid_sample = {key: valid_sample[key] for key in other_var}
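# class_weights is not shown; for Keras's class_weight argument it must map
# label -> weight, typically balanced inverse frequency. A hypothetical sketch:
import numpy as np

def class_weights_sketch(labels):
    # balanced weights in the dict form Keras expects:
    # weight_c = n_samples / (n_classes * count_c)
    labels = np.asarray(labels)
    classes, counts = np.unique(labels, return_counts=True)
    return {int(c): len(labels) / (len(classes) * n) for c, n in zip(classes, counts)}

print(class_weights_sketch([0, 0, 0, 1]))  # {0: 0.666.., 1: 2.0}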
def federated_learning(communication_rounds=1, epochs_per_round=1, saving=False,
                       sampling_idx_layers=None, sampling_idx_all=None):
    client_list, sampling_types, samples_data_loaders = get_sample_data_loaders()
    client_names = np.array(['Client-{}'.format(i) for i in range(4)])
    train_loaders = []
    test_loaders = []
    for client_name in client_names:
        train_loader, test_loader = get_cifar_data_loader(client_name, batch_size=batch_size)
        train_loaders.append(train_loader)
        test_loaders.append(test_loader)

    # Initiate Parameters
    server = Server(start_round=0, checkpoint_path=os.path.join(CHECKPOINTS_DIR, 'Server'),
                    device=device)
    n_paras = sum(p.numel() for p in server.model.parameters())
    print('Total n_paras: {}'.format(n_paras))
    layer_names = server.model.layer_names
    n_layers = len(layer_names)
    info_file = os.path.join(HISTORY_DIR, 'model_info')
    np.savez_compressed(info_file, layer_names=layer_names)

    federated_clients = []
    for client_name, train_loader, test_loader in zip(client_names, train_loaders, test_loaders):
        federated_clients.append(Client(client_name=client_name,
                                        checkpoint_path=os.path.join(CHECKPOINTS_DIR, client_name),
                                        train_loader=train_loader,
                                        test_loader=test_loader,
                                        device=device))

    weights0_layers, weights0_all = sample_weights(server.model, sampling_idx_layers, sampling_idx_all)
    weights0_file = os.path.join(WEIGHTS_DIR, 'weights_0')
    np.savez_compressed(weights0_file, layers=weights0_layers, all=weights0_all)
    torch.save(server.model.state_dict(), os.path.join(CHECKPOINTS_DIR, 'model_0.cp'))

    cosines = {}
    for client_name in client_list:
        cosines[client_name] = [[] for _ in range(n_layers + 1)]
    total_accuracy_list = [[] for _ in client_list]
    last_time = time()

    # Start federated learning
    for i in range(communication_rounds):
        print('Communication Round {} | Time: {}'.format(i, time() - last_time))
        last_time = time()
        pre_model = CIFARModel().cuda()
        pre_model.load_state_dict(server.model.state_dict())
        global_parameters = server.get_parameters()
        local_parameters = []

        # Federated Learning
        for client in federated_clients:
            client.set_parameters(global_parameters)
            client.run(n_epochs=epochs_per_round, save_last=True)
            local_parameters.append(client.get_parameters())
        server.aggregate(local_parameters)
        server.save(suffix='_r{}'.format(i))

        if saving:
            server_weights_layers, server_weights_all = sample_weights(server.model,
                                                                       sampling_idx_layers,
                                                                       sampling_idx_all)
            weights_file = os.path.join(WEIGHTS_DIR, 'Server_r{}'.format(i))
            np.savez_compressed(weights_file, layers=server_weights_layers, all=server_weights_all)

            for client_id, client_name in enumerate(client_list):
                client = federated_clients[np.where(client_names == client_name)[0][0]]
                model = server.model
                model.eval()
                results = {}
                for sampling_type in sampling_types:
                    print('Predicting {} {}'.format(client_name, sampling_type))
                    data_loader = samples_data_loaders[sampling_type][client_id]
                    predictions = []
                    total, correct = 0, 0
                    with torch.no_grad():
                        for inputs, labels in data_loader:
                            inputs = inputs.float().to(device)
                            labels = labels.long().to(device)
                            outputs = model(inputs)
                            _, predicted = torch.max(outputs.data, 1)
                            predictions.append(predicted.cpu().numpy())
                            if sampling_type == 'local':
                                total += inputs.size(0)
                                correct += (predicted == labels).sum().item()
                    results[sampling_type] = np.concatenate(predictions)
                    if sampling_type == 'local':
                        print(' Total Acc:', correct / total)
                        total_accuracy_list[client_id].append(correct / total)
                output_file = os.path.join(OUTPUTS_DIR, '{}_Server_r{}'.format(client_name, i))
                np.savez_compressed(output_file, **results)
                client_weights_layers, client_weights_all = sample_weights(client.model,
                                                                           sampling_idx_layers,
                                                                           sampling_idx_all)
                weights_file = os.path.join(WEIGHTS_DIR, '{}_r{}'.format(client_name, i))
                np.savez_compressed(weights_file, layers=client_weights_layers, all=client_weights_all)
                update_cosines(pre_model, client.model, server.model, cosines[client_name])

        # Test
        for client in federated_clients:
            if client.name in client_list:
                client.set_parameters(server.get_parameters())
                client.test()

    loss_list = [client.history['loss'] for client in federated_clients if client.name in client_list]
    val_acc_list = [client.history['val_acc'] for client in federated_clients if client.name in client_list]
    np.savez_compressed(VAL_FILE, client_names=client_names, loss=loss_list,
                        val_acc=val_acc_list, tot_acc=total_accuracy_list)
    cosines_file = os.path.join(WEIGHTS_DIR, 'cosines')
    np.savez_compressed(cosines_file, **cosines)
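# update_cosines is defined elsewhere. Given its arguments (pre-round model,
# client model, aggregated server model), a plausible reading is the cosine
# between the client's update and the server's update, per parameter tensor
# plus one overall value; the real helper may group tensors by layer_names
# instead. A hypothetical PyTorch sketch:
import torch
import torch.nn.functional as F

def update_cosines_sketch(pre_model, client_model, server_model, cosine_lists):
    # append cos(client_update, server_update) per parameter tensor,
    # then once more for all parameters flattened together
    flat_c, flat_s = [], []
    for idx, (p0, pc, ps) in enumerate(zip(pre_model.parameters(),
                                           client_model.parameters(),
                                           server_model.parameters())):
        dc = (pc - p0).flatten()  # client update this round
        ds = (ps - p0).flatten()  # aggregated (server) update this round
        cosine_lists[idx].append(F.cosine_similarity(dc, ds, dim=0).item())
        flat_c.append(dc)
        flat_s.append(ds)
    cosine_lists[-1].append(F.cosine_similarity(torch.cat(flat_c),
                                                torch.cat(flat_s), dim=0).item())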
def __init__(self, settings):
    print "initializing Sel Gen model ... "
    self.size_batch = settings['size_batch']
    self.num_sel = numpy.float32(settings['num_sel'])
    self.coef = numpy.float32(settings['coef'])
    #
    if settings['path_pre_train'] is None:
        self.dim_model = settings['dim_model']
        self.dim_lang = settings['dim_lang']
        self.dim_info = settings['dim_info']
        # initialize variables
        self.Emb_enc_forward = theano.shared(utils.sample_weights(
            self.dim_info, self.dim_model), name='Emb_enc_forward')
        self.Emb_enc_backward = theano.shared(utils.sample_weights(
            self.dim_info, self.dim_model), name='Emb_enc_backward')
        self.W_enc_forward = theano.shared(utils.sample_weights(
            2 * self.dim_model, 4 * self.dim_model), name='W_enc_forward')
        self.W_enc_backward = theano.shared(utils.sample_weights(
            2 * self.dim_model, 4 * self.dim_model), name='W_enc_backward')
        self.b_enc_forward = theano.shared(numpy.zeros(
            (4 * self.dim_model,), dtype=dtype), name='b_enc_forward')
        self.b_enc_backward = theano.shared(numpy.zeros(
            (4 * self.dim_model,), dtype=dtype), name='b_enc_backward')
        #
        self.W_pre_att = theano.shared(utils.sample_weights(
            self.dim_info + 2 * self.dim_model, self.dim_model), name='W_pre_att')
        self.b_pre_att = theano.shared(numpy.zeros(
            (self.dim_model,), dtype=dtype), name='b_pre_att')
        #
        self.W_att = theano.shared(utils.sample_weights(
            self.dim_model, self.dim_model), name='W_att')
        self.U_att = theano.shared(utils.sample_weights(
            self.dim_info + 2 * self.dim_model, self.dim_model), name='U_att')
        self.b_att = theano.shared(numpy.zeros(
            (self.dim_model,), dtype=dtype), name='b_att')
        #
        self.Emb_dec = theano.shared(utils.sample_weights(
            self.dim_lang, self.dim_model), name='Emb_dec')
        self.W_dec = theano.shared(utils.sample_weights(
            self.dim_info + 4 * self.dim_model, 4 * self.dim_model), name='W_dec')
        self.b_dec = theano.shared(numpy.zeros(
            (4 * self.dim_model,), dtype=dtype), name='b_dec')
        self.L_0 = theano.shared(utils.sample_weights(
            self.dim_model, self.dim_lang), name='L_0')
        self.L = theano.shared(utils.sample_weights(
            self.dim_info + 3 * self.dim_model, self.dim_model), name='L')
        #
    else:
        #
        path_pre_train = os.path.abspath(settings['path_pre_train'])
        with open(path_pre_train, 'rb') as f:
            model_pre_train = pickle.load(f)
        #
        self.Emb_enc_forward = theano.shared(model_pre_train['Emb_enc_forward'])
        self.Emb_enc_backward = theano.shared(model_pre_train['Emb_enc_backward'])
        self.W_enc_forward = theano.shared(model_pre_train['W_enc_forward'])
        self.W_enc_backward = theano.shared(model_pre_train['W_enc_backward'])
        self.b_enc_forward = theano.shared(model_pre_train['b_enc_forward'])
        self.b_enc_backward = theano.shared(model_pre_train['b_enc_backward'])
        #
        self.W_pre_att = theano.shared(model_pre_train['W_pre_att'])
        self.b_pre_att = theano.shared(model_pre_train['b_pre_att'])
        #
        self.W_att = theano.shared(model_pre_train['W_att'])
        self.U_att = theano.shared(model_pre_train['U_att'])
        self.b_att = theano.shared(model_pre_train['b_att'])
        #
        self.Emb_dec = theano.shared(model_pre_train['Emb_dec'])
        self.W_dec = theano.shared(model_pre_train['W_dec'])
        self.b_dec = theano.shared(model_pre_train['b_dec'])
        self.L_0 = theano.shared(model_pre_train['L_0'])
        self.L = theano.shared(model_pre_train['L'])
        #
        self.dim_model = self.Emb_enc_forward.shape[1]
        self.dim_lang = self.Emb_dec.shape[0]
        self.dim_info = self.Emb_enc_forward.shape[0]
    #
    self.h_0_mat = tensor.zeros((self.size_batch, self.dim_model), dtype=dtype)
    self.c_0_mat = tensor.zeros((self.size_batch, self.dim_model), dtype=dtype)
    #
    self.params = [
        self.Emb_enc_forward, self.Emb_enc_backward,
        self.W_enc_forward, self.W_enc_backward,
        self.b_enc_forward, self.b_enc_backward,
        self.W_pre_att, self.b_pre_att,
        self.W_att, self.U_att, self.b_att,
        self.Emb_dec, self.W_dec, self.b_dec,
        self.L_0, self.L
    ]
    self.grad_params = None
    self.cost = None
def __init__(self, model_settings):
    self.dim_model = model_settings['dim_model']
    self.dim_world = model_settings['dim_world']
    # dim of the raw world input, which is NOT a one-hot vector
    self.dim_lang = model_settings['dim_lang']
    self.dim_action = model_settings['dim_action']
    # print "dim of model, world, lang and action is : ", self.dim_model, self.dim_world, self.dim_lang, self.dim_action
    #
    self.Emb_lang_sparse = theano.shared(
        numpy.identity(self.dim_lang, dtype=dtype), name='Emb_lang_sparse'
    )
    # this is the I-matrix that stands for idx of tokens
    #
    self.Emb_enc_forward = theano.shared(
        utils.sample_weights(self.dim_lang, self.dim_model), name='Emb_enc_forward'
    )
    self.W_enc_forward = theano.shared(
        utils.sample_weights(2*self.dim_model, 4*self.dim_model), name='W_enc_forward'
    )
    self.b_enc_forward = theano.shared(
        numpy.zeros((4*self.dim_model,), dtype=dtype), name='b_enc_forward'
    )
    #
    self.Emb_enc_backward = theano.shared(
        utils.sample_weights(self.dim_lang, self.dim_model), name='Emb_enc_backward'
    )
    self.W_enc_backward = theano.shared(
        utils.sample_weights(2*self.dim_model, 4*self.dim_model), name='W_enc_backward'
    )
    self.b_enc_backward = theano.shared(
        numpy.zeros((4*self.dim_model,), dtype=dtype), name='b_enc_backward'
    )
    #
    self.W_att_scope = theano.shared(
        utils.sample_weights(self.dim_lang + 2*self.dim_model, self.dim_model), name='W_att_scope'
    )
    self.W_att_target = theano.shared(
        utils.sample_weights(self.dim_model, self.dim_model), name='W_att_target'
    )
    self.b_att = theano.shared(
        numpy.zeros((self.dim_model,), dtype=dtype), name='b_att'
    )
    #
    self.Emb_dec = theano.shared(
        utils.sample_weights(self.dim_world, self.dim_model), name='Emb_dec'
    )
    self.W_dec = theano.shared(
        utils.sample_weights(self.dim_lang + 4*self.dim_model, 4*self.dim_model), name='W_dec'
    )
    self.b_dec = theano.shared(
        numpy.zeros((4*self.dim_model,), dtype=dtype), name='b_dec'
    )
    #
    self.W_out_hz = theano.shared(
        utils.sample_weights(self.dim_lang + 3*self.dim_model, self.dim_model), name='W_out_hz'
    )
    self.W_out = theano.shared(
        utils.sample_weights(self.dim_model, self.dim_action), name='W_out'
    )
    #
    self.c0 = theano.shared(numpy.zeros((self.dim_model,), dtype=dtype), name='c0')
    self.h0 = theano.shared(numpy.zeros((self.dim_model,), dtype=dtype), name='h0')
    #
    self.params = [
        self.Emb_enc_forward, self.W_enc_forward, self.b_enc_forward,
        self.Emb_enc_backward, self.W_enc_backward, self.b_enc_backward,
        self.W_att_scope, self.W_att_target, self.b_att,
        self.Emb_dec, self.W_dec, self.b_dec,
        self.W_out_hz, self.W_out
    ]
    #
    self.cost = None
    self.grad_params = None
def __init__(self, model_settings):
    self.dim_model = model_settings['dim_model']
    self.dim_world = model_settings['dim_world']
    # dim of the raw world input, which is NOT a one-hot vector
    self.dim_lang = model_settings['dim_lang']
    self.dim_action = model_settings['dim_action']
    #
    # drop-out related stuff
    self.drop_out_rate = model_settings['drop_out_rate']
    assert self.drop_out_rate <= numpy.float32(1.0)
    self.rnd_gen = RandomStreams(seed=12345)
    # boolean keep-mask of shape (dim_model,): True with probability drop_out_rate
    self.drop_out_layer = self.rnd_gen.uniform((self.dim_model,)) < self.drop_out_rate
    self.drop_out_layer_gen = theano.function([], self.drop_out_layer)
    #
    # print "dim of model, world, lang and action is : ", self.dim_model, self.dim_world, self.dim_lang, self.dim_action
    #
    self.Emb_lang_sparse = theano.shared(
        numpy.identity(self.dim_lang, dtype=dtype), name='Emb_lang_sparse'
    )
    # this is the I-matrix that stands for idx of tokens
    #
    self.Emb_enc_forward = theano.shared(
        utils.sample_weights(self.dim_lang, self.dim_model), name='Emb_enc_forward'
    )
    self.W_enc_forward = theano.shared(
        utils.sample_weights(2*self.dim_model, 4*self.dim_model), name='W_enc_forward'
    )
    self.b_enc_forward = theano.shared(
        numpy.zeros((4*self.dim_model,), dtype=dtype), name='b_enc_forward'
    )
    #
    self.Emb_enc_backward = theano.shared(
        utils.sample_weights(self.dim_lang, self.dim_model), name='Emb_enc_backward'
    )
    self.W_enc_backward = theano.shared(
        utils.sample_weights(2*self.dim_model, 4*self.dim_model), name='W_enc_backward'
    )
    self.b_enc_backward = theano.shared(
        numpy.zeros((4*self.dim_model,), dtype=dtype), name='b_enc_backward'
    )
    #
    self.W_att_scope = theano.shared(
        utils.sample_weights(self.dim_lang + 2*self.dim_model, self.dim_model), name='W_att_scope'
    )
    self.W_att_target = theano.shared(
        utils.sample_weights(self.dim_model, self.dim_model), name='W_att_target'
    )
    self.b_att = theano.shared(
        numpy.zeros((self.dim_model,), dtype=dtype), name='b_att'
    )
    #
    self.Emb_dec = theano.shared(
        utils.sample_weights(self.dim_world, self.dim_model), name='Emb_dec'
    )
    self.W_dec = theano.shared(
        utils.sample_weights(self.dim_lang + 4*self.dim_model, 4*self.dim_model), name='W_dec'
    )
    self.b_dec = theano.shared(
        numpy.zeros((4*self.dim_model,), dtype=dtype), name='b_dec'
    )
    #
    self.W_out_hz = theano.shared(
        utils.sample_weights(self.dim_lang + 3*self.dim_model, self.dim_model), name='W_out_hz'
    )
    self.W_out = theano.shared(
        utils.sample_weights(self.dim_model, self.dim_action), name='W_out'
    )
    #
    self.c0 = theano.shared(numpy.zeros((self.dim_model,), dtype=dtype), name='c0')
    self.h0 = theano.shared(numpy.zeros((self.dim_model,), dtype=dtype), name='h0')
    #
    self.params = [
        self.Emb_enc_forward, self.W_enc_forward, self.b_enc_forward,
        self.Emb_enc_backward, self.W_enc_backward, self.b_enc_backward,
        self.W_att_scope, self.W_att_target, self.b_att,
        self.Emb_dec, self.W_dec, self.b_dec,
        self.W_out_hz, self.W_out
    ]
    #
    self.cost = None
    self.grad_params = None
def __init__(self, n_in, n_output):
    self.W = theano.shared(sample_weights(n_in, n_output))
    self.b = theano.shared(np.zeros(n_output, dtype=dtype))
    self.params = [self.W, self.b]
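# A plain fully connected layer: y = x W + b. Shown with a softmax for when it
# serves as an output layer (illustrative; the forward pass lives elsewhere):
import numpy as np

def dense_forward(x, W, b):
    # affine map: (batch, n_in) @ (n_in, n_output) + (n_output,)
    return x @ W + b

def softmax(z):
    z = z - z.max(axis=1, keepdims=True)  # numerical stability
    e = np.exp(z)
    return e / e.sum(axis=1, keepdims=True)

rng = np.random.default_rng(0)
x = rng.normal(size=(2, 3))
W = rng.normal(size=(3, 5))
b = np.zeros(5)
print(softmax(dense_forward(x, W, b)).sum(axis=1))  # each row sums to 1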