def _load_model_and_optimizer(fea_dict, model, config, arch_dict, use_cuda, multi_gpu, to_do):
    """Create the networks and optimizers and restore pre-trained state.

    For every network returned by ``model_init`` the ``arch_pretrain_file``
    entry of its architecture section in ``config`` is inspected.  Unless the
    entry is the string ``'none'``, the checkpoint is loaded, its
    ``'model_par'`` state is restored into the network and, when an optimizer
    exists for that network, its ``'optimizer_par'`` state is restored too.
    The optimizer learning rate is then overwritten with ``arch_lr`` from the
    config.  With ``multi_gpu`` set, each network is wrapped in
    ``torch.nn.DataParallel``.

    Returns:
        tuple: ``(nns, costs, optimizers, inp_out_dict)`` where
        ``inp_out_dict`` is the ``fea_dict`` argument itself.
    """
    inp_out_dict = fea_dict
    nns, costs = model_init(inp_out_dict, model, config, arch_dict, use_cuda, multi_gpu, to_do)
    optimizers = optimizer_init(nns, config, arch_dict)

    # Without CUDA the checkpoint is remapped onto the CPU; ``None`` is
    # torch.load's default and leaves the stored device placement untouched.
    device_map = None if use_cuda else 'cpu'

    for net in nns:
        arch_section = config[arch_dict[net][0]]
        pretrain_path = arch_section['arch_pretrain_file']

        if pretrain_path != 'none':
            checkpoint_load = torch.load(pretrain_path, map_location=device_map)
            nns[net].load_state_dict(checkpoint_load['model_par'])

            if net in optimizers:
                net_optimizer = optimizers[net]
                net_optimizer.load_state_dict(checkpoint_load['optimizer_par'])
                # The learning rate of the cfg file wins over the stored one.
                net_optimizer.param_groups[0]['lr'] = float(arch_section['arch_lr'])

        if multi_gpu:
            nns[net] = torch.nn.DataParallel(nns[net])

    return nns, costs, optimizers, inp_out_dict
def grid_search(train_loader, test_loader, comm_matrix, num_rounds, epochs, num_clients, net='net', optimizer='sgd', lrs=np.logspace(-12, -1, 13, base=2.0)):
    """
    Runs a decentralized optimization algorithm once per learning rate, for a
    number of rounds, over some network. Prints one summary line per learning
    rate and returns the final test accuracies.

    Params:
        train_loader (array): the list of all train datasets, one per client
        test_loader (array): the list of test datasets, one per client
        comm_matrix (numpy.array): the communication matrix modeling the network
        num_rounds (int): the number of data exchanges between nodes
        epochs (int): the number of optimization steps between each communication (minimum 1)
        num_clients (int): the number of clients in the network
        net (string): the neural network framework we use
        optimizer (string): the chosen optimizer, SGD by default
        lrs (array): the list of stepsizes to test

    Returns:
        accs (list): the accuracy measured after the last round, one entry per element of lrs
    """
    summary = 'lr %g | average train loss %0.3g | test loss %0.3g | test acc: %0.3f'
    accs = []

    for step_size in lrs:
        # Fresh models and optimizers for every candidate learning rate.
        global_model, client_models = model_init(num_clients, net)
        client_opts = optimizer_init(client_models, step_size, optimizer)

        # NOTE(review): loss is accumulated over all rounds and never reset, so
        # the printed "average train loss" is a sum over rounds divided by
        # num_clients.
        loss = 0.0
        test_loss = 0.0
        acc = 0.0

        for _ in range(num_rounds):
            for idx in range(num_clients):
                loss += client_update(client_models[idx], client_opts[idx],
                                      train_loader[idx], epoch=epochs)

            diffuse_params(client_models, comm_matrix)
            average_models(global_model, client_models)
            test_loss, acc = evaluate(global_model, test_loader)

        print(summary % (step_size, loss / num_clients, test_loss, acc))
        accs.append(acc)

    return accs
def run_nn(data_name,data_set,data_end_index,fea_dict,lab_dict,arch_dict,cfg_file,processed_first,next_config_file):
    """Process one data chunk (train, valid or forward) as described by cfg_file.

    While the current chunk is processed, a background thread runs
    ``read_lab_fea`` on ``next_config_file`` so that the following chunk is
    ready when this function returns.

    Params:
        data_name, data_set, data_end_index, fea_dict, lab_dict, arch_dict:
            the data of the current chunk. They are ignored and re-read from
            cfg_file when processed_first is true.
        cfg_file (string): chunk-specific config file; the process exits with
            status 1 if it does not exist
        processed_first (bool): when true the current chunk is read here
            instead of being taken from the arguments
        next_config_file (string): config file of the chunk to pre-load

    Returns:
        list: [data_name, data_set, data_end_index, fea_dict, lab_dict,
        arch_dict] of the NEXT chunk, with data_set converted to a torch
        tensor.

    Side effects:
        Writes the info file named by ``config['exp']['out_info']``; in
        'train' mode saves one checkpoint per network; in 'forward' mode
        writes the requested outputs through ``write_mat``.
    """

    # Reading chunk-specific cfg file (first argument-mandatory file)
    if not(os.path.exists(cfg_file)):
        sys.stderr.write('ERROR: The config file %s does not exist!\n'%(cfg_file))
        # A missing config is a failure: report it with a non-zero status.
        sys.exit(1)

    config = configparser.ConfigParser()
    config.read(cfg_file)

    # Setting torch seed
    seed=int(config['exp']['seed'])
    torch.manual_seed(seed)
    random.seed(seed)
    np.random.seed(seed)

    # Reading config parameters
    output_folder=config['exp']['out_folder']
    use_cuda=strtobool(config['exp']['use_cuda'])
    multi_gpu=strtobool(config['exp']['multi_gpu'])

    to_do=config['exp']['to_do']
    info_file=config['exp']['out_info']

    model=config['model']['model'].split('\n')

    forward_outs=config['forward']['forward_out'].split(',')
    forward_normalize_post=list(map(strtobool,config['forward']['normalize_posteriors'].split(',')))
    forward_count_files=config['forward']['normalize_with_counts_from'].split(',')
    require_decodings=list(map(strtobool,config['forward']['require_decoding'].split(',')))

    save_gpumem=strtobool(config['exp']['save_gpumem'])
    is_production=strtobool(config['exp']['production'])

    if to_do=='train':
        batch_size=int(config['batches']['batch_size_train'])

    if to_do=='valid':
        batch_size=int(config['batches']['batch_size_valid'])

    if to_do=='forward':
        batch_size=1

    def _to_tensor(chunk):
        # Convert a numpy chunk into a float tensor; it is moved to the GPU
        # only when CUDA is enabled and GPU memory does not have to be saved.
        tensor=torch.from_numpy(chunk).float()
        if not(save_gpumem) and use_cuda:
            tensor=tensor.cuda()
        return tensor

    # ***** Reading the Data********
    if processed_first:

        # Reading all the features and labels for this chunk
        shared_list=[]

        p=threading.Thread(target=read_lab_fea, args=(cfg_file,is_production,shared_list,output_folder,))
        p.start()
        p.join()

        data_name=shared_list[0]
        data_end_index=shared_list[1]
        fea_dict=shared_list[2]
        lab_dict=shared_list[3]
        arch_dict=shared_list[4]
        data_set=shared_list[5]

        # converting numpy tensors into pytorch tensors and put them on GPUs if specified
        data_set=_to_tensor(data_set)

    # Reading all the features and labels for the next chunk (joined at the end)
    shared_list=[]
    p=threading.Thread(target=read_lab_fea, args=(next_config_file,is_production,shared_list,output_folder,))
    p.start()

    # Reading model and initialize networks
    inp_out_dict=fea_dict

    [nns,costs]=model_init(inp_out_dict,model,config,arch_dict,use_cuda,multi_gpu,to_do)

    # optimizers initialization
    optimizers=optimizer_init(nns,config,arch_dict)

    # pre-training and multi-gpu init
    for net in nns.keys():
        pt_file_arch=config[arch_dict[net][0]]['arch_pretrain_file']

        if pt_file_arch!='none':
            if use_cuda:
                checkpoint_load = torch.load(pt_file_arch)
            else:
                # Remap to the CPU so that a checkpoint saved from a GPU run
                # can still be restored on a machine without CUDA.
                checkpoint_load = torch.load(pt_file_arch, map_location='cpu')
            nns[net].load_state_dict(checkpoint_load['model_par'])
            optimizers[net].load_state_dict(checkpoint_load['optimizer_par'])
            optimizers[net].param_groups[0]['lr']=float(config[arch_dict[net][0]]['arch_lr']) # loading lr of the cfg file for pt

        if multi_gpu:
            nns[net] = torch.nn.DataParallel(nns[net])

    if to_do=='forward':

        post_file={}
        for out_id in range(len(forward_outs)):
            if require_decodings[out_id]:
                out_file=info_file.replace('.info','_'+forward_outs[out_id]+'_to_decode.ark')
            else:
                out_file=info_file.replace('.info','_'+forward_outs[out_id]+'.ark')
            post_file[forward_outs[out_id]]=open_or_fd(out_file,output_folder,'wb')

    # check automatically if the model is sequential
    seq_model=is_sequential_dict(config,arch_dict)

    # ***** Minibatch Processing loop********
    if seq_model or to_do=='forward':
        N_snt=len(data_name)
        N_batches=int(N_snt/batch_size)
    else:
        N_ex_tr=data_set.shape[0]
        N_batches=int(N_ex_tr/batch_size)

    beg_batch=0
    end_batch=batch_size

    snt_index=0
    beg_snt=0

    start_time = time.time()

    # array of sentence lengths
    arr_snt_len=shift(shift(data_end_index, -1,0)-data_end_index,1,0)
    arr_snt_len[0]=data_end_index[0]

    loss_sum=0
    err_sum=0

    # Log-priors used to normalize posteriors, keyed by output index. They are
    # computed on first use so the counts file is read once per output
    # instead of once per sentence.
    log_priors={}

    inp_dim=data_set.shape[1]
    for i in range(N_batches):

        max_len=0

        if seq_model:

            max_len=int(max(arr_snt_len[snt_index:snt_index+batch_size]))
            inp= torch.zeros(max_len,batch_size,inp_dim).contiguous()

            for k in range(batch_size):

                snt_len=data_end_index[snt_index]-beg_snt
                N_zeros=max_len-snt_len

                # Appending a random number of initial zeros, the others are at the end.
                N_zeros_left=random.randint(0,N_zeros)

                # randomizing could have a regularization effect
                inp[N_zeros_left:N_zeros_left+snt_len,k,:]=data_set[beg_snt:beg_snt+snt_len,:]

                beg_snt=data_end_index[snt_index]
                snt_index=snt_index+1

        else:
            # features and labels for batch i
            if to_do!='forward':
                inp= data_set[beg_batch:end_batch,:].contiguous()
            else:
                snt_len=data_end_index[snt_index]-beg_snt
                inp= data_set[beg_snt:beg_snt+snt_len,:].contiguous()
                beg_snt=data_end_index[snt_index]
                snt_index=snt_index+1

        # use cuda
        if use_cuda:
            inp=inp.cuda()

        if to_do=='train':
            # Forward input, with autograd graph active
            outs_dict=forward_model(fea_dict,lab_dict,arch_dict,model,nns,costs,inp,inp_out_dict,max_len,batch_size,to_do,forward_outs)

            for opt in optimizers.keys():
                optimizers[opt].zero_grad()

            outs_dict['loss_final'].backward()

            # Gradient Clipping (th 0.1)
            #for net in nns.keys():
            #    torch.nn.utils.clip_grad_norm_(nns[net].parameters(), 0.1)

            for opt in optimizers.keys():
                # Frozen architectures still get gradients but are not updated.
                if not(strtobool(config[arch_dict[opt][0]]['arch_freeze'])):
                    optimizers[opt].step()
        else:
            with torch.no_grad(): # Forward input without autograd graph (save memory)
                outs_dict=forward_model(fea_dict,lab_dict,arch_dict,model,nns,costs,inp,inp_out_dict,max_len,batch_size,to_do,forward_outs)

        if to_do=='forward':
            for out_id in range(len(forward_outs)):

                out_save=outs_dict[forward_outs[out_id]].data.cpu().numpy()

                if forward_normalize_post[out_id]:
                    if out_id not in log_priors:
                        # read the counts file
                        counts = load_counts(forward_count_files[out_id])
                        log_priors[out_id]=np.log(counts/np.sum(counts))
                    out_save=out_save-log_priors[out_id]

                # save the output
                write_mat(output_folder,post_file[forward_outs[out_id]], out_save, data_name[i])
        else:
            loss_sum=loss_sum+outs_dict['loss_final'].detach()
            err_sum=err_sum+outs_dict['err_final'].detach()

        # update it to the next batch
        beg_batch=end_batch
        end_batch=beg_batch+batch_size

        # Progress bar
        if to_do == 'train':
            status_string="Training | (Batch "+str(i+1)+"/"+str(N_batches)+")"+" | L:" +str(round(loss_sum.cpu().item()/(i+1),3))
            if i==N_batches-1:
                # The running loss is dropped from the last status line.
                status_string="Training | (Batch "+str(i+1)+"/"+str(N_batches)+")"

        if to_do == 'valid':
            status_string="Validating | (Batch "+str(i+1)+"/"+str(N_batches)+")"
        if to_do == 'forward':
            status_string="Forwarding | (Batch "+str(i+1)+"/"+str(N_batches)+")"

        progress(i, N_batches, status=status_string)

    elapsed_time_chunk=time.time() - start_time

    loss_tot=loss_sum/N_batches
    err_tot=err_sum/N_batches

    # clearing memory
    del inp, outs_dict, data_set

    # save the model
    if to_do=='train':

        for net in nns.keys():
            checkpoint={}
            if multi_gpu:
                # Unwrap DataParallel so the saved keys carry no 'module.' prefix.
                checkpoint['model_par']=nns[net].module.state_dict()
            else:
                checkpoint['model_par']=nns[net].state_dict()

            checkpoint['optimizer_par']=optimizers[net].state_dict()

            out_file=info_file.replace('.info','_'+arch_dict[net][0]+'.pkl')
            torch.save(checkpoint, out_file)

    if to_do=='forward':
        for out_name in forward_outs:
            post_file[out_name].close()

    # Write info file (the with-statement closes it)
    with open(info_file, "w") as text_file:
        text_file.write("[results]\n")
        if to_do!='forward':
            text_file.write("loss=%s\n" % loss_tot.cpu().numpy())
            text_file.write("err=%s\n" % err_tot.cpu().numpy())
        text_file.write("elapsed_time_chunk=%f\n" % elapsed_time_chunk)

    # Getting the data for the next chunk (read in parallel)
    p.join()
    data_name=shared_list[0]
    data_end_index=shared_list[1]
    fea_dict=shared_list[2]
    lab_dict=shared_list[3]
    arch_dict=shared_list[4]
    data_set=shared_list[5]

    # converting numpy tensors into pytorch tensors and put them on GPUs if specified
    data_set=_to_tensor(data_set)

    return [data_name,data_set,data_end_index,fea_dict,lab_dict,arch_dict]
def run_probas(train_loader, test_loader, comm_matrix, num_rounds, epochs, num_clients, failure_rounds, corr='global', net='net', optimizer='sgd', lr=0.1):
    """
    Runs a decentralized optimization algorithm for the given learning rate for a
    number of rounds, over some network. Links may fail for some rounds according
    to a pre-defined probabilistic model. Prints the metrics of every round and
    returns the per-round accuracies.

    Params:
        train_loader (array): the list of all train datasets, one per client
        test_loader (array): the list of test datasets, one per client
        comm_matrix (numpy.array): the communication matrix modeling the network
        num_rounds (int): the number of data exchanges between nodes
        epochs (int): the number of optimization steps between each communication (minimum 1)
        num_clients (int): the number of clients in the network
        failure_rounds (array or list): round indices of the link failures; the number of
            entries equal to r is the number of failing links passed on for round r
        corr (string): the correction policy ('global', 'local' or 'none'), global by default
        net (string): the neural network framework we use
        optimizer (string): the chosen optimizer, SGD by default
        lr (double): the learning rate for the optimization algorithm

    Returns:
        global_model (nn.Module): the final global neural network averaging all the clients
        client_models (array of Net): the list of all the final client neural networks
        accs (list): the test accuracy after each round, num_rounds entries
    """
    assert corr in ['global', 'local', 'none']

    # A plain Python list compared with an int yields a single False, which
    # would silently disable every failure; compare element-wise instead.
    failure_rounds = np.asarray(failure_rounds)

    accs = []
    global_model, client_models = model_init(num_clients, net)
    opt = optimizer_init(client_models, lr, optimizer)

    # NOTE(review): loss is never reset, so the value printed each round is the
    # cumulative training loss of all rounds so far divided by num_clients.
    loss, test_loss, acc = 0.0, 0.0, 0.0
    for r in range(num_rounds):
        num_failures = np.count_nonzero(failure_rounds == r)

        if corr == 'global':
            actual_comm_matrix = network_failures_global(comm_matrix, num_failures)
        elif corr == 'local':
            actual_comm_matrix = network_failures_local(comm_matrix, num_failures)
        else:  # corr == 'none'
            actual_comm_matrix = network_failures_no_correction(comm_matrix, num_failures)

        for i in range(num_clients):
            loss += client_update(client_models[i], opt[i], train_loader[i], epoch=epochs)

        diffuse_params(client_models, actual_comm_matrix)
        average_models(global_model, client_models)
        test_loss, acc = evaluate(global_model, test_loader)

        print('%d-th round' % r)
        print('average train loss %0.3g | test loss %0.3g | test acc: %0.3f' % (loss / num_clients, test_loss, acc))
        accs.append(acc)

    return global_model, client_models, accs
def run_latency_changing_topo(train_loader, test_loader, num_rounds, epochs, num_clients, latency_nodes, net='net', optimizer='sgd', lr=0.1):
    """
    Runs a decentralized optimization algorithm for the given learning rate for a
    number of rounds, over some network. Some nodes send their weights with a
    one-round latency, for the entire execution. The network topology evolves over
    time: a topology is drawn at random among 'centralized', 'ring' and 'grid' for
    every round and the mixing matrix is rebuilt from it. Prints the metrics of
    every round and returns the per-round accuracies.

    Params:
        train_loader (array): the list of all train datasets, one per client
        test_loader (array): the list of test datasets, one per client
        num_rounds (int): the number of data exchanges between nodes
        epochs (int): the number of optimization steps between each communication (minimum 1)
        num_clients (int): the number of clients in the network
        latency_nodes (array): the list of delayed nodes
        net (string): the neural network framework we use
        optimizer (string): the chosen optimizer, SGD by default
        lr (double): the learning rate for the optimization algorithm

    Returns:
        global_model (nn.Module): the final global neural network averaging all the clients
        client_models (array of Net): the list of all the final client neural networks
        accs (list): the test accuracy after each round, num_rounds entries
    """
    accs = []
    global_model, client_models = model_init(num_clients, net)
    opt = optimizer_init(client_models, lr, optimizer)

    topos = ['centralized', 'ring', 'grid']
    topo = np.random.choice(topos)
    comm_matrix = create_mixing_matrix(topo, num_clients)

    # Complement of latency_nodes, handed to the delayed diffusion; it does
    # not depend on the round, so it is computed once.
    on_time_nodes = np.setdiff1d(np.arange(num_clients), latency_nodes)

    # NOTE(review): loss is never reset, so the value printed each round is the
    # cumulative training loss of all rounds so far divided by num_clients.
    loss, test_loss, acc = 0.0, 0.0, 0.0
    for r in range(num_rounds):
        # NOTE(review): this binds the same list object as client_models, it is
        # not a snapshot of the previous round's weights - confirm the intent.
        old_client_models = client_models
        old_topo = topo
        old_comm_matrix = comm_matrix

        # Draw the topology of this round and rebuild the mixing matrix from
        # it; without the rebuild the matrix of the very first draw would be
        # used forever and old_comm_matrix would always equal comm_matrix.
        topo = np.random.choice(topos)
        comm_matrix = create_mixing_matrix(topo, num_clients)

        # client update
        for i in range(num_clients):
            loss += client_update(client_models[i], opt[i], train_loader[i], epoch=epochs)

        # diffuse params
        diffuse_params_latency(client_models, comm_matrix, latency_nodes)
        if r > 0:
            # Delayed exchange over the previous round's topology.
            diffuse_params_latency(old_client_models, old_comm_matrix, on_time_nodes)
        print("old topo: {}, new topo: {}".format(old_topo, topo))

        average_models(global_model, client_models)
        test_loss, acc = evaluate(global_model, test_loader)

        print('%d-th round' % r)
        print('average train loss %0.3g | test loss %0.3g | test acc: %0.3f' % (loss / num_clients, test_loss, acc))
        accs.append(acc)

    return global_model, client_models, accs
def run_latency_per_round(train_loader, test_loader, comm_matrix, num_rounds, epochs, num_clients, latency_nodes, latency_rounds, net='net', optimizer='sgd', lr=0.1):
    """
    Runs a decentralized optimization algorithm for the given learning rate for a
    number of rounds, over some network. Some nodes send their weights with a
    one-round latency, only during specific rounds. Prints the metrics of every
    round and returns the per-round accuracies.

    Each round is one of:
        - a delay round (r in latency_rounds): diffusion with latency_nodes delayed;
        - a recovery round (r - 1 in latency_rounds, r itself not): a normal diffusion
          followed by the delayed diffusion;
        - a normal round otherwise.

    Params:
        train_loader (array): the list of all train datasets, one per client
        test_loader (array): the list of test datasets, one per client
        comm_matrix (numpy.array): the communication matrix modeling the network
        num_rounds (int): the number of data exchanges between nodes
        epochs (int): the number of optimization steps between each communication (minimum 1)
        num_clients (int): the number of clients in the network
        latency_nodes (array): the list of delayed nodes
        latency_rounds (array or list): the rounds at which latency will occur across the network
        net (string): the neural network framework we use
        optimizer (string): the chosen optimizer, SGD by default
        lr (double): the learning rate for the optimization algorithm

    Returns:
        global_model (nn.Module): the final global neural network averaging all the clients
        client_models (array of Net): the list of all the final client neural networks
        accs (list): the test accuracy after each round, num_rounds entries
    """
    accs = []
    global_model, client_models = model_init(num_clients, net)
    opt = optimizer_init(client_models, lr, optimizer)

    # Convert once so that a plain Python list is accepted as well:
    # ``list + 1`` raises TypeError, an ndarray is shifted element-wise.
    delay_rounds = np.asarray(latency_rounds)
    recovery_rounds = delay_rounds + 1

    # Complement of latency_nodes, handed to the delayed diffusion; it does
    # not depend on the round, so it is computed once.
    on_time_nodes = np.setdiff1d(np.arange(num_clients), latency_nodes)

    # NOTE(review): loss is never reset, so the value printed each round is the
    # cumulative training loss of all rounds so far divided by num_clients.
    loss, test_loss, acc = 0.0, 0.0, 0.0
    for r in range(num_rounds):
        # NOTE(review): this binds the same list object as client_models, it is
        # not a snapshot of the previous round's weights - confirm the intent.
        old_client_models = client_models

        # client update
        for i in range(num_clients):
            loss += client_update(client_models[i], opt[i], train_loader[i], epoch=epochs)

        # diffuse params
        if r in delay_rounds:
            diffuse_params_latency(client_models, comm_matrix, latency_nodes)
            print("round {}, delay".format(r))
        elif r in recovery_rounds:
            diffuse_params(client_models, comm_matrix)
            diffuse_params_latency(old_client_models, comm_matrix, on_time_nodes)
            print("round {}, delay recovery".format(r))
        else:
            diffuse_params(client_models, comm_matrix)
            print("round {}, normal".format(r))

        average_models(global_model, client_models)
        test_loss, acc = evaluate(global_model, test_loader)

        print('%d-th round' % r)
        print('average train loss %0.3g | test loss %0.3g | test acc: %0.3f' % (loss / num_clients, test_loss, acc))
        accs.append(acc)

    return global_model, client_models, accs
# Convert the numpy chunk into a float tensor and time the conversion; the
# tensor is moved to the GPU only when CUDA is enabled and GPU memory does not
# have to be saved.
start_time = time.time()
if not (save_gpumem) and use_cuda:
    data_set = torch.from_numpy(data_set).float().cuda()
else:
    data_set = torch.from_numpy(data_set).float()
elapsed_time_load = time.time() - start_time

# Reading model and initialize networks
inp_out_dict = fea_dict

[nns, costs] = model_init(inp_out_dict, model, config, arch_dict, use_cuda,
                          multi_gpu, to_do)

# optimizers initialization
optimizers = optimizer_init(nns, config, arch_dict)

# pre-training
for net in nns.keys():
    pt_file_arch = config[arch_dict[net][0]]['arch_pretrain_file']

    if pt_file_arch != 'none':
        if use_cuda:
            checkpoint_load = torch.load(pt_file_arch)
        else:
            # Remap to the CPU so that a checkpoint saved from a GPU run can
            # still be restored on a machine without CUDA.
            checkpoint_load = torch.load(pt_file_arch, map_location='cpu')
        nns[net].load_state_dict(checkpoint_load['model_par'])
        optimizers[net].load_state_dict(checkpoint_load['optimizer_par'])
        # loading lr of the cfg file for pt
        optimizers[net].param_groups[0]['lr'] = float(
            config[arch_dict[net][0]]['arch_lr'])

if to_do == 'forward':
    post_file = {}
def run_topos(train_loader, test_loader, num_rounds, epochs, num_clients, topos, shuffle='random', net='net', optimizer='sgd', lr=0.1):
    """
    Runs a decentralized optimization algorithm for the given learning rate for a
    number of rounds, over a network whose topology changes between rounds.
    Prints the metrics of every round and returns the per-round accuracies.

    The topology of round r is picked from topos according to shuffle:
        - 'fraction': the rounds are split into len(topos) consecutive slices and
          slice k uses topos[k];
        - 'modulo': topos[r % len(topos)];
        - 'random': a uniformly drawn element of topos.

    Params:
        train_loader (array): the list of all train datasets, one per client
        test_loader (array): the list of test datasets, one per client
        num_rounds (int): the number of data exchanges between nodes
        epochs (int): the number of optimization steps between each communication (minimum 1)
        num_clients (int): the number of clients in the network
        topos (array): list of possible network topologies, of any non-zero length
        shuffle (string): defines how topology evolves over time, randomly by default
        net (string): the neural network framework we use
        optimizer (string): the chosen optimizer, SGD by default
        lr (double): the learning rate for the optimization algorithm

    Returns:
        global_model (nn.Module): the final global neural network averaging all the clients
        client_models (array of Net): the list of all the final client neural networks
        accs (list): the test accuracy after each round, num_rounds entries

    Raises:
        ValueError: if topos is empty
    """
    assert shuffle in ['random', 'modulo', 'fraction']

    # The schedule is derived from the actual number of topologies instead of
    # a fixed 5, so shorter and longer lists are handled correctly.
    num_topos = len(topos)
    if num_topos == 0:
        raise ValueError('topos must contain at least one topology')

    accs = []
    global_model, client_models = model_init(num_clients, net)
    opt = optimizer_init(client_models, lr, optimizer)

    # NOTE(review): loss is never reset, so the value printed each round is the
    # cumulative training loss of all rounds so far divided by num_clients.
    loss, test_loss, acc = 0.0, 0.0, 0.0
    for r in range(num_rounds):
        for i in range(num_clients):
            loss += client_update(client_models[i], opt[i], train_loader[i], epoch=epochs)

        if shuffle == 'fraction':
            t = int(r * num_topos / num_rounds)
        elif shuffle == 'modulo':
            t = r % num_topos
        else:  # shuffle == 'random'
            t = np.random.choice(num_topos)
        comm_matrix = create_mixing_matrix(topos[t], num_clients)

        diffuse_params(client_models, comm_matrix)
        average_models(global_model, client_models)
        test_loss, acc = evaluate(global_model, test_loader)

        print('%d-th round, %s topology' % (r, topos[t]))
        print('average train loss %0.3g | test loss %0.3g | test acc: %0.3f' % (loss / num_clients, test_loss, acc))
        accs.append(acc)

    return global_model, client_models, accs