train=False, download=False, transform=test_transform)  # tail of a call that begins above this excerpt
else:
    # Fallback branch of a conditional whose earlier branches are not visible
    # here: print the accepted model names and call sys.exit(-1).
    print('Model must be {} or {}!'.format('MnistCNN', 'AlexNet'))
    sys.exit(-1)

# NOTE(review): the original indentation was lost in this excerpt; the nesting
# depth of everything from here down is reconstructed - confirm against the
# real file.

# Batch sizes are taken directly from the parsed arguments.
train_bsz = args.train_bsz
test_bsz = args.test_bsz

# Worker ids are 1-based: 1 .. args.workers_num.
workers = [v + 1 for v in range(args.workers_num)]

# Partition both datasets over the worker ids, then keep the selection made
# for this process's rank (partition_dataset / select_dataset are defined
# outside this excerpt).
train_data = partition_dataset(train_dataset, workers)
test_data = partition_dataset(test_dataset, workers)

this_rank = args.this_rank
train_data = select_dataset(workers, this_rank, train_data, batch_size=train_bsz)
test_data = select_dataset(workers, this_rank, test_data, batch_size=test_bsz)

# One more than the number of workers.
# NOTE(review): presumably the extra rank is the parameter server - confirm.
world_size = len(workers) + 1


# Manager subclass used only as a registry for the three typeids below.
class MyManager(BaseManager):
    pass


# Typeids are registered by name only (no callable is supplied here).
# NOTE(review): presumably BaseManager is multiprocessing.managers.BaseManager
# and the callables live on the server side - confirm.
MyManager.register('get_queue')
MyManager.register('get_param')
MyManager.register('get_stop_signal')

# NOTE(review): port 5000 and the authkey are hard-coded; only the host comes
# from args.ps_ip.
manager = MyManager(address=(args.ps_ip, 5000), authkey=b'queue')
# The first two statements are the body of an if/elif branch whose header is above this excerpt.
    # NOTE(review): if `datasets` is torchvision.datasets, ImageFolder does not
    # accept `train` / `download` keyword arguments - confirm this branch runs.
    train_dataset = datasets.ImageFolder(args.data_dir, train=True, download=False, transform=train_transform)
    test_dataset = datasets.ImageFolder(args.data_dir, train=False, download=False, transform=test_transform)
else:
    # Fallback branch: print the accepted model names and call sys.exit(-1).
    print('Model must be {} or {}!'.format('MnistCNN', 'AlexNet'))
    sys.exit(-1)

# NOTE(review): the original indentation was lost in this excerpt; whether the
# statements below sit inside an enclosing loop cannot be told from here -
# confirm against the real file.
models.append(model)

# Split the global batch size across the workers; `/=` yields a float and
# int() truncates the fractional part.
# NOTE(review): the result is 0 when args.train_bsz < len(workers).
train_bsz = args.train_bsz
train_bsz /= len(workers)
train_bsz = int(train_bsz)

# Partition the training set over the worker ids and collect one selection per
# worker id, in `workers` order (partition_dataset / select_dataset are
# defined outside this excerpt).
train_data = partition_dataset(train_dataset, workers)
train_data_list = []
for i in workers:
    train_data_sub = select_dataset(workers, i, train_data, batch_size=train_bsz)
    train_data_list.append(train_data_sub)

test_bsz = 400
# Test with all of the test data
test_data = DataLoader(test_dataset, batch_size=test_bsz, shuffle = False)

# Whole number of global batches (args.train_bsz, not the per-worker size) in
# one pass over train_dataset.
iterations_epoch = int(len(train_dataset) / args.train_bsz)

# String form of the save path with any trailing '/' characters removed.
save_path = str(args.save_path)
save_path = save_path.rstrip('/')

# Build the process object that will call init_processes with the arguments
# below; it is not started within this excerpt. TorchProcess, init_processes
# and run are defined outside this excerpt.
p = TorchProcess(target=init_processes, args=(workers, models, save_path, train_data_list, test_data,iterations_epoch, run))