def main(args):
    """Load the JSON config, apply CLI overrides, prepare the save dir, and train."""
    # Base configuration comes from the JSON file on disk.
    with open(args.cfg_path, "r") as handle:
        configs = json.load(handle)

    # Command-line values (when explicitly given) take precedence over the file.
    overrides = vars(args)
    for name in overrides:
        if name in configs and overrides[name] is not None:
            configs[name] = overrides[name]

    configs = utils.ConfigMapper(configs)
    configs.attack_eps = float(configs.attack_eps)
    configs.attack_lr = float(configs.attack_lr)

    print("configs mode: ", configs.mode)
    print("configs lr: ", configs.lr)
    print("configs size: ", configs.size)

    # Save layout: <save_path>/<mode>/<experiment name>
    configs.save_path = os.path.join(configs.save_path, configs.mode)
    experiment_name = exp_name(configs)
    configs.save_path = os.path.join(configs.save_path, experiment_name)
    pathlib.Path(configs.save_path).mkdir(parents=True, exist_ok=True)

    trainer = Trainer(configs)
    trainer.train()
    print("training is over!!!")
def main(args):
    """Load the config, normalise attack params from pixel units, then dispatch on mode."""
    with open(args.cfg_path, "r") as handle:
        configs = json.load(handle)

    # CLI flags override file values only when explicitly provided.
    for name, value in vars(args).items():
        if name in configs and value is not None:
            configs[name] = value

    configs = utils.ConfigMapper(configs)
    # Attack budgets arrive in 0-255 pixel units; convert to [0, 1].
    configs.attack_eps = float(configs.attack_eps) / 255
    configs.attack_lr = float(configs.attack_lr) / 255

    configs.save_path = os.path.join(configs.save_path, configs.mode, configs.alg)
    pathlib.Path(configs.save_path).mkdir(parents=True, exist_ok=True)

    # Dispatch to the requested entry point.
    if configs.mode == 'train':
        Trainer(configs).train()
    elif configs.mode == 'eval':
        Evaluator(configs).eval()
    elif configs.mode == 'vis':
        Visualizer(configs).visualize()
    else:
        raise ValueError('mode should be train, eval or vis')
def main(config):
    """Assemble data loader, model, loss, metrics, optimizer and LR scheduler
    from the parsed `config` object, then run the Trainer.
    """
    logger = config.get_logger('train')

    data_loader = config.init_obj('data_loader', module_dataloader)
    # Cache downloaded pretrained weights under the configured directory.
    torch.hub.set_dir(config['weights_path'])
    model = config.init_obj('arch', module_model)
    logger.info(model)

    # FIXME: refactor needed
    if config['data_loader']['args']['self_supervised']:
        criterion = torch.nn.CrossEntropyLoss()
    else:
        # Class-weighted loss for the supervised case.
        # NOTE(review): 'cuda' is hard-coded here — breaks CPU-only runs; confirm intended.
        criterion = torch.nn.CrossEntropyLoss(
            weight=data_loader.get_label_proportions().to('cuda'))

    metrics = [getattr(module_metric, met) for met in config['metrics']]

    # Only optimise parameters that are not frozen.
    trainable_params = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = config.init_obj('optimizer', torch.optim, trainable_params)
    lr_scheduler = config.init_obj('lr_scheduler', torch.optim.lr_scheduler,
                                   optimizer)

    trainer = Trainer(model.get_model(), criterion, metrics, optimizer,
                      config=config,
                      train_data_loader=data_loader.train,
                      valid_data_loader=data_loader.val,
                      test_data_loader=data_loader.test,
                      lr_scheduler=lr_scheduler)

    trainer.train()
def main(args):
    """Load the JSON config, apply CLI overrides, prepare the save dir, and train.

    Fix: the original recomputed `experiment_name` from hard-coded "settings"
    AFTER it had already been consumed into `configs.save_path` — a dead store
    that silently disagreed with the directory actually used. Removed.
    """
    # Read configs
    with open(args.cfg_path, "r") as fp:
        configs = json.load(fp)

    # Update the configs based on command line args (only when explicitly given).
    arg_dict = vars(args)
    for key in arg_dict:
        if key in configs and arg_dict[key] is not None:
            configs[key] = arg_dict[key]

    configs = utils.ConfigMapper(configs)
    # Attack budgets arrive in 0-255 pixel units; convert to [0, 1].
    configs.attack_eps = float(configs.attack_eps) / 255
    configs.attack_lr = float(configs.attack_lr) / 255

    print("configs mode: ", configs.mode)
    print("configs lr: ", configs.lr)

    # Save layout: <save_path>/<mode>/<experiment name>
    configs.save_path = os.path.join(configs.save_path, configs.mode)
    experiment_name = exp_name(configs)
    configs.save_path = os.path.join(configs.save_path, experiment_name)
    pathlib.Path(configs.save_path).mkdir(parents=True, exist_ok=True)

    trainer = Trainer(configs)
    trainer.train()
    print("training is over!!!")
def main():
    """Read the run config, build the TF session/model/data/logger, and train.

    Fixes: the bare `except:` also swallowed SystemExit/KeyboardInterrupt, and
    the failure path exited with status 0 (success) — now exits non-zero.
    """
    # capture the config path from the run arguments
    # then process the json configuration file
    try:
        args = get_args()
        config = process_config(args.config)
    except Exception:
        print("missing or invalid arguments")
        exit(1)  # non-zero: this is a failure, not a clean exit (was exit(0))

    # create tensorflow session
    tf.reset_default_graph()
    # NOTE(review): tf_config is not defined in this function — presumably a
    # module-level session config; confirm it exists where this is called.
    sess = tf.Session(config=tf_config)

    # create instance of the model you want
    model = DLPDE_Model(config)
    model.load(sess)

    # create your data generator
    train_data, test_data, input_train_extra = creat_dataset(config)

    # create tensorboard logger
    logger = Logger(sess, config)

    # create trainer and pass all previous components to it
    trainer = Trainer(sess, model, train_data, test_data, config, logger)

    # here you train your model
    trainer.train()
def main():
    """Parse CLI args, build the GAN networks and optimisers, then run training."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--learning-rate', '-lr', type=float, default=1e-3)
    parser.add_argument('--epochs', type=int, default=20)
    parser.add_argument('--no-cuda', action='store_true')
    parser.add_argument('--data-parallel', action='store_true')
    parser.add_argument('--num-d-iterations', type=int, default=1)
    args = parser.parse_args()
    args.cuda = torch.cuda.is_available() and not args.no_cuda
    print(args)

    device = torch.device('cuda' if args.cuda else 'cpu')

    generator = Generator(ch=128).to(device)
    discriminator = Discriminator(ch=128).to(device)

    # Both nets share the same Adam hyper-parameters.
    adam_kwargs = dict(lr=args.learning_rate, betas=(0.5, 0.999))
    optim_g = optim.Adam(generator.parameters(), **adam_kwargs)
    optim_d = optim.Adam(discriminator.parameters(), **adam_kwargs)

    dataloader = get_cat_dataloader()
    trainer = Trainer(generator, discriminator, optim_g, optim_d, dataloader,
                      device, args.num_d_iterations)

    os.makedirs('samples', exist_ok=True)
    trainer.train(args.epochs)
def main():
    """Wire up logging, session, data loader and model, then run training."""
    create_logging()
    session = tf.Session()
    loader = IMDBDataLoader(session)
    trainer = Trainer(session, Model(loader), loader)
    trainer.train()
def run_small_net():
    # Build, train and test the small "distilled" net (input -> softmax only)
    # on training_data2, which was filled with the big net's predictions.
    global training_data2, n2, t2, testing_data
    layers = []
    layers.append({'type': 'input', 'out_sx': 24, 'out_sy': 24, 'out_depth': 1})
    #layers.append({'type': 'fc', 'num_neurons': 50, 'activation': 'relu'})
    layers.append({'type': 'softmax', 'num_classes': 10})
    print 'Layers made...'
    n2 = Net(layers)
    print 'Smaller Net made...'
    print n2
    t2 = Trainer(n2, {'method': 'sgd', 'momentum': 0.0})
    print 'Trainer made for smaller net...'
    print 'In training of smaller net...'
    print 'k', 'time\t\t ', 'loss\t ', 'training accuracy'
    print '----------------------------------------------------'
    try:
        for x, y in training_data2:
            stats = t2.train(x, y)
            print stats['k'], stats['time'], stats['loss'], stats['accuracy']
    except:  #hit control-c or other
        # Deliberate: a Ctrl-C stops training early and falls through to testing.
        pass
    print 'Testing smaller net: 5000 trials'
    right = 0
    count = 5000
    for x, y in sample(testing_data, count):
        n2.forward(x)
        # Booleans count as 0/1, so this accumulates the number of correct predictions.
        right += n2.getPrediction() == y
    accuracy = float(right) / count * 100
    print accuracy
def main(not_parsed_args):
    """Load MNIST data, build the ConvLSTM network, and train it in a TF session."""
    # Anything beyond the program name means unrecognised flags — bail out.
    if len(not_parsed_args) > 1:
        print("Unknown args:%s" % not_parsed_args)
        exit()

    data_path = os.path.join(DATA_DIR, FLAGS.data_file)
    test_path = os.path.join(DATA_DIR, FLAGS.test_file)
    loader = MnistLoader(test_path, FLAGS, data_path)
    network = ConvLSTMNetwork(FLAGS)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = False
    sess = tf.Session(config=config, graph=network.graph)
    print('Sess created.')

    trainer = Trainer(loader, network, sess, FLAGS)
    trainer.load_model()
    trainer.train()
    sess.close()
def train2():
    # Build and train the second (smaller) fully-connected net on training_data2.
    global training_data2, n2, t2
    layers = []
    layers.append({
        'type': 'input',
        'out_sx': 28,
        'out_sy': 28,
        'out_depth': 1
    })
    layers.append({'type': 'fc', 'num_neurons': 100, 'activation': 'sigmoid'})
    layers.append({'type': 'softmax', 'num_classes': 10})
    print 'Layers made...'
    n2 = Net(layers)
    print 'Net made...'
    print n2
    t2 = Trainer(n2, {
        'method': 'adadelta',
        'batch_size': 20,
        'l2_decay': 0.001
    })
    print 'Trainer made...'
    print 'In training of smaller net...'
    print 'k', 'time\t\t ', 'loss\t ', 'training accuracy'
    print '----------------------------------------------------'
    try:
        for x, y in training_data2:
            stats = t2.train(x, y)
            print stats['k'], stats['time'], stats['loss'], stats['accuracy']
    except:  #hit control-c or other
        # Deliberate: Ctrl-C (or any error) just ends training early.
        return
def train2():
    # Build and train the second (smaller) fully-connected net on training_data2.
    # NOTE(review): this file contains another `train2` definition with the same
    # body — whichever is defined later shadows the earlier one; confirm which
    # copy is intended to survive.
    global training_data2, n2, t2
    layers = []
    layers.append({'type': 'input', 'out_sx': 28, 'out_sy': 28, 'out_depth': 1})
    layers.append({'type': 'fc', 'num_neurons': 100, 'activation': 'sigmoid'})
    layers.append({'type': 'softmax', 'num_classes': 10})
    print 'Layers made...'
    n2 = Net(layers)
    print 'Net made...'
    print n2
    t2 = Trainer(n2, {'method': 'adadelta', 'batch_size': 20, 'l2_decay': 0.001});
    print 'Trainer made...'
    print 'In training of smaller net...'
    print 'k', 'time\t\t ', 'loss\t ', 'training accuracy'
    print '----------------------------------------------------'
    try:
        for x, y in training_data2:
            stats = t2.train(x, y)
            print stats['k'], stats['time'], stats['loss'], stats['accuracy']
    except:  #hit control-c or other
        # Deliberate: Ctrl-C (or any error) just ends training early.
        return
def test_trainer(model, optimizer, scheduler, data_loader, criterion):
    """Smoke-test the Trainer: a 2-epoch run must update weights and log
    metrics, then a resume-from-checkpoint run must continue to 4 epochs."""
    args = DictConfig({'experiment': {'debug': False}})
    App.init(args)
    output_dirpath = Path.cwd()

    # basic training run of model
    pre_training_model = copy.deepcopy(model)
    logger = Logger()
    trainer = Trainer(model=model,
                      train_dataloader=data_loader,
                      val_dataloader=data_loader,
                      optimizer=optimizer,
                      scheduler=scheduler,
                      epochs=2,
                      logger=logger,
                      debug=True,
                      criterion=criterion,
                      output_path=output_dirpath)
    trainer.train()
    # Training must have modified at least one parameter.
    check_variable_change(pre_training_model, model)
    assert trainer.epochs_trained == 2
    # 4 logged metrics after 2 epochs — presumably train+val per epoch; verify
    # against Logger's semantics if this ever changes.
    assert len(trainer.logger.metrics_dict) == 4
    assert (0 <= trainer.logger.metrics_dict['accuracy'] <= 1)

    # check that can load from checkpoint
    pre_training_model = copy.deepcopy(model)
    trainer = Trainer(model=model,
                      train_dataloader=data_loader,
                      val_dataloader=data_loader,
                      optimizer=optimizer,
                      scheduler=scheduler,
                      epochs=4,
                      logger=logger,
                      debug=True,
                      criterion=criterion,
                      checkpoint='last.pth',
                      output_path=output_dirpath)
    trainer.train()
    # Remove checkpoint files written by the first run.
    os.remove('last.pth')
    os.remove('best.pth')
    check_variable_change(pre_training_model, model)
    assert trainer.epochs_trained == 4
def run_big_net():
    # Build and train the big net on MNIST-style data, record its predictions
    # in training_data2 (used later as soft labels for the small net), then
    # measure test accuracy on 5000 random samples.
    global training_data, testing_data, n, t, training_data2
    training_data = load_data()
    testing_data = load_data(False)
    training_data2 = []
    print 'Data loaded...'
    layers = []
    layers.append({
        'type': 'input',
        'out_sx': 24,
        'out_sy': 24,
        'out_depth': 1
    })
    layers.append({
        'type': 'fc',
        'num_neurons': 100,
        'activation': 'relu',
        'drop_prob': 0.5
    })
    #layers.append({'type': 'fc', 'num_neurons': 800, 'activation': 'relu', 'drop_prob': 0.5})
    layers.append({'type': 'softmax', 'num_classes': 10})
    print 'Layers made...'
    n = Net(layers)
    print 'Net made...'
    print n
    t = Trainer(n, {'method': 'sgd', 'momentum': 0.0})
    print 'Trainer made...'
    print 'In training...'
    print 'k', 'time\t\t ', 'loss\t ', 'training accuracy'
    print '----------------------------------------------------'
    try:
        for x, y in training_data:
            stats = t.train(x, y)
            print stats['k'], stats['time'], stats['loss'], stats['accuracy']
            # Collect (input, big-net prediction) pairs for the smaller net.
            training_data2.append((x, n.getPrediction()))
    except:  #hit control-c or other
        # Deliberate: a Ctrl-C stops training early and falls through to testing.
        pass
    print 'In testing: 5000 trials'
    right = 0
    count = 5000
    for x, y in sample(testing_data, count):
        n.forward(x)
        # Booleans count as 0/1, so this accumulates the number of correct predictions.
        right += n.getPrediction() == y
    accuracy = float(right) / count * 100
    print accuracy
def main():
    """Train an MNIST classifier driven by a JSON config, checkpointing each epoch.

    Fix: `scheduler.step()` was called at the TOP of each epoch, before any
    optimizer updates. Under PyTorch >= 1.1 the scheduler must step after the
    epoch's training, otherwise the StepLR decay fires one epoch early.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', type=str, default='configs/config.json')
    parser.add_argument('--no-cuda', action='store_true')
    parser.add_argument('--parallel', action='store_true')
    args = parser.parse_args()
    args.cuda = torch.cuda.is_available() and not args.no_cuda
    print(args)

    device = torch.device('cuda' if args.cuda else 'cpu')

    config = load_json(args.config)

    model = MNISTNet()
    if args.parallel:
        model = nn.DataParallel(model)
    model.to(device)

    optimizer = optim.Adam(model.parameters(), **config['adam'])
    scheduler = optim.lr_scheduler.StepLR(optimizer, **config['steplr'])

    train_loader, valid_loader = mnist_loader(**config['dataset'])

    trainer = Trainer(model, optimizer, train_loader, valid_loader, device)

    # Timestamped run directory so runs never overwrite each other.
    output_dir = os.path.join(config['output_dir'],
                              datetime.now().strftime('%Y%m%d_%H%M%S'))
    os.makedirs(output_dir, exist_ok=True)

    # save config to output dir
    save_json(config, os.path.join(output_dir, 'config.json'))

    for epoch in range(config['epochs']):
        train_loss, train_acc = trainer.train()
        valid_loss, valid_acc = trainer.validate()
        # Step AFTER the epoch's optimizer updates (PyTorch >= 1.1 ordering).
        scheduler.step()

        print(
            'epoch: {}/{},'.format(epoch + 1, config['epochs']),
            'train loss: {:.4f}, train acc: {:.2f}%,'.format(
                train_loss, train_acc * 100),
            'valid loss: {:.4f}, valid acc: {:.2f}%'.format(
                valid_loss, valid_acc * 100))

        torch.save(
            model.state_dict(),
            os.path.join(output_dir, 'model_{:04d}.pt'.format(epoch + 1)))
def run(config, norm2d):
    """Train CIFAR10Net with the given normalisation layer; return per-epoch
    validation accuracies.

    Fixes: `scheduler.step()` moved after the epoch's optimizer updates
    (PyTorch >= 1.1 ordering; stepping first shifted the LR decay one epoch
    early), and the checkpoint directory is created once instead of per epoch.
    """
    train_loader, valid_loader = cifar10_loader(config.root, config.batch_size)

    model = CIFAR10Net(norm2d=norm2d)
    if config.cuda:
        model.cuda()

    optimizer = optim.Adam(model.parameters(), lr=config.lr, weight_decay=1e-4)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=100, gamma=0.1)

    trainer = Trainer(model, optimizer, train_loader, valid_loader,
                      use_cuda=config.cuda)

    # Hoisted out of the loop: the directory only needs creating once.
    save_dir = os.path.join(config.save_dir, norm2d)
    os.makedirs(save_dir, exist_ok=True)

    valid_acc_list = []
    for epoch in range(config.epochs):
        start = time()
        train_loss, train_acc = trainer.train(epoch)
        valid_loss, valid_acc = trainer.validate()
        # Step AFTER the epoch's optimizer updates (PyTorch >= 1.1 ordering).
        scheduler.step()
        print(
            'epoch: {}/{},'.format(epoch + 1, config.epochs),
            'train loss: {:.4f}, train acc: {:.2f}%,'.format(
                train_loss, train_acc * 100),
            'valid loss: {:.4f}, valid acc: {:.2f}%,'.format(
                valid_loss, valid_acc * 100),
            'time: {:.2f}s'.format(time() - start))
        torch.save(model.state_dict(),
                   os.path.join(save_dir, 'model_{:04d}.pt'.format(epoch + 1)))
        valid_acc_list.append(valid_acc)

    return valid_acc_list
def run_small_net():
    # Build, train and test the small "distilled" net (input -> softmax only)
    # on training_data2, which was filled with the big net's predictions.
    # NOTE(review): this file contains another `run_small_net` with the same
    # body — whichever is defined later shadows the earlier one.
    global training_data2, n2, t2, testing_data
    layers = []
    layers.append({
        'type': 'input',
        'out_sx': 24,
        'out_sy': 24,
        'out_depth': 1
    })
    #layers.append({'type': 'fc', 'num_neurons': 50, 'activation': 'relu'})
    layers.append({'type': 'softmax', 'num_classes': 10})
    print 'Layers made...'
    n2 = Net(layers)
    print 'Smaller Net made...'
    print n2
    t2 = Trainer(n2, {'method': 'sgd', 'momentum': 0.0})
    print 'Trainer made for smaller net...'
    print 'In training of smaller net...'
    print 'k', 'time\t\t ', 'loss\t ', 'training accuracy'
    print '----------------------------------------------------'
    try:
        for x, y in training_data2:
            stats = t2.train(x, y)
            print stats['k'], stats['time'], stats['loss'], stats['accuracy']
    except:  #hit control-c or other
        # Deliberate: a Ctrl-C stops training early and falls through to testing.
        pass
    print 'Testing smaller net: 5000 trials'
    right = 0
    count = 5000
    for x, y in sample(testing_data, count):
        n2.forward(x)
        # Booleans count as 0/1, so this accumulates the number of correct predictions.
        right += n2.getPrediction() == y
    accuracy = float(right) / count * 100
    print accuracy
noise_sigma=float( args['--noise_sigma'])) checkpoint_path = args['--checkpoint'] checkpoint = None if len(checkpoint_path) > 0: checkpoint = torch.load(checkpoint_path) generator.load_state_dict(checkpoint['generator_model_state_dict']) image_discriminator.load_state_dict( checkpoint['image_discriminator_model_state_dict']) video_discriminator.load_state_dict( checkpoint['video_discriminator_model_state_dict']) if torch.cuda.is_available(): generator.cuda() image_discriminator.cuda() video_discriminator.cuda() trainer = Trainer(image_loader, video_loader, int(args['--print_every']), int(args['--batches']), args['<log_folder>'], use_cuda=torch.cuda.is_available(), use_infogan=args['--use_infogan'], use_categories=args['--use_categories']) trainer.train(generator, image_discriminator, video_discriminator, checkpoint)
class Brain(object):
    """Deep-Q-learning agent (a Python port of convnetjs-style deepqlearn):
    wraps a value network, an epsilon-greedy policy, a sliding window of recent
    (state, action, reward) history, and an experience-replay buffer."""

    def __init__(self, num_states, num_actions, opt={}):
        # NOTE(review): mutable default `opt={}` — harmless while only read,
        # but fragile; confirm no caller mutates it.
        """
        in number of time steps, of temporal memory
        the ACTUAL input to the net will be (x,a) temporal_window times, and followed by current x
        so to have no information from previous time step going into value function, set to 0.
        """
        self.temporal_window = getopt(opt, 'temporal_window', 1)
        """size of experience replay memory"""
        self.experience_size = getopt(opt, 'experience_size', 30000)
        """number of examples in experience replay memory before we begin learning"""
        self.start_learn_threshold = getopt(
            opt, 'start_learn_threshold',
            int(min(self.experience_size * 0.1, 1000)))
        """gamma is a crucial parameter that controls how much plan-ahead the agent does. In [0,1]"""
        self.gamma = getopt(opt, 'gamma', 0.8)
        """number of steps we will learn for"""
        self.learning_steps_total = getopt(opt, 'learning_steps_total', 100000)
        """how many steps of the above to perform only random actions (in the beginning)?"""
        self.learning_steps_burnin = getopt(opt, 'learning_steps_burnin', 3000)
        """what epsilon value do we bottom out on? 0.0 => purely deterministic policy at end"""
        self.epsilon_min = getopt(opt, 'epsilon_min', 0.05)
        """what epsilon to use at test time? (i.e. when learning is disabled)"""
        self.epsilon_test_time = getopt(opt, 'epsilon_test_time', 0.01)
        """
        advanced feature. Sometimes a random action should be biased towards some values
        for example in flappy bird, we may want to choose to not flap more often
        """
        if 'random_action_distribution' in opt:
            #this better sum to 1 by the way, and be of length this.num_actions
            self.random_action_distribution = opt['random_action_distribution']
            if len(self.random_action_distribution) != num_actions:
                print 'TROUBLE. random_action_distribution should be same length as num_actions.'
            a = self.random_action_distribution
            s = sum(a)
            if abs(s - 1.0) > 0.0001:
                print 'TROUBLE. random_action_distribution should sum to 1!'
        else:
            self.random_action_distribution = []
        """
        states that go into neural net to predict optimal action look as
        x0,a0,x1,a1,x2,a2,...xt
        this variable controls the size of that temporal window.
        Actions are encoded as 1-of-k hot vectors
        """
        self.net_inputs = num_states * self.temporal_window + num_actions * self.temporal_window + num_states
        self.num_states = num_states
        self.num_actions = num_actions
        self.window_size = max(
            self.temporal_window,
            2)  #must be at least 2, but if we want more context even more
        # Sliding history buffers, one slot per recent time step.
        self.state_window = zeros(self.window_size)
        self.action_window = zeros(self.window_size)
        self.reward_window = zeros(self.window_size)
        self.net_window = zeros(self.window_size)

        #create [state -> value of all possible actions] modeling net for the value function
        layers = []
        if 'layers' in opt:
            """
            this is an advanced usage feature, because size of the input to the network, and number
            of actions must check out.
            """
            layers = opt['layers']
            if len(layers) < 2:
                print 'TROUBLE! must have at least 2 layers'
            if layers[0]['type'] != 'input':
                print 'TROUBLE! first layer must be input layer!'
            if layers[-1]['type'] != 'regression':
                print 'TROUBLE! last layer must be input regression!'
            if layers[0]['out_depth'] * layers[0]['out_sx'] * layers[0][
                    'out_sy'] != self.net_inputs:
                print 'TROUBLE! Number of inputs must be num_states * temporal_window + num_actions * temporal_window + num_states!'
            if layers[-1]['num_neurons'] != self.num_actions:
                print 'TROUBLE! Number of regression neurons should be num_actions!'
        else:
            #create a very simple neural net by default
            layers.append({
                'type': 'input',
                'out_sx': 1,
                'out_sy': 1,
                'out_depth': self.net_inputs
            })
            if 'hidden_layer_sizes' in opt:
                #allow user to specify this via the option, for convenience
                for size in opt['hidden_layer_sizes']:
                    layers.append({
                        'type': 'fc',
                        'num_neurons': size,
                        'activation': 'relu'
                    })
            layers.append({
                'type': 'regression',
                'num_neurons': self.num_actions
            })  #value function output

        self.value_net = Net(layers)

        #and finally we need a Temporal Difference Learning trainer!
        trainer_ops_default = {
            'learning_rate': 0.01,
            'momentum': 0.0,
            'batch_size': 64,
            'l2_decay': 0.01
        }
        tdtrainer_options = getopt(opt, 'tdtrainer_options', trainer_ops_default)
        self.tdtrainer = Trainer(self.value_net, tdtrainer_options)

        #experience replay
        self.experience = []

        #various housekeeping variables
        self.age = 0  #incremented every backward()
        self.forward_passes = 0  #incremented every forward()
        self.epsilon = 1.0  #controls exploration exploitation tradeoff. Should be annealed over time
        self.latest_reward = 0
        self.last_input_array = []
        self.average_reward_window = Window(1000, 10)
        self.average_loss_window = Window(1000, 10)
        self.learning = True

    def random_action(self):
        """
        a bit of a helper function. It returns a random action
        we are abstracting this away because in future we may want to do more
        sophisticated things. For example some actions could be more or less likely at "rest"/default state.
        """
        # NOTE(review): `random_action_distribution` is missing `self.` —
        # as written this raises NameError; presumably should read
        # `self.random_action_distribution`.
        if len(random_action_distribution) == 0:
            return randi(0, self.num_actions)
        else:
            #okay, lets do some fancier sampling
            p = randf(0, 1.0)
            cumprob = 0.0
            for k in xrange(self.num_actions):
                cumprob += self.random_action_distribution[k]
                if p < cumprob:
                    return k

    def policy(self, s):
        """
        compute the value of doing any action in this state
        and return the argmax action and its value
        """
        V = Vol(s)
        action_values = self.value_net.forward(V)
        weights = action_values.w
        max_val = max(weights)
        # NOTE(review): `maxval` is undefined — presumably should be `max_val`.
        max_k = weights.index(maxval)
        return {'action': max_k, 'value': max_val}

    def getNetInput(self, xt):
        """
        return s = (x,a,x,a,x,a,xt) state vector
        It's a concatenation of last window_size (x,a) pairs and current state x
        """
        w = []
        w.extend(xt)  #start with current state
        #and now go backwards and append states and actions from history temporal_window times
        n = self.window_size
        for k in xrange(self.temporal_window):
            index = n - 1 - k
            w.extend(self.state_window[index])  #state
            #action, encoded as 1-of-k indicator vector. We scale it up a bit because
            #we dont want weight regularization to undervalue this information, as it only exists once
            action1ofk = zeros(self.num_actions)
            # NOTE(review): the hot index looks wrong — `index` is a window
            # position, not an action id; the reference implementation uses
            # the recorded action (self.action_window[index]) here. Confirm.
            action1ofk[index] = 1.0 * self.num_states
            w.extend(action1ofk)
        return w

    def forward(self, input_array):
        # One agent step: pick an action (epsilon-greedy) and record history.
        self.forward_passes += 1
        self.last_input_array = input_array  # create network input
        action = None
        if self.forward_passes > self.temporal_window:
            #we have enough to actually do something reasonable
            net_input = self.getNetInput(input_array)

            if self.learning:
                #compute epsilon for the epsilon-greedy policy
                self.epsilon = min(
                    1.0,
                    max(
                        self.epsilon_min,
                        1.0 - \
                        (self.age - self.learning_steps_burnin) / \
                        (self.learning_steps_total - self.learning_steps_burnin)
                    )
                )
            else:
                self.epsilon = self.epsilon_test_time  #use test-time value

            rf = randf(0, 1)
            if rf < self.epsilon:
                #choose a random action with epsilon probability
                action = self.random_action()
            else:
                #otherwise use our policy to make decision
                maxact = self.policy(net_input)
                action = maxact['action']
        else:
            #pathological case that happens first few iterations
            #before we accumulate window_size inputs
            net_input = []
            action = self.random_action()

        #remember the state and action we took for backward pass
        self.net_window.pop(0)
        self.net_window.append(net_input)
        self.state_window.pop(0)
        self.state_window.append(input_array)
        self.action_window.pop(0)
        self.action_window.append(action)

    def backward(self, reward):
        # Receive the reward for the previous action, store an Experience
        # tuple, and (once warmed up) train the value net on a replay batch.
        self.latest_reward = reward
        self.average_reward_window.add(reward)
        self.reward_window.pop(0)
        self.reward_window.append(reward)

        if not self.learning:
            return

        self.age += 1

        #it is time t+1 and we have to store (s_t, a_t, r_t, s_{t+1}) as new experience
        #(given that an appropriate number of state measurements already exist, of course)
        if self.forward_passes > self.temporal_window + 1:
            n = self.window_size
            e = Experience(self.net_window[n - 2], self.action_window[n - 2],
                           self.reward_window[n - 2], self.net_window[n - 1])
            if len(self.experience) < self.experience_size:
                self.experience.append(e)
            else:
                # Replay buffer full: overwrite a random slot.
                ri = randi(0, self.experience_size)
                self.experience[ri] = e

        #learn based on experience, once we have some samples to go on
        #this is where the magic happens...
        if len(self.experience) > self.start_learn_threshold:
            avcost = 0.0
            for k in xrange(self.tdtrainer.batch_size):
                re = randi(0, len(self.experience))
                e = self.experience[re]
                x = Vol(1, 1, self.net_inputs)
                x.w = e.state0
                maxact = self.policy(e.state1)
                # NOTE(review): policy() returns a dict, so `maxact.value`
                # would raise AttributeError — presumably `maxact['value']`.
                r = e.reward0 + self.gamma * maxact.value
                ystruct = {'dim': e.action0, 'val': r}
                stats = self.tdtrainer.train(x, ystruct)
                avcost += stats['loss']
            avcost /= self.tdtrainer.batch_size
            self.average_loss_window.add(avcost)
dim_z_motion, video_length) image_discriminator = build_discriminator(image_discriminator, n_channels=n_channels, use_noise=use_noise, noise_sigma=noise_sigma) video_discriminator = build_discriminator(video_discriminator, dim_categorical=dim_z_category, n_channels=n_channels, use_noise=use_noise, noise_sigma=noise_sigma) if torch.cuda.is_available(): generator.cuda() image_discriminator.cuda() video_discriminator.cuda() trainer = Trainer(image_loader, video_loader, image_loader, video_loader, print_every, batches, log_folder, use_cuda=torch.cuda.is_available(), use_infogan=use_infogan, use_categories=use_categories) trainer.train(generator, image_discriminator, video_discriminator)
def k_fold():
    """Run FOLDS-fold cross-validated training of SawSeenNet and average the
    per-fold test-set mask predictions into one thresholded mask set.

    Returns:
        (test_file_paths, result_masks) — paths of the test images and the
        boolean masks obtained by averaging fold predictions and thresholding.
    """
    images, masks = load_train_data(TRAIN_IMAGES_PATH, TRAIN_MASKS_PATH)
    test_file_paths, test_images = load_test_data(TEST_IMAGES_PATH,
                                                  load_images=True,
                                                  to256=False)

    # Augmentation only on the training split; eval/predict get plain normalisation.
    train_transformer = transforms.Compose([
        CropAugmenter(),
        AffineAugmenter(),
        MasksAdder(),
        ToTensor(),
        Normalize(),
        ClassAdder()
    ])
    eval_transformer = transforms.Compose(
        [MasksAdder(), ToTensor(), Normalize(), ClassAdder()])
    predict_transformer = transforms.Compose(
        [ToTensor(predict=True), Normalize(predict=True)])

    test_images_loader = build_data_loader(test_images,
                                           None,
                                           predict_transformer,
                                           batch_size=BATCH_SIZE,
                                           shuffle=False,
                                           num_workers=4,
                                           predict=True)

    k_fold = KFold(n_splits=FOLDS, random_state=RANDOM_SEED, shuffle=True)

    # Per-fold soft masks, averaged after the loop.
    test_masks_folds = []

    config = AttrDict({
        'cuda_index': CUDA_ID,
        'momentum': MOMENTUM,
        'lr': LR,
        'tune_lr': TUNE_LR,
        'min_lr': MIN_LR,
        'bce_epochs': BCE_EPOCHS,
        'intermediate_epochs': INTERMEDIATE_EPOCHS,
        'cycle_length': CYCLE_LENGTH,
        'logs_dir': LOGS_DIR,
        'masks_weight': MASKS_WEIGHT,
        'class_weight': CLASS_WEIGHT,
        'val_metric_criterion': 'comp_metric'
    })

    for index, (train_index, valid_index) in list(enumerate(k_fold.split(images))):
        print('fold_{}\n'.format(index))
        x_train_fold, x_valid = images[train_index], images[valid_index]
        y_train_fold, y_valid = masks[train_index], masks[valid_index]
        train_data_loader = build_data_loader(x_train_fold,
                                              y_train_fold,
                                              train_transformer,
                                              batch_size=BATCH_SIZE,
                                              shuffle=True,
                                              num_workers=4,
                                              predict=False)
        val_data_loader = build_data_loader(x_valid,
                                            y_valid,
                                            eval_transformer,
                                            batch_size=BATCH_SIZE,
                                            shuffle=False,
                                            num_workers=4,
                                            predict=False)
        # NOTE(review): the 'test' loader reuses the validation split, not the
        # held-out test images — confirm this is intentional.
        test_data_loader = build_data_loader(x_valid,
                                             y_valid,
                                             eval_transformer,
                                             batch_size=BATCH_SIZE,
                                             shuffle=False,
                                             num_workers=4,
                                             predict=False)
        data_loaders = AttrDict({
            'train': train_data_loader,
            'val': val_data_loader,
            'test': test_data_loader
        })

        # Per-epoch (bce, lovasz) weight schedule: flat 0.1 lovasz during the
        # BCE phase, then a linear ramp 0.1 -> 0.9 over the intermediate epochs.
        zers = np.zeros(BCE_EPOCHS)
        zers += 0.1
        lovasz_ratios = np.linspace(0.1, 0.9, INTERMEDIATE_EPOCHS)
        lovasz_ratios = np.hstack((zers, lovasz_ratios))
        bce_ratios = 1.0 - lovasz_ratios
        loss_weights = [
            (bce_ratio, lovasz_ratio)
            for bce_ratio, lovasz_ratio in zip(bce_ratios, lovasz_ratios)
        ]
        loss = LossAggregator((nn.BCEWithLogitsLoss(), LovaszLoss()),
                              weights=[0.9, 0.1])
        metrics = {
            'binary_accuracy': BinaryAccuracy,
            'dice_coefficient': DiceCoefficient,
            'comp_metric': CompMetric
        }
        segmentor = SawSeenNet(base_channels=64, pretrained=True,
                               frozen=False).cuda(config.cuda_index)
        trainer = Trainer(config=config,
                          model=segmentor,
                          loss=loss,
                          loss_weights=loss_weights,
                          metrics=metrics,
                          data_loaders=data_loaders)

        segmentor = trainer.train(num_epochs=NUM_EPOCHS,
                                  model_pattern=MODEL_FILE_PATH +
                                  '_{}_fold.pth'.format(index))

        # Soft (unthresholded) masks on the real test set for this fold.
        test_masks = predict(config, segmentor, test_images_loader,
                             thresholding=False)
        test_masks = trim_masks(test_masks,
                                height=IMG_SIZE_ORIGIN,
                                width=IMG_SIZE_ORIGIN)
        test_masks_folds.append(test_masks)
        np.save(FOLDS_FILE_PATH.format(index), test_masks)

    # Average the soft masks across folds, then threshold to booleans.
    result_masks = np.zeros_like(test_masks_folds[0])
    for test_masks in test_masks_folds:
        result_masks += test_masks
    result_masks = result_masks.astype(dtype=np.float32)
    result_masks /= FOLDS
    result_masks = result_masks > THRESHOLD

    return test_file_paths, result_masks
audio_encoder = torch.nn.DataParallel(audio_encoder).cuda() # Save config. LOGGER.info('Saving configurations...') config_path = os.path.join(model_dir, 'config.json') with open(config_path, 'w') as f: json.dump(args, f) # Load data. LOGGER.info('Loading audio data...') # if torch.cuda.is_available(): # generator.cuda() # image_discriminator.cuda() # video_discriminator.cuda() #need other logger for image part trainer = Trainer(image_loader,image_loader_test, int(args['--print_every']), int(args['--batches']), args['<log_folder>'],LOGGER,LOGGER, use_cuda=torch.cuda.is_available(), use_infogan=args['--use_infogan'], use_categories=args['--use_categories']) trainer.train(ImageModel,netG,netD,audio_encoder,ImageGeneratorModel,ImageDiscriminatorModel, args)
perception_loss_weight = 1 if 'perception_loss' in config: if 'perception_model' in config['perception_loss']: perception_loss_model = build_model(config['perception_loss']['perception_model']['type'], config['perception_loss']['perception_model']['args'], device) else: perception_loss_model = discriminator perception_loss_weight = config['perception_loss']['weight'] trainer = Trainer( train_loader=train_loader, data_for_dataloader=d, # data for later dataloader creation, if needed opt_generator=opt_generator, opt_discriminator=opt_discriminator, adversarial_criterion=adversarial_criterion, reconstruction_criterion=reconstruction_criterion, reconstruction_weight=config['trainer']['reconstruction_weight'], adversarial_weight=config['trainer']['adversarial_weight'], log_interval=args.log_interval, perception_loss_model=perception_loss_model, perception_loss_weight=perception_loss_weight, use_image_loss=config['trainer']['use_image_loss'], device=device ) args_config = args.config.replace('\\', '/') args_config = args_config[args_config.rfind('/') + 1:] trainer.train(generator, discriminator, int(config['trainer']['epochs']), args.data_root, args_config, 0) print("Training finished", flush=True) sys.exit(0)
def main():
    """Parse CLI args, set up logging/data/model/optimizer for the SZO
    experiments, and run the pruning-aware Trainer."""
    ap = argparse.ArgumentParser("SZO")
    ap.add_argument("--data", choices=["mnist", "cifar10"], default="mnist",
                    help="dataset")  #, "skewedmnist"
    ap.add_argument(
        "--opt",
        choices=["first", "flaxman", "dueling", "ghadimi", "agarwal"],
        help="optimizer type")
    ap.add_argument("--model", choices=["fc3", "cnn"], help="Model type")
    ap.add_argument("--depth", default=1, type=int, help="Depth of the cnn")
    ap.add_argument("--seed", default=12345, type=int, help="random seed")
    ap.add_argument("--num_epochs", default=5, type=int, help="number of epochs")
    ap.add_argument("--num_rounds", default=20, type=int, help="number of rounds")
    ap.add_argument("--lr", default=0.1, type=float, help="initial learning rate")
    ap.add_argument("--pr", default=0.2, type=float, help="pruning rate")
    ap.add_argument("--mu", default=0.1, type=float,
                    help="exploration rate, smoothing parameter")
    ap.add_argument("--beta", default=0.0, type=float, help="momentum")
    ap.add_argument("--max_grad_norm", default=0.0, type=float,
                    help="maximum gradient norm")
    ap.add_argument("--var", default=1.0, type=float, help="noise variance")
    ap.add_argument("--eval_interval", default=10000, type=int,
                    help="evaluation interval")
    ap.add_argument("--batch_size", default=64, type=int, help="batch_size")
    ap.add_argument("--eval_batch_size", default=1000, type=int,
                    help="batch size used in evaluation")
    # NOTE(review): default=True with store_true means --cv is always True.
    ap.add_argument("--cv", default=True, action="store_true",
                    help="whether to include control variates")  # type=bool,
    ap.add_argument(
        "--init",
        choices=["reset", "random", "last"],  #, 'rewind', 'best'
        help="initialization strategy in pruning: one of {reset, random, last}"
    )  #, rewind, best
    #ap.add_argument("--rewind_step", type=int, help="which epoch to return to after pruning")
    ap.add_argument(
        "--reward",
        choices=["nce", "acc", "expected_reward", "sampled_score"],
        help=
        "reward function: one of {nce, acc, expected_reward, sampled_score}")
    ap.add_argument("--prune_or_freeze", choices=["none", "prune", "freeze"],
                    help="sparsification strategy: one of {prune or freeze}")
    ap.add_argument(
        "--masking_strategy",
        choices=["none", "L1", "heldout", "random"],
        help="masking strategy: one of {none, L1, heldout, random}")
    ap.add_argument(
        "--num_samples",
        type=int,
        help="number of samples to evaluate for gradient estimation")
    ap.add_argument("--device", choices=["cpu", "gpu"], default="cpu")
    ap.add_argument(
        '--affine',
        action="store_true",
        default=False,  # type=bool,
        help="if specified, turn on affine transform in normalization layers")
    ap.add_argument('--norm',
                    choices=["batch", "layer", "none"],
                    default="batch",
                    help="type of normalization to use between NN layeres")
    args = ap.parse_args()

    # Per-seed output directory for logs and run artefacts.
    log_dir = f'runs-{args.seed}'
    if not os.path.exists(log_dir):
        os.mkdir(log_dir)
    #if not os.path.exists('logs/'+log_dir):
    #    os.mkdir('logs/'+log_dir)

    # logging
    label = f'{args.opt}-{args.reward}-{args.prune_or_freeze}-{args.init}-{args.masking_strategy}-{args.batch_size}'
    logging.basicConfig(
        filename=os.path.join(log_dir, f'{label}-train.log'),
        filemode='a',
        format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
        datefmt='%m/%d/%Y %H:%M:%S',
        level=logging.INFO)
    logger = logging.getLogger(__name__)
    logger.addHandler(TqdmLoggingHandler())
    logger.info('Arguments:')
    for arg in vars(args):
        logger.info(f'\t{arg}: {getattr(args, arg)}')

    # data
    if args.data == 'mnist':
        trainset, testset, classes = mnist(data_path='data/MNIST_data/')
    elif args.data == 'cifar10':
        trainset, testset, classes = cifar10(data_path='data/CIFAR10_data/')
    trainloader, testloader, devloader = get_dataloader(
        trainset,
        testset,
        batch_size=args.batch_size,
        eval_batch_size=args.eval_batch_size,
        seed=args.seed)

    # model
    model = None
    model_kwargs = {
        'seed': args.seed,
        'class_names': classes,
        'output_dim': len(classes),
        'norm_affine': args.affine,
        'norm': args.norm
    }
    if args.model == 'cnn':
        # The CNN variant is only wired up for 32x32 CIFAR-10 inputs.
        assert args.data == 'cifar10'
        model_kwargs['modules'] = args.depth
        model_kwargs['input_size'] = 32
        model = ConvolutionalNN(**model_kwargs)
    elif args.model == 'fc3':
        if args.data == 'mnist':
            model_kwargs['input_dim'] = 28 * 28
        elif args.data == 'cifar10':
            model_kwargs['input_dim'] = 32 * 32 * 3
        model = FullyConnectedNN(**model_kwargs)
    else:
        raise ValueError("Unknown model type")

    # gpu
    device = None
    if args.device == 'gpu' and torch.cuda.is_available():
        device = 'cuda:0'
        torch.set_default_tensor_type(torch.cuda.FloatTensor)
    else:
        device = 'cpu'
    model.to(device)
    logger.info(f"Device: {device}")
    if torch.cuda.is_available():
        logger.info(f"\tn_gpu: {torch.cuda.device_count()}")

    # optimizer
    # NOTE(review): these truthiness checks drop values of exactly 0.0 (e.g.
    # --beta 0.0, --max_grad_norm 0.0), silently deferring to the optimizers'
    # own defaults in that case — confirm that is the intent.
    kwargs = {'prune_or_freeze': args.prune_or_freeze, 'init': args.init}
    if args.lr:
        kwargs['lr'] = args.lr
    if args.mu:
        kwargs['mu'] = args.mu
    if args.beta:
        kwargs['beta'] = args.beta
    if args.max_grad_norm:
        kwargs['max_grad_norm'] = args.max_grad_norm
    if args.var:
        kwargs['var'] = args.var
    if args.num_samples:
        kwargs['num_samples'] = args.num_samples
    #if args.init == 'rewind':
    #    print(args.rewind_step)

    opt = None
    if args.opt == 'first':
        if args.reward in ['sampled_score']:
            kwargs['cv'] = args.cv  # control variates
            opt = FirstOrderBanditOptimizer(model.parameters(), **kwargs)
        elif args.reward in ['nce', 'expected_reward']:
            opt = FirstOrderOptimizer(model.parameters(), **kwargs)
        else:
            raise ValueError
    elif args.opt == 'flaxman':
        opt = VanillaEvolutionOptimizer(model.parameters(), **kwargs)
    elif args.opt == 'dueling':
        opt = DuelingEvolutionOptimizer(model.parameters(), **kwargs)
    elif args.opt == 'ghadimi':
        opt = OneSideEvolutionOptimizer(model.parameters(), **kwargs)
    elif args.opt == 'agarwal':
        opt = TwoSideEvolutionOptimizer(model.parameters(), **kwargs)
    else:
        raise ValueError("Unknown optimizer type")

    #scheduler = lr_scheduler.ReduceLROnPlateau(opt, mode='max', patience=3, threshold=1e-2)
    scheduler = None  # constant learning rate

    # trainer
    pruning_rate = 0.0 if args.prune_or_freeze == 'none' or args.masking_strategy == 'none' else args.pr
    metrics = ['acc', 'f1-score', 'precision', 'recall']
    trainer = Trainer(model, opt,
                      scheduler,
                      args.num_epochs,
                      args.num_rounds,
                      label,
                      seed=args.seed,
                      init=args.init,
                      pruning_rate=pruning_rate,
                      reward=args.reward,
                      metrics=metrics,
                      log_dir=log_dir,
                      eval_interval=args.eval_interval,
                      masking_strategy=args.masking_strategy,
                      device=device)
    trainer.train(trainloader, testloader, devloader)
    #del model
    #del opt
    #del scheduler
    #del trainer
    logging.shutdown()