def configure_optimizers(self):
    """Prepare optimizer and schedule (linear warmup and decay)."""
    # Parameters whose name mentions one of these markers are exempt
    # from weight decay (biases and LayerNorm weights).
    no_decay = ["bias", "LayerNorm.weight"]

    decayed, exempt = [], []
    for name, param in self.model.named_parameters():
        if any(marker in name for marker in no_decay):
            exempt.append(param)
        else:
            decayed.append(param)

    param_groups = [
        {"params": decayed, "weight_decay": self.hparams.weight_decay},
        {"params": exempt, "weight_decay": 0.0},
    ]

    # Ranger (RAdam + Lookahead) works well for this task and, unlike the
    # stock transformers AdamW, needs no warmup. RAdam alone performs
    # about as well; the best conditions seem to be lr=1e-4 with gradient
    # accumulation over 2 batches.
    optimizer = ranger.Ranger(param_groups,
                              lr=self.hparams.learning_rate,
                              eps=self.hparams.adam_epsilon)

    # The constant schedule does nothing; replace it with another
    # scheduler here if one is ever required.
    lr_config = {
        'scheduler': transformers.get_constant_schedule(optimizer),
        'interval': 'step',
        'frequency': 1
    }
    return [optimizer], [lr_config]
def configure_optimizers(self):
    """Set up Ranger with a reduced LR on the output layer and StepLR decay."""
    base_lr = 1e-3

    def _is_input(layer):
        return self.input == layer

    def _is_output(layer):
        return self.output == layer

    def _is_hidden(layer):
        return self.output != layer and self.input != layer

    param_groups = [
        # Hidden layers train at the base rate.
        {'params': self.get_layers(_is_hidden), 'lr': base_lr},
        # Input layer additionally gets gradient centralization on dim 0.
        {'params': self.get_layers(_is_input), 'lr': base_lr, 'gc_dim': 0},
        # Output layer learns 10x slower.
        {'params': self.get_layers(_is_output), 'lr': base_lr / 10},
    ]

    # Increasing eps leads to less saturated nets with a few dead neurons.
    optimizer = ranger.Ranger(param_groups, betas=(.9, 0.999), eps=1.0e-7)
    # Drop the learning rate (x0.3) after every 75 epochs.
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=75,
                                                gamma=0.3)
    return [optimizer], [scheduler]
def configure_optimizers(self):
    """Set up Ranger with per-layer learning rates and geometric LR decay."""
    base_lr = 1.5e-3
    stacks = self.layer_stacks

    # Input features additionally get gradient centralization on dim 0;
    # the remaining (params, lr) pairs are appended in stack order, with
    # the output layer training 10x slower.
    train_params = [
        {'params': get_parameters([self.input]), 'lr': base_lr, 'gc_dim': 0}
    ]
    lr_spec = [
        ([stacks.l1_fact.weight], base_lr),
        ([stacks.l1.weight], base_lr),
        ([stacks.l1.bias], base_lr),
        ([stacks.l2.weight], base_lr),
        ([stacks.l2.bias], base_lr),
        ([stacks.output.weight], base_lr / 10),
        ([stacks.output.bias], base_lr / 10),
    ]
    train_params.extend({'params': p, 'lr': lr} for p, lr in lr_spec)

    # Increasing eps leads to less saturated nets with a few dead neurons.
    optimizer = ranger.Ranger(train_params, betas=(.9, 0.999), eps=1.0e-7,
                              gc_loc=False, use_gc=False)
    # Multiply the LR by 0.987 every epoch (smooth geometric decay).
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1,
                                                gamma=0.987)
    return [optimizer], [scheduler]
def configure_optimizers(self):
    """Build Ranger (per-layer LRs) plus a cosine warm-restart schedule."""
    lr = 1.5e-3
    stacks = self.layer_stacks

    # Input features get gradient centralization along dim 0.
    groups = [{'params': get_parameters([self.input]), 'lr': lr, 'gc_dim': 0}]
    # Every stack tensor trains at the base rate...
    for tensor in (stacks.l1_fact.weight,
                   stacks.l1.weight, stacks.l1.bias,
                   stacks.l2.weight, stacks.l2.bias):
        groups.append({'params': [tensor], 'lr': lr})
    # ...except the output layer, which trains 10x slower.
    for tensor in (stacks.output.weight, stacks.output.bias):
        groups.append({'params': [tensor], 'lr': lr / 10})

    # Increasing eps leads to less saturated nets with a few dead neurons.
    optimizer = ranger.Ranger(groups, betas=(.9, 0.999), eps=1.0e-7,
                              gc_loc=False, use_gc=False)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
        optimizer, T_0=100, verbose=True)
    return [optimizer], [scheduler]
# NOTE(review): notebook-extracted fragment. The enclosing class header and
# the `__init__` signature for the two `self.*` lines below are outside this
# view, and the final training loop's body continues past it -- confirm
# against the original notebook.
self.model = models.resnet101(pretrained=False)  # ResNet-101 backbone, randomly initialised
self.model.fc = nn.Linear(2048, 10)  # replace the classifier head: 2048 features -> 10 classes

def forward(self, x):
    # Delegate straight to the wrapped torchvision model.
    output = self.model(x)
    return output


# In[8]:

import ranger

net = Net()
net.to(device)  # `device` is defined in an earlier notebook cell

criterion = nn.CrossEntropyLoss()
# Ranger (RAdam + Lookahead) optimizer; `LR` comes from an earlier cell.
optimizer = ranger.Ranger(net.parameters(), lr=LR, eps=1e-6)
# Cosine annealing with T_max = EPOCH + 20 -- longer than the EPOCH
# training epochs actually run, so the LR never fully decays to its minimum.
scheduler = lr_scheduler.CosineAnnealingLR(optimizer, EPOCH + 20)

# Per-epoch bookkeeping lists; the `t*` names presumably hold test-set
# metrics -- confirm against the later cells that fill them.
loss_history = []
acc_history = []
tacc_history = []
tloss_history = []
lr_list = []
best_acc = 0
best_epoch = 0


# In[ ]:

start_time = time.time()
for epoch in range(EPOCH):
    epoch_time = time.time()  # loop body continues beyond this fragment
def main(args):
    """Train an NNUE network, checkpointing and logging validation loss.

    Builds data loaders, model, Ranger optimizer and a ReduceLROnPlateau
    scheduler, then runs an endless epoch loop (``while True`` -- there is
    no stopping condition, so training ends only on interruption): each
    epoch trains over the whole loader, evaluates validation loss every
    ``args.val_check_interval`` batches, saves checkpoints, and logs both
    losses to TensorBoard; the scheduler steps on end-of-epoch validation
    loss.
    """
    # Select which device to use
    if torch.cuda.is_available():
        main_device = 'cuda:0'
    else:
        main_device = 'cpu'

    # Create directories to store data and logs in
    output_path = prepare_output_directory()
    log_path = prepare_log_directory()

    # Print configuration info
    print(f'Device: {main_device}')
    print(f'Training set: {args.train}')
    print(f'Validation set: {args.val}')
    print(f'Batch size: {args.batch_size}')
    print(f'Using factorizer: {args.use_factorizer}')
    print(f'Lambda: {args.lambda_}')
    print(f'Validation check interval: {args.val_check_interval}')
    print(f'Logs written to: {log_path}')
    print(f'Data written to: {output_path}')
    print('')

    # Create log writer (TensorBoard)
    writer = SummaryWriter(log_path)

    # Create data loaders
    train_data_loader, val_data_loader = create_data_loaders(
        args.train, args.val, args.train_size, args.val_size,
        args.batch_size, args.use_factorizer, main_device)

    # Create model
    nnue = M.NNUE(args.use_factorizer,
                  feature_set=halfkp.Features()).to(main_device)

    # Configure optimizer; ReduceLROnPlateau shrinks the LR by 10x after
    # `patience` epochs without validation-loss improvement.
    optimizer = ranger.Ranger(nnue.parameters(), lr=1e-3)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', factor=0.1, patience=1, verbose=True,
        min_lr=1e-6)

    # Main training loop (runs forever -- see docstring)
    num_batches = len(train_data_loader)
    epoch = 0
    running_train_loss = 0.0
    while True:
        # NOTE(review): best_val_loss is reset here, so `new_best` flags a
        # per-epoch best rather than a global one.
        best_val_loss = 1000000.0
        for k, sample in enumerate(train_data_loader):
            train_loss = train_step(nnue, sample, optimizer, args.lambda_,
                                    epoch, k, num_batches)
            running_train_loss += train_loss.item()
            # Every val_check_interval batches: validate, checkpoint, log.
            if k % args.val_check_interval == (args.val_check_interval - 1):
                val_loss = calculate_validation_loss(nnue, val_data_loader,
                                                     args.lambda_)
                new_best = False
                if (val_loss < best_val_loss):
                    new_best = True
                    best_val_loss = val_loss
                save_model(nnue, output_path, epoch, k, val_loss, new_best,
                           False)
                # x-axis is the global batch index across epochs.
                writer.add_scalar('training loss',
                                  running_train_loss / args.val_check_interval,
                                  epoch * num_batches + k)
                writer.add_scalar('validation loss', val_loss,
                                  epoch * num_batches + k)
                running_train_loss = 0.0

        # End-of-epoch validation pass; the final checkpoint of the epoch
        # is marked with the end-of-epoch flag (last argument True).
        val_loss = calculate_validation_loss(nnue, val_data_loader,
                                             args.lambda_)
        new_best = False
        if (val_loss < best_val_loss):
            new_best = True
            best_val_loss = val_loss
        save_model(nnue, output_path, epoch, num_batches - 1, val_loss,
                   new_best, True)
        print('')
        # Plateau scheduler steps on the end-of-epoch validation loss.
        scheduler.step(val_loss)
        epoch += 1
def configure_optimizers(self):
    """Return a Ranger optimizer over all model parameters (default settings)."""
    return ranger.Ranger(self.parameters())
def main():
    """Train an NNUE network with Ranger, SWA averaging and early stopping.

    Parses the command line, builds the data loaders and model, then runs
    up to NUM_EPOCHS epochs: Ranger optimization with gradient clipping,
    ReduceLROnPlateau scheduling before SWA_START and SWA parameter
    averaging with SWALR after it, best-model checkpointing, early
    stopping, and a final evaluation of the SWA-averaged model.

    NOTE(review): indentation was reconstructed from a collapsed
    extraction of this file -- verify block nesting against the original.
    """
    parser = argparse.ArgumentParser(description="Trains the network.")
    parser.add_argument("train", help="Training data (.bin or .binpack)")
    parser.add_argument("val", help="Validation data (.bin or .binpack)")
    # NOTE(review): --tune and --save are parsed but not referenced in the
    # code visible here -- confirm they are used elsewhere.
    parser.add_argument("--tune", action="store_true",
                        help="automated LR search")
    parser.add_argument(
        "--save", action="store_true",
        help="save after every training epoch (default = False)")
    parser.add_argument("--experiment", default="1", type=str,
                        help="specify the experiment id")
    parser.add_argument("--py-data", action="store_true",
                        help="Use python data loader (default=False)")
    parser.add_argument(
        "--lambda", default=1.0, type=float, dest='lambda_',
        help="lambda=1.0 = train on evaluations, lambda=0.0 = train on game results, interpolates between (default=1.0).")
    parser.add_argument(
        "--num-workers", default=1, type=int, dest='num_workers',
        help="Number of worker threads to use for data loading. Currently only works well for binpack.")
    parser.add_argument(
        "--batch-size", default=-1, type=int, dest='batch_size',
        help="Number of positions per batch / per iteration. Default on GPU = 8192 on CPU = 128.")
    parser.add_argument(
        "--threads", default=-1, type=int, dest='threads',
        help="Number of torch threads to use. Default automatic (cores) .")
    parser.add_argument("--seed", default=42, type=int, dest='seed',
                        help="torch seed to use.")
    parser.add_argument(
        "--smart-fen-skipping", action='store_true',
        dest='smart_fen_skipping',
        help="If enabled positions that are bad training targets will be skipped during loading. Default: False")
    parser.add_argument(
        "--random-fen-skipping", default=0, type=int,
        dest='random_fen_skipping',
        help="skip fens randomly on average random_fen_skipping before using one.")
    parser.add_argument(
        "--resume-from-model", dest='resume_from_model',
        help="Initializes training using the weights from the given .pt model")
    features.add_argparse_args(parser)
    args = parser.parse_args()

    print("Training with {} validating with {}".format(args.train, args.val))

    # NOTE(review): the seed is hard-coded to 123; args.seed is ignored.
    torch.manual_seed(123)
    torch.cuda.manual_seed(123)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

    batch_size = args.batch_size
    if batch_size <= 0:
        # NOTE(review): args.gpus is not declared above -- presumably added
        # by features.add_argparse_args or elsewhere; confirm.
        batch_size = 128 if args.gpus == 0 else 8192
    print('Using batch size {}'.format(batch_size))
    print('Smart fen skipping: {}'.format(args.smart_fen_skipping))
    print('Random fen skipping: {}'.format(args.random_fen_skipping))

    if args.threads > 0:
        print('limiting torch to {} threads.'.format(args.threads))
        t_set_num_threads(args.threads)

    feature_set = features.get_feature_set_from_name(args.features)

    # Pick the python or c++ data loader; both return (train, val) loaders.
    if args.py_data:
        print('Using python data loader')
        train_data, val_data = data_loader_py(args.train, args.val,
                                              batch_size, feature_set,
                                              'cuda:0')
    else:
        print('Using c++ data loader')
        train_data, val_data = data_loader_cc(
            args.train, args.val, feature_set, args.num_workers, batch_size,
            args.smart_fen_skipping, args.random_fen_skipping, 'cuda:0')

    print("Feature set: {}".format(feature_set.name))
    print("Num real features: {}".format(feature_set.num_real_features))
    print("Num virtual features: {}".format(feature_set.num_virtual_features))
    print("Num features: {}".format(feature_set.num_features))

    # Training hyperparameters and bookkeeping state.
    START_EPOCH = 0
    NUM_EPOCHS = 150
    SWA_START = int(0.75 * NUM_EPOCHS)  # SWA begins after 75% of epochs
    LEARNING_RATE = 5e-4
    DECAY = 0
    EPS = 1e-7
    best_loss = 1000
    is_best = False
    early_stopping_delay = 30  # epochs without improvement before stopping
    early_stopping_count = 0
    early_stopping_flag = False
    summary_location = 'logs/nnue_experiment_' + args.experiment
    save_location = '/home/esigelec/PycharmProjects/nnue-pytorch/save_models/' + args.experiment
    writer = SummaryWriter(summary_location)

    nnue = M.NNUE(feature_set=feature_set, lambda_=args.lambda_, s=1)
    # Two parameter groups: one at the base LR, one at 10x.
    train_params = [{
        'params': nnue.get_1xlr(),
        'lr': LEARNING_RATE
    }, {
        'params': nnue.get_10xlr(),
        'lr': LEARNING_RATE * 10.0
    }]
    optimizer = ranger.Ranger(train_params, lr=LEARNING_RATE, eps=EPS,
                              betas=(0.9, 0.999), weight_decay=DECAY)

    # Optionally resume model/optimizer/epoch from a checkpoint, then move
    # the restored optimizer state tensors onto the GPU.
    if args.resume_from_model is not None:
        nnue, optimizer, START_EPOCH = load_ckp(args.resume_from_model, nnue,
                                                optimizer)
        nnue.set_feature_set(feature_set)
        for state in optimizer.state.values():
            for k, v in state.items():
                if isinstance(v, torch.Tensor):
                    state[k] = v.cuda()

    # Plateau scheduler for the pre-SWA phase; SWALR takes over afterwards
    # (one swa_lr per parameter group).
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', factor=0.1, patience=7, cooldown=1,
        min_lr=1e-7, verbose=True)
    swa_scheduler = SWALR(optimizer, annealing_epochs=5,
                          swa_lr=[5e-5, 1e-4])
    nnue = nnue.cuda()
    swa_nnue = AveragedModel(nnue)

    for epoch in range(START_EPOCH, NUM_EPOCHS):
        nnue.train()
        train_interval = 100  # batches between train-loss log points
        loss_f_sum_interval = 0.0
        loss_f_sum_epoch = 0.0
        loss_v_sum_epoch = 0.0
        if early_stopping_flag:
            print("early end of training at epoch" + str(epoch))
            break

        # One training epoch with gradient clipping at 0.5.
        for batch_idx, batch in enumerate(train_data):
            batch = [_data.cuda() for _data in batch]
            us, them, white, black, outcome, score = batch
            optimizer.zero_grad()
            output = nnue(us, them, white, black)
            loss = nnue_loss(output, outcome, score, args.lambda_)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(nnue.parameters(), 0.5)
            optimizer.step()
            loss_f_sum_interval += loss.float()
            loss_f_sum_epoch += loss.float()
            if batch_idx % train_interval == train_interval - 1:
                writer.add_scalar('train_loss',
                                  loss_f_sum_interval / train_interval,
                                  epoch * len(train_data) + batch_idx)
                loss_f_sum_interval = 0.0
        print("Epoch #{}\t Train_Loss: {:.8f}\t".format(
            epoch, loss_f_sum_epoch / len(train_data)))

        # Validation every epoch (epoch % 1 == 0 is always true, so the
        # condition is effectively a no-op guard).
        if epoch % 1 == 0 or (epoch + 1) == NUM_EPOCHS:
            with torch.no_grad():
                nnue.eval()
                for batch_idx, batch in enumerate(val_data):
                    batch = [_data.cuda() for _data in batch]
                    us, them, white, black, outcome, score = batch
                    _output = nnue(us, them, white, black)
                    loss_v = nnue_loss(_output, outcome, score, args.lambda_)
                    loss_v_sum_epoch += loss_v.float()

            if epoch > SWA_START:
                # SWA phase: average parameters and checkpoint the
                # averaged model separately.
                print("swa_mode")
                swa_nnue.update_parameters(nnue)
                swa_scheduler.step()
                checkpoint = {
                    'epoch': epoch + 1,
                    'state_dict': swa_nnue.state_dict(),
                    'optimizer': optimizer.state_dict()
                }
                save_ckp(checkpoint, save_location, 'swa_nnue.pt')
            else:
                # Pre-SWA phase: plateau scheduler on mean validation loss.
                scheduler.step(loss_v_sum_epoch / len(val_data))

            # Track the best validation loss for checkpointing and early
            # stopping.
            if loss_v_sum_epoch / len(val_data) <= best_loss:
                best_loss = loss_v_sum_epoch / len(val_data)
                is_best = True
                early_stopping_count = 0
            else:
                early_stopping_count += 1
                if early_stopping_delay == early_stopping_count:
                    early_stopping_flag = True
            if is_best:
                checkpoint = {
                    'epoch': epoch + 1,
                    'state_dict': nnue.state_dict(),
                    'optimizer': optimizer.state_dict()
                }
                save_ckp(checkpoint, save_location)
                is_best = False
            writer.add_scalar('val_loss', loss_v_sum_epoch / len(val_data),
                              epoch * len(train_data) + batch_idx)
            print("Epoch #{}\tVal_Loss: {:.8f}\t".format(
                epoch, loss_v_sum_epoch / len(val_data)))
            loss_v_sum_epoch = 0.0

    # Final pass: evaluate the SWA-averaged model on the validation set.
    with torch.no_grad():
        swa_nnue.eval()
        for batch_idx, batch in enumerate(val_data):
            batch = [_data.cuda() for _data in batch]
            us, them, white, black, outcome, score = batch
            _output = swa_nnue(us, them, white, black)
            loss_v = nnue_loss(_output, outcome, score, args.lambda_)
            loss_v_sum_epoch += loss_v.float()
    print("Val_Loss: {:.8f}\t".format(loss_v_sum_epoch / len(val_data)))
    writer.close()
def handler(self, pickedClass):
    """Instantiate the character class the user picked in the UI.

    Creates the matching class object with its default skill/equipment
    (and, where applicable, spell) choices, stores it on
    ``self.tab_window.main_window.classes``, and shows its description in
    ``self.label``. ``pickedClass`` is the lower-case class-name string.

    NOTE(review): indentation reconstructed from a collapsed source. The
    trailing ``self.classSelected = True`` is placed at method level here
    (the 'barbarian' branch also sets it, redundantly under that reading)
    -- confirm against the original file. Several runtime strings contain
    typos ('Viscous Mockery', 'Posion Spray', 'Eldrich blast', 'Sorceror')
    that cannot be changed in a comment-only edit.
    """
    if pickedClass == 'barbarian':
        self.tab_window.main_window.classes = barbarian.Barbarian(['Nature','Perception'],['Great-Axe','Two Handaxe','Explorer Pack with 4 Javelins'])
        self.label.setText("Barbarian\n{}".format(self.tab_window.main_window.classes.__str__()))
        self.classSelected = True
    elif pickedClass == 'bard':
        self.tab_window.main_window.classes = bard.Bard(['Persuasion', 'Stealth','Nature'],['Rapier','Diplomat Pack'],['Flute'],['Dancing Lights','Viscous Mockery'],['Charm Person','Detect Magic','Healing Word','Thunderwave'])
        self.label.setText("Bard\n{}".format(self.tab_window.main_window.classes.__str__()))
    elif pickedClass == 'cleric':
        self.tab_window.main_window.classes = cleric.Cleric(['Persuasion','Religion'],['Mace','Scale Mail','Light Crossbow','Priest Pack','Shield'],['Guidance','Light','Mending'],['Command','Identify'],'Light Domain')
        self.label.setText("Cleric\n{}".format(self.tab_window.main_window.classes.__str__()))
    elif pickedClass == 'druid':
        self.tab_window.main_window.classes = druid.Druid(["Animal Handling","Survival"],['Wooden Shield','Scimitar','Leather Armor','Explorers Pack','Druidic Focus'],['Posion Spray','Frostbite'],["Calm Animal","Charm Animal"])
        self.label.setText("Druid\n{}".format(self.tab_window.main_window.classes.__str__()))
    elif pickedClass == 'fighter':
        self.tab_window.main_window.classes = fighter.Fighter(['Intimidation','Athletics'],['Chain Mail','Martial Weapon and Shield','Light Crossbow and 20 bolts','Dungeoneers pack'],'Dueling')
        self.label.setText("Fighter\n{}".format(self.tab_window.main_window.classes.__str__()))
    elif pickedClass == 'monk':
        self.tab_window.main_window.classes = monk.Monk(['Acrobatics','Stealth'],['Shortsword','Dungeoneer pack','10 Darts'],'Guitar')
        self.label.setText("Monk\n{}".format(self.tab_window.main_window.classes.__str__()))
    elif pickedClass == 'paladin':
        self.tab_window.main_window.classes = paladin.Paladin(['Medicine','Religion'],['Martial weapon and Shield','5 Javelins','Priest Pack','Chain Mail','Holy Symbol'])
        self.label.setText("Paladin\n{}".format(self.tab_window.main_window.classes.__str__()))
    elif pickedClass == 'ranger':
        # Here ranger.Ranger is the D&D character class, not the optimizer.
        self.tab_window.main_window.classes = ranger.Ranger(['Animal Handling','Nature'],['Scale Mail','Two shortswords','Dungeoneer pack','Longbow 20 arrows'])
        self.label.setText("Ranger\n{}".format(self.tab_window.main_window.classes.__str__()))
    elif pickedClass == 'rogue':
        self.tab_window.main_window.classes = rogue.Rogue(['Stealth','Sleight of Hand','Investigation','Athletics'],['Rapier','Shortbow with 20 arrows','Burglars pack','Leather Armor','Two daggers',"Thieve's tools"],'Thieve\'s Tools','Stealth')
        self.label.setText("Rogue\n{}".format(self.tab_window.main_window.classes.__str__()))
    elif pickedClass == 'sorcerer':
        self.tab_window.main_window.classes = sorcerer.Sorcerer(['Arcana','Insight'],['Light crossbow 20 bolts','Component pouch','Dungeoneers pack','Two Daggers'],['Light','Prestidigitation','Ray of frost','Shocking grasp'],['Shield','Magic missile'],'Bloodline')
        self.label.setText("Sorceror\n{}".format(self.tab_window.main_window.classes.__str__()))
    elif pickedClass == 'warlock':
        self.tab_window.main_window.classes = warlock.Warlock(["Arcana","Religion"],['Light crossbow 20 bolts','Component pouch','Scholars pack','Leather Armor','Shortsword','Two daggers'],['Eldrich blast','Chill touch'],['Ray of sickness','Witch bolt'],'The Fiend')
        self.label.setText("Warlock\n{}".format(self.tab_window.main_window.classes.__str__()))
    elif pickedClass == 'wizard':
        self.tab_window.main_window.classes = wizard.Wizard(['History','Insight'],['Quarterstaff','Component pouch','Scholar pack','Spellbook'],['Mage hand','Light','Ray of Frost'],['Burning hands','Charm person','Feather fall','Mage armor','Missile','Sleep'])
        self.label.setText("Wizard\n{}".format(self.tab_window.main_window.classes.__str__()))
    self.classSelected = True