def _get_val_loader(train_config):
    """
    Returns the validation loader and x-Data object.

    Loads part 0 of the validation set, converts the policy targets with prepare_policy() and wraps
    the arrays into the data loader of the configured framework. The WDL and plys-to-end targets
    are only appended when both train_config.use_wdl and train_config.use_plys_to_end are set.

    :param train_config: Training configuration. The attributes framework, normalize, q_value_ratio,
     select_policy_from_plane, sparse_policy_label, is_policy_from_plane_data, use_wdl, use_plys_to_end,
     batch_size and cpu_count are read.
    :return: Tuple (val_data, x_val): the validation data loader and the validation input array
    :raises ValueError: If train_config.framework is neither 'gluon' nor 'pytorch'
    """
    framework = train_config.framework
    if framework not in ('gluon', 'pytorch'):
        # previously this fell through to an UnboundLocalError on `val_data`
        raise ValueError("Unsupported framework %r: expected 'gluon' or 'pytorch'" % (framework,))

    _, x_val, y_val_value, y_val_policy, plys_to_end, _ = load_pgn_dataset(
        dataset_type="val", part_id=0, normalize=train_config.normalize, verbose=False,
        q_value_ratio=train_config.q_value_ratio)
    y_val_policy = prepare_policy(y_val_policy, train_config.select_policy_from_plane,
                                  train_config.sparse_policy_label, train_config.is_policy_from_plane_data)

    # both frameworks must use the same condition; the pytorch branch used to test `use_wdl` twice
    use_aux_targets = train_config.use_wdl and train_config.use_plys_to_end

    if framework == 'gluon':
        if use_aux_targets:
            val_dataset = gluon.data.ArrayDataset(nd.array(x_val), nd.array(y_val_value),
                                                  nd.array(y_val_policy),
                                                  nd.array(value_to_wdl_label(y_val_value)),
                                                  nd.array(prepare_plys_label(plys_to_end)))
        else:
            val_dataset = gluon.data.ArrayDataset(nd.array(x_val), nd.array(y_val_value),
                                                  nd.array(y_val_policy))
        val_data = gluon.data.DataLoader(val_dataset, train_config.batch_size, shuffle=False,
                                         num_workers=train_config.cpu_count)
    else:  # framework == 'pytorch'
        if use_aux_targets:
            val_dataset = TensorDataset(torch.Tensor(x_val), torch.Tensor(y_val_value),
                                        torch.Tensor(y_val_policy),
                                        torch.Tensor(value_to_wdl_label(y_val_value)),
                                        torch.Tensor(prepare_plys_label(plys_to_end)))
        else:
            val_dataset = TensorDataset(torch.Tensor(x_val), torch.Tensor(y_val_value),
                                        torch.Tensor(y_val_policy))
        # NOTE(review): the gluon loader uses shuffle=False; shuffle=True is kept here unchanged
        val_data = DataLoader(val_dataset, shuffle=True, batch_size=train_config.batch_size,
                              num_workers=train_config.cpu_count)

    return val_data, x_val
def _get_train_loader(self, part_id):
    """
    Builds a shuffled PyTorch DataLoader for one part of the training set.

    The loaded arrays are stored on the instance (x_train, yv_train, yp_train, plys_to_end);
    yp_train is replaced by the output of prepare_policy().

    :param part_id: Id of the training part file passed to load_pgn_dataset()
    :return: DataLoader over the selected training part
    """
    cfg = self.tc

    # load one chunk of the dataset from memory
    loaded = load_pgn_dataset(dataset_type="train",
                              part_id=part_id,
                              normalize=cfg.normalize,
                              verbose=False,
                              q_value_ratio=cfg.q_value_ratio)
    _, self.x_train, self.yv_train, self.yp_train, self.plys_to_end, _ = loaded

    self.yp_train = prepare_policy(y_policy=self.yp_train,
                                   select_policy_from_plane=cfg.select_policy_from_plane,
                                   sparse_policy_label=cfg.sparse_policy_label,
                                   is_policy_from_plane_data=cfg.is_policy_from_plane_data)

    # the WDL and plys-to-end targets are only appended if both options are enabled
    with_aux_targets = cfg.use_wdl and cfg.use_plys_to_end

    tensors = [torch.Tensor(self.x_train),
               torch.Tensor(self.yv_train),
               torch.Tensor(self.yp_train)]
    if with_aux_targets:
        tensors.append(torch.Tensor(value_to_wdl_label(self.yv_train)))
        tensors.append(torch.Tensor(prepare_plys_label(self.plys_to_end)))

    return DataLoader(TensorDataset(*tensors),
                      shuffle=True,
                      batch_size=cfg.batch_size,
                      num_workers=cfg.cpu_count)
def convert_all_planes_to_rec(self):
    """
    Converts all part files loaded via load_pgn_dataset() to a single .rec file.

    Every sample is flattened, zlib-compressed and written with a header that holds the value
    target and the argmax index of the policy target. The record writer is closed even if
    loading or packing a part fails.

    :return:
    """
    # we must add '**/*' because we want to go into the time stamp directory
    plane_files = glob(self._import_dir + "**/*")

    # construct the export filepaths
    idx_filepath = "%s%s" % (self._export_dir, self._dataset_type + ".idx")
    rec_filepath = "%s%s" % (self._export_dir, self._dataset_type + ".rec")

    # create both an '.idx' and '.rec' file
    # the '.idx' file stores the indices to the string buffers
    # the '.rec' files stores the planes in a compressed binary string buffer format
    record = mx.recordio.MXIndexedRecordIO(idx_filepath, rec_filepath, "w")

    idx = 0
    try:
        for part_id in range(len(plane_files)):
            t_s = time()
            logging.info("PART: %d", part_id)
            # load one chunk of the dataset from memory
            _, x, yv, yp, _ = load_pgn_dataset(
                dataset_type=self._dataset_type,
                part_id=part_id,
                print_statistics=True,
                print_parameters=False,
                normalize=False,
            )

            # iterate over all board states aka. data samples in the file
            for position, planes in enumerate(x):
                buf = zlib.compress(planes.flatten().tobytes())
                # we only store the integer idx of the highest output
                header = mx.recordio.IRHeader(0, [yv[position], yp[position].argmax()], idx, 0)
                record.write_idx(idx, mx.recordio.pack(header, buf))
                idx += 1

            # log the elapsed time for a single dataset part file
            logging.debug("elapsed time %.2fs", (time() - t_s))
    finally:
        # close the record file, also when a part could not be converted
        record.close()

    logging.debug("created %s sucessfully", idx_filepath)
    logging.debug("created %s sucessfully", rec_filepath)
def test_loaded_dataset_black_move(self):
    """
    Loads the test dataset and checks that the policy vector at index 1 (black's first move)
    is one-hot and converts to a legal move after the dummy move e2e4.

    :return:
    """
    dataset = load_pgn_dataset(dataset_type="test",
                               part_id=0,
                               print_statistics=True,
                               print_parameters=True,
                               normalize=True)
    _, _, _, yp_val, _ = dataset

    board = chess.variant.CrazyhouseBoard()
    # push a dummy move
    board.push_uci("e2e4")

    # convert the same policy vector with all three conversion helpers
    move_direct = policy_to_move(yp_val[1], is_white_to_move=False)

    move_best, best_prob = policy_to_best_move(board, yp_val[1])
    self.assertEqual(best_prob, 1, msg="The policy vector has to be one hot encoded.")

    selected_moves, move_probabilities = policy_to_moves(board, yp_val[1])
    move_top = selected_moves[0]
    self.assertGreater(move_probabilities[0], 0, msg="The move probability must be greater 0")
    self.assertEqual(move_probabilities[0], 1, msg="The policy vector has to be one hot encoded.")

    for candidate in (move_direct, move_best, move_top):
        # check if the move is legal in the current position
        is_legal = any(legal_move == candidate for legal_move in board.legal_moves)
        failure_msg = ("Convert move %s is not a legal move in the starting position for BLACK"
                       % candidate.uci())
        self.assertTrue(is_legal, msg=failure_msg)
def custom_metric_eval(self):
    """
    Evaluates the model based on the validation set of different variants.

    Validation part i is scored for the i-th entry of self.to.variant_metrics; each metric is
    printed and, if enabled, added to the tensorboard event file.
    """
    variant_names = self.to.variant_metrics
    if variant_names is None:
        return

    prefix = "val_"
    for part_id, variant_name in enumerate(variant_names):
        # load one chunk of the dataset from memory
        _, x_val, yv_val, yp_val, _, _ = load_pgn_dataset(dataset_type="val",
                                                          part_id=part_id,
                                                          normalize=self.tc.normalize,
                                                          verbose=False,
                                                          q_value_ratio=self.tc.q_value_ratio)

        # sparse policy target: index of the highest entry, optionally mapped via FLAT_PLANE_IDX
        policy_label = yp_val.argmax(axis=1)
        if self.tc.select_policy_from_plane:
            policy_label = np.array(FLAT_PLANE_IDX)[policy_label]

        labels = {'value_label': yv_val, 'policy_label': policy_label}
        val_iter = mx.io.NDArrayIter({'data': x_val}, labels, self.tc.batch_size)

        for entry in self._model.score(val_iter, self.to.metrics):
            name = variant_name + "_" + entry[0]
            value = entry[1]
            print(" - %s%s: %.4f" % (prefix, name, value), end="")
            # add the metrics to the tensorboard event file
            if self.tc.log_metrics_to_tensorboard:
                self.sum_writer.add_scalar(name, [prefix.replace("_", ""), value], self.k_steps)
    print()
def update_network(queue, nn_update_idx, k_steps_initial, max_lr, symbol_filename, params_filename, cwd,
                   convert_to_onnx):
    """
    Creates a new NN checkpoint in the model contender directory after training using the game files stored in the
     training directory.
    Updates the neural network with the newly acquired games from the replay memory.

    :param queue: Queue object used to return items
    :param nn_update_idx: Defines how many updates of the nn has already been done. This index should be incremented
     after every update.
    :param k_steps_initial: Initial amount of steps of the NN update
    :param max_lr: Maximum learning rate used for the learning rate schedule
    :param symbol_filename: Architecture definition file
    :param params_filename: Weight file which will be loaded before training
    :param cwd: Current working directory (must end with "/")
    :param convert_to_onnx: Boolean indicating if the network shall be exported to ONNX to allow TensorRT inference
    :return: None; k_steps_final is put into the queue
    :raises Exception: If no .zip part file is found below main_config["planes_train_dir"]
    """
    # set the context on CPU, switch to GPU if there is one available (strongly recommended for training)
    ctx = mx.gpu(train_config["device_id"]) if train_config["context"] == "gpu" else mx.cpu()

    # count the training part files
    nb_parts = len(glob.glob(main_config["planes_train_dir"] + '**/*.zip'))
    logging.info("number parts: %d" % nb_parts)

    if nb_parts <= 0:
        raise Exception('No .zip files for training available. Check the path in main_config["planes_train_dir"]:'
                        ' %s' % main_config["planes_train_dir"])

    _, x_val, y_val_value, y_val_policy, _, _ = load_pgn_dataset(dataset_type="val",
                                                                 part_id=0,
                                                                 normalize=train_config["normalize"],
                                                                 verbose=False,
                                                                 q_value_ratio=train_config["q_value_ratio"])
    y_val_policy = prepare_policy(y_val_policy, train_config["select_policy_from_plane"],
                                  train_config["sparse_policy_label"])

    symbol = mx.sym.load(symbol_filename)
    if not train_config["sparse_policy_label"]:
        symbol = add_non_sparse_cross_entropy(symbol, train_config["val_loss_factor"],
                                              train_config["value_output"] + "_output",
                                              train_config["policy_output"] + "_output")

    # calculate how many iterations per epoch exist
    nb_it_per_epoch = (len(x_val) * nb_parts) // train_config["batch_size"]
    # one iteration is defined by passing 1 batch and doing backprop
    total_it = int(nb_it_per_epoch * train_config["nb_epochs"])

    lr_schedule = CosineAnnealingSchedule(train_config["min_lr"], max_lr, max(total_it * .7, 1))
    lr_schedule = LinearWarmUp(lr_schedule, start_lr=train_config["min_lr"], length=max(total_it * .25, 1))
    momentum_schedule = MomentumSchedule(lr_schedule, train_config["min_lr"], max_lr,
                                         train_config["min_momentum"], train_config["max_momentum"])

    # the policy label was already converted by prepare_policy(), so one iterator definition
    # covers both settings of "select_policy_from_plane" (the former if/else branches were identical)
    val_iter = mx.io.NDArrayIter({'data': x_val},
                                 {'value_label': y_val_value, 'policy_label': y_val_policy},
                                 train_config["batch_size"])

    input_shape = x_val[0].shape
    model = mx.mod.Module(symbol=symbol, context=ctx, label_names=['value_label', 'policy_label'])
    # mx.viz.print_summary(
    #    symbol,
    #    shape={'data': (1, input_shape[0], input_shape[1], input_shape[2])},
    # )
    model.bind(for_training=True,
               data_shapes=[('data', (train_config["batch_size"], input_shape[0], input_shape[1],
                                      input_shape[2]))],
               label_shapes=val_iter.provide_label)
    model.load_params(params_filename)

    metrics = [
        mx.metric.MSE(name='value_loss', output_names=['value_output'], label_names=['value_label']),
        mx.metric.create(acc_sign, name='value_acc_sign', output_names=['value_output'],
                         label_names=['value_label']),
    ]

    if train_config["sparse_policy_label"]:
        print("train with sparse labels")
        # the default cross entropy only supports sparse labels
        metrics.append(mx.metric.Accuracy(axis=1, name='policy_acc', output_names=['policy_output'],
                                          label_names=['policy_label']))
        metrics.append(mx.metric.CrossEntropy(name='policy_loss', output_names=['policy_output'],
                                              label_names=['policy_label']))
    else:
        metrics.append(mx.metric.create(acc_distribution, name='policy_acc', output_names=['policy_output'],
                                        label_names=['policy_label']))
        metrics.append(mx.metric.create(cross_entropy, name='policy_loss', output_names=['policy_output'],
                                        label_names=['policy_label']))

    logging.info("Performance pre training")
    logging.info(model.score(val_iter, metrics))

    train_agent = TrainerAgentMXNET(model, symbol, val_iter, nb_parts, lr_schedule, momentum_schedule, total_it,
                                    train_config["optimizer_name"], wd=train_config["wd"],
                                    batch_steps=train_config["batch_steps"],
                                    k_steps_initial=k_steps_initial, cpu_count=train_config["cpu_count"],
                                    batch_size=train_config["batch_size"], normalize=train_config["normalize"],
                                    export_weights=train_config["export_weights"],
                                    export_grad_histograms=train_config["export_grad_histograms"],
                                    log_metrics_to_tensorboard=train_config["log_metrics_to_tensorboard"],
                                    ctx=ctx, metrics=metrics, use_spike_recovery=train_config["use_spike_recovery"],
                                    max_spikes=train_config["max_spikes"],
                                    spike_thresh=train_config["spike_thresh"], seed=None,
                                    val_loss_factor=train_config["val_loss_factor"],
                                    policy_loss_factor=train_config["policy_loss_factor"],
                                    select_policy_from_plane=train_config["select_policy_from_plane"],
                                    discount=train_config["discount"],
                                    sparse_policy_label=train_config["sparse_policy_label"],
                                    q_value_ratio=train_config["q_value_ratio"],
                                    cwd=cwd)

    # iteration counter used for the momentum and learning rate schedule
    # NOTE(review): this reads train_config["k_steps_initial"], not the k_steps_initial parameter
    # that is handed to the trainer above - confirm that both are kept in sync by the caller
    cur_it = train_config["k_steps_initial"] * train_config["batch_steps"]
    (k_steps_final, val_value_loss_final, val_policy_loss_final, val_value_acc_sign_final,
     val_policy_acc_final), _ = train_agent.train(cur_it)

    if not train_config["sparse_policy_label"]:
        symbol = remove_no_sparse_cross_entropy(symbol, train_config["val_loss_factor"],
                                                train_config["value_output"] + "_output",
                                                train_config["policy_output"] + "_output")

    # the final validation metrics are encoded in the checkpoint file name
    prefix = cwd + "model_contender/model-%.5f-%.5f-%.3f-%.3f" % (val_value_loss_final, val_policy_loss_final,
                                                                   val_value_acc_sign_final, val_policy_acc_final)

    sym_file = prefix + "-symbol.json"
    params_file = prefix + "-" + "%04d.params" % nn_update_idx
    symbol.save(sym_file)
    model.save_params(params_file)

    if convert_to_onnx:
        convert_mxnet_model_to_onnx(sym_file, params_file, ["value_out_output", "policy_out_output"], input_shape,
                                    [1, 8, 16], False)

    logging.info("k_steps_final %d" % k_steps_final)
    queue.put(k_steps_final)
def run_training(alpha, queue):
    """
    Trains one rise_mobile_v3 network whose depth and width are derived from alpha and reports the result.

    depth is base_depth * alpha and channels is base_channels * beta with beta = sqrt(2 / alpha),
    both rounded to integers. The best validation metrics are put into the queue as a dict and printed.

    :param alpha: Scaling factor for the network depth
    :param queue: Queue object which receives the result row
    :return:
    """
    _, x_val, yv_val, yp_val, plys_to_end, _ = load_pgn_dataset(dataset_type='val',
                                                                part_id=0,
                                                                verbose=True,
                                                                normalize=tc.normalize)
    if tc.discount != 1:
        yv_val *= tc.discount**plys_to_end

    # sparse policy target: index of the highest entry, optionally mapped via FLAT_PLANE_IDX
    policy_label = yp_val.argmax(axis=1)
    if tc.select_policy_from_plane:
        policy_label = np.array(FLAT_PLANE_IDX)[policy_label]
    val_iter = mx.io.NDArrayIter({'data': x_val},
                                 {'value_label': yv_val, 'policy_label': policy_label},
                                 tc.batch_size)

    tc.nb_parts = len(glob.glob(main_config['planes_train_dir'] + '**/*'))

    # calculate how many iterations per epoch exist
    nb_it_per_epoch = (len(x_val) * tc.nb_parts) // tc.batch_size
    # one iteration is defined by passing 1 batch and doing backprop
    tc.total_it = int(nb_it_per_epoch * tc.nb_training_epochs)

    ### Define a Learning Rate schedule
    one_cycle = OneCycleSchedule(start_lr=tc.max_lr / 8,
                                 max_lr=tc.max_lr,
                                 cycle_length=tc.total_it * .3,
                                 cooldown_length=tc.total_it * .6,
                                 finish_lr=tc.min_lr)
    to.lr_schedule = one_cycle
    to.lr_schedule = LinearWarmUp(to.lr_schedule, start_lr=tc.min_lr, length=tc.total_it / 30)

    ### Momentum schedule
    to.momentum_schedule = MomentumSchedule(to.lr_schedule, tc.min_lr, tc.max_lr,
                                            tc.min_momentum, tc.max_momentum)
    plot_schedule(to.momentum_schedule, iterations=tc.total_it, ylabel='Momentum')

    input_shape = x_val[0].shape

    beta = np.sqrt(2 / alpha)
    print("alpha:", alpha)
    print("beta:", beta)

    depth = int(round(base_depth * alpha))
    channels = int(round(base_channels * beta))
    channels_reduced = int(round(channels / 4))

    kernels = [3] * depth
    se_types = [None] * len(kernels)

    symbol = rise_mobile_v3_symbol(channels=channels,
                                   channels_operating_init=channels_reduced,
                                   act_type='relu',
                                   channels_value_head=8,
                                   value_fc_size=256,
                                   channels_policy_head=NB_POLICY_MAP_CHANNELS,
                                   grad_scale_value=tc.val_loss_factor,
                                   grad_scale_policy=tc.policy_loss_factor,
                                   dropout_rate=tc.dropout_rate,
                                   select_policy_from_plane=True,
                                   kernels=kernels,
                                   se_types=se_types)

    # create a trainable module on compute context
    model = mx.mod.Module(symbol=symbol, context=ctx, label_names=['value_label', 'policy_label'])
    batch_shape = (tc.batch_size, input_shape[0], input_shape[1], input_shape[2])
    model.bind(for_training=True,
               data_shapes=[('data', batch_shape)],
               label_shapes=val_iter.provide_label)
    model.init_params(mx.initializer.Xavier(rnd_type='uniform', factor_type='avg', magnitude=2.24))

    to.metrics = [
        metric.MSE(name='value_loss', output_names=['value_output'], label_names=['value_label']),
        metric.CrossEntropy(name='policy_loss', output_names=['policy_output'],
                            label_names=['policy_label']),
        metric.create(acc_sign, name='value_acc_sign', output_names=['value_output'],
                      label_names=['value_label']),
        metric.Accuracy(axis=1, name='policy_acc', output_names=['policy_output'],
                        label_names=['policy_label']),
    ]

    train_agent = TrainerAgentMXNET(model, symbol, val_iter, tc, to, use_rtpt=True)
    print("model.score(val_iter, to.metrics:", model.score(val_iter, to.metrics))

    # Start the training process
    _, (k_steps_best, val_metric_values_best) = train_agent.train(cur_it)

    # collect the architecture settings and the best validation metrics in a single row
    new_row = {
        'alpha': alpha,
        'beta': beta,
        'depth': depth,
        'channels': channels,
        'k_steps_best': k_steps_best,
    }
    column_to_metric = (('val_loss', 'loss'),
                        ('val_value_loss', 'value_loss'),
                        ('val_policy_loss', 'policy_loss'),
                        ('val_policy_acc', 'policy_acc'),
                        ('val_value_acc', 'value_acc_sign'))
    for column, metric_name in column_to_metric:
        new_row[column] = val_metric_values_best[metric_name]

    queue.put(new_row)
    print(new_row)
def __init__(self, part_id=0, *args, **kwargs):
    """
    Constructor: loads one part of the test dataset and filters the pgn file it was created from.

    :param part_id: Part id to choose for file selection. This way you can test different variants at a time.
    :param args: Positional arguments forwarded to the parent constructor
    :param kwargs: Keyword arguments forwarded to the parent constructor
    """
    super(FullRoundTripTests, self).__init__(*args, **kwargs)

    logging.info("loading test dataset...")
    loaded = load_pgn_dataset(dataset_type="test", part_id=part_id, verbose=True, normalize=False)
    (self._s_idcs_test, self._x_test, self._yv_test, self._yp_test, _,
     self._pgn_datasets_test) = loaded

    logging.info("loading test pgn file...")
    stored = self._pgn_datasets_test
    self._pgn_filename = stored["parameters/pgn_name"][0].decode("UTF8")
    self._batch_size = stored["parameters/batch_size"][0]
    self._start_indices = stored["start_indices"]

    # self._min_elo_both = self._pgn_datasets_test["parameters/min_elo_both"][0]
    # Rating cap at 90% cumulative rating for all variants; only the "Chess" entry is active
    self._min_elo_both = {
        "Chess": 2200,
        # "Crazyhouse": 2000,
        # "Chess960": 1950,
        # "King of the Hill": 1925,
        # "Three-check": 1900,
        # "Antichess": 1925,
        # "Atomic": 1900,
        # "Horde": 1900,
        # "Racing Kings": 1900
    }

    use_all_games = bool(MODE == MODE_CHESS and VERSION == 2)

    converter = PGN2PlanesConverter(limit_nb_games_to_analyze=0,
                                    nb_games_per_file=self._batch_size,
                                    max_nb_files=1,
                                    min_elo_both=self._min_elo_both,
                                    termination_conditions=["Normal"],
                                    log_lvl=logging.DEBUG,
                                    compression="lz4",
                                    clevel=5,
                                    dataset_type="test",
                                    first_pgn_to_analyze=self._pgn_filename,
                                    use_all_games=use_all_games)

    # only the first return value of filter_pgn() is kept
    filtered = converter.filter_pgn()
    self._all_pgn_sel, _, _, _, _ = filtered
    print(len(self._all_pgn_sel))
def __init__(self, *args, **kwargs):
    """
    Constructor: loads part 0 of the test dataset and filters the pgn games with the stored parameters.

    :param args: Positional arguments forwarded to the parent constructor
    :param kwargs: Keyword arguments forwarded to the parent constructor
    """
    super(FullRoundTripTests, self).__init__(*args, **kwargs)

    logging.info("loading test dataset...")
    loaded = load_pgn_dataset(dataset_type="test",
                              part_id=0,
                              print_statistics=True,
                              normalize=False,
                              print_parameters=True)
    (self._s_idcs_test, self._x_test, self._yv_test, self._yp_test,
     self._pgn_datasets_test) = loaded

    logging.info("loading test pgn file...")
    stored = self._pgn_datasets_test
    self._pgn_filename = stored["parameters/pgn_name"][0].decode("UTF8")
    self._batch_size = stored["parameters/batch_size"][0]
    self._min_elo_both = stored["parameters/min_elo_both"][0]
    self._start_indices = stored["start_indices"]

    # reuse the parameters stored in the dataset for the converter
    converter = PGN2PlanesConverter(limit_nb_games_to_analyze=0,
                                    nb_games_per_file=self._batch_size,
                                    max_nb_files=1,
                                    min_elo_both=self._min_elo_both,
                                    termination_conditions=["Normal"],
                                    log_lvl=logging.DEBUG,
                                    compression="lz4",
                                    clevel=5,
                                    dataset_type="test")

    # only the first return value of filter_pgn() is kept
    filtered = converter.filter_pgn()
    self._all_pgn_sel, _, _, _, _ = filtered
    print(len(self._all_pgn_sel))
def update_network(queue, nn_update_idx, symbol_filename, params_filename, convert_to_onnx, main_config,
                   train_config: TrainConfig, model_contender_dir):
    """
    Creates a new NN checkpoint in the model contender directory after training using the game files stored in the
     training directory.
    Updates the neural network with the newly acquired games from the replay memory.

    :param queue: Queue object used to return items
    :param nn_update_idx: Defines how many updates of the nn has already been done. This index should be incremented
     after every update.
    :param symbol_filename: Architecture definition file
    :param params_filename: Weight file which will be loaded before training
    :param convert_to_onnx: Boolean indicating if the network shall be exported to ONNX to allow TensorRT inference
    :param main_config: Dict of the main_config (imported from main_config.py)
    :param train_config: TrainConfig object; nb_parts, total_it and export_weights are overwritten here
    :param model_contender_dir: String of the contender directory path (used as a plain prefix for the export)
    :return: None; k_steps_final is put into the queue
    :raises Exception: If no .zip part file is found below main_config["planes_train_dir"]
    """
    # set the context on CPU, switch to GPU if there is one available (strongly recommended for training)
    ctx = mx.gpu(train_config.device_id) if train_config.context == "gpu" else mx.cpu()

    # count the training part files
    train_config.nb_parts = len(glob.glob(main_config["planes_train_dir"] + '**/*.zip'))
    logging.info("number parts for training: %d" % train_config.nb_parts)
    train_objects = TrainObjects()

    if train_config.nb_parts <= 0:
        raise Exception('No .zip files for training available. Check the path in main_config["planes_train_dir"]:'
                        ' %s' % main_config["planes_train_dir"])

    _, x_val, y_val_value, y_val_policy, _, _ = load_pgn_dataset(dataset_type="val",
                                                                 part_id=0,
                                                                 normalize=train_config.normalize,
                                                                 verbose=False,
                                                                 q_value_ratio=train_config.q_value_ratio)
    y_val_policy = prepare_policy(y_val_policy, train_config.select_policy_from_plane,
                                  train_config.sparse_policy_label, train_config.is_policy_from_plane_data)

    val_dataset = gluon.data.ArrayDataset(nd.array(x_val), nd.array(y_val_value), nd.array(y_val_policy))
    val_data = gluon.data.DataLoader(val_dataset, train_config.batch_size, shuffle=False,
                                     num_workers=train_config.cpu_count)

    symbol = mx.sym.load(symbol_filename)

    # calculate how many iterations per epoch exist
    nb_it_per_epoch = (len(x_val) * train_config.nb_parts) // train_config.batch_size
    # one iteration is defined by passing 1 batch and doing backprop
    train_config.total_it = int(nb_it_per_epoch * train_config.nb_training_epochs)

    train_objects.lr_schedule = CosineAnnealingSchedule(train_config.min_lr, train_config.max_lr,
                                                        max(train_config.total_it * .7, 1))
    train_objects.lr_schedule = LinearWarmUp(train_objects.lr_schedule, start_lr=train_config.min_lr,
                                             length=max(train_config.total_it * .25, 1))
    train_objects.momentum_schedule = MomentumSchedule(train_objects.lr_schedule, train_config.min_lr,
                                                       train_config.max_lr, train_config.min_momentum,
                                                       train_config.max_momentum)

    input_shape = x_val[0].shape
    # rebuild a gluon block from the value and policy outputs of the loaded symbol
    inputs = mx.sym.var('data', dtype='float32')
    value_out = symbol.get_internals()[main_config['value_output'] + '_output']
    policy_out = symbol.get_internals()[main_config['policy_output'] + '_output']
    sym = mx.symbol.Group([value_out, policy_out])
    net = mx.gluon.SymbolBlock(sym, inputs)
    net.collect_params().load(params_filename, ctx)

    metrics_gluon = {
        'value_loss': metric.MSE(name='value_loss', output_names=['value_output']),
        'value_acc_sign': metric.create(acc_sign, name='value_acc_sign', output_names=['value_output'],
                                        label_names=['value_label']),
    }

    if train_config.sparse_policy_label:
        print("train with sparse labels")
        # the default cross entropy only supports sparse labels
        # (no trailing comma here: it would store a 1-tuple instead of the metric object)
        metrics_gluon['policy_loss'] = metric.CrossEntropy(name='policy_loss', output_names=['policy_output'],
                                                           label_names=['policy_label'])
        metrics_gluon['policy_acc'] = metric.Accuracy(axis=1, name='policy_acc', output_names=['policy_output'],
                                                      label_names=['policy_label'])
    else:
        metrics_gluon['policy_loss'] = metric.create(cross_entropy, name='policy_loss',
                                                     output_names=['policy_output'],
                                                     label_names=['policy_label'])
        metrics_gluon['policy_acc'] = metric.create(acc_distribution, name='policy_acc',
                                                    output_names=['policy_output'],
                                                    label_names=['policy_label'])

    train_objects.metrics = metrics_gluon

    train_config.export_weights = False  # don't save intermediate weights
    train_agent = TrainerAgent(net, val_data, train_config, train_objects, use_rtpt=False)

    # iteration counter used for the momentum and learning rate schedule
    cur_it = train_config.k_steps_initial * train_config.batch_steps
    (k_steps_final, val_value_loss_final, val_policy_loss_final, val_value_acc_sign_final,
     val_policy_acc_final), _ = train_agent.train(cur_it)

    # the final validation metrics are encoded in the checkpoint file name
    prefix = "%smodel-%.5f-%.5f-%.3f-%.3f" % (model_contender_dir, val_value_loss_final, val_policy_loss_final,
                                              val_value_acc_sign_final, val_policy_acc_final)

    sym_file = prefix + "-symbol.json"
    params_file = prefix + "-" + "%04d.params" % nn_update_idx

    # the export function saves both the architecture and the weights
    net.export(prefix, epoch=nn_update_idx)
    print()
    logging.info("Saved checkpoint to %s-%04d.params", prefix, nn_update_idx)

    if convert_to_onnx:
        convert_mxnet_model_to_onnx(sym_file, params_file, ["value_out_output", "policy_out_output"], input_shape,
                                    [1, 8, 16], False)

    logging.info("k_steps_final %d" % k_steps_final)
    queue.put(k_steps_final)
def train(self, cur_it=None):  # Probably needs refactoring
    """
    Trains the model part by part until self.continue_training is set to False.

    Every epoch reshuffles self.ordering, re-initializes the optimizer of the module and iterates over all
    part files. self.batch_callback() is invoked after every parameter update; the method returns from inside
    the batch loop as soon as self.continue_training is False afterwards.

    :param cur_it: Current iteration which is used for the learning rate and momentum schedule.
     If set to None it will be initialized with self.tc.k_steps_initial * 1000
    :return: Result of return_metrics_and_stop_training(k_steps, val_metric_values, k_steps_best,
     val_metric_values_best)
    :raises Exception: If self.ordering is empty
    """
    # Too many local variables (44/15) - Too many branches (18/12) - Too many statements (108/50)
    # set a custom seed for reproducibility
    if self.tc.seed is not None:
        random.seed(self.tc.seed)
    # define and initialize the variables which will be used
    self.t_s = time()
    # track on how many batches have been processed in this epoch
    self.patience_cnt = epoch = self.batch_proc_tmp = 0
    self.k_steps = self.tc.k_steps_initial  # counter for thousands steps

    if cur_it is None:
        self.cur_it = self.tc.k_steps_initial * 1000
    else:
        self.cur_it = cur_it
    self.nb_spikes = 0  # count the number of spikes that have been detected
    # initialize the loss to compare with, with a very high value
    self.old_val_loss = 9000
    self.graph_exported = False  # create a state variable to check if the net architecture has been reported yet
    self.continue_training = True

    # set the initial learning rate (and momentum for "nag") from the schedules
    self.optimizer.lr = self.to.lr_schedule(self.cur_it)
    if self.tc.optimizer_name == "nag":
        self.optimizer.momentum = self.to.momentum_schedule(self.cur_it)

    if not self.ordering:  # safety check to prevent eternal loop
        raise Exception(
            "You must have at least one part file in your planes-dataset directory!"
        )

    if self.use_rtpt:
        # Start the RTPT tracking
        self.rtpt.start()

    while self.continue_training:  # Too many nested blocks (7/5)
        # reshuffle the ordering of the training game batches (shuffle works in place)
        random.shuffle(self.ordering)

        epoch += 1
        logging.info("EPOCH %d", epoch)
        logging.info("=========================")
        self.t_s_steps = time()
        self._model.init_optimizer(optimizer=self.optimizer)

        if self._augment:
            # stores part ids that were not augmented yet
            parts_not_augmented = list(set(self.ordering.copy()))
            # stores part ids that were loaded before but not augmented
            parts_to_augment = []

        for part_id in tqdm_notebook(self.ordering):
            if MODE == MODE_XIANGQI:
                # NOTE(review): plys_to_end is not assigned in this branch, so the WDL / plys-to-end
                # iterator below would fail on an unbound name in Xiangqi mode - confirm that
                # use_wdl and use_plys_to_end are never both enabled for Xiangqi
                _, self.x_train, self.yv_train, self.yp_train, _ = load_xiangqi_dataset(
                    dataset_type="train",
                    part_id=part_id,
                    normalize=self.tc.normalize,
                    verbose=False)

                if self._augment:
                    # check whether the current part should be augmented
                    if part_id in parts_to_augment:
                        augment(self.x_train, self.yp_train)
                        logging.debug(
                            "Using augmented part with id {}".format(
                                part_id))
                    elif part_id in parts_not_augmented:
                        # coin flip: augment now, or remember the part in parts_to_augment
                        if random.randint(0, 1):
                            augment(self.x_train, self.yp_train)
                            parts_not_augmented.remove(part_id)
                            logging.debug(
                                "Using augmented part with id {}".format(
                                    part_id))
                        else:
                            parts_to_augment.append(part_id)
                            logging.debug(
                                "Using unaugmented part with id {}".format(
                                    part_id))
            else:
                # load one chunk of the dataset from memory
                _, self.x_train, self.yv_train, self.yp_train, plys_to_end, _ = load_pgn_dataset(
                    dataset_type="train",
                    part_id=part_id,
                    normalize=self.tc.normalize,
                    verbose=False,
                    q_value_ratio=self.tc.q_value_ratio)

            # fill_up_batch if there aren't enough games
            if len(self.yv_train) < self.tc.batch_size:
                logging.info("filling up batch with too few samples %d" %
                             len(self.yv_train))
                self.x_train = fill_up_batch(self.x_train,
                                             self.tc.batch_size)
                self.yv_train = fill_up_batch(self.yv_train,
                                              self.tc.batch_size)
                self.yp_train = fill_up_batch(self.yp_train,
                                              self.tc.batch_size)
                if MODE != MODE_XIANGQI:
                    if plys_to_end is not None:
                        plys_to_end = fill_up_batch(
                            plys_to_end, self.tc.batch_size)

            if MODE != MODE_XIANGQI:
                if self.tc.discount != 1:
                    # scale the value targets in place by discount ** plys_to_end
                    self.yv_train *= self.tc.discount**plys_to_end
            self.yp_train = prepare_policy(
                self.yp_train, self.tc.select_policy_from_plane,
                self.tc.sparse_policy_label,
                self.tc.is_policy_from_plane_data)

            # the WDL and plys-to-end labels are only added if both options are enabled
            if self.tc.use_wdl and self.tc.use_plys_to_end:
                self._train_iter = mx.io.NDArrayIter(
                    {'data': self.x_train}, {
                        'value_label': self.yv_train,
                        'policy_label': self.yp_train,
                        'wdl_label': value_to_wdl_label(self.yv_train),
                        'plys_to_end_label': prepare_plys_label(plys_to_end)
                    },
                    self.tc.batch_size,
                    shuffle=True)
            else:
                self._train_iter = mx.io.NDArrayIter(
                    {'data': self.x_train}, {
                        'value_label': self.yv_train,
                        'policy_label': self.yp_train
                    },
                    self.tc.batch_size,
                    shuffle=True)

            # avoid memory leaks by adding synchronization
            mx.nd.waitall()

            reset_metrics(self.to.metrics)
            for batch in self._train_iter:
                self._model.forward(batch, is_train=True)  # compute predictions
                for metric in self.to.metrics:  # update the metrics
                    self._model.update_metric(metric, batch.label)

                self._model.backward()  # compute gradients
                self._model.update()  # update parameters
                self.batch_callback()

                # stop as soon as the flag was cleared (presumably by batch_callback() - TODO confirm)
                if not self.continue_training:
                    logging.info('Elapsed time for training(hh:mm:ss): ' + str(
                        datetime.timedelta(
                            seconds=round(time() - self.t_s))))
                    return return_metrics_and_stop_training(
                        self.k_steps, self.val_metric_values,
                        self.k_steps_best, self.val_metric_values_best)

                # add the graph representation of the network to the tensorboard log file
                # (the add_graph call is disabled, only the state flag is updated)
                if not self.graph_exported and self.tc.log_metrics_to_tensorboard:
                    # self.sum_writer.add_graph(self._symbol)
                    self.graph_exported = True
def train(self, cur_it=None):  # Probably needs refactoring
    """
    Train the network on the part files listed in ``self.ordering`` until a stop condition is hit.

    After every ``self.tc.batch_steps`` processed batches (one "k-step") the metrics are evaluated
    on the current training part and on ``self._val_data``. A new best validation loss updates the
    best-checkpoint bookkeeping (and exports the weights if ``self.tc.export_weights`` is set).
    If spike recovery is enabled and the validation loss spikes or becomes NaN, the last exported
    checkpoint is restored. Training stops when ``self.tc.total_it`` iterations were processed or
    ``self.tc.max_spikes`` spikes were counted.

    :param cur_it: Current iteration which is used for the learning rate and momentum schedule.
     If set to None it will be initialized with ``self.tc.k_steps_initial * 1000``
    :return: Result of ``return_metrics_and_stop_training(k_steps, val_metric_values, k_steps_best,
     val_metric_values_best)``
    :raises Exception: If ``self.ordering`` is empty
    """
    # Too many local variables (44/15) - Too many branches (18/12) - Too many statements (108/50)
    # set a custom seed for reproducibility
    random.seed(self.tc.seed)
    # define and initialize the variables which will be used
    t_s = time()
    # predefine the local variables that will be used in the training loop
    val_loss_best = val_p_acc_best = k_steps_best = val_metric_values_best = old_label = value_out = None
    patience_cnt = epoch = batch_proc_tmp = 0  # track on how many batches have been processed in this epoch
    k_steps = self.tc.k_steps_initial  # counter for thousands steps
    # calculate how many log states will be processed
    k_steps_end = round(self.tc.total_it / self.tc.batch_steps)
    # we use k-steps instead of epochs here
    if k_steps_end == 0:
        k_steps_end = 1

    if self.use_rtpt:
        self.rtpt = RTPT(name_initials=self.tc.name_initials,
                         experiment_name='crazyara',
                         max_iterations=k_steps_end - self.tc.k_steps_initial)
    if cur_it is None:
        cur_it = self.tc.k_steps_initial * 1000
    nb_spikes = 0  # count the number of spikes that have been detected
    # initialize the loss to compare with, with a very high value
    old_val_loss = np.inf
    graph_exported = False  # create a state variable to check if the net architecture has been reported yet

    if not self.ordering:  # safety check to prevent eternal loop
        raise Exception(
            "You must have at least one part file in your planes-dataset directory!"
        )

    if self.use_rtpt:
        # Start the RTPT tracking
        self.rtpt.start()

    while True:  # Too many nested blocks (7/5)
        # reshuffle the ordering of the training game batches (shuffle works in place)
        random.shuffle(self.ordering)

        epoch += 1
        logging.info("EPOCH %d", epoch)
        logging.info("=========================")
        t_s_steps = time()

        for part_id in tqdm_notebook(self.ordering):
            # load one chunk of the dataset from memory
            _, x_train, yv_train, yp_train, _, _ = load_pgn_dataset(
                dataset_type="train",
                part_id=part_id,
                normalize=self.tc.normalize,
                verbose=False,
                q_value_ratio=self.tc.q_value_ratio)

            yp_train = prepare_policy(
                y_policy=yp_train,
                select_policy_from_plane=self.tc.select_policy_from_plane,
                sparse_policy_label=self.tc.sparse_policy_label,
                is_policy_from_plane_data=self.tc.is_policy_from_plane_data
            )

            # update the train_data object
            train_dataset = gluon.data.ArrayDataset(
                nd.array(x_train), nd.array(yv_train), nd.array(yp_train))
            train_data = gluon.data.DataLoader(
                train_dataset,
                batch_size=self.tc.batch_size,
                shuffle=True,
                num_workers=self.tc.cpu_count)

            for data, value_label, policy_label in train_data:
                data = data.as_in_context(self._ctx)
                value_label = value_label.as_in_context(self._ctx)
                policy_label = policy_label.as_in_context(self._ctx)

                # update a dummy metric to see a proper progress bar
                #  (the metrics will get evaluated at the end of 100k steps)
                # the update uses the label/output pair of the previously processed batch
                if batch_proc_tmp > 0:
                    self.to.metrics["value_loss"].update(old_label, value_out)

                old_label = value_label
                with autograd.record():
                    [value_out, policy_out] = self._net(data)
                    value_loss = self._l2_loss(value_out, value_label)
                    policy_loss = self._softmax_cross_entropy(policy_out, policy_label)
                    # weight the components of the combined loss
                    combined_loss = (
                        self.tc.val_loss_factor * value_loss
                        + self.tc.policy_loss_factor * policy_loss)

                combined_loss.backward()
                learning_rate = self.to.lr_schedule(cur_it)  # update the learning rate
                self._trainer.set_learning_rate(learning_rate)
                momentum = self.to.momentum_schedule(cur_it)  # update the momentum
                self._trainer._optimizer.momentum = momentum
                self._trainer.step(data.shape[0])
                cur_it += 1
                batch_proc_tmp += 1
                # add the graph representation of the network to the tensorboard log file
                if not graph_exported and self.tc.log_metrics_to_tensorboard:
                    self.sum_writer.add_graph(self._net)
                    graph_exported = True

                if batch_proc_tmp >= self.tc.batch_steps:  # show metrics every thousands steps
                    # update batch_proc_tmp counter by subtracting the batch_steps
                    batch_proc_tmp = batch_proc_tmp - self.tc.batch_steps
                    ms_step = ((time() - t_s_steps) / self.tc.batch_steps) * 1000  # measure elapsed time
                    # update the counters
                    k_steps += 1
                    patience_cnt += 1
                    logging.info("Step %dK/%dK - %dms/step", k_steps, k_steps_end, ms_step)
                    logging.info("-------------------------")
                    logging.debug("Iteration %d/%d", cur_it, self.tc.total_it)
                    logging.debug("lr: %.7f - momentum: %.7f", learning_rate, momentum)
                    train_metric_values = evaluate_metrics(
                        self.to.metrics,
                        train_data,
                        self._net,
                        nb_batches=10,  # 25,
                        ctx=self._ctx,
                        sparse_policy_label=self.tc.sparse_policy_label,
                        apply_select_policy_from_plane=self.tc.select_policy_from_plane
                        and not self.tc.is_policy_from_plane_data)
                    val_metric_values = evaluate_metrics(
                        self.to.metrics,
                        self._val_data,
                        self._net,
                        nb_batches=None,
                        ctx=self._ctx,
                        sparse_policy_label=self.tc.sparse_policy_label,
                        apply_select_policy_from_plane=self.tc.select_policy_from_plane
                        and not self.tc.is_policy_from_plane_data)
                    if self.use_rtpt:
                        # update process title according to loss
                        self.rtpt.step(subtitle=f"loss={val_metric_values['loss']:2.2f}")
                    if self.tc.use_spike_recovery and (
                            old_val_loss * self.tc.spike_thresh < val_metric_values["loss"]
                            or np.isnan(val_metric_values["loss"])
                    ):  # check for spikes
                        nb_spikes += 1
                        logging.warning(
                            "Spike %d/%d occurred - val_loss: %.3f",
                            nb_spikes,
                            self.tc.max_spikes,
                            val_metric_values["loss"],
                        )
                        if nb_spikes >= self.tc.max_spikes:
                            logging.debug(
                                "The maximum number of spikes has been reached. Stop training."
                            )
                            # finally stop training because the maximum number of spikes was reached
                            print()
                            print("Elapsed time for training(hh:mm:ss): " + str(
                                datetime.timedelta(seconds=round(time() - t_s))))

                            if self.tc.log_metrics_to_tensorboard:
                                self.sum_writer.close()
                            return return_metrics_and_stop_training(
                                k_steps, val_metric_values, k_steps_best, val_metric_values_best)

                        if val_loss_best is None or not self.tc.export_weights:
                            # A checkpoint file is only written below when a new best loss was
                            # found AND export_weights is enabled. Without one there is nothing to
                            # roll back to (and the path could not even be formatted from None).
                            logging.warning(
                                "Spike recovery skipped - no exported checkpoint is available")
                        else:
                            logging.debug("Recover to latest checkpoint")
                            model_path = self.tc.export_dir + "weights/model-%.5f-%.3f-%04d.params" % (
                                val_loss_best,
                                val_p_acc_best,
                                k_steps_best,
                            )  # Load the best model once again
                            logging.debug("load current best model:%s", model_path)
                            self._net.load_parameters(model_path, ctx=self._ctx)
                            k_steps = k_steps_best
                            logging.debug("k_step is back at %d", k_steps_best)
                        # print the elapsed time
                        t_delta = time() - t_s_steps
                        print(" - %.ds" % t_delta)
                        t_s_steps = time()
                    else:
                        # update the val_loss_value to compare with using spike recovery
                        old_val_loss = val_metric_values["loss"]
                        # log the metric values to tensorboard
                        self._log_metrics(train_metric_values, global_step=k_steps, prefix="train_")
                        self._log_metrics(val_metric_values, global_step=k_steps, prefix="val_")

                        # NOTE(review): unlike add_graph()/close(), the writer calls below are not
                        # guarded by log_metrics_to_tensorboard - confirm sum_writer always exists
                        if self.tc.export_grad_histograms:
                            # logging the gradients of parameters for checking convergence
                            for name in self._param_names:
                                if "bn" not in name and "batch" not in name \
                                        and name != "policy_flat_plane_idx":
                                    self.sum_writer.add_histogram(
                                        tag=name,
                                        values=self._params[name].grad(),
                                        global_step=k_steps,
                                        bins=20)

                        # check if a new checkpoint shall be created
                        if val_loss_best is None or val_metric_values["loss"] < val_loss_best:
                            # update val_loss_best
                            val_loss_best = val_metric_values["loss"]
                            val_p_acc_best = val_metric_values["policy_acc"]
                            val_metric_values_best = val_metric_values
                            k_steps_best = k_steps

                            if self.tc.export_weights:
                                prefix = self.tc.export_dir + "weights/model-%.5f-%.3f" \
                                         % (val_loss_best, val_p_acc_best)
                                # the export function saves both the architecture and the weights
                                self._net.export(prefix, epoch=k_steps_best)
                                print()
                                logging.info("Saved checkpoint to %s-%04d.params", prefix, k_steps_best)

                            patience_cnt = 0  # reset the patience counter

                        # print the elapsed time
                        t_delta = time() - t_s_steps
                        print(" - %.ds" % t_delta)
                        t_s_steps = time()

                        # log the samples per second metric to tensorboard
                        self.sum_writer.add_scalar(
                            tag="samples_per_second",
                            value={
                                "hybrid_sync": data.shape[0] * self.tc.batch_steps / t_delta
                            },
                            global_step=k_steps,
                        )

                        # log the current learning rate
                        self.sum_writer.add_scalar(
                            tag="lr",
                            value=self.to.lr_schedule(cur_it),
                            global_step=k_steps)
                        # log the current momentum value
                        self.sum_writer.add_scalar(
                            tag="momentum",
                            value=self.to.momentum_schedule(cur_it),
                            global_step=k_steps)

                        if cur_it >= self.tc.total_it:
                            logging.debug("The number of given iterations has been reached")
                            # finally stop training because the iteration budget is used up
                            print()
                            print("Elapsed time for training(hh:mm:ss): " + str(
                                datetime.timedelta(seconds=round(time() - t_s))))

                            if self.tc.log_metrics_to_tensorboard:
                                self.sum_writer.close()

                            return return_metrics_and_stop_training(
                                k_steps, val_metric_values, k_steps_best, val_metric_values_best)
def __init__(self, *args, **kwargs):
    """
    Set up the round-trip test fixture.

    Loads part 0 of the "test" dataset, copies the pgn file name, batch size, minimum elo and
    start indices out of the loaded dataset object, and filters the pgn file with a
    PGN2PlanesConverter built from those values.
    """
    super(FullRoundTripTests, self).__init__(*args, **kwargs)

    logging.info('loading test dataset...')
    loaded_parts = load_pgn_dataset(dataset_type='test',
                                    part_id=0,
                                    print_statistics=True,
                                    normalize=False,
                                    print_parameters=True)
    (self._s_idcs_test,
     self._x_test,
     self._yv_test,
     self._yp_test,
     self._pgn_datasets_test) = loaded_parts

    logging.info('loading test pgn file...')
    datasets = self._pgn_datasets_test
    # the pgn name is stored as bytes, the other entries are used as they are
    self._pgn_filename = datasets['parameters/pgn_name'][0].decode('UTF8')
    self._batch_size = datasets['parameters/batch_size'][0]
    self._min_elo_both = datasets['parameters/min_elo_both'][0]
    self._start_indices = datasets['start_indices']

    # build the converter with the settings read from the dataset
    converter_options = dict(limit_nb_games_to_analyze=0,
                             nb_games_per_file=self._batch_size,
                             max_nb_files=1,
                             min_elo_both=self._min_elo_both,
                             termination_conditions=["Normal"],
                             log_lvl=logging.DEBUG,
                             compression='lz4',
                             clevel=5,
                             dataset_type='test')
    converter = PGN2PlanesConverter(**converter_options)

    # only the selected games are kept; the remaining statistics are not needed here
    (self._all_pgn_sel,
     _nb_games_sel,
     _batch_white_won,
     _batch_black_won,
     _batch_draw) = converter.filter_pgn()
    print(len(self._all_pgn_sel))
def train(self):
    """
    Train the network until the iteration budget is used up or too many loss spikes occurred.

    The method takes no arguments; every setting is read from instance attributes
    (e.g. ``self._batch_size``, ``self._batch_steps``, ``self._total_it``, ``self._normalize``).
    After every ``self._batch_steps`` processed batches the metrics are evaluated on the current
    training part (25 batches) and on ``self._val_data``. A new best validation loss updates the
    best-checkpoint bookkeeping and, if ``self._export_weights`` is set, exports the network.
    With spike recovery enabled, a spiking or NaN validation loss restores the last exported
    checkpoint.

    :return: Tuple ``((k_steps, val_loss, val_p_acc), (k_steps_best, val_loss_best, val_p_acc_best))``
     holding the last evaluated and the best validation results
    :raises Exception: If ``self.ordering`` is empty
    """
    # set a custom seed for reproducibility
    random.seed(self._seed)
    # define and initialize the variables which will be used
    t_s = time()
    # predefine the local variables that will be used in the training loop
    val_loss_best = None
    val_p_acc_best = None
    k_steps_best = None
    patience_cnt = 0
    epoch = 0
    # keep track on how many batches have been processed in this epoch so far
    batch_proc_tmp = 0
    # counter for thousands steps
    k_steps = self._k_steps_initial
    # calculate how many log states will be processed
    k_steps_end = self._total_it / self._batch_steps
    cur_it = 0
    # count the number of spikes that have been detected
    nb_spikes = 0
    # initialize the loss to compare with, with a very high value
    # (infinity guarantees that the very first evaluation is never treated as a spike)
    old_val_loss = np.inf
    # create a state variable to check if the net architecture has been reported yet
    graph_exported = False

    old_label = None
    value_out = None

    # safety check to prevent eternal loop
    if not self.ordering:
        raise Exception(
            "You must have at least one part file in your planes-dataset directory!"
        )

    while True:
        # reshuffle the ordering of the training game batches (shuffle works in place)
        random.shuffle(self.ordering)

        epoch += 1
        logging.info("EPOCH %d", epoch)
        logging.info("=========================")
        t_s_steps = time()

        for part_id in tqdm_notebook(self.ordering):
            # load one chunk of the dataset from memory
            _, x_train, yv_train, yp_train, _ = load_pgn_dataset(
                dataset_type="train",
                part_id=part_id,
                normalize=self._normalize,
                verbose=False)
            # update the train_data object
            # (the policy target is reduced to its arg-max index along axis 1)
            train_dataset = gluon.data.ArrayDataset(
                nd.array(x_train),
                nd.array(yv_train),
                nd.array(yp_train.argmax(axis=1)))
            train_data = gluon.data.DataLoader(train_dataset,
                                               batch_size=self._batch_size,
                                               shuffle=True,
                                               num_workers=self._cpu_count)

            for data, value_label, policy_label in train_data:
                data = data.as_in_context(self._ctx)
                value_label = value_label.as_in_context(self._ctx)
                policy_label = policy_label.as_in_context(self._ctx)

                # update a dummy metric to see a proper progress bar
                #  (the metrics will get evaluated at the end of 100k steps)
                # the update uses the label/output pair of the previously processed batch
                if batch_proc_tmp > 0:
                    self._metrics["value_loss"].update(old_label, value_out)

                old_label = value_label
                with autograd.record():
                    [value_out, policy_out] = self._net(data)
                    value_loss = self._l2_loss(value_out, value_label)
                    policy_loss = self._softmax_cross_entropy(policy_out, policy_label)
                    # weight the components of the combined loss
                    combined_loss = (
                        self._val_loss_factor * value_loss.sum()
                        + self._policy_loss_factor * policy_loss.sum())

                combined_loss.backward()
                # update the learning rate
                lr = self._lr_schedule(cur_it)
                self._trainer.set_learning_rate(lr)
                # update the momentum
                momentum = self._momentum_schedule(cur_it)
                self._trainer._optimizer.momentum = momentum
                self._trainer.step(data.shape[0])
                cur_it += 1
                batch_proc_tmp += 1
                # add the graph representation of the network to the tensorboard log file
                if not graph_exported and self._log_metrics_to_tensorboard:
                    self.sw.add_graph(self._net)
                    graph_exported = True

                # show metrics every thousands steps
                if batch_proc_tmp >= self._batch_steps:
                    # update batch_proc_tmp counter by subtracting the batch_steps
                    batch_proc_tmp = batch_proc_tmp - self._batch_steps
                    # measure elapsed time
                    ms_step = ((time() - t_s_steps) / self._batch_steps) * 1000
                    # update the counters
                    k_steps += 1
                    patience_cnt += 1
                    logging.info("Step %dK/%dK - %dms/step", k_steps, k_steps_end, ms_step)
                    logging.info("-------------------------")
                    logging.debug("Iteration %d/%d", cur_it, self._total_it)
                    logging.debug("lr: %.7f - momentum: %.7f", lr, momentum)
                    train_metric_values = evaluate_metrics(self._metrics,
                                                           train_data,
                                                           self._net,
                                                           nb_batches=25,
                                                           ctx=self._ctx)
                    val_metric_values = evaluate_metrics(self._metrics,
                                                         self._val_data,
                                                         self._net,
                                                         nb_batches=None,
                                                         ctx=self._ctx)
                    # check for spikes
                    if self._use_spike_recovery and (
                            old_val_loss * self._spike_thresh < val_metric_values["loss"]
                            or np.isnan(val_metric_values["loss"])):
                        nb_spikes += 1
                        logging.warning(
                            "Spike %d/%d occurred - val_loss: %.3f",
                            nb_spikes,
                            self._max_spikes,
                            val_metric_values["loss"],
                        )
                        if nb_spikes >= self._max_spikes:
                            val_loss = val_metric_values["loss"]
                            val_p_acc = val_metric_values["policy_acc"]
                            logging.debug(
                                "The maximum number of spikes has been reached. Stop training."
                            )
                            # finally stop training because the maximum number of spikes was reached
                            print()
                            print("Elapsed time for training(hh:mm:ss): " + str(
                                datetime.timedelta(seconds=round(time() - t_s))))

                            if self._log_metrics_to_tensorboard:
                                self.sw.close()
                            return (k_steps, val_loss, val_p_acc), (k_steps_best,
                                                                    val_loss_best,
                                                                    val_p_acc_best)

                        if val_loss_best is None or not self._export_weights:
                            # A checkpoint file is only written below when a new best loss was
                            # found AND exporting is enabled. Without one there is nothing to
                            # roll back to (and the path could not even be formatted from None).
                            logging.warning(
                                "Spike recovery skipped - no exported checkpoint is available")
                        else:
                            logging.debug("Recover to latest checkpoint")
                            # Load the best model once again
                            model_path = "./weights/model-%.5f-%.3f-%04d.params" % (
                                val_loss_best,
                                val_p_acc_best,
                                k_steps_best,
                            )
                            logging.debug("load current best model:%s", model_path)
                            self._net.load_parameters(model_path, ctx=self._ctx)
                            k_steps = k_steps_best
                            logging.debug("k_step is back at %d", k_steps_best)
                        # print the elapsed time
                        t_delta = time() - t_s_steps
                        print(" - %.ds" % t_delta)
                        t_s_steps = time()
                    else:
                        # update the val_loss_value to compare with using spike recovery
                        old_val_loss = val_metric_values["loss"]
                        # log the metric values to tensorboard
                        self._log_metrics(train_metric_values, global_step=k_steps, prefix="train_")
                        self._log_metrics(val_metric_values, global_step=k_steps, prefix="val_")

                        # NOTE(review): unlike add_graph()/close(), the writer calls below are not
                        # guarded by _log_metrics_to_tensorboard - confirm self.sw always exists
                        if self._export_grad_histograms:
                            # logging the gradients of parameters for checking convergence
                            for name in self._param_names:
                                if "bn" not in name and "batch" not in name:
                                    self.sw.add_histogram(
                                        tag=name,
                                        values=self._params[name].grad(),
                                        global_step=k_steps,
                                        bins=20)

                        # check if a new checkpoint shall be created
                        if val_loss_best is None or val_metric_values["loss"] < val_loss_best:
                            # update val_loss_best
                            val_loss_best = val_metric_values["loss"]
                            val_p_acc_best = val_metric_values["policy_acc"]
                            k_steps_best = k_steps

                            if self._export_weights:
                                prefix = "./weights/model-%.5f-%.3f" % (val_loss_best, val_p_acc_best)
                                # the export function saves both the architecture and the weights
                                self._net.export(prefix, epoch=k_steps_best)
                                print()
                                logging.info("Saved checkpoint to %s-%04d.params", prefix, k_steps_best)

                            # reset the patience counter
                            patience_cnt = 0

                        # print the elapsed time
                        t_delta = time() - t_s_steps
                        print(" - %.ds" % t_delta)
                        t_s_steps = time()

                        # log the samples per second metric to tensorboard
                        self.sw.add_scalar(
                            tag="samples_per_second",
                            value={
                                "hybrid_sync": data.shape[0] * self._batch_steps / t_delta
                            },
                            global_step=k_steps,
                        )

                        # log the current learning rate
                        self.sw.add_scalar(tag="lr",
                                           value=self._lr_schedule(cur_it),
                                           global_step=k_steps)
                        # log the current momentum value
                        self.sw.add_scalar(
                            tag="momentum",
                            value=self._momentum_schedule(cur_it),
                            global_step=k_steps)

                        if cur_it >= self._total_it:
                            val_loss = val_metric_values["loss"]
                            val_p_acc = val_metric_values["policy_acc"]
                            logging.debug("The number of given iterations has been reached")
                            # finally stop training because the iteration budget is used up
                            print()
                            print("Elapsed time for training(hh:mm:ss): " + str(
                                datetime.timedelta(seconds=round(time() - t_s))))

                            if self._log_metrics_to_tensorboard:
                                self.sw.close()

                            return (k_steps, val_loss, val_p_acc), (k_steps_best,
                                                                    val_loss_best,
                                                                    val_p_acc_best)
def main():
    """
    Quantize the configured model to int8 and save the quantized symbol and parameters.

    The symbol and parameter files are the first matches found in
    ``main_config["model_architecture_dir"]`` and ``main_config["model_weights_dir"]``.
    The data returned by ``load_pgn_dataset(normalize=True)`` is used for the entropy
    calibration. The outputs are written as ``<prefix>-int8-symbol.json`` and
    ``<prefix>-int8-<epoch>.params``, where ``<prefix>`` is the symbol file name without
    ``-symbol.json``.

    :raises FileNotFoundError: If one of the model directories contains no file
    """
    import os  # local import: only needed for the portable file name handling below

    def _first_match(pattern):
        """Return the first path matching *pattern* or fail with a descriptive error."""
        matches = glob.glob(pattern)
        if not matches:
            raise FileNotFoundError("No file found matching '%s'" % pattern)
        return matches[0]

    # config
    batch_size = 32
    logger = logging.getLogger('logger')
    logger.setLevel(logging.DEBUG)
    ctx = mx.cpu(0)
    calib_mode = 'entropy'
    excluded_sym_names = ['stem_conv0']
    num_calib_batches = 128
    quantized_dtype = 'int8'

    symbol_path = _first_match(main_config["model_architecture_dir"] + "*")
    params_path = _first_match(main_config["model_weights_dir"] + "*")
    print("symbol_path:", symbol_path)
    print("params_path:", params_path)
    # the four characters in front of the 7-character tail (e.g. ".params") hold the epoch number
    epoch = int(params_path[-11:-7])
    print(epoch)

    # load calibration dataset (only the input planes are needed for calibration)
    _, x_train, _, _, _, _ = load_pgn_dataset(normalize=True)
    calib_data = mx.io.NDArrayIter({'data': x_train}, {}, batch_size, shuffle=True)

    # construct the model name based on the parameter file
    # (basename instead of split("/") so that Windows path separators are handled as well)
    prefix = os.path.basename(symbol_path).replace("-symbol.json", "")

    sym = mx.sym.load(symbol_path)
    sym = remove_labels(sym,
                        main_config['value_output'] + '_output',
                        main_config['policy_output'] + '_output')

    # split the saved dict into argument and auxiliary parameters based on the key prefix
    # https://github.com/apache/incubator-mxnet/issues/6951
    save_dict = mx.nd.load(params_path)
    arg_params = {}
    aux_params = {}
    for key, val in save_dict.items():
        param_type, name = key.split(":", 1)
        if param_type == "arg":
            arg_params[name] = val
        elif param_type == "aux":
            aux_params[name] = val

    # quantize model
    sym = sym.get_backend_symbol('MKLDNN_QUANTIZE')
    label_names = []
    qsym, qarg_params, aux_params = quantize_model(
        sym=sym,
        arg_params=arg_params,
        aux_params=aux_params,
        ctx=ctx,
        excluded_sym_names=excluded_sym_names,
        excluded_op_names=excluded_sym_names,
        calib_mode=calib_mode,
        calib_data=calib_data,
        num_calib_examples=num_calib_batches * batch_size,
        quantized_dtype=quantized_dtype,
        quantize_mode='smart',
        label_names=label_names,
        logger=logger)

    sym_name = '%s-symbol.json' % (prefix + '-int8')
    save_symbol(sym_name, qsym, logger)
    param_name = '%s-%04d.params' % (prefix + '-int8', epoch)
    save_params(param_name, qarg_params, aux_params, logger)