def myTrain(config, num_epochs, sym01, sym02, period):
    p = dict(
        seq_len=config['seq_len'],
        batch_size=config['batch_size'],
        criterion=nn.MSELoss(),
        max_epochs=num_epochs,
        n_features=3,
        hidden_size=config['hidden_size'],
        num_layers=config['num_layers'],
        dropout=config['dropout'],
        learning_rate=config['lr'],
    )
    print("myTrain parameters:", sym01, sym02, period)
    seed_everything(1)
    csv_logger = CSVLogger('./', name='lstm', version='0')
    metrics = {"loss": "ptl/val_loss"}
    trainer = Trainer(
        max_epochs=p['max_epochs'],
        logger=csv_logger,
        callbacks=[TuneReportCallback(metrics, on="validation_end")],
        #gpus=1,
        #row_log_interval=1,
        #progress_bar_refresh_rate=2,
    )
    model = LSTMRegressor(
        n_features=p['n_features'],
        hidden_size=p['hidden_size'],
        seq_len=p['seq_len'],
        batch_size=p['batch_size'],
        criterion=p['criterion'],
        num_layers=p['num_layers'],
        dropout=p['dropout'],
        learning_rate=p['learning_rate'],
    )
    dm = MyDataModule(sym01=sym01, sym02=sym02, period=period,
                      seq_len=p['seq_len'], batch_size=p['batch_size'])
    dm.reset(sym01=sym01, sym02=sym02, period=period,
             seq_len=p['seq_len'], batch_size=p['batch_size'])
    dm.setup('test')

    trainer.fit(model, dm)
    testresult = trainer.test(model, datamodule=dm)
    trainer.save_checkpoint(sym01 + "-lstm.ckpt")
    print(testresult)
    testresult = testresult[0]
    print(testresult['val_loss'])
    return model, testresult['val_loss']
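# --- Added sketch (not from the original) ---
# myTrain uses TuneReportCallback, so it is presumably driven by Ray Tune.
# A minimal, hypothetical launch under that assumption; the search-space
# values, num_samples, and the symbol/period arguments are placeholders.
from functools import partial
from ray import tune

search_space = {
    "seq_len": tune.choice([16, 32, 64]),
    "batch_size": tune.choice([32, 64]),
    "hidden_size": tune.choice([50, 100]),
    "num_layers": tune.choice([1, 2]),
    "dropout": tune.uniform(0.0, 0.4),
    "lr": tune.loguniform(1e-4, 1e-2),
}

analysis = tune.run(
    partial(myTrain, num_epochs=10, sym01="SYM01", sym02="SYM02", period="1d"),
    config=search_space,
    metric="loss",  # the key TuneReportCallback maps from "ptl/val_loss"
    mode="min",
    num_samples=4,
)
print("Best config:", analysis.best_config)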
def do_training(args):
    # ARG CORRECTIONS AND CHECKS
    date_str = args.cmd_timestamp.split('T')[0]
    args.model_id = args.model_id.format(TRAIN_DATE=date_str, TRAIN_ID=args.TRAIN_ID)

    # make sure output directory exists
    os.makedirs(args.outdir, exist_ok=True)

    # Setup Callbacks
    callbacks = []
    plotting_callbacks = []  # TODO
    validation_results_callbacks = []
    if not args.result_files:
        args.result_files = [
            'results.mat training_image_basenames training_classes image_basenames '
            'input_classes output_scores confusion_matrix counts_perclass '
            'f1_perclass f1_weighted f1_macro'.split()
        ]
    for result_file in args.result_files:
        svr = SaveValidationResults(outdir=args.outdir, outfile=result_file[0],
                                    series=result_file[1:])
        validation_results_callbacks.append(svr)
    callbacks.extend(validation_results_callbacks)
    callbacks.extend(plotting_callbacks)
    if args.estop:
        callbacks.append(EarlyStopping('val_loss', patience=args.estop))

    # Set Seed. If args.seed is 0 ie None, a random seed value is used and stored
    args.seed = seed_everything(args.seed or None)

    #if os.path.isfile(args.MODEL): #TODO: transfer learning option
    # see https://pytorch-lightning.readthedocs.io/en/stable/transfer_learning.html?highlight=predictions

    # Setup dataloaders
    training_dataset, validation_dataset = get_trainval_datasets(args)
    assert training_dataset.classes == validation_dataset.classes
    args.classes = training_dataset.classes

    # output list of training and validation images
    with open(os.path.join(args.outdir, 'training_images.list'), 'w') as f:
        f.write('\n'.join(sorted(training_dataset.images)))
    with open(os.path.join(args.outdir, 'validation_images.list'), 'w') as f:
        f.write('\n'.join(sorted(validation_dataset.images)))

    # TODO add to args classes removed by class_min and skipped/combined from class_config
    print('Loading Training Dataloader...')
    training_loader = DataLoader(training_dataset, pin_memory=True, shuffle=True,
                                 batch_size=args.batch_size, num_workers=args.loaders)
    print('Loading Validation Dataloader...')
    validation_loader = DataLoader(validation_dataset, pin_memory=True, shuffle=False,
                                   batch_size=args.batch_size, num_workers=args.loaders)

    # Gerry Rig Logger
    class ExperimentWriter_hack(ExperimentWriter):
        def log_metrics(self, metrics_dict, step=None):
            _handle_value = lambda v: v.item() if isinstance(v, torch.Tensor) else v
            metrics = {
                k: _handle_value(v)
                for k, v in metrics_dict.items()
                if k not in ['input_classes', 'output_classes', 'input_srcs', 'outputs']
            }
            self.metrics.append(metrics)

    logger = CSVLogger(save_dir=os.path.join(args.outdir, 'logs'), name='default', version=None)
    os.makedirs(logger.root_dir, exist_ok=True)
    logger._experiment = ExperimentWriter_hack(log_dir=logger.log_dir)

    # Setup Trainer
    chkpt_path = os.path.join(args.outdir, 'chkpts')
    os.makedirs(chkpt_path, exist_ok=True)
    callbacks.append(ModelCheckpoint(dirpath=chkpt_path, monitor='val_loss'))
    trainer = Trainer(deterministic=True,
                      logger=logger,
                      gpus=len(args.gpus) if args.gpus else None,
                      max_epochs=args.emax,
                      min_epochs=args.emin,
                      checkpoint_callback=True,
                      callbacks=callbacks,
                      num_sanity_val_steps=0)

    # Setup Model
    classifier = NeustonModel(args)
    # TODO setup dataloaders in the model, allowing auto-batch-size optimization
    # see https://pytorch-lightning.readthedocs.io/en/stable/training_tricks.html#auto-scaling-of-batch-size

    # Do Training
    trainer.fit(classifier, train_dataloader=training_loader, val_dataloaders=validation_loader)

    # Copy best model
    checkpoint_path = trainer.checkpoint_callback.best_model_path
    output_path = os.path.join(args.outdir, args.model_id + '.ptl')
    copyfile(checkpoint_path, output_path)

    # Copying Logs
    if args.epochs_log:
        output_path = os.path.join(args.outdir, args.epochs_log)
        copyfile(logger.experiment.metrics_file_path, output_path)
    if args.args_log:
        src_path = os.path.join(logger.experiment.log_dir, logger.experiment.NAME_HPARAMS_FILE)
        output_path = os.path.join(args.outdir, args.args_log)
        copyfile(src_path, output_path)

    # ONNX Export
    if args.onnx:
        classifier.eval()
        classifier.freeze()
        output_path_onnx = os.path.join(args.outdir, args.model_id + '.onnx')
        dummy_batch_size = 10
        if 'inception' in str(type(classifier.model)):
            dummy_input = torch.randn(dummy_batch_size, 3, 299, 299, device='cpu')
        else:
            dummy_input = torch.randn(dummy_batch_size, 3, 244, 244, device='cpu')

        # perform export
        torch.onnx.export(
            classifier.model,          # model being run
            dummy_input,               # model input (or a tuple for multiple inputs)
            output_path_onnx,          # where to save the model (can be a file or file-like object)
            export_params=True,        # store the trained parameter weights inside the model file
            # opset_version=10,        # the ONNX version to export the model to
            do_constant_folding=True,  # whether to execute constant folding for optimization
            input_names=['input'],     # the model's input names
            output_names=['output'],   # the model's output names
            dynamic_axes={'input': {0: 'batch_size'},
                          'output': {0: 'batch_size'}},
            #verbose=True,
        )
        print('EXPORTED:', output_path_onnx)

        # include classes file
        classes_output = output_path_onnx + '.classes'
        with open(classes_output, 'w') as f:
            f.write('\n'.join(classifier.hparams.classes))
        print('EXPORTED:', classes_output)
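# --- Added sketch (not from the original) ---
# Sanity-checking the exported model with onnxruntime. The file name is
# hypothetical; the spatial dimensions must match the dummy input used at
# export time, since only the batch axis was declared dynamic above.
import numpy as np
import onnxruntime as ort

session = ort.InferenceSession("model_id.onnx")
batch = np.random.randn(4, 3, 244, 244).astype(np.float32)  # any batch size works
(scores,) = session.run(["output"], {"input": batch})
print(scores.shape)  # -> (4, n_classes)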
def do_training(args):
    # ARG CORRECTIONS AND CHECKS
    date_str = args.cmd_timestamp.split('T')[0]
    args.model_id = args.model_id.format(TRAIN_DATE=date_str, TRAIN_ID=args.TRAIN_ID)

    # make sure output directory exists
    os.makedirs(args.outdir, exist_ok=True)

    # Setup Callbacks
    callbacks = []
    plotting_callbacks = []  # TODO
    validation_results_callbacks = []
    if not args.result_files:
        args.result_files = [
            'results.mat image_basenames output_scores counts_perclass '
            'confusion_matrix f1_perclass f1_weighted f1_macro'.split()
        ]
    for result_file in args.result_files:
        svr = SaveValidationResults(outdir=args.outdir, outfile=result_file[0],
                                    series=result_file[1:])
        validation_results_callbacks.append(svr)
    callbacks.extend(validation_results_callbacks)
    callbacks.extend(plotting_callbacks)

    # Set Seed. If args.seed is 0 ie None, a random seed value is used and stored
    args.seed = seed_everything(args.seed or None)

    #if os.path.isfile(args.MODEL): #TODO: transfer learning option
    # see https://pytorch-lightning.readthedocs.io/en/stable/transfer_learning.html?highlight=predictions

    # Setup dataloaders
    training_dataset, validation_dataset = get_trainval_datasets(args)
    assert training_dataset.classes == validation_dataset.classes
    args.classes = training_dataset.classes
    # TODO add to args classes removed by class_min and skipped/combined from class_config

    print('Loading Training Dataloader...')
    training_loader = DataLoader(training_dataset, pin_memory=True, shuffle=True,
                                 batch_size=args.batch_size, num_workers=args.loaders)
    print('Loading Validation Dataloader...')
    validation_loader = DataLoader(validation_dataset, pin_memory=True, shuffle=False,
                                   batch_size=args.batch_size, num_workers=args.loaders)

    # Gerry Rig Logger
    class ExperimentWriter_hack(ExperimentWriter):
        def log_metrics(self, metrics_dict, step=None):
            _handle_value = lambda v: v.item() if isinstance(v, torch.Tensor) else v
            metrics = {
                k: _handle_value(v)
                for k, v in metrics_dict.items()
                if k not in ['input_classes', 'output_classes', 'input_srcs', 'outputs']
            }
            self.metrics.append(metrics)

    logger = CSVLogger(save_dir=os.path.join(args.outdir, 'logs'), name='default', version=None)
    os.makedirs(logger.root_dir, exist_ok=True)
    logger._experiment = ExperimentWriter_hack(log_dir=logger.log_dir)

    # Setup Trainer (uses the older Lightning kwargs early_stop_callback / checkpoint_callback)
    chkpt_path = os.path.join(args.outdir, 'chkpts')
    os.makedirs(chkpt_path, exist_ok=True)
    trainer = Trainer(
        deterministic=True,
        logger=logger,
        gpus=len(args.gpus) if args.gpus else None,
        max_epochs=args.emax,
        min_epochs=args.emin,
        early_stop_callback=EarlyStopping('val_loss', patience=args.estop) if args.estop else False,
        checkpoint_callback=ModelCheckpoint(filepath=chkpt_path),
        callbacks=callbacks,
        num_sanity_val_steps=0)

    # Setup Model
    classifier = NeustonModel(args)
    # TODO setup dataloaders in the model, allowing auto-batch-size optimization
    # see https://pytorch-lightning.readthedocs.io/en/stable/training_tricks.html#auto-scaling-of-batch-size

    # Do Training
    trainer.fit(classifier, train_dataloader=training_loader, val_dataloaders=validation_loader)

    # Copy best model
    checkpoint_path = trainer.checkpoint_callback.best_model_path
    output_path = os.path.join(args.outdir, args.model_id + '.ptl')
    copyfile(checkpoint_path, output_path)

    # Copying Logs
    if args.epochs_log:
        output_path = os.path.join(args.outdir, args.epochs_log)
        copyfile(logger.experiment.metrics_file_path, output_path)
    if args.args_log:
        src_path = os.path.join(logger.experiment.log_dir, logger.experiment.NAME_HPARAMS_FILE)
        output_path = os.path.join(args.outdir, args.args_log)
        copyfile(src_path, output_path)
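# --- Added sketch (not from the original) ---
# Reloading the copied checkpoint for inference, assuming NeustonModel is a
# standard LightningModule; the path is hypothetical.
model = NeustonModel.load_from_checkpoint("outdir/my_model.ptl")
model.eval()
model.freeze()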
    # (fragment: tail end of the model's validation step)
        loss = self.Loss(Logits, Val_Data.y)
        if self.loss_only:
            Result = {'val_loss': loss}
            self.logger.experiment.log_metrics(Result)
            return Result
        else:
            Acc_Bool = Logits == Val_Data.y
            Acc = sum(Acc_Bool.long()) * 100 // len(Logits)  # percent correct
            Result = {"val_loss": loss, "val_accuracy": Acc.float()}
            self.logger.experiment.log_metrics(Result)
            return Result

    # Lightning's hook is named validation_epoch_end; renamed from
    # valid_epoch_end so it is actually invoked after each validation epoch.
    def validation_epoch_end(self, Outputs):
        Avg_Loss = stack([x['val_loss'] for x in Outputs]).mean()  # stack = torch.stack
        if self.loss_only:
            Epoch_Log = {"avg_val_loss": Avg_Loss}
            self.logger.experiment.log_metrics(Epoch_Log)
            return Epoch_Log
        else:
            Avg_Acc = stack([x['val_accuracy'] for x in Outputs]).mean()
            Epoch_Log = {"avg_val_loss": Avg_Loss, "avg_val_accuracy": Avg_Acc}
            self.logger.experiment.log_metrics(Epoch_Log)
            return Epoch_Log

if __name__ == "__main__":
    seed_everything(42)
    Logger = CSVLogger("logs", name="Trial", version="SAGEConv")
    Logger.save()
    Mod = GNN(2, 50, 150, 200, 121)
    trainer = Trainer(logger=Logger, max_epochs=1)
    trainer.fit(Mod)
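# --- Added sketch (not from the original) ---
# Inspecting what the CSVLogger wrote; with name="Trial" and version="SAGEConv"
# the metrics land in logs/Trial/SAGEConv/metrics.csv. Column names follow the
# dict keys logged above.
import pandas as pd

metrics = pd.read_csv("logs/Trial/SAGEConv/metrics.csv")
print(metrics[["val_loss", "avg_val_loss"]].tail())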
@classmethod
def get_logger(cls, save_dir: str, version=0, name=None, **__) -> LightningLoggerBase:
    return CSVLogger(save_dir=save_dir, version=str(version), name=name)
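# --- Added usage sketch (not from the original) ---
# Hypothetical call site; LoggerFactory stands in for whatever class
# defines get_logger.
logger = LoggerFactory.get_logger(save_dir="logs", version=3, name="baseline")
trainer = Trainer(logger=logger)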
from pytorch_lightning import LightningModule, Trainer, TrainResult, seed_everything
from pytorch_lightning.loggers.csv_logs import CSVLogger
from data import Load_Dataset
from model import GNN
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("Batch_Size", type=int)
parser.add_argument("Max_Epoch", type=int)
Args = parser.parse_args()
Epochs_Max = Args.Max_Epoch
Batch_Size = Args.Batch_Size

seed_everything(42)
Logger = CSVLogger("logs", name="PPI_SAGEConv",
                   version=str(Batch_Size) + "_" + str(Epochs_Max))
Mod = GNN(Batch_Size, 50, 500, 200, 121)
trainer = Trainer(logger=Logger, max_epochs=Epochs_Max)
trainer.fit(Mod)
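# --- Added usage note (not from the original) ---
# The positional arguments are Batch_Size then Max_Epoch, so a run looks
# like (script name hypothetical):
#   python train_ppi.py 64 10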
parser.add_argument("--with_neptune", action="store_true") parser = pl.Trainer.add_argparse_args(parser) parser = AncientPairDataModule.add_data_args(parser) parser = ModelInterface.add_trainer_args(parser) parser = argparse.ArgumentParser(parents=[parser]) args = parser.parse_args() if args.with_neptune: from pytorch_lightning.loggers.neptune import NeptuneLogger logger = NeptuneLogger(project_name="lds/ancient-chinese-translator", ) logger.log_hyperparams(args) else: from pytorch_lightning.loggers.csv_logs import CSVLogger logger = CSVLogger("logs", name="ancient-chinese") # 加载训练数据 data_module = AncientPairDataModule( **get_args_by_parser(args, AncientPairDataModule.parser), ) # 提前加载数据,为了获取词表相关信息 data_module.setup() model = ModelInterface( src_vocab=data_module.src_vocab, trg_vocab=data_module.trg_vocab, num_epoch=args.max_epochs, steps_per_epoch=len(data_module.train_dataloader()), model_config=get_args_by_parser(args, ModelInterface.model_cls.parser), **get_args_by_parser(args, ModelInterface.parser),