# output module: predict U0, standardized with training-set statistics
output_modules = [
    spk.atomistic.Atomwise(
        n_in=representation.n_atom_basis,
        property=QM9.U0,
        mean=means[QM9.U0],
        stddev=stddevs[QM9.U0],
        atomref=atomrefs[QM9.U0],
    )
]
model = spk.AtomisticModel(representation, output_modules)

# build optimizer
optimizer = Adam(model.parameters(), lr=1e-4)

# hooks
logging.info("build trainer")
metrics = [MeanAbsoluteError(p, p) for p in properties]
hooks = [
    CSVHook(log_path=model_dir, metrics=metrics),
    ReduceLROnPlateauHook(optimizer),
]

# trainer
loss = mse_loss(properties)
trainer = Trainer(
    model_dir,
    model=model,
    hooks=hooks,
    loss_fn=loss,
    optimizer=optimizer,
    train_loader=train_loader,
    validation_loader=val_loader,
)

# run training
logging.info("training")
trainer.train(device="cpu")
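# The fragment above assumes the standard SchNetPack 0.3 setup for
# `representation`, `means`/`stddevs`/`atomrefs`, and the data loaders.
# A minimal sketch of that setup, following the 0.3 tutorial API; the
# paths, split sizes, and batch sizes are illustrative assumptions, not
# values from the original script:
import schnetpack as spk
from schnetpack.datasets import QM9

qm9data = QM9('qm9.db', download=True)
train, val, test = spk.train_test_split(
    data=qm9data, num_train=100000, num_val=10000, split_file='split.npz'
)
train_loader = spk.AtomsLoader(train, batch_size=100, shuffle=True)
val_loader = spk.AtomsLoader(val, batch_size=100)

# single-atom reference energies and per-atom statistics for standardization
atomrefs = qm9data.get_atomref(QM9.U0)
means, stddevs = train_loader.get_statistics(
    QM9.U0, divide_by_atoms=True, single_atom_ref=atomrefs
)
representation = spk.representation.SchNet(n_interactions=6)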
# hooks
logging.info("build trainer")
metrics = [MeanAbsoluteError(p, p) for p in properties]
# hooks = [CSVHook(log_path=model_dir, metrics=metrics), ReduceLROnPlateauHook(optimizer)]
hooks = [CSVHook(log_path=model_dir, metrics=metrics)]

# trainer
clip_norm = None
loss = build_mse_loss(properties, loss_tradeoff=[0.01, 0.99])
trainer = Trainer(
    model_dir,
    model=model,
    hooks=hooks,
    loss_fn=loss,
    optimizer=optimizer,
    train_loader=train_loader,
    validation_loader=val_loader,
    clip_norm=clip_norm,
)

# record the number of trainable parameters
total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
np.savetxt('./parms.txt', [total_params], fmt='%d')

# run training
logging.info("training")
# trainer.train(device="cpu", n_epochs=1000)
# trainer.train(device="cuda", n_epochs=1)
trainer.train(device="cuda", n_epochs=300)
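# After training, the Trainer keeps the model with the lowest validation
# loss as `best_model` inside model_dir (SchNetPack 0.3 behaviour). A
# minimal evaluation sketch, assuming a `test_loader` built like the
# training loaders and an 'energy' target; both are assumptions here:
import os
import torch

best_model = torch.load(os.path.join(model_dir, 'best_model')).to('cuda')

abs_err, n_samples = 0.0, 0
for batch in test_loader:
    batch = {k: v.to('cuda') for k, v in batch.items()}
    result = best_model(batch)
    abs_err += (result['energy'] - batch['energy']).abs().sum().item()
    n_samples += batch['energy'].shape[0]
print('test MAE (energy): %.6f' % (abs_err / n_samples))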
def train(self, n_epochs, lr, loss_fn, batch_size, num_workers, device,
          patience=100, threshold_ratio=0.0001):
    self.i += 1

    # split the reduced dataset into training and validation subsets
    reduced = self.dataset.create_subset(self.idx_red)
    num_val = round(0.10 * len(reduced))
    train, val, test = train_test_split(
        data=reduced,
        num_train=len(reduced) - num_val,
        num_val=num_val,
    )
    train_loader = AtomsLoader(
        train, batch_size=round(batch_size), num_workers=num_workers,
        shuffle=True, pin_memory=True,
    )
    val_loader = AtomsLoader(
        val, batch_size=round(batch_size / 2), num_workers=num_workers,
        pin_memory=True,
    )

    # SchNet representation with an energy head; forces and stress are
    # obtained as derivatives of the predicted energy
    representation = SchNet(
        n_atom_basis=self.n_atom_basis,
        n_filters=self.n_filters,
        n_interactions=self.n_interactions,
        cutoff=self.cutoff,
        n_gaussians=self.n_gaussians,
    )
    output_modules = Atomwise(
        representation.n_atom_basis,
        n_layers=self.n_layers,
        property='energy',
        derivative='forces',
        stress='stress',
        negative_dr=True,
        create_graph=True,
    )
    model = AtomisticModel(representation, output_modules)
    optimizer = Adam(model.parameters(), lr=lr)

    hooks = [
        CSVHook('log_%i' % self.i, [
            MeanAbsoluteError('energy', 'energy'),
            MeanAbsoluteError('forces', 'forces', element_wise=True),
            MeanAbsoluteError('stress', 'stress'),
            R2Score('energy', 'energy'),
            R2Score('forces', 'forces', element_wise=True),
            R2Score('stress', 'stress'),
        ], every_n_epochs=1),
        EarlyStoppingHook(patience, threshold_ratio),
    ]

    trainer = Trainer(
        'output_%i/' % self.i, model, loss_fn, optimizer,
        train_loader, val_loader, hooks=hooks,
        keep_n_checkpoints=1, checkpoint_interval=n_epochs,
    )

    print('Running training!')
    print('  Reduced images:    %i' % len(reduced))
    print('  Training images:   %i' % len(train))
    print('  Validation images: %i' % len(val))
    print('')
    trainer.train(device, n_epochs)
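# `loss_fn` is passed into the method above. A minimal sketch of a
# compatible loss for the energy/forces/stress model it builds, using the
# SchNetPack 0.3 `build_mse_loss` helper; the trade-off weights are
# illustrative assumptions, not values from the original code:
from schnetpack.train import build_mse_loss

loss_fn = build_mse_loss(
    ['energy', 'forces', 'stress'],  # must match the Atomwise outputs above
    loss_tradeoff=[0.2, 0.6, 0.2],   # hypothetical energy/forces/stress weights
)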
# move the model to the GPU
model.to(args.device)

# Make the loss function, optimizer, and hooks -> Add them to a trainer
def loss(b, p):
    # stack the target properties into a single (batch, n_props) tensor
    y_true = torch.stack(
        tuple(torch.squeeze(b[s]) for s in options['output_props']), 1
    )
    return torch.nn.functional.mse_loss(p['y'], y_true)

# Get only the fittable parameters
trainable_params = filter(lambda p: p.requires_grad, model.parameters())
opt = torch.optim.Adam(trainable_params, lr=1e-4)

hook_list = [
    hooks.CSVHook(work_dir, []),
    hooks.ReduceLROnPlateauHook(
        opt,
        patience=lr_patience(len(train_data)),
        factor=lr_decay,
        min_lr=lr_min,
        stop_after_min=True,
    ),
]
logger.info('Created loss, hooks, and optimizer')

trainer = Trainer(
    work_dir, model, loss, opt, train_load, valid_load,
    hooks=hook_list,
    checkpoint_interval=chkp_interval(len(train_data)),
)

# Run the training
logger.info('Started training')
sys.stdout.flush()
trainer.train(args.device)

# Mark training as complete
with open(os.path.join(work_dir, 'finished'), 'w') as fp:
    print(str(datetime.now()), file=fp)
logger.info('Training finished')
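# `lr_patience` and `chkp_interval` are helpers defined elsewhere in the
# original script; all that is visible here is that both take the
# training-set size. Hypothetical stand-ins to make the fragment
# self-contained; the scaling rules are illustrative assumptions only:
def lr_patience(n_train):
    # be more patient on small datasets, where validation loss is noisy
    return max(10, 25000 // max(1, n_train))

def chkp_interval(n_train):
    # checkpoint more often when epochs are cheap (small datasets)
    return max(1, 100000 // max(1, n_train))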
# hooks
logging.info("build trainer")
metrics = [MeanAbsoluteError(p, p) for p in properties] + [
    RootMeanSquaredError(p, p) for p in properties
]
hooks = [
    CSVHook(log_path=model_dir, metrics=metrics),
    ReduceLROnPlateauHook(optimizer),
    TensorboardHook(log_path=model_dir, metrics=metrics),
]

# trainer
# loss = lambda b, p: F.mse_loss(p["y"], b['energy','force'])
loss = mse_loss(properties)
trainer = Trainer(
    model_dir,
    model=model,
    hooks=hooks,
    loss_fn=loss,
    optimizer=optimizer,
    train_loader=train_loader,
    validation_loader=val_loader,
    # device = 'cuda'
)

# run training
logging.info("training")
trainer.train(device="cpu", n_epochs=10000)
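# CSVHook writes a per-epoch `log.csv` into model_dir (time, learning rate,
# train loss, validation loss, then the metrics; SchNetPack 0.3 layout),
# and the TensorboardHook output can be viewed with
# `tensorboard --logdir <model_dir>`. A minimal sketch for inspecting the
# learning curve, assuming that log layout:
import os
import numpy as np

results = np.loadtxt(os.path.join(model_dir, 'log.csv'),
                     skiprows=1, delimiter=',')
val_loss = results[:, 3]
print('best validation loss: %.6f (epoch %i)'
      % (val_loss.min(), val_loss.argmin() + 1))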