def save_test_results(results, output_path):
    """
    Serialize test results into a TFRecord file and hand them back.

    The records are written to a file named 'test_results' inside
    output_path; the directory is created first if it is missing.

    Args:
        results: test results, passed to to_serialized_examples to obtain
            the serialized records that are written.
        output_path: directory in which the 'test_results' file is written.

    Returns:
        The same results object that was passed in.
    """
    path_utils.create_dir_if_not_exists(output_path)
    record_file = os.path.join(output_path, 'test_results')

    with tf.io.TFRecordWriter(record_file) as record_writer:
        for serialized in to_serialized_examples(results):
            record_writer.write(serialized)

    return results
def set_pipelines_dir(self, pipelines_dir: Text):
    """
    Points this object at a new pipelines directory and calls save().

    Args:
        pipelines_dir: new path to pipelines dir; it is created if it does
            not exist yet.
    """
    # Make sure the directory exists before it is recorded on the instance.
    path_utils.create_dir_if_not_exists(pipelines_dir)

    self.pipelines_dir = pipelines_dir
    self.save()
def save(self, output_dir: Text):
    """
    Write the trained tokenizer model into a directory on disk.

    Args:
        output_dir: Directory that receives the tokenizer model files. It is
            created when missing.
    """
    # The destination has to exist up front: save_model does not create it.
    path_utils.create_dir_if_not_exists(output_dir)

    self.tokenizer.save_model(directory=output_dir)
def run_fn(self):
    """
    Train a model with a plain PyTorch loop and save it as 'model.pt'.

    Datasets are built with self.input_fn, the model with self.model_fn.
    Training uses BCEWithLogitsLoss and Adam (lr=0.001) for self.epoch
    epochs, printing the mean loss and accuracy per epoch.

    The trained model is written to self.serving_model_dir. When that
    directory is remote, the model is first saved into a local staging
    directory ('__temp_model_dir__' under the current working directory),
    which is then copied to the destination and removed.

    Raises:
        PermissionError: if the serving dir is remote and the local staging
            directory already exists.
    """
    train_dataset = self.input_fn(self.train_files,
                                  self.tf_transform_output)
    eval_dataset = self.input_fn(self.eval_files,
                                 self.tf_transform_output)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    model = self.model_fn(train_dataset, eval_dataset)
    model.to(device)

    criterion = nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    model.train()
    for e in range(1, self.epoch + 1):
        epoch_loss = 0
        epoch_acc = 0
        step_count = 0
        for x, y in train_dataset:
            step_count += 1
            X_batch, y_batch = x.to(device), y.to(device)
            optimizer.zero_grad()

            y_pred = model(X_batch)
            loss = criterion(y_pred, y_batch)
            acc = binary_acc(y_pred, y_batch)

            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()
            epoch_acc += acc.item()

        # An empty dataset yields zero steps; avoid dividing by zero.
        steps = max(step_count, 1)
        print(f'Epoch {e:03}: | Loss: '
              f'{epoch_loss / steps:.5f} | Acc: '
              f'{epoch_acc / steps:.3f}')

    path_utils.create_dir_if_not_exists(self.serving_model_dir)
    if path_utils.is_remote(self.serving_model_dir):
        temp_model_dir = '__temp_model_dir__'
        temp_path = os.path.join(os.getcwd(), temp_model_dir)
        if path_utils.is_dir(temp_path):
            raise PermissionError(f'{temp_path} is used as a temp path but '
                                  'it already exists. Please remove it to '
                                  'continue.')

        # torch.save writes a single file, so the staging directory has to
        # exist and the model goes inside it before the directory is copied.
        path_utils.create_dir_if_not_exists(temp_path)
        try:
            torch.save(model, os.path.join(temp_path, 'model.pt'))
            path_utils.copy_dir(temp_path, self.serving_model_dir)
        finally:
            # Always clean up, otherwise the next run trips the check above.
            path_utils.rm_dir(temp_path)
    else:
        torch.save(model, os.path.join(self.serving_model_dir, 'model.pt'))
def register_pipeline(self, file_name: Text, config: Dict[Text, Any]):
    """
    Writes a pipeline config as a YAML file into the pipelines directory.

    Args:
        file_name (str): name of the YAML file inside the pipelines dir.
        config (dict): dict representation of ZenML config.
    """
    self._check_if_initialized()

    target_dir = self.zenml_config.get_pipelines_dir()
    # The directory may not exist yet, so create it before writing.
    path_utils.create_dir_if_not_exists(target_dir)

    yaml_path = os.path.join(target_dir, file_name)
    yaml_utils.write_yaml(yaml_path, config)
def to_config(path: Text,
              artifact_store_path: Text = None,
              metadata_store: Optional[Type[ZenMLMetadataStore]] = None,
              pipelines_dir: Text = None):
    """
    Writes an initial config file below `path`.

    The file is written to ZENML_CONFIG_NAME inside the ZENML_DIR_NAME
    directory under `path`. The config directory, the artifact store
    directory and the pipelines directory are created if missing.

    Args:
        path (str): path to a directory.
        artifact_store_path (str): path where to store artifacts. Defaults
            to ARTIFACT_STORE_DEFAULT_DIR inside the config directory;
            a given value is resolved via resolve_relative_path.
        metadata_store: metadata store definition. Defaults to a
            SQLiteMetadataStore located inside the artifact store.
        pipelines_dir (str): path where to store pipeline configs. Defaults
            to PIPELINES_DEFAULT_DIR_NAME under `path`; a given value is
            resolved via resolve_relative_path.

    Raises:
        AssertionError: if the config file already exists.
    """
    config_dir_path = os.path.join(path, ZENML_DIR_NAME)
    config_path = os.path.join(config_dir_path, ZENML_CONFIG_NAME)

    # Never overwrite an existing config.
    if path_utils.file_exists(config_path):
        raise AssertionError(
            f'.zenml file already exists at {config_path}. Cannot replace. '
            f'Please delete the {config_dir_path} directory first.')

    path_utils.create_dir_if_not_exists(config_dir_path)

    # Artifact store location: resolve a given path, else use the default.
    if artifact_store_path is not None:
        artifact_store_path = path_utils.resolve_relative_path(
            artifact_store_path)
    else:
        artifact_store_path = os.path.join(config_dir_path,
                                           ARTIFACT_STORE_DEFAULT_DIR)
    path_utils.create_dir_if_not_exists(artifact_store_path)

    # Metadata store: serialize the given one, else default to SQLite.
    if metadata_store is not None:
        metadata_dict = metadata_store.to_config()
    else:
        sqlite_uri = os.path.join(artifact_store_path,
                                  ML_METADATA_SQLITE_DEFAULT_NAME)
        # Imported only when needed, as in the rest of this function's
        # original layout.
        from zenml.metadata import SQLiteMetadataStore
        metadata_dict = SQLiteMetadataStore(sqlite_uri).to_config()

    # Pipelines directory: resolve a given path, else use the default.
    if pipelines_dir is not None:
        pipelines_dir = path_utils.resolve_relative_path(pipelines_dir)
    else:
        pipelines_dir = os.path.join(path, PIPELINES_DEFAULT_DIR_NAME)
    path_utils.create_dir_if_not_exists(pipelines_dir)

    yaml_utils.write_yaml(config_path, {
        ARTIFACT_STORE_KEY: artifact_store_path,
        METADATA_KEY: metadata_dict,
        PIPELINES_DIR_KEY: pipelines_dir,
    })
def run_fn(self):
    """
    Train a small LightningModule and save its checkpoint as 'model.ckpt'.

    Datasets are built with self.input_fn and served to the Trainer through
    the module's train_dataloader / val_dataloader hooks. The network is
    Linear(8, 64) -> ReLU -> Linear(64, 1), trained with binary cross
    entropy on logits and Adam (lr=0.001) for self.epoch epochs.

    The checkpoint is written to self.serving_model_dir. When that
    directory is remote, the checkpoint is first saved into a local staging
    directory ('__temp_model_dir__' under the current working directory),
    which is then copied to the destination and removed.

    Raises:
        PermissionError: if the serving dir is remote and the local staging
            directory already exists.
    """
    train_dataset = self.input_fn(self.train_files,
                                  self.tf_transform_output)
    eval_dataset = self.input_fn(self.eval_files,
                                 self.tf_transform_output)

    class LitModel(pl.LightningModule):
        """Two-layer network producing a single logit per example."""

        def __init__(self):
            super().__init__()
            self.l1 = torch.nn.Linear(8, 64)
            self.layer_out = torch.nn.Linear(64, 1)

        def forward(self, x):
            x = torch.relu(self.l1(x))
            x = self.layer_out(x)
            return x

        def training_step(self, batch, batch_idx):
            x, y = batch
            y_hat = self(x)
            loss = F.binary_cross_entropy_with_logits(y_hat, y)
            tensorboard_logs = {'train_loss': loss}
            return {'loss': loss, 'log': tensorboard_logs}

        def configure_optimizers(self):
            return torch.optim.Adam(self.parameters(), lr=0.001)

        def train_dataloader(self):
            return train_dataset

        def validation_step(self, batch, batch_idx):
            x, y = batch
            y_hat = self(x)
            return {
                'val_loss': F.binary_cross_entropy_with_logits(y_hat, y)
            }

        def validation_epoch_end(self, outputs):
            avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
            tensorboard_logs = {'val_loss': avg_loss}
            return {'avg_val_loss': avg_loss, 'log': tensorboard_logs}

        def val_dataloader(self):
            return eval_dataset

    model = LitModel()

    # most basic trainer, uses good defaults
    trainer = Trainer(
        default_root_dir=self.log_dir,
        max_epochs=self.epoch,
    )
    trainer.fit(model)

    path_utils.create_dir_if_not_exists(self.serving_model_dir)
    if path_utils.is_remote(self.serving_model_dir):
        temp_model_dir = '__temp_model_dir__'
        temp_path = os.path.join(os.getcwd(), temp_model_dir)
        if path_utils.is_dir(temp_path):
            raise PermissionError(f'{temp_path} is used as a temp path but '
                                  'it already exists. Please remove it to '
                                  'continue.')

        # Create the staging dir explicitly so the checkpoint write does not
        # rely on save_checkpoint creating missing parent directories.
        path_utils.create_dir_if_not_exists(temp_path)
        try:
            # Same file name as the local branch below.
            trainer.save_checkpoint(os.path.join(temp_path, 'model.ckpt'))
            path_utils.copy_dir(temp_path, self.serving_model_dir)
        finally:
            # Always clean up, otherwise the next run trips the check above.
            path_utils.rm_dir(temp_path)
    else:
        trainer.save_checkpoint(
            os.path.join(self.serving_model_dir, 'model.ckpt'))
def run_fn(self):
    """
    Train a model with a PyTorch loop, log to TensorBoard, test and save it.

    Datasets are built with self.input_fn, the model with self.model_fn.
    Each batch is a (features, labels, _) triple whose first two items are
    dicts of tensors; their values are concatenated along the last axis.
    Training uses BCEWithLogitsLoss and Adam (lr=0.001) for self.epochs
    epochs. Per-step loss and accuracy, plus the model graph on the very
    first step, are written to a SummaryWriter at self.log_dir.

    After training, self.test_fn is run on the eval dataset and its results
    are stored via utils.save_test_results. The model is then written as
    'model.pt' to self.serving_model_dir. When that directory is remote, the
    model is first saved into a local staging directory
    ('__temp_model_dir__' under the current working directory), which is
    then copied to the destination and removed.

    Raises:
        PermissionError: if the serving dir is remote and the local staging
            directory already exists.
    """
    train_dataset = self.input_fn(self.train_files,
                                  self.tf_transform_output)
    eval_dataset = self.input_fn(self.eval_files,
                                 self.tf_transform_output)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    model = self.model_fn(train_dataset, eval_dataset)
    model.to(device)

    criterion = nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    writer = SummaryWriter(self.log_dir)
    try:
        model.train()

        total_count = 0
        for e in range(1, self.epochs + 1):
            epoch_loss = 0
            epoch_acc = 0
            step_count = 0
            for x, y, _ in train_dataset:
                step_count += 1
                total_count += 1

                x_batch = torch.cat([v.to(device) for v in x.values()],
                                    dim=-1)
                y_batch = torch.cat([v.to(device) for v in y.values()],
                                    dim=-1)
                optimizer.zero_grad()

                y_pred = model(x_batch)
                loss = criterion(y_pred, y_batch)
                acc = binary_acc(y_pred, y_batch)

                loss.backward()
                optimizer.step()

                epoch_loss += loss.item()
                epoch_acc += acc.item()

                # The graph only needs to be recorded once.
                if e == 1 and step_count == 1:
                    writer.add_graph(model, x_batch)

                writer.add_scalar('training_loss', loss, total_count)
                writer.add_scalar('training_accuracy', acc, total_count)

            # An empty dataset yields zero steps; avoid dividing by zero.
            steps = max(step_count, 1)
            print(f'Epoch {e:03}: | Loss: '
                  f'{epoch_loss / steps:.5f} | Acc: '
                  f'{epoch_acc / steps:.3f}')
    finally:
        # Close the writer so pending events are written out even when
        # training fails part-way.
        writer.close()

    # test
    test_results = self.test_fn(model, eval_dataset)
    utils.save_test_results(test_results, self.test_results)

    path_utils.create_dir_if_not_exists(self.serving_model_dir)
    if path_utils.is_remote(self.serving_model_dir):
        temp_model_dir = '__temp_model_dir__'
        temp_path = os.path.join(os.getcwd(), temp_model_dir)
        if path_utils.is_dir(temp_path):
            raise PermissionError(f'{temp_path} is used as a temp path but '
                                  'it already exists. Please remove it to '
                                  'continue.')

        # torch.save writes a single file, so the staging directory has to
        # exist and the model goes inside it before the directory is copied.
        path_utils.create_dir_if_not_exists(temp_path)
        try:
            torch.save(model, os.path.join(temp_path, 'model.pt'))
            path_utils.copy_dir(temp_path, self.serving_model_dir)
        finally:
            # Always clean up, otherwise the next run trips the check above.
            path_utils.rm_dir(temp_path)
    else:
        torch.save(model, os.path.join(self.serving_model_dir, 'model.pt'))