def create_predictor(
    config: PyTextConfig,
    model_file: Optional[str] = None,
    db_type: str = CAFFE2_DB_TYPE,
    task: Optional[NewTask] = None,
) -> Predictor:
    """
    Build a prediction callable from a training config and an exported caffe2
    model file. The model file should be created by calling export on a
    trained model snapshot.
    """
    # Give this predictor its own caffe2 workspace so its blobs do not
    # collide with other loaded models.
    workspace_id = str(uuid.uuid4())
    workspace.SwitchWorkspace(workspace_id, True)

    predict_net = predictor_exporter.prepare_prediction_net(
        filename=model_file or config.export_caffe2_path, db_type=db_type
    )

    # Reuse the caller-supplied task when given; otherwise rebuild from config.
    new_task = task if task else NewTask.from_config(config.task)

    # Keep only the tensorizers that produce model inputs.
    input_tensorizers = {}
    for name, tensorizer in new_task.data.tensorizers.items():
        if tensorizer.is_input:
            input_tensorizers[name] = tensorizer

    def predictor(input):
        return _predict(
            workspace_id, predict_net, new_task.model, input_tensorizers, input
        )

    return predictor
def create_predictor(
    config: PyTextConfig, model_file: Optional[str] = None
) -> Predictor:
    """
    Create a simple prediction API from a training config and an exported
    caffe2 model file. This model file should be created by calling export
    on a trained model snapshot.
    """
    # Give this predictor its own caffe2 workspace so its blobs do not
    # collide with other loaded models.
    workspace_id = str(uuid.uuid4())
    workspace.SwitchWorkspace(workspace_id, True)

    predict_net = predictor_exporter.prepare_prediction_net(
        filename=model_file or config.export_caffe2_path, db_type=CAFFE2_DB_TYPE
    )

    # PEP 8 snake_case (was camelCase); a tuple lets isinstance test all
    # supported types in one call instead of any() over a loop.
    supported_input_tensorizers = (
        FloatListTensorizer,
        GazetteerTensorizer,
        TokenTensorizer,
    )

    new_task = NewTask.from_config(config.task)
    # Keep only the tensorizers whose type marks them as model inputs.
    input_tensorizers = {
        name: tensorizer
        for name, tensorizer in new_task.data.tensorizers.items()
        if isinstance(tensorizer, supported_input_tensorizers)
    }

    return lambda input: _predict(
        workspace_id, predict_net, new_task.model, input_tensorizers, input
    )
def train(
    self,
    training_data: DataLoader,
    eval_data: DataLoader,
    model: Model,
    optimizer: Optimizer,
    label_names: List[str],
    scheduler: Scheduler = None,
    sparsifier: Sparsifier = None,
    metric_reporter: MetricReporter = None,
    train_config: PyTextConfig = None,
    rank: int = 0,
) -> Tuple[torch.nn.Module, Any]:
    """
    Run training over `training_data`, evaluating on `eval_data`, and return
    the result of `train_from_state` (trained module plus best metrics).

    Optional `scheduler`/`sparsifier` override the trainer's current ones;
    when `train_config` or `metric_reporter` are omitted, defaults suitable
    for classification are constructed.
    """
    # temp workaround to minimize changes to TaskTrainer
    if not train_config:
        train_config = PyTextConfig(
            task=NewTask.Config(model=RoBERTa.Config), version=20
        )
    if scheduler:
        self.scheduler = scheduler
    if sparsifier:
        self.sparsifier = sparsifier

    state = TrainingState(
        model=model,
        optimizer=optimizer,
        scheduler=self.scheduler,
        sparsifier=self.sparsifier,
        rank=rank,
    )

    # BUG FIX: the caller-supplied metric_reporter was previously
    # overwritten unconditionally; only build the default reporter when
    # the caller did not provide one.
    if metric_reporter is None:
        metric_reporter_config = ClassificationMetricReporter.Config(
            output_path="/tmp/test_out.txt",
            pep_format=False,
            model_select_metric=ComparableClassificationMetric.ACCURACY,  # in json: "accuracy"
            target_label=None,
            text_column_names=["text"],
            additional_column_names=[],
            recall_at_precision_thresholds=[0.2, 0.4, 0.6, 0.8, 0.9],
        )
        metric_reporter = ClassificationMetricReporter.from_config_and_label_names(
            config=metric_reporter_config, label_names=label_names
        )

    return self.train_from_state(
        state, training_data, eval_data, metric_reporter, train_config
    )
def create_predictor(
    config: PyTextConfig,
    model_file: Optional[str] = None,
    db_type: str = CAFFE2_DB_TYPE,
    task: Optional[NewTask] = None,
    cache_size: int = 0,
) -> Predictor:
    """
    Build a prediction callable from a training config and an exported caffe2
    model file (created by exporting a trained model snapshot).

    cache_size semantics: 0 disables caching, a negative value wraps the
    predictor in an unbounded LRU cache, a positive value bounds the cache
    to that many entries.
    """
    # Give this predictor its own caffe2 workspace so its blobs do not
    # collide with other loaded models.
    workspace_id = str(uuid.uuid4())
    workspace.SwitchWorkspace(workspace_id, True)

    # Short-circuit: only resolve the configured export path when no explicit
    # model file was given.
    model_path = model_file or PathManager.get_local_path(config.export_caffe2_path)
    predict_net = predictor_exporter.prepare_prediction_net(
        filename=model_path, db_type=db_type
    )

    new_task = task or NewTask.from_config(config.task)
    # Keep only the tensorizers that produce model inputs.
    input_tensorizers = {
        name: t for name, t in new_task.data.tensorizers.items() if t.is_input
    }

    def predict_fn(input):
        return _predict(
            workspace_id, predict_net, new_task.model, input_tensorizers, input
        )

    if cache_size == 0:
        return predict_fn
    # Negative -> unbounded cache; positive -> bounded to cache_size entries.
    maxsize = None if cache_size < 0 else cache_size
    return lru_cache(maxsize=maxsize)(predict_fn)