Пример #1
0
def create_predictor(
    config: PyTextConfig,
    model_file: Optional[str] = None,
    db_type: str = CAFFE2_DB_TYPE,
    task: Optional[NewTask] = None,
) -> Predictor:
    """
    Create a simple prediction API from a training config and an exported caffe2
    model file. This model file should be created by calling export on a trained
    model snapshot.
    """
    workspace_id = str(uuid.uuid4())
    workspace.SwitchWorkspace(workspace_id, True)
    predict_net = predictor_exporter.prepare_prediction_net(
        filename=model_file or config.export_caffe2_path, db_type=db_type
    )

    new_task = task or NewTask.from_config(config.task)
    input_tensorizers = {
        name: tensorizer
        for name, tensorizer in new_task.data.tensorizers.items()
        if tensorizer.is_input
    }
    return lambda input: _predict(
        workspace_id, predict_net, new_task.model, input_tensorizers, input
    )
Пример #2
0
def create_predictor(config: PyTextConfig,
                     model_file: Optional[str] = None) -> Predictor:
    """
    Create a simple prediction API from a training config and an exported caffe2
    model file. This model file should be created by calling export on a trained
    model snapshot.
    """
    workspace_id = str(uuid.uuid4())
    workspace.SwitchWorkspace(workspace_id, True)
    predict_net = predictor_exporter.prepare_prediction_net(
        filename=model_file or config.export_caffe2_path,
        db_type=CAFFE2_DB_TYPE)

    supportedInputTensorizers = [
        FloatListTensorizer,
        GazetteerTensorizer,
        TokenTensorizer,
    ]
    new_task = NewTask.from_config(config.task)
    input_tensorizers = {
        name: tensorizer
        for name, tensorizer in new_task.data.tensorizers.items() if any(
            isinstance(tensorizer, t) for t in supportedInputTensorizers)
    }

    return lambda input: _predict(workspace_id, predict_net, new_task.model,
                                  input_tensorizers, input)
Пример #3
0
    def train(
        self,
        training_data: DataLoader,
        eval_data: DataLoader,
        model: Model,
        optimizer: Optimizer,
        label_names: List[str],
        scheduler: Scheduler = None,
        sparsifier: Sparsifier = None,
        metric_reporter: MetricReporter = None,
        train_config: PyTextConfig = None,
        rank: int = 0,
    ) -> Tuple[torch.nn.Module, Any]:
        # temp workaround to minimize changes to TaskTrainer
        if not train_config:
            train_config = PyTextConfig(
                task=NewTask.Config(model=RoBERTa.Config), version=20)
        if scheduler:
            self.scheduler = scheduler
        if sparsifier:
            self.sparsifier = sparsifier

        state = TrainingState(
            model=model,
            optimizer=optimizer,
            scheduler=self.scheduler,
            sparsifier=self.sparsifier,
            rank=rank,
        )
        metric_reporter_config = ClassificationMetricReporter.Config(
            output_path="/tmp/test_out.txt",
            pep_format=False,
            model_select_metric=ComparableClassificationMetric.
            ACCURACY,  # in json: "accuracy"
            target_label=None,
            text_column_names=["text"],
            additional_column_names=[],
            recall_at_precision_thresholds=[0.2, 0.4, 0.6, 0.8, 0.9],
        )
        metric_reporter = ClassificationMetricReporter.from_config_and_label_names(
            config=metric_reporter_config, label_names=label_names)
        return self.train_from_state(state, training_data, eval_data,
                                     metric_reporter, train_config)
Пример #4
0
def create_predictor(
    config: PyTextConfig,
    model_file: Optional[str] = None,
    db_type: str = CAFFE2_DB_TYPE,
    task: Optional[NewTask] = None,
    cache_size: int = 0,
) -> Predictor:
    """
    Create a simple prediction API from a training config and an exported caffe2
    model file. This model file should be created by calling export on a trained
    model snapshot.
    """
    workspace_id = str(uuid.uuid4())
    workspace.SwitchWorkspace(workspace_id, True)
    predict_net = predictor_exporter.prepare_prediction_net(
        filename=model_file
        or PathManager.get_local_path(config.export_caffe2_path),
        db_type=db_type,
    )

    new_task = task or NewTask.from_config(config.task)
    input_tensorizers = {
        name: tensorizer
        for name, tensorizer in new_task.data.tensorizers.items()
        if tensorizer.is_input
    }

    def predict_fn(input):
        return _predict(workspace_id, predict_net, new_task.model,
                        input_tensorizers, input)

    if cache_size < 0:
        return lru_cache(maxsize=None)(predict_fn)
    elif cache_size > 0:
        return lru_cache(maxsize=cache_size)(predict_fn)
    else:
        return predict_fn