def test_update(nlp: TorchLanguage):
    """Check that a training step moves the transformer's parameters,
    and that setting ``cfg["freeze"] = True`` on the pipe stops them moving.
    """
    # \u3000 is an ideographic space — exercises non-ASCII whitespace handling.
    texts = ["This is a test sentence to check\u3000model.update!"]
    labels = [{}]
    pipe: TrfModel = nlp.get_pipe(TRANSFORMERS_MODEL)
    optimizer = nlp.resume_training()
    eps = 1e-5  # tolerance for "parameters did (not) change"

    def sum_param(params):
        # Scalar fingerprint of all parameters: sum of every element.
        return sum(p.sum().item() for p in params)

    def train():
        # Run one forward/backward/update cycle and return how much the
        # parameter fingerprint moved.
        docs, golds = nlp._format_docs_and_golds(texts, labels)
        before = sum_param(pipe.optim_parameters())
        nlp._update_pipes(docs, golds)
        h = get_last_hidden_state_from_docs(docs)
        # Adding a zero tensor with requires_grad=True guarantees the loss
        # participates in autograd even if h itself were detached.
        loss = h.sum() + torch.tensor(0.0, requires_grad=True)
        add_loss_to_docs(docs, loss)
        nlp._update_params(docs, optimizer)
        return abs(before - sum_param(pipe.optim_parameters()))

    # Unfrozen: parameters must move.
    assert train() > eps
    # freeze model
    pipe.cfg["freeze"] = True
    # Frozen: parameters must stay (numerically) put.
    assert train() < eps
    # restore freeze state
    pipe.cfg["freeze"] = False
def evaluate(cfg: Config, nlp: TorchLanguage, val_data: InputData) -> Dict:
    """Score *nlp* on *val_data* with spaCy's built-in evaluation.

    On any failure the offending dataset is reported via ``report_fail``
    and the exception is re-raised.
    """
    try:
        return nlp.evaluate(val_data, batch_size=cfg.nbatch * 2)
    except Exception:
        report_fail(val_data)
        raise
def create_lang(cfg: LangConfig) -> Language:
    """Instantiate a language pipeline from *cfg*.

    Returns a ``TorchLanguage`` when ``cfg.torch`` is set, otherwise a
    blank spaCy pipeline for ``cfg.name``.
    """
    lang_kwargs = cfg.kwargs or {}
    # OmegaConf containers must be converted to plain dicts before **-expansion.
    if isinstance(lang_kwargs, omegaconf.Config):
        lang_kwargs = OmegaConf.to_container(lang_kwargs)
    if cfg.torch:
        lang_kwargs["meta"] = merge(lang_kwargs.get("meta", {}), {"lang": cfg.name})
        return TorchLanguage(True, optimizer_config=cfg.optimizer, **lang_kwargs)
    return spacy.blank(cfg.name, **lang_kwargs)
def train_epoch(
    cfg: Config,
    nlp: TorchLanguage,
    optim: Optimizer,
    train_data: InputData,
    val_data: InputData,
    epoch: int,
    eval_fn: EvalFn,
) -> None:
    """Run a single pass over *train_data*, updating *nlp* per minibatch.

    ``val_data`` and ``eval_fn`` are part of the shared epoch interface but
    are not used inside this function.
    """
    for step, batch in enumerate(minibatch(train_data, size=cfg.nbatch)):
        texts, golds = zip(*batch)
        try:
            nlp.update(texts, golds, optim, verbose=True)
        except Exception:
            # Surface the batch that broke training before propagating.
            report_fail(batch)
            raise
        logger.info("epoch %s %s/%s", epoch, step * cfg.nbatch, cfg.data.ndata)
def evaluate_textcat(cfg: Config, nlp: TorchLanguage, val_data: InputData) -> Dict:
    """Evaluate a text-classification pipeline with sklearn's report.

    Workaround until spaCy's own scorer covers this case:
    https://github.com/explosion/spaCy/pull/4664
    """
    texts, golds = cast(Tuple[Tuple[str], Dict], zip(*val_data))
    try:
        y_true = np.array([goldcat_to_label(gold["cats"]) for gold in golds])
        docs = list(nlp.pipe(texts, batch_size=cfg.nbatch * 2))
        y_pred = np.array([doc._.get(TOP_LABEL) for doc in docs])
    except Exception:
        report_fail(val_data)
        raise
    return classification_report(y_true, y_pred, output_dict=True)
def train(
    cfg: Config,
    nlp: TorchLanguage,
    train_data: InputData,
    val_data: InputData,
    savedir: Path,
) -> None:
    """Full training driver.

    For each of ``cfg.niter`` epochs: shuffle the data in place, train one
    epoch, advance the LR scheduler, evaluate, record scores and config in
    ``nlp.meta``, and save a checkpoint under ``savedir/<epoch>``.
    """
    eval_fn = EVAL_FN_MAP[cfg.task]
    optim = nlp.resume_training()
    scheduler = load_scheduler(cfg, optim)
    for epoch in range(cfg.niter):
        random.shuffle(train_data)
        train_epoch(cfg, nlp, optim, train_data, val_data, epoch, eval_fn)
        # type: ignore — scheduler stubs lag behind
        # https://github.com/pytorch/pytorch/pull/26531
        scheduler.step()  # type: ignore
        scores = eval_fn(cfg, nlp, val_data)
        nlp.meta.update({"score": scores, "config": OmegaConf.to_container(cfg)})
        save_model(nlp, savedir / str(epoch))
def test(nlp: TorchLanguage, data):
    """Smoke-test evaluation, running on GPU when CUDA is available."""
    device = torch.device("cuda") if torch.cuda.is_available() else None
    if device is not None:
        nlp.to(device)
    nlp.evaluate(data, batch_size=256)
def nlp():
    """Fixture: a TorchLanguage tagged with English metadata."""
    meta = {"lang": "en"}
    return TorchLanguage(meta=meta)
def torch_lang():
    """Fixture: a default-constructed TorchLanguage."""
    lang = TorchLanguage()
    return lang