def label_chunk_file(file_to_label: Path, output_file: Path):
    r"""
    Perform NER on \p file_to_label and store the labeling in \p output_file.

    A corpus is built from the input file, its features are fit, and the
    corpus is exported to the input path with its suffix changed to
    ``.dat_chunk``.  That export is then handed to ``label_with_maxent``
    together with the model stored at ``MODEL_PATH``.

    :param file_to_label: File whose contents are to be labeled
    :param output_file: Destination of the labeling; its parent directory is
                        created when missing
    """
    chunk_corpus = Corpus(file_to_label)
    chunk_corpus.fit_features()

    exported_path = file_to_label.with_suffix(".dat_chunk")
    chunk_corpus.export(exported_path)

    # Create the destination directory before the labeler is invoked
    destination_dir = output_file.parent
    destination_dir.mkdir(parents=True, exist_ok=True)

    label_with_maxent(
        data_path=exported_path,
        model_path=MODEL_PATH,
        output_file=output_file,
    )
def build_model(train_path: Path):
    r"""
    Construct the learner model

    A corpus is built from \p train_path, its features are fit, and the corpus
    is exported to the training path with its suffix changed to ``.dat_name``.
    Any model already stored at ``MODEL_PATH`` is removed before
    ``train_maxent_model`` is run on the exported data.

    :param train_path: File containing the training corpus
    """
    train_corpus = Corpus(train_path)
    train_corpus.fit_features()

    data_path = train_path.with_suffix(".dat_name")
    train_corpus.export(data_path)

    # Delete the existing model.  Attempt the removal directly instead of
    # checking exists() first: the check-then-unlink form can race with a
    # concurrent removal and leaves a dangling symlink in place.
    try:
        MODEL_PATH.unlink()
    except FileNotFoundError:
        pass  # No previous model to remove

    MODEL_PATH.parent.mkdir(exist_ok=True, parents=True)
    train_maxent_model(data_path=data_path, model_path=MODEL_PATH)