def main():
    """Command-line entry point: compile a rules file and write the patterns as JSON lines.

    Command-line arguments:
        -f        path of the .rita rules file, passed to ``rita.compile``
        out       path of the output .jsonl file
        --debug   switch logging from INFO to DEBUG
        --engine  engine name passed to ``rita.compile`` (default ``"spacy"``)

    Each compiled pattern is serialised with ``RitaJSONEncoder`` and written
    to ``out`` on its own line.
    """
    # NOTE(review): kept as a function-local import as in the original code;
    # presumably this avoids an import cycle - confirm before hoisting it.
    from rita.utils import RitaJSONEncoder

    parser = argparse.ArgumentParser(
        description="Compile rita -> spaCy patterns"
    )
    # NOTE(review): `-f` is optional for argparse, yet its value (None when
    # omitted) goes straight into rita.compile - confirm whether it should
    # be declared required.
    parser.add_argument("-f", help=".rita rules file")
    parser.add_argument(
        "out",
        help="output .jsonl file to store rules"
    )
    parser.add_argument("--debug", help="debug mode", action="store_true")
    parser.add_argument(
        "--engine",
        help="Engine to use when compiling rules",
        default="spacy",
    )
    args = parser.parse_args()

    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    # Announce the engine *before* compiling so the message is still emitted
    # (and correctly ordered) when compilation fails or logs on its own.
    logger.info("Compiling rules using %s engine", args.engine)
    patterns = rita.compile(args.f, use_engine=args.engine)

    # Explicit encoding keeps the output independent of the platform default.
    with open(args.out, "w", encoding="utf-8") as f:
        f.writelines(
            json.dumps(pattern, cls=RitaJSONEncoder) + "\n"
            for pattern in patterns
        )
def _spacy_v3(model, patterns=None, rules_path=None, rules_string=None, override_ents=True):
    """Add an ``entity_ruler`` pipe populated with rita rules to *model*.

    Exactly one source of rules is used, checked in this order:

    Args:
        model: pipeline object exposing ``add_pipe(name, config=...)``.
        patterns: location handed to the ruler's ``from_disk``; used
            whenever it is truthy.
        rules_path: path of a rules file, compiled with ``rita.compile``.
        rules_string: rules source text, compiled with
            ``rita.compile_string``.
        override_ents: forwarded as the ``overwrite_ents`` config value of
            the ruler.

    Returns:
        The same *model* object that was passed in.

    Raises:
        RuntimeError: if none of ``patterns``, ``rules_path`` or
            ``rules_string`` is provided.
    """
    # Resolve (and validate) the rules before touching the model, so that a
    # bad call or a failing compilation does not leave a half-configured,
    # empty ruler attached to the caller's pipeline.
    compiled = None
    if not patterns:
        if rules_path:
            compiled = rita.compile(rules_path, use_engine="spacy")
        elif rules_string:
            compiled = rita.compile_string(rules_string, use_engine="spacy")
        else:
            raise RuntimeError("Please provides rules. Either `patterns`, `rules_path` or `rules_string`")

    ruler = model.add_pipe(
        "entity_ruler",
        config={"overwrite_ents": override_ents, "validate": True},
    )
    if patterns:
        ruler.from_disk(patterns)
    else:
        ruler.add_patterns(compiled)
    return model
def test_shortcuts_spacy_compiled():
    """Smoke test: ``setup_spacy`` accepts pre-compiled patterns stored in a .jsonl file.

    Compiles ``examples/color-car.rita``, writes one JSON pattern per line
    to a temporary file and passes that file's name to ``setup_spacy`` via
    ``patterns=``. Skipped when spaCy >= 2.1 is not importable.
    """
    spacy = pytest.importorskip("spacy", minversion="2.1")
    nlp = spacy.load("en_core_web_sm")
    patterns = rita.compile("examples/color-car.rita")

    # delete=False: the file is closed before setup_spacy receives its name,
    # so it has to outlive the handle and is removed explicitly below.
    tmp = tempfile.NamedTemporaryFile(mode="w", encoding="UTF-8", suffix=".jsonl", delete=False)
    try:
        with tmp:
            tmp.writelines(json.dumps(pattern) + "\n" for pattern in patterns)
        setup_spacy(nlp, patterns=tmp.name)
    finally:
        # Always clean up, even when serialisation or setup_spacy raises.
        os.unlink(tmp.name)
def _spacy_v2(model, patterns=None, rules_path=None, rules_string=None, override_ents=True):
    """Build an ``EntityRuler`` populated with rita rules and add it to *model*.

    Args:
        model: pipeline object; passed to ``EntityRuler`` and extended via
            ``model.add_pipe(ruler)``.
        patterns: location handed to the ruler's ``from_disk``; used
            whenever it is truthy.
        rules_path: path of a rules file, compiled with ``rita.compile``.
        rules_string: rules source text, compiled with
            ``rita.compile_string``.
        override_ents: forwarded to ``EntityRuler`` as ``overwrite_ents``.

    Returns:
        The same *model* object that was passed in.

    Raises:
        RuntimeError: if none of ``patterns``, ``rules_path`` or
            ``rules_string`` is provided.
    """
    from spacy.pipeline import EntityRuler

    entity_ruler = EntityRuler(model, overwrite_ents=override_ents)

    if patterns:
        # Pre-compiled patterns are loaded by the ruler itself.
        entity_ruler.from_disk(patterns)
    else:
        if rules_path:
            compiled = rita.compile(rules_path, use_engine="spacy")
        elif rules_string:
            compiled = rita.compile_string(rules_string, use_engine="spacy")
        else:
            raise RuntimeError("Please provides rules. Either `patterns`, `rules_path` or `rules_string`")
        entity_ruler.add_patterns(compiled)

    # The ruler joins the pipeline only once it has been populated.
    model.add_pipe(entity_ruler)
    return model