def __init__(self, sql_db_path, surface_cache_path):
    """Initialize the annotation server: Flask app, caches, pipeline and ELMo runner.

    @param sql_db_path: path to the ELMo SQLite cache database (required in server mode)
    @param surface_cache_path: path to the surface-form cache database
    """
    self.app = Flask(__name__)
    CORS(self.app)
    self.mem_cache = ServerCache()
    self.surface_cache = SurfaceCache(surface_cache_path)
    self.pipeline = local_pipeline.LocalPipeline()
    self.runner = ZoeRunner(allow_tensorflow=True)
    # Fix: the original discarded the load status. Server mode cannot operate
    # without the ELMo cache, so abort early (matches the sibling server
    # __init__ elsewhere in the project, which exits when the cache is missing).
    status = self.runner.elmo_processor.load_sqlite_db(sql_db_path, server_mode=True)
    if not status:
        print("ELMo cache file is not found. Server mode is prohibited without it.")
        exit(1)
    # Install the graceful-shutdown handler for Ctrl-C.
    signal.signal(signal.SIGINT, self.grace_end)
def produce_surface_cache(db_name, cache_name):
    """Populate the surface-form cache from the large-text corpus.

    Runs every shallow-parse chunk of every corpus sentence through the
    ZoeRunner and stores the result in the surface cache.

    Bug fix: `counter` was initialized to 0 but never incremented, so
    progress_bar() permanently reported zero progress; enumerate() now
    advances it once per sentence.

    @param db_name: path to the surface cache database to write
    @param cache_name: path to the ELMo SQLite cache database to read
    """
    pipeline = local_pipeline.LocalPipeline()
    cache = SurfaceCache(db_name, server_mode=False)
    runner = ZoeRunner()
    runner.elmo_processor.load_sqlite_db(cache_name, server_mode=False)
    dataset = DataReader("data/large_text.json", size=-1, unique=True)
    total = len(dataset.sentences)
    for counter, sentence in enumerate(dataset.sentences):
        ta = pipeline.doc([sentence.tokens], pretokenized=True)
        # NOTE(review): assumes each chunk dict carries 'start'/'end' token
        # offsets into the sentence -- confirm against get_shallow_parse.
        for chunk in ta.get_shallow_parse:
            new_sentence = Sentence(sentence.tokens, chunk['start'], chunk['end'])
            runner.process_sentence(new_sentence)
            cache.insert_cache(new_sentence)
        progress_bar(counter, total)
def main(args):
    """Run inference on a single-document test file and dump the result as JSON.

    @param args: dict with keys "test_doc" (input path, one doc per file)
                 and "out_doc" (output JSON path)
    """
    pipeline = local_pipeline.LocalPipeline()
    annotator = setup_annotator(args, pipeline=pipeline)
    test_file = args["test_doc"]
    out_file = args["out_doc"]
    print("[#] Test Mentions File : {}".format(test_file))
    with open(test_file, 'r') as f:
        lines = f.read().strip().split("\n")
    assert len(lines) == 1, "Only support inference for single doc"
    doctext = lines[0].strip()
    ta = annotator.inference_on_text(text=doctext)
    ta_json = ta.as_json
    # Fix: the original leaked the handle from a bare open() inside json.dump;
    # close it deterministically. (indent=True is kept: json treats it as indent=1.)
    with open(out_file, "w") as out_f:
        json.dump(ta_json, out_f, indent=True)
def main():
    """Stand up a Flask server exposing a dummy annotator on /annotate."""
    # Build the model first; the DummyModel class shows the minimal contract.
    model = ExampleModel()
    nlp_pipeline = local_pipeline.LocalPipeline()
    annotator = ExampleAnnotator(model=model,
                                 pipeline=nlp_pipeline,
                                 provided_view="DUMMYVIEW",
                                 required_views=["TOKENS"])
    # Wire the annotator's annotate method into the Flask app, then serve.
    app.add_url_rule(rule='/annotate',
                     endpoint='annotate',
                     view_func=annotator.annotate,
                     methods=['GET'])
    app.run(host='localhost', port=5000)
def main(args):
    """Start a Flask server that serves a single annotator on /annotate.

    The annotator's model must provide:
      1) load_params() -- loads the relevant model parameters into memory;
      2) inference_on_ta(docta, new_view_name) -- creates the named view in
         the text annotation and returns it.
    See the DummyModel class for a minimal example.
    """
    nlp_pipeline = local_pipeline.LocalPipeline()
    annotator = setup_annotator(args=args, pipeline=nlp_pipeline)
    annotator.load_params()
    app.add_url_rule(rule='/annotate',
                     endpoint='annotate',
                     view_func=annotator.annotate,
                     methods=['GET'])
    app.run(host='0.0.0.0', port=8080)
def get_ta_dir(directory):
    """
    Returns a list of TextAnnotation objects which are instantiated using the
    serialized json data in the directory parameter.

    @param directory path to directory with serialized TAs
    @return tas a list of TextAnnotations
    """
    pipeline = local_pipeline.LocalPipeline()
    # Fix: join() inserts the separator itself; the original's directory+"/"
    # produced redundant separators in every path.
    serialized_tas = [join(directory, f)
                      for f in listdir(directory)
                      if isfile(join(directory, f))]
    tas = []
    for ser_ta in serialized_tas:
        with open(ser_ta, mode='r', encoding='utf-8') as f:
            tas.append(core.text_annotation.TextAnnotation(f.read(), pipeline))
    return tas
def __init__(self, sql_db_path, surface_cache_path):
    """Set up the annotation server: Flask app, caches, pipeline and ELMo runner.

    Aborts the whole process when the ELMo SQLite cache cannot be loaded,
    because server mode depends on it.

    @param sql_db_path: path to the ELMo SQLite cache database
    @param surface_cache_path: path to the surface-form cache database
    """
    self.app = Flask(__name__)
    CORS(self.app)
    self.mem_cache = ServerCache()
    self.surface_cache = SurfaceCache(surface_cache_path)
    self.pipeline = local_pipeline.LocalPipeline()
    self.pipeline_initialize_helper(['.'])
    self.runner = ZoeRunner(allow_tensorflow=True)
    # Guard: without the ELMo cache the server cannot answer requests.
    db_loaded = self.runner.elmo_processor.load_sqlite_db(sql_db_path, server_mode=True)
    if not db_loaded:
        print("ELMo cache file is not found. Server mode is prohibited without it.")
        print("Please contact the author for this cache, or modify this code if you know what you are doing.")
        exit(1)
    self.runner.elmo_processor.rank_candidates_vec()
    # Shut down gracefully on Ctrl-C.
    signal.signal(signal.SIGINT, self.grace_end)
def main(args):
    """Launch a multilingual /annotate Flask server, one annotator per language.

    Each annotator's model provides load_params() and
    inference_on_ta(docta, new_view_name); see the DummyModel class for a
    minimal example of that contract.
    """
    pipeline = local_pipeline.LocalPipeline()
    annotators: List[Annotator] = []
    # (language, saved-model path) pairs, in serving order.
    model_by_lang = [
        ("es", "data/saved_models/joint/es.joint.wtype.model"),
        ("zh", "data/saved_models/joint/zh.joint.wtype.model"),
        ("fr", "data/saved_models/joint/fr.joint.31.5k_v2.model"),
        ("it", "data/saved_models/joint/it.joint.56.5k.10M.model"),
        ("de", "data/saved_models/joint/de.joint.20M.99k.w0.4.c0.6.model"),
    ]
    vocab_tmpl = "data/{}wiki/vocab/{}wiki.train.vocab.wiki.en-{}.{}.vec_wiki.en.vec.True.True.5.0.word2idx.pkl"
    vec_tmpl = "data/{}wiki/vocab/{}wiki.train.vocab.wiki.en-{}.{}.vec_wiki.en.vec.True.True.5.0.embeddings.pkl"
    coh_tmpl = "data/{}wiki/combined_coh/en{}.coh1M"
    for lang, model_path in model_by_lang:
        # Point the shared args dict at this language's resources before
        # constructing its annotator.
        args["lang"] = lang
        args["vocabpkl"] = vocab_tmpl.format(lang, lang, lang, lang)
        args["vecpkl"] = vec_tmpl.format(lang, lang, lang, lang)
        args["cohstr"] = coh_tmpl.format(lang, lang)
        args["restore"] = model_path
        args["filter_sizes"] = "5"
        annotator = setup_annotator(args=args, pipeline=pipeline)
        annotator.load_params()
        annotators.append(annotator)
    multi_annotator = MultiAnnotator(annotators=annotators)
    app.add_url_rule(rule='/annotate',
                     endpoint='annotate',
                     view_func=multi_annotator.annotate,
                     methods=['GET'])
    app.run(host='0.0.0.0', port=8009)
# -*- coding: utf8 -*- import unittest import os from ccg_nlpy import local_pipeline if os.path.exists('annotation-cache'): os.remove('annotation-cache') lp = local_pipeline.LocalPipeline() class TestLocalPipeline(unittest.TestCase): def setUp(self): self.lp = lp def test_doc(self): ta = self.lp.doc("Hello, how are you.\n\n\n I am doing fine") tokens = [ 'Hello', ',', 'how', 'are', 'you', '.', 'I', 'am', 'doing', 'fine' ] self.assertEqual(ta.get_tokens, tokens) testarr = [6, 10] self.assertEqual(ta.get_sentence_end_token_indices, testarr) self.assertEqual(ta.get_score, 1.0) self.assertEqual(ta.get_text, "Hello, how are you.\n\n\n I am doing fine") def test_doc_illigal_characters(self):
def get_pipeline_instance(self):
    """Build and return a fresh LocalPipeline instance."""
    pipeline = local_pipeline.LocalPipeline()
    return pipeline
def __init__(self, ontonotes=True):
    """Initialize the detector with a local pipeline.

    @param ontonotes: when True, operate in OntoNotes mode; otherwise the
                      `connl` flag is set instead.
    """
    super().__init__()
    # The two flags are opposite views of the same switch.
    # (The "connl" spelling is kept -- it is an externally visible attribute.)
    self.ontonotes = ontonotes
    self.connl = not ontonotes
    self.pipeline = local_pipeline.LocalPipeline()
    # Register this instance's annotate method as the detection callback.
    self.add_detector(self.annotate)