def init_agent(self):
    agent_config = self.config['kpis'][self.kpi_name]['settings_agent']
    model_dir = agent_config['model_dir']
    model_file = agent_config['model_file']
    dict_file = agent_config['dict_file']
    embedding_file = agent_config['embedding_file']
    update_model = agent_config['update_model']

    if update_model:
        download_url = agent_config['model_dowload_url']
        download_path = model_dir
        download_untar(download_url, download_path)

    # Load network params
    params_path = os.path.join(model_dir, 'params.json')
    with open(params_path) as f:
        network_params = json.load(f)

    model_path = os.path.join(model_dir, model_file)
    dict_path = os.path.join(model_dir, dict_file)
    embedding_path = os.path.join(model_dir, embedding_file)
    corpus = Corpus(dicts_filepath=dict_path, embeddings_file_path=embedding_path)
    network = NER(corpus, pretrained_model_filepath=model_path, **network_params)
    self.agent = network
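# A minimal sketch of the 'settings_agent' config section this method expects.
# The values are illustrative placeholders, not the real deployment config;
# file names follow the sibling snippets, and the URL is the one used below.
# Note the 'model_dowload_url' key spelling, which is what the code reads.
EXAMPLE_SETTINGS_AGENT = {
    'model_dir': 'model',
    'model_file': 'ner_model',
    'dict_file': 'dict.txt',
    'embedding_file': 'embeddings.txt',
    'update_model': True,
    'model_dowload_url': 'http://lnsigo.mipt.ru/export/ner/ner_model_total_rus.tar.gz',
}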
def init_agent(self):
    agent_config = self.config['kpis'][self.kpi_name]['settings_agent']
    model_dir = agent_config['model_dir']
    update_model = bool(agent_config['update_model'])

    if update_model:
        # Re-download the model if the files on disk do not match the expected
        # hash; sort the glob result so the digest is order-independent.
        glob_arg = os.path.join(model_dir, '*')
        if md5_hashsum(sorted(glob(glob_arg))) != 'f25fe8e1297154077fc4d3bf65ed888e':
            download_url = 'http://lnsigo.mipt.ru/export/ner/ner_model_total_rus.tar.gz'
            download_path = model_dir
            download_untar(download_url, download_path)

    # Load network params
    params_path = os.path.join(model_dir, 'params.json')
    with open(params_path) as f:
        network_params = json.load(f)

    dicts_path = os.path.join(model_dir, 'dict.txt')
    corpus = Corpus(dicts_filepath=dicts_path)
    network = NER(corpus,
                  verbouse=False,
                  pretrained_model_filepath=os.path.join(model_dir, 'ner_model'),
                  **network_params)
    self.agent = network
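# A plausible sketch of the md5_hashsum helper used above, assuming it feeds
# the bytes of each listed file into a single MD5 digest; the real helper in
# this repo may differ in detail. Passing a sorted file list (as done above)
# keeps the digest independent of filesystem ordering.
import hashlib

def md5_hashsum_sketch(file_paths):
    hasher = hashlib.md5()
    for path in file_paths:
        with open(path, 'rb') as f:
            # Read in chunks so large model files are not loaded into memory at once
            for chunk in iter(lambda: f.read(65536), b''):
                hasher.update(chunk)
    return hasher.hexdigest()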
def __init__(self, model_path=None, tokenizer=None,
             model_url='http://lnsigo.mipt.ru/export/models/ner/ner_model_total_rus.tar.gz'):
    self.model_path = model_path or pkg_resources.resource_filename(__name__, "../model")
    self.model_url = model_url
    self._lazy_download()

    # Load network params
    with open(self._get_path('params.json')) as f:
        self.network_params = json.load(f)

    self.corpus = Corpus(dicts_filepath=self._get_path('dict.txt'))
    self.network = NER(self.corpus,
                       verbouse=False,
                       pretrained_model_filepath=self._get_path('ner_model'),
                       **self.network_params)
    self.tokenizer = tokenizer or Tokenizer()
    self._morph = pymorphy2.MorphAnalyzer()
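# Hedged sketches of the two private helpers the constructor relies on; their
# real bodies are not shown here, so these only illustrate the assumed contract:
# _get_path resolves a file name inside self.model_path, and _lazy_download
# fetches and unpacks the model archive once if the files are not yet on disk.
def _get_path(self, filename):
    return os.path.join(self.model_path, filename)

def _lazy_download(self):
    if not os.path.exists(self._get_path('params.json')):
        download_untar(self.model_url, self.model_path)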
# Just run something like the command below in a terminal:
# echo "На конспирологическом саммите в США глава Федерального Бюро Расследований сделал невероятное заявление" | python3 ner.py

import os
import json
from glob import glob

# Project imports (module paths assumed from this repo's layout)
from ner.corpus import Corpus
from ner.network import NER
from ner.utils import md5_hashsum, download_untar, tokenize

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# Check existence of the model by hashsum
if md5_hashsum(sorted(glob('model/*'))) != 'f25fe8e1297154077fc4d3bf65ed888e':
    # Download and extract model
    download_url = 'http://lnsigo.mipt.ru/export/models/ner/conll_ner.tar.gz'
    download_path = 'model/'
    download_untar(download_url, download_path)

# Load network params
with open('model/params.json') as f:
    network_params = json.load(f)

corpus = Corpus(dicts_filepath='model/dict.txt',
                embeddings_file_path='model/glove.6B.100d.txt')
network = NER(corpus, pretrained_model_filepath='model/model.ckpt', **network_params)


def print_predict(sentence):
    # Split sentence into tokens
    tokens = tokenize(sentence)
    # Lemmatization (e.g. был -> быть, его -> он) is not applied for this model
    # tokens_lemmas = lemmatize(tokens)
    tags = network.predict_for_token_batch([tokens])[0]
    for token, tag in zip(tokens, tags):
        print(token, tag)
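# A minimal entry point matching the echo usage shown at the top of this file;
# the script's actual __main__ block is not shown above, so this is a sketch.
if __name__ == '__main__':
    import sys
    for line in sys.stdin:
        line = line.strip()
        if line:
            print_predict(line)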
    annotated_corpus = AnnotatedCorpus(
        allowed_pos=['amod', 'nmod', 'nsubj', 'compound', 'conj'],
        data_fn=args.corpus)
    # Keep only frames containing at least one element from the evaluation mapping
    selected = [fr for fr in annotated_corpus.selected_frames
                if any(el in fr for el in eval_mapping.keys())]
    print(selected)
    # Use the full frame list (this overrides the filtered selection above)
    selected = annotated_corpus.selected_frames

    print('Creating corpus in', args.data_dir)
    annotated_corpus.get_corpus_srl_iob(args.data_dir, train_set, args.train_size,
                                        selected=selected)
    dataset_dict = prepare_data_dict(args.data_dir)
    corpus = Corpus(dataset_dict, embeddings_file_path=None)
    print_dataset(dataset_dict)
    net = NER(corpus, **model_params)
    learning_params = {'dropout_rate': args.dropout,
                       'epochs': 10,
                       'learning_rate': 0.005,
                       'batch_size': 8,
                       'learning_rate_decay': 0.707,
                       'model_file_path': args.model_dir}
    results = net.fit(**learning_params)
else:
    dialogue_dataset = Dataset(saved_dialogues=args.dataset_path)
    total = 0
    with open(os.path.join(args.exp_dir, 'test_set'), 'rb') as f:
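# Illustrative sketch of the dataset_dict shape that prepare_data_dict is
# assumed to return before being wrapped in Corpus: per-split lists of
# (tokens, IOB tags) pairs. Both the layout and the tag names below are
# assumptions based on the SRL-IOB corpus created above, not confirmed API.
example_dataset_dict = {
    'train': [(['The', 'bureau', 'confirmed', 'it'],
               ['B-A0', 'I-A0', 'B-V', 'B-A1'])],
    'valid': [],
    'test': [],
}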
import os
import json
from glob import glob

# Project imports (module paths assumed from this repo's layout)
from ner.corpus import Corpus
from ner.network import NER
from ner.utils import md5_hashsum, download_untar, tokenize, lemmatize

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# Check existence of the model by hashsum
if md5_hashsum(sorted(glob('model/*'))) != 'fd50a27b96b24cdabdda13795a3baae7':
    # Download and extract model
    download_url = 'http://lnsigo.mipt.ru/export/models/ner/ner_model_total_rus.tar.gz'
    download_path = 'model/'
    download_untar(download_url, download_path)

# Load network params
with open('model/params.json') as f:
    network_params = json.load(f)

corpus = Corpus(dicts_filepath='model/dict.txt')
network = NER(corpus,
              verbouse=False,
              pretrained_model_filepath='model/ner_model',
              **network_params)


def print_predict(sentence):
    # Split sentence into tokens
    tokens = tokenize(sentence)
    # Lemmatize every token (e.g. был -> быть, его -> он)
    tokens_lemmas = lemmatize(tokens)
    tags = network.predict_for_token_batch([tokens_lemmas])[0]
    for token, tag in zip(tokens, tags):
        print(token, tag)
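# Example call, using the Russian sentence from the echo command in the sibling
# script above; expected output is one "token tag" line per token.
print_predict('На конспирологическом саммите в США глава Федерального Бюро Расследований сделал невероятное заявление')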