Example #1
    def init_agent(self):
        agent_config = self.config['kpis'][self.kpi_name]['settings_agent']
        model_dir = agent_config['model_dir']
        model_file = agent_config['model_file']
        dict_file = agent_config['dict_file']
        embedding_file = agent_config['embedding_file']

        update_model = agent_config['update_model']

        if update_model:
            download_url = agent_config['model_dowload_url']
            download_path = model_dir
            download_untar(download_url, download_path)

        params_path = os.path.join(model_dir, 'params.json')
        with open(params_path) as f:
            network_params = json.load(f)

        model_path = os.path.join(model_dir, model_file)
        dict_path = os.path.join(model_dir, dict_file)
        embedding_path = os.path.join(model_dir, embedding_file)

        corpus = Corpus(dicts_filepath=dict_path,
                        embeddings_file_path=embedding_path)
        network = NER(corpus,
                      pretrained_model_filepath=model_path,
                      **network_params)
        self.agent = network
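For orientation, here is a minimal sketch of the `settings_agent` section this method expects. The key names are taken from the code above (including the `model_dowload_url` spelling); the values are purely illustrative placeholders, not the real configuration.

# Illustrative config fragment only; the real values come from the application config
config = {
    'kpis': {
        'ner': {  # hypothetical KPI name
            'settings_agent': {
                'model_dir': 'model',
                'model_file': 'ner_model',
                'dict_file': 'dict.txt',
                'embedding_file': 'glove.6B.100d.txt',
                'update_model': True,
                'model_dowload_url': 'http://lnsigo.mipt.ru/export/ner/ner_model_total_rus.tar.gz'
            }
        }
    }
}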
Example #2
    def init_agent(self):
        model_dir = self.config['kpis'][self.kpi_name]['settings_agent']['model_dir']
        update_model = bool(self.config['kpis'][self.kpi_name]['settings_agent']['update_model'])

        if update_model:
            glob_arg = os.path.join(model_dir, '*')
            if md5_hashsum(glob(glob_arg)) != 'f25fe8e1297154077fc4d3bf65ed888e':
                download_url = 'http://lnsigo.mipt.ru/export/ner/ner_model_total_rus.tar.gz'
                download_path = model_dir
                download_untar(download_url, download_path)

        params_path = os.path.join(model_dir, 'params.json')
        with open(params_path) as f:
            network_params = json.load(f)

        dicts_path = os.path.join(model_dir, 'dict.txt')
        corpus = Corpus(dicts_filepath=dicts_path)
        network = NER(corpus,
                      verbouse=False,
                      pretrained_model_filepath=os.path.join(model_dir, 'ner_model'),
                      **network_params)
        self.agent = network
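The check above depends on `md5_hashsum`, which is defined outside these snippets. Below is a rough sketch under the assumption that it folds the contents of all given files into a single MD5 digest; this is an assumption, not the library's actual implementation.

import hashlib

def md5_hashsum(file_names):
    # Assumed behaviour: one MD5 digest accumulated over the contents of every file
    hash_md5 = hashlib.md5()
    for file_name in file_names:
        with open(file_name, 'rb') as f:
            for chunk in iter(lambda: f.read(65536), b''):
                hash_md5.update(chunk)
    return hash_md5.hexdigest()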
Example #3
    def __init__(self,
                 model_path=None,
                 tokenizer=None,
                 model_url='http://lnsigo.mipt.ru/export/models/ner/ner_model_total_rus.tar.gz'):
        self.model_path = (
            model_path
            or pkg_resources.resource_filename(__name__, "../model")
        )
        self.model_url = model_url
        self._lazy_download()

        with open(self._get_path('params.json')) as f:
            self.network_params = json.load(f)

        self.corpus = Corpus(dicts_filepath=self._get_path('dict.txt'))
        self.network = NER(
            self.corpus,
            verbouse=False,
            pretrained_model_filepath=self._get_path('ner_model'),
            **self.network_params)

        self.tokenizer = tokenizer or Tokenizer()
        self._morph = pymorphy2.MorphAnalyzer()
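`_get_path` and `_lazy_download` are not part of this snippet. A hypothetical sketch of what they could look like, assuming the download is skipped once the model files are already in place:

    def _get_path(self, filename):
        # Resolve a file name relative to the model directory
        return os.path.join(self.model_path, filename)

    def _lazy_download(self):
        # Hypothetical: fetch and unpack the archive only if the model is missing
        if not os.path.exists(self._get_path('params.json')):
            download_untar(self.model_url, self.model_path)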
Example #4
# Just run something like the command below in a terminal:
# echo "На конспирологическом саммите в США глава Федерального Бюро Расследований сделал невероятное заявление" | python3 ner.py

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
# Check existence of the model by hashsum
if md5_hashsum(glob('model/*')) != 'f25fe8e1297154077fc4d3bf65ed888e':
    # Download and extract model
    download_url = 'http://lnsigo.mipt.ru/export/models/ner/conll_ner.tar.gz'
    download_path = 'model/'
    download_untar(download_url, download_path)

# Load network params
with open('model/params.json') as f:
    network_params = json.load(f)

corpus = Corpus(dicts_filepath='model/dict.txt',
                embeddings_file_path='model/glove.6B.100d.txt')

network = NER(corpus,
              pretrained_model_filepath='model/model.ckpt',
              **network_params)


def print_predict(sentence):
    # Split sentence into tokens
    tokens = tokenize(sentence)

    # Lemmatization (e.g. был -> быть, его -> он) is not applied for this model,
    # so the raw tokens go to the network directly
    tags = network.predict_for_token_batch([tokens])[0]
    for token, tag in zip(tokens, tags):
        print(token, tag)
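To support the `echo ... | python3 ner.py` invocation mentioned at the top of this example, the script presumably ends with a small stdin loop along these lines (a sketch, not shown in the snippet):

import sys

if __name__ == '__main__':
    # Tag every line piped in on standard input
    for query in sys.stdin:
        print_predict(query)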
Example #5
        annotated_corpus = AnnotatedCorpus(
            allowed_pos=['amod', 'nmod', 'nsubj', 'compound', 'conj'],
            data_fn=args.corpus)
        # Frames containing at least one evaluation key, printed for inspection only
        selected = [
            fr for fr in annotated_corpus.selected_frames
            if any([el in fr for el in eval_mapping.keys()])
        ]
        print(selected)
        # The full set of selected frames is what actually gets written out below
        selected = annotated_corpus.selected_frames
        print('Creating corpus in', args.data_dir)
        annotated_corpus.get_corpus_srl_iob(args.data_dir,
                                            train_set,
                                            args.train_size,
                                            selected=selected)
    dataset_dict = prepare_data_dict(args.data_dir)
    corpus = Corpus(dataset_dict, embeddings_file_path=None)
    print_dataset(dataset_dict)
    net = NER(corpus, **model_params)
    learning_params = {
        'dropout_rate': args.dropout,
        'epochs': 10,
        'learning_rate': 0.005,
        'batch_size': 8,
        'learning_rate_decay': 0.707,
        'model_file_path': args.model_dir
    }
    results = net.fit(**learning_params)
else:
    dialogue_dataset = Dataset(saved_dialogues=args.dataset_path)
    total = 0
    with open(os.path.join(args.exp_dir, 'test_set'), 'rb') as f:
Example #6
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
# Check existence of the model by hashsum

if md5_hashsum(sorted(glob('model/*'))) != 'fd50a27b96b24cdabdda13795a3baae7':
    # Download and extract model
    download_url = 'http://lnsigo.mipt.ru/export/models/ner/ner_model_total_rus.tar.gz'
    download_path = 'model/'
    download_untar(download_url, download_path)

# Load network params
with open('model/params.json') as f:
    network_params = json.load(f)


corpus = Corpus(dicts_filepath='model/dict.txt')

network = NER(corpus, verbouse=False, pretrained_model_filepath='model/ner_model', **network_params)


def print_predict(sentence):
    # Split sentence into tokens
    tokens = tokenize(sentence)

    # Lemmatize every token
    # Example: был -> быть, его -> он
    tokens_lemmas = lemmatize(tokens)

    tags = network.predict_for_token_batch([tokens_lemmas])[0]
    for token, tag in zip(tokens, tags):
        print(token, tag)
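As a quick sanity check, the function can also be called directly, for example with the sample sentence from Example #4:

print_predict('На конспирологическом саммите в США глава Федерального Бюро Расследований '
              'сделал невероятное заявление')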