def main():
    """Scrape the configured news sources, extract street addresses from
    each item, and persist previously-unseen accident reports.

    Side effects: adds `News` rows via `db.session` and commits per item.
    Items whose `link` already exists in the database are skipped.
    """
    news_sites = {
        'm24.ru': M24_accidents,
        'mosday.ru': Mosday_accidents,
        'vm.ru': VM_accidents,
    }

    # Initialize the Natasha address extractor once for all sources.
    morph_vocab = MorphVocab()
    extractor = AddrExtractor(morph_vocab)

    # Collect address-bearing news items from every source into a
    # temporary in-memory list before touching the database.
    news_list = []
    for site_name, scraper_cls in news_sites.items():
        try:
            source = scraper_cls()
            news_list += get_news(source, extractor)
        except TypeError:
            # Runtime message kept verbatim (user-facing, Russian:
            # "Source {} is unavailable.").
            print("Источник {} недоступен.".format(site_name))

    for item in news_list:
        # De-duplicate by link BEFORE parsing dates / building the record,
        # so already-stored items cost a single query and nothing more.
        if News.query.filter_by(link=item['link']).first():
            continue

        published = datetime.strptime(
            item['time'] + ' ' + item['date'], '%H:%M %d.%m.%Y')
        record = News(
            title=item['title'],
            link=item['link'],
            date_and_time=published,
            text=item['text'],
            address=item['location']['address'],
            street=item['location']['street'],
            lat=item['location']['coordinates'][0],
            lon=item['location']['coordinates'][1],
        )
        db.session.add(record)
        db.session.commit()
import csv

import numpy
from natasha import (
    Segmenter, MorphVocab, NewsEmbedding, NewsMorphTagger, NewsSyntaxParser,
    NewsNERTagger, PER, NamesExtractor, DatesExtractor, MoneyExtractor,
    AddrExtractor, Doc,
)

# Natasha NLP pipeline components, built once at import time and shared
# by the module.
segmenter = Segmenter()
morph_vocab = MorphVocab()
emb = NewsEmbedding()
morph_tagger = NewsMorphTagger(emb)
syntax_parser = NewsSyntaxParser(emb)
ner_tagger = NewsNERTagger(emb)
names_extractor = NamesExtractor(morph_vocab)
dates_extractor = DatesExtractor(morph_vocab)
money_extractor = MoneyExtractor(morph_vocab)
addr_extractor = AddrExtractor(morph_vocab)

data_path = '../data/'


# Alternative data files kept for reference:
# train_file='paraphrases.tsv', test_file='paraphrases_gold.tsv'
# train_file='paraphrase_framebank.tsv'
def read_all_data(train_file=data_path + 'paraphrases.tsv',
                  test_file=data_path + 'paraphrases_gold.tsv'):
    """Load the paraphrase TSV training data into a numpy object array.

    NOTE(review): the visible body never uses `test_file` and has no
    `return` statement — the function appears truncated in this chunk;
    confirm against the full file before relying on its result.
    """
    with open(train_file, encoding='utf-8') as f:
        reader = csv.DictReader(f, delimiter='\t')
        data = list(reader)
    data = numpy.asarray(data)
    # Shuffling deliberately disabled:
    # numpy.random.seed(123)
    # numpy.random.shuffle(data)
def addr_extractor(morph_vocab):
    """Build and return a Natasha address extractor bound to *morph_vocab*."""
    extractor = AddrExtractor(morph_vocab)
    return extractor
Doc ) <<<<<<< HEAD segmenter = Segmenter() morph_vocab = MorphVocab() emb = NewsEmbedding() morph_tagger = NewsMorphTagger(emb) syntax_parser = NewsSyntaxParser(emb) ner_tagger = NewsNERTagger(emb) names_extractor = NamesExtractor(morph_vocab) dates_extractor = DatesExtractor(morph_vocab) money_extractor = MoneyExtractor(morph_vocab) addr_extractor = AddrExtractor(morph_vocab) ======= >>>>>>> 48053ea7494d126cc00495acaefc6b7b63fe3943 from loguru import logger from classificator import preproccesor from clasterizator import main_patrol_func import sys sys.path.append('./models/tensorflow1/models/research/object_detection') from Object_detection_image import get_image segmenter = Segmenter() morph_vocab = MorphVocab()