def get_locations(text):
    extractor = LocationExtractor()
    matches = list(filter(lambda m: m.fact.name != "россия", extractor(text)))
    locations = list(map(lambda m: m.fact.name, matches))
    uniq = []
    for k in locations:
        uniq = uniq + k.split()
    return list(set(uniq))
Exemplo n.º 2
0
    def get_location(query):
        """Получение города из реплики. """
        loc_extractor = LocationExtractor()
        city = loc_extractor(query)
        try:
            fact_city = city[0].fact.as_json
        except IndexError:
            raise LocationExtractorException("Location Extractor Error")

        loc = format_json(fact_city)
        loc = re.findall(r"\: \"(\w+)", loc)
        return loc
Exemplo n.º 3
0
def get_extractor(extract_type):
    if extract_type == "name":
        #Экстрактор имён
        return NamesExtractor()
    elif extract_type == "location":
        #Экстрактор мест
        return LocationExtractor()
    elif extract_type == "date":
        #Экстрактор дат
        return DatesExtractor()
    elif extract_type == "money":
        #Экстрактор денежных сумм
        return MoneyExtractor()
Exemplo n.º 4
0
def add_place(all_news):
    extractor = LocationExtractor()
    for i in range(len(all_news)):
        text = ' '.join([all_news[i]['title'], all_news[i]['article']])
        matches = extractor(text)
        all_news[i].update({'place': ''})
        if len(matches.as_json) > 0:
            places = []
            for match in matches.as_json:
                places.append(match['fact']['name'])
            for j in range(len(places)):
                places[j] = places[j].title()
            all_news[i].update({'place': ','.join(places)})
    return all_news
Exemplo n.º 5
0
def natasha_res(text):
    names_extractor = NamesExtractor()
    location_extractor = LocationExtractor()
    organisation_extractor = OrganisationExtractor()
    dates_extractor = DatesExtractor()
    money_extractor = MoneyExtractor()
    names_mapper = lambda x: text[x.span[0]:x.span[1]]
    location_mapper = names_mapper
    org_mapper = names_mapper
    money_mapper = names_mapper
    res = {
        'names': set(map(names_mapper, names_extractor(text))),
        'locations': set(map(location_mapper, location_extractor(text))),
        'organisations': set(map(org_mapper, organisation_extractor(text))),
        'dates': set(map(dates_mapper, dates_extractor(text))),
        'money': set(map(money_mapper, money_extractor(text))),
    }
    return res
Exemplo n.º 6
0
    def __init__(self):
        from natasha import (NamesExtractor, SimpleNamesExtractor,
                             DatesExtractor, MoneyExtractor,
                             MoneyRateExtractor, MoneyRangeExtractor,
                             LocationExtractor, AddressExtractor,
                             OrganisationExtractor, PersonExtractor)

        addr_ex = AddressExtractor()
        date_ex = DatesExtractor()
        loc_ex = LocationExtractor()
        money_ex = MoneyExtractor()
        money_range_ex = MoneyRangeExtractor()
        money_rate_ex = MoneyRateExtractor()
        name_ex = SimpleNamesExtractor()
        org_ex = OrganisationExtractor()
        person_ex = PersonExtractor()
        # extractors=[addr_ex, date_ex, loc_ex, money_ex, money_range_ex, money_rate_ex,
        #            name_ex, org_ex, person_ex]
        self.extractors = [
            addr_ex, date_ex, loc_ex, money_ex, money_range_ex, money_rate_ex,
            org_ex, person_ex
        ]
Exemplo n.º 7
0
def send_db_pool_map(doc):
    extractors = [AddressExtractor(), DatesExtractor(), LocationExtractor(), MoneyExtractor(), NamesExtractor(),
                  OrganisationExtractor(), PersonExtractor()]

    pool_local = ThreadPool(10)
    ne_full = []
    span_ne_full = []
    type_ne_full = []
    try:
        for extr in extractors:
            global text_paper
            text_paper = doc
            matches = extr(text_paper)
            ne = pool_local.starmap(get_ne2, zip(matches,[doc for x in range(len(matches))]))
            span_ne = pool_local.map(get_span_ne, matches)
            type_ne = pool_local.map(get_type_ne, matches)

            ne_full.append(ne)
            span_ne_full.append(span_ne)
            type_ne_full.append(type_ne)
    except:
        print('Ошибка! Примерный номер =', '?')
    pool_local.close()
    pool_local.join()
    if len(ne_full) != 0:
        ne_for_db = reduce(lambda x, y: x + y, ne_full)
        span_ne_for_db = reduce(lambda x, y: x + y, span_ne_full)
        type_ne_for_db = reduce(lambda x, y: x + y, type_ne_full)
        '''if len(ne_for_db) != 0:
            cur.execute('UPDATE public.news_rbc '
                        'SET ne=%s, span_ne=%s, type_ne=%s'
                        'WHERE id=%s;', (ne_for_db, span_ne_for_db,
                                         type_ne_for_db, num))
            con.commit()'''
        return [ne_for_db, span_ne_for_db, type_ne_for_db]
    else:
        return [0, 0, 0]
Exemplo n.º 8
0
                     MoneyExtractor, LocationExtractor)

app = Flask(__name__)

# todo: LocationExtractor работает плохо, его надо настравиать,
# todo: но он умеет находить города, страны и регионы.
# todo: AddressExtractor лучше находит и представляет города,
# todo: но не находит страны и регионы. Он находит улицы и дома,
# todo: нужно исключить их из выдачи и объединить результаты с
# todo: результатами LocationExtractor-а.

names_extractor = NamesExtractor()
address_extractor = AddressExtractor()
dates_extractor = DatesExtractor()
money_extractor = MoneyExtractor()
location_extractor = LocationExtractor()


def find_named_entities(ner_extractor, text):
    """Находит именованные сущности в тексте.

    :param ner_extractor: объект класса NamesExtractor, AddressExtractor,
    DatesExtractor или MoneyExtractor
    :param text: str
    :return: list of namedtuples
    """
    matches = ner_extractor(text)
    return [_.fact.as_json for _ in matches]


def get_response(text):
Exemplo n.º 9
0
        '''if len(ne_for_db) != 0:
            cur.execute('UPDATE public.news_rbc '
                        'SET ne=%s, span_ne=%s, type_ne=%s'
                        'WHERE id=%s;', (ne_for_db, span_ne_for_db,
                                         type_ne_for_db, num))
            con.commit()'''
        return [ne_for_db, span_ne_for_db, type_ne_for_db]
    else:
        return [0, 0, 0]



if __name__ == '__main__':
    time_begin = time()
    # экстракторы
    extractors = [AddressExtractor(), DatesExtractor(), LocationExtractor(), MoneyExtractor(), NamesExtractor(),
                  OrganisationExtractor(), PersonExtractor()]
    send_to_db_news_rbc(extractors) # 292.92 секунды
    # ошибки: 6911;7168;7561;8246;8539;8691;9211
    exit()
    '''con = psycopg2.connect(database="texts_politics", user="******", password="******", host="127.0.0.1",
                           port="5432", )
    cur = con.cursor()
    pool = ThreadPool(10) # было ошибок 8  - 2459? 2779 = []
    for i in tqdm(range(5059,9540,10)): # 296.92 секунды # с3347по3357 не делал
        # обработало 5839 строк, из них 120 строк не обработаных
        cur.execute("SELECT full_text FROM public.news_rbc ORDER BY id ASC LIMIT 10 OFFSET %s", (i,))
        data = cur.fetchall()
        docs = [x[0] for x in data]
        #new_form = pool.starmap(send_db_pool_map, zip(docs,[i_num for i_num in range(i,i+10)]))
        new_form = pool.map(send_db_pool_map,docs) # 281.43 секунды | 293.59
Exemplo n.º 10
0
from ipymarkup import AsciiMarkup, Span, BoxMarkup
import re
import json

from natasha import (NamesExtractor, AddressExtractor, DatesExtractor,
                     MoneyExtractor, OrganisationExtractor, LocationExtractor)
from natasha.markup import show_markup, show_json

extractors = [
    NamesExtractor(),
    AddressExtractor(),
    DatesExtractor(),
    MoneyExtractor(),
    OrganisationExtractor(),
    LocationExtractor()
]

from flask import Flask
from flask import request

app = Flask(__name__)


@app.route('/getFacts', methods=['POST'])
def getFacts():
    print(request.is_json)
    content = request.get_json()

    text = content['text']

    facts = {}
Exemplo n.º 11
0
'''
Если ничего не будет работать, это нас спасет
'''
from natasha import NamesExtractor
from natasha import LocationExtractor
from natasha import OrganisationExtractor
from natasha import DatesExtractor
from natasha import AddressExtractor


names_extr = NamesExtractor()
locs_extr = LocationExtractor()
org_extr = OrganisationExtractor()
dates_extr = DatesExtractor()
address_extr = AddressExtractor()


def recognize_names(text):
    tmp = text
    matches = names_extr(text)
    for match in matches:
        start, finish = match.span
        tmp = tmp.replace(text[start:finish], "[NAME]")
    return tmp


def recognize_locs(text):
    tmp = text
    matches = locs_extr(text)
    for match in matches:
        start, finish = match.span
def get_Locations(text):
    extractor = LocationExtractor()
    matches = extractor(text)
    return list(map(lambda m: Location(m.fact.name), matches))
Exemplo n.º 13
0
def extractor():
    return LocationExtractor()