Exemplo n.º 1
0
# -*- coding: utf-8 -*-

from datetime import datetime
from os import listdir
from os.path import isfile, join
from concurrent import futures

from epoms.db import *
from epoms.entity_extract import EntityExtract


INDEX_NAME  = 'epoms'
TIMEOUT     = 300
MAX_WORKER  = 1

config   = EPOMSConfig()
en = EntityExtract()

news = (News().select())

for n in news:
    print '>> Extracting Entity [%5d]' % ( n.id )
    try:
        names = en.extract_name( n.content )
        keys = names.keys()
        for i in keys:
            for j in range(names[i]):
                print i
    except Exception as exc:
        pass
Exemplo n.º 2
0
from concurrent import futures

from epoms.db import *
from epoms.entity_extract import EntityExtract

from nltk.tokenize import sent_tokenize

import sys
import re

INDEX_NAME = "epoms"
TIMEOUT = 300
MAX_WORKER = 1

config = EPOMSConfig()
en = EntityExtract()


mode = sys.argv[1]
# 1 = extract name from one sentence
# all = whole text

news = News().select()


def merge_sentence(sentences, mode):
    start = 0
    length = len(sentences)

    if mode == "all":
        mode = length