Exemplo n.º 1
0
def setUp():
    """
    Prepare the module-level ZPar objects shared by the tests.

    The ``ZPAR_MODEL_DIR`` environment variable must be set; its value is
    handed to ``ZPar`` and the resulting object and the tagger obtained from
    it are stored in the globals ``z`` and ``tagger``.
    """
    global z, tagger

    # stop right away when the environment does not name a model directory
    assert 'ZPAR_MODEL_DIR' in os.environ

    z = ZPar(os.environ['ZPAR_MODEL_DIR'])
    tagger = z.get_tagger()
Exemplo n.º 2
0
def setUp():
    """
    Create the ZPar wrapper and tagger used by the tests.

    Reads the model directory from the ``ZPAR_MODEL_DIR`` environment
    variable (which has to be present) and publishes the loaded objects as
    the module globals ``z`` and ``tagger``.
    """
    global z, tagger

    environment = os.environ

    # the tests cannot run without a configured model directory
    assert 'ZPAR_MODEL_DIR' in environment

    z = ZPar(environment['ZPAR_MODEL_DIR'])
    tagger = z.get_tagger()
Exemplo n.º 3
0
class StoppableServer(_baseclass):
    """
    Server that publishes ZPar model functions and can be stopped remotely.

    On construction a ``ZPar`` object is created from ``zpar_model_path`` and,
    for every model named in ``model_list`` ('tagger', 'parser',
    'depparser'), the corresponding functions are registered with the
    server. ``stop_server`` is always registered so that a client can end
    the ``serve_forever`` loop.

    NOTE(review): ``_baseclass`` is defined outside this block; it is
    expected to provide ``register_function``, ``handle_request`` and
    ``server_close`` -- confirm which server class it is bound to.
    """

    allow_reuse_address = True

    def __init__(self, addr, zpar_model_path, model_list, *args, **kwds):
        """
        Args:
            addr: ``(host, port)`` pair; also forwarded to the parent class.
            zpar_model_path: path passed to ``ZPar``.
            model_list: container of model names; only the models named in
                it are loaded and exposed.
            *args, **kwds: forwarded unchanged to the parent initializer.
        """

        # store the hostname and port number
        self.myhost, self.myport = addr

        # store the link to the loaded zpar object
        self.z = ZPar(zpar_model_path)

        # initialize the parent class
        _baseclass.__init__(self, addr, *args, **kwds)

        # Call the individual loading functions
        # and only register the appropriate methods
        if 'tagger' in model_list:
            tagger = self.z.get_tagger()
            self.register_function(tagger.tag_sentence)
            self.register_function(tagger.tag_file)
        if 'parser' in model_list:
            parser = self.z.get_parser()
            self.register_function(parser.parse_sentence)
            self.register_function(parser.parse_file)
            self.register_function(parser.parse_tagged_sentence)
            self.register_function(parser.parse_tagged_file)
        if 'depparser' in model_list:
            depparser = self.z.get_depparser()
            self.register_function(depparser.dep_parse_sentence)
            self.register_function(depparser.dep_parse_file)
            self.register_function(depparser.dep_parse_tagged_sentence)
            self.register_function(depparser.dep_parse_tagged_file)

        # register the function to remotely stop the server
        self.register_function(self.stop_server)

        self.quit = False

    def serve_forever(self):
        """
        Handle requests one at a time until ``stop_server`` has been called
        or a keyboard interrupt arrives, then release the ZPar object and
        close the server.

        The cleanup runs in a ``finally`` clause so that it also happens
        when ``handle_request`` raises an unexpected exception; the
        exception itself still propagates to the caller.
        """
        try:
            while not self.quit:
                try:
                    self.handle_request()
                except KeyboardInterrupt:
                    print("\nKeyboard interrupt received, exiting.")
                    break
        finally:
            self.z.close()
            self.server_close()

    def stop_server(self):
        """
        Ask the serving loop to end after the current request.

        Returns:
            A ``(0, message)`` pair, where the message names the host and
            port this server was created with.
        """
        self.quit = True
        return 0, "Server terminated on host %r, port %r" % (self.myhost, self.myport)
Exemplo n.º 4
0
def read_data_use(option, sen2id):
    """
    Read the sentences in ``option.use_data_path`` and build, per line, the
    token-id sequence and a keyword indicator vector.

    Every line is cut down to its first 15 whitespace-separated tokens.
    RAKE keyword phrases that occur as a contiguous token span in the line
    mark the covered positions with 1 in a zero vector of length
    ``option.num_steps - 1`` (positions beyond that length are ignored).

    Args:
        option: configuration object; the attributes read here are
            ``use_data_path``, ``num_steps``, ``dict_size``, ``pos_path``
            and ``keyword_pos``.
        sen2id: callable applied to the lower-cased token list of each line.

    Returns:
        ``(data_new, sta_vec_list)`` where ``data_new`` is the result of
        ``array_data(data, option.num_steps, option.dict_size)`` and
        ``sta_vec_list`` holds one entry per line: the value returned by
        ``keyword_pos2sta_vec`` when ``option.keyword_pos == True``,
        otherwise an all-zero vector of length ``option.num_steps - 1``.
    """
    file_name = option.use_data_path
    max_length = option.num_steps
    dict_size = option.dict_size

    # time how long it takes to set up the keyword extractor and the tagger
    time1 = time.time()
    rake = RAKE.Rake(RAKE.SmartStopList())
    z = ZPar(option.pos_path)
    tagger = z.get_tagger()
    time2 = time.time()
    print("read data load time: ", time2 - time1)

    data = []
    sta_vec_list = []
    with open(file_name) as f:
        for line in f:
            # strip/split once; everything below works on these two values
            text = line.strip()
            words = text.split()
            if len(words) > 15:
                words = words[:15]
                text = ' '.join(words)

            sta_vec = list(np.zeros([option.num_steps - 1]))
            keyword = rake.run(text)
            pos_list = tagger.tag_sentence(text).split()
            # NOTE(review): [0] keeps the first '/'-separated field of every
            # tagged token; if the tagger emits word/TAG this is the word
            # column, not the tags -- confirm what keyword_pos2sta_vec expects.
            pos = list(zip(*[x.split('/') for x in pos_list]))[0]

            if keyword != []:
                # first element of every RAKE result entry is the phrase
                phrases = list(list(zip(*keyword))[0])
                keyword_new = []
                for i in range(len(words)):
                    for item in phrases:
                        span = len(item.split())
                        if ' '.join(words[i:i + span]) == item:
                            keyword_new.extend(range(i, i + span))
                for ind in keyword_new:
                    # sta_vec has num_steps - 1 slots; later positions are dropped
                    if ind <= option.num_steps - 2:
                        sta_vec[ind] = 1

            # explicit comparison kept on purpose: truthy non-True values
            # (e.g. a non-empty string) must still take the else branch
            if option.keyword_pos == True:
                sta_vec_list.append(keyword_pos2sta_vec(option, sta_vec, pos))
            else:
                sta_vec_list.append(list(np.zeros([option.num_steps - 1])))
            data.append(sen2id(text.lower().split()))

    data_new = array_data(data, max_length, dict_size)
    return data_new, sta_vec_list  # sentence, keyvector
Exemplo n.º 5
0
def read_data_use1(option, sen2id):
    """
    Read the sentences in ``option.use_data_path``, printing the
    intermediate keyword information for every line, and build the token-id
    sequences and keyword indicator vectors.

    For each RAKE keyword phrase, every word of the phrase that appears in
    the line marks the position of its first occurrence with 1 in a zero
    vector of length ``option.num_steps - 1`` (positions beyond that length
    are ignored).

    Args:
        option: configuration object; the attributes read here are
            ``use_data_path``, ``num_steps``, ``dict_size``, ``pos_path``
            and ``keyword_pos``.
        sen2id: callable applied to the lower-cased token list of each line.

    Returns:
        ``(data_new, sta_vec_list)`` where ``data_new`` is the result of
        ``array_data(data, option.num_steps, option.dict_size)`` and
        ``sta_vec_list`` holds one entry per line: the value returned by
        ``keyword_pos2sta_vec`` when ``option.keyword_pos == True``,
        otherwise an all-zero vector of length ``option.num_steps - 1``.
    """
    file_name = option.use_data_path
    max_length = option.num_steps
    dict_size = option.dict_size
    rake = RAKE.Rake(RAKE.SmartStopList())
    z = ZPar(option.pos_path)
    tagger = z.get_tagger()

    data = []
    sta_vec_list = []
    with open(file_name) as f:
        for line in f:
            print('sentence:' + line)
            # strip/split once; everything below works on these two values
            text = line.strip()
            words = text.split()

            sta_vec = list(np.zeros([option.num_steps - 1]))
            keyword = rake.run(text)
            pos_list = tagger.tag_sentence(text).split()
            # NOTE(review): [0] keeps the first '/'-separated field of every
            # tagged token; if the tagger emits word/TAG this is the word
            # column, not the tags -- confirm what keyword_pos2sta_vec expects.
            pos = list(zip(*[x.split('/') for x in pos_list]))[0]
            print(keyword)

            if keyword != []:
                # first element of every RAKE result entry is the phrase
                phrases = list(list(zip(*keyword))[0])
                keyword_new = []
                for item in phrases:
                    # list.index returns the first match only, so a word
                    # that is repeated in the line always maps to its
                    # earliest position
                    tem1 = [words.index(x) for x in item.split() if x in words]
                    print('id', tem1)
                    keyword_new.extend(tem1)
                print(keyword_new)
                for ind in keyword_new:
                    # sta_vec has num_steps - 1 slots; later positions are dropped
                    if ind <= option.num_steps - 2:
                        sta_vec[ind] = 1

            # explicit comparison kept on purpose: truthy non-True values
            # (e.g. a non-empty string) must still take the else branch
            if option.keyword_pos == True:
                sta_vec_list.append(keyword_pos2sta_vec(option, sta_vec, pos))
            else:
                sta_vec_list.append(list(np.zeros([option.num_steps - 1])))
            # debugging output: printed regardless of option.keyword_pos
            print(keyword_pos2sta_vec(option, sta_vec, pos))
            data.append(sen2id(text.lower().split()))

    data_new = array_data(data, max_length, dict_size)
    return data_new, sta_vec_list  # sentence, keyvector
Exemplo n.º 6
0
Arquivo: reader.py Projeto: zhouh/CGMH
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import pickle as pkl
from config import config
# Rebind `config` to the object returned by calling the imported `config`;
# the statements below read their settings from that object.
config = config()
from utils import *
import sys
# Put config.dict_path at the front of the module search path so that the
# `dict_use` import on the next line is looked up there first.
sys.path.insert(0, config.dict_path)
from dict_use import *
import RAKE
# Module-level RAKE instance built with the package's SmartStopList().
Rake = RAKE.Rake(RAKE.SmartStopList())
from zpar import ZPar
# Create the ZPar object from config.pos_path at import time and keep a
# module-level tagger obtained from it.
z = ZPar(config.pos_path)
tagger = z.get_tagger()
# NOTE(review): presumably the train/test split proportion -- confirm at the
# place where tt_proportion is used.
tt_proportion = 0.9


class dataset(object):
    def __init__(self, input, sequence_length, target):
        self.input = input
        self.target = target
        self.sequence_length = sequence_length
        self.length = len(input)

    def __call__(self, batch_size, step):
        batch_num = self.length // batch_size
        step = step % batch_num
        return self.input[step * batch_size:(step + 1) *
                          batch_size], self.sequence_length[