def setUp():
    """Initialize the module-level ZPar wrapper and POS tagger for the tests.

    Requires the ZPAR_MODEL_DIR environment variable to point at a
    directory containing the zpar models.
    """
    global z, tagger
    # NOTE(review): tests conventionally use assert here; this is stripped
    # under `python -O`, so the suite assumes an unoptimized interpreter.
    assert 'ZPAR_MODEL_DIR' in os.environ
    z = ZPar(os.environ['ZPAR_MODEL_DIR'])
    tagger = z.get_tagger()
def setUp():
    """Prepare the shared ZPar instance and tagger used by these tests.

    Reads the model location from the ZPAR_MODEL_DIR environment variable.
    """
    global z, tagger
    assert 'ZPAR_MODEL_DIR' in os.environ
    model_dir = os.environ['ZPAR_MODEL_DIR']
    z = ZPar(model_dir)
    tagger = z.get_tagger()
class StoppableServer(_baseclass):
    """An RPC server wrapping a loaded ZPar instance.

    Registers tagging/parsing methods for each requested model and exposes
    a remote ``stop_server`` hook so a client can shut the server down.
    """

    # Allow immediate rebinding of the port after a restart.
    allow_reuse_address = True

    def __init__(self, addr, zpar_model_path, model_list, *args, **kwds):
        """Load the zpar models and register the RPC methods.

        Parameters:
            addr: (host, port) tuple to bind to.
            zpar_model_path: directory containing the zpar models.
            model_list: iterable naming which models to load; any of
                'tagger', 'parser', 'depparser'.
        """
        # store the hostname and port number
        self.myhost, self.myport = addr

        # store the link to the loaded zpar object
        self.z = ZPar(zpar_model_path)

        # initialize the parent class
        _baseclass.__init__(self, addr, *args, **kwds)

        # Load only the requested models and register their methods.
        if 'tagger' in model_list:
            tagger = self.z.get_tagger()
            self.register_function(tagger.tag_sentence)
            self.register_function(tagger.tag_file)
        if 'parser' in model_list:
            parser = self.z.get_parser()
            self.register_function(parser.parse_sentence)
            self.register_function(parser.parse_file)
            self.register_function(parser.parse_tagged_sentence)
            self.register_function(parser.parse_tagged_file)
        if 'depparser' in model_list:
            parser = self.z.get_depparser()
            self.register_function(parser.dep_parse_sentence)
            self.register_function(parser.dep_parse_file)
            self.register_function(parser.dep_parse_tagged_sentence)
            self.register_function(parser.dep_parse_tagged_file)

        # register the function to remotely stop the server
        self.register_function(self.stop_server)
        self.quit = False

    def serve_forever(self):
        """Handle requests until ``stop_server`` is called or Ctrl-C.

        Cleanup now runs in a ``finally`` block: the original skipped
        ``self.z.close()`` / ``self.server_close()`` when handle_request()
        raised anything other than KeyboardInterrupt, leaking the loaded
        models and the listening socket.
        """
        try:
            while not self.quit:
                try:
                    self.handle_request()
                except KeyboardInterrupt:
                    print("\nKeyboard interrupt received, exiting.")
                    break
        finally:
            self.z.close()
            self.server_close()

    def stop_server(self):
        """Remote hook: ask the serving loop to exit after this request."""
        self.quit = True
        return 0, "Server terminated on host %r, port %r" % (self.myhost,
                                                             self.myport)
def read_data_use(option, sen2id):
    """Read sentences, POS-tag them, and build keyword indicator vectors.

    Each input line is truncated to 15 tokens, keyphrases are extracted
    with RAKE, and every token position covered by a keyphrase is marked
    in a 0/1 vector of length ``option.num_steps - 1``.

    Parameters:
        option: config object; reads use_data_path, num_steps, dict_size,
            pos_path and keyword_pos.
        sen2id: callable mapping a list of lowercased tokens to word ids.

    Returns:
        (data_new, sta_vec_list): the padded id array produced by
        ``array_data`` and one indicator vector per sentence.
    """
    file_name = option.use_data_path
    max_length = option.num_steps
    dict_size = option.dict_size

    time1 = time.time()
    Rake = RAKE.Rake(RAKE.SmartStopList())
    z = ZPar(option.pos_path)
    tagger = z.get_tagger()
    time2 = time.time()
    print("read data load time: ", time2 - time1)

    with open(file_name) as f:
        data = []
        sta_vec_list = []
        for line in f:
            # Truncate overly long sentences to 15 tokens.
            if len(line.strip().split()) > 15:
                line = ' '.join(line.strip().split()[:15])
            sta_vec = list(np.zeros([option.num_steps - 1]))
            keyword = Rake.run(line.strip())
            pos_list = tagger.tag_sentence(line.strip()).split()
            # tag_sentence yields "word/TAG" tokens; keep just the words.
            pos = list(zip(*[x.split('/') for x in pos_list]))[0]
            if keyword:
                # Rake.run returns (phrase, score) pairs; keep the phrases.
                keyword = list(list(zip(*keyword))[0])
                keyword_new = []
                linewords = line.strip().split()
                # Mark every token position covered by some keyphrase
                # (exact multi-word match against the sentence).
                for i in range(len(linewords)):
                    for item in keyword:
                        phrase_len = len(item.split())
                        if ' '.join(linewords[i:i + phrase_len]) == item:
                            keyword_new.extend(
                                i + k for k in range(phrase_len))
                for ind in keyword_new:
                    # Positions past the vector length are dropped.
                    if ind <= option.num_steps - 2:
                        sta_vec[ind] = 1
            if option.keyword_pos:
                sta_vec_list.append(keyword_pos2sta_vec(option, sta_vec, pos))
            else:
                sta_vec_list.append(list(np.zeros([option.num_steps - 1])))
            data.append(sen2id(line.strip().lower().split()))
    data_new = array_data(data, max_length, dict_size)
    return data_new, sta_vec_list  # sentence, keyvector
def read_data_use1(option, sen2id):
    """Verbose variant of read_data_use using index-based keyword lookup.

    Unlike read_data_use, sentences are NOT truncated to 15 tokens and
    keyword positions are found per-token with ``list.index`` instead of
    by matching whole phrases. Prints intermediate results for debugging.

    Parameters:
        option: config object; reads use_data_path, num_steps, dict_size,
            pos_path and keyword_pos.
        sen2id: callable mapping a list of lowercased tokens to word ids.

    Returns:
        (data_new, sta_vec_list): padded id array from ``array_data`` and
        one keyword/POS indicator vector per sentence.
    """
    file_name = option.use_data_path
    max_length = option.num_steps
    dict_size = option.dict_size
    Rake = RAKE.Rake(RAKE.SmartStopList())
    z = ZPar(option.pos_path)
    tagger = z.get_tagger()
    with open(file_name) as f:
        data = []
        sta_vec_list = []
        for line in f:
            print('sentence:' + line)
            sta_vec = list(np.zeros([option.num_steps - 1]))
            keyword = Rake.run(line.strip())
            pos_list = tagger.tag_sentence(line.strip()).split()
            # "word/TAG" tokens -> tuple of words only.
            pos = list(zip(*[x.split('/') for x in pos_list]))[0]
            print(keyword)
            if keyword:
                # Keep just the phrases from the (phrase, score) pairs.
                keyword = list(list(zip(*keyword))[0])
                keyword_new = []
                for item in keyword:
                    # NOTE(review): list.index returns only the FIRST
                    # occurrence, so a token repeated in the sentence is
                    # marked once — confirm this is intended.
                    tem1 = [
                        line.strip().split().index(x)
                        for x in item.split()
                        if x in line.strip().split()
                    ]
                    print('id', tem1)
                    keyword_new.extend(tem1)
                print(keyword_new)
                for ind in keyword_new:
                    # Positions past the vector length are dropped.
                    if ind <= option.num_steps - 2:
                        sta_vec[ind] = 1
            if option.keyword_pos:
                sta_vec_list.append(keyword_pos2sta_vec(option, sta_vec, pos))
            else:
                sta_vec_list.append(list(np.zeros([option.num_steps - 1])))
            print(keyword_pos2sta_vec(option, sta_vec, pos))
            data.append(sen2id(line.strip().lower().split()))
    data_new = array_data(data, max_length, dict_size)
    return data_new, sta_vec_list  # sentence, keyvector
from __future__ import absolute_import from __future__ import division from __future__ import print_function import numpy as np import pickle as pkl from config import config config = config() from utils import * import sys sys.path.insert(0, config.dict_path) from dict_use import * import RAKE Rake = RAKE.Rake(RAKE.SmartStopList()) from zpar import ZPar z = ZPar(config.pos_path) tagger = z.get_tagger() tt_proportion = 0.9 class dataset(object): def __init__(self, input, sequence_length, target): self.input = input self.target = target self.sequence_length = sequence_length self.length = len(input) def __call__(self, batch_size, step): batch_num = self.length // batch_size step = step % batch_num return self.input[step * batch_size:(step + 1) * batch_size], self.sequence_length[