inside_code = 32 elif (inside_code >= 65281 and inside_code <= 65374): #全角字符(除空格)根据关系转化 inside_code -= 65248 rstring += unichr(inside_code) return rstring try: import sofa except: sys.stderr.write('Error: Please excute the following command first:\n') sys.stderr.write('export SOFA_CONFIG=./config/drpc_client.xml\n') sys.exit(1) sofa.use('drpc.ver_1_0_0', 'S') sofa.use('nlpc.ver_1_0_0', 'nlpc') conf = sofa.Config() conf.load('./config/drpc_client.xml')#local if open_flag[0] == 1: wordrank_agent = S.ClientAgent(conf['sofa.service.nlpc_wordrank_208'])#local else: wordrank_agent = None; if open_flag[1] == 1: wordpos_agent = S.ClientAgent(conf['sofa.service.nlpc_wordpos_202']) #local else: wordpos_agent = None; if open_flag[2] == 1: depparser_agent = S.ClientAgent(conf['sofa.service.nlpc_depparser_query_107']) #local
def __init__(self, conf):
    """Call ``sofa.use`` for the ``OnlineDeployService.ver_1_0_0`` definition.

    Args:
        conf: Accepted from the caller; it is not referenced by the
            statement visible in this method.
    """
    # NOTE(review): presumably this makes the OnlineDeployService
    # interface available to later code -- confirm against the sofa docs.
    sofa.use('OnlineDeployService.ver_1_0_0')
def _write_terms(header, count, positions, buf):
    """Write one banner line followed by a space-separated list of terms.

    Args:
        header: Banner text written (plus a newline) before the terms.
        count: Number of leading entries of ``positions`` to print.
        positions: Packed term descriptors; each one is decoded with
            ``GET_TERM_POS`` (start offset) and ``GET_TERM_LEN`` (length).
        buf: Buffer the terms are sliced out of.
    """
    stdout.write(header + '\n')
    for i in range(count):
        start = GET_TERM_POS(positions[i])
        length = GET_TERM_LEN(positions[i])
        word = buf[start:start + length]
        stdout.write('%s ' % word)
    stdout.write('\n')


def main():
    """Segment every stdin line through the wordseg service and print it.

    Each input line is re-encoded from UTF-8 to GBK (this relies on
    ``str.decode``, i.e. Python 2 byte strings), sent to the
    ``sofa.service.nlpc_wordseg_3016`` agent with up to five attempts, and
    the reply is printed section by section.

    Section selection follows the command line:
      * no argument: every section is printed;
      * exactly one argument: only the section named by it
        (``basic``, ``segment``, ``phrase``, ``new``, ``human``, ``book``).

    Returns when stdin reaches end of file.
    """
    sofa.use('drpc.ver_1_0_0', 'S')
    sofa.use('nlpc.ver_1_0_0', 'wordseg')
    conf = sofa.Config()
    conf.load('./config/drpc_client.xml')
    wordseg_agent = S.ClientAgent(conf['sofa.service.nlpc_wordseg_3016'])

    # The selection does not change while the loop runs, so decide it once.
    show_all = len(argv) == 1
    selected = argv[1] if len(argv) == 2 else None

    def wanted(mode):
        return show_all or selected == mode

    while True:
        line = stdin.readline()
        if len(line) <= 0:
            return
        line = line.decode('utf-8').encode('gbk')

        m_input = wordseg.wordseg_input()
        m_input.query = str(line)
        m_input.lang_id = 0
        m_input.lang_para = 0
        input_data = sofa.serialize(m_input)

        # Reset before retrying: otherwise, when every attempt fails, the
        # name is either unbound (first line) or still holds the previous
        # line's reply, which would be printed for the wrong input.
        output_data = ''
        for _ in range(5):
            try:
                _, output_data = wordseg_agent.call_method(input_data)
                break
            except Exception:
                # Best-effort retry; a total failure is reported below
                # as 'No result'.
                continue
        if len(output_data) == 0:
            stdout.write('No result' + '\n')
            continue

        m_output = wordseg.wordseg_output()
        m_output = sofa.deserialize(output_data, type(m_output))
        m_output = m_output.scw_out

        if wanted('basic'):
            _write_terms('=========== Basic Word Sep Result =============',
                         m_output.wsbtermcount, m_output.wsbtermpos,
                         m_output.wordsepbuf)
        if wanted('segment'):
            _write_terms('============ Word Phrase Result ==============',
                         m_output.wpbtermcount, m_output.wpbtermpos,
                         m_output.wpcompbuf)
        if wanted('phrase'):
            _write_terms('============ Sub Phrase Result ==============',
                         m_output.spbtermcount, m_output.spbtermpos,
                         m_output.subphrbuf)
        if wanted('new'):
            # New-word data lives on a nested object; only touch it when
            # this section is actually requested.
            pnewword = m_output.pnewword
            _write_terms('============ New Word Result ==============',
                         pnewword.newwordbtermcount, pnewword.newwordbtermpos,
                         pnewword.newwordbuf)
        if wanted('human'):
            _write_terms('=========== Human Name Result ==============',
                         m_output.namebtermcount, m_output.namebtermpos,
                         m_output.namebuf)
        if wanted('book'):
            _write_terms('=============== book names =================',
                         m_output.bnbtermcount, m_output.bnbtermpos,
                         m_output.booknamebuf)
if inside_code == 12288: #全角空格直接转换 inside_code = 32 elif (inside_code >= 65281 and inside_code <= 65374): #全角字符(除空格)根据关系转化 inside_code -= 65248 rstring += unichr(inside_code) return rstring try: import sofa except: sys.stderr.write('Error: Please excute the following command first:\n') sys.stderr.write('export SOFA_CONFIG=./config/drpc_client.xml\n') sys.exit(1) sofa.use('drpc.ver_1_0_0', 'S') sofa.use('nlpc.ver_1_0_0', 'nlpc') conf = sofa.Config() conf.load('./config/drpc_client.xml') #local if open_flag[0] == 1: wordrank_agent = S.ClientAgent( conf['sofa.service.nlpc_wordrank_208']) #local else: wordrank_agent = None if open_flag[1] == 1: wordpos_agent = S.ClientAgent( conf['sofa.service.nlpc_wordpos_202']) #local else: wordpos_agent = None if open_flag[2] == 1:
# Number of input sentences sent to the parser service per request.
_DEPPARSER_BATCH_SIZE = 1000


def _blank_to_underscore(value):
    """Return ``'_'`` for a missing or whitespace-only field, else ``value``.

    ``str.strip()`` always returns a string, so the emptiness test has to be
    a truthiness check rather than a comparison with ``None``.
    """
    if value is None or not value.strip():
        return '_'
    return value


def _parse_and_print(query_agent, sentences):
    """Parse one batch of sentences and write the result to stdout.

    The batch is serialized into a ``depparser_uni_input`` request and sent
    through ``query_agent`` with up to five attempts. Every term of every
    returned sentence is written as one tab-separated line (index, word,
    lemma, cpostag, postag, ner, feat, head, deprel), and each sentence is
    followed by an empty line. When no reply is obtained, ``'No result'`` is
    written instead.

    Args:
        query_agent: Client agent whose ``call_method`` performs the request.
        sentences: List of sentence strings to parse.
    """
    m_input = nlpc.depparser_uni_input()
    m_input.grain_size = 1
    m_input.sentence_segmented = False
    m_input.sentences = sentences
    input_data = sofa.serialize(m_input)

    # Start from an empty reply so that five failed attempts are reported as
    # 'No result' instead of hitting an unbound name or reusing an old reply.
    output_data = ''
    for _ in range(5):
        try:
            _, output_data = query_agent.call_method(input_data)
            break
        except Exception:
            # Best-effort retry; total failure is handled just below.
            continue
    if len(output_data) == 0:
        sys.stdout.write('No result' + '\n')
        return

    m_output = nlpc.depparser_uni_output()
    m_output = sofa.deserialize(output_data, type(m_output))
    dep_sentences = m_output.dep_sentences

    # Indexed access is kept on purpose: len() and [] are the only
    # operations the original code relied on for these service types.
    for i in range(len(dep_sentences)):
        dep_terms = dep_sentences[i].dep_terms
        for j in range(len(dep_terms)):
            term = dep_terms[j]
            fields = [
                str(j),
                term.word,
                _blank_to_underscore(term.lemma),
                _blank_to_underscore(term.cpostag),
                _blank_to_underscore(term.postag),
                _blank_to_underscore(term.ner),
                _blank_to_underscore(term.feat),
                str(term.head),
                _blank_to_underscore(term.deprel),
            ]
            sys.stdout.write('\t'.join(fields) + '\n')
        sys.stdout.write('\n')


def main():
    """Read sentences from stdin and print their dependency parses.

    Lines are stripped of surrounding spaces, tabs and line breaks, grouped
    into batches of ``_DEPPARSER_BATCH_SIZE`` and sent to the
    ``sofa.service.nlpc_depparser_uni_web_107`` agent. The final, possibly
    smaller batch is sent once stdin reaches end of file.

    A batch is dropped after it has been attempted, whether or not the
    service answered; keeping a failed batch would re-send an ever-growing
    request for every further input line.
    """
    sofa.use('drpc.ver_1_0_0', 'S')
    sofa.use('nlpc.ver_1_0_0', 'nlpc')
    conf = sofa.Config()
    conf.load('./config/drpc_client.xml')
    # Alternative service key previously used here:
    # 'sofa.service.nlpc_depparser_uni_query_107'
    query_agent = S.ClientAgent(conf['sofa.service.nlpc_depparser_uni_web_107'])

    in_sentences = []
    while True:
        line = sys.stdin.readline()
        if len(line) <= 0:
            break
        in_sentences.append(str(line.strip(' \t\n\r')))
        if len(in_sentences) < _DEPPARSER_BATCH_SIZE:
            continue
        _parse_and_print(query_agent, in_sentences)
        in_sentences = []

    # Flush whatever is left over after the last full batch.
    if len(in_sentences) > 0:
        _parse_and_print(query_agent, in_sentences)
# Number of input sentences sent to the parser service per request.
_DEPPARSER_BATCH_SIZE = 1000


def _blank_to_underscore(value):
    """Return ``'_'`` for a missing or whitespace-only field, else ``value``.

    ``str.strip()`` always returns a string, so the emptiness test has to be
    a truthiness check rather than a comparison with ``None``.
    """
    if value is None or not value.strip():
        return '_'
    return value


def _parse_and_print(query_agent, sentences):
    """Parse one batch of sentences and write the result to stdout.

    The batch is serialized into a ``depparser_uni_input`` request and sent
    through ``query_agent`` with up to five attempts. Every term of every
    returned sentence is written as one tab-separated line (index, word,
    lemma, cpostag, postag, ner, feat, head, deprel), and each sentence is
    followed by an empty line. When no reply is obtained, ``'No result'`` is
    written instead.

    Args:
        query_agent: Client agent whose ``call_method`` performs the request.
        sentences: List of sentence strings to parse.
    """
    m_input = nlpc.depparser_uni_input()
    m_input.grain_size = 1
    m_input.sentence_segmented = False
    m_input.sentences = sentences
    input_data = sofa.serialize(m_input)

    # Start from an empty reply so that five failed attempts are reported as
    # 'No result' instead of hitting an unbound name or reusing an old reply.
    output_data = ''
    for _ in range(5):
        try:
            _, output_data = query_agent.call_method(input_data)
            break
        except Exception:
            # Best-effort retry; total failure is handled just below.
            continue
    if len(output_data) == 0:
        sys.stdout.write('No result' + '\n')
        return

    m_output = nlpc.depparser_uni_output()
    m_output = sofa.deserialize(output_data, type(m_output))
    dep_sentences = m_output.dep_sentences

    # Indexed access is kept on purpose: len() and [] are the only
    # operations the original code relied on for these service types.
    for i in range(len(dep_sentences)):
        dep_terms = dep_sentences[i].dep_terms
        for j in range(len(dep_terms)):
            term = dep_terms[j]
            fields = [
                str(j),
                term.word,
                _blank_to_underscore(term.lemma),
                _blank_to_underscore(term.cpostag),
                _blank_to_underscore(term.postag),
                _blank_to_underscore(term.ner),
                _blank_to_underscore(term.feat),
                str(term.head),
                _blank_to_underscore(term.deprel),
            ]
            sys.stdout.write('\t'.join(fields) + '\n')
        sys.stdout.write('\n')


def main():
    """Read sentences from stdin and print their dependency parses.

    Lines are stripped of surrounding spaces, tabs and line breaks, grouped
    into batches of ``_DEPPARSER_BATCH_SIZE`` and sent to the
    ``sofa.service.nlpc_depparser_uni_web_107`` agent. The final, possibly
    smaller batch is sent once stdin reaches end of file.

    A batch is dropped after it has been attempted, whether or not the
    service answered; keeping a failed batch would re-send an ever-growing
    request for every further input line.
    """
    sofa.use('drpc.ver_1_0_0', 'S')
    sofa.use('nlpc.ver_1_0_0', 'nlpc')
    conf = sofa.Config()
    conf.load('./config/drpc_client.xml')
    # Alternative service key previously used here:
    # 'sofa.service.nlpc_depparser_uni_query_107'
    query_agent = S.ClientAgent(
        conf['sofa.service.nlpc_depparser_uni_web_107'])

    in_sentences = []
    while True:
        line = sys.stdin.readline()
        if len(line) <= 0:
            break
        in_sentences.append(str(line.strip(' \t\n\r')))
        if len(in_sentences) < _DEPPARSER_BATCH_SIZE:
            continue
        _parse_and_print(query_agent, in_sentences)
        in_sentences = []

    # Flush whatever is left over after the last full batch.
    if len(in_sentences) > 0:
        _parse_and_print(query_agent, in_sentences)