def __init__(self, address_morph, address_syntax, address_rst):
    """Assemble the default annotation pipeline.

    Stages: remote morphology -> Mystem-to-UD tag conversion ->
    remote dependency syntax -> remote RST discourse parsing.

    Each address_* argument is a (host, port) pair for the
    corresponding remote service.
    """
    steps = []
    # Remote morphological analysis over the raw text.
    steps.append((
        ProcessorRemote(address_morph[0], address_morph[1], 'default'),
        ['text'],
        {'sentences': 'sentences',
         'tokens': 'tokens',
         'postag': 'postag',
         'lemma': 'lemma'},
    ))
    # Convert Mystem tags into Universal Dependencies morphology/postags.
    steps.append((
        ConverterMystemToUd(),
        ['postag'],
        {'morph': 'morph', 'postag': 'postag'},
    ))
    # Remote dependency parser; also emits its own UD postags.
    steps.append((
        ProcessorRemote(address_syntax[0], address_syntax[1], '0'),
        ['tokens', 'sentences'],
        {'syntax_dep_tree': 'syntax_dep_tree', 'postag': 'ud_postag'},
    ))
    # Remote RST parser consumes the full annotation built so far.
    steps.append((
        ProcessorRemote(address_rst[0], address_rst[1], 'default'),
        ['text', 'tokens', 'sentences', 'lemma', 'morph', 'postag',
         'syntax_dep_tree'],
        {'rst': 'rst'},
    ))
    self._ppl = PipelineCommon(steps)
    self._name = 'default'
def __init__(self, basic_processor=('vmh1.isa.ru', 3344), udpipe_processor=('vmh1.isa.ru', 3355)):
    """Build a multiprocess-wrapped pipeline: morphology -> UDPipe syntax -> UD conversion.

    Both processor arguments are (host, port) pairs of remote isanlp services.
    """
    # Remote morphology: sentence split, tokens, Mystem postags, lemmas.
    morph_step = (
        ProcessorRemote(basic_processor[0], basic_processor[1], 'default'),
        ['text'],
        {'sentences': 'sentences',
         'tokens': 'tokens',
         'postag': 'mystem_postags',
         'lemma': 'lemma'},
    )
    # Remote UDPipe dependency parsing over the morphology output.
    syntax_step = (
        ProcessorRemote(udpipe_processor[0], udpipe_processor[1], '0'),
        ['tokens', 'sentences'],
        {'syntax_dep_tree': 'syntax_dep_tree'},
    )
    # Convert Mystem tags into UD morphology (stored under 'postag').
    convert_step = (
        ConverterMystemToUd(),
        ['mystem_postags'],
        {'morph': 'postag'},
    )
    self.ppl = WrapperMultiProcessDocument([
        PipelineCommon([morph_step, syntax_step, convert_step])
    ])
def get_tree(text):
    """Parse ``text`` into dependency trees annotated with semantic roles.

    Runs remote morphology and syntax processors, converts Mystem tags to
    UD, attaches semantic-relation labels from the reparser to matching
    tokens, and returns one tree root per sentence.

    Returns:
        list of sentence-root ``tree`` nodes, or ``None`` if the remote
        analysis pipeline fails.
    """
    from isanlp import PipelineCommon
    from isanlp.processor_remote import ProcessorRemote
    from isanlp.ru.converter_mystem_to_ud import ConverterMystemToUd
    from Parser.some_reparser import extract_semantic_relations

    HOST = 'localhost'
    proc_morph = ProcessorRemote(HOST, 3333, 'default')
    proc_syntax = ProcessorRemote(HOST, 3334, '0')
    syntax_ppl = PipelineCommon([
        (proc_morph, ['text'],
         {'tokens': 'tokens', 'sentences': 'sentences',
          'postag': 'postag', 'lemma': 'lemma'}),
        (proc_syntax, ['tokens', 'sentences'],
         {'syntax_dep_tree': 'syntax_dep_tree'}),
        (ConverterMystemToUd(), ['postag'],
         {'postag': 'postag', 'morph': 'morph'}),
    ])
    try:
        analysis_res = syntax_ppl(text)
    except Exception:
        # BUG FIX: was a bare `except:`, which also swallowed
        # KeyboardInterrupt/SystemExit. Keep the best-effort None result
        # but only for ordinary exceptions.
        return None

    # Token texts grouped per sentence; 'sentences' holds index spans into
    # the flat document-level token list.
    sentences = []
    for span in analysis_res['sentences']:
        sentences.append(
            [analysis_res['tokens'][k].text for k in range(span.begin, span.end)]
        )

    relations = extract_semantic_relations(text)

    # Build one vertex per token, per sentence.
    vertices_list_list = []
    for j in range(len(analysis_res['lemma'])):
        # Offset of this sentence's first token in the flat token list.
        sent_begin = analysis_res['sentences'][j].begin
        vertices_list = []
        for i in range(len(analysis_res['lemma'][j])):
            # BUG FIX: 'tokens' is flat across the whole document, but the
            # original indexed it with the sentence-local index i, picking
            # wrong character offsets (and hence wrong semantic roles) for
            # every sentence after the first.
            tok = analysis_res['tokens'][sent_begin + i]
            start, end = tok.begin, tok.end
            role_vert = [rel['tp'] for rel in relations
                         if rel['child']['start'] == start
                         and rel['child']['end'] == end]
            vert = tree(word(analysis_res['lemma'][j][i],
                             analysis_res['postag'][j][i],
                             analysis_res['morph'][j][i],
                             start, end, i, role=role_vert))
            vertices_list.append(vert)
        vertices_list_list.append(vertices_list)

    # Wire parent/child links; the arc with parent == -1 marks the root.
    root_list = []
    for i in range(len(vertices_list_list)):
        list_ = vertices_list_list[i]
        for j in range(len(analysis_res['syntax_dep_tree'][i])):
            arc = analysis_res['syntax_dep_tree'][i][j]
            if arc.parent != -1:
                list_[arc.parent].add_child(list_[j], arc.link_name)
            else:
                list_[j].sentence = sentences[i]
                root_list.append(list_[j])
    return root_list
def __init__(
    self,
    udpipe=("tsa05.isa.ru", 3334),
    rst=("papertext.ru", 5555),
    cache_path="./rst-cache.pkl",
):
    """Build the UDPipe -> Mystem -> UD-conversion -> RST clause pipeline
    and load any previously persisted result cache from disk.

    udpipe and rst are (host, port) pairs of remote services; cache_path
    is where joblib-pickled results are stored between runs.
    """
    self.cache_path = cache_path
    # Remote UDPipe: tokenization, sentences, lemmas, syntax, UD postags.
    udpipe_step = (
        ProcessorRemote(udpipe[0], udpipe[1], "0"),
        ["text"],
        {
            "sentences": "sentences",
            "tokens": "tokens",
            "lemma": "lemma",
            "syntax_dep_tree": "syntax_dep_tree",
            "postag": "ud_postag",
        },
    )
    # Local Mystem pass re-tags the tokens with the Mystem tagset.
    mystem_step = (
        ProcessorMystem(delay_init=False),
        ["tokens", "sentences"],
        {"postag": "postag"},
    )
    # Convert Mystem tags into UD morphology / postags.
    convert_step = (
        ConverterMystemToUd(),
        ["postag"],
        {"morph": "morph", "postag": "postag"},
    )
    # Remote RST service segments the annotated text into clauses.
    rst_step = (
        ProcessorRemote(rst[0], rst[1], "default"),
        ["text", "tokens", "sentences", "postag", "morph", "lemma",
         "syntax_dep_tree"],
        {"clauses": "clauses"},
    )
    self.ppl = PipelineCommon([udpipe_step, mystem_step, convert_step, rst_step])
    self.__cache = {}
    self.__hasher = city_32()
    # Warm the in-memory cache when a previous run left one on disk.
    if os.path.exists(self.cache_path):
        self.__cache = jb.load(self.cache_path)
def get_tree(text):
    """Parse ``text`` with remote morph + syntax processors and return one
    dependency-tree root per sentence."""
    HOST = 'localhost'
    proc_morph = ProcessorRemote(HOST, 3333, 'default')
    proc_syntax = ProcessorRemote(HOST, 3334, '0')
    syntax_ppl = PipelineCommon([
        (proc_morph, ['text'],
         {'tokens': 'tokens', 'sentences': 'sentences',
          'postag': 'postag', 'lemma': 'lemma'}),
        (proc_syntax, ['tokens', 'sentences'],
         {'syntax_dep_tree': 'syntax_dep_tree'}),
        (ConverterMystemToUd(), ['postag'],
         {'postag': 'postag', 'morph': 'morph'}),
    ])
    analysis_res = syntax_ppl(text)

    # Token texts grouped per sentence; 'sentences' holds index spans into
    # the flat token list.
    sentences = [
        [analysis_res['tokens'][k].text for k in range(span.begin, span.end)]
        for span in analysis_res['sentences']
    ]

    # One tree vertex per token, per sentence.
    vertices_list_list = [
        [tree(word(analysis_res['lemma'][j][i],
                   analysis_res['postag'][j][i],
                   analysis_res['morph'][j][i],
                   i))
         for i in range(len(analysis_res['lemma'][j]))]
        for j in range(len(analysis_res['lemma']))
    ]

    # Link parents to children; the arc with parent == -1 marks the root.
    root_list = []
    for sent_idx, verts in enumerate(vertices_list_list):
        for tok_idx, arc in enumerate(analysis_res['syntax_dep_tree'][sent_idx]):
            if arc.parent != -1:
                verts[arc.parent].add_child(verts[tok_idx], arc.link_name)
            else:
                verts[tok_idx].sentence = sentences[sent_idx]
                root_list.append(verts[tok_idx])
    return root_list
import os

from isanlp import PipelineCommon
from isanlp.processor_remote import ProcessorRemote

# Service ports and the input file come from the test environment.
host = 'localhost'
port_morph = int(os.environ['TEST_MORPH_PORT'])
port_srl = int(os.environ['TEST_SRL_PORT'])
text_path = os.environ['TEST_EN_PATH']

with open(text_path, encoding='utf8') as f:
    text = f.read()

# Morphology first; the SRL processor then consumes its tokens/sentences.
morph_step = (
    ProcessorRemote(host=host, port=port_morph, pipeline_name='default'),
    ['text'],
    {'tokens': 'tokens', 'sentences': 'sentences',
     'lemma': 'lemma', 'postag': 'postag'},
)
srl_step = (
    ProcessorRemote(host=host, port=port_srl, pipeline_name='default'),
    ['tokens', 'sentences'],
    {'srl': 'srl'},
)

ppl = PipelineCommon([morph_step, srl_step])
annotations = ppl(text)
# Integration-test driver: remote morphology -> Mystem-to-UD conversion ->
# remote SyntaxNet dependency parsing.
# NOTE(review): this chunk appears truncated — the PipelineCommon list is
# never closed, and a step using TEST_SRL_PORT (read below but never used
# here) is presumably cut off. Code left byte-identical; confirm against
# the full file.
import os
from isanlp.processor_remote import ProcessorRemote
from isanlp.processor_syntaxnet_remote import ProcessorSyntaxNetRemote
from isanlp import PipelineCommon
from isanlp.ru.converter_mystem_to_ud import ConverterMystemToUd

# Service ports and input text path come from the test environment.
port_morph = int(os.environ['TEST_MORPH_PORT'])
port_syntax = int(os.environ['TEST_SYNTAX_PORT'])
port_srl = int(os.environ['TEST_SRL_PORT'])
text_path = os.environ['TEST_PATH']
with open(text_path, encoding='utf8') as f:
    text = f.read()
ppl = PipelineCommon([(ProcessorRemote(host='localhost',
                                       port=port_morph,
                                       pipeline_name='default'),
                       ['text'],
                       {  # raw text -> tokens/sentences/lemmas/Mystem tags
                          'tokens': 'tokens',
                          'sentences': 'sentences',
                          'lemma': 'lemma',
                          'postag': 'mystem_postag'
                       }),
                      (ConverterMystemToUd(),
                       ['mystem_postag'],
                       {  # Mystem tags -> UD morphology and postags
                          'morph': 'morph',
                          'postag': 'postag'
                       }),
                      (ProcessorSyntaxNetRemote(host='localhost',
                                                port=port_syntax),
                       ['tokens', 'sentences'],
                       {  # dependency parse over the token stream
                          'syntax_dep_tree': 'syntax_dep_tree'
                       }),
import os

from isanlp.processor_remote import ProcessorRemote

# Service port and the input file come from the test environment.
port = int(os.environ['TEST_PORT'])
text_path = os.environ['TEST_PATH']

with open(text_path, encoding='utf8') as f:
    text = f.read()

# Run the single remote processor over the raw text.
proc = ProcessorRemote(host='localhost', port=port, pipeline_name='default')
annotations = proc(text)