def __init__(self, datasets_path, corpus_name, parse_type, lang='english'):
    """Set up paths for a raw corpus and optionally load a sentence parser.

    Args:
        datasets_path: root directory holding the 'raw' and 'processed' data.
        corpus_name: corpus directory name under datasets_path/raw.
        parse_type: 'parse' for constituency parsing, 'props' for
            proposition extraction (ClausIE); anything else loads no parser.
        lang: 'english' or 'german'; selects the PCFG model file.
    """
    self.datasets_path = datasets_path
    self.corpus_name = corpus_name
    self.corpus_path = path.join(datasets_path, 'raw', corpus_name)
    self.docs_path = path.join(self.corpus_path, "docs")
    self.topics_file = path.join(self.corpus_path, "topics.xml")
    self.models_path = path.join(self.corpus_path, "models")
    self.smodels_path = path.join(self.corpus_path, "smodels")
    self.jar_path = path.join(PROJECT_PATH, "summarizer", "jars")
    # The Stanford tools locate their jars through CLASSPATH.
    os.environ['CLASSPATH'] = self.jar_path
    self.cleaned_path = path.join(datasets_path, "processed")
    if parse_type == 'parse':
        if lang == 'english':
            self.parser = stanford.StanfordParser(model_path="%s/englishPCFG.ser.gz" % (self.jar_path))
        if lang == 'german':
            self.parser = stanford.StanfordParser(model_path="%s/germanPCFG.ser.gz" % (self.jar_path))
        # self.cleaned_path = path.join(datasets_path, "processed.parse")
    if parse_type == 'props':
        # TODO
        if lang == 'english':
            self.props_parser = ClausIE.get_instance()
        if lang == 'german':
            # NOTE(review): the German 'props' path loads a constituency
            # parser instead of ClausIE — confirm this is intentional.
            self.parser = stanford.StanfordParser(model_path="%s/germanPCFG.ser.gz" % (self.jar_path))
        self.cleaned_path = path.join(datasets_path, "processed.props")
    self.add_dataset_index(lang)
def parse(sentence, parser, request_formats):
    """Parse *sentence* with the requested parser and build response formats.

    Args:
        sentence: raw sentence string to parse.
        parser: "pdx" (PSU wrapper around Stanford) or "stanford".
        request_formats: iterable of format names to include in the response
            ("tree_image", "tree_ascii", "bracket_diagram", "tree_str").

    Returns:
        dict with the sentence, parser name, and a 'response_formats' dict
        mapping each requested format to its rendering.

    Raises:
        ValueError: if *parser* is not a recognized parser name.
    """
    if parser == "pdx":
        # Use the default PSU Parser - but first use stanford and convert to PSU format
        stanford_parser = stanford.StanfordParser(model_path=model_path)
        psu_tree = Tree(parser=stanford_parser)
        tree = psu_tree.parse_sentence(sentence, require_tense=False)
    elif parser == "stanford":
        stanford_parser = stanford.StanfordParser(model_path=model_path)
        tree = next(stanford_parser.raw_parse(sentence))
    else:
        # Previously an unknown parser name left `tree` unbound and crashed
        # later with a confusing NameError; fail fast with a clear error.
        raise ValueError("unknown parser: %r" % (parser,))

    # Build the dictionary of requested renderings of the parse tree.
    response_formats = {}
    if "tree_image" in request_formats:
        # Render the tree to an image and ship it base64-encoded as ASCII.
        psu_tree = Tree()
        img_byte_arr = psu_tree.write_tree_stream(tree)
        import base64
        encoded_img = base64.encodebytes(img_byte_arr).decode('ascii')
        response_formats["tree_image"] = encoded_img
    if "tree_ascii" in request_formats:
        from nltk.treeprettyprinter import TreePrettyPrinter
        ascii_str = str(TreePrettyPrinter(tree)).rstrip()
        response_formats["tree_ascii"] = ascii_str
    if "bracket_diagram" in request_formats:
        # Labelled bracketing with square brackets, whitespace collapsed.
        bracket_diagram = str(tree)
        open_b, close_b = "[]"
        bracket_diagram = bracket_diagram.replace("(", open_b).replace(")", close_b)
        bracket_diagram = " ".join(bracket_diagram.split())
        response_formats["bracket_diagram"] = bracket_diagram
    if "tree_str" in request_formats:
        tree_str = str(tree)
        tree_str = " ".join(tree_str.split())
        response_formats["tree_str"] = tree_str
    return {
        'sentence': sentence,
        'parser': parser,
        'response_formats': response_formats,
    }
def setlang(self, lang, kind):
    """Configure the Stanford environment and load a parser for *lang*.

    Only kind == "con" (constituency) is implemented; "de" (dependency)
    and "neu" (neural) are accepted but are currently no-ops, matching
    the original behavior.

    Args:
        lang: one of "English", "Chinese", "French", "German", "Spanish".
        kind: "con", "de" or "neu".
    """
    base = '/home/bear/Downloads/parser/stanford-parser-full-2015-12-09/'
    os.environ['STANFORD_PARSER'] = base
    os.environ['JAVA_HOME'] = '/usr/lib/jvm/jdk1.8.0_91/jre/jre/bin/'
    os.environ['STANFORD_MODELS'] = base + 'stanford-parser-3.6.0-models.jar'
    # Language -> lexparser model file; replaces five copy-pasted branches
    # that differed only in this file name.
    models = {
        "English": "englishPCFG.ser.gz",
        "Chinese": "chineseFactored.ser.gz",
        "French": "frenchFactored.ser.gz",
        "German": "germanPCFG.ser.gz",
        "Spanish": "spanishPCFG.ser.gz",
    }
    if lang in models and kind == "con":
        self.parser = stanford.StanfordParser(
            model_path=base + "stanford-parser-3.6.0-models/"
            "edu/stanford/nlp/models/lexparser/" + models[lang])
def init_parser(parseType='C'):
    # initializes the parser
    #
    # parseType == 'C' is a constituency tree (via stanford PCFG)
    # parseType == 'D' is a dependency tree (english_SD)
    os.environ['STANFORD_PARSER'] = PARSER_LOC
    os.environ['STANFORD_MODELS'] = MODEL_LOC
    if parseType == 'C':
        parser = stanford.StanfordParser(model_path=CONST_LOC)
    elif parseType == 'D':
        # NOTE(review): this loads CONST_LOC — the same constituency model
        # as the 'C' branch. A dependency-model path (english_SD) was
        # probably intended here; confirm against the module's constants.
        parser = stanford.StanfordParser(model_path=CONST_LOC)
    else:
        print 'Unrecognized parser type request'
        # Implicitly returns None for unknown parse types.
        return
    return parser
def compress_sent_tree(sentence):
    '''
    compress a given sentence (as a string of words)
    return a compressed sentence (also as a string of words)
    '''
    parser = stanford.StanfordParser(model_path=model_path,
                                     path_to_models_jar=parser_jar)
    try:
        # Parse the sentence and keep only the first tree produced.
        s_parsed = parser.raw_parse(sentence)
        for s in s_parsed:
            tree1 = tree.Tree.fromstring(str(s))
            break
        positions, position_flags = get_position_and_flags(tree1)
        compressed = realize(tree1, positions, position_flags)
    except Exception:
        # Was a bare `except:`, which also swallowed KeyboardInterrupt and
        # SystemExit; narrowed so only real errors trigger the fallback.
        print("\nno compression performed duo to an error in parse. sentence: {0} \n".format(sentence))
        compressed = sentence
    return compressed
def get_postags(annotations, sentence,posrange): global previous_sentence_tagged #posrange = 2 if str(sentence) in previous_sentence_tagged: postagsandwords = previous_sentence_tagged[str(sentence)] else: os.environ['STANFORD_PARSER'] = os.getcwd() +'\\jars\\stanford-parser.jar' os.environ['STANFORD_MODELS'] = os.getcwd() +'\\jars\\stanford-parser-3.5.2-models.jar' os.environ['JAVAHOME'] ="C:\\Program Files (x86)\\Java\\jre1.8.0_73\\bin\\java.exe" #os.environ['JAVAHOME'] ="C:\Program Files (x86)\Java\jre1.8.0_66\bin\java.exe" #os.environ['JAVAHOME'] ="C:\Program Files (x86)\Java\jre1.8.0_45\\bin\\java.exe" #print sentences parser = stanford.StanfordParser(model_path=os.getcwd() +"\\jars\\englishPCFG.ser.gz") sentenceParsed = parser.parse_one([sentence]) postagsandwords = sentenceParsed.pos() previous_sentence_tagged[str(sentence)] = postagsandwords postags = [i[1] for i in postagsandwords] #print postags annotations_postags = [] for annotation in annotations: annotationpostags = [] start = int(annotation[0]) - posrange end = int(annotation[1]) + posrange for i in range(start, end+1): if i < 0: annotationpostags.append("S") elif i > len(postags)-1: annotationpostags.append("E") else: annotationpostags.append(postags[i]) annotations_postags.append(annotationpostags) return annotations_postags
def main(argv): debug = False try: opts, args = getopt.getopt(argv, "hd", ["help", "debug"]) except getopt.GetoptError as e: usage() sys.exit(2) for opt, arg in opts: if opt in ["-h", "help"]: usage() sys.exit(2) if opt in ["-d", "debug"]: debug = True parser = stanford.StanfordParser() line = raw_input("Enter line: ") while line != 'stop': sent = list(parser.raw_parse(line))[0] if debug: print sent # print parse tree if sent[0].label() == "SBARQ": print answer(sent) else: try: describe(sent) except ValueError as e: print "Error describing sentence. " + e if debug: print smap # print semantic map line = raw_input("Enter line: ")
def find_head(target):
    # For each sentence, collect crude "head" words: for every NP subtree in
    # the parse, take its rightmost leaf if it is alphabetic. Sentences with
    # >= 200 tokens, or with no qualifying NP, contribute 'null'.
    # Side effect: pickles the full result to 'heads.dat'.
    # NOTE(review): a sentence can contribute several heads (one per
    # qualifying NP) — confirm downstream code expects that.
    parser = stanford.StanfordParser(model_path="edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz", java_options='-mx4g')
    heads = []
    for sentences in target:        # target: iterable of sentence groups
        head = []
        for sentence in sentences:
            flag = 0                # set to 1 once a head has been appended
            if (len(sentence.split()) < 200):
                parsing_tree = parser.raw_parse(sentence)
                for i in (list(parsing_tree))[0].subtrees():
                    if i.label() == 'NP':
                        leaves = i.leaves()
                        rightmost_np = leaves[-1]
                        if rightmost_np.isalpha():
                            head.append(rightmost_np)
                            flag = 1
                if flag == 0:
                    head.append('null')
            else:
                # Too long to parse cheaply; skip parsing entirely.
                head.append('null')
        heads.append(head)
        print('..')                 # progress marker, one per group
    fp = open('heads.dat', 'wb')
    pickle.dump(heads, fp, True)
    fp.close()
    return heads
def __init__(self):
    """Point NLTK at the local Stanford jars and load the Chinese PCFG model."""
    # Jar locations are read by NLTK from these environment variables.
    os.environ['STANFORD_PARSER'] = '/Volumes/Transcend/stanford/stanford-parser.jar'
    os.environ['STANFORD_MODELS'] = '/Volumes/Transcend/stanford/stanford-parser-3.9.1-models.jar'
    # The model path is resolved inside the models jar.
    self.parser = stanford.StanfordParser(
        model_path="edu/stanford/nlp/models/lexparser/chinesePCFG.ser.gz")
def __init__(self, index_name, ip):
    """Load the Chinese Stanford parser and connect to Elasticsearch at *ip*."""
    # NLTK discovers the Stanford jars via these environment variables.
    os.environ['STANFORD_PARSER'] = '/Volumes/Transcend/stanford/stanford-parser.jar'
    os.environ['STANFORD_MODELS'] = '/Volumes/Transcend/stanford/stanford-parser-3.9.1-models.jar'
    self.parser = stanford.StanfordParser(
        model_path="edu/stanford/nlp/models/lexparser/chinesePCFG.ser.gz")
    self.index_name = index_name
    # Connect without username/password authentication.
    self.es = Elasticsearch([ip])
def __init__(self, alg, lang):
    """Initialize from settings.conf: Java/CoreNLP environment variables, the
    Stanford lex parser for *lang* ('de' or 'en'), and dynamically loaded
    PCC and CoNLL parser modules.

    Note: the *alg* argument is accepted but not used in this constructor.
    Exits with status 1 on an unsupported language.
    """
    # read settings from config file
    Config = configparser.ConfigParser()
    scriptLocation = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
    Config.read(os.path.join(scriptLocation, 'settings.conf'));
    os.environ['JAVAHOME'] = Config.get('JAVA', 'JAVAHOME')
    os.environ['STANFORD_PARSER'] = Config.get('CORENLP', 'STANFORD_PARSER')
    os.environ['STANFORD_MODELS'] = Config.get('CORENLP', 'STANFORD_MODELS')
    lexParserPath = ""
    if lang == 'de':
        lexParserPath = Config.get('CORENLP', 'LEXPARSER_DE')
    elif lang == 'en':
        lexParserPath = Config.get('CORENLP', 'LEXPARSER_EN')
    else:
        sys.stderr.write("ERROR: Language '%s' not supported. Please use one of the supported languages.\n" % lang)
        sys.exit(1)
    self.lexParser = stanford.StanfordParser(model_path=lexParserPath)
    # Load the PCC parser module from the file path given in settings.conf.
    pccPath = Config.get('MISC', 'PCCPARSER')
    pccLoc = importlib.util.spec_from_file_location("PCCParser", pccPath)
    self.PCCParser = importlib.util.module_from_spec(pccLoc)
    pccLoc.loader.exec_module(self.PCCParser)
    # Same dynamic-import mechanism for the CoNLL parser module.
    conllPath = Config.get('MISC', 'CONLLPARSER')
    conllLoc = importlib.util.spec_from_file_location('CONLLPARSER', conllPath)
    self.CONLLParser = importlib.util.module_from_spec(conllLoc)
    conllLoc.loader.exec_module(self.CONLLParser)
def __init__(self):
    """Load the Stanford parser plus pickled SQuAD data, and set tag groups."""
    self.parser = stanford.StanfordParser()
    self.squad = SQuADutil()
    self.squad.get_data(mode="pickle")
    # Tokens to ignore, followed by the noun and verb POS-tag inventories.
    self.neglist = ['``', "''", ',', '.', '?']
    self.NN = ["NN", "NNS", "NNP", "NNPS"]
    self.V = ["VB", "VBG", "VBN", "VBP", "VBZ"]
def compress_sent_maxent(sentence):
    """Compress *sentence* using beam search over tree-node keep/drop flags.

    Returns the compressed sentence, or the input unchanged if parsing or
    compression fails.
    """
    try:
        parser = stanford.StanfordParser(model_path=model_path,
                                         path_to_models_jar=parser_jar)
        # Parse and keep only the first tree produced.
        for s in parser.raw_parse(sentence):
            tree1 = tree.Tree.fromstring(str(s))
            break
        tokens = tree1.leaves()
        N = 5  # beam search parameter
        # Take the best-scoring flag assignment from the beam.
        flags = beam_search(tree1, tokens, N)[N - 1]
        positions = tree1.treepositions()
        # Map each tree position to its keep/drop flag.
        position_flags = {}
        idx = 0
        for p in positions:
            position_flags[p] = flags[idx]
            idx += 1
        compressed = realize(tree1, tree1.treepositions(), position_flags)
    except Exception:
        # Was a bare `except:`, which also caught KeyboardInterrupt and
        # SystemExit; narrowed to real errors before falling back.
        compressed = sentence
    return compressed
def myMain():
    """Parse a fixed batch of caption sentences and run myFilter on each tree."""
    parser = stanford.StanfordParser(path_to_jar=path_to_jar,
                                     path_to_models_jar=path_to_models_jar,
                                     model_path=model_path)
    # Sample captions (kept verbatim, typos and trailing spaces included).
    test_sentences = (
        "A man in white shirt on bicycle with a dog riding in the back.",
        "several young students working at a desk with multiple computers",
        "a person running a bike on a road with trees in the background",
        "A white tank with an book on it in classroom.",
        "A young woman standing in a kitchen eats a plate of vegetables.",
        "A man in a red shirt and a red hat is on a motorcycle on a hill side",
        "Stone keepers on the ground is holding a gem of time.",
        "Two chefs in a restaurant kitchen preparing food. ",
        "A commercial dish washing station with a toilet in it.",
        "A geoup of people on bicycles coming down a street.",
        "a bathroom with a toilet between a sink and a shower",
        "Elephant walking through the middle of the road in front of a car. ",
        "A horse drawn carriage among several other motor vehicles on a road.",
        "A car is parked near a parking meter.",
        "a row of bikes and mopeds is parked along the street",
    )
    # raw_parse_sents yields one iterator of trees per input sentence.
    for parsed_line in parser.raw_parse_sents(test_sentences):
        for parse_tree in parsed_line:
            # parse_tree.draw()
            filtered = myFilter(parse_tree)
def __init__(self, productions_filename=None):
    """Create the evaluator; optionally load a saved productions file.

    Args:
        productions_filename: path to a productions file to load, or None
            to start with no grammar (self.grammar is set to None).
    """
    super(QuestionEvaluator, self).__init__()
    self.parser = stanford.StanfordParser(encoding='utf8')
    # `is not None` (identity) instead of `!= None` (PEP 8 idiom fix).
    if productions_filename is not None:
        self.read_productions(productions_filename)
    else:
        self.grammar = None
def clear_data(self):
    """Reset the parser, phrase holders, and RDF subject/predicate/object slots."""
    self.parser = stanford.StanfordParser(model_path=STANFORD_MODEL_PATH)
    self.parse_tree = None
    self.first_NP = ''
    self.first_VP = ''
    # Fresh RDF slots; predicates are matched against verb (VB*) tags.
    self.subject = RDF_Triple.RDF_SOP('subject')
    self.predicate = RDF_Triple.RDF_SOP('predicate', 'VB')
    self.Object = RDF_Triple.RDF_SOP('object')
def init_parser(): print 'Initializing parser...' # initializes the parser os.environ['STANFORD_PARSER'] = PARSER_LOC os.environ['STANFORD_MODELS'] = MODEL_LOC parser = stanford.StanfordParser(model_path=PCFG_LOC) print 'Complete' return parser
def __init__(self, datasets_path, corpus_name, parse_type, lang='english'):
    """Record corpus locations and build a parser when parse_type is 'parse'.

    lang selects the PCFG model ('english' or 'german'); other values leave
    self.parser unset.
    """
    self.datasets_path = datasets_path
    self.corpus_name = corpus_name
    self.corpus_path = path.join(datasets_path, corpus_name)
    self.docs_path = path.join(self.corpus_path, "docs")
    self.topics_file = path.join(self.corpus_path, "topics.xml")
    self.models_path = path.join(self.corpus_path, "models")
    self.smodels_path = path.join(self.corpus_path, "smodels")
    self.jar_path = JAR_PATH
    # Stanford tools locate their jars through CLASSPATH.
    os.environ['CLASSPATH'] = self.jar_path
    self.cleaned_path = path.join(datasets_path, "processed_data")
    if parse_type == 'parse':
        # One PCFG model file per supported language, resolved in jar_path.
        models = {'english': 'englishPCFG.ser.gz',
                  'german': 'germanPCFG.ser.gz'}
        if lang in models:
            self.parser = stanford.StanfordParser(
                model_path="%s/%s" % (self.jar_path, models[lang]))
def _prepare_parser(self):
    """Build a Stanford parser from the configured model path.

    Returns:
        stanford.StanfordParser: parser loaded from self.model_path.
    """
    return stanford.StanfordParser(model_path=self.model_path)
def test_parser():
    # Benchmark the NLTK Stanford parser: time model loading, parse one sample
    # sentence, then parse every definition sentence from
    # items_tagged_modified.json, reporting total and average times.
    import nltk
    from nltk.parse import stanford
    from nltk.parse.stanford import StanfordParser
    # Windows-specific jar and JVM locations.
    os.environ['STANFORD_PARSER'] = 'F:/eclipse_doctor/KnowledgeGraph/stanford-parser/stanford-parser.jar'
    os.environ['STANFORD_MODELS'] = 'F:/eclipse_doctor/KnowledgeGraph/stanford-parser/stanford-parser-3.7.0-models.jar'
    java_path = "C:/ProgramData/Oracle/Java/javapath"
    os.environ['JAVAHOME'] = java_path
    # --- time the parser construction ---
    start = datetime.now()
    print start
    parser = stanford.StanfordParser(
        model_path="F:/eclipse_doctor/KnowledgeGraph/stanford-parser/englishPCFG.ser.gz")
    end = datetime.now()
    print end
    print "cost time: " + str((end - start).microseconds)
    # --- time one sample-sentence parse ---
    sent = 'angulated abutment is an abutment whose body is not parallel to the long axis of the implant. It is utilized when the implant is at a different inclination in relation to the proposed prosthesis.'
    start = datetime.now()
    print start
    trees = parser.parse(sent.split())
    end = datetime.now()
    print end
    print "cost time: " + str((end - start).microseconds)
    print 'len(trees)', len(list(trees))
    # --- bulk-parse every definition sentence from the JSON data file ---
    path_project = os.path.abspath(
        os.path.join(os.getcwd(), os.pardir, os.pardir))
    path_data = path_project + os.sep + "input" + os.sep + "items_tagged_modified.json"
    data = json.load(codecs.open(path_data, encoding='UTF-8'))
    start_all = datetime.now()
    cnt = 0
    trees_all = []
    for item in data:
        pos2definition = item["pos2definition"]
        for pos2def in pos2definition:
            definition = pos2def["definition"]
            # Strip parenthesised asides before tokenizing.
            definition_pure = re.sub(r'\([\s\S]*?\)', "", definition)
            text = nltk.word_tokenize(definition_pure)
            # Split the token list into sentences at '.' tokens.
            sents_pos_period = cut_list(text, ['.'])
            for sent_list in sents_pos_period:
                cnt += 1
                start = datetime.now()
                # print start
                trees = parser.parse(' '.join(sent_list).split())
                trees_all.append(trees)
                end = datetime.now()
                # print end
                # print "cost time: "+str((end - start).microseconds)
    end_all = datetime.now()
    print end_all
    sum_time = (end_all - start_all).seconds
    sum_time_mic = (end_all - start_all).microseconds
    avg_time = (end_all - start_all).seconds * 1.0 / cnt
    print sum_time, sum_time_mic, avg_time, cnt
def __init__(self):
    # Set up: the Stanford constituency parser (Chinese model), four pyltp
    # models (dependency parser, word segmenter, POS tagger, NER), a
    # question-word lexicon for question-type classification, and stop words.
    os.environ['STANFORD_PARSER'] = STANFORD_PARSER_PATH
    os.environ['STANFORD_MODELS'] = STANFORD_MODELS_PATH
    os.environ['JAVAHOME'] = JAVA_HOME
    stanford_model_path = CHINESE_MODEL_PATH
    self.s_parser = stanford.StanfordParser(model_path=stanford_model_path)
    par_model_path = os.path.join(
        LTP_DATA_DIR, 'parser.model')  # dependency parser model path; file name is `parser.model`
    from pyltp import Parser
    self.parser = Parser()  # create instance
    self.parser.load(par_model_path)  # load model
    cws_model_path = os.path.join(LTP_DATA_DIR, 'cws.model')  # word segmentation model path; file name is `cws.model`
    from pyltp import Segmentor
    self.segmentor = Segmentor()  # create instance
    self.segmentor.load(cws_model_path)  # load model
    pos_model_path = os.path.join(LTP_DATA_DIR, 'pos.model')  # POS tagging model path; file name is `pos.model`
    from pyltp import Postagger
    self.postagger = Postagger()  # create instance
    self.postagger.load(pos_model_path)  # load model
    ner_model_path = os.path.join(
        LTP_DATA_DIR, 'ner.model')  # named entity recognition model path; file name is `ner.model`
    from pyltp import NamedEntityRecognizer
    self.recognizer = NamedEntityRecognizer()  # create instance
    self.recognizer.load(ner_model_path)  # load model
    # Question-word lexicon: each key names a question type (person, time,
    # amount, place, reason, yes/no, other, definition); values are the
    # Chinese trigger words for that type.
    q_words = {
        'q1_person': ['谁', '那个', '哪个'],
        'q1_time': ['那年', '时间', '哪年', '何时', '多久', '时候', '年'],
        'q1_amount': ['多', '几', '多少', '第几'],
        'q1_place': ['哪儿', '哪家', '哪里人', '哪里', '那家', '那里人', '那里'],
        'q1_result': ['怎么', '为什么', '为何', '如何', '何'],
        'q1_judge': ['是否', '还是', '吗'],
        'q0_other': ['哪些', '那些', '干什么'],
        'q0_definition': ['什么样', '什么', '怎么样', '怎样'],
    }
    self.question_words = []
    self.word2key = {}
    # Flatten the lexicon and build the word -> question-type lookup.
    for k, v in q_words.items():
        self.question_words += v
        for _v in v:
            self.word2key[_v] = k
    self.stop_words = set()
    with open('../data/all-stop-word.txt') as f_stop:
        for i in f_stop.readlines():
            self.stop_words.add(i.strip())
    self.articles = []
def init_data(self):
    # Load the English PCFG parser, parse self.sentence, and reset the RDF
    # subject / predicate / object holders.
    self.parser = stanford.StanfordParser(model_path=r"/home/mael/MASTER_2/TEXTE/RDF-Triple-API-master/stanford-parser-full-2015-01-30/edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz")
    self.NP = ''
    self.VP = ''
    # Grab the parse tree from the Stanford parser.
    # NOTE(review): recent NLTK versions return an iterator from raw_parse,
    # which is not subscriptable — `[0]` only works with older list-returning
    # releases; confirm the pinned NLTK version.
    self.stanford_tree = self.parser.raw_parse(self.sentence)[0]
    self.subject = Extractor.RDF_ELEMENT('subject')
    self.predicate = Extractor.RDF_ELEMENT('predicate')
    self.Object = Extractor.RDF_ELEMENT('object')
def __init__(self):
    """Set up POS-tag inventories, the Stanford parser, and a sentence splitter
    for the SVO extraction methods."""
    # POS tag groups consulted during subject-verb-object extraction.
    self.noun_types = ["NN", "NNP", "NNPS", "NNS", "PRP"]
    self.verb_types = ["VB", "VBD", "VBG", "VBN", "VBP", "VBZ"]
    self.adjective_types = ["JJ", "JJR", "JJS"]
    self.pred_verb_phrase_siblings = None
    self.parser = stanford.StanfordParser()
    # Punkt model splits raw text into sentences.
    self.sent_detector = nltk.data.load('tokenizers/punkt/english.pickle')
def mainON(field2word, subset):
    '''
    process data, from .qu, .lo, and .fi to .ta, .lox, .qux and .ficorr, .vacorr
    '''
    # subset looks like "<folder>_<split>"; the folder part locates the
    # overnight_source subdirectory.
    sub_folder = subset.split('_')[0]
    data_root = FLAGS.data_root
    os.environ['STANFORD_PARSER'] = FLAGS.stanford_parser
    os.environ['STANFORD_MODELS'] = FLAGS.stanford_models
    parser = stanford.StanfordParser(model_path=FLAGS.stanford_model_path)
    # Space-joined field names form the schema string passed to the tagger.
    schema = ' '.join(field2word.keys())
    if not path.isdir(join(data_root, 'overnight_generated')):
        os.makedirs(join(data_root, 'overnight_generated'))
    # One output file per generated suffix, in this fixed order.
    (f_ta, f_lox, f_qux, f_ficorr, f_vacorr) = [
        open(
            join(data_root, 'overnight_generated', '%s.%s' % (subset, suffix)),
            'w') for suffix in ['ta', 'lox', 'qux', 'ficorr', 'vacorr']
    ]
    with open(data_root + 'overnight_source/%s/%s.qu' % (sub_folder, subset)) as f_qu, open(
            data_root + 'overnight_source/%s/%s.lon' % (sub_folder, subset)) as f_lo:
        query, logic = f_qu.readline(), f_lo.readline()
        idx = 0
        # Tag each (query, logical form) pair until either file runs out.
        while query and logic:
            idx += 1
            print '### example: %d ###' % idx
            print query
            print logic
            tagged2, field_corr, value_corr, newquery, newlogical = codebase.tagger.sentTagging_treeON3(
                parser, field2word, query, schema, logic)
            print field_corr
            print value_corr
            print tagged2
            print newquery
            print newlogical
            print '\n'
            f_qux.write(newquery + '\n')
            f_lox.write(newlogical + '\n')
            f_ficorr.write(field_corr + '\n')
            f_vacorr.write(value_corr + '\n')
            f_ta.write(tagged2 + '\n')
            query, logic = f_qu.readline(), f_lo.readline()
    f_ta.close()
    f_lox.close()
    f_qux.close()
    f_vacorr.close()
    f_ficorr.close()
    return
def get_parses(sentences):
    """Parse raw sentences with the Stanford parser; return a flat tree list."""
    os.environ['CLASSPATH'] = dir + 'stanford-parser'
    os.environ['STANFORD_PARSER'] = dir + 'stanford-parser/stanford-parser.jar'
    os.environ['STANFORD_MODELS'] = dir + 'stanford-parser/stanford-parser-3.6.0-models.jar'
    parser = stanford.StanfordParser(model_path=dir + "stanford-parser/models/edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz")
    # May take several seconds on large inputs; each result item is itself
    # an iterator over the parse trees for one sentence.
    iter_trees = parser.raw_parse_sents(sentences)
    return [parse_tree for batch in iter_trees for parse_tree in batch]
def clear_data(self):
    """Reload the English PCFG parser and reset all extraction state."""
    model = (r"/users/ted/stanford nlp/stanford-parser-full-2015-01-30/"
             r"stanford-parser-3.5.1-models/edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz")
    self.parser = stanford.StanfordParser(model_path=model)
    self.parse_tree = None
    self.first_NP = ''
    self.first_VP = ''
    # Fresh RDF slots; predicates are matched against verb (VB*) tags.
    self.subject = RDF_Triple.RDF_SOP('subject')
    self.predicate = RDF_Triple.RDF_SOP('predicate', 'VB')
    self.Object = RDF_Triple.RDF_SOP('object')
def stanford_parser_tree(sentences):
    """Run the Stanford English PCFG parser over *sentences*.

    Returns the iterator of per-sentence tree iterators that
    raw_parse_sents produces.
    """
    # NLTK reads the jar locations from these environment variables.
    os.environ['STANFORD_PARSER'] = 'D:/Facultate anul 3/AI/Projectlibraries/stanford-parser-full-2016-10-31/stanford-parser.jar'
    os.environ['STANFORD_MODELS'] = 'D:/Facultate anul 3/AI/Projectlibraries/stanford-parser-full-2016-10-31/stanford-parser-3.7.0-models.jar'
    pcfg_parser = stanford.StanfordParser(
        model_path="E:/FII/3/IA/stanford-parser-and-models/jars/englishPCFG.ser.gz")
    return pcfg_parser.raw_parse_sents(sentences)
def get_parser(self):
    """Export Stanford jar locations and return an English PCFG parser."""
    base = dir + 'stanford-parser'
    os.environ['CLASSPATH'] = base
    os.environ['STANFORD_PARSER'] = base + '/stanford-parser.jar'
    os.environ['STANFORD_MODELS'] = base + '/stanford-parser-3.6.0-models.jar'
    return stanford.StanfordParser(
        model_path=base + "/models/edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz")
def __init__(self, stanford_parser_path):
    """Append *stanford_parser_path* to CLASSPATH and load the English PCFG."""
    separator = classpath_separator[system()]
    # Seed CLASSPATH with "." only when it is not already set, then append
    # the parser directory.
    os.environ.setdefault('CLASSPATH', "." + separator)
    os.environ['CLASSPATH'] += stanford_parser_path + separator
    print(os.environ['CLASSPATH'])  # show the effective classpath
    self.parser = stanford.StanfordParser(
        model_path=stanford_parser_path + "/englishPCFG.ser.gz")
def __init__(self):
    """Read config.ini and load the German Stanford lex parser it points at."""
    self.config = configparser.ConfigParser()
    self.config.read('config.ini')
    # All settings live in the [lexparser] section.
    lex_cfg = self.config['lexparser']
    os.environ['JAVAHOME'] = lex_cfg['javahome']
    os.environ['STANFORD_PARSER'] = lex_cfg['stanfordParser']
    os.environ['STANFORD_MODELS'] = lex_cfg['stanfordModels']
    os.environ['CLASSPATH'] = lex_cfg['path']
    self.lexParser = stanford.StanfordParser(model_path=lex_cfg['germanModel'])