def __init__(self):
    """
    Spawn the CoreNLP command as a child process and block until it is ready.

    The command line is read from ``start_corenlp``, a name defined outside
    this method. It is always printed, and additionally logged when VERBOSE
    is true. The method then waits for five "done." messages, advancing a
    progress bar after each one, and finally for the interactive shell banner.
    """
    print(start_corenlp)
    if VERBOSE:
        logger.info(start_corenlp)
    self.corenlp = pexpect.spawn(start_corenlp)

    # show progress bar while loading the models
    bar_widgets = ['Loading Models: ', Fraction()]
    progress = ProgressBar(widgets=bar_widgets, maxval=5,
                           force_update=True).start()

    # Wait limit (seconds) for each "done." message, in loading order:
    #   pos tagger model (~5sec)
    #   NER-all classifier (~33sec)
    #   NER-muc classifier (~60sec)
    #   CoNLL classifier (~50sec)
    #   PCFG (~3sec)
    load_limits = (20, 200, 600, 600, 200)
    for step, limit in enumerate(load_limits, 1):
        self.corenlp.expect("done.", timeout=limit)
        progress.update(step)

    self.corenlp.expect("Entering interactive shell.")
    progress.finish()
def _spawn_corenlp(self):
    """
    Spawn the process described by ``self.start_corenlp`` and wait for its prompt.

    Blocks until five "done." messages, the "Entering interactive shell."
    banner and the "NLP> " prompt have been read from the child. These waits
    always run; the command echo and the progress bar are produced only when
    VERBOSE is true.

    An expected message that does not arrive is reported by pexpect itself
    (pexpect.TIMEOUT / pexpect.EOF); nothing is caught here.
    """
    if VERBOSE:
        print(self.start_corenlp)
    self.corenlp = pexpect.spawn(self.start_corenlp, timeout=60, maxread=8192,
                                 searchwindowsize=80)

    # Wait limit (seconds) for each "done." message, in loading order:
    #   pos tagger model (~5sec)
    #   NER-all classifier (~33sec)
    #   NER-muc classifier (~60sec)
    #   CoNLL classifier (~50sec)
    #   PCFG (~3sec)
    timeouts = [20, 200, 600, 600, 20]

    # The progress bar is cosmetic and only exists in verbose mode. The waits
    # below must not depend on it: without them the prompt would be awaited
    # before the models have finished loading.
    pbar = None
    if VERBOSE:
        widgets = ['Loading Models: ', Fraction()]
        pbar = ProgressBar(widgets=widgets, maxval=len(timeouts),
                           force_update=True).start()

    for step, timeout in enumerate(timeouts, 1):
        self.corenlp.expect("done.", timeout=timeout)  # Load model
        if pbar is not None:
            pbar.update(step)
    self.corenlp.expect("Entering interactive shell.")
    if pbar is not None:
        pbar.finish()

    # interactive shell
    self.corenlp.expect("\nNLP> ")
def __init__(self, corenlp_path=DIRECTORY, memory="3g", properties='default.properties'):
    """
    Spawn CoreNLP as a child process and wait for its interactive prompt.

    corenlp_path, memory and properties are passed unchanged to
    init_corenlp_command (defined elsewhere), which returns the command line
    to run. Presumably that helper is where the jar locations are checked;
    confirm against its definition.

    Blocks until five "done." messages, the "Entering interactive shell."
    banner and the "NLP> " prompt have been read from the child. These waits
    always run; the command echo and the progress bar are produced only when
    VERBOSE is true.

    An expected message that does not arrive is reported by pexpect itself
    (pexpect.TIMEOUT / pexpect.EOF); nothing is caught here.
    """
    # spawn the server
    start_corenlp = init_corenlp_command(corenlp_path, memory, properties)
    if VERBOSE:
        print(start_corenlp)
    self.corenlp = pexpect.spawn(start_corenlp)

    # Wait limit (seconds) for each "done." message, in loading order:
    #   pos tagger model (~5sec)
    #   NER-all classifier (~33sec)
    #   NER-muc classifier (~60sec)
    #   CoNLL classifier (~50sec)
    #   PCFG (~3sec)
    timeouts = [20, 200, 600, 600, 20]

    # The progress bar is cosmetic and only exists in verbose mode. The waits
    # below must not depend on it: the prompt wait at the end allows only
    # 3 seconds, far less than the models need to load.
    pbar = None
    if VERBOSE:
        widgets = ['Loading Models: ', Fraction()]
        pbar = ProgressBar(widgets=widgets, maxval=len(timeouts),
                           force_update=True).start()

    for step, timeout in enumerate(timeouts, 1):
        self.corenlp.expect("done.", timeout=timeout)  # Load model
        if pbar is not None:
            pbar.update(step)
    self.corenlp.expect("Entering interactive shell.")
    if pbar is not None:
        pbar.finish()

    # interactive shell
    self.corenlp.expect("\nNLP> ", timeout=3)
def __init__(self, corenlp_path=None):
    """
    Check that the CoreNLP jar files exist, then spawn CoreNLP as a child process.

    corenlp_path -- directory holding the jar files; a trailing slash is
        optional. When it is None or empty a hard-coded default is used.

    If a jar is missing, an error is logged and the interpreter exits with
    status 1. Otherwise the method blocks until five "done." messages and the
    "Entering interactive shell." banner have been read from the child,
    advancing a progress bar after each "done.".
    """
    # Not on the classpath in this configuration:
    # stanford-corenlp-3.4.1-models.jar, joda-time.jar, xom.jar, jollyday.jar
    jars = [
        "stanford-corenlp-3.4.1.jar",
        "stanford-chinese-corenlp-2014-02-24-models.jar",  # add chinese models
    ]

    # if CoreNLP libraries are in a different directory,
    # change the corenlp_path variable to point to them
    if not corenlp_path:
        # NOTE(review): machine-specific default (the generic alternative was
        # "./stanford-corenlp-full-2014-08-27/"); callers on other machines
        # need to pass corenlp_path explicitly.
        corenlp_path = "/home/kqc/tools/stanford-corenlp-full-2014-08-27/"

    java_path = "java"
    classname = "edu.stanford.nlp.pipeline.StanfordCoreNLP"
    # include the properties file, so you can change defaults
    # but any changes in output format will break parse_parser_results()
    props = "-props StanfordCoreNLP-chinese.properties"  # for chinese

    # add and check classpaths
    # os.path.join keeps the paths valid whether or not corenlp_path ends
    # with a separator.
    jars = [os.path.join(corenlp_path, jar) for jar in jars]
    for jar in jars:
        if not os.path.exists(jar):
            logger.error("Error! Cannot locate %s" % jar)
            sys.exit(1)

    # spawn the server (3g heap for chinese; an earlier setting was 1800m)
    start_corenlp = "%s -Xmx3g -cp %s %s %s" % (
        java_path, ':'.join(jars), classname, props)
    if VERBOSE:
        logger.debug(start_corenlp)
    self.corenlp = pexpect.spawnu(start_corenlp, encoding='utf8')

    # Wait limit (seconds) for each "done." message, in loading order;
    # increased for chinese:
    #   pos tagger model (~5sec)
    #   NER-all classifier (~33sec)
    #   NER-muc classifier (~60sec)
    #   CoNLL classifier (~50sec)
    #   PCFG (~3sec)
    timeouts = [2000, 2000, 6000, 6000, 2000]

    # show progress bar while loading the models
    widgets = ['Loading Models: ', Fraction()]
    pbar = ProgressBar(widgets=widgets, maxval=len(timeouts),
                       force_update=True).start()
    for step, timeout in enumerate(timeouts, 1):
        self.corenlp.expect(u"done.", timeout=timeout)
        pbar.update(step)
    self.corenlp.expect(u"Entering interactive shell.")
    pbar.finish()
def setup(self):
    """
    Check that the CoreNLP jar files exist, then spawn CoreNLP as a child process.

    The CoreNLP distribution directory and default.properties are looked up
    next to this source file, expressed relative to the current working
    directory.

    If a jar is missing, an error is printed and the interpreter exits with
    status 1. Otherwise the method blocks until four "done." messages and the
    "Entering interactive shell." banner have been read from the child,
    advancing a progress bar after each "done.".
    """
    jars = [
        "stanford-corenlp-3.2.0.jar",
        "stanford-corenlp-3.2.0-models.jar",
        "joda-time.jar",
        "xom.jar",
        "jollyday.jar",
    ]

    # Directory containing this file. Taking only the first component of the
    # relative path breaks when the process runs from that directory (the
    # component is then the file name itself) or from outside it (the
    # component is then "..").
    base_dir = os.path.dirname(os.path.relpath(__file__))

    # if CoreNLP libraries are in a different directory,
    # change the corenlp_path variable to point to them
    corenlp_path = os.path.join(base_dir, "stanford-corenlp-full-2013-06-20")

    java_path = "java"
    classname = "edu.stanford.nlp.pipeline.StanfordCoreNLP"
    # include the properties file, so you can change defaults
    # but any changes in output format will break parse_parser_results()
    props = "-props " + os.path.join(base_dir, "default.properties")

    # add and check classpaths
    jars = [os.path.join(corenlp_path, jar) for jar in jars]
    for jar in jars:
        if not os.path.exists(jar):
            print("Error! Cannot locate %s" % jar)
            sys.exit(1)

    # spawn the server
    # (the original author noted changing the ':' separator to ';')
    start_corenlp = "%s -Xmx2500m -cp %s %s %s" % (
        java_path, ':'.join(jars), classname, props)
    if VERBOSE:
        print(start_corenlp)
    self.corenlp = pexpect.spawn(start_corenlp)

    # Wait limit (seconds) for each "done." message, in loading order:
    #   pos tagger model (~5sec)
    #   NER-all classifier (~33sec)
    #   NER-muc classifier (~60sec)
    #   CoNLL classifier (~50sec)
    # A fifth wait for the PCFG model is not performed in this version.
    timeouts = [20, 200, 600, 600]

    # show progress bar while loading the models
    widgets = ['Loading Models: ', Fraction()]
    pbar = ProgressBar(widgets=widgets, maxval=len(timeouts),
                       force_update=True).start()
    for step, timeout in enumerate(timeouts, 1):
        self.corenlp.expect("done.", timeout=timeout)
        pbar.update(step)
    self.corenlp.expect("Entering interactive shell.")
    pbar.finish()
def __init__(self, corenlp_path=None):
    """
    Check that the CoreNLP jar files exist, then spawn CoreNLP as a child process.

    corenlp_path -- directory holding the jar files; a trailing slash is
        optional. When it is None or empty,
        "./stanford-corenlp-full-2015-12-09/" is used.

    If a jar is missing, an error is logged and the interpreter exits with
    status 1. Otherwise the method blocks until five "done." messages and the
    "Entering interactive shell." banner have been read from the child,
    advancing a progress bar after each "done.".
    """
    jars = [
        "stanford-corenlp-3.6.0.jar",
        "stanford-corenlp-3.6.0-models.jar",
        "joda-time.jar",
        "xom.jar",
        "jollyday.jar",
        "ejml-0.23.jar",
        "slf4j-api.jar",
        "slf4j-simple.jar",
    ]

    # if CoreNLP libraries are in a different directory,
    # change the corenlp_path variable to point to them
    if not corenlp_path:
        corenlp_path = "./stanford-corenlp-full-2015-12-09/"

    java_path = "java"
    classname = "edu.stanford.nlp.pipeline.StanfordCoreNLP"
    # include the properties file, so you can change defaults
    # but any changes in output format will break parse_parser_results()
    props = "-props default.properties"

    # add and check classpaths
    # os.path.join keeps the paths valid whether or not corenlp_path ends
    # with a separator.
    jars = [os.path.join(corenlp_path, jar) for jar in jars]
    for jar in jars:
        if not os.path.exists(jar):
            logger.error("Error! Cannot locate %s" % jar)
            sys.exit(1)

    # spawn the server
    start_corenlp = "%s -Xmx3600m -cp %s %s %s" % (
        java_path, ':'.join(jars), classname, props)
    if VERBOSE:
        logger.debug(start_corenlp)
    self.corenlp = pexpect.spawn(start_corenlp)

    # Wait limit (seconds) for each "done." message, in loading order:
    #   pos tagger model (~5sec)
    #   NER-all classifier (~33sec)
    #   NER-muc classifier (~60sec)
    #   CoNLL classifier (~50sec)
    #   PCFG (~3sec)
    timeouts = [20, 200, 600, 600, 200]

    # show progress bar while loading the models
    widgets = ['Loading Models: ', Fraction()]
    pbar = ProgressBar(widgets=widgets, maxval=len(timeouts),
                       force_update=True).start()
    for step, timeout in enumerate(timeouts, 1):
        self.corenlp.expect("done.", timeout=timeout)
        pbar.update(step)
    self.corenlp.expect("Entering interactive shell.")
    pbar.finish()
def __init__(self, corenlp_path="stanford-corenlp-full-2013-04-04/", memory="3g"):
    """
    Spawn CoreNLP as a child process and wait for its interactive prompt.

    corenlp_path and memory are passed unchanged to init_corenlp_command
    (defined elsewhere), which returns the command line to run.

    Blocks until five "done." messages, the "Entering interactive shell."
    banner and the "NLP> " prompt have been read from the child. When VERBOSE
    is true the command is printed and a progress bar tracks the loading.
    """
    # spawn the server
    command = init_corenlp_command(corenlp_path, memory)
    if VERBOSE:
        print(command)
    self.corenlp = pexpect.spawn(command)

    # show progress bar while loading the models
    if VERBOSE:
        bar_widgets = ['Loading Models: ', Fraction()]
        progress = ProgressBar(widgets=bar_widgets, maxval=5,
                               force_update=True).start()

    # Wait limit (seconds) for each "done." message, in loading order:
    #   pos tagger model (~5sec)
    #   NER-all classifier (~33sec)
    #   NER-muc classifier (~60sec)
    #   CoNLL classifier (~50sec)
    #   PCFG (~3sec)
    load_limits = (20, 200, 600, 600, 200)
    for step, limit in enumerate(load_limits, 1):
        self.corenlp.expect("done.", timeout=limit)
        if VERBOSE:
            progress.update(step)

    self.corenlp.expect("Entering interactive shell.")
    if VERBOSE:
        progress.finish()

    # interactive shell
    self.corenlp.expect("\nNLP> ", timeout=3)
def __init__(self, corenlp_path=None):
    """
    Check that the CoreNLP jar files exist, then spawn CoreNLP as a child process.

    corenlp_path -- directory holding the jar files; a trailing slash is
        optional. When it is None or empty, "./CoreNLP/" is used.

    If a jar is missing, an error is logged and the interpreter exits with
    status 1. Otherwise the method waits for four "done." messages and the
    "Entering interactive shell." banner, advancing a progress bar after each
    "done.". If the child's output ends (EOF) before the banner is seen, the
    remaining waits are skipped and an error is logged; no exception is
    raised for that case.
    """
    # Not on the classpath in this configuration:
    # stanford-english-corenlp-models-current.jar,
    # stanford-english-kbp-corenlp-models-current.jar
    jars = [
        "stanford-corenlp-2017-04-14-build.jar",
        "stanford-corenlp-models-current.jar",
        "stanford-chinese-corenlp-models-current.jar",
        "joda-time.jar",
        "xom.jar",
        "jollyday.jar",
    ]

    # if CoreNLP libraries are in a different directory,
    # change the corenlp_path variable to point to them
    if not corenlp_path:
        corenlp_path = "./CoreNLP/"

    java_path = "java"
    classname = "edu.stanford.nlp.pipeline.StanfordCoreNLP"
    # include the properties file, so you can change defaults
    # but any changes in output format will break parse_parser_results()
    # (the English alternative is "-props default.properties")
    props = "-props StanfordCoreNLP-chinese.properties"

    # add and check classpaths
    # os.path.join keeps the paths valid whether or not corenlp_path ends
    # with a separator.
    jars = [os.path.join(corenlp_path, jar) for jar in jars]
    for jar in jars:
        if not os.path.exists(jar):
            logger.error("Error! Cannot locate %s" % jar)
            sys.exit(1)

    # spawn the server
    start_corenlp = "%s -Xmx5g -cp %s %s %s" % (
        java_path, ':'.join(jars), classname, props)
    if VERBOSE:
        logger.debug(start_corenlp)
    self.corenlp = pexpect.spawnu(start_corenlp)

    # Wait limit (seconds) for each "done." message, in loading order:
    #   pos tagger model (~5sec)
    #   NER-all classifier (~33sec)
    #   NER-muc classifier (~60sec)
    #   CoNLL classifier (~50sec)
    # A fifth wait for the PCFG model is not performed in this version.
    timeouts = [200, 2000, 6000, 6000]

    # show progress bar while loading the models
    widgets = ['Loading Models: ', Fraction()]
    pbar = ProgressBar(widgets=widgets, maxval=len(timeouts),
                       force_update=True).start()

    # expect() returns the index of the pattern that matched, so 1 means
    # pexpect.EOF: the child's output ended and nothing more can arrive.
    child_exited = False
    for step, timeout in enumerate(timeouts, 1):
        matched = self.corenlp.expect([u"done.", pexpect.EOF], timeout=timeout)
        if matched == 1:
            child_exited = True
            break
        pbar.update(step)
    if not child_exited:
        matched = self.corenlp.expect(
            [u"Entering interactive shell.", pexpect.EOF])
        child_exited = matched == 1
    pbar.finish()

    if child_exited:
        # Report the failure instead of silently treating a dead child as a
        # loaded server; construction still completes as before.
        logger.error("CoreNLP exited before reaching the interactive shell")