Пример #1
0
    def __init__(self, corenlp_path=None):
        """
        Locate the CoreNLP jar files and spawn the CoreNLP shell as a child.

        The constructor blocks until the child has printed five "done."
        markers followed by "Entering interactive shell."; the spawned
        ``pexpect`` child is stored in ``self.corenlp``.

        :param corenlp_path: directory holding the CoreNLP jars (a trailing
            path separator is optional). When empty or None,
            "./stanford-corenlp-full-2015-04-20/" is used.

        Calls ``sys.exit(1)`` after logging an error if any required jar is
        missing. A marker that does not show up within its timeout makes
        ``expect`` raise (see the pexpect documentation).
        """
        jar_names = [
            "stanford-corenlp-3.5.2.jar", "stanford-corenlp-3.5.2-models.jar",
            "joda-time.jar", "xom.jar", "ejml-0.23.jar", "jollyday.jar"
        ]

        # if CoreNLP libraries are in a different directory,
        # pass that directory as corenlp_path
        if not corenlp_path:
            corenlp_path = "./stanford-corenlp-full-2015-04-20/"

        java_path = "java"
        classname = "edu.stanford.nlp.pipeline.StanfordCoreNLP"
        # include the properties file, so you can change defaults
        # but any changes in output format will break parse_parser_results()
        props = "-props default.properties"

        # Build and check the classpath. os.path.join keeps the paths valid
        # whether or not corenlp_path ends with a separator.
        jars = [os.path.join(corenlp_path, name) for name in jar_names]
        for jar in jars:
            if not os.path.exists(jar):
                logger.error("Error! Cannot locate %s", jar)
                sys.exit(1)

        # spawn the server
        start_corenlp = "%s -Xmx1800m -cp %s %s %s" % (
            java_path, ':'.join(jars), classname, props)
        if VERBOSE:
            logger.debug(start_corenlp)
        self.corenlp = pexpect.spawn(start_corenlp)

        # Seconds to wait for each successive "done." marker. Per the original
        # notes these correspond to: pos tagger (~5sec), NER-all (~33sec),
        # NER-muc (~60sec), CoNLL (~50sec), PCFG (~3sec).
        load_timeouts = [200, 200, 600, 600, 200]

        # show progress bar while loading the models
        widgets = ['Loading Models: ', Fraction()]
        pbar = ProgressBar(widgets=widgets, maxval=len(load_timeouts),
                           force_update=True).start()
        for step, timeout in enumerate(load_timeouts, 1):
            self.corenlp.expect("done.", timeout=timeout)
            pbar.update(step)
        self.corenlp.expect("Entering interactive shell.")
        pbar.finish()
Пример #2
0
    def setup(self):
        """
        Verify that the CoreNLP jars exist, then launch CoreNLP as a child.

        Paths are built from the first '/'-separated component of this
        file's path relative to the current working directory. If a jar is
        missing, an error line is printed and ``sys.exit(1)`` is called.
        Otherwise the child is stored in ``self.corenlp`` and the method
        blocks until four "done." markers and the
        "Entering interactive shell." line have been seen.
        """
        # First component of the relative path to this file; both the jar
        # directory and the properties file are looked up beneath it.
        base_dir = os.path.relpath(__file__).split('/')[0]

        # if CoreNLP libraries are in a different directory,
        # change the corenlp_path variable to point to them
        corenlp_path = base_dir + "/stanford-corenlp-full-2013-06-20/"

        # include the properties file, so you can change defaults
        # but any changes in output format will break parse_parser_results()
        props = "-props " + base_dir + "/default.properties"

        classpath = []
        for jar_name in ("stanford-corenlp-3.2.0.jar",
                         "stanford-corenlp-3.2.0-models.jar",
                         "joda-time.jar",
                         "xom.jar",
                         "jollyday.jar"):
            classpath.append(corenlp_path + jar_name)

        # Stop at the first jar that cannot be found on disk.
        first_missing = next(
            (entry for entry in classpath if not os.path.exists(entry)), None)
        if first_missing is not None:
            print("Error! Cannot locate %s" % first_missing)
            sys.exit(1)

        # spawn the server (classpath entries are ':'-separated)
        java_path = "java"
        classname = "edu.stanford.nlp.pipeline.StanfordCoreNLP"
        start_corenlp = "%s -Xmx2500m -cp %s %s %s" % (
            java_path, ':'.join(classpath), classname, props)
        if VERBOSE:
            print(start_corenlp)
        self.corenlp = pexpect.spawn(start_corenlp)

        # Seconds allowed for each "done." marker. Per the original notes:
        # pos tagger (~5sec), NER-all (~33sec), NER-muc (~60sec),
        # CoNLL (~50sec). No fifth (PCFG) marker is awaited here.
        stage_timeouts = (20, 200, 600, 600)

        # show progress bar while loading the models
        pbar = ProgressBar(widgets=['Loading Models: ', Fraction()],
                           maxval=4, force_update=True).start()
        for stage, limit in enumerate(stage_timeouts, 1):
            self.corenlp.expect("done.", timeout=limit)
            pbar.update(stage)
        self.corenlp.expect("Entering interactive shell.")
        pbar.finish()
Пример #3
0
    def __init__(self,
                 corenlp_path="stanford-corenlp-full-2013-04-04/",
                 memory="3g"):
        """
        Launch CoreNLP as a child process and wait for its prompt.

        The command line comes from ``init_corenlp_command(corenlp_path,
        memory)``; the spawned child is kept in ``self.corenlp``. The
        constructor blocks through five "done." markers, the
        "Entering interactive shell." line and finally the "NLP> " prompt.
        When VERBOSE is set, the command is printed and a progress bar
        tracks the markers.
        """
        # spawn the server
        command = init_corenlp_command(corenlp_path, memory)
        show_progress = VERBOSE
        if show_progress:
            print(command)
        self.corenlp = pexpect.spawn(command)

        # Seconds allowed for each "done." marker. Per the original notes:
        # pos tagger (~5sec), NER-all (~33sec), NER-muc (~60sec),
        # CoNLL (~50sec), PCFG (~3sec).
        stage_timeouts = (20, 200, 600, 600, 200)

        # the progress bar only exists in verbose mode
        if show_progress:
            pbar = ProgressBar(widgets=['Loading Models: ', Fraction()],
                               maxval=5, force_update=True).start()

        for stage, limit in enumerate(stage_timeouts, 1):
            self.corenlp.expect("done.", timeout=limit)
            if show_progress:
                pbar.update(stage)

        self.corenlp.expect("Entering interactive shell.")
        if show_progress:
            pbar.finish()

        # interactive shell
        self.corenlp.expect("\nNLP> ", timeout=3)
0
    def _spawn_corenlp(self):
        """
        Spawn the CoreNLP process and block until its prompt appears.

        Runs the command held in ``self.start_corenlp`` through
        ``pexpect.spawn`` and stores the child in ``self.corenlp``.

        When VERBOSE is set, the command is printed and a progress bar is
        advanced once per "done." marker (five in total) before the prompt
        is awaited. When VERBOSE is not set, only the prompt is awaited, but
        with a timeout equal to the summed per-model budget, so that model
        loading is not cut short by pexpect's default timeout.
        """
        if VERBOSE:
            print(self.start_corenlp)
        self.corenlp = pexpect.spawn(self.start_corenlp, maxread=8192, searchwindowsize=80)

        # Seconds allowed for each "done." marker. Per the original notes:
        # pos tagger model (~5sec)
        # NER-all classifier (~33sec)
        # NER-muc classifier (~60sec)
        # CoNLL classifier (~50sec)
        # PCFG (~3sec)
        timeouts = [20, 200, 600, 600, 20]

        if not VERBOSE:
            # No per-model markers are tracked in quiet mode, so the prompt
            # wait itself must cover the whole model-loading phase.
            self.corenlp.expect("\nNLP> ", timeout=sum(timeouts))
            return

        # show progress bar while loading the models
        widgets = ['Loading Models: ', Fraction()]
        pbar = ProgressBar(widgets=widgets, maxval=len(timeouts), force_update=True).start()
        for step, timeout in enumerate(timeouts, 1):
            self.corenlp.expect("done.", timeout=timeout)  # Load model
            pbar.update(step)
        self.corenlp.expect("Entering interactive shell.")
        pbar.finish()

        # interactive shell
        self.corenlp.expect("\nNLP> ")
0
    def __init__(self, corenlp_path=None):
        """
        Locate the CoreNLP jar files and spawn the CoreNLP shell as a child.

        The child is started with ``pexpect.spawnu`` using the
        "StanfordCoreNLP-chinese.properties" file and is stored in
        ``self.corenlp``. The constructor then waits for four "done."
        markers and the "Entering interactive shell." line.

        :param corenlp_path: directory holding the CoreNLP jars (a trailing
            path separator is optional). When empty or None, "./CoreNLP/"
            is used.

        Calls ``sys.exit(1)`` after logging an error if any required jar is
        missing. If the child reaches end-of-file before the interactive
        shell is announced, the constructor still returns, but an error is
        logged so the failure is not silent.
        """
        # The English model jars (stanford-english-corenlp-models-current.jar,
        # stanford-english-kbp-corenlp-models-current.jar) are not put on the
        # classpath here.
        jar_names = [
            "stanford-corenlp-2017-04-14-build.jar",
            "stanford-corenlp-models-current.jar",
            "stanford-chinese-corenlp-models-current.jar",
            "joda-time.jar",
            "xom.jar",
            "jollyday.jar"
        ]

        # if CoreNLP libraries are in a different directory,
        # pass that directory as corenlp_path
        if not corenlp_path:
            corenlp_path = "./CoreNLP/"

        java_path = "java"
        classname = "edu.stanford.nlp.pipeline.StanfordCoreNLP"
        # include the properties file, so you can change defaults
        # but any changes in output format will break parse_parser_results()
        props = "-props StanfordCoreNLP-chinese.properties"

        # Build and check the classpath. os.path.join keeps the paths valid
        # whether or not corenlp_path ends with a separator.
        jars = [os.path.join(corenlp_path, name) for name in jar_names]
        for jar in jars:
            if not os.path.exists(jar):
                logger.error("Error! Cannot locate %s", jar)
                sys.exit(1)

        # spawn the server
        start_corenlp = "%s -Xmx5g -cp %s %s %s" % (java_path, ':'.join(jars),
                                                    classname, props)
        if VERBOSE:
            logger.debug(start_corenlp)
        self.corenlp = pexpect.spawnu(start_corenlp)

        # Seconds allowed for each "done." marker. Per the original notes:
        # pos tagger (~5sec), NER-all (~33sec), NER-muc (~60sec),
        # CoNLL (~50sec). No fifth (PCFG) marker is awaited here.
        load_timeouts = [200, 2000, 6000, 6000]

        # show progress bar while loading the models
        widgets = ['Loading Models: ', Fraction()]
        pbar = ProgressBar(widgets=widgets, maxval=len(load_timeouts),
                           force_update=True).start()

        # EOF is accepted as a match so that a dead child does not raise;
        # index 1 in the pattern list means EOF was what matched.
        child_exited = False
        for step, timeout in enumerate(load_timeouts, 1):
            matched = self.corenlp.expect([u"done.", pexpect.EOF],
                                          timeout=timeout)
            if matched == 1:
                child_exited = True
            pbar.update(step)
        matched = self.corenlp.expect(
            [u"Entering interactive shell.", pexpect.EOF])
        if matched == 1:
            child_exited = True
        pbar.finish()

        if child_exited:
            logger.error(
                "CoreNLP process exited before reaching the interactive shell")