Example #1
0
 def get_parses(self, sentences):
     """Run the Malt dependency parser over *sentences*, return CoNLL output.

     Args:
         sentences: iterable of sentence strings; written one per line to a
             temporary input file for the parser.

     Returns:
         The parser's CoNLL-formatted output as a unicode string.

     Raises:
         subprocess.CalledProcessError: if the Malt wrapper script exits with
             a non-zero status.
     """
     # TODO: server version of Malt?
     import subprocess  # local import: keeps file-level imports untouched
     with deleting(mkdtemp(suffix='XXXXXX', prefix='semafor.')) as temp_dir:
         input_filename = os.path.join(temp_dir, "sentence")
         output_filename = os.path.join(temp_dir, "conll")
         with codecs.open(input_filename, 'w', encoding="utf8") as input_file:
             input_file.write(u'\n'.join(sentences))
         # Argument list + cwd= replaces the old os.system("cd %s && ...")
         # shell string: no quoting/injection hazards from the paths, and
         # check_call raises on failure instead of silently letting a failed
         # parse surface later as a missing/empty output file.
         subprocess.check_call(
             ["./bin/runMalt.sh", input_filename, temp_dir],
             cwd=SEMAFOR_HOME)
         with codecs.open(output_filename, encoding="utf8") as output_file:
             return output_file.read()
Example #2
0
 def get_parses(self, sentences):
     """Run the Malt dependency parser over the given sentences and return
     the resulting CoNLL output as a single string.

     Args:
         sentences: iterable of sentence strings; joined with newlines, so
             presumably one sentence per line — TODO confirm the wrapper's
             expected input format.

     Returns:
         Contents of the parser's "conll" output file (unicode).
     """
     # TODO: server version of Malt?
     # NOTE(review): os.system ignores the script's exit status, so a failed
     # parse only shows up later as a missing or empty output file.
     # All work happens in a throwaway temp dir removed by `deleting` on exit.
     with deleting(mkdtemp(suffix='XXXXXX', prefix='semafor.')) as temp_dir:
         input_filename = os.path.join(temp_dir, "sentence")
         output_filename = os.path.join(temp_dir, "conll")
         # Write the UTF-8 input file the wrapper script will read.
         with codecs.open(input_filename, 'w', encoding="utf8") as input_file:
             input_file.write(u'\n'.join(sentences))
         # runMalt.sh is run from SEMAFOR_HOME; it receives the input file
         # and the temp dir, into which it writes the "conll" file read below.
         os.system("cd %s && ./bin/runMalt.sh %s %s" %
                   (SEMAFOR_HOME, input_filename, temp_dir))
         with codecs.open(output_filename, encoding="utf8") as output_file:
             output = output_file.read()
         return output
Example #3
0
 def tag_sentences(self, sentences):
     """Tokenize and part-of-speech tag *sentences*, return the tagged text.

     Args:
         sentences: iterable of sentence strings; written one per line to a
             temporary input file for the tagging script.

     Returns:
         Contents of the script's "pos.tagged" output file (unicode).

     Raises:
         subprocess.CalledProcessError: if the tagging script exits with a
             non-zero status.
     """
     #TODO: could probably just use nltk
     import subprocess  # local import: keeps file-level imports untouched
     with deleting(mkdtemp(suffix='XXXXXX', prefix='semafor.')) as temp_dir:
         input_filename = os.path.join(temp_dir, "sentence")
         output_filename = os.path.join(temp_dir, "pos.tagged")
         with codecs.open(input_filename, 'w', encoding="utf8") as input_file:
             input_file.write(u'\n'.join(sentences))
         # Argument list + cwd= replaces the old os.system("cd %s && ...")
         # shell string: no quoting/injection hazards from the paths, and
         # check_call raises on failure instead of silently returning
         # whatever happens to be (or not be) in the output file.
         subprocess.check_call(
             ["./tokenize_and_postag.sh", input_filename, output_filename],
             cwd=TAGGING_SCRIPT_HOME)
         with codecs.open(output_filename, encoding="utf8") as output_file:
             return output_file.read()
Example #4
0
 def tag_sentences(self, sentences):
     """Run tokenization and part-of-speech tagging over the given sentences
     and return the tagged output as a single string.

     Args:
         sentences: iterable of sentence strings; joined with newlines, so
             presumably one sentence per line — TODO confirm the script's
             expected input format.

     Returns:
         Contents of the script's "pos.tagged" output file (unicode).
     """
     #TODO: could probably just use nltk
     # NOTE(review): os.system ignores the script's exit status, so a failed
     # run only shows up later as a missing or empty output file.
     # All work happens in a throwaway temp dir removed by `deleting` on exit.
     with deleting(mkdtemp(suffix='XXXXXX', prefix='semafor.')) as temp_dir:
         input_filename = os.path.join(temp_dir, "sentence")
         output_filename = os.path.join(temp_dir, "pos.tagged")
         # Write the UTF-8 input file the tagging script will read.
         with codecs.open(input_filename, 'w', encoding="utf8") as input_file:
             input_file.write(u'\n'.join(sentences))
         # tokenize_and_postag.sh is run from TAGGING_SCRIPT_HOME; it receives
         # the input and output paths, and writes the file read back below.
         os.system("cd %s && ./tokenize_and_postag.sh %s %s" %
                   (TAGGING_SCRIPT_HOME, input_filename, output_filename))
         with codecs.open(output_filename, encoding="utf8") as output_file:
             output = output_file.read()
         return output