def get_parses(self, sentences):
    """
    Gets dependency parses, as conll text, for a collection of sentences.

    The sentences are joined with newlines and written (utf8) to a file
    named "sentence" inside a freshly created scratch directory. The script
    ``./bin/runMalt.sh`` is then run from ``SEMAFOR_HOME`` with that file and
    the scratch directory as its two arguments, and whatever the script left
    in the scratch directory's "conll" file is read back and returned.

    :param sentences: iterable of text strings, one entry per sentence.
    :return: the full decoded contents of the "conll" output file.
    :raises IOError: if the "conll" file cannot be opened afterwards, e.g.
        because the script did not produce it.
    :raises OSError: if the script itself cannot be launched.
    """
    # Local import: keeps this method self-contained regardless of what the
    # module's import block already provides.
    import subprocess

    # TODO: server version of Malt?
    # NOTE(review): `deleting` is presumably a context manager that removes
    # the scratch directory on exit -- confirm against its definition.
    with deleting(mkdtemp(suffix='XXXXXX', prefix='semafor.')) as temp_dir:
        input_filename = os.path.join(temp_dir, "sentence")
        output_filename = os.path.join(temp_dir, "conll")
        with codecs.open(input_filename, 'w', encoding="utf8") as input_file:
            input_file.write(u'\n'.join(sentences))
        # Pass an argument list and a working directory rather than building
        # a shell string: paths containing spaces or shell metacharacters
        # (e.g. a TMPDIR with a space in it) would otherwise split or be
        # interpreted by the shell. The exit status is deliberately not
        # checked, as before; a failed run surfaces when the output file
        # cannot be opened below.
        subprocess.call(
            ["./bin/runMalt.sh", input_filename, temp_dir],
            cwd=SEMAFOR_HOME,
        )
        with codecs.open(output_filename, encoding="utf8") as output_file:
            output = output_file.read()
    return output
def tag_sentences(self, sentences):
    """
    Runs tokenization and part-of-speech tagging on a collection of sentences.

    The sentences are joined with newlines and written (utf8) to a file
    named "sentence" inside a freshly created scratch directory. The script
    ``./tokenize_and_postag.sh`` is then run from ``TAGGING_SCRIPT_HOME``
    with that file and the path of a "pos.tagged" file in the same directory
    as its two arguments, and the contents of "pos.tagged" are returned.

    :param sentences: iterable of text strings, one entry per sentence.
    :return: the full decoded contents of the "pos.tagged" output file.
    :raises IOError: if the "pos.tagged" file cannot be opened afterwards,
        e.g. because the script did not produce it.
    :raises OSError: if the script itself cannot be launched.
    """
    # Local import: keeps this method self-contained regardless of what the
    # module's import block already provides.
    import subprocess

    # TODO: could probably just use nltk
    # NOTE(review): `deleting` is presumably a context manager that removes
    # the scratch directory on exit -- confirm against its definition.
    with deleting(mkdtemp(suffix='XXXXXX', prefix='semafor.')) as temp_dir:
        input_filename = os.path.join(temp_dir, "sentence")
        output_filename = os.path.join(temp_dir, "pos.tagged")
        with codecs.open(input_filename, 'w', encoding="utf8") as input_file:
            input_file.write(u'\n'.join(sentences))
        # Pass an argument list and a working directory rather than building
        # a shell string: paths containing spaces or shell metacharacters
        # would otherwise split or be interpreted by the shell. The exit
        # status is deliberately not checked, as before; a failed run
        # surfaces when the output file cannot be opened below.
        subprocess.call(
            ["./tokenize_and_postag.sh", input_filename, output_filename],
            cwd=TAGGING_SCRIPT_HOME,
        )
        with codecs.open(output_filename, encoding="utf8") as output_file:
            output = output_file.read()
    return output