def from_json(cls, obj):
    """Deserialize a State from a JSON-compatible dict.

    Args:
        obj: dict with keys "source", "target", "truth", "gold_truth";
            "gold_truth" may be None when no gold label exists.

    Returns:
        A State built from the deserialized sentences and truth values.
    """
    source = AnnotatedSentence.from_json(obj["source"])
    target = AnnotatedSentence.from_json(obj["target"])
    truth = Truth(obj["truth"])
    # Bug fix: the original `x is not None and Truth(x)` evaluated to the
    # boolean False (not None) when gold_truth was absent; use a conditional
    # expression so the "missing" sentinel stays None.
    gold_truth = Truth(obj["gold_truth"]) if obj["gold_truth"] is not None else None
    # TODO(chaganty): how to serialize previous_state_action?
    previous_state_action = None
    return State(source, target, truth, gold_truth, previous_state_action)
def test_from_tokens(self):
    """from_tokens keeps text and tokens, with and without POS tags."""
    raw_text = "This is a test."
    words = "This is a test .".split()
    tags = "DT VBZ DT NN .".split()

    # Without POS tags.
    parsed = AnnotatedSentence.from_tokens(raw_text, words)
    assert parsed.text == raw_text
    assert len(parsed) == 5
    assert parsed[1].word == "is"

    # With POS tags.
    parsed = AnnotatedSentence.from_tokens(raw_text, words, tags)
    assert parsed.text == raw_text
    assert len(parsed) == 5
    assert parsed[1].word == "is"
    assert parsed[1].pos == "VBZ"
def test_parse_pb(self, document_pb):
    """Parsing the first protobuf sentence preserves text, length, and NER."""
    parsed = AnnotatedSentence.from_pb(document_pb.sentence[0])
    assert parsed.text == (
        u"Barack Hussein Obama is an American politician who is the 44th "
        u"and current President of the United States."
    )
    assert len(parsed) == 19
    second_token = parsed[1]
    assert second_token.word == "Hussein"
    assert second_token.ner == "PERSON"
def test_from_tokens(self):
    """A sentence built from raw tokens keeps its text and token sequence."""
    raw_text = "This is a test."
    parsed = AnnotatedSentence.from_tokens(raw_text, ["This", "is", "a", "test", "."])
    assert parsed.text == raw_text
    assert len(parsed) == 5
    assert parsed[1].word == "is"
def test_depparse(self, document_pb):
    """The dependency parse roots at 'politician' with the expected children."""
    parsed = AnnotatedSentence.from_pb(document_pb.sentence[0])
    parse = parsed.depparse()
    # 'politician' (index 6) is the sole root.
    assert parse.roots == [6]
    # Obama is child of politician; 'is' is a copula.
    politician_children = parse.children(6)
    assert (2, 'nsubj') in politician_children
    assert (3, 'cop') in politician_children
    # 'Barack' is part of the compound that is Obama.
    assert (0, 'compound') in parse.children(2)
def test_depparse_json(self, document_pb):
    """to_json() emits one edge dict per dependency (1-indexed positions)."""
    sentence = AnnotatedSentence.from_pb(document_pb.sentence[0])
    edges = sentence.depparse().to_json()

    # NOTE: 'governer' reproduces the (misspelled) key emitted by to_json().
    def has_edge(dep, dependent, gloss, governer=None):
        for edge in edges:
            if edge['dep'] != dep or edge['dependent'] != dependent:
                continue
            if edge['dependentgloss'] != gloss:
                continue
            if governer is not None and edge['governer'] != governer:
                continue
            return True
        return False

    # politician is root
    assert has_edge('root', 7, 'politician')
    # Obama is child of politician
    assert has_edge('nsubj', 3, 'Obama', governer=7)
    # 'is' is ia copula
    assert has_edge('cop', 4, 'is', governer=7)
    # 'Barack' is part of the compount that is Obama.
    assert has_edge('compound', 1, 'Barack', governer=3)
def test_depparse_json(self, document_pb):
    """Every expected dependency edge appears in the JSON-serialized parse."""
    sentence = AnnotatedSentence.from_pb(document_pb.sentence[0])
    edges = sentence.depparse().to_json()

    # (governer, dep, dependent, dependentgloss); None governer means "any".
    # NOTE: 'governer' reproduces the (misspelled) key emitted by to_json().
    expected_edges = [
        (None, "root", 7, "politician"),  # politician is root
        (7, "nsubj", 3, "Obama"),         # Obama is child of politician
        (7, "cop", 4, "is"),              # 'is' is ia copula
        (3, "compound", 1, "Barack"),     # 'Barack' is part of the compount that is Obama.
    ]
    for gov, dep, dependent, gloss in expected_edges:
        assert any(
            (gov is None or edge["governer"] == gov)
            and edge["dep"] == dep
            and edge["dependent"] == dependent
            and edge["dependentgloss"] == gloss
            for edge in edges
        )
def test_sentence_dict_to_pb(json_dict):
    """dict_to_pb converts the second JSON sentence into an equivalent protobuf."""
    expected_text = 'Really?'
    pb = AnnotatedSentence.dict_to_pb(json_dict['sentences'][1])
    assert pb.text == expected_text
    assert pb.token[1].word == u'?'
def test_json_to_pb(self, json_dict):
    """from_json parses the second JSON sentence with the right text and tokens."""
    expected_text = 'Really?'
    parsed = AnnotatedSentence.from_json(json_dict['sentences'][1])
    assert parsed.text == expected_text
    assert parsed[1].word == u'?'
def make_sentence(text, typed_parse):
    """Build an AnnotatedSentence from raw text and a typed-parse string.

    make_pos_tokens splits the parse into (pos, words); both are forwarded
    to AnnotatedSentence.from_tokens.
    """
    tags, tokens = make_pos_tokens(typed_parse)
    return AnnotatedSentence.from_tokens(text, tokens, tags)