def determine_feature_value_for_instance(self, igt):
    """Tally the order of subject, object and verb seen in one IGT instance.

    The instance is skipped (the method returns ``None`` without touching any
    counter) when it has no ``"w"`` or ``"w-ds"`` tier, when the dependency
    tier is empty, or when the first dependency item is not the ``"root"``.

    Otherwise the root's dependent word is recorded as ``"V"``, and the first
    ``"nsubj"`` and first ``"dobj"`` items whose head is that root word are
    recorded as ``"S"`` and ``"O"``.  If at least one of them was found, the
    collected entries are sorted, turned into a word-order label with
    ``FeatureProbe.list_to_word_order`` and counted in ``self.order_counts``.

    Args:
        igt: the instance to inspect; indexed by tier id (``"w"``, ``"w-ds"``).
    """
    try:
        words = igt["w"]
        dparse = igt["w-ds"]
    except KeyError:
        return

    # Unusable without a leading root item.  An empty parse must bail out
    # here too; otherwise the dparse[0] access below raises IndexError.
    if not dparse or dparse[0].text != "root":
        return

    # The root's dependent word stands in for the verb.
    root = dparse[0]
    root_word = root.attributes["dep"]
    word = words[root.attributes["dep"]]
    segmentation = word.segmentation
    word_token = ref.resolve(igt, segmentation)
    function_list = [WordPos(word_token, "V", segmentation)]

    found_subj = False
    found_obj = False

    # Look for the subject and the object attached to that root.
    for pos in dparse:
        if pos.text == "root":
            continue
        word = words[pos.attributes["dep"]]
        segmentation = word.segmentation
        word_token = ref.resolve(igt, segmentation)
        head_word = pos.attributes["head"]
        if pos.text == "nsubj" and not found_subj and head_word == root_word:
            function_list.append(WordPos(word_token, "S", segmentation))
            found_subj = True
            if found_obj:
                break  # both arguments found; nothing left to look for
        elif pos.text == "dobj" and not found_obj and head_word == root_word:
            function_list.append(WordPos(word_token, "O", segmentation))
            found_obj = True
            if found_subj:
                break  # both arguments found; nothing left to look for

    # Only count an order when the verb has at least one argument next to it.
    if len(function_list) > 1:
        sorted_list = sorted(function_list)
        word_order = FeatureProbe.list_to_word_order(sorted_list)
        # NOTE(review): the nesting below was reconstructed from a flattened
        # source line -- confirm that only instance_count is meant to be
        # restricted to instances with both a subject and an object.
        if found_subj and found_obj:
            self.instance_count += 1
        self.order_counts[word_order] += 1.0
        if self.debug:
            self.debug_log("WORD-ORDER: " + word_order)
def determine_feature_value_for_instance(self, igt):
    """Tally adjective/noun order for every ``"amod"`` item of one instance.

    The instance is skipped when it has no ``"w"`` or ``"w-ds"`` tier or when
    the dependency tier is empty.  For each ``"amod"`` item accepted by
    ``self.is_head_noun``, the dependent word and its head are wrapped in
    ``WordPos`` objects and compared: ``"Noun-Adjective"`` is counted when
    the adjective compares greater than the noun, ``"Adjective-Noun"``
    otherwise.  ``self.instance_count`` grows by one per counted pair.

    Args:
        igt: the instance to inspect; indexed by tier id (``"w"``, ``"w-ds"``).
    """
    try:
        words = igt["w"]
        dparse = igt["w-ds"]
    except KeyError:
        return

    # The previous guard tested len(dparse) < 0, which can never be true.
    if not dparse:
        return

    # Loop looking for all adjectival modifiers.
    for pos in dparse:
        if pos.text != "amod":
            continue
        word = words[pos.attributes["dep"]]
        word_segmentation = word.segmentation
        word_token = ref.resolve(igt, word_segmentation)
        try:
            if not self.is_head_noun(pos, igt, dparse):
                continue
            head_word = words[pos.attributes["head"]]
            head_word_segmentation = head_word.segmentation
            adj = WordPos(word_token, "adj", word_segmentation)
            # NOTE(review): the noun is built from the word item itself,
            # whereas the adjective uses its resolved token -- confirm
            # whether WordPos cares about the difference.
            noun = WordPos(head_word, "noun", head_word_segmentation)
            if adj > noun:
                self.order_counts["Noun-Adjective"] += 1.0
            else:
                # Anything else is treated as the adjective preceding the noun.
                self.order_counts["Adjective-Noun"] += 1.0
            self.instance_count += 1
        except KeyError:
            # A lookup failed for this pair; skip it and keep going.
            continue
def test_resolve(self):
    """Check ref.resolve against the xc1 and xc3 fixtures.

    Resolving 'p1' from the xc1 container itself or from its first
    instance's 't' tier must raise XigtError; resolving from an instance or
    from the 'p' tier must return the referenced text, with or without
    span selectors.
    """
    def check(container, expression, expected):
        self.assertEqual(ref.resolve(container, expression), expected)

    # Containers from which 'p1' cannot be resolved.
    whole_corpus = self.xc1
    with self.assertRaises(XigtError):
        ref.resolve(whole_corpus, 'p1')
    t_tier = self.xc1[0]['t']
    with self.assertRaises(XigtError):
        ref.resolve(t_tier, 'p1')

    # Plain id references.
    check(self.xc1[0], 'p1', 'inu=ga san-biki hoe-ru')
    check(self.xc1[0]['p'], 'p1', 'inu=ga san-biki hoe-ru')
    check(self.xc1[0], 't1', 'Three dogs bark.')

    # References carrying one or more character spans.
    check(self.xc1[0], 'p1[0:6]', 'inu=ga')
    check(self.xc1[0], 'p1[0:6,7:15]', 'inu=ga san-biki')

    # Items of the xc3 fixture.
    check(self.xc3[0], 'w1', 'inu=ga')
    check(self.xc3[0], 'm1', 'inu')
    check(self.xc3[0], 'g1', 'dog')
def resolve_ref(self, refattr):
    """Return the content that this item's *refattr* attribute points at.

    The item's own *refattr* value is used as the alignment expression, and
    the *refattr* value of the containing tier names the tier it is resolved
    against.

    Args:
        refattr: name of the reference attribute to follow.

    Returns:
        Whatever ``ref.resolve`` yields for the referred tier and the
        alignment expression.

    Raises:
        XigtStructureError: if the item has no tier, the tier has no Igt,
            or the Igt does not contain the referred tier.
    """
    algnexpr = self.attributes[refattr]

    if self.tier is None:
        message = (
            'Cannot resolve item reference; item (id: {}) is not '
            'contained by a Tier.'
        ).format(self.id)
        raise XigtStructureError(message)
    reftier_id = self.tier.attributes[refattr]

    if self.igt is None:
        message = (
            'Cannot resolve item reference; item\'s tier (id: {}) '
            'is not contained by an Igt.'
        ).format(self.tier.id)
        raise XigtStructureError(message)
    reftier = self.igt.get(reftier_id)

    if reftier is None:
        message = 'Referred tier (id: {}) does not exist in the Igt.'.format(
            reftier_id)
        raise XigtStructureError(message)

    return ref.resolve(reftier, algnexpr)
def resolve_ref(self, refattr):
    """Follow the reference attribute *refattr* and return what it resolves to.

    Reads the alignment expression from this item's attributes, looks up
    the id of the referred tier in the containing tier's attributes, fetches
    that tier from the Igt and hands both to ``ref.resolve``.

    Args:
        refattr: name of the reference attribute to follow.

    Returns:
        The result of ``ref.resolve`` on the referred tier and the item's
        alignment expression.

    Raises:
        XigtStructureError: if the item is not inside a tier, the tier is
            not inside an Igt, or the referred tier is absent from the Igt.
    """
    expression = self.attributes[refattr]

    # The tier supplies the id of the tier being referred to.
    if self.tier is None:
        no_tier = ('Cannot resolve item reference; item (id: {}) is not '
                   'contained by a Tier.')
        raise XigtStructureError(no_tier.format(self.id))
    target_id = self.tier.attributes[refattr]

    # The Igt is where that tier has to be looked up.
    if self.igt is None:
        no_igt = ('Cannot resolve item reference; item\'s tier (id: {}) '
                  'is not contained by an Igt.')
        raise XigtStructureError(no_igt.format(self.tier.id))
    target = self.igt.get(target_id)

    if target is None:
        no_target = 'Referred tier (id: {}) does not exist in the Igt.'
        raise XigtStructureError(no_target.format(target_id))

    return ref.resolve(target, expression)
def test_resolve(self):
    """Check ref.resolve against the xc1 and xc3 fixtures.

    'p1' cannot be resolved from the xc1 container itself or from the 't'
    tier of its first instance (XigtError expected); from an instance or
    from the 'p' tier it yields the referenced text, optionally narrowed by
    character spans.
    """
    resolve = ref.resolve

    def check(container, expression, expected):
        assert resolve(container, expression) == expected

    # Containers from which 'p1' cannot be resolved.
    with pytest.raises(XigtError):
        resolve(self.xc1, 'p1')
    with pytest.raises(XigtError):
        resolve(self.xc1[0]['t'], 'p1')

    # Plain id references.
    check(self.xc1[0], 'p1', 'inu=ga san-biki hoe-ru')
    check(self.xc1[0]['p'], 'p1', 'inu=ga san-biki hoe-ru')
    check(self.xc1[0], 't1', 'Three dogs bark.')

    # References carrying one or more character spans.
    check(self.xc1[0], 'p1[0:6]', 'inu=ga')
    check(self.xc1[0], 'p1[0:6,7:15]', 'inu=ga san-biki')

    # Items of the xc3 fixture.
    check(self.xc3[0], 'w1', 'inu=ga')
    check(self.xc3[0], 'm1', 'inu')
    check(self.xc3[0], 'g1', 'dog')