def generate_gloss_words(inst, create=True):
    """
    Return the word-level gloss tier of an IGT instance, building it if needed.

    1. If a word-level gloss tier carrying the ODIN gloss tag already exists
       on the instance, it is returned. When that tier has an alignment, its
       word-level metadata is removed first.
    2. Otherwise, if ``create`` is true, the first normalized gloss line is
       split on whitespace into a new words tier, which is marked as
       word-level, appended to the instance and returned.
    3. Otherwise (no tier and ``create`` is false), ``None`` is returned.

    :param inst: Instance from which to create the tier.
    :type inst: RGIgt
    :param create: Whether a missing tier should be created.
    :rtype: RGWordTier
    :raises EmptyGlossException: if the gloss line item is ``None``, has no
        value, or contains only whitespace.
    """

    def _tagged_as_gloss(tier):
        # Only accept tiers that carry the ODIN gloss tag.
        return ODIN_GLOSS_TAG in odin_tags(tier)

    # Restrict the search to "glosses" tiers that are at the word level.
    # TODO FIXME: Find more elegant solution
    existing = xigt_find(inst, type=GLOSS_WORD_TYPE,
                         others=[is_word_level_gloss, _tagged_as_gloss])

    if existing is not None:
        # An alignment already indicates the tier's level, so the explicit
        # word-level metadata is no longer needed.
        if existing.alignment is not None:
            remove_word_level_info(existing)
        return existing

    if not create:
        return None

    # The return value is not used; the call is kept for its effect on
    # ``inst`` (presumably making sure a normalized tier exists -- confirm).
    generate_normal_tier(inst)

    # NOTE(review): if no normalized gloss line is returned here, the ``[0]``
    # lookup would fail before the emptiness check below -- confirm what
    # retrieve_normal_lines does in that case.
    gloss_line = retrieve_normal_lines(inst, ODIN_GLOSS_TAG)[0]

    # Refuse to tokenize a gloss line that is missing or blank.
    if gloss_line is None:
        raise EmptyGlossException()
    gloss_text = gloss_line.value()
    if gloss_text is None or not gloss_text.strip():
        raise EmptyGlossException()

    new_tier = create_words_tier(gloss_line, GLOSS_WORD_ID, GLOSS_WORD_TYPE,
                                 aln_attribute=CONTENT,
                                 tokenizer=whitespace_tokenizer)

    # Set the "gloss type" to "word-level" and attach the tier.
    add_word_level_info(new_tier, INTENT_GLOSS_WORD)
    inst.append(new_tier)
    return new_tier
def glosses(inst) -> Tier:
    """
    Return the sub-word-level glosses tier of an IGT instance, creating it
    if it does not exist yet.

    A tier of type ``GLOSS_MORPH_TYPE`` that is not a word-level gloss tier
    is looked up first. If none is found, one is derived from the gloss
    words tier (``gloss(inst)``), aligned via ``CONTENT`` rather than
    ``SEGMENTATION``, marked as a morph-level gloss and appended to the
    instance. Whenever the resulting tier has an alignment, the word-level
    metadata is removed again before the tier is returned.

    :param inst: Instance to retrieve the glosses tier from.
    :return: The existing or newly created glosses tier.
    """

    def _not_word_level(tier):
        # Make sure that we don't pick up the gloss-word tier by accident.
        return not is_word_level_gloss(tier)

    morph_tier = xigt_find(inst, type=GLOSS_MORPH_TYPE,
                           others=[_not_word_level])

    if morph_tier is None:
        # Build the sub-token tier from the word-level gloss tier.
        word_tier = gloss(inst)
        morph_tier = words_to_morph_tier(word_tier, GLOSS_MORPH_TYPE,
                                         GLOSS_MORPH_ID, CONTENT)

        # Record that this is not a word-level gloss.
        add_word_level_info(morph_tier, INTENT_GLOSS_MORPH)
        inst.append(morph_tier)

    # With an alignment present, the metadata attribute is removed.
    if morph_tier.alignment is not None:
        remove_word_level_info(morph_tier)

    return morph_tier