def from_xml(variants_node):
    """
    Build VariantForm objects from the <vf> children of a <variants> node.

    Each <vf> child contributes one VariantForm, constructed from the
    child's text and its 'start' and 'end' attributes. The attributes
    'regional', 'irregular', 'enEnding' and 'undated' each switch the
    matching flag on the VariantForm to True when the attribute is present
    with a truthy (non-empty) value; otherwise the flag is left untouched.

    Returns a 2-tuple (wordclass, forms), where wordclass is the value of
    the node's 'wordclass' attribute and forms is the list of VariantForm
    objects in document order.
    """
    # XML attribute name -> VariantForm attribute it switches on.
    flag_attributes = (
        ('regional', 'regional'),
        ('irregular', 'irregular'),
        ('enEnding', 'has_en_ending'),
        ('undated', 'undated'),
    )

    wordclass = variants_node.get('wordclass')
    forms = []
    for node in variants_node.findall('./vf'):
        form = VariantForm(node.text, node.get('start'), node.get('end'))
        for xml_name, form_attribute in flag_attributes:
            if node.get(xml_name):
                setattr(form, form_attribute, True)
        forms.append(form)
    return (wordclass, forms)
def _recurse(self):
    """
    Combine the forms built so far with the next component's variants.

    Every form in self.output is paired with every variant in
    self.tokens[self.count]. Each pairing yields a new VariantForm whose
    text is the existing form, plus the variant, plus the connector for
    this position, and whose date range is the overlap of the two inputs.
    The 'irregular' and 'regional' flags are set on the result if either
    input has them set.

    A combined form is kept only if its date span is non-negative and,
    when self.reference_date is set, its date range overlaps it. If at
    least one form is kept, self.output is replaced by the kept forms;
    otherwise self.output is left as it was. self.count is then advanced
    and the method calls itself again until self.count equals
    self.num_tokens().
    """
    position = self.count
    survivors = []
    for stem in self.output:
        for component in self.tokens[position]:
            # Extend the lemma string by appending this component and
            # the connector that follows it.
            combined_text = (stem.form + component.form
                             + self.connectors[position])

            # Narrow the date range to the overlap of the components
            # handled so far: the later start and the earlier end.
            # Argument order makes ties resolve to the stem's value.
            latest_start = max(stem.date.start, component.date.start)
            earliest_end = min(stem.date.end, component.date.end)

            candidate = VariantForm(combined_text, latest_start, earliest_end)
            if stem.irregular or component.irregular:
                candidate.irregular = True
            if stem.regional or component.regional:
                candidate.regional = True

            # Discard empty date ranges, and ranges that miss the
            # reference date when one has been supplied.
            has_valid_span = candidate.date.span() >= 0
            in_scope = has_valid_span and (
                not self.reference_date
                or candidate.date.overlap(self.reference_date)
            )
            if in_scope:
                survivors.append(candidate)

    # Keep the previous output if nothing survived this round.
    if survivors:
        self.output = survivors

    self.count += 1
    if self.count != self.num_tokens():
        self._recurse()