def unknown_entry_partially_encode(entry):
    """Re-encode ``entry.encoded_parts['morph_part']`` node by node.

    A shallow copy of the current ``morph_part`` list is consumed from the
    front while a fresh list is built in its place on ``entry``.  Each source
    node is classified by its position, its text and its ``rend`` attribute:

    * the very first node is handed (as raw text) to ``unknown_initial_xml``;
    * text wrapped in parentheses becomes an "extra morph" node;
    * punctuation (or an en dash) becomes a ``pc`` node;
    * italic ``m``/``f``/``n`` becomes a gram group; an italic ``и`` that is
      followed by a bold node is folded, together with that bold node's text,
      into the single already-encoded form node; any other italic text goes
      through ``nf.create_usg_node``;
    * a bare ``1``-``4`` that is not followed by a ``.`` node becomes a gram
      group of type ``iType``;
    * anything else is carried over unchanged.

    The function mutates ``entry`` in place and returns ``None``.
    """
    pending = copy(entry.encoded_parts['morph_part'])
    encoded = []
    entry.encoded_parts['morph_part'] = encoded
    counter = 0

    while pending:
        current = pending[0]
        node_content = SafeString(current.text)
        # Default: keep the source node as it is.
        new_nodes = [current]

        if counter == 0:
            new_nodes = unknown_initial_xml(current.text)
        else:
            stripped = node_content.strip()
            if stripped.startswith('(') and stripped.endswith(')'):
                new_nodes = [nf.create_extra_morph(node_content)]
            elif stripped in (punctuation + '–'):
                # NOTE(review): assuming `punctuation` is a str, this is a
                # substring test, so an empty (whitespace-only) text and
                # multi-character runs of adjacent symbols match too.
                new_nodes = [nf.create_pc_node(node_content)]
            elif current.get('rend') == "italic":
                if stripped in ('m', 'f', 'n'):
                    new_nodes = [nf.create_gram_grp(node_content)]
                elif (len(encoded) == 1
                        and len(pending) >= 2
                        and pending[1].get('rend') == 'bold'
                        and stripped == 'и'
                        and encoded[0].tag == nf.get_ns('form')):
                    # "<form> и <bold>": attach the conjunction and the
                    # following bold text to the existing form node rather
                    # than emitting new top-level nodes.
                    new_nodes = []
                    encoded[0].append(current)
                    encoded[0].append(
                        nf.create_orth_node(SafeString(pending[1].text)))
                    # Consume the conjunction here; the bold node is consumed
                    # by the pop at the bottom of the loop.
                    pending.pop(0)
                else:
                    # NOTE(review): unlike the sibling branches this result is
                    # not wrapped in a list, so whatever create_usg_node
                    # returns is iterated below - confirm that is intended.
                    new_nodes = nf.create_usg_node(node_content)
            elif stripped in ('1', '2', '3', '4') and (
                    len(pending) == 1 or pending[1].text.strip() != '.'):
                new_nodes = [nf.create_gram_grp(node_content, 'iType')]

        encoded.extend(new_nodes)
        pending.pop(0)
        counter += 1
def noun_xml(content0, content1):
    """Build the list of nodes for a noun headword.

    ``content0`` is split once, from the right, on ``', '``: the left part is
    turned into a lemma form node and, when a non-empty right part exists, a
    ``', '`` pc node and an inflected form node follow it.  A gram group
    built from ``content1`` is always appended last.

    Args:
        content0: Headword text, optionally ``"<lemma>, <inflected>"``.
        content1: Text passed to ``nf.create_gram_grp``.

    Returns:
        A list of the nodes created by ``nf``, in document order.
    """
    parts = content0.rsplit(', ', 1)
    result = [nf.create_form_lemma_node(parts[0])]

    # rsplit yields a single-element list when there is no ', ' in the text;
    # indexing parts[1] unconditionally would raise IndexError in that case.
    if len(parts) > 1 and parts[1]:
        result.append(nf.create_pc_node(', '))
        result.append(nf.create_form_inflected_node(parts[1]))

    result.append(nf.create_gram_grp(content1))
    return result
def verb_xml(entry_type, content0, content1):
    """Build the list of nodes for a verb headword.

    ``content0`` is split on ``', '``.  The first piece becomes the lemma
    form node; every further piece is emitted as a ``', '`` pc node followed
    by an inflected form node.  Unless ``entry_type`` is ``'special_verb'``,
    a gram group of type ``"iType"`` built from ``content1`` is appended.

    Returns:
        A list of the nodes created by ``nf``, in document order.
    """
    pieces = content0.split(', ')
    nodes = [nf.create_form_lemma_node(pieces[0])]

    # Slicing past the end gives an empty list, so no length check is needed.
    for inflected in pieces[1:]:
        separator = nf.create_pc_node(', ')
        nodes.extend((separator, nf.create_form_inflected_node(inflected)))

    if entry_type == 'special_verb':
        return nodes

    nodes.append(nf.create_gram_grp(content1, "iType"))
    return nodes
def adj_1_2_decl_xml(content0, content1):
    """Return ``[lemma form node, gram group]`` for the given texts.

    Judging by the name this serves first/second-declension adjectives, but
    the body itself is generic: ``content0`` goes to
    ``nf.create_form_lemma_node`` and ``content1`` to ``nf.create_gram_grp``.
    """
    return [
        nf.create_form_lemma_node(content0),
        # NOTE(review): "????" looks like a placeholder gram type - confirm
        # the intended value.
        nf.create_gram_grp(content1, "????"),
    ]
def adv_conjunct_xml(content0, content1):
    """Return ``[lemma form node, gram group]`` for the given texts.

    Judging by the name this serves adverbs and conjunctions.  ``content0``
    is passed to ``nf.create_form_lemma_node`` and ``content1`` to
    ``nf.create_gram_grp``.
    """
    lemma_node, grammar_node = (
        nf.create_form_lemma_node(content0),
        # NOTE(review): "????" looks like a placeholder gram type - confirm
        # the intended value.
        nf.create_gram_grp(content1, "????"),
    )
    return [lemma_node, grammar_node]