def test_Example(self):
    """Check that setting a field on an ``Example`` changes its ``id``."""
    from dictionaria.lib.ingest import Example

    e = Example([('tx', 'a'), ('ft', 'b')])
    id_ = e.id
    e.set('ref', 'x')
    # ``assertNotEquals`` is a deprecated alias which was removed in
    # Python 3.12; ``assertNotEqual`` is the supported spelling.
    self.assertNotEqual(id_, e.id)
def __call__(self, entry):
    """Replace the example markers of an entry with ``xref`` items.

    Walks the ``(marker, content)`` pairs of *entry*.  Markers that are keys
    of ``self.example_props`` are collected into ``Example`` objects:

    - ``xv`` opens a new example (stored under ``tx``),
    - ``xe`` closes it (stored under ``ft``),
    - ``rf`` is remembered and inserted at the front of the example that is
      closed next,
    - any other example marker is appended to the open example under the
      name ``self.example_props`` maps it to.

    When an example is closed, ``self.xref(example)`` is called and the
    result is added as an ``('xref', ...)`` item.  All markers that are not
    example markers are copied unchanged.  Incomplete examples are dropped
    and reported by writing to ``self.log``.

    :param entry: iterable of ``(marker, content)`` pairs.
    :return: a new object of the same class as *entry*, created from the
        resulting list of pairs.
    """
    example = None
    lx = None  # headword of the entry, only used in log messages
    rf = None
    items = []

    for marker, content in entry:
        if marker == 'lx':
            lx = content

        if marker not in self.example_props:
            items.append((marker, content))
            continue

        if marker == 'rf':
            rf = content
        elif marker == 'xv':
            # new example starts
            if example:
                # but last one is unfinished
                self.log.write(
                    '# incomplete example in lx: %s - missing xe:\n%s\n\n'
                    % (lx, example))
            example = Example([('tx', content)])
        elif marker == 'xe':
            # example ends
            if example:
                if rf:
                    example.insert(0, ('rf', rf))
                example.append(('ft', content))
                items.append(('xref', self.xref(example)))
                rf = None
                example = None
            else:
                self.log.write(
                    '# incomplete example in lx: %s - missing xv\n' % lx)
        elif not example:
            self.log.write('incomplete example in lx: %s - missing xv\n' % lx)
        else:
            example.append((self.example_props[marker], content))

    if example:
        # The entry ended while an example was still open.  It is dropped
        # just like an example interrupted by a new xv, so report it the
        # same way instead of losing it silently.
        self.log.write(
            '# incomplete example in lx: %s - missing xe:\n%s\n\n'
            % (lx, example))

    return entry.__class__(items)
def get_words(self):
    """Generate the words contained within the entry.

    Iterates over the ``(marker, value)`` pairs of the entry.  An ``lx`` or
    ``se`` marker starts a new word; the markers that follow are attached to
    the current word, its current meaning or the example currently being
    assembled.  Each word is passed through ``self.checked_word`` together
    with its current meaning before it is yielded.

    Misplaced example markers (``xvm``, ``xeg`` or ``xe`` without a
    preceding ``xv``, or an example without a meaning to attach it to) are
    reported with ``print`` and skipped.

    :return: generator for the words contained within the entry.
    """
    word = None
    # if an entry has only one \ps marker but multiple words, the value of \ps is used
    # as part-of-speech for all words.
    pos = None
    example = None
    meaning = None

    # flag signaling whether we are dealing with the first meaning of a word or
    # subsequent ones.
    first_meaning = True

    # now we loop over the (marker, value) pairs of the entry:
    for k, v in self:
        # individual words are identified by \lx or \se (sub-entry) markers.
        if k in ('lx', 'se'):
            if word:
                yield self.checked_word(word, meaning)
            word = Word(v)
            if pos:
                word.ps = pos
            meaning = Meaning()
        elif k == 'sn':
            # a new sense number: initialize a new Meaning.
            if not first_meaning:
                # The return value is deliberately not yielded here;
                # presumably checked_word records the finished meaning on
                # the word - TODO confirm against checked_word.
                self.checked_word(word, meaning)
                meaning = Meaning()
            first_meaning = False
        # meaning-specific markers:
        elif k in ('de', 'ge'):
            # FIXME: we must support multiple meanings expressed by
            # semicolon-separated \ge values, e.g. "jump ; jump at"
            setattr(meaning, k, v)
        elif k == 'sd':
            meaning.sd.append(v)
        elif k == 'xv':
            if example:
                # a repeated \xv continues the text of the open example
                example.xv += ' %s' % v
            else:
                example = Example(v)
        elif k in ('xvm', 'xeg'):
            if example is None:
                # There is no open example to attach the value to; report
                # it like a stray \xe instead of failing on None.
                print('%s without xv for word %s' % (
                    k, getattr(word, 'form', None)))
            else:
                previous = getattr(example, k)
                if previous:
                    v = previous + ' ' + v
                setattr(example, k, v)
        elif k == 'xe':
            if example:
                example.xe = v
                # A plain truth test instead of ``assert``: assertions are
                # stripped when Python runs with -O, which would turn a
                # missing meaning into an AttributeError.
                if meaning:
                    meaning.x.append(example)
                else:
                    print(
                        'no meanings for (sense or subentry of) word %s'
                        % getattr(word, 'form', None))
                example = None
            else:
                # getattr keeps the diagnostic itself from failing when no
                # word has been seen yet.
                print('xe without xv for word %s' % getattr(word, 'form', None))
        elif k == 'xref':
            meaning.xref.append(v)
        # word-specific markers:
        elif k in ('hm', 'ph'):
            if getattr(word, k) is None:
                # only record first occurrence of the marker!
                setattr(word, k, v)
        elif k == 'ps':
            pos = word.ps = v
        elif k in ('cf', 'mn'):
            # comma-separated list of related items; blanks are ignored
            for vv in v.split(','):
                if vv.strip():
                    word.rel.append((k, vv.strip()))
        elif k == 'gxx':
            word.non_english_meanings[k].extend(sfm.FIELD_SPLITTER_PATTERN.split(v))
        else:
            word.data[k].append(v)

    if word:
        yield self.checked_word(word, meaning)