Exemplo n.º 1
0
 def init_person_name_corrections(self, object_list):
     """Suggest spelling corrections for the searched person name.

     Does nothing unless ``object_list`` is empty and the ``person_name``
     entry of ``self.field_params`` is present, longer than five
     characters and resolved by ``TRussianFio``.  In that case the
     normalized name is passed to ``FIO_MISSPELL_CORRECTOR``; when the
     first returned correction differs from the name itself, at most ten
     corrections are stored in ``self.person_name_corrections`` and their
     count is logged.

     :param object_list: sized collection; corrections are only computed
         when its length is zero (presumably the search results --
         confirm against the caller).
     :return: None
     """
     if len(object_list) != 0:
         return

     raw_name = self.field_params.get('person_name')
     if raw_name is None or len(raw_name) <= 5:
         return

     parsed = TRussianFio(raw_name, from_search_request=False)
     if not parsed.is_resolved:
         return

     # The corrector is queried with the RML-encoded normalized name;
     # its answers are converted back before being stored.
     encoded = TRussianFio.convert_to_rml_encoding(
         parsed.get_normalized_person_name())
     candidates = FIO_MISSPELL_CORRECTOR.correct_misspell(encoded)

     # A first candidate equal to the query means there is nothing to
     # suggest.
     if len(candidates) > 0 and encoded != candidates[0]:
         self.person_name_corrections = [
             TRussianFio.convert_from_rml_encoding(candidate)
             for candidate in candidates[:10]
         ]
         self.log('person names corrections count = {}'.format(
             len(self.person_name_corrections)))
 def write_list(self, outp):
     """Write the distinct normalized person names to ``outp``, one per line.

     Every name yielded by ``self.build_person_names()`` is stripped and
     parsed with ``TRussianFio``.  Unresolved names are skipped.  Names
     whose RML-encoded normalized form fails ``is_in_rml_alphabet`` are
     reported at debug level and skipped.  For the remaining names both
     the full and the abridged normalized forms are written, each form
     only once across the whole run.

     :param outp: object providing ``write(str)`` and a ``name``
         attribute (presumably an open text file -- confirm against the
         caller).
     :raises AssertionError: when not a single name was written (only
         while assertions are enabled).
     """
     seen = set()

     def emit(form):
         # Write a form the first time it is encountered, then remember it.
         if form not in seen:
             outp.write("{}\n".format(form))
             seen.add(form)

     for raw_name in self.build_person_names():
         stripped = raw_name.strip()
         fio = TRussianFio(stripped,
                           from_search_request=False,
                           make_lower=False)
         if not fio.is_resolved:
             continue

         full_form = TRussianFio.convert_to_rml_encoding(
             fio.get_normalized_person_name())
         if not is_in_rml_alphabet(full_form):
             self.logger.debug("bad alphabet {}".format(stripped))
             continue

         emit(full_form)
         # The abridged form is computed only after the full form has
         # been written, and is not checked against the alphabet itself.
         emit(TRussianFio.convert_to_rml_encoding(
             fio.get_abridged_normalized_person_name()))

     self.logger.info("create {} person names in {}".format(
         len(seen), outp.name))
     assert len(seen) > 0