Exemplo n.º 1
0
 def init_person_name_corrections(self, object_list):
     """Collect spelling corrections for the requested person name.

     Does nothing unless ``object_list`` is empty. In that case the
     ``person_name`` entry of ``self.field_params`` is read; names that are
     missing, have five characters or fewer, or cannot be resolved by
     ``TRussianFio`` are ignored. Otherwise the normalized name is passed to
     ``FIO_MISSPELL_CORRECTOR`` and, if the first correction differs from
     the name itself, up to ten corrections are stored in
     ``self.person_name_corrections`` and their count is logged.

     :param object_list: sized collection of results already found.
     """
     if len(object_list) != 0:
         return

     raw_name = self.field_params.get('person_name')
     if raw_name is None or not len(raw_name) > 5:
         return

     parsed = TRussianFio(raw_name, from_search_request=False)
     if not parsed.is_resolved:
         return

     # The corrector is fed the RML-encoded form of the normalized name.
     rml_name = TRussianFio.convert_to_rml_encoding(parsed.get_normalized_person_name())
     candidates = FIO_MISSPELL_CORRECTOR.correct_misspell(rml_name)
     if len(candidates) == 0:
         return

     # A first candidate equal to the input is treated as "nothing to suggest".
     if rml_name != candidates[0]:
         self.person_name_corrections = [
             TRussianFio.convert_from_rml_encoding(candidate)
             for candidate in candidates[:10]
         ]
         self.log('person names corrections count = {}'.format(len(self.person_name_corrections)))
Exemplo n.º 2
0
 def init_person_info(self, section_json):
     """Fill the section's person name, position and department from JSON.

     The person name is read from ``section_json['person']`` (key ``name``,
     falling back to ``name_raw``) or, when there is no ``person`` object,
     from the top-level ``fio`` key. It is then normalized and resolved
     with ``TRussianFio``. ``role`` and ``department`` are read from the
     ``person`` object; both become ``None`` when that object is absent.

     :param section_json: dict-like description of one section.
     :raises TSmartParserSectionJson.SerializerException: if no person name
         can be found or the name cannot be resolved.
     """
     person_info = section_json.get('person')
     if person_info is None:
         fio = section_json.get('fio')
         if fio is None:
             raise TSmartParserSectionJson.SerializerException(
                 "cannot find nor 'person' neither 'fio' key in json")
         # Only the top-level 'fio' is available: use an empty mapping so the
         # role/department lookups below yield None instead of failing with
         # AttributeError on None.
         person_info = {}
     else:
         fio = person_info.get('name')
         if fio is None:
             # Covers both a missing 'name' key and an explicit null value.
             fio = person_info.get('name_raw')
         if fio is None:
             raise TSmartParserSectionJson.SerializerException(
                 "cannot find 'name' or 'name_raw'     in json")
     fio = normalize_fio_before_db_insert(fio)
     resolved_fio = TRussianFio(fio)
     if not resolved_fio.is_resolved:
         raise TSmartParserSectionJson.SerializerException(
             "cannot resolve person name {}".format(fio))
     self.section.person_name = resolved_fio.get_normalized_person_name()
     self.section.position = person_info.get("role")
     self.section.department = person_info.get("department")
 def write_list(self, outp):
     """Write the distinct RML-encoded person names to ``outp``, one per line.

     Every name from ``self.build_person_names()`` is stripped and parsed
     with ``TRussianFio``; unresolved names are skipped. If the encoded
     normalized name fails ``is_in_rml_alphabet`` it is logged at debug
     level and skipped. Otherwise both the normalized and the abridged
     normalized forms are written, each at most once.

     :param outp: writable text stream that has a ``name`` attribute.
     :raises AssertionError: if no name was written at all.
     """
     seen = set()

     def emit(rml_name):
         # Write each distinct name exactly once.
         if rml_name in seen:
             return
         outp.write("{}\n".format(rml_name))
         seen.add(rml_name)

     for raw_name in self.build_person_names():
         stripped = raw_name.strip()
         parsed = TRussianFio(stripped, from_search_request=False, make_lower=False)
         if not parsed.is_resolved:
             continue

         full_form = TRussianFio.convert_to_rml_encoding(
             parsed.get_normalized_person_name())
         if not is_in_rml_alphabet(full_form):
             self.logger.debug("bad alphabet {}".format(stripped))
             continue

         emit(full_form)
         # Only the full form is alphabet-checked; the abridged form is
         # written without a separate check.
         emit(TRussianFio.convert_to_rml_encoding(
             parsed.get_abridged_normalized_person_name()))

     self.logger.info("create {} person names in {}".format(len(seen), outp.name))
     assert len(seen) > 0