Пример #1
0
 def remove(self, *strings):
     """
     Remove each of the given strings from the set, if present.

     Every argument is normalized with ``lc()`` before it is looked up in
     ``self.elements``. Arguments whose normalized form is not in the set
     are silently ignored.

     :param strings: values to remove.
     :return: ``self``, so calls can be chained.
     """
     for s in strings:
         # Normalize once and reuse the result for both the membership
         # test and the removal.
         key = lc(s)
         if key in self.elements:
             self.elements.remove(key)
     return self
Пример #2
0
 def is_suffix(self, piece):
     """
     Tell whether `piece` is a recognized suffix and not an initial.

     The ``lc()`` form of `piece` is checked twice: with every period
     stripped against `C.suffix_acronyms` (so input written like "M.B.A"
     can match), and unchanged against `C.suffix_not_acronyms` (e.g. "Jr.").
     A hit in either set counts, unless :py:func:`is_an_initial()` accepts
     the piece.
     """
     normalized = lc(piece)
     # acronym suffixes may be written with periods inside them, like "M.D."
     if normalized.replace('.', '') not in self.C.suffix_acronyms:
         if normalized not in self.C.suffix_not_acronyms:
             return False
     return not self.is_an_initial(piece)
Пример #3
0
 def cap_word(self, word):
     """
     Return `word` with name-style capitalization applied.

     Rules are tried in this order:

     * prefixes and conjunctions are returned lower-cased;
     * if the ``lc()`` form of `word` is a key of
       `C.capitalization_exceptions`, the mapped value is returned;
     * if the `C.regexes.mac` pattern matches, each match is replaced by
       its first two groups, both capitalized;
     * otherwise ``word.capitalize()`` is returned.
     """
     if self.is_prefix(word) or self.is_conjunction(word):
         return word.lower()

     special_cases = self.C.capitalization_exceptions
     key = lc(word)
     if key in special_cases:
         return special_cases[key]

     mac_pattern = self.C.regexes.mac
     if not mac_pattern.match(word):
         return word.capitalize()
     return mac_pattern.sub(
         lambda m: m.group(1).capitalize() + m.group(2).capitalize(),
         word)
Пример #4
0
 def add(self, *strings):
     """
     Add each of the given strings to the set.

     Every argument is passed through ``lc()`` and the result is added to
     ``self.elements``.

     :param strings: values to add.
     :return: ``self``, so calls can be chained.
     """
     # A plain loop: the additions are side effects, so there is no reason
     # to build a throwaway list of ``None`` results.
     for s in strings:
         self.elements.add(lc(s))
     return self
Пример #5
0
 def add(self, *strings):
     """
     Add each of the given strings to the set.

     Binary strings are decoded first, using the encoding reported by
     ``sys.stdin`` and falling back to UTF-8 when stdin is missing or does
     not report an encoding. Every value is then passed through ``lc()``
     and added to ``self.elements``.

     :param strings: text or binary values to add.
     :return: ``self``, so calls can be chained.
     """
     # sys.stdin can be None (no console attached) and its ``encoding`` can
     # be None or absent, so never hand that straight to decode().
     encoding = getattr(sys.stdin, 'encoding', None) or 'utf-8'
     for s in strings:
         if isinstance(s, binary_type):
             s = s.decode(encoding)
         self.elements.add(lc(s))
     return self
Пример #6
0
 def handle_firstnames(self):
     """
     Swap ``first`` and ``last`` for a title followed by a single name.

     The swap happens only when a title is set, ``len(self)`` is 2, and the
     ``lc()`` form of the title is not in `C.first_name_titles`. So
     "Mr. Johnson" treats the name as a last name, while a title listed in
     `C.first_name_titles` (the original docs give "Sir" as an example)
     keeps it as a first name.
     """
     if not self.title or len(self) != 2:
         return
     if lc(self.title) in self.C.first_name_titles:
         return
     self.last, self.first = self.first, self.last
Пример #7
0
 def handle_firstnames(self):
     """
     Treat the single name after a title as a last name.

     When the instance has a title, ``len(self)`` equals 2 and the ``lc()``
     form of the title is not one of `C.first_name_titles`, the values of
     ``first`` and ``last`` are exchanged (e.g. "Mr. Johnson"). Titles in
     `C.first_name_titles` leave the name as a first name.
     """
     has_title_and_one_name = self.title and len(self) == 2
     if has_title_and_one_name:
         if lc(self.title) not in self.C.first_name_titles:
             self.last, self.first = self.first, self.last
Пример #8
0
 def add_with_encoding(self, s, encoding=None):
     """
     Add the ``lc()`` form of `s` to the set.

     Binary strings are decoded before normalization. The encoding used is
     the first usable value of: the `encoding` argument, the encoding
     reported by ``sys.stdin`` (when stdin exists and reports one), and
     finally ``DEFAULT_ENCODING``.

     :param s: text or binary string to add.
     :param encoding: optional encoding used to decode binary input.
     """
     if isinstance(s, binary_type):
         # stdin may be None, or a replacement object that has no
         # ``encoding`` attribute; treat both as "no encoding reported".
         stdin_encoding = None
         if sys.stdin:
             stdin_encoding = getattr(sys.stdin, 'encoding', None)
         s = s.decode(encoding or stdin_encoding or DEFAULT_ENCODING)
     self.elements.add(lc(s))
Пример #9
0
 def add_with_encoding(self, s, encoding=None):
     """
     Add the ``lc()`` form of `s` to the set.

     Binary strings are decoded before normalization. The encoding used is
     the first usable value of: the `encoding` argument, the encoding
     reported by ``sys.stdin`` (when stdin exists and reports one), and
     finally ``DEFAULT_ENCODING``.

     :param s: text or binary string to add.
     :param encoding: optional encoding used to decode binary input.
     """
     if isinstance(s, binary_type):
         # stdin may be None, or a replacement object that has no
         # ``encoding`` attribute; treat both as "no encoding reported".
         stdin_encoding = None
         if sys.stdin:
             stdin_encoding = getattr(sys.stdin, 'encoding', None)
         s = s.decode(encoding or stdin_encoding or DEFAULT_ENCODING)
     self.elements.add(lc(s))
Пример #10
0
    def cap_word(self, word):
        """
        Return `word` with name-style capitalization applied.

        Prefixes and conjunctions are returned in their ``lc()`` form. A
        word that is itself a key of `C.capitalization_exceptions` is
        replaced by the mapped value. A word matched by the
        `C.regexes.mac` pattern has each match rewritten as its first two
        groups, both capitalized. Anything else gets ``str.capitalize()``.
        """
        if self.is_prefix(word) or self.is_conjunction(word):
            return lc(word)

        overrides = self.C.capitalization_exceptions
        if word in overrides:
            return overrides[word]

        mac_re = self.C.regexes.mac
        if not mac_re.match(word):
            return word.capitalize()

        def _capitalize_groups(found):
            # capitalize both captured parts of the match
            return found.group(1).capitalize() + found.group(2).capitalize()

        return mac_re.sub(_capitalize_groups, word)
Пример #11
0
 def is_prefix(self, piece):
     """
     True when the ``lc()`` form of `piece` is in `C.prefixes` and `piece`
     is not accepted by :py:func:`is_an_initial()`.
     """
     if lc(piece) not in self.C.prefixes:
         return False
     return not self.is_an_initial(piece)
Пример #12
0
 def is_conjunction(self, piece):
     """
     True when the ``lc()`` form of `piece` is in `C.conjunctions` and
     `piece` is not accepted by :py:func:`is_an_initial()`.
     """
     normalized = lc(piece)
     listed = normalized in self.C.conjunctions
     return listed and not self.is_an_initial(piece)
Пример #13
0
 def is_rootname(self, piece):
     """
     True when `piece` is a plain name part: its ``lc()`` form is not in
     `C.suffixes_prefixes_titles` and :py:func:`is_an_initial()` rejects it.
     """
     if lc(piece) in self.C.suffixes_prefixes_titles:
         return False
     return not self.is_an_initial(piece)
Пример #14
0
 def is_suffix(self, piece):
     """
     True when the ``lc()`` form of `piece` is in `C.suffixes` and `piece`
     is not accepted by :py:func:`is_an_initial()`.
     """
     in_suffixes = lc(piece) in self.C.suffixes
     if not in_suffixes:
         return False
     return not self.is_an_initial(piece)
Пример #15
0
 def is_conjunction(self, piece):
     """
     Report whether `piece` is a conjunction: its ``lc()`` form must be in
     `C.conjunctions` and :py:func:`is_an_initial()` must reject it.
     """
     if lc(piece) in self.C.conjunctions:
         return not self.is_an_initial(piece)
     return False
Пример #16
0
 def is_suffix(self, piece):
     """
     True when the ``lc()`` form of `piece`, with every period removed, is
     in `C.suffixes` and `piece` is not accepted by
     :py:func:`is_an_initial()`.
     """
     # strip periods so suffixes written like "M.D." can still match
     without_periods = lc(piece).replace('.', '')
     if without_periods not in self.C.suffixes:
         return False
     return not self.is_an_initial(piece)
Пример #17
0
 def is_rootname(self, piece):
     """
     Report whether `piece` is a plain first, middle or last name: its
     ``lc()`` form is absent from `C.suffixes_prefixes_titles` and
     :py:func:`is_an_initial()` rejects it.
     """
     known_affix = lc(piece) in self.C.suffixes_prefixes_titles
     return not (known_affix or self.is_an_initial(piece))
Пример #18
0
 def is_prefix(self, piece):
     """
     Report whether the ``lc()`` form of `piece` is a member of
     `C.prefixes`.
     """
     normalized = lc(piece)
     return normalized in self.C.prefixes
Пример #19
0
 def is_suffix(self, piece):
     """
     Report whether `piece` is a suffix: its ``lc()`` form must be in
     `C.suffixes` and :py:func:`is_an_initial()` must reject it.
     """
     if lc(piece) in self.C.suffixes:
         return not self.is_an_initial(piece)
     return False
Пример #20
0
 def is_title(self, value):
     """
     Report whether the ``lc()`` form of `value` is a member of `C.titles`
     (see :py:data:`~nameparser.config.titles.TITLES`).
     """
     normalized = lc(value)
     return normalized in self.C.titles
Пример #21
0
 def is_title(self, value):
     """
     True when `value`, normalized with ``lc()``, is found in `C.titles`
     (see :py:data:`~nameparser.config.titles.TITLES`).
     """
     if lc(value) in self.C.titles:
         return True
     return False
Пример #22
0
 def is_prefix(self, piece):
     """
     True when `piece`, normalized with ``lc()``, is found in `C.prefixes`
     (see :py:data:`~nameparser.config.prefixes.PREFIXES`).
     """
     if lc(piece) in self.C.prefixes:
         return True
     return False
Пример #23
0
 def is_suffix(self, piece):
     """
     Report whether `piece` is a suffix: its ``lc()`` form, with periods
     removed, must be in `C.suffixes` and :py:func:`is_an_initial()` must
     reject it.
     """
     # periods are dropped so suffixes written like "M.D." still match
     candidate = lc(piece).replace('.', '')
     found = candidate in self.C.suffixes
     return found and not self.is_an_initial(piece)