def clean_authors(authors):
    """Split a comma-separated author string into formatted author names.

    The input is lower-cased, a handful of known irregularities are patched
    (commas in front of suffixes, an "organizer:" prefix, two hard-coded
    special cases), and the result is split on commas. Each piece is parsed
    with ``HumanName``, capitalized, and rendered with the format
    ``"{last}, {title} {first} {middle}, {suffix}"``.

    :param authors: string holding one or more author names separated by
        commas
    :return: list of text strings, one per author, in input order
    :raises ValueError: if a parsed author has an empty first or last name
    """
    # type(u"") is ``unicode`` on Python 2 and ``str`` on Python 3, so the
    # result is always a text string without referencing the ``unicode``
    # builtin, which does not exist on Python 3.
    text_type = type(u"")

    authors = authors.lower()

    # Applied in this order. The first three get rid of commas where there
    # are suffixes (Jr., III) or a Ph.D marker so the later split on ","
    # does not cut a name in half; the last two are special cases.
    replacements = (
        (", jr.", " jr."),
        (", iii", " iii"),
        (", ph.d", ""),
        ("organizer:", ""),
        ("roel m,", "roel m."),
    )
    for old, new in replacements:
        authors = authors.replace(old, new)

    # Special case: this exact input contains a comma inside the second
    # author's name, so it cannot be split generically.
    if authors == 'kozue miyashiro, etsuko harada, t.':
        author_list = ['kozue miyashiro', 'etsuko harada, t.']
    else:
        author_list = authors.split(",")

    cleaned_authors = []
    for raw_author in author_list:
        # raw_author is already lower-case because ``authors`` was lowered
        # before splitting.
        author = HumanName(raw_author)
        if author.first == '' or author.last == '':
            raise ValueError("invalid author name: {}".format(author))
        author.capitalize()
        author.string_format = u"{last}, {title} {first} {middle}, {suffix}"
        cleaned_authors.append(text_type(author))

    return cleaned_authors
def extractFirstName(name, order):
    """Extract a lower-cased first name from a free-form name string.

    The name is cleaned up (dots, dashes, digits, question marks and
    underscores are removed or turned into spaces), then handed to
    ``getFirstNameFromHumanName``. If that raises, or if it returns the
    whole cleaned name unchanged, the function falls back to
    ``getFirstNameFromSplitName`` and a CamelCase split.

    :param name: raw name string
    :param order: passed through unchanged to ``getFirstNameFromHumanName``
        and ``getFirstNameFromSplitName``
    :return: the first name in lower case, or ``''`` when the candidate is
        ``'Mc'`` or a single character
    """
    # Dots and dashes become word separators.
    name = ' '.join(name.split('.'))
    name = ' '.join(name.split('-'))

    # Remove runs of digits; if that leaves nothing, replace them with
    # underscores instead (underscores are turned into spaces below).
    oldname = name
    name = re.sub(r"\d+", "", name)
    if not name:
        name = re.sub(r"\d+", "_", oldname)

    # Same treatment for question marks.
    oldname = name
    name = re.sub(r"\?", "", name)
    if not name:
        name = re.sub(r"\?", "_", oldname)

    # Underscores become word separators.
    name = ' '.join(name.split('_'))

    # First attempt: the name parser. Any ordinary failure falls back to a
    # plain whitespace split; KeyboardInterrupt/SystemExit still propagate.
    try:
        firstName = getFirstNameFromHumanName(HumanName(name), order)
    except Exception:
        firstName = getFirstNameFromSplitName(name.split(), order)

    # The parser handed back the whole name (e.g. 'Ben Voigt' came back as
    # 'Ben Voigt'), so use heuristics.
    if firstName.strip() == name.strip():
        if len(name.split()) == 2:
            firstName = getFirstNameFromSplitName(name.split(), order)
        else:
            # Try CamelCase.
            uncamel = ' '.join(splitCamelCase(name).split('_'))
            if uncamel != name:
                try:
                    firstName = HumanName(uncamel).first
                    if len(firstName.split()) == 2:
                        firstName = getFirstNameFromSplitName(
                            firstName.split(), order)
                except Exception:
                    firstName = getFirstNameFromSplitName(
                        uncamel.split(), order)

    # 'Mc' and single characters are not accepted as first names.
    if firstName == 'Mc':
        firstName = ''
    if len(firstName) == 1:
        firstName = ''

    return firstName.lower()
def names_compare(name1, name2):
    """
    Compare two human-name strings and report how closely they match.

    Both strings are lower-cased and parsed with ``HumanName``; only the
    parsed first and last names take part in the comparison.

    :param name1: String argument with name
    :param name2: String argument with name
    :return: "exact" when the last names are equal and the first names are
        equal or the shorter first name is contained in the longer one,
        "last" when only the last names are equal, False otherwise
    :raises TypeError: if either argument is not a ``str``
    """
    if not (isinstance(name1, str) and isinstance(name2, str)):
        raise TypeError(
            "CosineCalc.names_compare must receive both string arguments.")

    parsed_a = HumanName(name1.lower())
    parsed_b = HumanName(name2.lower())

    # Without a shared last name nothing else matters.
    if parsed_a.last != parsed_b.last:
        return False

    first_a = parsed_a.first
    first_b = parsed_b.first

    # Short-name check: the strictly shorter first name is looked up inside
    # the other one; on equal length the second is looked up in the first.
    if len(first_a) < len(first_b):
        shorter, longer = first_a, first_b
    else:
        shorter, longer = first_b, first_a

    if first_a == first_b or shorter in longer:
        return "exact"
    return "last"
def extractFirstName(name, order):
    """Extract a lower-cased first name from a free-form name string.

    The name is cleaned up (dots, dashes, digits, question marks and
    underscores are removed or turned into spaces), then handed to
    ``getFirstNameFromHumanName``. If that raises, or if it returns the
    whole cleaned name unchanged, the function falls back to
    ``getFirstNameFromSplitName`` and a CamelCase split.

    :param name: raw name string
    :param order: passed through unchanged to ``getFirstNameFromHumanName``
        and ``getFirstNameFromSplitName``
    :return: the first name in lower case, or ``''`` when the candidate is
        ``'Mc'`` or a single character
    """
    # Dots and dashes become word separators.
    name = ' '.join(name.split('.'))
    name = ' '.join(name.split('-'))

    # Remove runs of digits; if that leaves nothing, replace them with
    # underscores instead (underscores are turned into spaces below).
    oldname = name
    name = re.sub(r"\d+", "", name)
    if not name:
        name = re.sub(r"\d+", "_", oldname)

    # Same treatment for question marks.
    oldname = name
    name = re.sub(r"\?", "", name)
    if not name:
        name = re.sub(r"\?", "_", oldname)

    # Underscores become word separators.
    name = ' '.join(name.split('_'))

    # First attempt: the name parser. Any ordinary failure falls back to a
    # plain whitespace split; KeyboardInterrupt/SystemExit still propagate.
    try:
        firstName = getFirstNameFromHumanName(HumanName(name), order)
    except Exception:
        firstName = getFirstNameFromSplitName(name.split(), order)

    # The parser handed back the whole name (e.g. 'Ben Voigt' came back as
    # 'Ben Voigt'), so use heuristics.
    if firstName.strip() == name.strip():
        if len(name.split()) == 2:
            firstName = getFirstNameFromSplitName(name.split(), order)
        else:
            # Try CamelCase.
            uncamel = ' '.join(splitCamelCase(name).split('_'))
            if uncamel != name:
                try:
                    firstName = HumanName(uncamel).first
                    if len(firstName.split()) == 2:
                        firstName = getFirstNameFromSplitName(
                            firstName.split(), order)
                except Exception:
                    firstName = getFirstNameFromSplitName(
                        uncamel.split(), order)

    # 'Mc' and single characters are not accepted as first names.
    if firstName == 'Mc':
        firstName = ''
    if len(firstName) == 1:
        firstName = ''

    return firstName.lower()
def initContact(contactId: str) -> None:
    """Mark a contact as active and cache its e-mail and display name.

    Adds ``contactId`` to ``activeContacts``, fetches the contact record via
    ``dir.getContact``, stores its ``'email'`` entry in ``contactsToEmails``
    and stores a normalized display name in ``contactsToNames``.

    The display name is built from the record's ``title_before_name``,
    ``first_name``, ``last_name`` and ``title_after_name`` entries (missing
    or empty ones are skipped), stripped of ``@`` characters, lower-cased,
    parsed with ``HumanName`` and capitalized.

    :param contactId: identifier of the contact to initialize
    :raises AssertionError: if ``contactId`` is already in
        ``activeContacts`` (only when assertions are enabled)
    :raises KeyError: if the contact record has no ``'email'`` key,
        assuming the record is a plain dict
    """
    assert contactId not in activeContacts
    activeContacts.add(contactId)

    # NOTE(review): ``dir`` here is a module-level object that shadows the
    # builtin of the same name.
    contact = dir.getContact(contactId)
    contactsToEmails[contactId] = contact['email']

    name_parts = [
        contact.get('title_before_name'),
        contact.get('first_name'),
        contact.get('last_name'),
        contact.get('title_after_name'),
    ]
    # filter(None, ...) drops parts that are missing or empty.
    raw_name = " ".join(filter(None, name_parts))

    # Only '@' is removed; other punctuation (e.g. the dots of initials) is
    # kept.
    raw_name = raw_name.translate(str.maketrans('', '', '@'))

    parsed = HumanName(raw_name.lower().strip())
    parsed.capitalize()

    # A first name made only of "<char>." groups (initials such as "J.r.")
    # is upper-cased as a whole.
    if re.search(r'^(\w\.)+$', parsed.first):
        parsed.first = parsed.first.upper()

    contactsToNames[contactId] = str(parsed)