def extractFirstName(name, order):
    """Extract a lower-cased first name from a free-form name string.

    The input is normalised first: dots, hyphens and underscores become
    spaces, digits and question marks are removed. The cleaned string is
    parsed with ``HumanName``; if the parser hands back the whole string
    unchanged, two-word and CamelCase heuristics are tried instead.

    Args:
        name: Raw name string.
        order: Passed through unchanged to ``getFirstNameFromHumanName`` and
            ``getFirstNameFromSplitName`` (both defined elsewhere).
            NOTE(review): presumably selects which part of the name is the
            given name -- confirm against those helpers.

    Returns:
        The first name in lower case, or ``''`` when the candidate is a
        single character or exactly ``'Mc'``.
    """
    # Dots and hyphens are treated as separators between name parts.
    name = name.replace('.', ' ').replace('-', ' ')

    # Drop digits. If that leaves nothing, turn each digit run into '_'
    # instead (converted to a space below) so the result is not empty.
    without_digits = re.sub(r"\d+", "", name)
    if not without_digits:
        without_digits = re.sub(r"\d+", "_", name)
    name = without_digits

    # Same treatment for question marks.
    without_marks = re.sub(r"\?", "", name)
    if not without_marks:
        without_marks = re.sub(r"\?", "_", name)
    name = without_marks

    name = name.replace('_', ' ')

    # Primary strategy: the name parser. Fall back to a plain whitespace
    # split when it fails. ``Exception`` (not a bare ``except``) keeps
    # KeyboardInterrupt/SystemExit propagating.
    try:
        first_name = getFirstNameFromHumanName(HumanName(name), order)
    except Exception:
        first_name = getFirstNameFromSplitName(name.split(), order)

    # The parser sometimes returns the input unchanged
    # (e.g. 'Ben Voigt' -> 'Ben Voigt'); use heuristics in that case.
    if first_name.strip() == name.strip():
        words = name.split()
        if len(words) == 2:
            first_name = getFirstNameFromSplitName(words, order)
        else:
            # Try to break a CamelCase name into separate words.
            uncamel = ' '.join(splitCamelCase(name).split('_'))
            if uncamel != name:
                try:
                    first_name = HumanName(uncamel).first
                    parts = first_name.split()
                    if len(parts) == 2:
                        first_name = getFirstNameFromSplitName(parts, order)
                except Exception:
                    first_name = getFirstNameFromSplitName(
                        uncamel.split(), order)

    # A lone 'Mc' prefix or a single character is not a usable first name.
    if first_name == 'Mc' or len(first_name) == 1:
        first_name = ''

    return first_name.lower()
def GetNameLink(name):
    """Return the formatted name and wikilinks for the given name.

    The name is capitalised via ``HumanName``, the English Wikipedia page
    for it is requested through ``GetSoup`` and the page text is scanned for
    tennis-related phrases to decide which article the link should target.

    Args:
        name: Player name as free text.

    Returns:
        A list ``[name, wikilink, abbr_wikilink]``: the capitalised name, a
        wikilink of the form ``[[target]]`` or ``[[target|name]]``, and a
        wikilink piped to the first-name initials plus the remaining name
        parts (e.g. "J-L Struff").
    """
    name = HumanName(name)
    name.capitalize(force=True)
    name = str(name)

    # NOTE(review): GetSoup is defined elsewhere; this assumes it returns an
    # object with a ``.text`` attribute, or None on failure -- confirm.
    # The None check happens *before* ``.text`` is read so a missing
    # response falls back to the plain name instead of raising.
    page = GetSoup(
        "https://en.wikipedia.org/wiki/" + name.replace(" ", "_"), False)
    page_text = page.text if page is not None else None

    # Phrases indicating either a tennis player's article or a
    # missing/disambiguation page.
    tennis = (
        "International Tennis Federation",
        "Prize money",
        "Grand Slam",
        "tennis career",
        "Wikipedia does not have",
        "may refer to",
        "WTA",
        "ITF",
        "ATP",
    )

    wikitext = name
    pipe = False
    if page_text is not None:
        if any(marker in page_text for marker in tennis):
            # Player article exists, or no article exists.
            if "Redirected from" in page_text:
                # If the name is a redirect, pipe the wikilink to the real
                # title to avoid anachronistic names, e.g. using
                # "Margaret Court" instead of "Margaret Smith" before she
                # married.
                soup = GetSoup(page_text, True)
                wikitext = str(soup.title.string).replace(
                    " - Wikipedia", "").strip()
                pipe = True
        else:
            # An article exists for this name but for a different person.
            wikitext = name + " (tennis)"
            pipe = True

    wikilink = "[[" + wikitext + ("|" + name if pipe else "") + "]]"

    # Reduce the name to first-name initials + last name, e.g. "J-L Struff".
    split_name = name.split(" ")
    initials = "-".join(part[0] for part in split_name[0].split("-"))
    abbr_name = initials + " " + " ".join(split_name[1:])
    abbr_wikilink = "[[" + wikitext + "|" + abbr_name + "]]"

    return [name, wikilink, abbr_wikilink]
def extractFirstName(name, order):
    """Extract a lower-cased first name from a free-form name string.

    The input is normalised first: dots, hyphens and underscores become
    spaces, digits and question marks are removed. The cleaned string is
    parsed with ``HumanName``; if the parser hands back the whole string
    unchanged, two-word and CamelCase heuristics are tried instead.

    Args:
        name: Raw name string.
        order: Passed through unchanged to ``getFirstNameFromHumanName`` and
            ``getFirstNameFromSplitName`` (both defined elsewhere).
            NOTE(review): presumably selects which part of the name is the
            given name -- confirm against those helpers.

    Returns:
        The first name in lower case, or ``''`` when the candidate is a
        single character or exactly ``'Mc'``.
    """
    # Dots and hyphens are treated as separators between name parts.
    name = name.replace('.', ' ').replace('-', ' ')

    # Drop digits. If that leaves nothing, turn each digit run into '_'
    # instead (converted to a space below) so the result is not empty.
    without_digits = re.sub(r"\d+", "", name)
    if not without_digits:
        without_digits = re.sub(r"\d+", "_", name)
    name = without_digits

    # Same treatment for question marks.
    without_marks = re.sub(r"\?", "", name)
    if not without_marks:
        without_marks = re.sub(r"\?", "_", name)
    name = without_marks

    name = name.replace('_', ' ')

    # Primary strategy: the name parser. Fall back to a plain whitespace
    # split when it fails. ``Exception`` (not a bare ``except``) keeps
    # KeyboardInterrupt/SystemExit propagating.
    try:
        first_name = getFirstNameFromHumanName(HumanName(name), order)
    except Exception:
        first_name = getFirstNameFromSplitName(name.split(), order)

    # The parser sometimes returns the input unchanged
    # (e.g. 'Ben Voigt' -> 'Ben Voigt'); use heuristics in that case.
    if first_name.strip() == name.strip():
        words = name.split()
        if len(words) == 2:
            first_name = getFirstNameFromSplitName(words, order)
        else:
            # Try to break a CamelCase name into separate words.
            uncamel = ' '.join(splitCamelCase(name).split('_'))
            if uncamel != name:
                try:
                    first_name = HumanName(uncamel).first
                    parts = first_name.split()
                    if len(parts) == 2:
                        first_name = getFirstNameFromSplitName(parts, order)
                except Exception:
                    first_name = getFirstNameFromSplitName(
                        uncamel.split(), order)

    # A lone 'Mc' prefix or a single character is not a usable first name.
    if first_name == 'Mc' or len(first_name) == 1:
        first_name = ''

    return first_name.lower()
def get_last_name(author):
    """Return the last name of the first author in an author string.

    Multiple authors are expected to be joined by the word "and"; only the
    text before the first standalone "and" is parsed with ``HumanName``.

    Args:
        author: Author string, e.g. "Jane Doe and John Smith".

    Returns:
        The ``last`` attribute of the parsed first author.
    """
    # Local import so the function does not rely on a module-level import.
    import re

    # Split on "and" as a whole word only. A plain str.split("and") also
    # cuts inside names such as "Alexander", "Sandra" or "Anderson".
    first_author = re.split(r"\band\b", author, maxsplit=1)[0]
    return HumanName(first_author).last