def build_names():
    """Fill ``GivenNameDict`` and ``Nicknames`` from the ``GivenNames`` table.

    Each ``GivenNames`` entry is unpacked as ``(name, hints, nicknames)``.
    Any of the three may be a single value or a tuple of values; a single
    value is treated as a one-element tuple.  Names and nicknames are passed
    through ``names.normalize`` before being used as dictionary keys.

    Side effects:
        ``GivenNameDict[name]`` receives every hint accepted by
        ``hint.is_hint``; rejected hints are reported with ``print`` and
        skipped.  ``Nicknames[nick]`` receives the normalized names the
        nickname belongs to.

    Returns:
        The number of (name, hint) pairs stored in ``GivenNameDict``.
    """
    cnt = 0
    for name, hints, nicknames in GivenNames:
        # A bare value is shorthand for a one-element tuple.
        if not isinstance(name, tuple):
            name = (name,)
        name = tuple(names.normalize(n) for n in name)
        if not isinstance(hints, tuple):
            hints = (hints,)
        if not isinstance(nicknames, tuple):
            nicknames = (nicknames,)

        for n in name:
            for h in hints:
                if not hint.is_hint(h):
                    # Report the bad table entry but keep processing.
                    print((name, h, ' is not a hint'))
                    continue
                cnt += 1
                GivenNameDict.setdefault(n, []).append(h)

        for nick in nicknames:
            nick = names.normalize(nick)
            # Keep the value a flat list of names.  Appending the list itself
            # would nest it, and classify() uses every element of this list
            # as a dictionary key, which fails for an (unhashable) list.
            Nicknames.setdefault(nick, []).extend(name)
    return cnt
def build_names():
    """Build the given-name hint table and the nickname table.

    Iterates ``GivenNames``, whose entries unpack as
    ``(name, hints, nicknames)``.  Each field may be a single value or a
    tuple; single values are wrapped in a one-element tuple.  Names and
    nicknames are normalized with ``names.normalize``.

    For every (name, hint) pair where ``hint.is_hint`` accepts the hint, the
    hint is appended to ``GivenNameDict[name]``.  Rejected hints are printed
    and ignored.  Every nickname is mapped in ``Nicknames`` to the flat list
    of normalized names it stands for.

    Returns:
        How many (name, hint) pairs were added to ``GivenNameDict``.
    """
    cnt = 0
    for name, hints, nicknames in GivenNames:
        if not isinstance(name, tuple):
            name = (name,)
        name = tuple(names.normalize(n) for n in name)
        if not isinstance(hints, tuple):
            hints = (hints,)

        for n in name:
            for h in hints:
                if not hint.is_hint(h):
                    # Bad table data: report it and move on.
                    print((name, h, ' is not a hint'))
                else:
                    cnt += 1
                    if n in GivenNameDict:
                        GivenNameDict[n].append(h)
                    else:
                        GivenNameDict[n] = [h]

        if not isinstance(nicknames, tuple):
            nicknames = (nicknames,)
        for nick in nicknames:
            nick = names.normalize(nick)
            if nick in Nicknames:
                # extend, not append: the first insertion stores a flat list
                # of names, and later additions must keep it flat so every
                # element stays usable as a dictionary key.
                Nicknames[nick].extend(name)
            else:
                Nicknames[nick] = list(name)
    return cnt
def lookup(conn, surnames):
    """Print one report line for every component of every surname.

    A surname is split on '-' and each part is handled separately.  The part
    is normalized with ``names.normalize`` and the normalized form is passed,
    together with ``conn``, to ``ethnicity`` and ``origin``.

    The printed line contains, in order: the part as given, element 1 of the
    ``ethnicity`` result formatted as a percentage, element 0 of that result,
    and the items of the ``origin`` result joined by spaces.
    """
    row_format = '%-15s %5.2f%% %-6s %s'
    for surname in surnames:
        # Hyphenated surnames are reported one component at a time.
        for part in surname.split('-'):
            key = names.normalize(part)
            eth = ethnicity(conn, key)
            origins = origin(conn, key)
            fields = (part, eth[1], eth[0], ' '.join(origins))
            print(row_format % fields)
def lookup(conn, surnames):
    """Look up and print ethnicity and origin data for surnames.

    Every surname is broken at '-' so that each half of a hyphenated surname
    gets its own line.  Lookups use the ``names.normalize``-d form of the
    part; the output shows the part as it was supplied.

    Nothing is returned; results are written with ``print``.
    """
    # Flatten "surname -> hyphen-separated parts" into a single lazy stream.
    pieces = (piece for full in surnames for piece in full.split('-'))
    for piece in pieces:
        normalized = names.normalize(piece)
        eth_result = ethnicity(conn, normalized)
        origin_result = origin(conn, normalized)
        print('%-15s %5.2f%% %-6s %s' % (
            piece,
            eth_result[1],
            eth_result[0],
            ' '.join(origin_result),
        ))
def yob_generator():
    """Yield ``(year, normalized_name, gender, total)`` rows from yob files.

    Globs ``./<dir>/yob*.txt`` and takes the year from the digits between
    ``yob`` and ``.txt`` in each file name.  Only files whose year is 1900 or
    later are read.  Each line must hold exactly three comma-separated
    fields (name, gender, total); the name is passed through
    ``names.normalize``.

    NOTE(review): ``dir`` is not defined in this block; presumably it is a
    module-level directory name -- confirm.

    Yields:
        Tuples ``(year, norm, gender, total)``.  ``year``, ``gender`` and
        ``total`` are the strings as read, not converted to numbers.

    Raises:
        ValueError: if a line does not split into exactly three fields.
    """
    for filename in glob.glob('./' + dir + '/yob*.txt'):
        # Raw string: '\d' and '\.' are invalid escapes in a plain literal.
        match = re.search(r'yob(\d+)\.txt', filename)
        if match is None:
            # The glob also matches names such as 'yobnotes.txt' that carry
            # no year; skip them instead of failing on None.group().
            continue
        year = match.group(1)
        if int(year) < 1900:
            continue
        # The context manager closes the file even if the consumer stops
        # iterating early and the generator is closed.
        with open(filename, 'r') as handle:
            for line in handle:
                name, gender, total = line.strip().split(",")
                norm = names.normalize(name)
                yield (year, norm, gender, total)
def classify(name):
    """Return the hints recorded for a name and for the names it maps to.

    The name is normalized with ``names.normalize``.  The lookup list is the
    normalized name followed by whatever ``Nicknames`` holds for it (nothing
    if it is absent).  The hint lists found in ``GivenNameDict`` for those
    candidates are concatenated in candidate order.

    Duplicates are kept.  The result is deliberately not passed through
    ``set`` because that would lose the ordering of the hints.

    Returns:
        A new list of hints; empty when no candidate has an entry.
    """
    key = names.normalize(name)

    # The name itself comes first, then the names it is a nickname for.
    candidates = [key] + Nicknames.get(key, [])

    # Concatenate each candidate's hints, preserving order.
    merged = []
    for candidate in candidates:
        merged.extend(GivenNameDict.get(candidate, []))
    return merged
def lookup(conn, givennames):
    """Print a one-line summary for each given name.

    For every name the normalized form (``names.normalize``) is passed to
    ``gender``, ``birthspan_pct`` (with a percentage of 70), ``birthspan``
    (with a span of 20) and ``givenname_origin.classify``.

    When both the 'F' and 'M' gender values are zero, the span percentage is
    zero and there are no hints, only the name and the largest gender value
    (times 100) are printed.  Otherwise the line also carries the likelier
    gender letter ('F' on ties), the span percentage with its range, the
    percentage range, and the hints.
    """
    pct, span = 70, 20
    short_format = '%-15s %3.0f%%'
    full_format = '%-15s %3.0f%%%s %3.0f%%@%.0fyr=%d-%d %.0f%%=%d-%d %s'

    for given in givennames:
        key = names.normalize(given)
        by_gender = gender(conn, key)
        pct_lo, pct_hi = birthspan_pct(conn, key, pct)
        (span_lo, span_hi), span_pct = birthspan(conn, key, span)
        hints = givenname_origin.classify(key)

        # No gender data, no birth-span data and no hints: name not found.
        unknown = (by_gender['F'] == 0. and by_gender['M'] == 0.
                   and span_pct == 0 and hints == [])
        top_share = max(by_gender.values()) * 100.

        if unknown:
            print(short_format % (given, top_share))
        else:
            letter = 'F' if by_gender['F'] >= by_gender['M'] else 'M'
            print(full_format % (given, top_share, letter,
                                 span_pct, span, span_lo, span_hi,
                                 pct, pct_lo, pct_hi, hints))
def lookup(conn, givennames):
    """Report gender, birth-span and origin hints for given names.

    Each name is normalized with ``names.normalize`` and then looked up via
    ``gender``, ``birthspan_pct``, ``birthspan`` and
    ``givenname_origin.classify``.  One line per name is written with
    ``print``; nothing is returned.

    A name with zero 'F' and 'M' gender values, a zero span percentage and an
    empty hint list gets the short form (name and top gender value only).
    Every other name gets the full form.
    """
    for name in givennames:
        norm = names.normalize(name)
        g = gender(conn, norm)
        pct = 70
        span = 20
        in_pct = birthspan_pct(conn, norm, pct)
        plo, phi = in_pct
        in_span = birthspan(conn, norm, span)
        (slo, shi), spct = in_span
        hints = givenname_origin.classify(norm)

        nothing_known = (
            g['F'] == 0. and g['M'] == 0. and spct == 0 and hints == []
        )
        best = max(g.values()) * 100.

        if nothing_known:
            # Name not found: only the short form is printed.
            print('%-15s %3.0f%%' % (name, best))
            continue

        # Ties between 'F' and 'M' are reported as 'F'.
        if g['F'] >= g['M']:
            likelier = 'F'
        else:
            likelier = 'M'
        print('%-15s %3.0f%%%s %3.0f%%@%.0fyr=%d-%d %.0f%%=%d-%d %s'
              % (name, best, likelier, spct, span, slo, shi,
                 pct, plo, phi, hints))