def get_person_biography(self, personID):
    """Fetch and parse the biography ('bio') page for personID.

    Returns a {'data': d} dict whose keys are biography section names
    ('spouse', 'birth date', 'mini biography', 'salary history', ...).
    """
    cont = self._mretrieve(imdbURL_person_main % personID + 'bio')
    d = {}
    # Spouses live in their own table; split rows on </tr> and mark the
    # first cell boundary with '::' so name and notes stay separable.
    spouses = _findBetween(cont, 'Spouse</h5>', ('</table>', '</dd>'),
                           maxRes=1)
    if spouses:
        sl = []
        for spouse in spouses[0].split('</tr>'):
            if spouse.count('</td>') > 1:
                spouse = spouse.replace('</td>', '::</td>', 1)
            spouse = _unHtml(spouse)
            spouse = spouse.replace(':: ', '::').strip()
            if spouse:
                sl.append(spouse)
        if sl:
            d['spouse'] = sl
    # Every other section is a '<h5>Title</h5>data<br/>' chunk.
    misc_sects = _findBetween(cont, '<h5>', '<br/>')
    misc_sects[:] = [x.split('</h5>') for x in misc_sects]
    misc_sects[:] = [x for x in misc_sects if len(x) == 2]
    for sect, data in misc_sects:
        sect = sect.lower().replace(':', '').strip()
        # FIX: dict.has_key() is deprecated (and removed in Python 3);
        # the 'in' operator is equivalent and works everywhere.
        if sect in d:
            continue
        if sect == 'salary':
            sect = 'salary history'
        elif sect == 'spouse':
            # Already collected above.
            continue
        elif sect == 'nickname':
            sect = 'nick names'
        elif sect == 'where are they now':
            sect = 'where now'
        elif sect == 'personal quotes':
            sect = 'quotes'
        data = data.replace('</p><p>', '::')
        data = data.replace('<br><br>', ' ')  # for multi-paragraphs 'bio'
        # '@@@@' is a placeholder so cell separators survive _unHtml.
        data = data.replace('</td> <td valign="top">', '@@@@')
        data = data.replace('</td> </tr>', '::')
        data = _unHtml(data)
        data = [x.strip() for x in data.split('::')]
        data[:] = [x.replace('@@@@', '::') for x in data if x]
        if sect == 'height' and data:
            data = data[0]
        elif sect == 'birth name':
            data = canonicalName(data[0])
        elif sect == 'date of birth':
            date, notes = date_and_notes(data[0])
            if date:
                d['birth date'] = date
            if notes:
                d['birth notes'] = notes
            continue
        elif sect == 'date of death':
            date, notes = date_and_notes(data[0])
            if date:
                d['death date'] = date
            if notes:
                d['death notes'] = notes
            continue
        elif sect == 'mini biography':
            # Move the 'IMDb Mini Biography By' author in front of the
            # text, separated by '::'.
            ndata = []
            for bio in data:
                byidx = bio.rfind('IMDb Mini Biography By')
                if byidx != -1:
                    bio = u'%s::%s' % (bio[byidx + 23:].lstrip(),
                                       bio[:byidx].rstrip())
                ndata.append(bio)
            data[:] = ndata
        d[sect] = data
    return {'data': d}
def do_br(self, attrs):
    """Handle a <br> tag: a line break closes the section currently
    being accumulated in self._cur_txt.

    `attrs` is unused here; presumably required by the SGML-handler
    calling convention — TODO confirm against the parser base class.
    """
    if self._stop_here or not self._in_content:
        return
    # Inside li tags in filmography, some useless information after a br.
    self._seen_br = True
    self._cur_txt = self._cur_txt.strip()
    # Nothing meaningful accumulated: just reset the state and bail out.
    if not (self._in_post_section and self._section and self._cur_txt):
        self._in_post_section = False
        self._cur_txt = u''
        return
    # We're at the end of a section.
    if self._section == 'birth date':
        date, notes = date_and_notes(self._cur_txt)
        if date:
            self._data['birth date'] = date
        if notes:
            self._data['birth notes'] = notes
    elif self._section == 'death date':
        date, notes = date_and_notes(self._cur_txt)
        if date:
            self._data['death date'] = date
        if notes:
            self._data['death notes'] = notes
    elif self._section == 'akas':
        # Characters separate alternate names with ' / ', persons with ' | '.
        sep = ' | '
        if self.kind == 'character':
            sep = ' / '
        akas = self._cur_txt.split(sep)
        if akas:
            self._data['akas'] = akas
    # XXX: not providing an 'else', we're deliberately ignoring
    # other sections.
    self._in_post_section = False
    if self.kind == 'character':
        # XXX: I'm not confident this is the best place for this...
        self._section = 'filmography'
    self._cur_txt = u''
def _add_items(self):
    """Commit the currently accumulated biography section to
    self._bio_data, then reset the per-section parser state."""
    # Add a new section in the biography.
    if self._in_content and self._sect_name and self._sect_data:
        sect = self._sect_name.strip().lower()
        # XXX: to get rid of the last colons and normalize section names.
        # FIX: endswith() is safe on an empty string, while sect[-1]
        # would raise IndexError if the name was only whitespace.
        if sect.endswith(':'):
            sect = sect[:-1]
        if sect == 'salary':
            sect = 'salary history'
        elif sect == 'nickname':
            sect = 'nick names'
        elif sect == 'where are they now':
            sect = 'where now'
        elif sect == 'personal quotes':
            sect = 'quotes'
        elif sect == 'date of birth':
            sect = 'birth date'
        elif sect == 'date of death':
            sect = 'death date'
        data = self._sect_data.strip()
        d_split = data.split('::')
        # Keep only non-empty stripped items (comprehension instead of
        # the Python-2-only filter()-returns-a-list idiom).
        d_split[:] = [x for x in (p.strip() for p in d_split) if x]
        # Do some transformation on some special cases.
        if sect == 'salary history':
            newdata = []
            for j in d_split:
                parts = [x for x in (p.strip() for p in j.split('@@@@')) if x]
                newdata.append('::'.join(parts))
            d_split[:] = newdata
        elif sect == 'nick names':
            d_split[:] = [normalizeName(x) for x in d_split]
        # FIX: guard d_split[0] accesses — an empty section would have
        # raised IndexError.
        elif sect == 'birth name' and d_split:
            d_split = canonicalName(d_split[0])
        elif sect == 'height' and d_split:
            d_split = d_split[0]
        elif sect == 'spouse':
            d_split[:] = [x.replace(' (', '::(', 1).replace(' ::', '::')
                          for x in d_split]
        # Birth/death date are in both maindetails and bio pages;
        # it's safe to collect both of them.
        if sect == 'birth date' and d_split:
            date, notes = date_and_notes(d_split[0])
            if date:
                self._bio_data['birth date'] = date
            if notes:
                self._bio_data['birth notes'] = notes
        elif sect == 'death date' and d_split:
            date, notes = date_and_notes(d_split[0])
            if date:
                self._bio_data['death date'] = date
            if notes:
                self._bio_data['death notes'] = notes
        elif d_split:
            # Multiple items are added separately (e.g.: 'trivia' is
            # a list of strings).
            self._bio_data[sect] = d_split
    # Reset the per-section state unconditionally.
    self._sect_name = u''
    self._sect_data = u''
    self._in_sect = 0
def _parseBiography(biol):
    """Parse the biographies.data file.

    `biol` is iterated multiple times, so it is presumably a list of
    text lines, each prefixed with a two-letter tag ('DB: ', 'SP: * ',
    ...) — TODO confirm against the callers.
    Returns a dict of biography sections.
    """
    res = {}
    # NOTE(review): this first assignment is immediately overwritten by
    # _parseBioBy() below and looks like dead code; confirm _parseList()
    # has no side effects on `biol` before removing it.
    bio = ' '.join(_parseList(biol, 'BG', mline=0))
    bio = _parseBioBy(biol)
    if bio:
        res['mini biography'] = bio
    for x in biol:
        x4 = x[:4]
        x6 = x[:6]
        if x4 == 'DB: ':
            # Date of birth, with optional notes.
            date, notes = date_and_notes(x[4:])
            if date:
                res['birth date'] = date
            if notes:
                res['birth notes'] = notes
        elif x4 == 'DD: ':
            # Date of death, with optional notes.
            date, notes = date_and_notes(x[4:])
            if date:
                res['death date'] = date
            if notes:
                res['death notes'] = notes
        elif x6 == 'SP: * ':
            res.setdefault('spouse', []).append(x[6:].strip())
        elif x4 == 'RN: ':
            # Real (birth) name, normalized to canonical form.
            n = x[4:].strip()
            if not n:
                continue
            rn = build_name(analyze_name(n, canonical=1), canonical=1)
            res['birth name'] = rn
        elif x6 == 'AT: * ':
            res.setdefault('articles', []).append(x[6:].strip())
        elif x4 == 'HT: ':
            res['height'] = x[4:].strip()
        elif x6 == 'PT: * ':
            res.setdefault('pictorials', []).append(x[6:].strip())
        elif x6 == 'CV: * ':
            res.setdefault('magazine covers', []).append(x[6:].strip())
        elif x4 == 'NK: ':
            res.setdefault('nick names', []).append(normalizeName(x[4:]))
        elif x6 == 'PI: * ':
            res.setdefault('portrayed', []).append(x[6:].strip())
        elif x6 == 'SA: * ':
            # Salary history: ' -> ' separates title from amount.
            sal = x[6:].strip().replace(' -> ', '::')
            res.setdefault('salary history', []).append(sal)
    # Multi-line sections, each gathered by its own tag.
    trl = _parseList(biol, 'TR')
    if trl:
        res['trivia'] = trl
    quotes = _parseList(biol, 'QU')
    if quotes:
        res['quotes'] = quotes
    otherworks = _parseList(biol, 'OW')
    if otherworks:
        res['other works'] = otherworks
    books = _parseList(biol, 'BO')
    if books:
        res['books'] = books
    agent = _parseList(biol, 'AG')
    if agent:
        res['agent address'] = agent
    wherenow = _parseList(biol, 'WN')
    if wherenow:
        res['where now'] = wherenow[0]
    biomovies = _parseList(biol, 'BT')
    if biomovies:
        res['biographical movies'] = biomovies
    guestapp = _buildGuests([x[6:].strip() for x in biol
                             if x[:6] == 'GA: * '])
    if guestapp:
        res['notable tv guest appearances'] = guestapp
    tm = _parseList(biol, 'TM')
    if tm:
        res['trademarks'] = tm
    interv = _parseList(biol, 'IT')
    if interv:
        res['interviews'] = interv
    return res
def get_person_biography(self, personID):
    """Fetch and parse the biography ('bio') page for personID.

    Returns a {'data': d} dict whose keys are biography section names
    ('spouse', 'nick names', 'birth date', 'mini biography', ...).
    """
    cont = self._mretrieve(self.urls['person_main'] % personID + 'bio')
    d = {}
    # Spouses table: split on </tr>, mark the first cell boundary with
    # '::' so name and notes stay separable after _unHtml.
    spouses = _findBetween(cont, 'Spouse</h5>', ('</table>', '</dd>'),
                           maxRes=1)
    if spouses:
        sl = []
        for spouse in spouses[0].split('</tr>'):
            if spouse.count('</td>') > 1:
                spouse = spouse.replace('</td>', '::</td>', 1)
            spouse = _unHtml(spouse)
            spouse = spouse.replace(':: ', '::').strip()
            if spouse:
                sl.append(spouse)
        if sl:
            d['spouse'] = sl
    # Nicknames have their own section; notes in parentheses are split
    # off with '::'.
    nnames = _findBetween(cont, '<h5>Nickname</h5>',
                          ('<br/> <br/>', '<h5>'), maxRes=1)
    if nnames:
        nnames = nnames[0]
        if nnames:
            nnames = [x.strip().replace(' (', '::(', 1)
                      for x in nnames.split('<br/>')]
            if nnames:
                d['nick names'] = nnames
    # Every other section is a '<h5>Title</h5>data<br/>' chunk.
    misc_sects = _findBetween(cont, '<h5>', '<br/>')
    misc_sects[:] = [x.split('</h5>') for x in misc_sects]
    misc_sects[:] = [x for x in misc_sects if len(x) == 2]
    for sect, data in misc_sects:
        sect = sect.lower().replace(':', '').strip()
        # FIX: dict.has_key() is deprecated (and removed in Python 3);
        # the 'in' operator is equivalent and works everywhere.
        if sect in d and sect != 'mini biography':
            continue
        elif sect in ('spouse', 'nickname'):
            # Already collected above.
            continue
        if sect == 'salary':
            sect = 'salary history'
        elif sect == 'where are they now':
            sect = 'where now'
        elif sect == 'personal quotes':
            sect = 'quotes'
        data = data.replace('</p><p>', '::')
        data = data.replace('<br><br>', ' ')  # for multi-paragraphs 'bio'
        # '@@@@' is a placeholder so cell separators survive _unHtml.
        data = data.replace('</td> <td valign="top">', '@@@@')
        data = data.replace('</td> </tr>', '::')
        data = _unHtml(data)
        data = [x.strip() for x in data.split('::')]
        data[:] = [x.replace('@@@@', '::') for x in data if x]
        if sect == 'height' and data:
            data = data[0]
        elif sect == 'birth name':
            data = canonicalName(data[0])
        elif sect == 'date of birth':
            date, notes = date_and_notes(data[0])
            if date:
                d['birth date'] = date
            if notes:
                d['birth notes'] = notes
            continue
        elif sect == 'date of death':
            date, notes = date_and_notes(data[0])
            if date:
                d['death date'] = date
            if notes:
                d['death notes'] = notes
            continue
        elif sect == 'mini biography':
            # Prefix every bio with its author ('Anonymous' when the
            # 'IMDb Mini Biography By' marker is missing).
            ndata = []
            for bio in data:
                byidx = bio.rfind('IMDb Mini Biography By')
                if byidx != -1:
                    bioAuth = bio[:byidx].rstrip()
                else:
                    bioAuth = 'Anonymous'
                bio = u'%s::%s' % (bioAuth, bio[byidx + 23:].lstrip())
                ndata.append(bio)
            data[:] = ndata
            if 'mini biography' in d:
                # A bio was already seen: append instead of overwriting.
                d['mini biography'].append(ndata[0])
                continue
        d[sect] = data
    return {'data': d}
def get_person_main(self, personID, _parseChr=False):
    """Fetch and parse the main details page for a person (or, when
    _parseChr is true, a character).

    Returns {'data': r, 'info sets': ('main', 'filmography')}.
    Raises IMDbDataAccessError when the page has no usable <title>.
    """
    if not _parseChr:
        url = imdbURL_person_main % personID + 'maindetails'
    else:
        url = imdbURL_character_main % personID
    s = self._mretrieve(url)
    r = {}
    name = _findBetween(s, '<title>', '</title>', maxRes=1)
    if not name:
        if _parseChr:
            w = 'characterID'
        else:
            w = 'personID'
        # FIX: use the call form of raise; the 'raise E, msg' comma
        # syntax is Python-2-only (and less readable).
        raise IMDbDataAccessError('unable to get %s "%s"' % (w, personID))
    name = _unHtml(name[0])
    if _parseChr:
        name = name.replace('(Character)', '').strip()
        name = name.replace('- Filmography by type', '').strip()
    else:
        name = name.replace('- Filmography by', '').strip()
    r = analyze_name(name, canonical=not _parseChr)
    for dKind in ('birth', 'death'):
        date = _findBetween(s, '<h5>Date of %s:</h5>' % dKind.capitalize(),
                            ('<a class', '</div>', '<br/><br/>'), maxRes=1)
        if date:
            date = _unHtml(date[0])
            if date:
                date, notes = date_and_notes(date)
                if date:
                    r['%s date' % dKind] = date
                if notes:
                    r['%s notes' % dKind] = notes
    akas = _findBetween(s, 'Alternate Names:</h5>',
                        ('</div>', '<br/><br/>'), maxRes=1)
    if akas:
        akas = akas[0]
        # Persons use ' | ' as a separator, characters ' / '.
        if akas.find(' | ') != -1:
            akas = _unHtml(akas).split(' | ')
        else:
            akas = _unHtml(akas).split(' / ')
        if akas:
            r['akas'] = akas
    hs = _findBetween(s, 'name="headshot"', '</a>', maxRes=1)
    if hs:
        hs[:] = _findBetween(hs[0], 'src="', '"', maxRes=1)
        if hs:
            r['headshot'] = hs[0]
    # Build a list of tuples such [('hrefLink', 'section name')]
    workkind = _findBetween(s, '<div class="strip jump">', '</div>',
                            maxRes=1)
    if workkind:
        workkind[:] = _findBetween(workkind[0], 'href="#', '</a>')
    else:
        # Assume there's only one section and/or there are no
        # section links, for some reason.
        workkind[:] = _findBetween(s, '<h5><a name=', '</a></h5>')
    workkind[:] = [x.lstrip('"').rstrip(':').lower() for x in workkind]
    ws = []
    for work in workkind:
        wsplit = work.split('">', 1)
        if len(wsplit) == 2:
            sect = wsplit[0]
            if '"' in sect:
                sect = sect[:sect.find('"')]
            ws.append((sect, wsplit[1].lower()))
    # XXX: I think "guest appearances" are gone.
    if s.find('<a href="#guest-appearances"') != -1:
        ws.append(('guest-appearances', 'notable tv guest appearances'))
    for sect, sectName in ws:
        raws = u''
        # Everything between the current section link and the end
        # of the <ol> tag.
        if _parseChr and sect == 'filmography':
            inisect = s.find('<div class="filmo">')
        else:
            inisect = s.find('<a name="%s' % sect)
        if inisect != -1:
            endsect = s[inisect:].find('</ol>')
            if endsect != -1:
                raws = s[inisect:inisect + endsect]
        if not raws:
            continue
        mlist = _findBetween(raws, '<li>', ('</li>', '<br>', '<br/>'))
        for m in mlist:
            # For every movie in the current section.
            movieID = re_imdbID.findall(m)
            if not movieID:
                self._mobile_logger.debug('no movieID in %s', m)
                continue
            # Role marker: ' .... ' for persons, ' Played by ' for chars.
            if not _parseChr:
                chrIndx = m.find(' .... ')
            else:
                chrIndx = m.find(' Played by ')
            chids = []
            if chrIndx != -1:
                chrtxt = m[chrIndx + 6:]
                if _parseChr:
                    chrtxt = chrtxt[5:]
                for ch in chrtxt.split(' / '):
                    chid = re_imdbID.findall(ch)
                    if not chid:
                        chids.append(None)
                    else:
                        chids.append(chid[-1])
            if not chids:
                chids = None
            elif len(chids) == 1:
                chids = chids[0]
            movieID = str(movieID[0])
            # Search the status.
            stidx = m.find('<i>')
            status = u''
            if stidx != -1:
                stendidx = m.rfind('</i>')
                if stendidx != -1:
                    status = _unHtml(m[stidx + 3:stendidx])
                    m = m.replace(m[stidx + 3:stendidx], '')
            m = _unHtml(m)
            if not m:
                # FIX: typo in the log message ('fo rmovieID').
                self._mobile_logger.warn('no title for movieID %s', movieID)
                continue
            movie = build_movie(m, movieID=movieID, status=status,
                                roleID=chids, modFunct=self._defModFunct,
                                accessSystem=self.accessSystem,
                                _parsingCharacter=_parseChr)
            r.setdefault(sectName, []).append(movie)
    # If available, take the always correct name from a form.
    itag = _getTagsWith(s, 'NAME="primary"', maxRes=1)
    if not itag:
        itag = _getTagsWith(s, 'name="primary"', maxRes=1)
    if itag:
        vtag = _findBetween(itag[0], 'VALUE="', ('"', '>'), maxRes=1)
        if not vtag:
            vtag = _findBetween(itag[0], 'value="', ('"', '>'), maxRes=1)
        if vtag:
            try:
                vtag = unquote(str(vtag[0]))
                vtag = unicode(vtag, 'latin_1')
                r.update(analyze_name(vtag))
            except UnicodeEncodeError:
                pass
    return {'data': r, 'info sets': ('main', 'filmography')}
def get_person_biography(self, personID):
    """Fetch and parse the biography ('bio') page for personID.

    Returns a {'data': d} dict whose keys are biography section names
    ('spouse', 'nick names', 'birth date', 'mini biography', ...).
    """
    cont = self._mretrieve(self.urls["person_main"] % personID + "bio")
    d = {}
    # Spouses table: split on </tr>, mark the first cell boundary with
    # '::' so name and notes stay separable after _unHtml.
    spouses = _findBetween(cont, "Spouse</h5>", ("</table>", "</dd>"), maxRes=1)
    if spouses:
        sl = []
        for spouse in spouses[0].split("</tr>"):
            if spouse.count("</td>") > 1:
                spouse = spouse.replace("</td>", "::</td>", 1)
            spouse = _unHtml(spouse)
            spouse = spouse.replace(":: ", "::").strip()
            if spouse:
                sl.append(spouse)
        if sl:
            d["spouse"] = sl
    # Nicknames: notes in parentheses are split off with '::'.
    nnames = _findBetween(cont, "<h5>Nickname</h5>", ("<br/> <br/>", "<h5>"), maxRes=1)
    if nnames:
        nnames = nnames[0]
        if nnames:
            nnames = [x.strip().replace(" (", "::(", 1) for x in nnames.split("<br/>")]
            if nnames:
                d["nick names"] = nnames
    # Every other section is a '<h5>Title</h5>data<br/>' chunk.
    misc_sects = _findBetween(cont, "<h5>", "<br/>")
    misc_sects[:] = [x.split("</h5>") for x in misc_sects]
    misc_sects[:] = [x for x in misc_sects if len(x) == 2]
    for sect, data in misc_sects:
        sect = sect.lower().replace(":", "").strip()
        # FIX: dict.has_key() is deprecated (and removed in Python 3);
        # the 'in' operator is equivalent and works everywhere.
        if sect in d and sect != "mini biography":
            continue
        elif sect in ("spouse", "nickname"):
            # Already collected above.
            continue
        if sect == "salary":
            sect = "salary history"
        elif sect == "where are they now":
            sect = "where now"
        elif sect == "personal quotes":
            sect = "quotes"
        data = data.replace("</p><p>", "::")
        data = data.replace("<br><br>", " ")  # for multi-paragraphs 'bio'
        # '@@@@' is a placeholder so cell separators survive _unHtml.
        data = data.replace('</td> <td valign="top">', "@@@@")
        data = data.replace("</td> </tr>", "::")
        data = _unHtml(data)
        data = [x.strip() for x in data.split("::")]
        data[:] = [x.replace("@@@@", "::") for x in data if x]
        if sect == "height" and data:
            data = data[0]
        elif sect == "birth name":
            data = canonicalName(data[0])
        elif sect == "date of birth":
            date, notes = date_and_notes(data[0])
            if date:
                d["birth date"] = date
            if notes:
                d["birth notes"] = notes
            continue
        elif sect == "date of death":
            date, notes = date_and_notes(data[0])
            if date:
                d["death date"] = date
            if notes:
                d["death notes"] = notes
            continue
        elif sect == "mini biography":
            # Prefix every bio with its author ('Anonymous' when the
            # 'IMDb Mini Biography By' marker is missing).
            ndata = []
            for bio in data:
                byidx = bio.rfind("IMDb Mini Biography By")
                if byidx != -1:
                    bioAuth = bio[:byidx].rstrip()
                else:
                    bioAuth = "Anonymous"
                bio = u"%s::%s" % (bioAuth, bio[byidx + 23:].lstrip())
                ndata.append(bio)
            data[:] = ndata
            if "mini biography" in d:
                # A bio was already seen: append instead of overwriting.
                d["mini biography"].append(ndata[0])
                continue
        d[sect] = data
    return {"data": d}
def get_person_main(self, personID, _parseChr=False):
    """Fetch and parse the main details page for a person (or, when
    _parseChr is true, a character).

    Returns {'data': r, 'info sets': ('main', 'filmography')}.
    Raises IMDbDataAccessError when the page has no usable <title>.
    """
    if not _parseChr:
        url = imdbURL_person_main % personID + 'maindetails'
    else:
        url = imdbURL_character_main % personID
    s = self._mretrieve(url)
    r = {}
    name = _findBetween(s, '<title>', '</title>', maxRes=1)
    if not name:
        if _parseChr:
            w = 'characterID'
        else:
            w = 'personID'
        # FIX: use the call form of raise; the 'raise E, msg' comma
        # syntax is Python-2-only (and less readable).
        raise IMDbDataAccessError('unable to get %s "%s"' % (w, personID))
    name = _unHtml(name[0])
    if _parseChr:
        name = name.replace('(Character)', '').strip()
    r = analyze_name(name, canonical=not _parseChr)
    for dKind in ('birth', 'death'):
        date = _findBetween(s, '<h5>Date of %s:</h5>' % dKind.capitalize(),
                            ('<a class', '</div>', '<br/><br/>'), maxRes=1)
        if date:
            date = _unHtml(date[0])
            if date:
                date, notes = date_and_notes(date)
                if date:
                    r['%s date' % dKind] = date
                if notes:
                    r['%s notes' % dKind] = notes
    akas = _findBetween(s, 'Alternate Names:</h5>',
                        ('</div>', '<br/><br/>'), maxRes=1)
    if akas:
        akas = akas[0]
        # Persons use ' | ' as a separator, characters ' / '.
        if akas.find(' | ') != -1:
            akas = _unHtml(akas).split(' | ')
        else:
            akas = _unHtml(akas).split(' / ')
        if akas:
            r['akas'] = akas
    hs = _findBetween(s, 'name="headshot"', '</a>', maxRes=1)
    if hs:
        hs[:] = _findBetween(hs[0], 'src="', '"', maxRes=1)
        if hs:
            r['headshot'] = hs[0]
    # Build a list of tuples such [('hrefLink', 'section name')]
    workkind = _findBetween(s, '<div class="strip jump">', '</div>',
                            maxRes=1)
    if workkind:
        workkind[:] = _findBetween(workkind[0], 'href="#', '</a>')
    else:
        # Assume there's only one section and/or there are no
        # section links, for some reason.
        workkind[:] = _findBetween(s, '<h5><a name=', '</a></h5>')
    workkind[:] = [x.lstrip('"').rstrip(':').lower() for x in workkind]
    ws = []
    for work in workkind:
        wsplit = work.split('">', 1)
        if len(wsplit) == 2:
            ws.append((wsplit[0], wsplit[1].lower()))
    # XXX: I think "guest appearances" are gone.
    if s.find('<a href="#guest-appearances"') != -1:
        ws.append(('guest-appearances', 'notable tv guest appearances'))
    if _parseChr:
        ws.append(('filmography', 'filmography'))
    for sect, sectName in ws:
        raws = u''
        # Everything between the current section link and the end
        # of the <ol> tag.
        if _parseChr and sect == 'filmography':
            inisect = s.find('<div class="filmo">')
        else:
            inisect = s.find('<a name="%s' % sect)
        if inisect != -1:
            endsect = s[inisect:].find('</ol>')
            if endsect != -1:
                raws = s[inisect:inisect + endsect]
        if not raws:
            continue
        mlist = _findBetween(raws, '<li>', ('</li>', '<br>', '<br/>'))
        for m in mlist:
            # For every movie in the current section.
            movieID = re_imdbID.findall(m)
            if not movieID:
                continue
            # Role marker: ' .... ' for persons, ' Played by ' for chars.
            if not _parseChr:
                chrIndx = m.find(' .... ')
            else:
                chrIndx = m.find(' Played by ')
            chids = []
            if chrIndx != -1:
                chrtxt = m[chrIndx + 6:]
                if _parseChr:
                    chrtxt = chrtxt[5:]
                for ch in chrtxt.split(' / '):
                    chid = re_imdbID.findall(ch)
                    if not chid:
                        chids.append(None)
                    else:
                        chids.append(chid[-1])
            if not chids:
                chids = None
            elif len(chids) == 1:
                chids = chids[0]
            movieID = str(movieID[0])
            # Search the status.
            stidx = m.find('<i>')
            status = u''
            if stidx != -1:
                stendidx = m.rfind('</i>')
                if stendidx != -1:
                    status = _unHtml(m[stidx + 3:stendidx])
                    m = m.replace(m[stidx + 3:stendidx], '')
            m = _unHtml(m)
            if not m:
                continue
            movie = build_movie(m, movieID=movieID, status=status,
                                roleID=chids, modFunct=self._defModFunct,
                                accessSystem=self.accessSystem,
                                _parsingCharacter=_parseChr)
            r.setdefault(sectName, []).append(movie)
    # If available, take the always correct name from a form.
    itag = _getTagsWith(s, 'NAME="primary"', maxRes=1)
    if not itag:
        itag = _getTagsWith(s, 'name="primary"', maxRes=1)
    if itag:
        vtag = _findBetween(itag[0], 'VALUE="', ('"', '>'), maxRes=1)
        if not vtag:
            vtag = _findBetween(itag[0], 'value="', ('"', '>'), maxRes=1)
        if vtag:
            try:
                vtag = unquote(str(vtag[0]))
                vtag = unicode(vtag, 'latin_1')
                r.update(analyze_name(vtag, canonical=0))
            except UnicodeEncodeError:
                pass
    # Extract the profile image URL, when present.
    photo = _findBetween(s, '<div class="photo">', '</div>', maxRes=1)
    image_url = ''
    if photo:  # idiomatic truthiness instead of len(photo) > 0
        img = _findBetween(photo[0], '<img', '/a>', maxRes=1)
        if img:
            # FIX: guard the [0] access — an <img> tag without a src
            # attribute would have raised IndexError.
            srcs = _findBetween(img[0], ' src="', '"', maxRes=1)
            if srcs:
                image_url = srcs[0]
    r['image_url'] = image_url
    return {'data': r, 'info sets': ('main', 'filmography')}
def _add_items(self):
    """Store the section accumulated in _sect_name/_sect_data into
    self._bio_data and clear the section state."""
    # Add a new section in the biography.
    if self._in_content and self._sect_name and self._sect_data:
        sect = self._sect_name.strip().lower()
        # XXX: to get rid of the last colons and normalize section names.
        # FIX: endswith() is safe on an empty string, while sect[-1]
        # would raise IndexError if the name was only whitespace.
        if sect.endswith(':'):
            sect = sect[:-1]
        if sect == 'salary':
            sect = 'salary history'
        elif sect == 'nickname':
            sect = 'nick names'
        elif sect == 'where are they now':
            sect = 'where now'
        elif sect == 'personal quotes':
            sect = 'quotes'
        elif sect == 'date of birth':
            sect = 'birth date'
        elif sect == 'date of death':
            sect = 'death date'
        data = self._sect_data.strip()
        d_split = data.split('::')
        # Keep only non-empty stripped items (comprehension instead of
        # the Python-2-only filter()-returns-a-list idiom).
        d_split[:] = [x for x in (p.strip() for p in d_split) if x]
        # Do some transformation on some special cases.
        if sect == 'salary history':
            newdata = []
            for j in d_split:
                parts = [x for x in (p.strip() for p in j.split('@@@@')) if x]
                newdata.append('::'.join(parts))
            d_split[:] = newdata
        elif sect == 'nick names':
            d_split[:] = [normalizeName(x) for x in d_split]
        # FIX: guard d_split[0] accesses — an empty section would have
        # raised IndexError.
        elif sect == 'birth name' and d_split:
            d_split = canonicalName(d_split[0])
        elif sect == 'height' and d_split:
            d_split = d_split[0]
        elif sect == 'spouse':
            d_split[:] = [
                x.replace(' (', '::(', 1).replace(' ::', '::')
                for x in d_split
            ]
        # Birth/death date are in both maindetails and bio pages;
        # it's safe to collect both of them.
        if sect == 'birth date' and d_split:
            date, notes = date_and_notes(d_split[0])
            if date:
                self._bio_data['birth date'] = date
            if notes:
                self._bio_data['birth notes'] = notes
        elif sect == 'death date' and d_split:
            date, notes = date_and_notes(d_split[0])
            if date:
                self._bio_data['death date'] = date
            if notes:
                self._bio_data['death notes'] = notes
        elif d_split:
            # Multiple items are added separately (e.g.: 'trivia' is
            # a list of strings).
            self._bio_data[sect] = d_split
    # Reset the per-section state unconditionally.
    self._sect_name = u''
    self._sect_data = u''
    self._in_sect = 0
def _parseBiography(biol):
    """Parse the biographies.data file.

    `biol` is iterated multiple times, so it is presumably a list of
    text lines, each prefixed with a two-letter tag ('DB: ', 'SP: * ',
    ...) — TODO confirm against the callers.
    Returns a dict of biography sections.
    """
    res = {}
    # NOTE(review): this first assignment is immediately overwritten by
    # _parseBioBy() below and looks like dead code; confirm _parseList()
    # has no side effects on `biol` before removing it.
    bio = ' '.join(_parseList(biol, 'BG', mline=0))
    bio = _parseBioBy(biol)
    if bio:
        res['mini biography'] = bio
    for x in biol:
        x4 = x[:4]
        x6 = x[:6]
        if x4 == 'DB: ':
            # Date of birth, with optional notes.
            date, notes = date_and_notes(x[4:])
            if date:
                res['birth date'] = date
            if notes:
                res['birth notes'] = notes
        elif x4 == 'DD: ':
            # Date of death, with optional notes.
            date, notes = date_and_notes(x[4:])
            if date:
                res['death date'] = date
            if notes:
                res['death notes'] = notes
        elif x6 == 'SP: * ':
            res.setdefault('spouse', []).append(x[6:].strip())
        elif x4 == 'RN: ':
            # Real (birth) name, normalized to canonical form.
            n = x[4:].strip()
            if not n:
                continue
            rn = build_name(analyze_name(n, canonical=1), canonical=1)
            res['birth name'] = rn
        elif x6 == 'AT: * ':
            res.setdefault('articles', []).append(x[6:].strip())
        elif x4 == 'HT: ':
            res['height'] = x[4:].strip()
        elif x6 == 'PT: * ':
            res.setdefault('pictorials', []).append(x[6:].strip())
        elif x6 == 'CV: * ':
            res.setdefault('magazine covers', []).append(x[6:].strip())
        elif x4 == 'NK: ':
            res.setdefault('nick names', []).append(normalizeName(x[4:]))
        elif x6 == 'PI: * ':
            res.setdefault('portrayed', []).append(x[6:].strip())
        elif x6 == 'SA: * ':
            # Salary history: ' -> ' separates title from amount.
            sal = x[6:].strip().replace(' -> ', '::')
            res.setdefault('salary history', []).append(sal)
    # Multi-line sections, each gathered by its own tag.
    trl = _parseList(biol, 'TR')
    if trl:
        res['trivia'] = trl
    quotes = _parseList(biol, 'QU')
    if quotes:
        res['quotes'] = quotes
    otherworks = _parseList(biol, 'OW')
    if otherworks:
        res['other works'] = otherworks
    books = _parseList(biol, 'BO')
    if books:
        res['books'] = books
    agent = _parseList(biol, 'AG')
    if agent:
        res['agent address'] = agent
    wherenow = _parseList(biol, 'WN')
    if wherenow:
        res['where now'] = wherenow[0]
    biomovies = _parseList(biol, 'BT')
    if biomovies:
        res['biographical movies'] = biomovies
    guestapp = _buildGuests([x[6:].strip() for x in biol
                             if x[:6] == 'GA: * '])
    if guestapp:
        res['notable tv guest appearances'] = guestapp
    tm = _parseList(biol, 'TM')
    if tm:
        res['trademarks'] = tm
    interv = _parseList(biol, 'IT')
    if interv:
        res['interviews'] = interv
    return res