def postprocess_data(self, data):
    """Turn the collected (text, link) reference pairs into objects.

    For each of the 'names refs', 'titles refs' and 'characters refs'
    sections of *data*, every pair whose stripped text and link are both
    non-empty, and whose link ends with '/', is converted into a Person,
    Movie or Character instance respectively.

    Returns a dictionary with those same three keys; each value is a
    dictionary mapping the stripped text to the object built for it.
    A section missing from *data* yields an empty dictionary.
    """
    refs = {}
    for section in ('names refs', 'titles refs', 'characters refs'):
        section_refs = {}
        refs[section] = section_refs
        for raw_text, raw_link in data.get(section, []):
            text = raw_text.strip()
            link = raw_link.strip()
            # Skip incomplete pairs and links without a trailing slash.
            if not text or not link:
                continue
            if not link.endswith('/'):
                continue
            imdb_id = analyze_imdbid(link)
            shared = {'accessSystem': self._as, 'modFunct': self._modFunct}
            if section == 'names refs':
                entry = Person(personID=imdb_id, name=text, **shared)
            elif section == 'titles refs':
                entry = Movie(movieID=imdb_id, title=text, **shared)
            else:
                entry = Character(characterID=imdb_id, name=text, **shared)
            # XXX: companies aren't handled: are they ever found in text,
            #      as links to their page?
            section_refs[text] = entry
    return refs
def _add_ref(self, kind):
    """Add a reference entry to the names, titles or characters dictionaries.

    *kind* selects which pending reference is stored: 'tt' (title),
    'nm' (name) or 'ch' (character).  When both the pending ID and the
    pending text for that kind are set, an object is built and stored
    under the text, unless an entry with that text already exists; the
    pending state for that kind is then reset.  Any other *kind*, or
    incomplete pending state, leaves everything untouched.

    An IMDbParserError raised while building the object is deliberately
    ignored, so that reference is simply not recorded.
    """
    if kind == 'tt':
        # NOTE(review): the reset below is assumed to belong inside this
        # guard, mirroring the 'nm' and 'ch' branches -- confirm.
        if self._titleRefCID and self._titleCN:
            # 'in' replaces dict.has_key(), which Python 3 removed.
            if self._titleCN not in self._titlesRefs:
                try:
                    movie = Movie(movieID=str(self._titleRefCID),
                                  title=self._titleCN,
                                  accessSystem=self._as,
                                  modFunct=self._modFunct)
                except IMDbParserError:
                    pass
                else:
                    self._titlesRefs[self._titleCN] = movie
            self._titleRefCID = u''
            self._titleCN = u''
            self._inTTRef = 0
            self._inLinkTTRef = 0
    elif kind == 'nm' and self._nameRefCID and self._nameCN:
        # XXX: 'Neo' and 'Keanu Reeves' are two separated
        #      entry in the dictionary.  Check the ID value instead
        #      of the key?
        if self._nameCN not in self._namesRefs:
            try:
                person = Person(name=self._nameCN,
                                personID=str(self._nameRefCID),
                                accessSystem=self._as,
                                modFunct=self._modFunct)
            except IMDbParserError:
                pass
            else:
                self._namesRefs[self._nameCN] = person
        self._nameRefCID = u''
        self._nameCN = u''
        self._inNMRef = 0
    elif kind == 'ch' and self._characterRefCID and self._characterCN:
        if self._characterCN not in self._charactersRefs:
            try:
                # Use the parser's own access system and modFunct, as the
                # title and name branches do, instead of a hard-coded
                # 'http' access system with no modFunct.
                character = Character(name=self._characterCN,
                                      characterID=str(self._characterRefCID),
                                      accessSystem=self._as,
                                      modFunct=self._modFunct)
            except IMDbParserError:
                pass
            else:
                self._charactersRefs[self._characterCN] = character
        self._characterRefCID = u''
        self._characterCN = u''
        self._inCHRef = 0