示例#1
0
 def postprocess_data(self, data):
     """Turn the raw reference pairs found in *data* into objects.

     For each of the keys 'names refs', 'titles refs' and
     'characters refs', *data* may hold an iterable of (text, link)
     string pairs.  Both strings are stripped; a pair is ignored when
     either one is then empty or when the link does not end with '/'.
     Every remaining pair becomes a Person, Movie or Character
     (depending on the key) whose ID is analyze_imdbid(link).

     Returns a dictionary with the same three keys, each one mapping
     the stripped text to the object built for it; a later pair with
     the same text replaces an earlier one.
     """
     refs = {}
     for section in ('names refs', 'titles refs', 'characters refs'):
         found = refs[section] = {}
         for text, link in data.get(section, []):
             text, link = text.strip(), link.strip()
             # Incomplete pairs and links without a trailing slash
             # are not references we can use.
             if not text or not link or not link.endswith('/'):
                 continue
             imdbID = analyze_imdbid(link)
             # Arguments common to every kind of reference object.
             shared = {'accessSystem': self._as,
                       'modFunct': self._modFunct}
             if section == 'titles refs':
                 ref = Movie(movieID=imdbID, title=text, **shared)
             elif section == 'names refs':
                 ref = Person(personID=imdbID, name=text, **shared)
             else:
                 ref = Character(characterID=imdbID, name=text, **shared)
             # XXX: companies aren't handled: are they ever found in text,
             #      as links to their page?
             found[text] = ref
     return refs
示例#2
0
 def _add_ref(self, kind):
     """Add a reference entry to the names and titles dictionaries."""
     if kind == 'tt':
         if self._titleRefCID and self._titleCN:
             if not self._titlesRefs.has_key(self._titleCN):
                 try:
                     movie = Movie(movieID=str(self._titleRefCID),
                                   title=self._titleCN,
                                   accessSystem=self._as,
                                   modFunct=self._modFunct)
                     self._titlesRefs[self._titleCN] = movie
                 except IMDbParserError:
                     pass
             self._titleRefCID = u''
             self._titleCN = u''
             self._inTTRef = 0
             self._inLinkTTRef = 0
     elif kind == 'nm' and self._nameRefCID and self._nameCN:
         # XXX: 'Neo' and 'Keanu Reeves' are two separated
         #      entry in the dictionary.  Check the ID value instead
         #      of the key?
         if not self._namesRefs.has_key(self._nameCN):
             try:
                 person = Person(name=self._nameCN,
                                 personID=str(self._nameRefCID),
                                 accessSystem=self._as,
                                 modFunct=self._modFunct)
                 self._namesRefs[self._nameCN] = person
             except IMDbParserError:
                 pass
         self._nameRefCID = u''
         self._nameCN = u''
         self._inNMRef = 0
     elif kind == 'ch' and self._characterRefCID and self._characterCN:
         if not self._charactersRefs.has_key(self._characterCN):
             try:
                 character = Character(name=self._characterCN,
                                       characterID=str(
                                           self._characterRefCID),
                                       accessSystem='http')
                 self._charactersRefs[self._characterCN] = character
             except IMDbParserError:
                 pass
         self._characterRefCID = u''
         self._characterCN = u''
         self._inCHRef = 0