def getQuotes(movieID, dataF, indexF):
    """Return a list of quotes.

    The offset of the movie's entry inside dataF is looked up through
    getFullIndex(indexF, movieID); when no offset is found, an empty list
    is returned.

    Every item of the returned list is a single string representing one
    quote: the lines of the quote are joined with '::', and a line that
    starts with a space is appended (after a single space) to the
    previous line of the same quote, unless that previous line ends
    with '::'.

    Raises IMDbDataAccessError if dataF cannot be opened.
    """
    index = getFullIndex(indexF, movieID)
    qtL = []
    if index is None:
        return qtL
    try:
        qtf = open(dataF, 'rt')
    except IOError as e:
        raise IMDbDataAccessError(str(e))
    try:
        qtf.seek(index)
        # The first line found at the offset is read and discarded
        # (presumably the header of the movie's entry).
        qtf.readline()
        qttl = []
        while 1:
            raw = qtf.readline()
            if not raw:
                # readline() returns an empty string only at end of file;
                # stop here instead of looping forever.
                break
            line = latin2utf(raw).rstrip()
            if not line:
                # A blank line closes the quote collected so far.
                if qttl:
                    qtL.append('::'.join(qttl))
                    qttl[:] = []
                continue
            if line.startswith('# '):
                # A line starting with '# ' ends the quotes of this entry.
                break
            # 'qttl and' guards against an indented line arriving before
            # any line of the current quote was collected.
            if line.startswith(' ') and qttl and qttl[-1] and \
                    not qttl[-1].endswith('::'):
                line = line.lstrip()
                if line:
                    qttl[-1] += ' %s' % line
            else:
                line = line.lstrip()
                if line:
                    qttl.append(line)
        # Flush the quote still pending when the loop ended.
        if qttl:
            qtL.append('::'.join(qttl))
    finally:
        qtf.close()
    return qtL
def _readCompanyNamsKeyFile(keyFile): """Iterate over the given file, returning tuples suited for the common.locsql.scan_company_names function.""" try: kf = open(keyFile, 'r') except IOError, e: raise IMDbDataAccessError, str(e) for line in kf: ls = line.split('|') n = ls[0] if not n: continue yield (long(ls[1], 16), latin2utf(n)) kf.close()
def _readNamesKeyFile(keyFile): """Iterate over the given file, returning tuples suited for the common.locsql.scan_names function.""" try: kf = open(keyFile, 'r') except IOError, e: raise IMDbDataAccessError, str(e) for line in kf: ls = line.split('|') if not ls[0]: continue named = analyze_name(latin2utf(ls[0])) yield (long(ls[1], 16), named) kf.close()
def _scan_titles(keyFile, title1, title2, title3, results=0):
    """Scan the given file, using the cutils.search_title
    C function, for title variations.

    The three title variations are encoded as latin_1 strings (with
    unencodable characters replaced) before being handed to
    search_title.  Returns a list of (x[0], (x[1], analyzed_title))
    tuples, one for every item x returned by search_title, where
    analyzed_title is analyze_title applied to x[2] converted with
    latin2utf.
    """
    encoded = []
    for variant in (title1, title2, title3):
        encoded.append(variant.encode('latin_1', 'replace'))
    title1, title2, title3 = encoded
    matches = search_title(keyFile, title1, title2, title3, results)
    return [(match[0], (match[1], analyze_title(latin2utf(match[2]))))
            for match in matches]
def _scan_titles(keyFile, title1, title2, title3, results=0,
                 _only_episodes=0):
    """Scan the given file, using the cutils.search_title
    C function, for title variations.

    The three title variations are encoded as latin_1 strings (with
    unencodable characters replaced) before being handed to
    search_title, together with results and _only_episodes.  Returns a
    list of (x[0], (x[1], analyzed_title)) tuples, one for every item x
    returned by search_title, where analyzed_title is analyze_title
    applied to x[2] converted with latin2utf.
    """
    encoded = []
    for variant in (title1, title2, title3):
        encoded.append(variant.encode('latin_1', 'replace'))
    title1, title2, title3 = encoded
    matches = search_title(keyFile, title1, title2, title3,
                           results, _only_episodes)
    return [(match[0], (match[1], analyze_title(latin2utf(match[2]))))
            for match in matches]
def _readTitlesKeyFile(keyFile, searchingEpisode=0): """Iterate over the given file, returning tuples suited for the common.locsql.scan_titles function.""" try: kf = open(keyFile, 'r') except IOError, e: raise IMDbDataAccessError, str(e) for line in kf: ls = line.split('|') t = ls[0] if not t: continue if searchingEpisode: if t[-1] != '}': continue elif t[-1] == '}': continue titled = analyze_title(latin2utf(t)) yield (long(ls[1], 16), titled) kf.close()
def getTaglines(movieID, indexF, dataF):
    """Return a list of taglines.

    The offset of the movie's entry inside dataF is looked up through
    getFullIndex(indexF, movieID); when no offset is found, an empty list
    is returned.  Starting from the line after the one at that offset,
    every stripped line (converted with latin2utf) is collected, until
    the first empty line or the end of the file.

    Raises IMDbDataAccessError if dataF cannot be opened.
    """
    index = getFullIndex(indexF, movieID)
    tgL = []
    if index is None:
        return tgL
    try:
        tgf = open(dataF, 'rt')
    except IOError as e:
        raise IMDbDataAccessError(str(e))
    try:
        tgf.seek(index)
        # The first line found at the offset is read and discarded
        # (presumably the header of the movie's entry).
        tgf.readline()
        while 1:
            line = latin2utf(tgf.readline().strip())
            if not line:
                # Blank line or end of file: no more taglines.
                break
            tgL.append(line)
    finally:
        tgf.close()
    return tgL
def parseMinusList(movieID, dataF, indexF):
    """Parser for lists like goofs.data, crazy-credits.data and so on.

    The offset of the movie's entry inside dataF is looked up through
    getFullIndex(indexF, movieID); when no offset is found, an empty list
    is returned.  In the entry, a line starting with '- ' begins a new
    item, the following non-empty lines are continuations of it, and a
    line starting with '# ' (or the end of the file) closes the entry.
    Every item is returned as a single string, its lines stripped and
    joined with a space.

    Raises IMDbDataAccessError if dataF cannot be opened.
    """
    offset = getFullIndex(indexF, movieID)
    if offset is None:
        return []
    try:
        fdata = open(dataF, 'rt')
    except IOError as e:
        raise IMDbDataAccessError(str(e))
    rlist = []
    tmplist = []
    try:
        fdata.seek(offset)
        # The first line found at the offset is never parsed: it is only
        # checked for end of file (presumably it is the entry's header).
        if fdata.readline():
            while 1:
                raw = fdata.readline()
                if not raw:
                    # readline() returns an empty string only at end of
                    # file.
                    break
                line = latin2utf(raw)
                if line.startswith('# '):
                    break
                if line.startswith('- '):
                    # A new item begins: store the one collected so far.
                    if tmplist:
                        rlist.append(' '.join(tmplist))
                    item = line[2:].strip()
                    if item:
                        tmplist[:] = [item]
                    else:
                        tmplist[:] = []
                else:
                    item = line.strip()
                    if item:
                        tmplist.append(item)
        # Flush the item still pending when the entry (or the file) ended.
        if tmplist:
            rlist.append(' '.join(tmplist))
    finally:
        fdata.close()
    return rlist
return None idx = convBin(piddata, 'fulloffset') try: dfptr = open(compDF, 'rb') except IOError, e: import warnings warnings.warn('Unable to access companies information, ' 'please run the companies4local.py script: %s' % e) return None dfptr.seek(idx) # Check companyID. chID = dfptr.read(3) if companyID != convBin(chID, 'companyID'): return None length = convBin(dfptr.read(2), 'longlength') name = latin2utf(dfptr.read(length)) dfptr.close() return name def getCompanyFilmography(companyID, compIF, compDF, movieIF, movieKF): """Build a filmography list for the specified companyID.""" try: ifptr = open(compIF, 'rb') except IOError, e: import warnings warnings.warn('Unable to access companies information, ' 'please run the companies4local.py script: %s' % e) return None ifptr.seek(4L * companyID) piddata = ifptr.read(4)
# latin_1 encoded strings. name1, name2, name3 = [x.encode('latin_1', 'replace') for x in name1, name2, name3] try: sn = search_name(keyFile, name1, name2, name3, results, _scan_character) except IOError, e: if _scan_character: import warnings warnings.warn('Unable to access characters information: %s' % e) return [] else: raise res = [] for x in sn: tmpd = analyze_name(latin2utf(x[2])) res.append((x[0], (x[1], tmpd))) return res except ImportError: import warnings warnings.warn('Unable to import the cutils.search_name function.' ' Searching names using the "local" data access system' ' will be REALLY slow.') from imdb.parser.common.locsql import scan_names def _readNamesKeyFile(keyFile): """Iterate over the given file, returning tuples suited for the common.locsql.scan_names function.""" try: kf = open(keyFile, 'r') except IOError, e: raise IMDbDataAccessError, str(e)
x.encode('latin_1', 'replace') for x in name1, name2, name3 ] try: sn = search_name(keyFile, name1, name2, name3, results, _scan_character) except IOError, e: if _scan_character: import warnings warnings.warn('Unable to access characters information: %s' % e) return [] else: raise res = [] for x in sn: tmpd = analyze_name(latin2utf(x[2])) res.append((x[0], (x[1], tmpd))) return res except ImportError: import warnings warnings.warn('Unable to import the cutils.search_name function.' ' Searching names using the "local" data access system' ' will be REALLY slow.') from imdb.parser.common.locsql import scan_names def _readNamesKeyFile(keyFile): """Iterate over the given file, returning tuples suited for the common.locsql.scan_names function.""" try: kf = open(keyFile, 'r')
return None idx = convBin(piddata, 'fulloffset') try: dfptr = open(charDF, 'rb') except IOError, e: import warnings warnings.warn('Unable to access characters information, ' 'please run the characters4local.py script: %s' % e) return None dfptr.seek(idx) # Check characterID. chID = dfptr.read(3) if characterID != convBin(chID, 'characterID'): return None length = convBin(dfptr.read(2), 'longlength') name = latin2utf(dfptr.read(length)) dfptr.close() return name def getCharacterFilmography(characterID, charIF, charDF, movieIF, movieKF, personIF, personKF, limit=None): """Build a filmography list for the specified characterID.""" try: ifptr = open(charIF, 'rb') except IOError, e: import warnings warnings.warn('Unable to access characters information, ' 'please run the characters4local.py script: %s' % e) return None ifptr.seek(4L*characterID)
return res def getBio(personID, indexF, dataF): """Get biography information for the given person.""" bioidx = getFullIndex(indexF, personID) if bioidx is None: return {} try: fbio = open(dataF, 'r') except IOError, e: raise IMDbDataAccessError, str(e) fbio.seek(bioidx) fbio.readline() rlines = [] while 1: line = latin2utf(fbio.readline()) if not line or line[:4] == 'NM: ': break rlines.append(line) fbio.close() return _parseBiography(rlines) def getFilmography(dataF, indexF, keyF, attrIF, attrKF, offset, charNF=None, doCast=0, doWriters=0):
return res def getBio(personID, indexF, dataF): """Get biography information for the given person.""" bioidx = getFullIndex(indexF, personID) if bioidx is None: return {} try: fbio = open(dataF, 'r') except IOError, e: raise IMDbDataAccessError, str(e) fbio.seek(bioidx) fbio.readline() rlines = [] while 1: line = latin2utf(fbio.readline()) if not line or line[:4] == 'NM: ': break rlines.append(line) fbio.close() return _parseBiography(rlines) def getFilmography(dataF, indexF, keyF, attrIF, attrKF, offset, charNF=None, doCast=0, doWriters=0): """Gather information from the given files about the person entry found at offset; return a list of Movie objects, with the relevant attributes.""" name, res = getRawData(dataF, offset, doCast, doWriters) resList = [] for movie in res: title = getLabel(movie['movieID'], indexF, keyF)