Python latin2utf 예제들, utils.latin2utf Python 예제들

예제 #1

0

파일 보기

def getQuotes(movieID, dataF, indexF):
    """Return a list of quotes for the given movie.

    The offset of the movie's entry inside ``dataF`` is looked up with
    ``getFullIndex(indexF, movieID)``; an empty list is returned when no
    offset is found.  The line at that offset is skipped, then lines are
    read until one starting with '# ' or the end of the file.  Blank lines
    separate quotes; the lines of a single quote are joined with '::'.  A
    line indented by at least two spaces is appended to the previous line
    of the quote, unless that previous line ends with '::'.

    Raises IMDbDataAccessError if ``dataF`` cannot be opened.
    """
    index = getFullIndex(indexF, movieID)
    quotes = []
    if index is None:
        return quotes
    try:
        qtf = open(dataF, 'rt')
    except IOError as e:
        raise IMDbDataAccessError(str(e))
    try:
        qtf.seek(index)
        # Discard the first line of the entry.
        qtf.readline()
        current = []
        while True:
            raw = qtf.readline()
            if not raw:
                # readline() returns an empty string only at end of file;
                # flush the quote being collected and stop, instead of
                # looping forever on empty reads.
                if current:
                    quotes.append('::'.join(current))
                break
            line = latin2utf(raw).rstrip()
            if not line:
                # A blank line closes the quote being collected.
                if current:
                    quotes.append('::'.join(current))
                    current = []
                continue
            if line.startswith('# '):
                # Marker line: the entry is over.
                if current:
                    quotes.append('::'.join(current))
                break
            # "current and" guards the [-1] access: an indented line may
            # show up before any line of the quote has been collected.
            if (line.startswith('  ') and current and current[-1]
                    and not current[-1].endswith('::')):
                current[-1] += ' %s' % line.lstrip()
            else:
                current.append(line.lstrip())
    finally:
        # Close the data file even if decoding or parsing raises.
        qtf.close()
    return quotes

예제 #2

0

파일 보기

파일: movieParser.py 프로젝트: conwetlab/ezweb-gadgets

def getQuotes(movieID, dataF, indexF):
    """Return a list of quotes for the given movie.

    ``getFullIndex(indexF, movieID)`` gives the offset of the movie's
    entry inside ``dataF``; with no offset an empty list is returned.
    The line found at the offset is skipped and the following ones are
    parsed up to a line starting with '# ' or to the end of the file.
    Quotes are separated by blank lines; the lines of one quote are joined
    with '::'.  A line that starts with two spaces continues the previous
    line of the quote, unless that line ends with '::'.

    Raises IMDbDataAccessError if ``dataF`` cannot be opened.
    """
    index = getFullIndex(indexF, movieID)
    qtL = []
    if index is None:
        return qtL
    try:
        qtf = open(dataF, 'rt')
    except IOError as e:
        raise IMDbDataAccessError(str(e))
    try:
        qtf.seek(index)
        # Skip the line at the indexed offset.
        qtf.readline()
        qttl = []
        while True:
            rawline = qtf.readline()
            if not rawline:
                # End of file (readline() gave ''): keep what was
                # collected so far and leave the loop; the file position
                # never grows past the file size, so it cannot be used
                # to detect this condition.
                if qttl:
                    qtL.append('::'.join(qttl))
                break
            line = latin2utf(rawline).rstrip()
            if line.startswith('# '):
                if qttl:
                    qtL.append('::'.join(qttl))
                break
            if not line:
                # Blank line: the current quote is complete.
                if qttl:
                    qtL.append('::'.join(qttl))
                    qttl = []
            elif (line.startswith('  ') and qttl and qttl[-1]
                    and not qttl[-1].endswith('::')):
                # Continuation line; qttl is checked first so that an
                # indented line opening a quote cannot raise IndexError.
                qttl[-1] += ' %s' % line.lstrip()
            else:
                qttl.append(line.lstrip())
    finally:
        # Always release the file handle.
        qtf.close()
    return qtL

예제 #3

0

파일 보기

파일: __init__.py 프로젝트: 070499/repo-scripts

 def _readCompanyNamsKeyFile(keyFile):
     """Iterate over the given file, yielding tuples suited for
     the common.locsql.scan_company_names function.

     Every line is split on '|'.  Lines whose first field is empty are
     skipped; for the others a ``(long(second_field, 16),
     latin2utf(first_field))`` tuple is yielded.

     Raises IMDbDataAccessError if the file cannot be opened.
     """
     try:
         kf = open(keyFile, 'r')
     except IOError as e:
         raise IMDbDataAccessError(str(e))
     try:
         for line in kf:
             ls = line.split('|')
             n = ls[0]
             if not n:
                 continue
             yield (long(ls[1], 16), latin2utf(n))
     finally:
         # Also runs when the consumer stops iterating early (the
         # generator is closed or collected), so the file never stays open.
         kf.close()

예제 #4

0

파일 보기

파일: __init__.py 프로젝트: 070499/repo-scripts

 def _readNamesKeyFile(keyFile):
     """Iterate over the given file, yielding tuples suited for
     the common.locsql.scan_names function.

     Every line is split on '|'.  Lines whose first field is empty are
     skipped; for the others the first field is passed through latin2utf
     and analyze_name, and a ``(long(second_field, 16), analyzed_name)``
     tuple is yielded.

     Raises IMDbDataAccessError if the file cannot be opened.
     """
     try:
         kf = open(keyFile, 'r')
     except IOError as e:
         raise IMDbDataAccessError(str(e))
     try:
         for line in kf:
             ls = line.split('|')
             if not ls[0]:
                 continue
             named = analyze_name(latin2utf(ls[0]))
             yield (long(ls[1], 16), named)
     finally:
         # Close the file even when the generator is abandoned before
         # the last line has been consumed.
         kf.close()

예제 #5

0

파일 보기

 def _readNamesKeyFile(keyFile):
     """Iterate over the given file, yielding tuples suited for
     the common.locsql.scan_names function.

     Lines are split on '|'; a line with an empty first field is ignored.
     The first field is decoded with latin2utf and handed to analyze_name;
     the second field is parsed as a base-16 number.  The yielded value is
     ``(number, analyzed_name)``.

     Raises IMDbDataAccessError if the file cannot be opened.
     """
     try:
         kf = open(keyFile, 'r')
     except IOError as e:
         raise IMDbDataAccessError(str(e))
     try:
         for line in kf:
             fields = line.split('|')
             if not fields[0]:
                 continue
             # The name is analyzed before the key is parsed, as in the
             # original statement order.
             named = analyze_name(latin2utf(fields[0]))
             yield (long(fields[1], 16), named)
     finally:
         # Guarantees the handle is released if iteration stops early.
         kf.close()

예제 #6

0

파일 보기

 def _scan_titles(keyFile, title1, title2, title3, results=0):
     """Scan the given file, using the cutils.search_title
     C function, for title variations.

     The three titles are encoded to latin_1 (unencodable characters are
     replaced) before being passed to search_title.  For every item ``m``
     it returns, the result list holds
     ``(m[0], (m[1], analyze_title(latin2utf(m[2]))))``.
     """
     encoded = [t.encode('latin_1', 'replace')
                for t in (title1, title2, title3)]
     title1, title2, title3 = encoded
     matches = search_title(keyFile, title1, title2, title3, results)
     return [(m[0], (m[1], analyze_title(latin2utf(m[2]))))
             for m in matches]

예제 #7

0

파일 보기

 def _readCompanyNamsKeyFile(keyFile):
     """Iterate over the given file, yielding tuples suited for
     the common.locsql.scan_company_names function.

     Lines are split on '|'; a line with an empty first field is ignored.
     For the remaining lines the second field is parsed as a base-16
     number and the first field is converted with latin2utf; the yielded
     value is ``(number, converted_name)``.

     Raises IMDbDataAccessError if the file cannot be opened.
     """
     try:
         kf = open(keyFile, 'r')
     except IOError as e:
         raise IMDbDataAccessError(str(e))
     try:
         for line in kf:
             fields = line.split('|')
             name = fields[0]
             if name:
                 yield (long(fields[1], 16), latin2utf(name))
     finally:
         # Executed on normal exhaustion and on early termination of the
         # generator alike, so the file handle cannot leak.
         kf.close()

예제 #8

0

파일 보기

파일: __init__.py 프로젝트: 070499/repo-scripts

 def _scan_titles(keyFile, title1, title2, title3, results=0,
                 _only_episodes=0):
     """Scan the given file, using the cutils.search_title
     C function, for title variations.

     Each title is encoded to latin_1, replacing characters that cannot
     be encoded, and search_title is called with the encoded titles plus
     ``results`` and ``_only_episodes``.  Every returned item ``hit`` is
     turned into ``(hit[0], (hit[1], analyze_title(latin2utf(hit[2]))))``.
     """
     title1, title2, title3 = [
         title.encode('latin_1', 'replace')
         for title in (title1, title2, title3)
     ]
     hits = search_title(keyFile, title1, title2, title3, results,
                         _only_episodes)
     found = []
     for hit in hits:
         analyzed = analyze_title(latin2utf(hit[2]))
         found.append((hit[0], (hit[1], analyzed)))
     return found

예제 #9

0

파일 보기

파일: __init__.py 프로젝트: 070499/repo-scripts

 def _readTitlesKeyFile(keyFile, searchingEpisode=0):
     """Iterate over the given file, yielding tuples suited for
     the common.locsql.scan_titles function.

     Every line is split on '|'; lines with an empty first field are
     skipped.  When ``searchingEpisode`` is true only titles ending with
     '}' are kept, otherwise only titles NOT ending with '}' are kept.
     For each kept title a ``(long(second_field, 16),
     analyze_title(latin2utf(title)))`` tuple is yielded.

     Raises IMDbDataAccessError if the file cannot be opened.
     """
     try:
         kf = open(keyFile, 'r')
     except IOError as e:
         raise IMDbDataAccessError(str(e))
     try:
         for line in kf:
             ls = line.split('|')
             t = ls[0]
             if not t:
                 continue
             if searchingEpisode:
                 if t[-1] != '}':
                     continue
             elif t[-1] == '}':
                 continue
             titled = analyze_title(latin2utf(t))
             yield (long(ls[1], 16), titled)
     finally:
         # Close the file even if the caller stops consuming the
         # generator before the end of the file.
         kf.close()

예제 #10

0

파일 보기

 def _readTitlesKeyFile(keyFile, searchingEpisode=0):
     """Iterate over the given file, yielding tuples suited for
     the common.locsql.scan_titles function.

     Lines are split on '|' and those with an empty first field are
     ignored.  A title is kept only when "it ends with '}'" matches the
     truth value of ``searchingEpisode``.  The yielded value is
     ``(long(second_field, 16), analyze_title(latin2utf(title)))``.

     Raises IMDbDataAccessError if the file cannot be opened.
     """
     try:
         kf = open(keyFile, 'r')
     except IOError as e:
         raise IMDbDataAccessError(str(e))
     wantEpisodes = bool(searchingEpisode)
     try:
         for line in kf:
             fields = line.split('|')
             title = fields[0]
             if not title:
                 continue
             # Skip the title when its trailing '}' (or the lack of it)
             # does not match what the caller asked for.
             if (title[-1] == '}') != wantEpisodes:
                 continue
             titled = analyze_title(latin2utf(title))
             yield (long(fields[1], 16), titled)
     finally:
         # Release the handle on exhaustion and on early termination.
         kf.close()

예제 #11

0

파일 보기

def getTaglines(movieID, indexF, dataF):
    """Return a list of taglines for the given movie.

    The offset of the movie's entry inside ``dataF`` comes from
    ``getFullIndex(indexF, movieID)``; an empty list is returned when it
    is None.  The line at the offset is skipped; every following line is
    stripped, converted with latin2utf and collected, up to the first
    empty line (or the end of the file).

    Raises IMDbDataAccessError if ``dataF`` cannot be opened.
    """
    index = getFullIndex(indexF, movieID)
    tgL = []
    if index is None:
        return tgL
    try:
        tgf = open(dataF, 'rt')
    except IOError as e:
        raise IMDbDataAccessError(str(e))
    try:
        tgf.seek(index)
        # Discard the first line of the entry.
        tgf.readline()
        while True:
            line = latin2utf(tgf.readline().strip())
            if not line:
                break
            tgL.append(line)
    finally:
        # Close the data file even if reading or decoding fails.
        tgf.close()
    return tgL

예제 #12

0

파일 보기

파일: movieParser.py 프로젝트: conwetlab/ezweb-gadgets

def getTaglines(movieID, indexF, dataF):
    """Return a list of taglines for the given movie.

    ``getFullIndex(indexF, movieID)`` provides the position of the entry
    in ``dataF``; with no position the result is an empty list.  After
    skipping the line at that position, stripped lines are converted with
    latin2utf and accumulated until an empty one is met (a blank line or
    the end of the file).

    Raises IMDbDataAccessError if ``dataF`` cannot be opened.
    """
    taglines = []
    index = getFullIndex(indexF, movieID)
    if index is None:
        return taglines
    try:
        tgf = open(dataF, 'rt')
    except IOError as e:
        raise IMDbDataAccessError(str(e))
    try:
        tgf.seek(index)
        # The line at the indexed offset is not a tagline: skip it.
        tgf.readline()
        line = latin2utf(tgf.readline().strip())
        while line:
            taglines.append(line)
            line = latin2utf(tgf.readline().strip())
    finally:
        # The handle must not leak when an exception interrupts the loop.
        tgf.close()
    return taglines

예제 #13

0

파일 보기

def parseMinusList(movieID, dataF, indexF):
    """Parser for lists like goofs.data, crazy-credits.data and so on.

    The offset of the movie's entry inside ``dataF`` is looked up with
    ``getFullIndex(indexF, movieID)``; an empty list is returned when no
    offset is found.  The line at the offset is skipped, then lines are
    read until one starting with '# ' or the end of the file.  A line
    starting with '- ' opens a new item; other non-blank lines are
    appended to the current item.  Each item is returned as a single
    string, its pieces joined with a space.

    Raises IMDbDataAccessError if ``dataF`` cannot be opened.
    """
    offset = getFullIndex(indexF, movieID)
    if offset is None:
        return []
    try:
        fdata = open(dataF, 'rt')
    except IOError as e:
        raise IMDbDataAccessError(str(e))
    rlist = []
    tmplist = []
    try:
        fdata.seek(offset)
        # Skip the first line; if it is empty the offset is already at
        # the end of the file and there is nothing to parse.
        if fdata.readline():
            while True:
                raw = fdata.readline()
                if not raw:
                    # End of file.
                    break
                line = latin2utf(raw)
                if line.startswith('# '):
                    # Marker line: the entry is over.
                    break
                if line.startswith('- '):
                    # A new item starts: store the previous one.
                    if tmplist:
                        rlist.append(' '.join(tmplist))
                    l = line[2:].strip()
                    if l:
                        tmplist = [l]
                    else:
                        tmplist = []
                else:
                    l = line.strip()
                    if l:
                        tmplist.append(l)
    finally:
        # Close the data file even if reading or decoding raises.
        fdata.close()
    # Store the item still pending, whichever way the loop ended; this
    # keeps the last item of an entry that runs up to the end of the file.
    if tmplist:
        rlist.append(' '.join(tmplist))
    return rlist

예제 #14

0

파일 보기

파일: movieParser.py 프로젝트: conwetlab/ezweb-gadgets

def parseMinusList(movieID, dataF, indexF):
    """Parser for lists like goofs.data, crazy-credits.data and so on.

    ``getFullIndex(indexF, movieID)`` gives the offset of the entry in
    ``dataF``; with no offset an empty list is returned.  The line at the
    offset is skipped and parsing goes on up to a line starting with '# '
    or to the end of the file.  Lines starting with '- ' begin a new
    item, any other non-blank line continues the current one; the pieces
    of an item are joined with a single space.

    Raises IMDbDataAccessError if ``dataF`` cannot be opened.
    """
    offset = getFullIndex(indexF, movieID)
    if offset is None:
        return []
    try:
        fdata = open(dataF, 'rt')
    except IOError as e:
        raise IMDbDataAccessError(str(e))
    items = []
    pieces = []
    try:
        fdata.seek(offset)
        # An empty first read means the offset is at the end of the file.
        if fdata.readline():
            while True:
                raw = fdata.readline()
                if not raw:
                    # readline() returns '' only at end of file.
                    break
                line = latin2utf(raw)
                if line.startswith('# '):
                    break
                if line.startswith('- '):
                    # Close the item collected so far and start a new one.
                    if pieces:
                        items.append(' '.join(pieces))
                    pieces = []
                    text = line[2:].strip()
                else:
                    text = line.strip()
                if text:
                    pieces.append(text)
    finally:
        # Never leave the data file open, even on errors.
        fdata.close()
    # Flush the last item: needed both after a '# ' marker and when the
    # entry is the last one in the file.
    if pieces:
        items.append(' '.join(pieces))
    return items

예제 #15

0

파일 보기

        return None
    idx = convBin(piddata, 'fulloffset')
    try:
        dfptr = open(compDF, 'rb')
    except IOError, e:
        import warnings
        warnings.warn('Unable to access companies information, '
                      'please run the companies4local.py script: %s' % e)
        return None
    dfptr.seek(idx)
    # Check companyID.
    chID = dfptr.read(3)
    if companyID != convBin(chID, 'companyID'):
        return None
    length = convBin(dfptr.read(2), 'longlength')
    name = latin2utf(dfptr.read(length))
    dfptr.close()
    return name


def getCompanyFilmography(companyID, compIF, compDF, movieIF, movieKF):
    """Build a filmography list for the specified companyID."""
    try:
        ifptr = open(compIF, 'rb')
    except IOError, e:
        import warnings
        warnings.warn('Unable to access companies information, '
                      'please run the companies4local.py script: %s' % e)
        return None
    ifptr.seek(4L * companyID)
    piddata = ifptr.read(4)

예제 #16

0

파일 보기

파일: __init__.py 프로젝트: 070499/repo-scripts

        # latin_1 encoded strings.
        name1, name2, name3 = [x.encode('latin_1', 'replace')
                                for x in name1, name2, name3]
        try:
            sn = search_name(keyFile, name1, name2, name3, results,
                    _scan_character)
        except IOError, e:
            if _scan_character:
                import warnings
                warnings.warn('Unable to access characters information: %s' % e)
                return []
            else:
                raise
        res = []
        for x in sn:
            tmpd = analyze_name(latin2utf(x[2]))
            res.append((x[0], (x[1], tmpd)))
        return res
except ImportError:
    import warnings
    warnings.warn('Unable to import the cutils.search_name function.'
                    '  Searching names using the "local" data access system'
                    ' will be REALLY slow.')

    from imdb.parser.common.locsql import scan_names

    def _readNamesKeyFile(keyFile):
        """Iterate over the given file, returning tuples suited for
        the common.locsql.scan_names function."""
        try: kf = open(keyFile, 'r')
        except IOError, e: raise IMDbDataAccessError, str(e)

예제 #17

0

파일 보기

        # latin_1 encoded strings.
        name1, name2, name3 = [x.encode('latin_1', 'replace')
                                for x in name1, name2, name3]
        try:
            sn = search_name(keyFile, name1, name2, name3, results,
                    _scan_character)
        except IOError, e:
            if _scan_character:
                import warnings
                warnings.warn('Unable to access characters information: %s' % e)
                return []
            else:
                raise
        res = []
        for x in sn:
            tmpd = analyze_name(latin2utf(x[2]))
            res.append((x[0], (x[1], tmpd)))
        return res
except ImportError:
    import warnings
    warnings.warn('Unable to import the cutils.search_name function.'
                    '  Searching names using the "local" data access system'
                    ' will be REALLY slow.')

    from imdb.parser.common.locsql import scan_names

    def _readNamesKeyFile(keyFile):
        """Iterate over the given file, returning tuples suited for
        the common.locsql.scan_names function."""
        try: kf = open(keyFile, 'r')
        except IOError, e: raise IMDbDataAccessError, str(e)

예제 #18

0

파일 보기

파일: movieParser.py 프로젝트: bopopescu/ServerStatus

def parseMinusList(movieID, dataF, indexF):
    """Parser for lists like goofs.data, crazy-credits.data and so on.

    Looks up the entry offset with ``getFullIndex(indexF, movieID)`` and
    returns an empty list when there is none.  Otherwise the line at the
    offset is skipped and the following lines are parsed until a line
    starting with '# ' or the end of the file: a '- ' prefix starts a new
    item, other non-blank lines are added to the current item.  The
    returned list holds one string per item, pieces joined by a space.

    Raises IMDbDataAccessError if ``dataF`` cannot be opened.
    """
    offset = getFullIndex(indexF, movieID)
    if offset is None:
        return []
    try:
        fdata = open(dataF, 'rt')
    except IOError as e:
        raise IMDbDataAccessError(str(e))
    rlist = []
    tmplist = []
    try:
        fdata.seek(offset)
        # The first line is only checked for emptiness (end of file) and
        # otherwise discarded.
        line = fdata.readline()
        while line:
            line = fdata.readline()
            if not line:
                # End of file: leave the loop, the pending item is
                # stored below.
                break
            text = latin2utf(line)
            if text.startswith('# '):
                break
            if text.startswith('- '):
                if tmplist:
                    rlist.append(' '.join(tmplist))
                l = text[2:].strip()
                if l:
                    tmplist = [l]
                else:
                    tmplist = []
            else:
                l = text.strip()
                if l:
                    tmplist.append(l)
    finally:
        # Release the file handle whatever happens while parsing.
        fdata.close()
    # The item being collected when parsing stopped is part of the
    # result; without this the last item is lost at the end of the file.
    if tmplist:
        rlist.append(' '.join(tmplist))
    return rlist

예제 #19

0

파일 보기

            x.encode('latin_1', 'replace') for x in name1, name2, name3
        ]
        try:
            sn = search_name(keyFile, name1, name2, name3, results,
                             _scan_character)
        except IOError, e:
            if _scan_character:
                import warnings
                warnings.warn('Unable to access characters information: %s' %
                              e)
                return []
            else:
                raise
        res = []
        for x in sn:
            tmpd = analyze_name(latin2utf(x[2]))
            res.append((x[0], (x[1], tmpd)))
        return res
except ImportError:
    import warnings
    warnings.warn('Unable to import the cutils.search_name function.'
                  '  Searching names using the "local" data access system'
                  ' will be REALLY slow.')

    from imdb.parser.common.locsql import scan_names

    def _readNamesKeyFile(keyFile):
        """Iterate over the given file, returning tuples suited for
        the common.locsql.scan_names function."""
        try:
            kf = open(keyFile, 'r')

예제 #20

0

파일 보기

파일: characterParser.py 프로젝트: 070499/repo-scripts

        return None
    idx = convBin(piddata, 'fulloffset')
    try:
        dfptr = open(charDF, 'rb')
    except IOError, e:
        import warnings
        warnings.warn('Unable to access characters information, '
                        'please run the characters4local.py script: %s' % e)
        return None
    dfptr.seek(idx)
    # Check characterID.
    chID = dfptr.read(3)
    if characterID != convBin(chID, 'characterID'):
        return None
    length = convBin(dfptr.read(2), 'longlength')
    name = latin2utf(dfptr.read(length))
    dfptr.close()
    return name


def getCharacterFilmography(characterID, charIF, charDF, movieIF, movieKF,
                            personIF, personKF, limit=None):
    """Build a filmography list for the specified characterID."""
    try:
        ifptr = open(charIF, 'rb')
    except IOError, e:
        import warnings
        warnings.warn('Unable to access characters information, '
                    'please run the characters4local.py script: %s' % e)
        return None
    ifptr.seek(4L*characterID)

예제 #21

0

파일 보기

    return res


def getBio(personID, indexF, dataF):
    """Get biography information for the given person.

    The offset of the person's entry inside ``dataF`` is looked up with
    ``getFullIndex(indexF, personID)``; an empty dictionary is returned
    when no offset is found.  The line at the offset is skipped, then
    lines (converted with latin2utf) are collected until one starting
    with 'NM: ' or the end of the file; the collected lines are handed to
    _parseBiography, whose result is returned.

    Raises IMDbDataAccessError if ``dataF`` cannot be opened.
    """
    bioidx = getFullIndex(indexF, personID)
    if bioidx is None:
        return {}
    try:
        fbio = open(dataF, 'r')
    except IOError as e:
        raise IMDbDataAccessError(str(e))
    rlines = []
    try:
        fbio.seek(bioidx)
        # Discard the first line of the entry.
        fbio.readline()
        while True:
            line = latin2utf(fbio.readline())
            # An empty line means end of file; 'NM: ' opens another entry.
            if not line or line[:4] == 'NM: ':
                break
            rlines.append(line)
    finally:
        # Close the data file even if reading or decoding raises.
        fbio.close()
    return _parseBiography(rlines)


def getFilmography(dataF,
                   indexF,
                   keyF,
                   attrIF,
                   attrKF,
                   offset,
                   charNF=None,
                   doCast=0,
                   doWriters=0):

예제 #22

0

파일 보기

파일: personParser.py 프로젝트: 070499/repo-scripts

    return res


def getBio(personID, indexF, dataF):
    """Get biography information for the given person.

    ``getFullIndex(indexF, personID)`` gives the position of the entry in
    ``dataF``; with no position an empty dictionary is returned.  After
    skipping the line at that position, every line is converted with
    latin2utf and accumulated until the end of the file or a line
    starting with 'NM: '.  The return value is whatever _parseBiography
    builds from the accumulated lines.

    Raises IMDbDataAccessError if ``dataF`` cannot be opened.
    """
    bioidx = getFullIndex(indexF, personID)
    if bioidx is None:
        return {}
    try:
        fbio = open(dataF, 'r')
    except IOError as e:
        raise IMDbDataAccessError(str(e))
    try:
        fbio.seek(bioidx)
        # The line at the indexed offset is not part of the collected text.
        fbio.readline()
        rlines = []
        line = latin2utf(fbio.readline())
        while line and not line.startswith('NM: '):
            rlines.append(line)
            line = latin2utf(fbio.readline())
    finally:
        # The handle is released also when latin2utf or a read fails.
        fbio.close()
    return _parseBiography(rlines)


def getFilmography(dataF, indexF, keyF, attrIF, attrKF, offset,
                    charNF=None, doCast=0, doWriters=0):
    """Gather information from the given files about the
    person entry found at offset; return a list of Movie objects,
    with the relevant attributes."""
    name, res = getRawData(dataF, offset, doCast, doWriters)
    resList = []
    for movie in res:
        title = getLabel(movie['movieID'], indexF, keyF)