def test_destination_directory_levels_deep(self):
        """copy_dir must populate a destination nested two levels deep.

        The 'simple' scaffold fixture is copied into
        ``<scaffold_destination>/some/app`` and the resulting files are
        checked for existence and content.
        """
        from pecan.scaffolds import copy_dir

        source = (
            'pecan', os.path.join('tests', 'scaffold_fixtures', 'simple')
        )
        target = os.path.join(self.scaffold_destination, 'some', 'app')
        copy_dir(source, target, {}, out_=StringIO())

        foo_path = os.path.join(target, 'foo')
        spam_path = os.path.join(target, 'bar', 'spam.txt')

        assert os.path.isfile(foo_path)
        assert os.path.isfile(spam_path)

        with open(foo_path, 'r') as handle:
            assert handle.read().strip() == 'YAR'
        with open(spam_path, 'r') as handle:
            assert handle.read().strip() == 'Pecan'
示例#2
0
文件: views.py 项目: willzhang05/ion
def picture_view(request, user_id, year=None):
    """Displays a view of a user's picture.

    Args:
        request
            The incoming request (not read by this view).
        user_id
            The ID of the user whose picture is being fetched.
        year
            The user's picture from this year is fetched. If not
            specified, use the preferred picture.

    Returns:
        An ``HttpResponse`` with content type ``image/jpeg`` whose body is
        the selected photo, or the default profile picture when no photo
        data is available.

    Raises:
        Http404: if the user lookup fails or yields ``None``.
    """
    try:
        user = User.get_user(id=user_id)
    except User.DoesNotExist:
        raise Http404
    if user is None:
        raise Http404

    default_image_path = os.path.join(settings.PROJECT_ROOT, "static/img/default_profile_pic.png")

    # Bound up front so the filename below never touches an unbound name
    # when ``year`` is supplied but falsy.
    preferred = None
    data = None
    have_photo = False

    if year is None:
        preferred = user.preferred_photo
        if preferred is not None and preferred.endswith("Photo"):
            preferred = preferred[:-len("Photo")]

        if preferred == "AUTO":
            data = user.default_photo()
            # Only a missing photo (None) falls back to the default image
            # here; an empty payload is served as-is.
            have_photo = data is not None
        elif preferred in Grade.names:
            data = user.photo_binary(preferred)
            have_photo = bool(data)
    else:
        data = user.photo_binary(year)
        have_photo = bool(data)

    if have_photo:
        image_buffer = StringIO(data)
    else:
        image_buffer = io.open(default_image_path, mode="rb")

    try:
        response = HttpResponse(content_type="image/jpeg")
        response["Content-Disposition"] = "filename={}_{}.jpg".format(user_id, year or preferred)
        try:
            img = image_buffer.read()
        except UnicodeDecodeError:
            # Fall back to the default picture; the context manager makes
            # sure this second handle is closed as well.
            with io.open(default_image_path, mode="rb") as fallback:
                img = fallback.read()
    finally:
        image_buffer.close()

    response.write(img)
    return response
示例#3
0
def get_migration_status(**options):
    # type: (**Any) -> str
    """Return the output of ``showmigrations --list`` as plain text.

    Recognised options (all optional): ``verbosity``, ``app_label``,
    ``database``, ``no_color``, ``settings`` and ``traceback``.  When
    ``settings`` is not supplied, the ``DJANGO_SETTINGS_MODULE`` environment
    variable is used (``KeyError`` if it is unset).

    The command output is captured in memory and the ``ESC[1m`` / ``ESC[0m``
    escape sequences are stripped before it is returned.
    """
    verbosity = options.get('verbosity', 1)

    # Import each app's ``management`` submodule when it has one.
    for app_config in apps.get_app_configs():
        if module_has_submodule(app_config.module, "management"):
            import_module('.management', app_config.name)

    app_labels = [options['app_label']] if options.get('app_label') else None
    db = options.get('database', DEFAULT_DB_ALIAS)
    # Only consult the environment when no explicit settings were passed, so
    # a missing DJANGO_SETTINGS_MODULE cannot break an explicit call.
    if 'settings' in options:
        settings_module = options['settings']
    else:
        settings_module = os.environ['DJANGO_SETTINGS_MODULE']

    out = StringIO()
    call_command(
        'showmigrations',
        '--list',
        app_labels=app_labels,
        database=db,
        no_color=options.get('no_color', False),
        settings=settings_module,
        stdout=out,
        traceback=options.get('traceback', True),
        verbosity=verbosity,
    )
    connections.close_all()
    # Raw string: ``\[`` is not a valid escape in a normal string literal.
    return re.sub(r'\x1b\[(1|0)m', '', out.getvalue())
示例#4
0
def get_migration_status(**options):
    # type: (**Any) -> str
    """Return the output of ``showmigrations --list`` as plain text.

    Recognised options (all optional): ``verbosity``, ``app_label``,
    ``database``, ``no_color``, ``settings`` and ``traceback``.  When
    ``settings`` is not supplied, the ``DJANGO_SETTINGS_MODULE`` environment
    variable is used (``KeyError`` if it is unset).

    The command output is captured in memory and the ``ESC[1m`` / ``ESC[0m``
    escape sequences are stripped before it is returned.
    """
    verbosity = options.get('verbosity', 1)

    # Import each app's ``management`` submodule when it has one.
    for app_config in apps.get_app_configs():
        if module_has_submodule(app_config.module, "management"):
            import_module('.management', app_config.name)

    app_labels = [options['app_label']] if options.get('app_label') else None
    db = options.get('database', DEFAULT_DB_ALIAS)
    # Only consult the environment when no explicit settings were passed, so
    # a missing DJANGO_SETTINGS_MODULE cannot break an explicit call.
    if 'settings' in options:
        settings_module = options['settings']
    else:
        settings_module = os.environ['DJANGO_SETTINGS_MODULE']

    out = StringIO()
    call_command(
        'showmigrations',
        '--list',
        app_labels=app_labels,
        database=db,
        no_color=options.get('no_color', False),
        settings=settings_module,
        stdout=out,
        traceback=options.get('traceback', True),
        verbosity=verbosity,
    )
    connections.close_all()
    # Raw string: ``\[`` is not a valid escape in a normal string literal.
    return re.sub(r'\x1b\[(1|0)m', '', out.getvalue())
示例#5
0
def colorize(source):
    """
    Return the colorized rendering of ``source`` as a string.

    A ``Parser`` formats ``source`` into an in-memory buffer; nothing is
    written to disk here.  (Presumably the output is HTML -- confirm
    against ``Parser``.)
    """
    buffer = StringIO()
    parser = Parser(source, buffer)
    parser.format(None, None)
    buffer.flush()
    return buffer.getvalue()
示例#6
0
def colorize(source):
    """
    Return the colorized rendering of ``source`` as a string.

    A ``Parser`` formats ``source`` into an in-memory buffer; nothing is
    written to disk here.  (Presumably the output is HTML -- confirm
    against ``Parser``.)
    """
    buffer = StringIO()
    parser = Parser(source, buffer)
    parser.format(None, None)
    buffer.flush()
    return buffer.getvalue()
示例#7
0
 def _crypt_py2(cls, op, key, iv, data):
     """Run ``data`` through an unpadded ``aes_128_cbc`` ``EVP.Cipher``.

     ``op`` is forwarded to the cipher unchanged.  Returns the concatenation
     of every ``cipher.update`` result followed by ``cipher.final()``.
     """
     cipher = EVP.Cipher(alg='aes_128_cbc', key=key, iv=iv, op=op, padding=False)
     source = StringIO(data)
     sink = StringIO()
     chunk = source.read()
     while chunk:
         sink.write(cipher.update(chunk))
         chunk = source.read()
     sink.write(cipher.final())
     return sink.getvalue()
示例#8
0
 def _crypt_py2(cls, op, key, iv, data):
     """Run ``data`` through an unpadded ``aes_128_cbc`` ``EVP.Cipher``.

     ``op`` is forwarded to the cipher unchanged.  Returns the concatenation
     of every ``cipher.update`` result followed by ``cipher.final()``.
     """
     cipher = EVP.Cipher(alg='aes_128_cbc', key=key, iv=iv, op=op, padding=False)
     source = StringIO(data)
     sink = StringIO()
     chunk = source.read()
     while chunk:
         sink.write(cipher.update(chunk))
         chunk = source.read()
     sink.write(cipher.final())
     return sink.getvalue()
示例#9
0
     def read(self, html=None, code='@'):
         '''Get the content of the clipboard.

         html: BOOL. Whether to get the raw HTML code of the formatted text
             on the clipboard.
         code: coding of the text on the clipboard.

         When both ``html`` and ``code`` are falsy the parent class's
         ``read()`` is used; otherwise the clipboard is streamed into an
         in-memory buffer whose contents are returned.'''
         if html or code:
             buffer = StringIO()
             clipb.clipboard_to_stream(buffer, mode=None, code=code, null=None, html=html)
             return buffer.getvalue()
         return super().read()
示例#10
0
     def read(self, html=None, code='@'):
         '''Get the content of the clipboard.

         html: BOOL. Whether to get the raw HTML code of the formatted text
             on the clipboard.
         code: coding of the text on the clipboard.

         When both ``html`` and ``code`` are falsy the parent class's
         ``read()`` is used; otherwise the clipboard is streamed into an
         in-memory buffer whose contents are returned.'''
         if html or code:
             buffer = StringIO()
             clipb.clipboard_to_stream(buffer, mode=None, code=code, null=None, html=html)
             return buffer.getvalue()
         return super().read()
示例#11
0
    def local_changes(self, path=None):
        """Collect the status of locally changed files from ``git status -z``.

        :param path: optional path given to git after ``--``.
        :returns: dict accumulated from ``self.local_status(staged, worktree,
                  filenames)`` for every entry whose URL is not blacklisted;
                  empty when git reports nothing.
        :raises ValueError: if a record is missing its NUL terminator.
        """
        # -z is stable like --porcelain; see the git status documentation for details
        cmd = ["status", "-z", "--ignore-submodules=all"]
        if path is not None:
            cmd.extend(["--", path])

        rv = {}

        data = self.git(*cmd)
        if data == "":
            return rv

        assert data[-1] == "\0"

        pos = 0
        end = len(data)
        while pos < end:
            # First two characters are the status in the stage (index) and
            # working tree, respectively, followed by a single space.
            staged = data[pos]
            worktree = data[pos + 1]
            separator = data[pos + 2]
            # The read happens outside the assert so the parser stays in sync
            # even when assertions are disabled (python -O).
            assert separator == " "
            pos += 3

            # When a file is renamed, there are two files, the source and the destination
            files = 2 if staged == "R" else 1

            filenames = []
            for _ in range(files):
                # ``index`` raises ValueError on a truncated record instead of
                # spinning forever waiting for a NUL that never comes.
                terminator = data.index("\0", pos)
                filenames.append(data[pos:terminator])
                pos = terminator + 1

            if not is_blacklisted(rel_path_to_url(filenames[0], self.url_base)):
                rv.update(self.local_status(staged, worktree, filenames))

        return rv
示例#12
0
文件: s3.py 项目: wsk2001/ubd
    def write(self, offset, data):
        """
        s3handler.write(offset, data)

        Write data to this volume starting at offset.

        The byte range is split along ``self.block_size`` boundaries.  Blocks
        that are completely covered are written directly; a partially covered
        first or last block is fetched with ``read_block``, spliced, and
        written back.  Writing empty data is a no-op.
        """
        if not data:
            # Nothing to store; avoid a pointless read-modify-write of the
            # block containing ``offset``.
            return

        start_block, start_offset = divmod(offset, self.block_size)
        end_block, end_offset = divmod(offset + len(data), self.block_size)

        if end_offset == 0:
            # The write ends exactly on a block boundary: it finishes at the
            # end of the previous block rather than at byte 0 of the next.
            end_block -= 1
            end_offset = self.block_size

        to_write = StringIO(data)
        for block_id in range(start_block, end_block + 1):
            start_pos = start_offset if block_id == start_block else 0
            end_pos = end_offset if block_id == end_block else self.block_size

            if start_pos == 0 and end_pos == self.block_size:
                # Whole block is replaced.
                block_data = to_write.read(self.block_size)
            else:
                # Read-modify-write: keep the bytes outside [start_pos, end_pos).
                existing = self.read_block(block_id)
                spliced = to_write.read(end_pos - start_pos)
                block_data = existing[:start_pos] + spliced + existing[end_pos:]

            self.write_block(block_id, block_data)
示例#13
0
 def render(self, root, req):
     """Serialize ``root`` and its descendants as a ``phyloxml`` document.

     ``self.depth_limit`` is set to 2 when ``root.child_language_count``
     exceeds 350 and to 100 otherwise.  Returns the XML text, including
     the XML declaration.
     """
     if root.child_language_count > 350:
         self.depth_limit = 2
     else:
         self.depth_limit = 100
     et.register_namespace('', self.namespace)

     document = self.element('phyloxml')
     phylogeny = self.element('phylogeny', rooted="true")
     for tag in ('name', 'description'):
         phylogeny.append(self.element(tag, root.name))

     root_clade = self.clade(root, req)
     self.append_children(root_clade, root, req, 0)
     phylogeny.append(root_clade)
     document.append(phylogeny)

     buffer = StringIO()
     xml_tree = et.ElementTree(element=document)
     xml_tree.write(buffer, encoding='utf8', xml_declaration=True)
     return buffer.getvalue()
示例#14
0
    def test_copy_no_column_limit(self):
        """copy_to / copy_from must cope with 200 long column names."""
        cols = ["c%050d" % i for i in range(200)]
        column_defs = ',\n'.join("%s int" % c for c in cols)

        curs = self.conn.cursor()
        curs.execute('CREATE TEMPORARY TABLE manycols (%s)' % column_defs)
        curs.execute("INSERT INTO manycols DEFAULT VALUES")

        buf = StringIO()
        curs.copy_to(buf, "manycols", columns=cols)
        # The single default row exports as one NULL marker per column.
        self.assertEqual(buf.getvalue().split(), ['\\N'] * len(cols))

        # Re-import the exported row: the table must now hold two rows.
        buf.seek(0)
        curs.copy_from(buf, "manycols", columns=cols)
        curs.execute("select count(*) from manycols;")
        self.assertEqual(curs.fetchone()[0], 2)
示例#15
0
    def test_copy_no_column_limit(self):
        """copy_to / copy_from must cope with 200 long column names."""
        cols = ["c%050d" % i for i in range(200)]
        column_defs = ',\n'.join("%s int" % c for c in cols)

        curs = self.conn.cursor()
        curs.execute('CREATE TEMPORARY TABLE manycols (%s)' % column_defs)
        curs.execute("INSERT INTO manycols DEFAULT VALUES")

        buf = StringIO()
        curs.copy_to(buf, "manycols", columns=cols)
        # The single default row exports as one NULL marker per column.
        self.assertEqual(buf.getvalue().split(), ['\\N'] * len(cols))

        # Re-import the exported row: the table must now hold two rows.
        buf.seek(0)
        curs.copy_from(buf, "manycols", columns=cols)
        curs.execute("select count(*) from manycols;")
        self.assertEqual(curs.fetchone()[0], 2)
示例#16
0
文件: excel.py 项目: Woseseltops/clld
    def render(self, ctx, req):
        """Render ``ctx`` as a one-sheet workbook and return its content.

        Row 0 holds ``self.header(ctx, req)``; each of the items returned
        by ``ctx.get_query(limit=1000)`` fills one following row via
        ``self.row``.  Returns ``''`` when ``xlwt`` is unavailable.
        """
        if not xlwt:
            return ''  # pragma: no cover

        workbook = xlwt.Workbook()
        sheet = workbook.add_sheet(ctx.__unicode__())

        for col_index, title in enumerate(self.header(ctx, req)):
            sheet.write(0, col_index, title)

        # Data rows start at 1, directly below the header row.
        for row_index, item in enumerate(ctx.get_query(limit=1000), 1):
            for col_index, value in enumerate(self.row(ctx, req, item)):
                sheet.write(row_index, col_index, value)

        buffer = StringIO()
        workbook.save(buffer)
        return buffer.getvalue()
示例#17
0
文件: excel.py 项目: mitcho/clld
    def render(self, ctx, req):
        """Render ``ctx`` as a one-sheet workbook and return its content.

        Row 0 holds ``self.header(ctx, req)``; each of the items returned
        by ``ctx.get_query(limit=1000)`` fills one following row via
        ``self.row``.  Returns ``''`` when ``xlwt`` is unavailable.
        """
        if not xlwt:
            return ''  # pragma: no cover

        workbook = xlwt.Workbook()
        sheet = workbook.add_sheet(ctx.__unicode__())

        for col_index, title in enumerate(self.header(ctx, req)):
            sheet.write(0, col_index, title)

        # Data rows start at 1, directly below the header row.
        for row_index, item in enumerate(ctx.get_query(limit=1000), 1):
            for col_index, value in enumerate(self.row(ctx, req, item)):
                sheet.write(row_index, col_index, value)

        buffer = StringIO()
        workbook.save(buffer)
        return buffer.getvalue()
示例#18
0
 def old(self, no_reduce_db):
     """Add hydrogens by piping this structure through the ``reduce`` program.

     The PDB text produced by ``self.write_pdb`` is sent to reduce's stdin.
     Reduce's stderr is written to ``reduce_info.log`` and its stdout is
     parsed with ``parmed.read_PDB`` into ``self.parm``.

     Parameters
     ----------
     no_reduce_db : bool
         When true, an empty ``./dummydb`` file is created, handed to reduce
         through ``-DB`` and removed again afterwards.

     Returns
     -------
     self
     """
     # Created before the ``try`` so the ``finally`` clause can always close
     # it, even when an early step (e.g. ``touch``) fails.
     fileobj = StringIO()
     try:
         if no_reduce_db:
             touch('./dummydb')
         self.write_pdb(fileobj)
         fileobj.seek(0)

         # Prefer the executable from the LIBTBX build tree; fall back to
         # ``phenix.reduce`` when the variable is unset or the file is missing.
         reduce = 'phenix.reduce'
         build_dir = os.getenv('LIBTBX_BUILD')
         if build_dir is not None:
             candidate = os.path.join(build_dir, 'reduce', 'exe', 'reduce')
             if os.path.exists(candidate):
                 reduce = candidate

         cmd = [reduce, '-BUILD', '-NUC', '-NOFLIP']
         if no_reduce_db:
             # NOTE(review): flag and path are passed as ONE argv element,
             # exactly as before -- confirm reduce expects it this way.
             cmd.append('-DB ./dummydb')
         cmd.append('-')

         process = subprocess.Popen(cmd,
                                    stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE)
         out, err = process.communicate(str.encode(fileobj.read()))
         out = out.decode()
         err = err.decode()
         if process.wait():
             logger.error("REDUCE returned non-zero exit status: "
                          "See reduce_info.log for more details")
         # print out the reduce log even if it worked
         with open('reduce_info.log', 'w') as fh:
             fh.write(err)
         pdbh = StringIO(out)
         # not using load_file since it does not read StringIO
         print('-' * 80)
         # NOTE(review): this prints the buffer object, not its contents.
         print(pdbh)
         print('-' * 80)
         self.parm = parmed.read_PDB(pdbh)
     finally:
         fileobj.close()
         if no_reduce_db:
             os.unlink('./dummydb')
     return self
示例#19
0
def do_http(method, url, body=""):
    """Send ``body`` to ``url`` with the given HTTP ``method``.

    ``body`` is either a bytestring or a file-like object offering
    ``tell``, ``seek`` and ``read``; ``unicode`` is rejected with
    ``TypeError``.  The Content-Length header is the total length of the
    body stream, and the body is sent from its current position.

    Returns the connection's response object, or ``BadResponse(url, err)``
    when finishing the headers fails with ``socket_error``.  Raises
    ``ValueError`` for schemes other than http/https.
    """
    if isinstance(body, str):
        body = StringIO(body)
    elif isinstance(body, unicode):
        raise TypeError("do_http body must be a bytestring, not unicode")
    else:
        # We must give a Content-Length header to twisted.web, otherwise it
        # seems to get a zero-length file. I suspect that "chunked-encoding"
        # may fix this.
        for required in ("tell", "seek", "read"):
            assert getattr(body, required)

    scheme, host, port, path = parse_url(url)
    if scheme == "https":
        conn = httplib.HTTPSConnection(host, port)
    elif scheme == "http":
        conn = httplib.HTTPConnection(host, port)
    else:
        raise ValueError("unknown scheme '%s', need http or https" % scheme)

    conn.putrequest(method, path)
    conn.putheader("Hostname", host)
    conn.putheader("User-Agent", allmydata.__full_version__ + " (tahoe-client)")
    conn.putheader("Accept", "text/plain, application/octet-stream")
    conn.putheader("Connection", "close")

    # Measure the stream by seeking to its end, then restore the position.
    saved_pos = body.tell()
    body.seek(0, os.SEEK_END)
    total_length = body.tell()
    body.seek(saved_pos)
    conn.putheader("Content-Length", str(total_length))

    try:
        conn.endheaders()
    except socket_error as err:
        return BadResponse(url, err)

    chunk = body.read(8192)
    while chunk:
        conn.send(chunk)
        chunk = body.read(8192)

    return conn.getresponse()
示例#20
0
def do_http(method, url, body=""):
    """Send ``body`` to ``url`` with the given HTTP ``method``.

    ``body`` is either a bytestring or a file-like object offering
    ``tell``, ``seek`` and ``read``; ``unicode`` is rejected with
    ``TypeError``.  The Content-Length header is the total length of the
    body stream, and the body is sent from its current position.

    Returns the connection's response object, or ``BadResponse(url, err)``
    when finishing the headers fails with ``socket_error``.  Raises
    ``ValueError`` for schemes other than http/https.
    """
    if isinstance(body, str):
        body = StringIO(body)
    elif isinstance(body, unicode):
        raise TypeError("do_http body must be a bytestring, not unicode")
    else:
        # We must give a Content-Length header to twisted.web, otherwise it
        # seems to get a zero-length file. I suspect that "chunked-encoding"
        # may fix this.
        for required in ("tell", "seek", "read"):
            assert getattr(body, required)

    scheme, host, port, path = parse_url(url)
    if scheme == "https":
        conn = httplib.HTTPSConnection(host, port)
    elif scheme == "http":
        conn = httplib.HTTPConnection(host, port)
    else:
        raise ValueError("unknown scheme '%s', need http or https" % scheme)

    conn.putrequest(method, path)
    conn.putheader("Hostname", host)
    conn.putheader("User-Agent", allmydata.__full_version__ + " (tahoe-client)")
    conn.putheader("Accept", "text/plain, application/octet-stream")
    conn.putheader("Connection", "close")

    # Measure the stream by seeking to its end, then restore the position.
    saved_pos = body.tell()
    body.seek(0, os.SEEK_END)
    total_length = body.tell()
    body.seek(saved_pos)
    conn.putheader("Content-Length", str(total_length))

    try:
        conn.endheaders()
    except socket_error as err:
        return BadResponse(url, err)

    chunk = body.read(8192)
    while chunk:
        conn.send(chunk)
        chunk = body.read(8192)

    return conn.getresponse()
示例#21
0
    def add_hydrogen(self, no_reduce_db=False):
        ''' Use reduce program to add hydrogen

        The structure is serialized with ``self.write_pdb``, passed to
        reduce on stdin, and reduce's stdout is parsed back with
        ``parmed.read_PDB`` into ``self.parm``.

        Parameters
        ----------
        no_reduce_db : bool, default False
            If true, an empty ``./dummydb`` file is created (or its
            timestamp refreshed) and given to reduce via ``-DB``.  The file
            is not removed by this method.

        Returns
        -------
        self, with ``self.parm`` replaced by the parsed reduce output

        Requires
        --------
        reduce
        '''
        from mmtbx.utils import run_reduce_with_timeout

        flags = ['-BUILD', '-NUC', '-NOFLIP']
        if no_reduce_db:
            # Equivalent of ``touch ./dummydb``.
            with open('./dummydb', 'a'):
                os.utime('./dummydb', None)
            flags.append('-DB ./dummydb')
        # Trailing '-' as in the original argument string.
        flags.append('-')
        parameters = ' '.join(flags)

        pdb_text = StringIO()
        self.write_pdb(pdb_text)

        reduce_out = run_reduce_with_timeout(
            parameters=parameters,
            stdin_lines=pdb_text.getvalue(),
            stdout_splitlines=False,
        )
        assert reduce_out.return_code == 0

        pdbh = StringIO()
        pdbh.write(reduce_out.stdout_buffer)
        pdbh.seek(0)
        self.parm = parmed.read_PDB(pdbh)
        return self
示例#22
0
    def dumps(self, indent=4):
        """
        Return the dataset serialized as text.

        Delegates to ``self.dump`` with an in-memory buffer and returns
        everything that was written to it.

        Example:
            >>> from coco_wrangler.coco_api import *
            >>> dataset = demo_coco_data()
            >>> self = CocoDataset(dataset, tag='demo')
            >>> text = self.dumps()
            >>> print(text)
            >>> self2 = CocoDataset(json.loads(text), tag='demo2')
            >>> assert self2.dataset == self.dataset
            >>> assert self2.dataset is not self.dataset
        """
        from six.moves import cStringIO as StringIO
        buffer = StringIO()
        self.dump(buffer, indent=indent)
        return buffer.getvalue()
示例#23
0
    def pt_upload(self, REQUEST, file=''):
        """Replace the document with the text in file.

        ``file`` may be a string or an object with ``read()``.  Content
        starting with ``"PK"`` is treated as an OOo zip file: its attached
        files are re-packed into ``self.OLE_documents_zipstring``, the MIME
        type is stored in ``self.content_type`` and the prepared content XML
        is uploaded instead of the raw data.
        """
        if SUPPORTS_WEBDAV_LOCKS and self.wl_isLocked():
            raise ResourceLockedError("File is locked via WebDAV")

        if not isinstance(file, basestring):
            if not file:
                raise ValueError('File not specified')
            file = file.read()

        # FIXME: this condition is probably not enough
        if not file.startswith("PK"):
            return ZopePageTemplate.pt_upload(self, REQUEST, file)

        # this is a OOo zip file, extract the content
        builder = OOoBuilder(file)
        attached_names = []
        for name in builder.getNameList():
            if (name.startswith(self._OLE_directory_prefix)
                    or name.startswith('Pictures')
                    or name == 'META-INF/manifest.xml'):
                attached_names.append(name)

        # destroy a possibly pre-existing OLE document set
        if self.OLE_documents_zipstring:
            self.OLE_documents_zipstring = None

        # create a zip archive and store it
        if attached_names:
            archive_buffer = StringIO()
            try:
                archive = ZipFile(archive_buffer,
                                  mode='w',
                                  compression=ZIP_DEFLATED)
            except RuntimeError:
                # Retry without compression when ZIP_DEFLATED is rejected.
                archive = ZipFile(archive_buffer, mode='w')
            for name in attached_names:
                archive.writestr(name, builder.extract(name))
            archive.close()
            self.OLE_documents_zipstring = archive_buffer.getvalue()

        self.content_type = builder.getMimeType()
        file = builder.prepareContentXml(self.ooo_xml_file_id)
        return ZopePageTemplate.pt_upload(self, REQUEST, file)
示例#24
0
class FileCache( Iterator ):
    """
    Wrapper for a file that cache blocks of data in memory.

    **NOTE:** this is currently an incomplete file-like object, it only
    supports seek, tell, and readline (plus iteration). Reading bytes is
    currently not implemented.
    """
    def __init__( self, file, size, cache_size=DEFAULT_CACHE_SIZE,
                                    block_size=DEFAULT_BLOCK_SIZE ):
        """
        Create a new `FileCache` wrapping the file-like object `file` that
        has total size `size` and caching blocks of size `block_size`.
        At most `cache_size` blocks are handed to the LRU cache.
        """
        self.file = file
        self.size = size
        self.cache_size = cache_size
        self.block_size = block_size
        # Setup the cache
        self.nblocks = ( self.size // self.block_size ) + 1
        self.cache = LRUCache( self.cache_size )
        # Position in file
        self.dirty = True
        self.at_eof = False
        self.file_pos = 0
        self.current_block_index = -1
        self.current_block = None

    def fix_dirty( self ):
        """
        Position `current_block` at `file_pos`, loading the containing
        block when it is not the current one.
        """
        chunk, offset = self.get_block_and_offset( self.file_pos )
        if self.current_block_index != chunk:
            self.current_block = StringIO( self.load_block( chunk ) )
            self.current_block.read( offset )
            self.current_block_index = chunk
        else:
            self.current_block.seek( offset )
        self.dirty = False

    def get_block_and_offset( self, index ):
        """
        Split absolute position `index` into ( block number, offset in block ).
        """
        return int( index // self.block_size ), int( index % self.block_size )

    def load_block( self, index ):
        """
        Return the data of block `index`, reading it from the wrapped file
        (and caching it) when it is not cached yet.
        """
        if index in self.cache:
            return self.cache[index]
        else:
            real_offset = index * self.block_size
            self.file.seek( real_offset )
            block = self.file.read( self.block_size )
            self.cache[index] = block
            return block

    def seek( self, offset, whence=0 ):
        """
        Move the file pointer to a particular offset.

        `whence` 0 is absolute and 1 is relative to the current position.
        NOTE: for `whence` 2 the target is `size - offset`, i.e. `offset`
        counts backwards from the end (unlike `file.seek`); kept as-is for
        existing callers.
        """
        # Determine absolute target position
        if whence == 0:
            target_pos = offset
        elif whence == 1:
            target_pos = self.file_pos + offset
        elif whence == 2:
            target_pos = self.size - offset
        else:
            raise Exception( "Invalid `whence` argument: %r" % ( whence, ) )
        # Check if this is a noop
        if target_pos == self.file_pos:
            return
        # Verify it is valid
        assert 0 <= target_pos < self.size, "Attempt to seek outside file"
        # Move the position
        self.file_pos = target_pos
        # A valid target lies before the end of the file, so a previously
        # reached EOF no longer applies.
        self.at_eof = False
        # Mark as dirty, the next time a read is done we need to actually
        # move the position in the cached block
        self.dirty = True

    def tell( self ):
        """
        Return the current position in the file.
        """
        return self.file_pos

    def readline( self ):
        """
        Return the next line, joining pieces when it spans several blocks.
        Returns "" once the end of the last block has been reached.
        """
        if self.dirty:
            self.fix_dirty()
        if self.at_eof:
            return ""
        rval = []
        while 1:
            line = self.current_block.readline()
            # Keep the logical position in step with what has been consumed
            # so that `tell` and later `seek` calls see the real position.
            self.file_pos += len( line )
            rval.append( line )
            if len( line ) > 0 and line[-1] == '\n':
                break
            elif self.current_block_index == self.nblocks - 1:
                self.at_eof = True
                break
            else:
                self.current_block_index += 1
                self.current_block = StringIO( self.load_block( self.current_block_index ) )
        return "".join( rval )

    def __next__( self ):
        line = self.readline()
        if line == "":
            raise StopIteration
        return line

    def __iter__( self ):
        return self

    def close( self ):
        """
        Close the wrapped file.
        """
        self.file.close()
示例#25
0
class SeekableLzopFile( Iterator ):
    """
    Filelike object supporting read-only semi-random access to LZO
    compressed files for which an offset table has been generated.

    The table is a text file: a line starting with "s" gives the
    uncompressed block size, and each line starting with "o" gives the
    offset, compressed size and uncompressed size of one block.
    """

    def __init__( self, filename, table_filename, block_cache_size=0, **kwargs ):
        """
        Open `filename` using the block table in `table_filename`.
        Decompressed blocks are kept in an LRU cache when
        `block_cache_size` is greater than zero.
        """
        self.filename = filename
        self.table_filename = table_filename
        self.init_table()
        # Compressed data is binary; text mode could corrupt it.
        self.file = open( self.filename, "rb" )
        self.dirty = True
        self.at_eof = False
        self.file_pos = 0
        self.current_block_index = -1
        self.current_block = None
        if block_cache_size > 0:
            self.cache = lrucache.LRUCache( block_cache_size )
        else:
            self.cache = None

    def init_table( self ):
        """
        Parse the offset table into `block_size`, `block_info` and `nblocks`.
        """
        self.block_size = None
        self.block_info = []
        # Position of corresponding block in compressed file (in bytes)
        with open( self.table_filename ) as table:
            for line in table:
                fields = line.split()
                if not fields:
                    # Tolerate blank lines instead of failing on fields[0]
                    continue
                if fields[0] == "s":
                    self.block_size = int( fields[1] )
                if fields[0] == "o":
                    offset = int( fields[1] )
                    compressed_size = int( fields[2] )
                    size = int( fields[3] )
                    self.block_info.append( ( offset, compressed_size, size ) )
        self.nblocks = len( self.block_info )

    def close( self ):
        """
        Close the underlying compressed file.
        """
        self.file.close()

    def load_block( self, index ):
        """
        Return the decompressed data of block `index`, using the cache
        when one is configured.
        """
        if self.cache is not None and index in self.cache:
            return self.cache[index]
        else:
            offset, csize, size = self.block_info[ index ]
            # Get the block of compressed data
            self.file.seek( offset )
            data = self.file.read( csize )
            # Need to prepend a header for python-lzo module (silly)
            data = b'\xf0' + struct.pack( "!I", size ) + data
            value = lzo.decompress( data )
            if self.cache is not None:
                self.cache[index] = value
            return value

    def fix_dirty( self ):
        """
        Position `current_block` at `file_pos`, loading the containing
        block when it is not the current one.
        """
        chunk, offset = self.get_block_and_offset( self.file_pos )
        if self.current_block_index != chunk:
            self.current_block = StringIO( self.load_block( chunk ) )
            self.current_block.read( offset )
            self.current_block_index = chunk
        else:
            self.current_block.seek( offset )
        self.dirty = False

    def get_block_and_offset( self, index ):
        """
        Split absolute position `index` into ( block number, offset in block ).
        """
        return int( index // self.block_size ), int( index % self.block_size )

    def seek( self, offset, whence=0 ):
        """
        Move the file pointer to a particular offset.

        `whence` 0 is absolute and 1 is relative to the current position;
        seeking from the end ( `whence` 2 ) is not supported.
        """
        # Determine absolute target position
        if whence == 0:
            target_pos = offset
        elif whence == 1:
            target_pos = self.file_pos + offset
        elif whence == 2:
            raise Exception( "seek from end not supported" )
        else:
            raise Exception( "Invalid `whence` argument: %r" % ( whence, ) )
        # Check if this is a noop
        if target_pos == self.file_pos:
            return
        # Move the position
        self.file_pos = target_pos
        # The position changed, so a previously reached EOF must not make
        # the next readline return "" straight away.
        self.at_eof = False
        # Mark as dirty, the next time a read is done we need to actually
        # move the position in the decompressed block
        self.dirty = True

    def tell( self ):
        """
        Return the current position in the uncompressed data.
        """
        return self.file_pos

    def readline( self ):
        """
        Return the next line, joining pieces when it spans several blocks.
        Returns "" once the end of the last block has been reached.
        """
        if self.dirty:
            self.fix_dirty()
        if self.at_eof:
            return ""
        rval = []
        while 1:
            line = self.current_block.readline()
            self.file_pos += len( line )
            rval.append( line )
            if len( line ) > 0 and line[-1] == '\n':
                break
            elif self.current_block_index == self.nblocks - 1:
                self.at_eof = True
                break
            else:
                self.current_block_index += 1
                self.current_block = StringIO( self.load_block( self.current_block_index ) )
        return "".join( rval )

    def __next__( self ):
        line = self.readline()
        if line == "":
            raise StopIteration
        return line

    def __iter__( self ):
        return self
示例#26
0
class SeekableLzopFile(Iterator):
    """
    Filelike object supporting read-only semi-random access to LZO
    compressed files for which an offset table has been generated.

    The table is a text file: a line starting with "s" gives the
    uncompressed block size, and each line starting with "o" gives the
    offset, compressed size and uncompressed size of one block.
    """
    def __init__(self, filename, table_filename, block_cache_size=0, **kwargs):
        """
        Open `filename` using the block table in `table_filename`.
        Decompressed blocks are kept in an LRU cache when
        `block_cache_size` is greater than zero.
        """
        self.filename = filename
        self.table_filename = table_filename
        self.init_table()
        # Compressed data is binary; text mode could corrupt it.
        self.file = open(self.filename, "rb")
        self.dirty = True
        self.at_eof = False
        self.file_pos = 0
        self.current_block_index = -1
        self.current_block = None
        if block_cache_size > 0:
            self.cache = lrucache.LRUCache(block_cache_size)
        else:
            self.cache = None

    def init_table(self):
        """
        Parse the offset table into `block_size`, `block_info` and `nblocks`.
        """
        self.block_size = None
        self.block_info = []
        # Position of corresponding block in compressed file (in bytes)
        with open(self.table_filename) as table:
            for line in table:
                fields = line.split()
                if not fields:
                    # Tolerate blank lines instead of failing on fields[0]
                    continue
                if fields[0] == "s":
                    self.block_size = int(fields[1])
                if fields[0] == "o":
                    offset = int(fields[1])
                    compressed_size = int(fields[2])
                    size = int(fields[3])
                    self.block_info.append((offset, compressed_size, size))
        self.nblocks = len(self.block_info)

    def close(self):
        """
        Close the underlying compressed file.
        """
        self.file.close()

    def load_block(self, index):
        """
        Return the decompressed data of block `index`, using the cache
        when one is configured.
        """
        if self.cache is not None and index in self.cache:
            return self.cache[index]
        else:
            offset, csize, size = self.block_info[index]
            # Get the block of compressed data
            self.file.seek(offset)
            data = self.file.read(csize)
            # Need to prepend a header for python-lzo module (silly)
            data = b'\xf0' + struct.pack("!I", size) + data
            value = lzo.decompress(data)
            if self.cache is not None:
                self.cache[index] = value
            return value

    def fix_dirty(self):
        """
        Position `current_block` at `file_pos`, loading the containing
        block when it is not the current one.
        """
        chunk, offset = self.get_block_and_offset(self.file_pos)
        if self.current_block_index != chunk:
            self.current_block = StringIO(self.load_block(chunk))
            self.current_block.read(offset)
            self.current_block_index = chunk
        else:
            self.current_block.seek(offset)
        self.dirty = False

    def get_block_and_offset(self, index):
        """
        Split absolute position `index` into (block number, offset in block).
        """
        return int(index // self.block_size), int(index % self.block_size)

    def seek(self, offset, whence=0):
        """
        Move the file pointer to a particular offset.

        `whence` 0 is absolute and 1 is relative to the current position;
        seeking from the end (`whence` 2) is not supported.
        """
        # Determine absolute target position
        if whence == 0:
            target_pos = offset
        elif whence == 1:
            target_pos = self.file_pos + offset
        elif whence == 2:
            raise Exception("seek from end not supported")
        else:
            raise Exception("Invalid `whence` argument: %r" % (whence,))
        # Check if this is a noop
        if target_pos == self.file_pos:
            return
        # Move the position
        self.file_pos = target_pos
        # The position changed, so a previously reached EOF must not make
        # the next readline return "" straight away.
        self.at_eof = False
        # Mark as dirty, the next time a read is done we need to actually
        # move the position in the decompressed block
        self.dirty = True

    def tell(self):
        """
        Return the current position in the uncompressed data.
        """
        return self.file_pos

    def readline(self):
        """
        Return the next line, joining pieces when it spans several blocks.
        Returns "" once the end of the last block has been reached.
        """
        if self.dirty:
            self.fix_dirty()
        if self.at_eof:
            return ""
        rval = []
        while 1:
            line = self.current_block.readline()
            self.file_pos += len(line)
            rval.append(line)
            if len(line) > 0 and line[-1] == '\n':
                break
            elif self.current_block_index == self.nblocks - 1:
                self.at_eof = True
                break
            else:
                self.current_block_index += 1
                self.current_block = StringIO(
                    self.load_block(self.current_block_index))
        return "".join(rval)

    def __next__(self):
        line = self.readline()
        if line == "":
            raise StopIteration
        return line

    def __iter__(self):
        return self