Example #1
 def _do_execute_direct(self, code):
     shell = builtins.__xonsh_shell__
     env = builtins.__xonsh_env__
     enc = env.get('XONSH_ENCODING')
     out = SpooledTemporaryFile(max_size=MAX_SIZE,
                                mode='w+t',
                                encoding=enc,
                                newline='\n')
     err = SpooledTemporaryFile(max_size=MAX_SIZE,
                                mode='w+t',
                                encoding=enc,
                                newline='\n')
     try:
         with redirect_stdout(out), redirect_stderr(err), \
              swap(builtins, '__xonsh_stdout_uncaptured__', out), \
              swap(builtins, '__xonsh_stderr_uncaptured__', err), \
              env.swap({'XONSH_STORE_STDOUT': False}):
             shell.default(code)
         interrupted = False
     except KeyboardInterrupt:
         interrupted = True
     output, error = '', ''
     if out.tell() > 0:
         out.seek(0)
         output = out.read()
     if err.tell() > 0:
         err.seek(0)
         error = err.read()
     out.close()
     err.close()
     return output, error, interrupted
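Stripped of the xonsh specifics, the pattern above is just redirect_stdout/redirect_stderr pointed at a text-mode spooled file. A minimal, self-contained sketch (the max_size and encoding values here are placeholders, not the real MAX_SIZE):

    from contextlib import redirect_stdout
    from tempfile import SpooledTemporaryFile

    # Stays in memory until 1 KiB is written, then rolls over to disk.
    with SpooledTemporaryFile(max_size=1024, mode='w+t',
                              encoding='utf-8', newline='\n') as out:
        with redirect_stdout(out):
            print('captured')
        if out.tell() > 0:  # anything written?
            out.seek(0)
            print(out.read())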
Example #2
    def do_execute(self, code, silent, store_history=True, user_expressions=None,
                   allow_stdin=False):
        """Execute user code."""
        if len(code.strip()) == 0:
            return {'status': 'ok', 'execution_count': self.execution_count,
                    'payload': [], 'user_expressions': {}}
        env = builtins.__xonsh_env__
        shell = builtins.__xonsh_shell__
        hist = builtins.__xonsh_history__
        enc = env.get('XONSH_ENCODING')
        out = SpooledTemporaryFile(max_size=MAX_SIZE, mode='w+t',
                                   encoding=enc, newline='\n')
        err = SpooledTemporaryFile(max_size=MAX_SIZE, mode='w+t',
                                   encoding=enc, newline='\n')
        try:
            with redirect_stdout(out), redirect_stderr(err), \
                 swap(builtins, '__xonsh_stdout_uncaptured__', out), \
                 swap(builtins, '__xonsh_stderr_uncaptured__', err), \
                 env.swap({'XONSH_STORE_STDOUT': False}):
                shell.default(code)
            interrupted = False
        except KeyboardInterrupt:
            interrupted = True

        if not silent:  # stdout response
            if out.tell() > 0:
                out.seek(0)
                self._respond_in_chunks('stdout', out.read())
            if err.tell() > 0:
                err.seek(0)
                self._respond_in_chunks('stderr', err.read())
            if hasattr(builtins, '_') and builtins._ is not None:
                # rely on sys.displayhook functionality
                self._respond_in_chunks('stdout', pformat(builtins._))
                builtins._ = None
            if hist is not None and len(hist) > 0 and out.tell() == 0 and err.tell() == 0:
                self._respond_in_chunks('stdout', hist.outs[-1])

        out.close()
        err.close()

        if interrupted:
            return {'status': 'abort', 'execution_count': self.execution_count}

        rtn = 0 if (hist is None or len(hist) == 0) else hist.rtns[-1]
        if 0 < rtn:
            message = {'status': 'error', 'execution_count': self.execution_count,
                       'ename': '', 'evalue': str(rtn), 'traceback': []}
        else:
            message = {'status': 'ok', 'execution_count': self.execution_count,
                       'payload': [], 'user_expressions': {}}
        return message
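The kernel replies above lean on a _respond_in_chunks helper whose implementation is not shown. A hypothetical sketch of what such a helper could look like, assuming a send-style callable that emits one Jupyter stream message per call:

    CHUNK_SIZE = 1024  # placeholder; the real chunk size is not shown

    def respond_in_chunks(send, stream_name, text, chunk_size=CHUNK_SIZE):
        # Emit one 'stream' message per slice so very large outputs do not
        # end up in a single oversized message.
        for i in range(0, len(text), chunk_size):
            send({'name': stream_name, 'text': text[i:i + chunk_size]})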
Example #3
    def do_execute(self, code, silent, store_history=True, user_expressions=None,
                   allow_stdin=False):
        """Execute user code."""
        if len(code.strip()) == 0:
            return {'status': 'ok', 'execution_count': self.execution_count,
                    'payload': [], 'user_expressions': {}}
        env = builtins.__xonsh_env__
        shell = builtins.__xonsh_shell__
        hist = builtins.__xonsh_history__
        enc = env.get('XONSH_ENCODING')
        out = SpooledTemporaryFile(max_size=MAX_SIZE, mode='w+t',
                                   encoding=enc, newline='\n')
        err = SpooledTemporaryFile(max_size=MAX_SIZE, mode='w+t',
                                   encoding=enc, newline='\n')
        try:
            with redirect_stdout(out), redirect_stderr(err), \
                 swap(builtins, '__xonsh_stdout_uncaptured__', out), \
                 swap(builtins, '__xonsh_stderr_uncaptured__', err), \
                 env.swap({'XONSH_STORE_STDOUT': False}):
                shell.default(code)
            interrupted = False
        except KeyboardInterrupt:
            interrupted = True

        if not silent:  # stdout response
            if out.tell() > 0:
                out.seek(0)
                self._respond_in_chunks('stdout', out.read())
            if err.tell() > 0:
                err.seek(0)
                self._respond_in_chunks('stderr', err.read())
            if hasattr(builtins, '_') and builtins._ is not None:
                # rely on sys.displayhook functionality
                self._respond_in_chunks('stdout', pformat(builtins._))
                builtins._ = None
            if len(hist) > 0 and out.tell() == 0 and err.tell() == 0:
                self._respond_in_chunks('stdout', hist.outs[-1])

        out.close()
        err.close()

        if interrupted:
            return {'status': 'abort', 'execution_count': self.execution_count}

        rtn = 0 if len(hist) == 0 else hist.rtns[-1]
        if 0 < rtn:
            message = {'status': 'error', 'execution_count': self.execution_count,
                       'ename': '', 'evalue': str(rtn), 'traceback': []}
        else:
            message = {'status': 'ok', 'execution_count': self.execution_count,
                       'payload': [], 'user_expressions': {}}
        return message
Example #4
    def do_execute(self, code, silent, store_history=True, user_expressions=None, allow_stdin=False):
        """Execute user code."""
        if len(code.strip()) == 0:
            return {"status": "ok", "execution_count": self.execution_count, "payload": [], "user_expressions": {}}
        env = builtins.__xonsh_env__
        shell = builtins.__xonsh_shell__
        hist = builtins.__xonsh_history__
        enc = env.get("XONSH_ENCODING")
        out = SpooledTemporaryFile(max_size=MAX_SIZE, mode="w+t", encoding=enc, newline="\n")
        err = SpooledTemporaryFile(max_size=MAX_SIZE, mode="w+t", encoding=enc, newline="\n")
        try:
            with redirect_stdout(out), redirect_stderr(err), swap(builtins, "__xonsh_stdout_uncaptured__", out), swap(
                builtins, "__xonsh_stderr_uncaptured__", err
            ), env.swap({"XONSH_STORE_STDOUT": False}):
                shell.default(code)
            interrupted = False
        except KeyboardInterrupt:
            interrupted = True

        if not silent:  # stdout response
            if out.tell() > 0:
                out.seek(0)
                self._respond_in_chunks("stdout", out.read())
            if err.tell() > 0:
                err.seek(0)
                self._respond_in_chunks("stderr", err.read())
            if hasattr(builtins, "_") and builtins._ is not None:
                # rely on sys.displayhook functionality
                self._respond_in_chunks("stdout", pformat(builtins._))
                builtins._ = None
            if len(hist) > 0 and out.tell() == 0 and err.tell() == 0:
                self._respond_in_chunks("stdout", hist.outs[-1])

        out.close()
        err.close()

        if interrupted:
            return {"status": "abort", "execution_count": self.execution_count}

        rtn = 0 if len(hist) == 0 else hist.rtns[-1]
        if 0 < rtn:
            message = {
                "status": "error",
                "execution_count": self.execution_count,
                "ename": "",
                "evalue": str(rtn),
                "traceback": [],
            }
        else:
            message = {"status": "ok", "execution_count": self.execution_count, "payload": [], "user_expressions": {}}
        return message
Example #5
    def __init__(self, data=None, fp=None, length=-1):
        assert bool(data is not None) ^ bool(fp)

        if length == -1:
            if data is not None:
                length = len(data)
            else:
                length = get_size(fp)  # can be -1

        # We allow writer reuse, but if we're working with a stream, we cannot
        # seek. Copy the data to a tempfile.
        if fp and not can_seek(fp):
            newfp = SpooledTemporaryFile(MAX_INMEMORY_SIZE)
            sendfile(newfp, fp)
            length = newfp.tell()
            newfp.seek(0)
            fp = newfp

        self.data = data
        self.fp = fp
        self.fpreads = 0  # keep track of fp usage
        self.length = length

        assert length >= 0
        self.use_tempfile = length > MAX_INMEMORY_SIZE
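The can_seek and get_size helpers used above are not shown. One common way to implement the seekability probe, included here as an assumption about their intent:

    import io

    def can_seek(fp):
        # io objects advertise seekability; for bare file-likes, try a no-op seek.
        try:
            return fp.seekable()
        except AttributeError:
            try:
                fp.seek(0, io.SEEK_CUR)
            except (OSError, io.UnsupportedOperation):
                return False
            return True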
Example #6
    def upload_file(self,
                    user,
                    stream,
                    expected_size,
                    filename,
                    force_coll_name=''):
        temp_file = None
        logger.debug('Upload Begin')

        logger.debug('Expected Size: ' + str(expected_size))

        #is_anon = False

        size_rem = user.get_size_remaining()

        logger.debug('User Size Rem: ' + str(size_rem))

        if size_rem < expected_size:
            return {'error': 'out_of_space'}

        if force_coll_name and not user.has_collection(force_coll_name):
            #if is_anon:
            #    user.create_collection(force_coll, 'Temporary Collection')

            #else:
            #status = 'Collection {0} not found'.format(force_coll_name)
            return {'error': 'no_such_collection'}

        temp_file = SpooledTemporaryFile(max_size=BLOCK_SIZE)

        stream = CacheingLimitReader(stream, expected_size, temp_file)

        if filename.endswith('.har'):
            stream, expected_size = self.har2warc(filename, stream)
            temp_file.close()
            temp_file = stream

        infos = self.parse_uploaded(stream, expected_size)

        total_size = temp_file.tell()
        if total_size != expected_size:
            return {
                'error': 'incomplete_upload',
                'expected': expected_size,
                'actual': total_size
            }

        upload_id, upload_key = self._init_upload_status(user,
                                                         total_size,
                                                         1,
                                                         filename=filename)

        return self.handle_upload(temp_file, upload_id, upload_key, infos,
                                  filename, user, force_coll_name, total_size)
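CacheingLimitReader is external to this snippet; its role here is to copy everything parse_uploaded reads into temp_file so the upload can be replayed afterwards. A minimal tee-reader sketch of that idea (not the real class):

    class TeeReader:
        # Copies every byte read from src into out as a side effect.
        def __init__(self, src, out):
            self.src = src
            self.out = out

        def read(self, size=-1):
            data = self.src.read(size)
            self.out.write(data)
            return data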
Example #7
class RemoteFileBuffer(object):
    """File-like object providing buffer for local file operations.

    Instances of this class manage a local tempfile buffer corresponding
    to the contents of a remote file.  All reads and writes happen locally,
    with the content being copied to the remote file only on flush() or
    close().

    Instances of this class are returned by S3FS.open, but it is designed
    to be usable by any FS subclass that manages remote files.
    """

    def __init__(self,fs,path,mode):
        self.file = TempFile()
        self.fs = fs
        self.path = path
        self.mode = mode

    def __del__(self):
        if not self.closed:
            self.close()

    #  This is lifted straight from the stdlib's tempfile.py
    def __getattr__(self,name):
        file = self.__dict__['file']
        a = getattr(file, name)
        if not issubclass(type(a), type(0)):
            setattr(self, name, a)
        return a

    def __enter__(self):
        self.file.__enter__()
        return self

    def __exit__(self,exc,value,tb):
        self.close()
        return False

    def __iter__(self):
        return iter(self.file)

    def flush(self):
        self.file.flush()
        if "w" in self.mode or "a" in self.mode or "+" in self.mode:
            pos = self.file.tell()
            self.file.seek(0)
            self.fs.setcontents(self.path,self.file)
            self.file.seek(pos)

    def close(self):
        if "w" in self.mode or "a" in self.mode or "+" in self.mode:
            self.file.seek(0)
            self.fs.setcontents(self.path,self.file)
        self.file.close()
Example #8
    def upload_file(self, user, stream, expected_size, filename, force_coll_name=''):
        """Upload WARC archive.

        :param User user: user
        :param stream: file object
        :param int expected_size: expected WARC archive size
        :param str filename: WARC archive filename
        :param str force_coll_name: name of collection to upload into

        :returns: upload information
        :rtype: dict
        """
        temp_file = None
        logger.debug('Upload Begin')

        logger.debug('Expected Size: ' + str(expected_size))

        #is_anon = False

        size_rem = user.get_size_remaining()

        logger.debug('User Size Rem: ' + str(size_rem))

        if size_rem < expected_size:
            return {'error': 'out_of_space'}

        if force_coll_name and not user.has_collection(force_coll_name):
            #if is_anon:
            #    user.create_collection(force_coll, 'Temporary Collection')

            #else:
            #status = 'Collection {0} not found'.format(force_coll_name)
            return {'error': 'no_such_collection'}

        temp_file = SpooledTemporaryFile(max_size=BLOCK_SIZE)

        stream = CacheingLimitReader(stream, expected_size, temp_file)

        if filename.endswith('.har'):
            stream, expected_size = self.har2warc(filename, stream)
            temp_file.close()
            temp_file = stream

        infos = self.parse_uploaded(stream, expected_size)

        total_size = temp_file.tell()
        if total_size != expected_size:
            return {'error': 'incomplete_upload', 'expected': expected_size, 'actual': total_size}

        upload_id, upload_key = self._init_upload_status(user, total_size, 1, filename=filename)

        return self.handle_upload(temp_file, upload_id, upload_key, infos, filename,
                                  user, force_coll_name, total_size)
Example #9
def ensure_content_length(resp):
    """
    Add Content-Length when it is not present.

    Streams content into a temp file, and replaces the original socket with it.
    """
    spool = SpooledTemporaryFile(current_app.config.get('FILES_URL_MAX_SIZE'))
    shutil.copyfileobj(resp.raw, spool)
    resp.headers['Content-Length'] = str(spool.tell())
    spool.seek(0)

    # replace the original socket with temp file
    resp.raw._fp.close()
    resp.raw._fp = spool
    return resp
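The same buffer-then-measure trick works for any stream whose length is unknown up front:

    import shutil
    from tempfile import SpooledTemporaryFile

    def measured(stream, max_in_memory=1024 * 1024):
        # Drain the stream into a spooled file, record its length,
        # and rewind so callers can read it from the beginning.
        spool = SpooledTemporaryFile(max_in_memory)
        shutil.copyfileobj(stream, spool)
        length = spool.tell()
        spool.seek(0)
        return spool, length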
Example #10
    def convert(self,
                input_file: SpooledTemporaryFile,
                max_char_per_line: int = MAX_CHAR_PER_LINE) -> dict:
        result = ''
        input_file.seek(0, io.SEEK_END)
        if input_file.tell() > 0:
            try:
                converted = extract_text(input_file)
                result = parse_converted_pdf(converted, int(max_char_per_line),
                                             self.debug)

            msg = f'Successfully converted "{input_file.filename}"!'
                logger.info(msg)
            except Exception as err:
                msg = f'Unable to convert "{input_file.filename}"!'
                logger.exception(msg)
                raise err

        result = {'result': result}
        return result
Example #11
class GCloudFile(File):
    """
    Django file object that wraps a SpooledTemporaryFile and remembers changes on
    write to reupload the file to GCS on close()
    """

    def __init__(self, blob, maxsize=1000):
        """
        :type blob: google.cloud.storage.blob.Blob
        """
        self._dirty = False
        self._tmpfile = SpooledTemporaryFile(
            max_size=maxsize,
            prefix="django_gcloud_storage_"
        )

        self._blob = blob

        super(GCloudFile, self).__init__(self._tmpfile)

    def _update_blob(self):
        # Specify explicit size to avoid problems with not yet spooled temporary files
        # Django's File.size property already knows how to handle cases like this

        if DJANGO_17 and self._tmpfile.name is None:  # Django bug #22307
            size = self._tmpfile.tell()
        else:
            size = self.size

        self._blob.upload_from_file(self._tmpfile, size=size, rewind=True)

    def write(self, content):
        self._dirty = True
        super(GCloudFile, self).write(content)

    def close(self):
        if self._dirty:
            self._update_blob()
            self._dirty = False

        super(GCloudFile, self).close()
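A hypothetical usage sketch; the bucket and blob names are placeholders, and blob construction follows the google-cloud-storage client API:

    # from google.cloud import storage
    # blob = storage.Client().bucket('my-bucket').blob('path/file.bin')
    # f = GCloudFile(blob)
    # f.write(b'payload')  # sets the dirty flag
    # f.close()            # dirty flag triggers upload_from_file before closing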
Example #12
class VersionedFile(io.BufferedIOBase):
    def __init__(self,
                 manager,
                 filename,
                 mode=Perm.read,
                 requestor=Owner.ALL,
                 meta=None,
                 rev=None,
                 file_info=None,
                 **kwargs):
        io.BufferedIOBase.__init__(self)
        self.path = self.name = filename
        # manager.check_perm(self.path, owner=requestor, perm=mode)
        self.created = self.modified = None
        self.data = None
        self.meta = meta or {}
        self.mode = mode
        self._seekable = True
        self.length = 0
        self.bs = 8192
        self._cipher = None
        self.manager = manager
        self._file_info = file_info or manager.get_metadata_and_check_perm(
            filename, rev, mode=mode, owner=requestor)
        # self._file_info = manager.get_file_metadata(filename, rev, mode=mode)
        if self._file_info:
            self.update(self._file_info)

        if mode == Perm.read and not self._file_info:
            raise FileNotFoundError(self.path)
        elif mode == Perm.write:
            self.owner = requestor

        if kwargs:
            self.update(kwargs)
        self._pos = 0
        if mode == Perm.read:
            if self.data:
                self._curr_chunk = self.data
                self._curr_chunk_num = 0
            else:
                self._curr_chunk_num = None
                self._curr_chunk = None
        else:
            self._buf = SpooledTemporaryFile(
                max_size=getattr(self, 'buffer_threshold', 52428800))
            self.hash = None

    @property
    def is_dir(self):
        return self.content_type == u'application/x-directory'

    def do_hash(self, algo='sha256'):
        self.hash = algo

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        if exc:
            if self.readable():
                self.close()
            else:
                self._buf.close()
                self.mode = None
            import six
            six.reraise(exc_type, exc, tb)
        else:
            self.close()

    def close(self):
        if self.closed:
            return
        if self.writable():
            self._buf.seek(0, 2)
            length = self.length = self._buf.tell()
            self._buf.seek(0)

            hist_data = {
                u'meta': self.meta,
                u'owner': getattr(self, 'owner', None),
                u'length': length,
                u'hash': self.hash,
                u'created': self.created,
                u'modified': self.modified,
                u'file_info': self._file_info,
            }
            content_type = getattr(self, 'content_type', None)
            if not content_type:
                content_type = mimetypes.guess_type(self.path)[0]
            hist_data[u'content_type'] = content_type

            if getattr(self, 'force_rev', None) is not None:
                hist_data[u'rev'] = rev = self.force_rev
                hist_data[u'modified'] = self.created

            self.update(
                self.manager.save_file_data(self.path,
                                            hist_data,
                                            self._buf,
                                            cipher=self._cipher))

            self._buf.close()
            self._buf = None
        self.mode = None
        io.BufferedIOBase.close(self)

    # def __del__(self):
    #     self.close()

    def readable(self):
        return self.mode == Perm.read

    def writable(self):
        return self.mode == Perm.write

    def seekable(self):
        return self._seekable

    def tell(self):
        if self.readable():
            return self._pos
        else:
            return self._buf.tell()

    def seek(self, pos, whence=0):
        if self.mode == Perm.read:
            curpos = self._pos
            if whence == 0:
                abspos = pos
            elif whence == 1:
                abspos = curpos + pos
            elif whence == 2:
                abspos = self.length + pos
            self._pos = abspos
            return self._pos
        elif self.mode == Perm.write and self.seekable():
            return self._buf.seek(pos, whence)

    def read(self, size=-1):
        if self.mode != Perm.read:
            return
        elif self._pos == self.length:
            return b''
        buf = bytearray()
        if self._pos == 0 and size == -1:
            if self.data:
                self._pos = self.length
                return self.data
            else:
                # optimization for reading the whole file
                i = 0
                for chunk in self.manager.get_file_chunks(self._file_info,
                                                          cipher=self._cipher):
                    i += 1
                    buf.extend(chunk)
                self._pos = len(buf)
                return bytes(buf)

        length = size if size > 0 else self.length
        where, pos = divmod(self._pos, self.bs)

        if self._curr_chunk_num != where:
            self._curr_chunk = self.manager.get_file_chunk(self._file_info,
                                                           where,
                                                           cipher=self._cipher)
            self._curr_chunk_num = where
        buf += self._curr_chunk[pos:]
        while len(buf) < length:
            where += 1
            self._curr_chunk = self.manager.get_file_chunk(self._file_info,
                                                           where,
                                                           cipher=self._cipher)
            if self._curr_chunk is None:
                self._curr_chunk_num = None
                break
            buf.extend(self._curr_chunk)
            self._curr_chunk_num = where
        read = buf[:length]
        self._pos += len(read)
        return bytes(read)

    def readall(self):
        return self.read()

    def write(self, data):
        if not data:
            return
        if not self.writable():
            raise FileError()
        if isinstance(data, six.text_type):
            data = data.encode('utf8')

        wrote = len(data)

        self._buf.write(data)
        return wrote

    def update(self, kwargs):
        if kwargs:
            for k, v in kwargs.items():
                if k == 'modified' and self.mode == 'w':
                    continue
                if v is not None:
                    setattr(self, k, v)

    def set_encryption(self, password='', save_password=False):
        """
        Set the encryption password, optionally saving the password in the metadata
        """
        try:
            from nacl.secret import SecretBox
        except ImportError:
            SecretBox = None
        if SecretBox:
            password = hashlib.sha256(password.encode('utf8')).digest()
        else:
            password = hashlib.sha512(password.encode('utf8')).digest()[:56]
        if self.writable():
            assert self._cipher is None
            if SecretBox:
                method = u'nacl'
                self.meta[u'_encryption'] = {u'method': method}
            else:
                method = u'cfb'
                self.meta[u'_encryption'] = {
                    u'method': method,
                    u'iv': os.urandom(8),
                }
            if save_password:
                self.meta[u'_encryption'][u'key'] = password
        else:
            assert u'_encryption' in self.meta
            method = self.meta[u'_encryption'][u'method']
            password = self.meta[u'_encryption'].get(u'key', None) or password
        if method == u'nacl':
            c = SecretBox(password)
            self._cipher = {'encrypt': c.encrypt, 'decrypt': c.decrypt}
        else:
            import blowfish
            c = blowfish.Cipher(password)
            iv = self.meta[u'_encryption'][u'iv']
            self._cipher = {
                'encrypt': lambda chunk: b''.join(c.encrypt_cfb(chunk, iv)),
                'decrypt': lambda chunk: b''.join(c.decrypt_cfb(chunk, iv)),
            }
        if self.data:
            self._curr_chunk = self._cipher['decrypt'](self.data)
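The write path of VersionedFile reduces to a stage-then-commit pattern: writes accumulate in the spooled buffer, and close() measures and rewinds it before handing it to the storage layer. In isolation:

    from tempfile import SpooledTemporaryFile

    buf = SpooledTemporaryFile(max_size=50 * 2**20)  # ~50 MB in-memory cap
    buf.write(b'chunk 1')
    buf.write(b'chunk 2')
    buf.seek(0, 2)       # jump to the end...
    length = buf.tell()  # ...to measure total staged bytes
    buf.seek(0)          # rewind for whatever persists the data
    buf.close()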
Example #13
    def fetch_media(self, url, partial_fetch=False):
        """Retrieves a given media object from a remote (HTTP) location
        and returns the content-type and a file-like object containing
        the media content.

        The file-like object is a temporary file that - depending on the
        size - lives in memory or on disk. Once the file is closed, the
        contents are removed from storage.

        :param url: the URL of the media asset.
        :type url: str.
        :param partial_fetch: determines if the complete file should
            be fetched, or if only the first 2 MB should be retrieved.
            This feature is used to prevent complete retrieval of large
            a/v material.
        :type partial_fetch: bool.
        :returns: a tuple with the ``content-type``, ``content-length``
            and a file-like object containing the media content. The
            value of ``content-length`` will be ``None`` in case
            a partial fetch is requested and ``content-length`` is not
            returned by the remote server.
        """

        http_resp = self.http_session.get(url, stream=True, timeout=(60, 120))
        http_resp.raise_for_status()

        if not os.path.exists(TEMP_DIR_PATH):
            log.debug('Creating temp directory %s' % TEMP_DIR_PATH)
            os.makedirs(TEMP_DIR_PATH)

        # Create a temporary file to store the media item, write the file
        # to disk if it is larger than 1 MB.
        media_file = SpooledTemporaryFile(max_size=1024 * 1024,
                                          prefix='oad_m_',
                                          suffix='.tmp',
                                          dir=TEMP_DIR_PATH)

        # When a partial fetch is requested, request up to two MB
        partial_target_size = 1024 * 1024 * 2
        content_length = http_resp.headers.get('content-length')
        if content_length and int(content_length) < partial_target_size:
            partial_target_size = int(content_length)

        retrieved_bytes = 0
        for chunk in http_resp.iter_content(chunk_size=512 * 1024):
            if chunk:  # filter out keep-alive chunks
                media_file.write(chunk)
                retrieved_bytes += len(chunk)

            if partial_fetch and retrieved_bytes >= partial_target_size:
                break

        media_file.flush()
        log.debug('Fetched media item %s [%s/%s]' %
                  (url, retrieved_bytes, content_length))

        # If the server doesn't provide a content-length and this isn't
        # a partial fetch, determine the size by looking at the retrieved
        # content
        if not content_length and not partial_fetch:
            media_file.seek(0, 2)
            content_length = media_file.tell()

        return (http_resp.headers.get('content-type'), content_length,
                media_file)
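The seek-to-end probe at the end of fetch_media is a general way to size any seekable file-like object without reading it:

    import os

    def file_size(f):
        # Remember the position, measure via SEEK_END, then restore it.
        pos = f.tell()
        f.seek(0, os.SEEK_END)
        size = f.tell()
        f.seek(pos)
        return size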
Example #14
    def upload_file(self):
        stream = None
        temp_file = None
        logger.debug('Upload Begin')

        expected_size = int(request.headers['Content-Length'])

        logger.debug('Expected Size: ' + str(expected_size))

        if not expected_size:
            return {'error_message': 'No File Specified'}

        curr_user = self.manager.get_curr_user()

        if not curr_user:
            #user = self.manager.get_anon_user()
            #force_coll = 'temp'
            #is_anon = True

            return {
                'error_message':
                'Sorry, uploads only available for logged-in users'
            }

        user = curr_user
        force_coll = request.query.getunicode('force-coll', '')
        is_anon = False

        size_rem = self.manager.get_size_remaining(user)

        logger.debug('User Size Rem: ' + str(size_rem))

        if size_rem < expected_size:
            return {
                'error_message': 'Sorry, not enough space to upload this file'
            }

        if force_coll and not self.manager.has_collection(user, force_coll):
            if is_anon:
                self.manager.create_collection(user, force_coll,
                                               'Temporary Collection')

            else:
                status = 'Collection {0} not found'.format(force_coll)
                return {'error_message': status}

        temp_file = SpooledTemporaryFile(max_size=BLOCK_SIZE)

        filename = request.query.getunicode('filename')

        stream = request.environ['wsgi.input']
        stream = CacheingLimitReader(stream, expected_size, temp_file)

        if filename.endswith('.har'):
            stream, expected_size = self.har2warc(filename, stream)
            temp_file.close()
            temp_file = stream

        infos = self.parse_uploaded(stream, expected_size)

        total_size = temp_file.tell()
        if total_size != expected_size:
            return {
                'error_message':
                'size mismatch: expected {0}, got {1}'.format(
                    expected_size, total_size)
            }

        upload_id = self._get_upload_id()

        upload_key = self.upload_key.format(user=user, upid=upload_id)

        with redis_pipeline(self.manager.redis) as pi:
            pi.hset(upload_key, 'size', 0)
            pi.hset(upload_key, 'total_size', total_size * 2)
            pi.hset(upload_key, 'filename', filename)
            pi.hset(upload_key, 'total_files', 1)
            pi.hset(upload_key, 'files', 1)

        return self.handle_upload(temp_file, upload_id, upload_key, infos,
                                  filename, user, force_coll, total_size)
Example #15
        else:
            U = urllib2.urlparse.urlparse(R.url)
            fname = os.path.basename(U.path)
            print ' Save as', fname

        F = SpooledTemporaryFile(max_size=R.info().get('content-length', 0))

        while True:
            D = R.read(1024 * 1024)
            if len(D) == 0:
                break
            F.write(D)

    except IOError:
        traceback.print_exc()
        continue

    else:
        FS = F.tell()
        F.seek(0, 0)

        DF = File(F)
        DF.size = FS

        I.file.save(fname, DF)

    finally:
        R.close()

print 'Done'
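This fragment (Python 2, excerpted from a larger download loop) wraps the spooled file in django.core.files.File before saving. The explicit DF.size assignment matters, presumably because a spool that has not rolled over to disk has no filesystem name for Django to stat; in outline:

    # from django.core.files import File
    # F.seek(0)
    # DF = File(F)
    # DF.size = FS            # size cannot be stat'ed from an in-memory spool
    # I.file.save(fname, DF)  # I is the model instance from the enclosing loop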
Example #16
class Buffer(FileWrapper):
    """Class implementing buffering of input and output streams.
    
    This class uses a separate buffer file to hold the contents of the
    underlying file while they are being manipulated.  As data is read
    it is duplicated into the buffer, and data is written from the buffer
    back to the file on close.
    """

    def __init__(self, fileobj, mode=None, max_size_in_memory=1024 * 8):
        """Buffered file wrapper constructor."""
        self._buffer = SpooledTemporaryFile(max_size=max_size_in_memory)
        self._in_eof = False
        self._in_pos = 0
        self._was_truncated = False
        super(Buffer, self).__init__(fileobj, mode)

    def _buffer_size(self):
        try:
            return len(self._buffer.file.getvalue())
        except AttributeError:
            return os.fstat(self._buffer.fileno()).st_size

    def _buffer_chunks(self):
        chunk = self._buffer.read(16 * 1024)
        if chunk == "":
            yield chunk
        else:
            while chunk != "":
                yield chunk
                chunk = self._buffer.read(16 * 1024)

    def _write_out_buffer(self):
        if self._check_mode("r"):
            self._read_rest()
            if "a" in self.mode:
                self._buffer.seek(self._in_pos)
                self._fileobj.seek(self._in_pos)
            else:
                self._fileobj.seek(0)
                self._buffer.seek(0)
        else:
            self._buffer.seek(0)
        if self._was_truncated:
            self._fileobj.truncate(0)
            self._was_truncated = False
        for chunk in self._buffer_chunks():
            self._fileobj.write(chunk)

    def flush(self):
        # flush the buffer; we only write to the underlying file on close
        self._buffer.flush()

    def close(self):
        if self.closed:
            return
        if self._check_mode("w"):
            self._write_out_buffer()
        super(Buffer, self).close()
        self._buffer.close()

    def _read(self, sizehint=-1):
        #  First return any data available from the buffer.
        #  Since we don't flush the buffer after every write, certain OSes
        #  (guess which!) will happily read junk data from the end of it.
        #  Instead, we explicitly read only up to self._in_pos.
        if not self._in_eof:
            buffered_size = self._in_pos - self._buffer.tell()
            if sizehint >= 0:
                buffered_size = min(sizehint, buffered_size)
        else:
            buffered_size = sizehint
        data = self._buffer.read(buffered_size)
        if data != "":
            return data
        # Then look for more data in the underlying file
        if self._in_eof:
            return None
        data = self._fileobj.read(sizehint)
        self._in_pos += len(data)
        self._buffer.write(data)
        if sizehint < 0 or len(data) < sizehint:
            self._in_eof = True
            self._buffer.flush()
        return data

    def _write(self, data, flushing=False):
        self._buffer.write(data)
        if self._check_mode("r") and not self._in_eof:
            diff = self._buffer.tell() - self._in_pos
            if diff > 0:
                junk = self._fileobj.read(diff)
                self._in_pos += len(junk)
                if len(junk) < diff:
                    self._in_eof = True
                    self._buffer.flush()

    def _seek(self, offset, whence):
        # Ensure we've read enough to simply do the seek on the buffer
        if self._check_mode("r") and not self._in_eof:
            if whence == 0:
                if offset > self._in_pos:
                    self._read_rest()
            if whence == 1:
                if self._buffer.tell() + offset > self._in_pos:
                    self._read_rest()
            if whence == 2:
                self._read_rest()
        # Then just do it on the buffer...
        self._buffer.seek(offset, whence)

    def _tell(self):
        return self._buffer.tell()

    def _truncate(self, size):
        if self._check_mode("r") and not self._in_eof:
            if size > self._in_pos:
                self._read_rest()
        self._in_eof = True
        try:
            self._buffer.truncate(size)
        except TypeError:
            et, ev, tb = sys.exc_info()
            # SpooledTemporaryFile.truncate() doesn't accept a size parameter.
            try:
                self._buffer._file.truncate(size)
            except Exception:
                raise et, ev, tb
        # StringIO objects don't truncate to larger size correctly.
        if hasattr(self._buffer, "_file"):
            _file = self._buffer._file
            if hasattr(_file, "getvalue"):
                if len(_file.getvalue()) != size:
                    curpos = _file.tell()
                    _file.seek(0, 2)
                    _file.write("\x00" * (size - len(_file.getvalue())))
                    _file.seek(curpos)
        self._was_truncated = True

    def _read_rest(self):
        """Read the rest of the input stream."""
        if self._in_eof:
            return
        pos = self._buffer.tell()
        self._buffer.seek(0, 2)
        data = self._fileobj.read(self._bufsize)
        while data:
            self._in_pos += len(data)
            self._buffer.write(data)
            data = self._fileobj.read(self._bufsize)
        self._in_eof = True
        self._buffer.flush()
        self._buffer.seek(pos)
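On modern Python 3 interpreters the TypeError fallback in _truncate is unnecessary: SpooledTemporaryFile.truncate() accepts a size argument (and rolls the spool over to disk if the size exceeds max_size). For example:

    from tempfile import SpooledTemporaryFile

    buf = SpooledTemporaryFile(max_size=64)
    buf.write(b'x' * 32)
    buf.truncate(16)  # accepted on Python 3; raised TypeError on old versions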
Example #17
    def fetch_pack_from_origin(
        self,
        origin_url: str,
        base_repo: RepoRepresentation,
        do_activity: Callable[[bytes], None],
    ) -> FetchPackReturn:
        """Fetch a pack from the origin"""

        pack_buffer = SpooledTemporaryFile(max_size=self.temp_file_cutoff)
        transport_url = origin_url

        logger.debug("Transport url to communicate with server: %s", transport_url)

        client, path = dulwich.client.get_transport_and_path(
            transport_url, thin_packs=False
        )

        logger.debug("Client %s to fetch pack at %s", client, path)

        size_limit = self.pack_size_bytes

        def do_pack(data: bytes) -> None:
            cur_size = pack_buffer.tell()
            would_write = len(data)
            if cur_size + would_write > size_limit:
                raise IOError(
                    f"Pack file too big for repository {origin_url}, "
                    f"limit is {size_limit} bytes, current size is {cur_size}, "
                    f"would write {would_write}"
                )

            pack_buffer.write(data)

        pack_result = client.fetch_pack(
            path,
            base_repo.determine_wants,
            base_repo.graph_walker(),
            do_pack,
            progress=do_activity,
        )

        remote_refs = pack_result.refs or {}
        symbolic_refs = pack_result.symrefs or {}

        pack_buffer.flush()
        pack_size = pack_buffer.tell()
        pack_buffer.seek(0)

        logger.debug("fetched_pack_size=%s", pack_size)

        # check if the repository only supports the git dumb transfer protocol;
        # the fetched pack file will be empty in that case, as dulwich does
        # not support it and does not fetch any refs
        self.dumb = transport_url.startswith("http") and getattr(client, "dumb", False)

        return FetchPackReturn(
            remote_refs=utils.filter_refs(remote_refs),
            symbolic_refs=utils.filter_refs(symbolic_refs),
            pack_buffer=pack_buffer,
            pack_size=pack_size,
        )
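The do_pack callback is a size-capped sink; the same guard generalizes to any callback-driven download into a spooled buffer:

    def capped_writer(buf, limit):
        # Returns a write callback that refuses to grow buf past limit bytes.
        def write(data):
            if buf.tell() + len(data) > limit:
                raise IOError('stream would exceed %d bytes' % limit)
            buf.write(data)
        return write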
Example #18
    def fetch_media(self, url, partial_fetch=False):
        """Retrieves a given media object from a remote (HTTP) location
        and returns the content-type and a file-like object containing
        the media content.

        The file-like object is a temporary file that - depending on the
        size - lives in memory or on disk. Once the file is closed, the
        contents are removed from storage.

        :param url: the URL of the media asset.
        :type url: str.
        :param partial_fetch: determines if the complete file should
            be fetched, or if only the first 2 MB should be retrieved.
            This feature is used to prevent complete retrieval of large
            a/v material.
        :type partial_fetch: bool.
        :returns: a tuple with the ``content-type``, ``content-length``
            and a file-like object containing the media content. The
            value of ``content-length`` will be ``None`` in case
            a partial fetch is requested and ``content-length`` is not
            returned by the remote server.
        """

        http_resp = self.http_session.get(url, stream=True, timeout=(60, 120))
        http_resp.raise_for_status()

        if not os.path.exists(TEMP_DIR_PATH):
            log.debug('Creating temp directory %s' % TEMP_DIR_PATH)
            os.makedirs(TEMP_DIR_PATH)

        # Create a temporary file to store the media item, write the file
        # to disk if it is larger than 1 MB.
        media_file = SpooledTemporaryFile(max_size=1024*1024, prefix='ocd_m_',
                                          suffix='.tmp',
                                          dir=TEMP_DIR_PATH)

        # When a partial fetch is requested, request up to two MB
        partial_target_size = 1024*1024*2
        content_length = http_resp.headers.get('content-length')
        if content_length and int(content_length) < partial_target_size:
            partial_target_size = int(content_length)

        retrieved_bytes = 0
        for chunk in http_resp.iter_content(chunk_size=512*1024):
            if chunk:  # filter out keep-alive chunks
                media_file.write(chunk)
                retrieved_bytes += len(chunk)

            if partial_fetch and retrieved_bytes >= partial_target_size:
                break

        media_file.flush()
        log.debug('Fetched media item %s [%s/%s]' % (url, retrieved_bytes,
                                                     content_length))

        # If the server doesn't provide a content-length and this isn't
        # a partial fetch, determine the size by looking at the retrieved
        # content
        if not content_length and not partial_fetch:
            media_file.seek(0, 2)
            content_length = media_file.tell()

        return (
            http_resp.headers.get('content-type'),
            content_length,
            media_file
        )
Example #19
class Buffer(FileWrapper):
    """Class implementing buffereing of input and output streams.
    
    This class uses a separate buffer file to hold the contents of the
    underlying file while they are being manipulated.  As data is read
    it is duplicated into the buffer, and data is written from the buffer
    back to the file on close.
    """
    
    def __init__(self,fileobj,mode=None,max_size_in_memory=1024*8):
        """Buffered file wrapper constructor."""
        self._buffer = SpooledTemporaryFile(max_size=max_size_in_memory)
        self._in_eof = False
        self._in_pos = 0
        super(Buffer,self).__init__(fileobj,mode)

    def _buffer_chunks(self):
        chunk = self._buffer.read(16*1024)
        if chunk == "":
            yield chunk
        else:
            while chunk != "":
                yield chunk
                chunk = self._buffer.read(16*1024)

    def _write_out_buffer(self):
        if self._check_mode("r"):
            self._read_rest()
            if "a" in self.mode:
                self._buffer.seek(self._in_pos)
                self._fileobj.seek(self._in_pos)
            else:
                self._fileobj.seek(0)
                self._buffer.seek(0)
        else:
            self._buffer.seek(0)
        for chunk in self._buffer_chunks():
            self._fileobj.write(chunk)
 
    def flush(self):
        # flush the buffer; we only write to the underlying file on close
        self._buffer.flush()

    def close(self):
        if self.closed:
            return
        if self._check_mode("w"):
            self._write_out_buffer()
        super(Buffer,self).close()
        self._buffer.close()

    def _read(self,sizehint=-1):
        #  First return any data available from the buffer.
        #  Since we don't flush the buffer after every write, certain OSes
        #  (guess which!) will happily read junk data from the end of it.
        #  Instead, we explicitly read only up to self._in_pos.
        if not self._in_eof:
            buffered_size = self._in_pos - self._buffer.tell()
            if sizehint >= 0:
                buffered_size = min(sizehint,buffered_size)
        else:
            buffered_size = sizehint
        data = self._buffer.read(buffered_size)
        if data != "":
            return data
        # Then look for more data in the underlying file
        if self._in_eof:
            return None
        data = self._fileobj.read(sizehint)
        self._in_pos += len(data)
        self._buffer.write(data)
        if sizehint < 0 or len(data) < sizehint:
            self._in_eof = True
            self._buffer.flush()
        return data

    def _write(self,data,flushing=False):
        self._buffer.write(data)
        if self._check_mode("r") and not self._in_eof:
            diff = self._buffer.tell() - self._in_pos
            if diff > 0:
                junk = self._fileobj.read(diff)
                self._in_pos += len(junk)
                if len(junk) < diff:
                    self._in_eof = True
                    self._buffer.flush()
    
    def _seek(self,offset,whence):
        # Ensure we've read enough to simply do the seek on the buffer
        if self._check_mode("r") and not self._in_eof:
            if whence == 0:
                if offset > self._in_pos:
                    self._read_rest()
            if whence == 1:
                if self._buffer.tell() + offset > self._in_pos:
                    self._read_rest()
            if whence == 2:
                self._read_rest()
        # Then just do it on the buffer...
        self._buffer.seek(offset,whence)

    def _tell(self):
        return self._buffer.tell()
        
    def _read_rest(self):
        """Read the rest of the input stream."""
        if self._in_eof:
            return
        pos = self._buffer.tell()
        self._buffer.seek(0,2)
        data = self._fileobj.read(self._bufsize)
        while data:
            self._in_pos += len(data)
            self._buffer.write(data)
            data = self._fileobj.read(self._bufsize)
        self._in_eof = True 
        self._buffer.flush()
        self._buffer.seek(pos)
Example #20
class TestFile(object):
    CACHE_LIMIT = 0x80000  # data cache limit per file: 512KB
    XFER_BUF = 0x10000  # transfer buffer size: 64KB

    __slots__ = ("_file_name", "_fp")

    def __init__(self, file_name):
        # This is a naive fix for a larger path issue. This is a simple sanity
        # check and does not check if invalid characters are used. If an invalid
        # file name is used an exception will be raised when trying to write
        # that file to the file system.
        if "\\" in file_name:
            file_name = file_name.replace("\\", "/")
        if file_name.startswith("/"):
            file_name = file_name.lstrip("/")
        if file_name.endswith("."):
            file_name = file_name.rstrip(".")
        if not file_name \
                or ("/" in file_name and not file_name.rsplit("/", 1)[-1]) \
                or file_name.startswith("../"):
            raise TypeError("file_name is invalid %r" % (file_name, ))
        # name including path relative to wwwroot
        self._file_name = os.path.normpath(file_name)
        self._fp = SpooledTemporaryFile(dir=grz_tmp("storage"),
                                        max_size=self.CACHE_LIMIT,
                                        prefix="testfile_")

    def __enter__(self):
        return self

    def __exit__(self, *exc):
        self.close()

    def clone(self):
        """Make a copy of the TestFile.

        Args:
            None

        Returns:
            TestFile: A copy of the TestFile instance
        """
        cloned = type(self)(self._file_name)
        self._fp.seek(0)
        shutil.copyfileobj(self._fp, cloned._fp, self.XFER_BUF)  # pylint: disable=protected-access
        return cloned

    def close(self):
        """Close the TestFile.

        Args:
            None

        Returns:
            None
        """
        self._fp.close()

    @property
    def data(self):
        """Get the data from the TestFile. Not recommenced for large files.

        Args:
            None

        Returns:
            bytes: Data from the TestFile
        """
        pos = self._fp.tell()
        self._fp.seek(0)
        data = self._fp.read()
        self._fp.seek(pos)
        return data

    def dump(self, path):
        """Write TestFile data to the filesystem.

        Args:
            path (str): Path to output data.

        Returns:
            None
        """
        target_path = os.path.join(path, os.path.dirname(self._file_name))
        if not os.path.isdir(target_path):
            os.makedirs(target_path)
        self._fp.seek(0)
        with open(os.path.join(path, self._file_name), "wb") as dst_fp:
            shutil.copyfileobj(self._fp, dst_fp, self.XFER_BUF)

    @property
    def file_name(self):
        return self._file_name

    @classmethod
    def from_data(cls, data, file_name, encoding="UTF-8"):
        """Create a TestFile and add it to the test case.

        Args:
            data (bytes or str): Data to write to file. If data is of type
                                 str, encoding must be given.
            file_name (str): Name for the TestFile.
            encoding (str): Encoding to be used.

        Returns:
            TestFile: A TestFile.
        """
        t_file = cls(file_name)
        if data:
            if isinstance(data, bytes) or not encoding:
                t_file.write(data)
            else:
                t_file.write(data.encode(encoding))
        return t_file

    @classmethod
    def from_file(cls, input_file, file_name=None):
        """Create a TestFile from an existing file.

        Args:
            input_file (str): Path to existing file to use.
            file_name (str): Name for the TestFile. If file_name is not given,
                             the name of the input_file will be used.

        Returns:
            TestFile: A TestFile.
        """
        if file_name is None:
            file_name = os.path.basename(input_file)
        t_file = cls(file_name)
        with open(input_file, "rb") as src_fp:
            shutil.copyfileobj(src_fp, t_file._fp, cls.XFER_BUF)  # pylint: disable=protected-access
        return t_file

    @property
    def size(self):
        """Size of the file in bytes.

        Args:
            None

        Returns:
            int: Size in bytes.
        """
        pos = self._fp.tell()
        self._fp.seek(0, os.SEEK_END)
        size = self._fp.tell()
        self._fp.seek(pos)
        return size

    def write(self, data):
        """Add data to the TestFile.

        Args:
            data (bytes): Data to add to the TestFile.

        Returns:
            None
        """
        self._fp.write(data)
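
A short usage sketch of the TestFile API above. The constructor depends on a grz_tmp helper defined elsewhere in the module, so this shows the intended call pattern rather than a standalone script; the file name and output path are illustrative.

# Illustrative call pattern for TestFile; grz_tmp() must be importable
# for the constructor to work, so treat this as a sketch.
with TestFile.from_data("<html/>", "pages/index.html") as t_file:
    assert t_file.size == len(b"<html/>")
    assert t_file.data == b"<html/>"  # str input was UTF-8 encoded
    t_file.dump("/tmp/wwwroot")       # writes /tmp/wwwroot/pages/index.html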
Example #24
        def upload_file():
            stream = None
            upload = None

            try:
                upload = request.files.get('upload-file')

                if not upload:
                    return {'error_message': 'No File Specified'}

                curr_user = self.manager.get_curr_user()

                if not curr_user:
                    #user = self.manager.get_anon_user()
                    #force_coll = 'temp'
                    #is_anon = True

                    return {'error_message': 'Sorry, uploads only available for logged-in users'}

                user = curr_user
                force_coll = request.forms.getunicode('force-coll', '')
                is_anon = False

                if force_coll and not self.manager.has_collection(user, force_coll):
                    if is_anon:
                        self.manager.create_collection(user, force_coll, 'Temporary Collection')

                    else:
                        status = 'Collection {0} not found'.format(force_coll)
                        return {'error_message': status}

                stream = SpooledTemporaryFile(max_size=BLOCK_SIZE)

                logger.debug('Upload Start, Saving')

                upload.save(stream)

                size_rem = self.manager.get_size_remaining(user)

                logger.debug('Size Rem: ' + str(size_rem))

                expected_size = stream.tell()

                logger.debug('Expected Size: ' + str(expected_size))

                if size_rem < expected_size:
                    return {'error_message': 'Sorry, not enough space to upload this file'}

                filename = upload.filename

                logger.debug('Filename: ' + filename)

                new_coll, error_message = self.handle_upload(stream, filename, user, force_coll)

                if new_coll:
                    msg = 'Uploaded file <b>{1}</b> into collection <b>{0}</b>'.format(new_coll['title'], filename)

                    self.flash_message(msg, 'success')

                    return {'uploaded': 'true',
                            'user': user,
                            'coll': new_coll['id']}

                else:
                    print(error_message)
                    return {'error_message': error_message}

            except Exception as e:
                traceback.print_exc()
                return {'error_message': str(e)}

            finally:
                if upload:
                    upload.file.close()

                if stream:
                    stream.close()
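
The key pattern in upload_file above is buffering the upload into a SpooledTemporaryFile first, so its size can be checked against the user's remaining quota before any work is committed. A minimal sketch of that check, with illustrative values for BLOCK_SIZE and the quota:

# Buffer-then-check sketch; BLOCK_SIZE and quota_remaining are
# illustrative values, not taken from the handler above.
from tempfile import SpooledTemporaryFile

BLOCK_SIZE = 16384
quota_remaining = 1024 * 1024

stream = SpooledTemporaryFile(max_size=BLOCK_SIZE)
stream.write(b"uploaded payload bytes")
expected_size = stream.tell()  # bytes buffered so far
if quota_remaining < expected_size:
    stream.close()
    raise ValueError("not enough space to upload this file")
stream.seek(0)  # rewind before handing off for processing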
Example #25
class UploadFile:
    """
    An uploaded file included as part of the request data.
    """

    __slots__ = ("filename", "content_type", "file")

    spool_max_size = 1024 * 1024

    def __init__(self, filename: str, content_type: str = "") -> None:
        self.filename = filename
        self.content_type = content_type
        self.file = SpooledTemporaryFile(max_size=self.spool_max_size,
                                         mode="w+b")

    @property
    def in_memory(self) -> bool:
        # SpooledTemporaryFile keeps data in memory until it exceeds
        # max_size, then sets its private _rolled flag; assume it has
        # rolled to disk if the flag is absent.
        rolled_to_disk = getattr(self.file, "_rolled", True)
        return not rolled_to_disk

    def write(self, data: bytes) -> None:
        self.file.write(data)

    async def awrite(self, data: bytes) -> None:
        if self.in_memory:
            self.write(data)
        else:
            await asyncio.get_event_loop().run_in_executor(
                None, self.write, data)

    def read(self, size: int = -1) -> bytes:
        return self.file.read(size)

    async def aread(self, size: int = -1) -> bytes:
        if self.in_memory:
            return self.read(size)
        return await asyncio.get_event_loop().run_in_executor(
            None, self.read, size)

    def seek(self, offset: int) -> None:
        self.file.seek(offset)

    async def aseek(self, offset: int) -> None:
        if self.in_memory:
            self.seek(offset)
        else:
            await asyncio.get_event_loop().run_in_executor(
                None, self.seek, offset)

    def close(self) -> None:
        self.file.close()

    async def aclose(self) -> None:
        if self.in_memory:
            self.close()
        else:
            await asyncio.get_event_loop().run_in_executor(None, self.close)

    def save(self, filepath: str) -> None:
        """
        Save file to disk.
        """
        # from shutil.COPY_BUFSIZE
        copy_bufsize = 1024 * 1024 if os.name == "nt" else 64 * 1024
        file_position = self.file.tell()
        self.file.seek(0, 0)
        try:
            with open(filepath, "wb+") as target_file:
                source_read = self.file.read
                target_write = target_file.write
                while True:
                    buf = source_read(copy_bufsize)
                    if not buf:
                        break
                    target_write(buf)
        finally:
            self.file.seek(file_position)

    async def asave(self, filepath: str) -> None:
        """
        Save file to disk, work in threading pool.
        """
        await asyncio.get_event_loop().run_in_executor(None, self.save,
                                                       filepath)
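
A brief usage sketch for the UploadFile wrapper above. While the data fits under spool_max_size the buffer stays in memory and the a* methods run inline; once it rolls to disk they offload the blocking I/O to the default executor. The file name and target path below are assumptions for illustration.

# Usage sketch for UploadFile; filename and target path are made up.
import asyncio

async def demo():
    upload = UploadFile("report.bin", content_type="application/octet-stream")
    await upload.awrite(b"chunk of data")  # small, so stays in memory
    await upload.aseek(0)
    assert await upload.aread() == b"chunk of data"
    upload.save("/tmp/report.bin")         # synchronous copy to disk
    await upload.aclose()

asyncio.run(demo())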
Example #26
class TPCTemporaryStorage(object):
    __slots__ = (
        '_queue',
        '_queue_contents',
    )

    def __init__(self):
        # Start with a fresh in-memory buffer instead of reusing one that
        # might already be spooled to disk.
        # TODO: An alternate idea would be a temporary sqlite database.
        self._queue = SpooledTemporaryFile(max_size=10 * 1024 * 1024)
        # {oid: (startpos, endpos, prev_tid_int)}
        self._queue_contents = OidObjectMap()

    def reset(self):
        self._queue_contents.clear()
        self._queue.seek(0)

    def store_temp(self, oid_int, state, prev_tid_int=0):
        """
        Queue an object for caching.

        Typically, we can't actually cache the object yet, because its
        transaction ID is not yet chosen.
        """
        queue = self._queue
        queue.seek(0, 2)  # seek to end
        startpos = queue.tell()
        queue.write(state)
        endpos = queue.tell()
        self._queue_contents[oid_int] = (startpos, endpos, prev_tid_int)

    def __len__(self):
        # How many distinct OIDs have been stored?
        # This also lets the object be used in a boolean context to see
        # if we've actually stored anything or are closed.
        return len(self._queue_contents)

    @property
    def stored_oids(self):
        return self._queue_contents

    @property
    def max_stored_oid(self):
        return OidObjectMap_max_key(self._queue_contents)

    def _read_temp_state(self, startpos, endpos):
        self._queue.seek(startpos)
        length = endpos - startpos
        state = self._queue.read(length)
        if len(state) != length:
            raise AssertionError("Queued cache data is truncated")
        return state

    def read_temp(self, oid_int):
        """
        Return the bytes for a previously stored temporary item.
        """
        startpos, endpos, _ = self._queue_contents[oid_int]
        return self._read_temp_state(startpos, endpos)

    def __iter__(self):
        return self.iter_for_oids(None)

    def iter_for_oids(self, oids):
        read_temp_state = self._read_temp_state
        for startpos, endpos, oid_int, prev_tid_int in self.items(oids):
            state = read_temp_state(startpos, endpos)
            yield state, oid_int, prev_tid_int

    def items(self, oids=None):
        # Order the queue by file position, which should help
        # if the file is large and needs to be read
        # sequentially from disk.
        items = [(startpos, endpos, oid_int, prev_tid_int)
                 for (oid_int,
                      (startpos, endpos,
                       prev_tid_int)) in iteroiditems(self._queue_contents)
                 if oids is None or oid_int in oids]
        items.sort()
        return items

    def close(self):
        if self._queue is not None:
            self._queue.close()
            self._queue = None
            self._queue_contents = ()  # Not None so len() keeps working

    def __repr__(self):
        approx_size = 0
        if self._queue is not None:
            self._queue.seek(0, 2)  # seek to end
            # The number of bytes we stored isn't necessarily the number
            # of bytes we send to the server, if there are duplicates.
            approx_size = self._queue.tell()
        return "<%s at 0x%x count=%d bytes=%d>" % (
            type(self).__name__, id(self), len(self), approx_size)

    def __str__(self):
        base = repr(self)
        if not self:
            return base

        out = NStringIO()

        div = '=' * len(base)
        headings = ['OID', 'Length', 'Previous TID']
        col_width = (len(base) - 5) // len(headings)

        print(base, file=out)
        print(div, file=out)
        print('| ', file=out, end='')
        for heading in headings:
            print('%-*s' % (col_width, heading), end='', file=out)
            print('| ', end='', file=out)
        out.seek(out.tell() - 3)
        print('|', file=out)
        print(div, file=out)

        items = sorted(
            (oid_int, endpos - startpos, prev_tid_int)
            for (startpos, endpos, oid_int, prev_tid_int) in self.items())

        for oid_int, length, prev_tid_int in items:
            print('%*d  |%*d |%*d' % (col_width, oid_int, col_width, length,
                                      col_width, prev_tid_int),
                  file=out)

        return out.getvalue()
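
Finally, a round-trip sketch for TPCTemporaryStorage. It assumes the module-level OidObjectMap and iteroiditems helpers behave like a plain dict and dict iteration, which is how the class uses them above; the OID, state bytes, and prev_tid value are illustrative.

# Illustrative round trip; the OID, state bytes, and prev_tid are made up.
storage = TPCTemporaryStorage()
storage.store_temp(42, b"pickled object state", prev_tid_int=7)
assert len(storage) == 1
assert storage.read_temp(42) == b"pickled object state"
for state, oid_int, prev_tid_int in storage:  # ordered by file position
    print(oid_int, len(state), prev_tid_int)
storage.close()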