Example #1
    def test_encryption_multipart_upload(self):
        import math, os
        from ks3.encryption import Crypts
        Crypts.generate_key('D:/', 'key.txt')
        c = Connection(ak,
                       sk,
                       host="",
                       is_secure=False,
                       domain_mode=False,
                       local_encrypt=True,
                       local_key_path="D:/key.txt")
        from filechunkio import FileChunkIO
        bucket = c.get_bucket(test_bucket)

        source_path = 'D:/1.exe'
        source_size = os.stat(source_path).st_size
        mp = bucket.initiate_multipart_upload(os.path.basename(source_path),
                                              calc_encrypt_md5=False)
        chunk_size = 5242880  # 5 MiB parts
        # Round up with true division so a trailing partial chunk
        # still gets its own part.
        chunk_count = int(math.ceil(source_size / float(chunk_size)))
        print(chunk_count)
        for i in range(chunk_count):
            offset = chunk_size * i
            bytes = min(chunk_size, source_size - offset)
            last = (i == chunk_count - 1)  # the final part carries the padding
            with FileChunkIO(source_path, 'r', offset=offset,
                             bytes=bytes) as fp:
                mp.upload_part_from_file(fp, part_num=i + 1, is_last_part=last)

        mp.complete_upload()
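
A note on the part-count arithmetic above: the round-up division is what
guarantees a trailing partial chunk still becomes its own part. The same count
can be computed without floats, which avoids precision issues on very large
files:

    # Integer ceil-division, equivalent to math.ceil(source_size / chunk_size)
    chunk_count = (source_size + chunk_size - 1) // chunk_size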
Example #2
    def initiate_multipart_upload(self, key_name, headers=None,
                                  reduced_redundancy=False,
                                  metadata=None, encrypt_key=False,
                                  policy=None, calc_encrypt_md5=True):
        """
        Start a multipart upload operation.
            Note: After you initiate multipart upload and upload one or more
            parts, you must either complete or abort multipart upload in order
            to stop getting charged for storage of the uploaded parts. Only
            after you either complete or abort multipart upload, Amazon S3
            frees up the parts storage and stops charging you for the parts
            storage.
        """
        query_args = 'uploads'
        provider = self.connection.provider
        headers = headers or {}
        if policy:
            headers[provider.acl_header] = policy
        if reduced_redundancy:
            storage_class_header = provider.storage_class_header
            if storage_class_header:
                headers[storage_class_header] = 'REDUCED_REDUNDANCY'
            # TODO: what if the provider doesn't support reduced redundancy?
        if encrypt_key:
            headers[provider.server_side_encryption_header] = 'AES256'
        if metadata is None:
            metadata = {}

        headers = ks3.utils.merge_meta(headers, metadata,
                self.connection.provider)
        if self.connection.local_encrypt:
            crypts = Crypts(self.connection.key)
            crypts.calc_md5 = calc_encrypt_md5
            crypts.action_info = "init_multi"
            # Store an MD5 fingerprint of the local key and the base64-encoded
            # first IV as user metadata on the object.
            md5_generator = hashlib.md5()
            md5_generator.update(crypts.key)
            headers["x-kss-meta-key"] = base64.b64encode(md5_generator.hexdigest().encode()).decode()
            headers["x-kss-meta-iv"] = base64.b64encode(crypts.first_iv).decode()
        response = self.connection.make_request('POST', self.name, key_name,
                                                query_args=query_args,
                                                headers=headers)
        body = response.read()
        if response.status == 200:
            resp = MultiPartUpload(self)
            if self.connection.local_encrypt:
                resp.set_crypt_context(crypts)
            h = handler.XmlHandler(resp, self)
            if not isinstance(body, bytes):
                body = body.encode('utf-8')
            xml.sax.parseString(body, h)
            return resp
        else:
            raise self.connection.provider.storage_response_error(
                response.status, response.reason, body)
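
The docstring's note is worth acting on in practice: an initiated upload keeps
accruing storage charges until it is completed or aborted. A minimal cleanup
sketch, assuming the boto-style listing and cancel methods
(get_all_multipart_uploads / cancel_upload) that ks3's Bucket inherits; method
names may differ between SDK versions:

    # Abort every outstanding multipart upload in a bucket so orphaned
    # parts stop costing money.
    for mp in bucket.get_all_multipart_uploads():
        print("aborting", mp.key_name, mp.id)
        mp.cancel_upload()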
Example #3
    def initiate_multipart_upload(self, key_name, headers=None,
                                  reduced_redundancy=False,
                                  metadata=None, encrypt_key=False,
                                  policy=None, calc_encrypt_md5=True):
        """
        Start a multipart upload operation.
            Note: After you initiate multipart upload and upload one or more
            parts, you must either complete or abort multipart upload in order
            to stop getting charged for storage of the uploaded parts. Only
            after you either complete or abort multipart upload, Amazon S3
            frees up the parts storage and stops charging you for the parts
            storage.
        """
        query_args = 'uploads'
        provider = self.connection.provider
        headers = headers or {}
        if policy:
            headers[provider.acl_header] = policy
        if reduced_redundancy:
            storage_class_header = provider.storage_class_header
            if storage_class_header:
                headers[storage_class_header] = 'REDUCED_REDUNDANCY'
            # TODO: what if the provider doesn't support reduced redundancy?
        if encrypt_key:
            headers[provider.server_side_encryption_header] = 'AES256'
        if metadata is None:
            metadata = {}

        headers = ks3.utils.merge_meta(headers, metadata,
                self.connection.provider)
        if self.connection.local_encrypt:
            crypts = Crypts(self.connection.key)
            crypts.calc_md5 = calc_encrypt_md5
            crypts.action_info = "init_multi"
            # Store an MD5 fingerprint of the local key and the base64-encoded
            # first IV as user metadata on the object. b64encode needs bytes,
            # and header values should be str, hence the encode/decode pair.
            md5_generator = hashlib.md5()
            md5_generator.update(crypts.key)
            headers["x-kss-meta-key"] = base64.b64encode(md5_generator.hexdigest().encode()).decode()
            headers["x-kss-meta-iv"] = base64.b64encode(crypts.first_iv).decode()
        response = self.connection.make_request('POST', self.name, key_name,
                                                query_args=query_args,
                                                headers=headers)
        body = response.read()
        if response.status == 200:
            resp = MultiPartUpload(self)
            if self.connection.local_encrypt:
                resp.set_crypt_context(crypts)
            h = handler.XmlHandler(resp, self)
            if not isinstance(body, bytes):
                body = body.encode('utf-8')
            xml.sax.parseString(body, h)
            return resp
        else:
            raise self.connection.provider.storage_response_error(
                response.status, response.reason, body)
Example #4
 def test_encryption_upload(self):
     from ks3.encryption import Crypts
     Crypts.generate_key('D:/', 'key.txt')
     c = Connection(ak,
                    sk,
                    host="",
                    is_secure=False,
                    domain_mode=False,
                    local_encrypt=True,
                    local_key_path="D:/key.txt")
     b = c.get_bucket(test_bucket)
     # put
     kw = b.new_key(test_key)
     ret = kw.set_contents_from_string("some thing")
     # get
     get_key = b.get_key(test_key)
     s = get_key.get_contents_as_string()
     print("Result:", s)
Example #5
 def __init__(self,
              fp,
              crypt_context,
              type,
              isUploadFirstPart=False,
              isUploadLastPart=False):
     self.fp = fp
     self.first_iv = crypt_context.first_iv
     self.calc_iv = ""
     self.crypt_context = crypt_context
     self.crypt_handler = Crypts(crypt_context.key)
     self.type = type
     self.block_size = 16
     self.isUploadFirstPart = isUploadFirstPart
     self.isUploadLastPart = isUploadLastPart
     self.seek_pos = SEEK_SET
     self.block_count = 0
     self.block_total_count = self.get_total_count(fp)
Example #6
 def __init__(self, fp, crypt_context, type, isUploadFirstPart=False, isUploadLastPart=False):
     self.fp = fp
     self.first_iv = crypt_context.first_iv
     self.calc_iv = ""
     self.crypt_context = crypt_context
     self.crypt_handler = Crypts(crypt_context.key)
     self.type = type
     self.block_size = 16
     self.isUploadFirstPart = isUploadFirstPart
     self.isUploadLastPart = isUploadLastPart
     self.seek_pos = SEEK_SET
     self.block_count = 0
     self.block_total_count = self.get_total_count(fp)
Example #7
    def _get_file_internal(self,
                           fp,
                           headers=None,
                           cb=None,
                           num_cb=10,
                           torrent=False,
                           version_id=None,
                           override_num_retries=None,
                           response_headers=None,
                           hash_algs=None,
                           query_args=None):
        if headers is None:
            headers = {}
        save_debug = self.bucket.connection.debug
        if self.bucket.connection.debug == 1:
            self.bucket.connection.debug = 0

        query_args = query_args or []
        if torrent:
            query_args.append('torrent')

        if hash_algs is None and not torrent:
            hash_algs = {'md5': md5}
        digesters = dict((alg, hash_algs[alg]()) for alg in hash_algs or {})

        # If a version_id is passed in, use that.  If not, check to see
        # if the Key object has an explicit version_id and, if so, use that.
        # Otherwise, don't pass a version_id query param.
        if version_id is None:
            version_id = self.version_id
        if version_id:
            query_args.append('versionId=%s' % version_id)
        if response_headers:
            for key in response_headers:
                query_args.append('%s=%s' %
                                  (key, urllib.quote(response_headers[key])))
        query_args = '&'.join(query_args)
        self.open('r',
                  headers,
                  query_args=query_args,
                  override_num_retries=override_num_retries)

        data_len = 0
        if cb:
            if self.size is None:
                cb_size = 0
            else:
                cb_size = self.size
            if self.size is None and num_cb != -1:
                # If size is not available due to chunked transfer for example,
                # we'll call the cb for every 1MB of data transferred.
                cb_count = (1024 * 1024) // self.BufferSize
            elif num_cb > 1:
                cb_count = int(
                    math.ceil(cb_size / self.BufferSize / (num_cb - 1.0)))
            elif num_cb < 0:
                cb_count = -1
            else:
                cb_count = 0
            i = 0
            cb(data_len, cb_size)
        try:
            counter = 1
            last_iv = ""
            total_part = math.ceil(float(self.size) / self.BufferSize)
            for bytes in self:
                if self.bucket.connection.local_encrypt:
                    provider = self.bucket.connection.provider
                    user_key = self.bucket.connection.key
                    crypt_handler = Crypts(user_key)
                    if counter == 1:
                        # For the first block, drop the first 16 bytes (the prepended IV).
                        local_iv = bytes[:crypt_handler.block_size]
                        bytes = bytes[crypt_handler.block_size:]
                        server_iv = self.user_meta[provider.metadata_prefix +
                                                   "iv"]
                        server_iv = base64.b64decode(server_iv)
                        if server_iv and local_iv != server_iv:
                            raise ValueError(
                                "decryption error: file IV not equal to server IV")
                        user_iv = local_iv
                    else:
                        user_iv = last_iv
                    last_iv = bytes[-crypt_handler.block_size:]
                    if counter == total_part:
                        # The last part carries the padding appended to its end.
                        full_content = crypt_handler.decrypt(bytes, user_iv)
                        pad_content_char = full_content[-1]
                        for key in crypt_handler.pad_dict:
                            if crypt_handler.pad_dict[key] == pad_content_char:
                                pad_content_char = key
                        decrypt = full_content[:-int(pad_content_char)]
                    else:
                        decrypt = crypt_handler.decrypt(bytes, user_iv)
                    bytes = decrypt
                    counter += 1
                fp.write(bytes)
                data_len += len(bytes)
                for alg in digesters:
                    digesters[alg].update(bytes)
                if cb:
                    if cb_size > 0 and data_len >= cb_size:
                        break
                    i += 1
                    if i == cb_count or cb_count == -1:
                        cb(data_len, cb_size)
                        i = 0
        except IOError as e:
            if e.errno == errno.ENOSPC:
                raise StorageDataError('Out of space for destination file '
                                       '%s' % fp.name)
            raise
        if cb and (cb_count <= 1 or i > 0) and data_len > 0:
            cb(data_len, cb_size)
        for alg in digesters:
            self.local_hashes[alg] = digesters[alg].digest()
        if self.size is None and not torrent and "Range" not in headers:
            self.size = data_len
        self.close()
        self.bucket.connection.debug = save_debug
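
The decryption loop above is chunked CBC: the first buffer carries a prepended
16-byte IV, each later buffer is chained off the last cipher block of the
previous one, and only the final buffer has its padding stripped. A standalone
sketch of the same bookkeeping, assuming pycryptodome's AES and PKCS#7-style
padding (the SDK's Crypts class keeps its own padding table, so details may
differ):

    from Crypto.Cipher import AES

    def decrypt_chunks(chunks, key):
        # chunks: the ciphertext buffers as read by the download loop above.
        plain = b""
        prev_tail = b""
        for i, block in enumerate(chunks):
            if i == 0:
                iv, block = block[:16], block[16:]  # strip the prepended IV
            else:
                iv = prev_tail                      # chain IVs across buffers
            prev_tail = block[-16:]
            out = AES.new(key, AES.MODE_CBC, iv).decrypt(block)
            if i == len(chunks) - 1:
                out = out[:-out[-1]]                # drop the pad bytes
            plain += out
        return plain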
Example #8
    def set_contents_from_file(self,
                               fp,
                               headers=None,
                               replace=True,
                               cb=None,
                               num_cb=10,
                               policy=None,
                               md5=None,
                               reduced_redundancy=False,
                               query_args=None,
                               encrypt_key=False,
                               size=None,
                               rewind=False,
                               crypt_context=None,
                               calc_encrypt_md5=True):
        """
        Store an object in S3 using the name of the Key object as the
        key in S3 and the contents of the file pointed to by 'fp' as the
        contents. The data is read from 'fp' from its current position until
        'size' bytes have been read or EOF.
        :type fp: file
        :param fp: the file whose contents to upload

        :type headers: dict
        :param headers: Additional HTTP headers that will be sent with
            the PUT request.

        :type replace: bool
        :param replace: If this parameter is False, the method will
            first check to see if an object exists in the bucket with
            the same key.  If it does, it won't overwrite it.  The
            default value is True which will overwrite the object.

        :type cb: function
        :param cb: a callback function that will be called to report
            progress on the upload.  The callback should accept two
            integer parameters, the first representing the number of
            bytes that have been successfully transmitted to S3 and
            the second representing the size of the to be transmitted
            object.

        :type num_cb: int
        :param num_cb: (optional) If a callback is specified with the
            cb parameter this parameter determines the granularity of
            the callback by defining the maximum number of times the
            callback will be called during the file transfer.

        :type policy: :class:`boto.s3.acl.CannedACLStrings`
        :param policy: A canned ACL policy that will be applied to the
            new key in S3.

        :type md5: tuple
        :param md5: A tuple containing the hexdigest version of the MD5
            checksum of the file as the first element and the
            Base64-encoded version of the plain checksum as the second
            element.  This is the same format returned by the
            compute_md5 method.

        :type reduced_redundancy: bool
        :param reduced_redundancy: If True, this will set the storage
            class of the new Key to be REDUCED_REDUNDANCY. The Reduced
            Redundancy Storage (RRS) feature of S3 provides lower
            redundancy at lower storage cost.

        :type encrypt_key: bool
        :param encrypt_key: If True, the new copy of the object will
            be encrypted on the server-side by S3 and will be stored
            in an encrypted form while at rest in S3.

        :type size: int
        :param size: (optional) The maximum number of bytes to read
            from the file pointer (fp). This is useful when uploading
            a file in multiple parts where you are splitting the file
            up into different ranges to be uploaded. If not specified,
            the default behaviour is to read all bytes from the file
            pointer. Fewer bytes may be available.

        :type rewind: bool
        :param rewind: (optional) If True, the file pointer (fp) will
            be rewound to the start before any bytes are read from
            it. The default behaviour is False which reads from the
            current position of the file pointer (fp).

        :rtype: int
        :return: The number of bytes written to the key.
        """
        provider = self.bucket.connection.provider
        headers = headers or {}
        if policy:
            headers[provider.acl_header] = policy
        if encrypt_key:
            headers[provider.server_side_encryption_header] = 'AES256'

        if rewind:
            # caller requests reading from beginning of fp.
            fp.seek(0, os.SEEK_SET)
        else:
            # The following seek/tell/seek logic is intended
            # to detect applications using the older interface to
            # set_contents_from_file(), which automatically rewound the
            # file each time the Key was reused. This changed with commit
            # 14ee2d03f4665fe20d19a85286f78d39d924237e, to support uploads
            # split into multiple parts and uploaded in parallel, and at
            # the time of that commit this check was added because otherwise
            # older programs would get a success status and upload an empty
            # object. Unfortunately, it's very inefficient for fp's implemented
            # by KeyFile (used, for example, by gsutil when copying between
            # providers). So, we skip the check for the KeyFile case.
            # TODO: At some point consider removing this seek/tell/seek
            # logic, after enough time has passed that it's unlikely any
            # programs remain that assume the older auto-rewind interface.
            if not isinstance(fp, KeyFile):
                spos = fp.tell()
                fp.seek(0, os.SEEK_END)
                if fp.tell() == spos:
                    fp.seek(0, os.SEEK_SET)
                    if fp.tell() != spos:
                        # Raise an exception as this is likely a programming
                        # error whereby there is data before the fp but nothing
                        # after it.
                        fp.seek(spos)
                        raise AttributeError('fp is at EOF. Use rewind option '
                                             'or seek() to data start.')
                # seek back to the correct position.
                fp.seek(spos)
        if reduced_redundancy:
            self.storage_class = 'REDUCED_REDUNDANCY'
            if provider.storage_class_header:
                headers[provider.storage_class_header] = self.storage_class
                # TODO - What if provider doesn't support reduced redundancy?
                # What if different providers provide different classes?
        if hasattr(fp, 'name'):
            self.path = fp.name
        if self.bucket is not None:
            if not md5 and provider.supports_chunked_transfer():
                # Defer the MD5 calculation so it happens on the fly;
                # we don't know anything about the size yet.
                chunked_transfer = True
                self.size = None
            else:
                chunked_transfer = False
                if isinstance(fp, KeyFile):
                    # Avoid EOF seek for KeyFile case as it's very inefficient.
                    key = fp.getkey()
                    size = key.size - fp.tell()
                    self.size = size
                    # At present both GCS and S3 use MD5 for the etag for
                    # non-multipart-uploaded objects. If the etag is 32 hex
                    # chars use it as an MD5, to avoid having to read the file
                    # twice while transferring.
                    if (re.match('^"[a-fA-F0-9]{32}"$', key.etag)):
                        etag = key.etag.strip('"')
                        md5 = (etag,
                               base64.b64encode(binascii.unhexlify(etag)))
                if not md5:
                    # compute_md5() also sets self.size to the actual
                    # number of bytes read while computing the MD5.
                    md5 = self.compute_md5(fp, size)
                    # adjust size if required
                    size = self.size
                elif size:
                    self.size = size
                else:
                    # If md5 is provided, we still need the size, so
                    # calculate it from the bytes remaining to end of content.
                    spos = fp.tell()
                    fp.seek(0, os.SEEK_END)
                    self.size = fp.tell() - spos
                    fp.seek(spos)
                    size = self.size
                self.md5 = md5[0]
                self.base64md5 = md5[1]

            if self.name is None:
                self.name = self.md5
            if not replace:
                if self.bucket.lookup(self.name):
                    return
            if self.bucket.connection.local_encrypt and self.size:
                if not crypt_context:
                    crypt_context = Crypts(self.bucket.connection.key)
                    crypt_context.action_info = "put"
                    crypt_context.calc_md5 = calc_encrypt_md5
                return self.send_file(fp,
                                      headers=headers,
                                      cb=cb,
                                      num_cb=num_cb,
                                      query_args=query_args,
                                      chunked_transfer=chunked_transfer,
                                      size=size,
                                      crypt_context=crypt_context)
            return self.send_file(fp,
                                  headers=headers,
                                  cb=cb,
                                  num_cb=num_cb,
                                  query_args=query_args,
                                  chunked_transfer=chunked_transfer,
                                  size=size)
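
For orientation, a hedged usage sketch of the method above; the bucket, object
name, and file path are placeholders:

    key = bucket.new_key("example-object")
    with open("local.bin", "rb") as fp:
        written = key.set_contents_from_file(fp, rewind=True)
    print("bytes written:", written)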
Example #9
class EncryptFp(object):
    """
    A file-like wrapper whose reads return the underlying data encrypted.
    """
    def __init__(self, fp, crypt_context, type, isUploadFirstPart=False, isUploadLastPart=False):
        self.fp = fp
        self.first_iv = crypt_context.first_iv
        self.calc_iv = ""
        self.crypt_context = crypt_context
        self.crypt_handler = Crypts(crypt_context.key)
        self.type = type
        self.block_size = 16
        self.isUploadFirstPart = isUploadFirstPart
        self.isUploadLastPart = isUploadLastPart
        self.seek_pos = SEEK_SET
        self.block_count = 0
        self.block_total_count = self.get_total_count(fp)

    def get_total_count(self, fp):
        fp.seek(0, SEEK_END)
        count = math.ceil(float(fp.tell())/8192)
        fp.seek(0, SEEK_SET)
        return count

    def __getattr__(self, name):
        func = getattr(self.__dict__['fp'], name)
        if callable(func):
            my_wrapper = func  # fall back to the raw method for names not wrapped below
            if name == "read":
                def my_wrapper(*args, **kwargs):
                    data = func(*args, **kwargs)
                    self.block_count += 1
                    if len(data) == 0:
                        return None
                    # print len(data)
                    if self.type == "put":              
                        if self.block_count == 1:
                            if self.block_total_count == 1:
                                encrypt_data = self.crypt_handler.encrypt(data,self.first_iv)
                            else:
                                encrypt_data = self.crypt_handler.encrypt_without_padding(data,self.first_iv)
                            self.calc_iv = encrypt_data[-self.block_size:]
                            self.crypt_context.calc_iv = self.calc_iv
                            encrypt_data = self.first_iv+encrypt_data
                        elif self.block_count == self.block_total_count:
                            encrypt_data = self.crypt_handler.encrypt(data,self.calc_iv)
                        else:
                            encrypt_data = self.crypt_handler.encrypt_without_padding(data, self.calc_iv)
                            self.calc_iv = encrypt_data[-self.block_size:]
                            self.crypt_context.calc_iv = self.calc_iv
                    elif self.type == "upload_part":
                        need_prefix = False
                        if self.isUploadFirstPart and self.block_count == 1:
                            pre_iv = self.first_iv
                            need_prefix = True
                        else:
                            last_part_num = self.crypt_context.part_num - 1
                            if last_part_num > 0 and self.block_count == 1:
                                if self.crypt_context.iv_dict.get(last_part_num):
                                    self.calc_iv = self.crypt_context.iv_dict.get(last_part_num)
                            else:
                                if not self.calc_iv:
                                    raise ValueError(
                                        "upload part[%d] encryption error:calc_vi miss" % self.crypt_context.part_num)
                            pre_iv = self.calc_iv
                        if self.isUploadLastPart and self.block_count == self.block_total_count:
                            encrypt_data = self.crypt_handler.encrypt(data, pre_iv)
                        else:
                            encrypt_data = self.crypt_handler.encrypt_without_padding(data, pre_iv)
                        if need_prefix:
                            encrypt_data = self.first_iv + encrypt_data
                        self.calc_iv = encrypt_data[-self.block_size:]
                        self.crypt_context.iv_dict[self.crypt_context.part_num] = self.calc_iv
                        # if self.isUploadFirstPart:
                        #     # For multi, the first part's first part will add a prefix of iv.
                        #     if not self.calc_iv:
                        #         if self.isUploadLastPart and self.block_count == self.block_total_count:
                        #             # A very special circumstance: a short piece of data that is both the first of the
                        #             # first and the last of the last.
                        #             encrypt_data = self.crypt_handler.encrypt(data, self.first_iv)
                        #             encrypt_data = self.first_iv + encrypt_data
                        #         else:
                        #             encrypt_data = self.crypt_handler.encrypt_without_padding(data,self.first_iv)
                        #             encrypt_data = self.first_iv+encrypt_data
                        #     elif not self.isUploadLastPart:
                        #         encrypt_data = self.crypt_handler.encrypt_without_padding(data, self.calc_iv)
                        #     elif self.isUploadLastPart and self.block_count == self.block_total_count:
                        #         # When the part is the firstPart AND the lastPart.
                        #         encrypt_data = self.crypt_handler.encrypt(data, self.calc_iv)
                        #     elif self.isUploadLastPart:
                        #         encrypt_data = self.crypt_handler.encrypt_without_padding(data, self.calc_iv)
                        #     else:
                        #         raise Exception
                        # elif not self.isUploadLastPart:
                        #     # The normal part that is neither the first nor the last one.
                        #     if self.block_count == 1:
                        #         self.calc_iv = self.crypt_context.iv_dict[self.crypt_context.part_num-1]
                        #     encrypt_data = self.crypt_handler.encrypt_without_padding(data, self.calc_iv)
                        # else:
                        #     # The last part.
                        #     # The last part's parts use 'encrypt' instead of 'encrypt_without_padding'
                        #     # because the last part's last part need paddling.
                        #     if self.block_count == 1 and self.block_count != self.block_total_count:
                        #         encrypt_data = self.crypt_handler.encrypt_without_padding(data, self.crypt_context.iv_dict[self.crypt_context.part_num-1])
                        #     elif self.block_count == 1 and self.block_count == self.block_total_count:
                        #         encrypt_data = self.crypt_handler.encrypt(data, self.crypt_context.iv_dict[self.crypt_context.part_num-1])
                        #     elif self.block_count == self.block_total_count:
                        #         encrypt_data = self.crypt_handler.encrypt(data, self.calc_iv)
                        #     else:
                        #         encrypt_data = self.crypt_handler.encrypt_without_padding(data,self.calc_iv)
                        # self.calc_iv = encrypt_data[-self.block_size:]
                        # self.crypt_context.iv_dict[self.crypt_context.part_num] = self.calc_iv
                        # print len(encrypt_data), self.block_count, self.block_total_count
                    return encrypt_data
            if name == "seek":
                def my_wrapper(*args, **kwargs):
                    ret = func(*args, **kwargs)
                    # self.seek_pos = args[1]
                    return ret
            if name == "tell":
                def my_wrapper(*args, **kwargs):
                    ret = func(*args, **kwargs)
                    if self.type == "upload_part":
                        if self.isUploadFirstPart:
                            return ret + 16
                        else:
                            return ret
                    return ret+16

            return my_wrapper
        else:
            return func

    def __len__(self):
        self.seek(0, SEEK_END)
        length = self.tell()
        self.seek(0, SEEK_SET)
        blocksize = 16
        if self.type == "put" or (self.type == "upload_part" and self.isUploadLastPart):
            pad = blocksize - length % blocksize
            return length + pad
        else:
            return length
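
The padding arithmetic in __len__ deserves a worked example: CBC ciphertext
must be a whole number of 16-byte blocks, and the "put" and last-part paths
always append 1..16 pad bytes, even when the plaintext is already aligned:

    def padded_length(length, blocksize=16):
        # Mirrors __len__ above: pad is never 0, so aligned input still
        # grows by a full block.
        pad = blocksize - length % blocksize
        return length + pad

    assert padded_length(8190) == 8192  # 2 pad bytes
    assert padded_length(8192) == 8208  # a full 16-byte pad block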
Example #10
class EncryptFp(object):
    """
    A file-like wrapper whose reads return the underlying data encrypted.
    """
    def __init__(self,
                 fp,
                 crypt_context,
                 type,
                 isUploadFirstPart=False,
                 isUploadLastPart=False):
        self.fp = fp
        self.first_iv = crypt_context.first_iv
        self.calc_iv = ""
        self.crypt_context = crypt_context
        self.crypt_handler = Crypts(crypt_context.key)
        self.type = type
        self.block_size = 16
        self.isUploadFirstPart = isUploadFirstPart
        self.isUploadLastPart = isUploadLastPart
        self.seek_pos = SEEK_SET
        self.block_count = 0
        self.block_total_count = self.get_total_count(fp)

    def get_total_count(self, fp):
        fp.seek(0, SEEK_END)
        count = math.ceil(float(fp.tell()) / 8192)
        fp.seek(0, SEEK_SET)
        return count

    def __getattr__(self, name):
        func = getattr(self.__dict__['fp'], name)
        if callable(func):
            my_wrapper = func  # fall back to the raw method for names not wrapped below
            if name == "read":

                def my_wrapper(*args, **kwargs):
                    data = func(*args, **kwargs)
                    self.block_count += 1
                    if len(data) == 0:
                        return None
                    # print len(data)
                    if self.type == "put":
                        if self.block_count == 1:
                            if self.block_total_count == 1:
                                encrypt_data = self.crypt_handler.encrypt(
                                    data, self.first_iv)
                            else:
                                encrypt_data = self.crypt_handler.encrypt_without_padding(
                                    data, self.first_iv)
                            self.calc_iv = encrypt_data[-self.block_size:]
                            self.crypt_context.calc_iv = self.calc_iv
                            encrypt_data = self.first_iv + encrypt_data
                        elif self.block_count == self.block_total_count:
                            encrypt_data = self.crypt_handler.encrypt(
                                data, self.calc_iv)
                        else:
                            encrypt_data = self.crypt_handler.encrypt_without_padding(
                                data, self.calc_iv)
                            self.calc_iv = encrypt_data[-self.block_size:]
                            self.crypt_context.calc_iv = self.calc_iv
                    elif self.type == "upload_part":
                        need_prefix = False
                        if self.isUploadFirstPart and self.block_count == 1:
                            pre_iv = self.first_iv
                            need_prefix = True
                        else:
                            last_part_num = self.crypt_context.part_num - 1
                            if last_part_num > 0 and self.block_count == 1:
                                if self.crypt_context.iv_dict.get(
                                        last_part_num):
                                    self.calc_iv = self.crypt_context.iv_dict.get(
                                        last_part_num)
                            else:
                                if not self.calc_iv:
                                    raise ValueError(
                                        "upload part[%d] encryption error:calc_vi miss"
                                        % self.crypt_context.part_num)
                            pre_iv = self.calc_iv
                        if self.isUploadLastPart and self.block_count == self.block_total_count:
                            encrypt_data = self.crypt_handler.encrypt(
                                data, pre_iv)
                        else:
                            encrypt_data = self.crypt_handler.encrypt_without_padding(
                                data, pre_iv)
                        if need_prefix:
                            encrypt_data = self.first_iv + encrypt_data
                        self.calc_iv = encrypt_data[-self.block_size:]
                        self.crypt_context.iv_dict[
                            self.crypt_context.part_num] = self.calc_iv
                        # if self.isUploadFirstPart:
                        #     # For multi, the first part's first part will add a prefix of iv.
                        #     if not self.calc_iv:
                        #         if self.isUploadLastPart and self.block_count == self.block_total_count:
                        #             # A very special circumstance: a short piece of data that is both the first of the
                        #             # first and the last of the last.
                        #             encrypt_data = self.crypt_handler.encrypt(data, self.first_iv)
                        #             encrypt_data = self.first_iv + encrypt_data
                        #         else:
                        #             encrypt_data = self.crypt_handler.encrypt_without_padding(data,self.first_iv)
                        #             encrypt_data = self.first_iv+encrypt_data
                        #     elif not self.isUploadLastPart:
                        #         encrypt_data = self.crypt_handler.encrypt_without_padding(data, self.calc_iv)
                        #     elif self.isUploadLastPart and self.block_count == self.block_total_count:
                        #         # When the part is the firstPart AND the lastPart.
                        #         encrypt_data = self.crypt_handler.encrypt(data, self.calc_iv)
                        #     elif self.isUploadLastPart:
                        #         encrypt_data = self.crypt_handler.encrypt_without_padding(data, self.calc_iv)
                        #     else:
                        #         raise Exception
                        # elif not self.isUploadLastPart:
                        #     # The normal part that is neither the first nor the last one.
                        #     if self.block_count == 1:
                        #         self.calc_iv = self.crypt_context.iv_dict[self.crypt_context.part_num-1]
                        #     encrypt_data = self.crypt_handler.encrypt_without_padding(data, self.calc_iv)
                        # else:
                        #     # The last part.
                        #     # The last part's parts use 'encrypt' instead of 'encrypt_without_padding'
                        #     # because the last part's last part need paddling.
                        #     if self.block_count == 1 and self.block_count != self.block_total_count:
                        #         encrypt_data = self.crypt_handler.encrypt_without_padding(data, self.crypt_context.iv_dict[self.crypt_context.part_num-1])
                        #     elif self.block_count == 1 and self.block_count == self.block_total_count:
                        #         encrypt_data = self.crypt_handler.encrypt(data, self.crypt_context.iv_dict[self.crypt_context.part_num-1])
                        #     elif self.block_count == self.block_total_count:
                        #         encrypt_data = self.crypt_handler.encrypt(data, self.calc_iv)
                        #     else:
                        #         encrypt_data = self.crypt_handler.encrypt_without_padding(data,self.calc_iv)
                        # self.calc_iv = encrypt_data[-self.block_size:]
                        # self.crypt_context.iv_dict[self.crypt_context.part_num] = self.calc_iv
                        # print len(encrypt_data), self.block_count, self.block_total_count
                    return encrypt_data

            if name == "seek":

                def my_wrapper(*args, **kwargs):
                    ret = func(*args, **kwargs)
                    # self.seek_pos = args[1]
                    return ret

            if name == "tell":

                def my_wrapper(*args, **kwargs):
                    ret = func(*args, **kwargs)
                    if self.type == "upload_part":
                        if self.isUploadFirstPart:
                            return ret + 16
                        else:
                            return ret
                    return ret + 16

            return my_wrapper
        else:
            return func

    def __len__(self):
        self.seek(0, SEEK_END)
        length = self.tell()
        self.seek(0, SEEK_SET)
        blocksize = 16
        if self.type == "put" or (self.type == "upload_part"
                                  and self.isUploadLastPart == True):
            pad = blocksize - length % blocksize
            return length + pad
        else:
            return length
Example #11
    def _get_file_internal(self, fp, headers=None, cb=None, num_cb=10,
                 torrent=False, version_id=None, override_num_retries=None,
                 response_headers=None, hash_algs=None, query_args=None):
        if headers is None:
            headers = {}
        save_debug = self.bucket.connection.debug
        if self.bucket.connection.debug == 1:
            self.bucket.connection.debug = 0

        query_args = query_args or []
        if torrent:
            query_args.append('torrent')

        if hash_algs is None and not torrent:
            hash_algs = {'md5': md5}
        digesters = dict((alg, hash_algs[alg]()) for alg in hash_algs or {})

        # If a version_id is passed in, use that.  If not, check to see
        # if the Key object has an explicit version_id and, if so, use that.
        # Otherwise, don't pass a version_id query param.
        if version_id is None:
            version_id = self.version_id
        if version_id:
            query_args.append('versionId=%s' % version_id)
        if response_headers:
            for key in response_headers:
                query_args.append('%s=%s' % (
                    key, urllib.quote(response_headers[key])))
        query_args = '&'.join(query_args)
        self.open('r', headers, query_args=query_args,
                  override_num_retries=override_num_retries)

        data_len = 0
        if cb:
            if self.size is None:
                cb_size = 0
            else:
                cb_size = self.size
            if self.size is None and num_cb != -1:
                # If size is not available due to chunked transfer for example,
                # we'll call the cb for every 1MB of data transferred.
                cb_count = (1024 * 1024) // self.BufferSize
            elif num_cb > 1:
                cb_count = int(math.ceil(cb_size/self.BufferSize/(num_cb-1.0)))
            elif num_cb < 0:
                cb_count = -1
            else:
                cb_count = 0
            i = 0
            cb(data_len, cb_size)
        try:
            counter = 1
            last_iv = ""
            total_part = math.ceil(float(self.size)/self.BufferSize)
            for bytes in self:
                if self.bucket.connection.local_encrypt:
                    provider = self.bucket.connection.provider
                    user_key = self.bucket.connection.key
                    crypt_handler = Crypts(user_key)
                    if counter == 1:
                        # For the first block, drop the first 16 bytes (the prepended IV).
                        local_iv = bytes[:crypt_handler.block_size]
                        bytes = bytes[crypt_handler.block_size:]
                        server_iv = self.user_meta[provider.metadata_prefix+"iv"]
                        server_iv = base64.b64decode(server_iv)
                        if server_iv and local_iv != server_iv:
                            raise ValueError("decryption error: file IV not equal to server IV")
                        user_iv = local_iv
                    else:
                        user_iv = last_iv
                    last_iv = bytes[-crypt_handler.block_size:]
                    if counter == total_part:
                        # The last part carries the padding appended to its end.
                        full_content = crypt_handler.decrypt(bytes,user_iv)
                        pad_content_char = full_content[-1]
                        for key in crypt_handler.pad_dict:
                            if crypt_handler.pad_dict[key] == pad_content_char:
                                pad_content_char = key
                        decrypt = full_content[:-int(pad_content_char)]
                    else:
                        decrypt = crypt_handler.decrypt(bytes, user_iv)
                    bytes = decrypt
                    counter += 1
                fp.write(bytes)
                data_len += len(bytes)
                for alg in digesters:
                    digesters[alg].update(bytes)
                if cb:
                    if cb_size > 0 and data_len >= cb_size:
                        break
                    i += 1
                    if i == cb_count or cb_count == -1:
                        cb(data_len, cb_size)
                        i = 0
        except IOError as e:
            if e.errno == errno.ENOSPC:
                raise StorageDataError('Out of space for destination file '
                                       '%s' % fp.name)
            raise
        if cb and (cb_count <= 1 or i > 0) and data_len > 0:
            cb(data_len, cb_size)
        for alg in digesters:
            self.local_hashes[alg] = digesters[alg].digest()
        if self.size is None and not torrent and "Range" not in headers:
            self.size = data_len
        self.close()
        self.bucket.connection.debug = save_debug
Example #12
    def set_contents_from_file(self, fp, headers=None, replace=True,
                               cb=None, num_cb=10, policy=None, md5=None,
                               reduced_redundancy=False, query_args=None,
                               encrypt_key=False, size=None, rewind=False,
                               crypt_context=None, calc_encrypt_md5=True):
        """
        Store an object in S3 using the name of the Key object as the
        key in S3 and the contents of the file pointed to by 'fp' as the
        contents. The data is read from 'fp' from its current position until
        'size' bytes have been read or EOF.
        :type fp: file
        :param fp: the file whose contents to upload

        :type headers: dict
        :param headers: Additional HTTP headers that will be sent with
            the PUT request.

        :type replace: bool
        :param replace: If this parameter is False, the method will
            first check to see if an object exists in the bucket with
            the same key.  If it does, it won't overwrite it.  The
            default value is True which will overwrite the object.

        :type cb: function
        :param cb: a callback function that will be called to report
            progress on the upload.  The callback should accept two
            integer parameters, the first representing the number of
            bytes that have been successfully transmitted to S3 and
            the second representing the size of the to be transmitted
            object.

        :type num_cb: int
        :param num_cb: (optional) If a callback is specified with the
            cb parameter this parameter determines the granularity of
            the callback by defining the maximum number of times the
            callback will be called during the file transfer.

        :type policy: :class:`boto.s3.acl.CannedACLStrings`
        :param policy: A canned ACL policy that will be applied to the
            new key in S3.

        :type md5: tuple
        :param md5: A tuple containing the hexdigest version of the MD5
            checksum of the file as the first element and the
            Base64-encoded version of the plain checksum as the second
            element.  This is the same format returned by the
            compute_md5 method.

        :type reduced_redundancy: bool
        :param reduced_redundancy: If True, this will set the storage
            class of the new Key to be REDUCED_REDUNDANCY. The Reduced
            Redundancy Storage (RRS) feature of S3 provides lower
            redundancy at lower storage cost.

        :type encrypt_key: bool
        :param encrypt_key: If True, the new copy of the object will
            be encrypted on the server-side by S3 and will be stored
            in an encrypted form while at rest in S3.

        :type size: int
        :param size: (optional) The maximum number of bytes to read
            from the file pointer (fp). This is useful when uploading
            a file in multiple parts where you are splitting the file
            up into different ranges to be uploaded. If not specified,
            the default behaviour is to read all bytes from the file
            pointer. Fewer bytes may be available.

        :type rewind: bool
        :param rewind: (optional) If True, the file pointer (fp) will
            be rewound to the start before any bytes are read from
            it. The default behaviour is False which reads from the
            current position of the file pointer (fp).

        :rtype: int
        :return: The number of bytes written to the key.
        """
        provider = self.bucket.connection.provider
        headers = headers or {}
        if policy:
            headers[provider.acl_header] = policy
        if encrypt_key:
            headers[provider.server_side_encryption_header] = 'AES256'

        if rewind:
            # caller requests reading from beginning of fp.
            fp.seek(0, os.SEEK_SET)
        else:
            # The following seek/tell/seek logic is intended
            # to detect applications using the older interface to
            # set_contents_from_file(), which automatically rewound the
            # file each time the Key was reused. This changed with commit
            # 14ee2d03f4665fe20d19a85286f78d39d924237e, to support uploads
            # split into multiple parts and uploaded in parallel, and at
            # the time of that commit this check was added because otherwise
            # older programs would get a success status and upload an empty
            # object. Unfortunately, it's very inefficient for fp's implemented
            # by KeyFile (used, for example, by gsutil when copying between
            # providers). So, we skip the check for the KeyFile case.
            # TODO: At some point consider removing this seek/tell/seek
            # logic, after enough time has passed that it's unlikely any
            # programs remain that assume the older auto-rewind interface.
            if not isinstance(fp, KeyFile):
                spos = fp.tell()
                fp.seek(0, os.SEEK_END)
                if fp.tell() == spos:
                    fp.seek(0, os.SEEK_SET)
                    if fp.tell() != spos:
                        # Raise an exception as this is likely a programming
                        # error whereby there is data before the fp but nothing
                        # after it.
                        fp.seek(spos)
                        raise AttributeError('fp is at EOF. Use rewind option '
                                             'or seek() to data start.')
                # seek back to the correct position.
                fp.seek(spos)
        if reduced_redundancy:
            self.storage_class = 'REDUCED_REDUNDANCY'
            if provider.storage_class_header:
                headers[provider.storage_class_header] = self.storage_class
                # TODO - What if provider doesn't support reduced redundancy?
                # What if different providers provide different classes?
        if hasattr(fp, 'name'):
            self.path = fp.name
        if self.bucket is not None:
            if not md5 and provider.supports_chunked_transfer():
                # Defer the MD5 calculation so it happens on the fly;
                # we don't know anything about the size yet.
                chunked_transfer = True
                self.size = None
            else:
                chunked_transfer = False
                if isinstance(fp, KeyFile):
                    # Avoid EOF seek for KeyFile case as it's very inefficient.
                    key = fp.getkey()
                    size = key.size - fp.tell()
                    self.size = size
                    # At present both GCS and S3 use MD5 for the etag for
                    # non-multipart-uploaded objects. If the etag is 32 hex
                    # chars use it as an MD5, to avoid having to read the file
                    # twice while transferring.
                    if (re.match('^"[a-fA-F0-9]{32}"$', key.etag)):
                        etag = key.etag.strip('"')
                        md5 = (etag, base64.b64encode(binascii.unhexlify(etag)))
                if not md5:
                    # compute_md5() also sets self.size to the actual
                    # number of bytes read while computing the MD5.
                    md5 = self.compute_md5(fp, size)
                    # adjust size if required
                    size = self.size
                elif size:
                    self.size = size
                else:
                    # If md5 is provided, we still need the size, so
                    # calculate it from the bytes remaining to end of content.
                    spos = fp.tell()
                    fp.seek(0, os.SEEK_END)
                    self.size = fp.tell() - spos
                    fp.seek(spos)
                    size = self.size
                self.md5 = md5[0]
                self.base64md5 = md5[1]

            if self.name is None:
                self.name = self.md5
            if not replace:
                if self.bucket.lookup(self.name):
                    return
            if self.bucket.connection.local_encrypt and self.size:
                if not crypt_context:
                    crypt_context = Crypts(self.bucket.connection.key)
                    crypt_context.action_info = "put"
                    crypt_context.calc_md5 = calc_encrypt_md5
                return self.send_file(fp, headers=headers, cb=cb, num_cb=num_cb,
                                      query_args=query_args,
                                      chunked_transfer=chunked_transfer, size=size,
                                      crypt_context=crypt_context)
            return self.send_file(fp, headers=headers, cb=cb, num_cb=num_cb,
                           query_args=query_args,
                           chunked_transfer=chunked_transfer, size=size)