Example No. 1
    def _copy_via_put_post(self, src, dest, metadata=None):
        """Fallback copy method for older Swift implementations."""
        headers = CaseInsensitiveDict()
        headers['X-Copy-From'] = '/%s/%s%s' % (self.container_name, self.prefix, src)

        if metadata is not None:
            # We can't do a direct copy, because during copy we can only update the
            # metadata, but not replace it. Therefore, we have to make a full copy
            # followed by a separate request to replace the metadata. To avoid an
            # inconsistent intermediate state, we use a temporary object.
            final_dest = dest
            dest = final_dest + TEMP_SUFFIX
            headers['X-Delete-After'] = '600'

        try:
            self._copy_helper('PUT', '/%s%s' % (self.prefix, dest), headers)
        except HTTPError as exc:
            if exc.status == 404:
                raise NoSuchObject(src)
            raise

        if metadata is None:
            return

        # Update metadata
        headers = CaseInsensitiveDict()
        self._add_meta_headers(headers, metadata, chunksize=self.features.max_meta_len)
        self._copy_helper('POST', '/%s%s' % (self.prefix, dest), headers)

        # Rename object
        headers = CaseInsensitiveDict()
        headers['X-Copy-From'] = '/%s/%s%s' % (self.container_name, self.prefix, dest)
        self._copy_helper('PUT', '/%s%s' % (self.prefix, final_dest), headers)
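
Every snippet on this page keeps its HTTP headers in a CaseInsensitiveDict, so that a header written as 'X-Copy-From' can later be read or overwritten as 'x-copy-from' (HTTP header names are case-insensitive). A minimal sketch of that behaviour, using requests.structures.CaseInsensitiveDict purely as a stand-in for the equivalent class that the s3ql snippets import:

# Illustration only: case-insensitive lookup and overwrite of header names.
from requests.structures import CaseInsensitiveDict

headers = CaseInsensitiveDict()
headers['X-Copy-From'] = '/container/prefix/src'
assert headers['x-copy-from'] == '/container/prefix/src'

# Assigning under a differently cased key replaces the same entry.
headers['X-COPY-FROM'] = '/container/prefix/other'
assert len(headers) == 1 and headers['X-Copy-From'] == '/container/prefix/other'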
Example No. 2
    def _extract_b2_metadata(self, response, obj_key):
        '''Extract metadata from HTTP response object'''

        headers = CaseInsensitiveDict()
        for k, v in response.headers.items():
            # we convert to lower case in order to do case-insensitive comparison
            if k.lower().startswith(info_header_prefix.lower() + 'meta-'):
                headers[k] = self._b2_url_decode(v)

        format_ = headers.get('%smeta-format' % info_header_prefix, 'raw')
        if format_ != 'raw2':  # Current metadata format
            raise CorruptedObjectError('invalid metadata format: %s' % format_)

        parts = []
        for i in count():
            part = headers.get('%smeta-%03d' % (info_header_prefix, i), None)
            if part is None:
                break
            parts.append(part)

        buffer = urllib.parse.unquote(''.join(parts))
        meta = literal_eval('{ %s }' % buffer)

        # Decode bytes values
        for (k, v) in meta.items():
            if not isinstance(v, bytes):
                continue
            try:
                meta[k] = base64.b64decode(v)
            except binascii.Error:
                # This should trigger an MD5 mismatch below
                meta[k] = None

        # Check MD5. There is a case to be made for treating a mismatch as a
        # `CorruptedObjectError` rather than a `BadDigestError`, because the MD5
        # sum is not calculated on-the-fly by the server but stored with the
        # object, and therefore does not actually verify what the server has
        # sent over the wire. However, it seems more likely for the data to get
        # accidentally corrupted in transit than to get accidentally corrupted
        # on the server (which hopefully checksums its storage devices).
        md5 = base64.b64encode(checksum_basic_mapping(meta)).decode('ascii')
        if md5 != headers.get('%smeta-md5' % info_header_prefix, None):
            log.warning('MD5 mismatch in metadata for %s', obj_key)

            # When trying to read file system revision 23 or earlier, we will
            # get a MD5 error because the checksum was calculated
            # differently. In order to get a better error message, we special
            # case the s3ql_passphrase and s3ql_metadata object (which are only
            # retrieved once at program start).
            if obj_key in ('s3ql_passphrase', 's3ql_metadata'):
                raise CorruptedObjectError('Meta MD5 for %s does not match' %
                                           obj_key)
            raise BadDigestError(400, 'bad_digest',
                                 'Meta MD5 for %s does not match' % obj_key)

        return meta
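
The method above only shows the decoding side of the 'raw2' metadata format: the metadata dict is rendered as repr() key/value pairs, percent-encoded, split across numbered ...meta-NNN headers, with bytes values stored base64-encoded and a checksum in ...meta-md5. A hypothetical sketch of the encoding direction that is consistent with what _extract_b2_metadata expects (the real _add_meta_headers / _add_b2_metadata_to_headers is not shown on this page, and the B2-specific header encoding layer is ignored here; info_header_prefix and checksum_basic_mapping are the same names used above and are assumed to be in scope):

# Hypothetical encoder matching the decoder above; a sketch, not s3ql's code.
import base64
import urllib.parse

def encode_meta_headers(headers, meta, chunksize=255):
    # bytes values are stored base64-encoded, so literal_eval() on the other
    # side sees a bytes literal that can be b64decode'd again
    stored = {k: base64.b64encode(v) if isinstance(v, bytes) else v
              for (k, v) in meta.items()}
    buf = urllib.parse.quote(
        ', '.join('%r: %r' % (k, v) for (k, v) in stored.items()))
    headers[info_header_prefix + 'meta-format'] = 'raw2'
    for (i, start) in enumerate(range(0, len(buf), chunksize)):
        headers['%smeta-%03d' % (info_header_prefix, i)] = buf[start:start + chunksize]
    # same checksum helper that the decoder verifies against
    headers[info_header_prefix + 'meta-md5'] = base64.b64encode(
        checksum_basic_mapping(meta)).decode('ascii')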
Example No. 3
    def _get_access_token(self):
        log.info('Requesting new access token')

        headers = CaseInsensitiveDict()
        headers['Content-Type'] = 'application/x-www-form-urlencoded; charset=utf-8'

        body = urllib.parse.urlencode({
            'client_id': OAUTH_CLIENT_ID,
            'client_secret': OAUTH_CLIENT_SECRET,
            'refresh_token': self.password,
            'grant_type': 'refresh_token' })

        conn = HTTPConnection('accounts.google.com', 443, proxy=self.proxy,
                              ssl_context=self.ssl_context)
        try:

            conn.send_request('POST', '/o/oauth2/token', headers=headers,
                              body=body.encode('utf-8'))
            resp = conn.read_response()
            json_resp = self._parse_json_response(resp, conn)

            if resp.status > 299 or resp.status < 200:
                assert 'error' in json_resp
            if 'error' in json_resp:
                raise AuthenticationError(json_resp['error'])
            else:
                self.access_token[self.password] = json_resp['access_token']
        finally:
            conn.disconnect()
Example No. 4
    def open_write(self, key, metadata=None, is_compressed=False, extra_headers=None):
        """
        The returned object will buffer all data and only start the upload
        when its `close` method is called.
        """

        log.debug('started with %s', key)

        headers = CaseInsensitiveDict()
        if extra_headers is not None:
            headers.update(extra_headers)
        if metadata is None:
            metadata = dict()
        self._add_meta_headers(headers, metadata)

        return ObjectW(key, self, headers)
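
The ObjectW class returned here is not listed on this page. A hypothetical sketch of a buffering writer that matches the docstring above and the attributes used by the close() method in Example No. 6 (fh, obj_size, sha1, closed):

# Hypothetical sketch of a buffering object writer; the real ObjectW may differ.
import hashlib
import tempfile

class BufferingObjectW:
    '''Buffer written data and defer the actual upload to close().'''

    def __init__(self, key, backend, headers):
        self.key = key
        self.backend = backend
        self.headers = headers
        self.closed = False
        self.obj_size = 0
        self.sha1 = hashlib.sha1()
        # seekable buffer, spilled to disk once it grows large
        self.fh = tempfile.SpooledTemporaryFile(max_size=10 * 1024 ** 2)

    def write(self, buf):
        self.fh.write(buf)
        self.sha1.update(buf)
        self.obj_size += len(buf)
        return len(buf)

    def close(self):
        # the actual upload would happen here, along the lines of Example No. 6
        self.closed = True
        self.fh.close()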
Example No. 5
    def open_write(self, key, metadata=None, is_compressed=False, extra_headers=None):
        """
        The returned object will buffer all data and only start the upload
        when its `close` method is called.
        """

        log.debug('started with %s', key)

        headers = CaseInsensitiveDict()
        if extra_headers is not None:
            headers.update(extra_headers)
        if metadata is None:
            metadata = dict()
        self._add_meta_headers(headers, metadata)

        return ObjectW(key, self, headers)
Example No. 6
    def close(self):
        """Close object and upload data"""
        log.debug('started with %s', self.key)

        if self.closed:
            # still call fh.close, may have generated an error before
            self.fh.close()
            return

        self.fh.seek(0)
        upload_auth_token, upload_url = self.backend._get_upload_url()
        upload_url = urllib.parse.urlparse(upload_url)

        with HTTPConnection(upload_url.hostname, 443, ssl_context=self.backend.ssl_context) as conn_up:

            headers = CaseInsensitiveDict()
            headers['X-Bz-File-Name'] = self.backend.prefix + self.key
            headers['Content-Type'] = 'application/octet-stream'
            headers['Content-Length'] = self.obj_size
            headers['X-Bz-Content-Sha1'] = self.sha1.hexdigest()

            if self.meta is None:
                self.meta = dict()
            self.backend._add_meta_headers(headers, self.meta)

            self.backend._do_request('POST', upload_url.path + '?' + upload_url.query,
                                     conn_up,
                                     headers=headers,
                                     body=self.fh,
                                     auth_token=upload_auth_token,
                                     body_size=self.obj_size)

        self.fh.close()
        self.closed = True
Example No. 7
    def copy(self, src, dest, metadata=None):
        log.debug('started with %s, %s', src, dest)

        if not (metadata is None or isinstance(metadata, dict)):
            raise TypeError('*metadata*: expected dict or None, got %s' %
                            type(metadata))

        headers = CaseInsensitiveDict()

        if metadata is not None:
            headers['Content-Type'] = 'application/json; charset="utf-8"'
            body = json.dumps({'metadata': _wrap_user_meta(metadata)}).encode()
        else:
            body = None

        path = '/storage/v1/b/%s/o/%s/rewriteTo/b/%s/o/%s' % (
            urllib.parse.quote(self.bucket_name, safe=''),
            urllib.parse.quote(self.prefix + src, safe=''),
            urllib.parse.quote(self.bucket_name, safe=''),
            urllib.parse.quote(self.prefix + dest, safe=''))
        try:
            resp = self._do_request('POST', path, headers=headers, body=body)
        except RequestError as exc:
            exc = _map_request_error(exc, src)
            if exc:
                raise exc
            raise

        json_resp = self._parse_json_response(resp)
        assert json_resp['done']
        assert 'rewriteToken' not in json_resp
Example No. 8
def test_put_separate(conn):
    data = DUMMY_DATA
    conn.send_request('PUT', '/allgood', body=BodyFollowing(len(data)))
    conn.write(data)
    resp = conn.read_response()
    conn.discard()
    assert resp.status == 204
    assert resp.length == 0
    assert resp.reason == 'Ok, but no MD5'

    headers = CaseInsensitiveDict()
    headers['Content-MD5'] = b64encode(
        hashlib.md5(data).digest()).decode('ascii')
    conn.send_request('PUT',
                      '/allgood',
                      body=BodyFollowing(len(data)),
                      headers=headers)
    conn.write(data)
    resp = conn.read_response()
    conn.discard()
    assert resp.status == 204
    assert resp.length == 0
    assert resp.reason == 'MD5 matched'

    headers['Content-MD5'] = 'nUzaJEag3tOdobQVU/39GA=='
    conn.send_request('PUT',
                      '/allgood',
                      body=BodyFollowing(len(data)),
                      headers=headers)
    conn.write(data)
    resp = conn.read_response()
    conn.discard()
    assert resp.status == 400
    assert resp.reason.startswith('MD5 mismatch')
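
Note that Content-MD5 carries the base64 encoding of the raw 16-byte MD5 digest, not the hex digest. Later examples call a md5sum_b64() helper for the same header (Examples No. 12 and No. 31); a definition consistent with how this test builds the value would be (a sketch, not necessarily the project's exact helper):

# Sketch of a md5sum_b64() helper consistent with the test above.
import hashlib
from base64 import b64encode

def md5sum_b64(buf):
    '''Return the base64-encoded MD5 digest of *buf*, as used in Content-MD5.'''
    return b64encode(hashlib.md5(buf).digest()).decode('ascii')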
Example No. 9
    def update_meta(self, key, metadata):
        log.debug('started with %s', key)
        headers = CaseInsensitiveDict()
        self._add_meta_headers(headers,
                               metadata,
                               chunksize=self.features.max_meta_len)
        self._do_request('POST', '/%s%s' % (self.prefix, key), headers=headers)
        self.conn.discard()
Example No. 10
    def _get_conn(self):
        '''Obtain connection to server and authentication token'''

        log.debug('started')

        if 'no-ssl' in self.options:
            ssl_context = None
        else:
            ssl_context = self.ssl_context

        headers = CaseInsensitiveDict()
        headers['X-Auth-User'] = self.login
        headers['X-Auth-Key'] = self.password

        with HTTPConnection(self.hostname,
                            self.port,
                            proxy=self.proxy,
                            ssl_context=ssl_context) as conn:
            conn.timeout = int(self.options.get('tcp-timeout', 20))

            for auth_path in ('/v1.0', '/auth/v1.0'):
                log.debug('GET %s', auth_path)
                conn.send_request('GET', auth_path, headers=headers)
                resp = conn.read_response()

                if resp.status in (404, 412):
                    log.debug('auth to %s failed, trying next path', auth_path)
                    conn.discard()
                    continue

                elif resp.status == 401:
                    raise AuthorizationError(resp.reason)

                elif resp.status > 299 or resp.status < 200:
                    raise HTTPError(resp.status, resp.reason, resp.headers)

                # Pylint can't infer SplitResult Types
                #pylint: disable=E1103
                self.auth_token = resp.headers['X-Auth-Token']
                o = urlsplit(resp.headers['X-Storage-Url'])
                self.auth_prefix = urllib.parse.unquote(o.path)
                if o.scheme == 'https':
                    ssl_context = self.ssl_context
                elif o.scheme == 'http':
                    ssl_context = None
                else:
                    # fall through to scheme used for authentication
                    pass

                conn = HTTPConnection(o.hostname,
                                      o.port,
                                      proxy=self.proxy,
                                      ssl_context=ssl_context)
                conn.timeout = int(self.options.get('tcp-timeout', 20))
                return conn

            raise RuntimeError('No valid authentication path found')
Example No. 11
File: s3c.py Project: mkhon/s3ql
    def copy(self, src, dest, metadata=None, extra_headers=None):
        log.debug('started with %s, %s', src, dest)

        headers = CaseInsensitiveDict()
        if extra_headers is not None:
            headers.update(extra_headers)
        headers[self.hdr_prefix + 'copy-source'] = \
            urllib.parse.quote('/%s/%s%s' % (self.bucket_name, self.prefix, src))

        if metadata is None:
            headers[self.hdr_prefix + 'metadata-directive'] = 'COPY'
        else:
            headers[self.hdr_prefix + 'metadata-directive'] = 'REPLACE'
            self._add_meta_headers(headers, metadata)

        try:
            resp = self._do_request('PUT',
                                    '/%s%s' % (self.prefix, dest),
                                    headers=headers)
        except NoSuchKeyError:
            raise NoSuchObject(src)

        # When copying, S3 may return error despite a 200 OK status
        # http://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectCOPY.html
        # https://doc.s3.amazonaws.com/proposals/copy.html
        if self.options.get('dumb-copy', False):
            self.conn.discard()
            return
        body = self.conn.readall()
        root = self._parse_xml_response(resp, body)

        # Some S3 implementations do not have a namespace on
        # CopyObjectResult.
        if root.tag in [
                self.xml_ns_prefix + 'CopyObjectResult', 'CopyObjectResult'
        ]:
            return
        elif root.tag in [self.xml_ns_prefix + 'Error', 'Error']:
            raise get_S3Error(root.findtext('Code'), root.findtext('Message'),
                              resp.headers)
        else:
            log.error('Unexpected server reply to copy operation:\n%s',
                      self._dump_response(resp, body))
            raise RuntimeError('Copy response has %s as root tag' % root.tag)
Example No. 12
    def _do_request(self, method, path, subres=None, query_string=None,
                    headers=None, body=None):
        '''Send request, read and return response object

        This method modifies the *headers* dictionary.
        '''

        log.debug('started with %r, %r, %r, %r, %r, %r',
                  method, path, subres, query_string, headers, body)

        if headers is None:
            headers = CaseInsensitiveDict()

        if isinstance(body, (bytes, bytearray, memoryview)):
            headers['Content-MD5'] = md5sum_b64(body)

        if self.conn is None:
            log.debug('no active connection, calling _get_conn()')
            self.conn = self._get_conn()

        # Construct full path
        path = urllib.parse.quote('%s/%s%s' % (self.auth_prefix, self.container_name, path))
        if query_string:
            s = urllib.parse.urlencode(query_string, doseq=True)
            if subres:
                path += '?%s&%s' % (subres, s)
            else:
                path += '?%s' % s
        elif subres:
            path += '?%s' % subres

        headers['X-Auth-Token'] = self.auth_token
        try:
            resp = self._do_request_inner(method, path, body=body, headers=headers)
        except Exception as exc:
            if is_temp_network_error(exc) or isinstance(exc, ssl.SSLError):
                # We probably can't use the connection anymore
                self.conn.disconnect()
            raise

        # Success
        if resp.status >= 200 and resp.status <= 299:
            return resp

        # Expired auth token
        if resp.status == 401:
            self._do_authentication_expired(resp.reason)
            # raises AuthenticationExpired

        # Discard any response body (for HEAD requests the server must not
        # return one even in case of errors) and report the error
        self.conn.discard()
        raise HTTPError(resp.status, resp.reason, resp.headers)
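
The path construction above combines the percent-encoded object path with an optional sub-resource and an optional query string. A short illustration with made-up values:

# Illustration only (account, container and parameters are made up): how
# *subres* and *query_string* are combined into the final request path.
import urllib.parse

path = urllib.parse.quote('/v1/AUTH_example/container/prefix/key')
subres = 'delete'
query_string = {'format': 'json', 'limit': 1000}

s = urllib.parse.urlencode(query_string, doseq=True)
print('%s?%s&%s' % (path, subres, s))
# /v1/AUTH_example/container/prefix/key?delete&format=json&limit=1000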
Example No. 13
File: swift.py Project: iphydf/s3ql
    def _copy_via_copy(self, src, dest, metadata=None):
        """Copy for more modern Swift implementations that know the
        X-Fresh-Metadata option and the native COPY method."""
        headers = CaseInsensitiveDict()
        headers['Destination'] = '/%s/%s%s' % (self.container_name, self.prefix, dest)
        if metadata is not None:
            self._add_meta_headers(headers, metadata, chunksize=self.features.max_meta_len)
            headers['X-Fresh-Metadata'] = 'true'
        resp = self._do_request('COPY', '/%s%s' % (self.prefix, src), headers=headers)
        self._assert_empty_response(resp)
Example No. 14
File: gs.py Project: naeloob/s3ql
    def _get_access_token(self):
        log.info('Requesting new access token')

        headers = CaseInsensitiveDict()
        headers['Content-Type'] = 'application/x-www-form-urlencoded; charset=utf-8'

        body = urlencode({'client_id': OAUTH_CLIENT_ID,
                          'client_secret': OAUTH_CLIENT_SECRET,
                          'refresh_token': self.password,
                          'grant_type': 'refresh_token' })

        conn = HTTPConnection('accounts.google.com', 443, proxy=self.proxy,
                              ssl_context=self.ssl_context)
        try:

            conn.send_request('POST', '/o/oauth2/token', headers=headers,
                              body=body.encode('utf-8'))
            resp = conn.read_response()

            if resp.status > 299 or resp.status < 200:
                raise HTTPError(resp.status, resp.reason, resp.headers)

            content_type = resp.headers.get('Content-Type', None)
            if content_type:
                hit = re.match(r'application/json(?:; charset="(.+)")?$',
                               resp.headers['Content-Type'], re.IGNORECASE)
            else:
                hit = None

            if not hit:
                log.error('Unexpected server reply when refreshing access token:\n%s',
                          self._dump_response(resp))
                raise RuntimeError('Unable to parse server response')

            charset = hit.group(1) or 'utf-8'
            body = conn.readall().decode(charset)
            resp_json = json.loads(body)

            if not isinstance(resp_json, dict):
                log.error('Invalid json server response. Expected dict, got:\n%s', body)
                raise RuntimeError('Unable to parse server response')

            if 'error' in resp_json:
                raise AuthenticationError(resp_json['error'])

            if 'access_token' not in resp_json:
                log.error('Unable to find access token in server response:\n%s', body)
                raise RuntimeError('Unable to parse server response')

            self.access_token[self.password] = resp_json['access_token']

        finally:
            conn.disconnect()
Example No. 15
File: s3c.py Project: s3ql/s3ql
    def copy(self, src, dest, metadata=None, extra_headers=None):
        log.debug('started with %s, %s', src, dest)

        headers = CaseInsensitiveDict()
        if extra_headers is not None:
            headers.update(extra_headers)
        headers[self.hdr_prefix + 'copy-source'] = \
            urllib.parse.quote('/%s/%s%s' % (self.bucket_name, self.prefix, src))

        if metadata is None:
            headers[self.hdr_prefix + 'metadata-directive'] = 'COPY'
        else:
            headers[self.hdr_prefix + 'metadata-directive'] = 'REPLACE'
            self._add_meta_headers(headers, metadata)

        try:
            resp = self._do_request('PUT', '/%s%s' % (self.prefix, dest), headers=headers)
        except NoSuchKeyError:
            raise NoSuchObject(src)

        # When copying, S3 may return error despite a 200 OK status
        # http://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectCOPY.html
        # https://doc.s3.amazonaws.com/proposals/copy.html
        if self.options.get('dumb-copy', False):
            self.conn.discard()
            return
        body = self.conn.readall()
        root = self._parse_xml_response(resp, body)

        # Some S3 implementations do not have a namespace on
        # CopyObjectResult.
        if root.tag in [self.xml_ns_prefix + 'CopyObjectResult', 'CopyObjectResult']:
            return
        elif root.tag in [self.xml_ns_prefix + 'Error', 'Error']:
            raise get_S3Error(root.findtext('Code'), root.findtext('Message'),
                              resp.headers)
        else:
            log.error('Unexpected server reply to copy operation:\n%s',
                      self._dump_response(resp, body))
            raise RuntimeError('Copy response has %s as root tag' % root.tag)
Example No. 16
    def copy(self, src, dest, metadata=None):
        log.debug('started with %s, %s', src, dest)
        if dest.endswith(TEMP_SUFFIX) or src.endswith(TEMP_SUFFIX):
            raise ValueError('Keys must not end with %s' % TEMP_SUFFIX)

        headers = CaseInsensitiveDict()
        headers['X-Copy-From'] = '/%s/%s%s' % (self.container_name,
                                               self.prefix, src)

        if metadata is not None:
            # We can't do a direct copy, because during copy we can only update the
            # metadata, but not replace it. Therefore, we have to make a full copy
            # followed by a separate request to replace the metadata. To avoid an
            # inconsistent intermediate state, we use a temporary object.
            final_dest = dest
            dest = final_dest + TEMP_SUFFIX
            headers['X-Delete-After'] = '600'

        try:
            self._copy_helper('PUT', '/%s%s' % (self.prefix, dest), headers)
        except HTTPError as exc:
            if exc.status == 404:
                raise NoSuchObject(src)
            raise

        if metadata is None:
            return

        # Update metadata
        headers = CaseInsensitiveDict()
        self._add_meta_headers(headers, metadata)
        self._copy_helper('POST', '/%s%s' % (self.prefix, dest), headers)

        # Rename object
        headers = CaseInsensitiveDict()
        headers['X-Copy-From'] = '/%s/%s%s' % (self.container_name,
                                               self.prefix, dest)
        self._copy_helper('PUT', '/%s%s' % (self.prefix, final_dest), headers)
Example No. 17
    def _do_request(self, method, path, query_string=None, headers=None, body=None):
        '''Send request, read and return response object'''

        log.debug('started with %s %s, qs=%s', method, path, query_string)

        if headers is None:
            headers = CaseInsensitiveDict()

        expect100 = isinstance(body, BodyFollowing)
        headers['host'] = self.hostname
        if query_string:
            s = urllib.parse.urlencode(query_string, doseq=True)
            path += '?%s' % s

        # If we have an access token, try to use it.
        token = self.access_token.get(self.refresh_token, None)
        if token is not None:
            headers['Authorization'] = 'Bearer ' + token
            self.conn.send_request(method, path, body=body, headers=headers,
                                   expect100=expect100)
            resp = self.conn.read_response()
            if ((expect100 and resp.status == 100) or
                (not expect100 and 200 <= resp.status <= 299)):
                return resp
            elif resp.status != 401:
                raise self._parse_error_response(resp)
            self.conn.discard()

        # If we reach this point, then the access token must have
        # expired, so we try to get a new one. We use a lock to prevent
        # multiple threads from refreshing the token simultaneously.
        with self._refresh_lock:
            # Don't refresh if another thread has already done so while
            # we waited for the lock.
            if token is None or self.access_token.get(self.refresh_token, None) == token:
                self._get_access_token()

        # Try request again. If this still fails, propagate the error
        # (because we have just refreshed the access token).
        # FIXME: We can't rely on this if e.g. the system hibernated
        # after refreshing the token, but before reaching this line.
        headers['Authorization'] = 'Bearer ' + self.access_token[self.refresh_token]
        self.conn.send_request(method, path, body=body, headers=headers,
                               expect100=expect100)
        resp = self.conn.read_response()
        if ((expect100 and resp.status == 100) or
            (not expect100 and 200 <= resp.status <= 299)):
            return resp
        else:
            raise self._parse_error_response(resp)
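
The refresh block above is a double-checked pattern: only a thread that still sees the stale token performs the refresh, while threads that were blocked on the lock find a new token and skip it. A stripped-down, generic sketch of the same idea (names are illustrative, not taken from this code):

# Generic sketch of a lock-protected, double-checked token refresh.
import threading

class TokenCache:
    def __init__(self, fetch_token):
        self._fetch = fetch_token      # callable that obtains a fresh token
        self._lock = threading.Lock()
        self.token = None

    def refresh(self, stale_token):
        with self._lock:
            # another thread may already have refreshed while we waited
            if self.token is None or self.token == stale_token:
                self.token = self._fetch()
        return self.token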
Example No. 18
    def lookup(self, key):
        log.debug('started with %s', key)

        key = self._encode_key(key)
        headers = CaseInsensitiveDict()
        headers['Range'] = "bytes=0-1"  # Only get first byte

        resp, data = self._do_request('GET',
                                      '/file/%s/%s%s' % (self.bucket_name,
                                                         self.prefix, key),
                                      self.conn_download, headers=headers)

        meta = self._extractmeta(resp, key)
        return meta
Example No. 19
    def open_write(self, key, metadata=None, is_compressed=False):
        '''
        The returned object will buffer all data and only start the uploads
        when its `close` method is called.
        '''

        log.debug('started with %s', key)

        headers = CaseInsensitiveDict()
        if metadata is None:
            metadata = dict()

        self._add_b2_metadata_to_headers(headers, metadata)

        return ObjectW(key, self, headers)
Example No. 20
    def open_write(self, key, metadata=None, is_compressed=False):
        """
        The returned object will buffer all data and only start the upload
        when its `close` method is called.
        """
        log.debug('started with %s', key)

        if key.endswith(TEMP_SUFFIX):
            raise ValueError('Keys must not end with %s' % TEMP_SUFFIX)

        headers = CaseInsensitiveDict()
        if metadata is None:
            metadata = dict()
        self._add_meta_headers(headers, metadata, chunksize=self.features.max_meta_len)

        return ObjectW(key, self, headers)
Example No. 21
def test_put(conn):
    data = DUMMY_DATA
    conn.send_request('PUT', '/allgood', body=data)
    resp = conn.read_response()
    conn.discard()
    assert resp.status == 204
    assert resp.length == 0
    assert resp.reason == 'MD5 matched'

    headers = CaseInsensitiveDict()
    headers['Content-MD5'] = 'nUzaJEag3tOdobQVU/39GA=='
    conn.send_request('PUT', '/allgood', body=data, headers=headers)
    resp = conn.read_response()
    conn.discard()
    assert resp.status == 400
    assert resp.reason.startswith('MD5 mismatch')
Example No. 22
    def _authorize_account(self):
        '''Authorize API calls'''

        authorize_host = 'api.backblazeb2.com'
        authorize_url = api_url_prefix + 'b2_authorize_account'

        id_and_key = self.b2_application_key_id + ':' + self.b2_application_key
        basic_auth_string = 'Basic ' + str(
            base64.b64encode(bytes(id_and_key, 'UTF-8')), encoding='UTF-8')

        with HTTPConnection(authorize_host, 443,
                            ssl_context=self.ssl_context) as connection:

            headers = CaseInsensitiveDict()
            headers['Authorization'] = basic_auth_string

            connection.send_request('GET',
                                    authorize_url,
                                    headers=headers,
                                    body=None)
            response = connection.read_response()
            response_body = connection.readall()

            if response.status != 200:
                raise RuntimeError('Authorization failed.')

            j = json.loads(response_body.decode('utf-8'))

            self.account_id = j['accountId']

            allowed_info = j.get('allowed')
            if allowed_info.get('bucketId'):
                self.bucket_id = allowed_info.get('bucketId')
                if allowed_info.get('bucketName') != self.bucket_name:
                    raise RuntimeError(
                        'Provided API key can not access desired bucket.')

            if not self._check_key_capabilities(allowed_info):
                raise RuntimeError(
                    'Provided API key does not have the required capabilities.'
                )

            self.api_url = urlparse(j['apiUrl'])
            self.download_url = urlparse(j['downloadUrl'])
            self.authorization_token = j['authorizationToken']
Example No. 23
File: gs.py Project: pheanex/s3ql
    def update_meta(self, key, metadata):

        headers = CaseInsensitiveDict()
        headers['Content-Type'] = 'application/json; charset="utf-8"'
        body = json.dumps({ 'metadata': _wrap_user_meta(metadata),
                            'acl': [] }).encode()

        path = '/storage/v1/b/%s/o/%s' % (
            urllib.parse.quote(self.bucket_name, safe=''),
            urllib.parse.quote(self.prefix + key, safe=''))
        try:
            resp = self._do_request('PUT', path, headers=headers, body=body)
        except RequestError as exc:
            exc = _map_request_error(exc, key)
            if exc:
                raise exc
            raise

        self._parse_json_response(resp)
Example No. 24
File: gs.py Project: naeloob/s3ql
    def copy(self, src, dest, metadata=None):
        log.debug('started with %s, %s', src, dest)

        if not (metadata is None or isinstance(metadata, dict)):
            raise TypeError('*metadata*: expected dict or None, got %s' % type(metadata))

        headers = CaseInsensitiveDict()
        headers[self.hdr_prefix + 'copy-source'] = \
            '/%s/%s%s' % (self.bucket_name, self.prefix, src)

        if metadata is None:
            headers[self.hdr_prefix + 'metadata-directive'] = 'COPY'
        else:
            headers[self.hdr_prefix + 'metadata-directive'] = 'REPLACE'
            self._add_meta_headers(headers, metadata)

        try:
            self._do_request('PUT', '/%s%s' % (self.prefix, dest), headers=headers)
            self.conn.discard()
        except s3c.NoSuchKeyError:
            raise NoSuchObject(src)
Example No. 25
    def _get_conn(self):
        '''Obtain connection to server and authentication token'''

        log.debug('started')

        if 'no-ssl' in self.options:
            ssl_context = None
        else:
            ssl_context = self.ssl_context

        headers = CaseInsensitiveDict()
        headers['Content-Type'] = 'application/json'
        headers['Accept'] = 'application/json; charset="utf-8"'

        if ':' in self.login:
            (tenant,user) = self.login.split(':')
        else:
            tenant = None
            user = self.login

        domain = self.options.get('domain', None)
        if domain:
            if not tenant:
                raise ValueError("Tenant is required when Keystone v3 is used")

            # In simple cases where there's only one domain, the project domain
            # will be the same as the authentication domain, but this option
            # allows for them to be different
            project_domain = self.options.get('project-domain', domain)

            auth_body = {
                'auth': {
                    'identity': {
                        'methods': ['password'],
                        'password': {
                            'user': {
                                'name': user,
                                'domain': {
                                    'id': domain
                                },
                                'password': self.password
                            }
                        }
                    },
                    'scope': {
                        'project': {
                            'id': tenant,
                            'domain': {
                                'id': project_domain
                            }
                        }
                    }
                }
            }

            auth_url_path = '/v3/auth/tokens'

        else:
            # If a domain is not specified, assume v2
            auth_body = { 'auth':
                          { 'passwordCredentials':
                                { 'username': user,
                                  'password': self.password } }}

            auth_url_path = '/v2.0/tokens'

            if tenant:
                auth_body['auth']['tenantName'] = tenant

        with HTTPConnection(self.hostname, port=self.port, proxy=self.proxy,
                            ssl_context=ssl_context) as conn:
            conn.timeout = int(self.options.get('tcp-timeout', 20))

            conn.send_request('POST', auth_url_path, headers=headers,
                              body=json.dumps(auth_body).encode('utf-8'))
            resp = conn.read_response()

            if resp.status == 401:
                raise AuthorizationError(resp.reason)

            elif resp.status > 299 or resp.status < 200:
                raise HTTPError(resp.status, resp.reason, resp.headers)

            cat = json.loads(conn.read().decode('utf-8'))

            if self.options.get('domain', None):
                self.auth_token = resp.headers['X-Subject-Token']
                service_catalog = cat['token']['catalog']
            else:
                self.auth_token = cat['access']['token']['id']
                service_catalog = cat['access']['serviceCatalog']

        avail_regions = []
        for service in service_catalog:
            if service['type'] != 'object-store':
                continue

            for endpoint in service['endpoints']:
                if endpoint['region'] != self.region:
                    avail_regions.append(endpoint['region'])
                    continue

                if 'publicURL' in endpoint:
                    # The publicURL nomenclature is found in v2 catalogs
                    o = urlsplit(endpoint['publicURL'])
                else:
                    # Whereas v3 catalogs do 'interface' == 'public' and
                    # 'url' for the URL itself
                    if endpoint['interface'] != 'public':
                        continue

                    o = urlsplit(endpoint['url'])

                self.auth_prefix = urllib.parse.unquote(o.path)
                if o.scheme == 'https':
                    ssl_context = self.ssl_context
                elif o.scheme == 'http':
                    ssl_context = None
                else:
                    # fall through to scheme used for authentication
                    pass

                self._detect_features(o.hostname, o.port, ssl_context)

                conn = HTTPConnection(o.hostname, o.port,  proxy=self.proxy,
                                      ssl_context=ssl_context)
                conn.timeout = int(self.options.get('tcp-timeout', 20))
                return conn

        if len(avail_regions) < 10:
            raise DanglingStorageURLError(self.container_name,
                'No accessible object storage service found in region %s'
                ' (available regions: %s)' % (self.region, ', '.join(avail_regions)))
        else:
            raise DanglingStorageURLError(self.container_name,
                'No accessible object storage service found in region %s'
                % self.region)
Example No. 26
    def _extractmeta(self, resp, obj_key):
        '''Extract metadata from HTTP response object'''

        format_ = resp.headers.get('%smeta-format' % self.hdr_prefix, 'raw')
        if format_ in ('raw', 'pickle'):
            meta = CaseInsensitiveDict()
            pattern = re.compile(r'^%smeta-(.+)$' % re.escape(self.hdr_prefix),
                                 re.IGNORECASE)
            for fname in resp.headers:
                hit = pattern.search(fname)
                if hit:
                    meta[hit.group(1)] = resp.headers[fname]

            if format_ == 'raw':
                return meta

            # format_ == pickle
            buf = ''.join(meta[x] for x in sorted(meta) if x.lower().startswith('data-'))
            if 'md5' in meta and md5sum_b64(buf.encode('us-ascii')) != meta['md5']:
                log.warning('MD5 mismatch in metadata for %s', obj_key)
                raise BadDigestError('BadDigest', 'Meta MD5 for %s does not match' % obj_key)
            try:
                return safe_unpickle(b64decode(buf), encoding='latin1')
            except binascii.Error:
                raise CorruptedObjectError('Corrupted metadata, b64decode failed')
            except pickle.UnpicklingError as exc:
                raise CorruptedObjectError('Corrupted metadata, pickle says: %s' % exc)

        elif format_ != 'raw2': # Current
            raise RuntimeError('Unknown metadata format %s for key %s'
                               % (format_, obj_key))

        parts = []
        for i in count():
            # Headers is an email.message object, so indexing it
            # would also give None instead of KeyError
            part = resp.headers.get('%smeta-%03d' % (self.hdr_prefix, i), None)
            if part is None:
                break
            parts.append(part)
        buf = unquote(''.join(parts))
        meta = literal_eval('{ %s }' % buf)

        # Decode bytes values
        for (k,v) in meta.items():
            if not isinstance(v, bytes):
                continue
            try:
                meta[k] = b64decode(v)
            except binascii.Error:
                # This should trigger an MD5 mismatch below
                meta[k] = None

        # Check MD5. There is a case to be made for treating a mismatch as a
        # `CorruptedObjectError` rather than a `BadDigestError`, because the MD5
        # sum is not calculated on-the-fly by the server but stored with the
        # object, and therefore does not actually verify what the server has
        # sent over the wire. However, it seems more likely for the data to get
        # accidentally corrupted in transit than to get accidentally corrupted
        # on the server (which hopefully checksums its storage devices).
        md5 = b64encode(checksum_basic_mapping(meta)).decode('ascii')
        if md5 != resp.headers.get('%smeta-md5' % self.hdr_prefix, None):
            log.warning('MD5 mismatch in metadata for %s', obj_key)
            raise BadDigestError('BadDigest', 'Meta MD5 for %s does not match' % obj_key)

        return meta
Example No. 27
    def write_fh(self,
                 fh,
                 key: str,
                 md5: bytes,
                 metadata: Optional[Dict[str, Any]] = None,
                 size: Optional[int] = None):
        '''Write data from byte stream *fh* into *key*.

        *fh* must be seekable. If *size* is None, *fh* must also implement
        `fh.fileno()` so that the size can be determined through `os.fstat`.

        *md5* must be the (binary) md5 checksum of the data.
        '''

        metadata = json.dumps({
            'metadata':
            _wrap_user_meta(metadata if metadata else {}),
            'md5Hash':
            b64encode(md5).decode(),
            'name':
            self.prefix + key,
        })

        # Google Storage uses Content-Length to read the object data, so we
        # don't have to worry about the boundary occurring in the object data.
        boundary = 'foo_bar_baz'
        headers = CaseInsensitiveDict()
        headers['Content-Type'] = 'multipart/related; boundary=%s' % boundary

        body_prefix = '\n'.join(
            ('--' + boundary, 'Content-Type: application/json; charset=UTF-8',
             '', metadata, '--' + boundary,
             'Content-Type: application/octet-stream', '', '')).encode()
        body_suffix = ('\n--%s--\n' % boundary).encode()

        body_size = len(body_prefix) + len(body_suffix)
        if size is not None:
            body_size += size
        else:
            body_size += os.fstat(fh.fileno()).st_size

        path = '/upload/storage/v1/b/%s/o' % (urllib.parse.quote(
            self.bucket_name, safe=''), )
        query_string = {'uploadType': 'multipart'}
        try:
            resp = self._do_request('POST',
                                    path,
                                    query_string=query_string,
                                    headers=headers,
                                    body=BodyFollowing(body_size))
        except RequestError as exc:
            exc = _map_request_error(exc, key)
            if exc:
                raise exc
            raise

        assert resp.status == 100
        fh.seek(0)

        md5_run = hashlib.md5()
        try:
            self.conn.write(body_prefix)
            while True:
                buf = fh.read(BUFSIZE)
                if not buf:
                    break
                self.conn.write(buf)
                md5_run.update(buf)
            self.conn.write(body_suffix)
        except ConnectionClosed:
            # Server closed connection while we were writing body data -
            # but we may still be able to read an error response
            try:
                resp = self.conn.read_response()
            except ConnectionClosed:  # No server response available
                pass
            else:
                log.warning(
                    'Server broke connection during upload, signaled '
                    '%d %s', resp.status, resp.reason)
            # Re-raise first ConnectionClosed exception
            raise

        if md5_run.digest() != md5:
            raise ValueError('md5 passed to write_fh does not match fh data')

        resp = self.conn.read_response()
        # If we're really unlucky, then the token has expired while we were uploading data.
        if resp.status == 401:
            self.conn.discard()
            raise AccessTokenExpired()
        elif resp.status != 200:
            exc = self._parse_error_response(resp)
            raise _map_request_error(exc, key) or exc
        self._parse_json_response(resp)
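
For reference, this is roughly what the multipart/related body assembled above looks like on the wire. The construction mirrors write_fh(); the metadata JSON and object data are toy values:

# Illustration only: assemble and print the multipart/related body for a toy
# payload, using the same prefix/suffix construction as write_fh() above.
import json
from base64 import b64encode

boundary = 'foo_bar_baz'
metadata = json.dumps({'name': 'prefix/key',
                       'md5Hash': b64encode(bytes(16)).decode()})  # dummy md5
data = b'object data goes here'

body_prefix = '\n'.join(
    ('--' + boundary, 'Content-Type: application/json; charset=UTF-8',
     '', metadata, '--' + boundary,
     'Content-Type: application/octet-stream', '', '')).encode()
body_suffix = ('\n--%s--\n' % boundary).encode()

print((body_prefix + data + body_suffix).decode())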
Example No. 28
    def _get_conn(self):
        '''Obtain connection to server and authentication token'''

        log.debug('started')

        if 'no-ssl' in self.options:
            ssl_context = None
        else:
            ssl_context = self.ssl_context

        headers = CaseInsensitiveDict()
        headers['Content-Type'] = 'application/json'
        headers['Accept'] = 'application/json; charset="utf-8"'

        if ':' in self.login:
            (tenant, user) = self.login.split(':')
        else:
            tenant = None
            user = self.login

        auth_body = {
            'auth': {
                'passwordCredentials': {
                    'username': user,
                    'password': self.password
                }
            }
        }
        if tenant:
            auth_body['auth']['tenantName'] = tenant

        with HTTPConnection(self.hostname,
                            port=self.port,
                            proxy=self.proxy,
                            ssl_context=ssl_context) as conn:
            conn.timeout = int(self.options.get('tcp-timeout', 20))

            conn.send_request('POST',
                              '/v2.0/tokens',
                              headers=headers,
                              body=json.dumps(auth_body).encode('utf-8'))
            resp = conn.read_response()

            if resp.status == 401:
                raise AuthorizationError(resp.reason)

            elif resp.status > 299 or resp.status < 200:
                raise HTTPError(resp.status, resp.reason, resp.headers)

            cat = json.loads(conn.read().decode('utf-8'))
            self.auth_token = cat['access']['token']['id']

        avail_regions = []
        for service in cat['access']['serviceCatalog']:
            if service['type'] != 'object-store':
                continue

            for endpoint in service['endpoints']:
                if endpoint['region'] != self.region:
                    avail_regions.append(endpoint['region'])
                    continue

                o = urlsplit(endpoint['publicURL'])
                self.auth_prefix = urllib.parse.unquote(o.path)
                if o.scheme == 'https':
                    ssl_context = self.ssl_context
                elif o.scheme == 'http':
                    ssl_context = None
                else:
                    # fall through to scheme used for authentication
                    pass

                self._detect_features(o.hostname, o.port, ssl_context)

                conn = HTTPConnection(o.hostname,
                                      o.port,
                                      proxy=self.proxy,
                                      ssl_context=ssl_context)
                conn.timeout = int(self.options.get('tcp-timeout', 20))
                return conn

        if len(avail_regions) < 10:
            raise DanglingStorageURLError(
                self.container_name,
                'No accessible object storage service found in region %s'
                ' (available regions: %s)' %
                (self.region, ', '.join(avail_regions)))
        else:
            raise DanglingStorageURLError(
                self.container_name,
                'No accessible object storage service found in region %s' %
                self.region)
Example No. 29
    def _do_request(self,
                    connection,
                    method,
                    path,
                    headers=None,
                    body=None,
                    download_body=True):
        '''Send request, read and return response object'''

        log.debug('started with %s %s', method, path)

        if headers is None:
            headers = CaseInsensitiveDict()

        if self.authorization_token is None:
            self._authorize_account()

        if 'Authorization' not in headers:
            headers['Authorization'] = self.authorization_token

        if self.test_mode_expire_some_tokens:
            headers[
                'X-Bz-Test-Mode'] = 'expire_some_account_authorization_tokens'

        if self.test_mode_force_cap_exceeded:
            headers['X-Bz-Test-Mode'] = 'force_cap_exceeded'

        log.debug('REQUEST: %s %s %s', connection.hostname, method, path)

        if body is None or isinstance(body, (bytes, bytearray, memoryview)):
            connection.send_request(method, path, headers=headers, body=body)
        else:
            body_length = os.fstat(body.fileno()).st_size
            connection.send_request(method,
                                    path,
                                    headers=headers,
                                    body=BodyFollowing(body_length))

            copyfileobj(body, connection, BUFSIZE)

        response = connection.read_response()

        if download_body is True or response.status != 200:  # Backblaze always returns JSON error information in the body
            response_body = connection.readall()
        else:
            response_body = None

        content_length = response.headers.get('Content-Length', '0')
        log.debug('RESPONSE: %s %s %s %s', response.method, response.status,
                  response.reason, content_length)

        if (
                response.status == 404 or  # File not found
            (response.status != 200 and method == 'HEAD')
        ):  # HEAD responses do not have a body -> we have to raise a HTTPError with the code
            raise HTTPError(response.status, response.reason, response.headers)

        if response.status != 200:
            json_error_response = json.loads(
                response_body.decode('utf-8')) if response_body else None
            code = json_error_response['code'] if json_error_response else None
            message = json_error_response[
                'message'] if json_error_response else response.reason
            b2_error = B2Error(json_error_response['status'], code, message,
                               response.headers)
            raise b2_error

        return response, response_body
Example No. 30
File: s3c.py Project: mkhon/s3ql
    def _send_request(self,
                      method,
                      path,
                      headers,
                      subres=None,
                      query_string=None,
                      body=None):
        '''Add authentication and send request

        Returns the response object.
        '''

        if not isinstance(headers, CaseInsensitiveDict):
            headers = CaseInsensitiveDict(headers)

        self._authorize_request(method, path, headers, subres)

        # Construct full path
        if not self.hostname.startswith(self.bucket_name):
            path = '/%s%s' % (self.bucket_name, path)
        path = urllib.parse.quote(path)
        if query_string:
            s = urllib.parse.urlencode(query_string, doseq=True)
            if subres:
                path += '?%s&%s' % (subres, s)
            else:
                path += '?%s' % s
        elif subres:
            path += '?%s' % subres

        # We can probably remove the assertions at some point and
        # call self.conn.read_response() directly
        def read_response():
            resp = self.conn.read_response()
            assert resp.method == method
            assert resp.path == path
            return resp

        use_expect_100c = not self.options.get('disable-expect100', False)
        try:
            log.debug('sending %s %s', method, path)
            if body is None or isinstance(body,
                                          (bytes, bytearray, memoryview)):
                self.conn.send_request(method,
                                       path,
                                       body=body,
                                       headers=headers)
            else:
                body_len = os.fstat(body.fileno()).st_size
                self.conn.send_request(method,
                                       path,
                                       expect100=use_expect_100c,
                                       headers=headers,
                                       body=BodyFollowing(body_len))

                if use_expect_100c:
                    resp = read_response()
                    if resp.status != 100:  # Error
                        return resp

                try:
                    copyfileobj(body, self.conn, BUFSIZE)
                except ConnectionClosed:
                    # Server closed connection while we were writing body data -
                    # but we may still be able to read an error response
                    try:
                        resp = read_response()
                    except ConnectionClosed:  # No server response available
                        pass
                    else:
                        if resp.status >= 400:  # Got error response
                            return resp
                        log.warning(
                            'Server broke connection during upload, but signaled '
                            '%d %s', resp.status, resp.reason)

                    # Re-raise first ConnectionClosed exception
                    raise

            return read_response()

        except Exception as exc:
            if is_temp_network_error(exc):
                # We probably can't use the connection anymore
                self.conn.disconnect()
            raise
Example No. 31
File: s3c.py Project: mkhon/s3ql
    def _do_request(self,
                    method,
                    path,
                    subres=None,
                    query_string=None,
                    headers=None,
                    body=None):
        '''Send request, read and return response object'''

        log.debug('started with %s %s?%s, qs=%s', method, path, subres,
                  query_string)

        if headers is None:
            headers = CaseInsensitiveDict()

        if isinstance(body, (bytes, bytearray, memoryview)):
            headers['Content-MD5'] = md5sum_b64(body)

        redirect_count = 0
        this_method = method
        while True:
            resp = self._send_request(this_method,
                                      path,
                                      headers=headers,
                                      subres=subres,
                                      query_string=query_string,
                                      body=body)

            if (resp.status < 300 or resp.status > 399):
                break

            # Assume redirect
            redirect_count += 1
            if redirect_count > 10:
                raise RuntimeError('Too many chained redirections')

            # First try location header...
            new_url = resp.headers['Location']
            if new_url:
                # Discard body
                self.conn.discard()

                # Pylint can't infer SplitResult Types
                #pylint: disable=E1103
                o = urlsplit(new_url)
                if o.scheme:
                    if self.ssl_context and o.scheme != 'https':
                        raise RuntimeError('Redirect to non-https URL')
                    elif not self.ssl_context and o.scheme != 'http':
                        raise RuntimeError('Redirect to non-http URL')
                if o.hostname != self.hostname or o.port != self.port:
                    self.hostname = o.hostname
                    self.port = o.port
                    self.conn.disconnect()
                    self.conn = self._get_conn()
                else:
                    raise RuntimeError(
                        'Redirect to different path on same host')

            # ..but endpoint may also be hidden in message body.
            # If we have done a HEAD request, we have to change to GET
            # to actually retrieve the body.
            elif resp.method == 'HEAD':
                log.debug('Switching from HEAD to GET to read redirect body')
                this_method = 'GET'

            # Try to read new URL from request body
            else:
                tree = self._parse_xml_response(resp)
                new_url = tree.findtext('Endpoint')

                if not new_url:
                    raise get_S3Error(tree.findtext('Code'),
                                      tree.findtext('Message'), resp.headers)

                self.hostname = new_url
                self.conn.disconnect()
                self.conn = self._get_conn()

                # Update method
                this_method = method

            log.info('_do_request(): redirected to %s', self.conn.hostname)

            if body and not isinstance(body, (bytes, bytearray, memoryview)):
                body.seek(0)

        # At the end, the request should have gone out with the right
        # method
        if this_method != method:
            raise RuntimeError(
                'Dazed and confused - HEAD fails but GET works?')

        # Success
        if resp.status >= 200 and resp.status <= 299:
            return resp

        # Error
        self._parse_error_response(resp)
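
When the redirect target is not given in a Location header, the code above extracts it from an XML error body via tree.findtext('Endpoint'). A made-up example of such a body and of pulling the new endpoint out of it:

# Illustration only: parsing a redirect body of the kind handled above
# (bucket name and region are made up).
import xml.etree.ElementTree as ElementTree

redirect_xml = '''<Error>
  <Code>PermanentRedirect</Code>
  <Message>The bucket must be addressed using the specified endpoint.</Message>
  <Endpoint>bucket.s3.eu-central-1.amazonaws.com</Endpoint>
</Error>'''

tree = ElementTree.fromstring(redirect_xml)
print(tree.findtext('Endpoint'))   # bucket.s3.eu-central-1.amazonaws.com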
Example No. 32
    def _do_request(self,
                    method,
                    path,
                    conn,
                    headers=None,
                    body=None,
                    auth_token=None,
                    download_body=True,
                    body_size=None):
        """Send request, read and return response object

        This method modifies the *headers* dictionary.
        conn must by a HTTPConnection

        When download_body is True, need to receive data before making new connection

        """
        def _debug_body(b):
            if isinstance(b, str):
                return b
            elif b is None:
                return "None"
            else:
                return 'byte_body'

        def _debug_hostname(c):
            try:
                return c.hostname
            except:
                return "None"

        log.debug('started with %r, %r, %r, %r, %r', method,
                  _debug_hostname(conn), path, headers, _debug_body(body))

        if headers is None:
            headers = CaseInsensitiveDict()

        if auth_token is None:
            headers['Authorization'] = self.auth_token
        else:
            headers['Authorization'] = auth_token

        if self.test_string:
            headers['X-Bz-Test-Mode'] = self.test_string

        try:
            if isinstance(body, io.FileIO):
                if body_size is None:
                    raise ValueError(
                        "Body size is necessary when uploading from file")
                conn.send_request(method,
                                  path,
                                  headers=headers,
                                  body=BodyFollowing(body_size))
                while True:
                    buf = body.read(BUFSIZE)
                    if not buf:
                        break
                    conn.write(buf)
            else:
                conn.send_request(method, path, headers=headers, body=body)
            resp = conn.read_response()
            if download_body or resp.status != 200:
                body = conn.readall()
            else:
                # caller needs to read the body itself before making a new request
                body = None
        except Exception as exc:
            if is_temp_network_error(exc):
                # We probably can't use the connection anymore
                conn.disconnect()
            raise

        if resp.status == 200 or resp.status == 206:
            return resp, body

        try:
            # error code is in body
            j = json.loads(str(body, encoding='UTF-8'))
        except ValueError:
            raise HTTPError(resp.status, resp.reason, resp.headers)

        # Expired auth token
        if resp.status == 401:
            if j['code'] == 'expired_auth_token':
                log.info(
                    'BackBlaze auth token seems to have expired, requesting new one.'
                )
                self.conn_api.disconnect()
                self.conn_download.disconnect()
                # Force constructing a new connection with a new token, otherwise
                # the connection will be reestablished with the same token.
                self.conn_api = None
                self.conn_download = None
                self._login()
                raise AuthenticationExpired(j['message'])
            else:
                raise AuthorizationError(j['message'])

        # File not found
        if resp.status == 404:
            raise NoSuchObject(path)

        # Backend error
        raise B2Error(j['status'], j['code'], j['message'], headers=headers)