示例#1
0
def s3_upload_tmp_file(s3_bucket,
                       tmp_file,
                       filename,
                       headers,
                       directory='',
                       file_type_check=True,
                       return_key_only=False,
                       conn_name=DEFAULT_CONN,
                       with_encryption=False,
                       upload_root_dir=None):
    """
    Push the contents of a temporary file to S3, then remove the file.

    When ``file_type_check`` is true, ``check_type`` is run on the file name
    before anything is read. When ``with_encryption`` is true, the bytes are
    encrypted with ``AESWithGCM`` using the ``FILE_ENCRYPTION_KEY`` config
    value before upload. The file at ``tmp_file.name`` is unlinked whether or
    not the upload succeeds.

    Returns whatever ``s3_upload_file`` returns.
    """
    try:
        if file_type_check:
            check_type(tmp_file.name)
        payload = tmp_file.read()
        if with_encryption:
            key_material = app.config.get('FILE_ENCRYPTION_KEY')
            payload = AESWithGCM(key_material).encrypt(payload)
        # The ``finally`` clause still runs before this value is handed back.
        return s3_upload_file(s3_bucket, BytesIO(payload), filename, headers,
                              upload_root_dir, directory, return_key_only,
                              conn_name)
    finally:
        os.unlink(tmp_file.name)
示例#2
0
    def test_proxy_key_err(self, http_get):
        """Task payload proxy: error responses.

        The mocked ``http_get`` returns JSON that only carries an ``error``
        entry. The request for the real project must answer 500; the same
        request against project id 9999 must answer 400.
        """
        key_reply = MagicMock()
        key_reply.json.return_value = {'error': 'an error occurred'}
        http_get.return_value = key_reply

        admin, owner = UserFactory.create_batch(2)
        encryption_cfg = {'encryption': {'key_id': 123}}
        project = ProjectFactory.create(owner=owner,
                                        info={'ext_config': encryption_cfg})
        cipher = AESWithGCM('testkey')
        plaintext = json.dumps(dict(a=1, b="2"))
        task = TaskFactory.create(
            project=project,
            info={'private_json__encrypted_payload': cipher.encrypt(plaintext)})

        signature = signer.dumps({'task_id': task.id})
        url_template = '/fileproxy/encrypted/taskpayload/%s/%s?api_key=%s&task-signature=%s'

        # (project id used in the URL, status code the proxy must answer with)
        cases = ((project.id, 500), (9999, 400))
        for url_project_id, expected_status in cases:
            url = url_template % (url_project_id, task.id, admin.api_key,
                                  signature)
            with patch.dict(self.flask_app.config, self.app_config):
                response = self.app.get(url, follow_redirects=True)
                assert response.status_code == expected_status, response.status_code
示例#3
0
    def test_proxy_owner(self, http_get):
        """Task payload proxy: the project owner gets the decrypted payload.

        The mocked ``http_get`` returns ``{'key': 'testkey'}``; the payload
        stored on the task is encrypted with that same key, and the response
        body must equal the original JSON text.
        """
        key_reply = MagicMock()
        key_reply.json.return_value = {'key': 'testkey'}
        http_get.return_value = key_reply

        encryption_cfg = {'encryption': {'key_id': 123}}
        project = ProjectFactory.create(info={'ext_config': encryption_cfg})

        cipher = AESWithGCM('testkey')
        plaintext = json.dumps(dict(a=1, b="2"))
        task = TaskFactory.create(
            project=project,
            info={'private_json__encrypted_payload': cipher.encrypt(plaintext)})

        signature = signer.dumps({'task_id': task.id})
        url = '/fileproxy/encrypted/taskpayload/%s/%s?api_key=%s&task-signature=%s' % (
            project.id, task.id, project.owner.api_key, signature)

        with patch.dict(self.flask_app.config, self.app_config):
            response = self.app.get(url, follow_redirects=True)
            assert response.status_code == 200, response.status_code
            assert response.data == plaintext, response.data
示例#4
0
    def test_proxy_admin(self, http_get, hdfs_get):
        """HDFS file proxy: an admin receives the decrypted file content.

        ``http_get`` is mocked to return ``{'key': 'testkey'}`` and
        ``hdfs_get`` to return ``'the content'`` encrypted with that key; the
        response body must be the plain text.
        """
        key_reply = MagicMock()
        key_reply.json.return_value = {'key': 'testkey'}
        http_get.return_value = key_reply

        admin, owner = UserFactory.create_batch(2)
        encryption_cfg = {'encryption': {'key_id': 123}}
        project = ProjectFactory.create(owner=owner,
                                        info={'ext_config': encryption_cfg})
        file_url = '/fileproxy/hdfs/test/%s/file.pdf' % project.id
        task = TaskFactory.create(project=project, info={'url': file_url})

        query = (file_url, admin.api_key, signer.dumps({'task_id': task.id}))
        req_url = '%s?api_key=%s&task-signature=%s' % query

        hdfs_get.return_value = AESWithGCM('testkey').encrypt('the content')

        with patch.dict(self.flask_app.config, self.app_config):
            response = self.app.get(req_url, follow_redirects=True)
            assert response.status_code == 200, response.status_code
            assert response.data == 'the content', response.data
示例#5
0
class TestAes(object):
    """Encrypt/decrypt checks for ``AESWithGCM``."""

    def setUp(self):
        """Build the cipher shared by the round-trip tests."""
        key = bytearray('very secret', 'ascii')
        # Positional arguments after the key: iv length 12, tag length 16.
        self.aes = AESWithGCM(key, 12, 16)

    def test_aes(self):
        """Encrypting changes the text; decrypting restores it."""
        plaintext = 'testing simple encrytion'
        ciphertext = self.aes.encrypt(plaintext)
        assert ciphertext != plaintext
        assert self.aes.decrypt(ciphertext) == plaintext

    def test_aes_2(self):
        """A fixed, pre-computed ciphertext decrypts to the known sentence."""
        cipher = AESWithGCM(bytearray('this is my super secret key', 'ascii'))
        ciphertext = 'DMj4/yC2pgzgAg76TApmk7zVZlaG0B47KASCnS/TqH6fQpA9UaHjmGLHqCfvGVVQcSivX76Oy349QivZjOJ2yfXZRb0='
        expected = 'this is a test string I plan to encrypt'
        assert cipher.decrypt(ciphertext) == expected

    def test_aes_unicode(self):
        """UTF-8 encoded non-ASCII text survives an encrypt/decrypt cycle."""
        text = u'∀ z ∈ ℂ, ζ(z) = 0 ⇒ ((z ∈ -2ℕ) ∨ (Re(z) = -½))'
        ciphertext = self.aes.encrypt(text.encode('utf-8'))
        roundtrip = self.aes.decrypt(ciphertext).decode('utf-8')
        assert text == roundtrip
示例#6
0
def hdfs_file(project_id, cluster, path):
    """Serve a task file read from HDFS, decrypting it when configured.

    Raises ``NotFound`` when the ``HDFS_CONFIG`` setting is missing or empty
    and ``Forbidden`` when the request has no ``task-signature`` argument.
    The signature is loaded with the project's ``timeout`` (falling back to
    ``ContributionsGuard.STAMP_TTL``) and the task id it carries is passed to
    ``check_allowed``. Any exception while fetching or decrypting is logged
    and turned into ``InternalServerError``.
    """
    if not current_app.config.get('HDFS_CONFIG'):
        raise NotFound('Not Found')
    task_signature = request.args.get('task-signature')
    if not task_signature:
        raise Forbidden('No signature')

    project = get_project_data(project_id)
    project_info = project['info']
    max_age = project_info.get('timeout', ContributionsGuard.STAMP_TTL)
    task_id = signer.loads(task_signature, max_age=max_age)['task_id']
    check_allowed(current_user.id, task_id, project, request.path)

    hdfs = HDFSKerberos(**current_app.config['HDFS_CONFIG'][cluster])
    try:
        data = hdfs.get('/{}'.format(path))
        encryption = project_info.get('ext_config', {}).get('encryption', {})
        # Decrypt only when the encryption settings exist and none is falsy.
        if encryption and all(encryption.values()):
            data = AESWithGCM(get_secret_from_vault(encryption)).decrypt(data)
    except Exception:
        message = 'Project id {} get task file {}'.format(project_id, path)
        current_app.logger.exception(message)
        raise InternalServerError('An Error Occurred')

    return Response(data)
示例#7
0
    def test_file_user_key_from_vault(self, get_secret, has_lock,
                                      create_connection):
        """Encrypted S3 proxy: a regular user holding the lock gets the file.

        The stored content is encrypted with ``'testkey'``, which is also
        what the mocked ``get_secret`` returns, while the app-level
        ``FILE_ENCRYPTION_KEY`` is patched to a different value. The response
        must still be the plain text.
        """
        secret = 'testkey'
        has_lock.return_value = True
        _admin, _owner, user = UserFactory.create_batch(3)
        project = ProjectFactory.create(info={'encryption': {'key': 'abc'}})
        file_url = '/fileproxy/encrypted/s3/anothertest/%s/file.pdf' % project.id
        task = TaskFactory.create(project=project, info={'url': file_url})

        query = (file_url, user.api_key, signer.dumps({'task_id': task.id}))
        req_url = '%s?api_key=%s&task-signature=%s' % query

        cipher = AESWithGCM(secret)
        s3_key = self.get_key(create_connection)
        s3_key.get_contents_as_string.return_value = cipher.encrypt('the content')
        get_secret.return_value = secret

        overrides = {
            'FILE_ENCRYPTION_KEY': 'another key',
            'S3_REQUEST_BUCKET': 'test',
            'ENCRYPTION_CONFIG_PATH': ['encryption'],
        }
        with patch.dict(self.flask_app.config, overrides):
            response = self.app.get(req_url, follow_redirects=True)
            assert response.status_code == 200, response.status_code
            assert response.data == 'the content', response.data
示例#8
0
 def test_decrypts_file_from_s3(self, get_contents):
     """``get_file_from_s3(..., decrypt=True)`` returns the decrypted content."""
     # The mocked download returns text encrypted with the configured key.
     get_contents.return_value = AESWithGCM('abcd').encrypt('hello world')
     settings = self.default_config.copy()
     settings['ENABLE_ENCRYPTION'] = True
     settings['FILE_ENCRYPTION_KEY'] = 'abcd'
     with patch.dict(self.flask_app.config, settings):
         handle = get_file_from_s3('test_bucket', '/the/key', decrypt=True)
         assert handle.read() == 'hello world'
示例#9
0
文件: s3.py 项目: lsuttle/pybossa
def get_content_and_key_from_s3(s3_bucket, path, conn_name=DEFAULT_CONN,
        decrypt=False, secret=None):
    """Return ``(content, key)`` for the object at ``path`` in ``s3_bucket``.

    With ``decrypt`` set, the content is decrypted with ``AESWithGCM`` using
    ``secret``, or the ``FILE_ENCRYPTION_KEY`` config value when ``secret``
    is falsy.
    """
    _bucket, key = get_s3_bucket_key(s3_bucket, path, conn_name)
    raw = key.get_contents_as_string()
    if not decrypt:
        return raw, key
    key_material = secret or app.config.get('FILE_ENCRYPTION_KEY')
    return AESWithGCM(key_material).decrypt(raw), key
示例#10
0
def get_file_from_s3(s3_bucket, path, conn_name=DEFAULT_CONN, decrypt=False):
    """Download an S3 object into a temporary file and return the open file.

    With ``decrypt`` set, the content is decrypted with ``AESWithGCM`` using
    the ``FILE_ENCRYPTION_KEY`` config value before it is written.

    The returned ``NamedTemporaryFile`` is positioned at offset 0; the caller
    owns it and is responsible for closing it.
    """
    _, key = get_s3_bucket_key(s3_bucket, path, conn_name)
    content = key.get_contents_as_string()
    if decrypt:
        secret = app.config.get('FILE_ENCRYPTION_KEY')
        cipher = AESWithGCM(secret)
        content = cipher.decrypt(content)

    # Create the temp file only once the content is ready, so a failed
    # download or decryption cannot leave an open handle behind.
    temp_file = NamedTemporaryFile()
    try:
        temp_file.write(content)
        temp_file.seek(0)
    except Exception:
        # Nobody will receive the handle, so release it before re-raising.
        temp_file.close()
        raise
    return temp_file
示例#11
0
def encrypt_task_response_data(task_id, project_id, data):
    """Encrypt ``data`` as JSON for tasks that carry an encrypted payload.

    Returns ``None`` unless the task exists, its ``info`` is a dict, and that
    dict contains ``private_json__encrypted_payload``. Otherwise ``data`` is
    serialized with ``json.dumps``, encrypted with the key obtained from
    ``get_encryption_key`` for the project, and returned as text.
    """
    task = task_repo.get_task(task_id)
    has_encrypted_payload = (
        task
        and isinstance(task.info, dict)
        and 'private_json__encrypted_payload' in task.info
    )
    if not has_encrypted_payload:
        return None

    project = get_project_data(project_id)
    cipher = AESWithGCM(get_encryption_key(project))
    serialized = json.dumps(data).encode('utf8')
    return cipher.encrypt(serialized).decode('utf8')
示例#12
0
def hdfs_file(project_id, cluster, path):
    """Serve a task file (or a byte range of it) read from HDFS.

    Request arguments read: ``task-signature`` (required), ``offset`` and
    ``length`` (optional, converted with ``int``).

    Raises:
        NotFound: the ``HDFS_CONFIG`` setting is missing or empty.
        Forbidden: the signature is missing or longer than
            ``TASK_SIGNATURE_MAX_SIZE``.
        InternalServerError: any exception while parsing offset/length,
            reading from HDFS or decrypting.
    Exceptions raised by ``check_allowed`` are logged and re-raised as-is.
    """
    if not current_app.config.get('HDFS_CONFIG'):
        raise NotFound('Not Found')
    signature = request.args.get('task-signature')
    if not signature:
        raise Forbidden('No signature')
    size_signature = len(signature)
    if size_signature > TASK_SIGNATURE_MAX_SIZE:
        # No exception is being handled here, so use error(), not
        # exception(), which would append a meaningless "NoneType: None".
        current_app.logger.error(
            'Project id %s, cluster %s path %s invalid task signature. Signature length %s exceeds max allowed length %s.',
            project_id, cluster, path, size_signature, TASK_SIGNATURE_MAX_SIZE)
        raise Forbidden('Invalid signature')

    project = get_project_data(project_id)
    timeout = project['info'].get('timeout', ContributionsGuard.STAMP_TTL)
    payload = signer.loads(signature, max_age=timeout)
    task_id = payload['task_id']

    try:
        check_allowed(
            current_user.id, task_id, project,
            is_valid_hdfs_url(request.path, request.args.to_dict(flat=False)))
    except Exception:
        current_app.logger.exception(
            'Project id %s not allowed to get file %s %s', project_id, path,
            str(request.args))
        raise

    current_app.logger.info(
        "Project id %s, task id %s. Accessing hdfs cluster %s, path %s",
        project_id, task_id, cluster, path)
    client = HDFSKerberos(**current_app.config['HDFS_CONFIG'][cluster])
    offset = request.args.get('offset')
    length = request.args.get('length')

    try:
        # Missing or empty values mean "no offset" / "no length limit".
        offset = int(offset) if offset else None
        length = int(length) if length else None
        content = client.get('/{}'.format(path), offset=offset, length=length)
        project_encryption = get_project_encryption(project)
        # Decrypt only when the encryption settings exist and none is falsy.
        if project_encryption and all(project_encryption.values()):
            secret = get_secret_from_vault(project_encryption)
            cipher = AESWithGCM(secret)
            content = cipher.decrypt(content)
    except Exception:
        current_app.logger.exception(
            "Project id %s, task id %s, cluster %s, get task file %s, %s",
            project_id, task_id, cluster, path, str(request.args))
        raise InternalServerError('An Error Occurred')

    return Response(content)
示例#13
0
def encrypted_task_payload(project_id, task_id):
    """Proxy to decrypt encrypted task payload.

    The task id actually used is the one carried by the ``task-signature``
    request argument (``0`` if the signature payload has none); it replaces
    the ``task_id`` argument once the signature has been loaded.

    Raises:
        Forbidden: the signature is missing or longer than
            ``TASK_SIGNATURE_MAX_SIZE``.
        BadRequest: ``get_project_data`` returned nothing for ``project_id``.
        InternalServerError: any exception while fetching the key, loading
            the task or decrypting.

    Returns a ``Response`` with content type ``application/json`` whose body
    is the decrypted ``private_json__encrypted_payload``, or an empty string
    when the task has no such entry.
    """
    current_app.logger.info(
        'Project id {}, task id {}, decrypt task payload.'.format(
            project_id, task_id))
    signature = request.args.get('task-signature')
    # The validation failures below are not raised from an exception handler,
    # so they are logged with error(); exception() would attach a bogus
    # "NoneType: None" traceback.
    if not signature:
        current_app.logger.error(
            'Project id {}, task id {} has no signature.'.format(
                project_id, task_id))
        raise Forbidden('No signature')

    size_signature = len(signature)
    if size_signature > TASK_SIGNATURE_MAX_SIZE:
        current_app.logger.error(
            'Project id {}, task id {} invalid task signature. Signature length {} exceeds max allowed length {}.' \
                .format(project_id, task_id, size_signature, TASK_SIGNATURE_MAX_SIZE))
        raise Forbidden('Invalid signature')

    project = get_project_data(project_id)
    if not project:
        current_app.logger.error('Invalid project id {}.'.format(project_id))
        raise BadRequest('Invalid Project')

    timeout = project['info'].get('timeout', ContributionsGuard.STAMP_TTL)

    payload = signer.loads(signature, max_age=timeout)
    task_id = payload.get('task_id', 0)

    validate_task(project, task_id, current_user.id)

    # Decrypt encrypted task data under private_json__encrypted_payload.
    try:
        secret = get_encryption_key(project)
        task = task_repo.get_task(task_id)
        content = task.info.get('private_json__encrypted_payload')
        if content:
            cipher = AESWithGCM(secret)
            content = cipher.decrypt(content)
        else:
            content = ''
    except Exception as e:
        current_app.logger.exception(
            'Project id {} task {} decrypt encrypted data {}'.format(
                project_id, task_id, e))
        raise InternalServerError('An Error Occurred')

    response = Response(content, content_type='application/json')
    return response
示例#14
0
 def test_count_tasks_encrypted(self, s3_get):
     """``count_tasks`` reports one task for an encrypted ``'req\\n1'`` file.

     ``s3_get`` is mocked to return a key whose content is the text encrypted
     with the same key that is configured as ``FILE_ENCRYPTION_KEY``.
     """
     k = Mock()
     s3_get.return_value = '', k
     cont = 'req\n1'
     cipher = AESWithGCM('abcd')
     k.get_contents_as_string.return_value = cipher.encrypt(cont)
     config = {
         'S3_IMPORT_BUCKET': 'aadf',
         'FILE_ENCRYPTION_KEY': 'abcd',
         'ENABLE_ENCRYPTION': True
     }
     with patch.dict(self.flask_app.config, config):
         number_of_tasks = self.importer.count_tasks()
         # Compare by value: ``is`` on an int literal tests object identity,
         # which is implementation-dependent and warns on Python 3.8+.
         assert number_of_tasks == 1, number_of_tasks
示例#15
0
    def test_taskrun_with_encrypted_payload(self, encr_key, upload_from_string,
                                            set_content):
        """Posting a taskrun for an encrypted task yields an encrypted response.

        The returned ``info`` must hold exactly two entries:
        ``private_json__encrypted_response``, which decrypts to the submitted
        answer as JSON, and ``pyb_answer_url``, which points at
        ``pyb_answer.json`` under host/bucket/project/task/user.
        """
        secret = 'testkey'
        answer = {'another_field': 42}
        with patch.dict(self.flask_app.config, self.patch_config):
            project = ProjectFactory.create()
            encr_key.return_value = secret
            cipher = AESWithGCM(secret)
            task = TaskFactory.create(
                project=project,
                info={'private_json__encrypted_payload': cipher.encrypt('some data')})
            owner = project.owner
            self.app.get('/api/project/%s/newtask?api_key=%s' %
                         (project.id, owner.api_key))

            body = json.dumps(
                dict(project_id=project.id, task_id=task.id, info=answer))
            response = self.app.post('/api/taskrun?api_key=%s' % owner.api_key,
                                     data=body)

            assert response.status_code == 200, response.data
            set_content.assert_called()
            result_info = json.loads(response.data)['info']
            assert len(result_info) == 2
            decrypted = cipher.decrypt(
                result_info['private_json__encrypted_response'])
            assert decrypted == json.dumps(
                answer
            ), "private_json__encrypted_response decrypted data mismatch"

            answer_url = result_info['pyb_answer_url']
            expected = 'https://{host}/{bucket}/{project_id}/{task_id}/{user_id}/{filename}'.format(
                host=self.host,
                bucket=self.bucket,
                project_id=project.id,
                task_id=task.id,
                user_id=owner.id,
                filename='pyb_answer.json')
            assert answer_url == expected, answer_url
示例#16
0
    def test_proxy_regular_user_has_lock(self, http_get):
        """Task payload proxy: a regular user's access depends on the lock.

        With ``has_lock`` patched to True the user gets 200 and the decrypted
        payload; with False the user gets 403. After the user's id is
        appended to ``project.owners_ids`` the request succeeds again even
        with ``has_lock`` returning False.
        """
        key_reply = MagicMock()
        key_reply.json.return_value = {'key': 'testkey'}
        http_get.return_value = key_reply

        _admin, owner, user = UserFactory.create_batch(3)
        encryption_cfg = {'encryption': {'key_id': 123}}
        project = ProjectFactory.create(owner=owner,
                                        info={'ext_config': encryption_cfg})

        cipher = AESWithGCM('testkey')
        plaintext = json.dumps(dict(a=1, b="2"))
        task = TaskFactory.create(
            project=project,
            info={'private_json__encrypted_payload': cipher.encrypt(plaintext)})

        signature = signer.dumps({'task_id': task.id})
        url = '/fileproxy/encrypted/taskpayload/%s/%s?api_key=%s&task-signature=%s' % (
            project.id, task.id, user.api_key, signature)

        def fetch(lock_held):
            # One proxy request with has_lock forced to the given answer.
            with patch('pybossa.view.fileproxy.has_lock') as has_lock:
                has_lock.return_value = lock_held
                with patch.dict(self.flask_app.config, self.app_config):
                    return self.app.get(url, follow_redirects=True)

        response = fetch(True)
        assert response.status_code == 200, response.status_code
        assert response.data == plaintext, response.data

        response = fetch(False)
        assert response.status_code == 403, response.status_code

        # coowner can access the task
        project.owners_ids.append(user.id)
        response = fetch(False)
        assert response.status_code == 200, response.status_code
示例#17
0
def encrypted_file(store, bucket, project_id, path):
    """Proxy encrypted task file in a cloud storage.

    Downloads the object ``/<project_id>/<path>`` from ``bucket`` using a
    connection built from the ``S3_TASK_REQUEST`` config, decrypts it with
    the ``FILE_ENCRYPTION_KEY`` config value and returns it. ``store`` is
    accepted but not used in this function body.

    Raises:
        Forbidden: the request has no ``task-signature`` argument.
        NotFound: S3 reported ``NoSuchKey``.
        InternalServerError: any other ``S3ResponseError``.

    Returns a ``Response`` carrying the decrypted bytes with the key's
    content type, plus ``Content-Encoding`` and ``Content-Disposition``
    headers copied from the key.
    """
    current_app.logger.info('Project id {} decrypt file. {}'.format(
        project_id, path))
    conn_args = current_app.config.get('S3_TASK_REQUEST', {})
    signature = request.args.get('task-signature')
    if not signature:
        # No exception is being handled here, so log with error();
        # exception() would attach a bogus "NoneType: None" traceback.
        current_app.logger.error('Project id {} no signature {}'.format(
            project_id, path))
        raise Forbidden('No signature')

    project = get_project_data(project_id)
    timeout = project['info'].get('timeout', ContributionsGuard.STAMP_TTL)

    payload = signer.loads(signature, max_age=timeout)
    task_id = payload['task_id']

    check_allowed(current_user.id, task_id, project, request.path)

    ## download file
    try:
        key = '/{}/{}'.format(project_id, path)
        conn = create_connection(**conn_args)
        _bucket = conn.get_bucket(bucket, validate=False)
        _key = _bucket.get_key(key, validate=False)
        content = _key.get_contents_as_string()
    except S3ResponseError as e:
        current_app.logger.exception(
            'Project id {} get task file {} {}'.format(project_id, path, e))
        if e.error_code == 'NoSuchKey':
            raise NotFound('File Does Not Exist')
        else:
            raise InternalServerError('An Error Occurred')

    ## decrypt file
    secret = current_app.config.get('FILE_ENCRYPTION_KEY')
    cipher = AESWithGCM(secret)
    decrypted = cipher.decrypt(content)

    response = Response(decrypted, content_type=_key.content_type)
    response.headers.add('Content-Encoding', _key.content_encoding)
    response.headers.add('Content-Disposition', _key.content_disposition)
    return response
示例#18
0
    def test_proxy_admin(self, create_connection):
        """Encrypted S3 proxy: an admin receives the decrypted file content.

        The mocked S3 key returns ``'the content'`` encrypted with the same
        key that is patched in as ``FILE_ENCRYPTION_KEY``.
        """
        secret = 'testkey'
        admin, owner = UserFactory.create_batch(2)
        project = ProjectFactory.create(owner=owner)
        file_url = '/fileproxy/encrypted/s3/test/%s/file.pdf' % project.id
        task = TaskFactory.create(project=project, info={'url': file_url})

        query = (file_url, admin.api_key, signer.dumps({'task_id': task.id}))
        req_url = '%s?api_key=%s&task-signature=%s' % query

        cipher = AESWithGCM(secret)
        s3_key = self.get_key(create_connection)
        s3_key.get_contents_as_string.return_value = cipher.encrypt('the content')

        with patch.dict(self.flask_app.config, {'FILE_ENCRYPTION_KEY': secret}):
            response = self.app.get(req_url, follow_redirects=True)
            assert response.status_code == 200, response.status_code
            assert response.data == 'the content', response.data
示例#19
0
 def setUp(self):
     """Create the ``AESWithGCM`` instance stored on ``self.aes``."""
     key = bytearray('very secret', 'ascii')
     # Positional arguments after the key: iv length 12, tag length 16.
     self.aes = AESWithGCM(key, 12, 16)
示例#20
0
 def test_aes_2(self):
     """A fixed, pre-computed ciphertext decrypts to the known sentence."""
     cipher = AESWithGCM(bytearray('this is my super secret key', 'ascii'))
     ciphertext = 'DMj4/yC2pgzgAg76TApmk7zVZlaG0B47KASCnS/TqH6fQpA9UaHjmGLHqCfvGVVQcSivX76Oy349QivZjOJ2yfXZRb0='
     expected = 'this is a test string I plan to encrypt'
     assert cipher.decrypt(ciphertext) == expected
示例#21
0
    def test_taskrun_with_upload(self, upload_from_string, set_content):
        """Posting a taskrun with an ``__upload_url`` entry stores encrypted files.

        The response ``info`` must contain only ``pyb_answer_url``. The first
        ``set_content`` call must carry the uploaded file content (``'abc'``)
        encrypted with ``'testkey'``; the last call must carry the encrypted
        answer JSON, in which the upload entry has been replaced by the
        file's URL and the other field is unchanged.
        """
        with patch.dict(self.flask_app.config, self.patch_config):
            project = ProjectFactory.create()
            task = TaskFactory.create(project=project)
            owner = project.owner
            self.app.get('/api/project/%s/newtask?api_key=%s' %
                         (project.id, owner.api_key))

            def expected_url(filename):
                # URL layout asserted below: host/bucket/project/task/user/file.
                return 'https://{host}/{bucket}/{project_id}/{task_id}/{user_id}/{filename}'.format(
                    host=self.host,
                    bucket=self.bucket,
                    project_id=project.id,
                    task_id=task.id,
                    user_id=owner.id,
                    filename=filename)

            submitted_info = {
                'test__upload_url': {
                    'filename': 'hello.txt',
                    'content': 'abc'
                },
                'another_field': 42
            }
            body = json.dumps(
                dict(project_id=project.id, task_id=task.id,
                     info=submitted_info))
            response = self.app.post('/api/taskrun?api_key=%s' % owner.api_key,
                                     data=body)

            assert response.status_code == 200, response.data
            set_content.assert_called()
            result_info = json.loads(response.data)['info']
            assert len(result_info) == 1
            answer_url = result_info['pyb_answer_url']
            assert answer_url == expected_url('pyb_answer.json'), answer_url

            cipher = AESWithGCM('testkey')
            # first call
            first_args, _first_kwargs = set_content.call_args_list[0]
            ciphertext = first_args[0].read()
            plaintext = cipher.decrypt(ciphertext)
            assert ciphertext != plaintext
            assert plaintext == 'abc'

            upload_from_string.assert_called()
            last_args, _last_kwargs = set_content.call_args
            stored_answer = json.loads(cipher.decrypt(last_args[0].read()))

            assert stored_answer['test__upload_url'] == expected_url('hello.txt')
            assert stored_answer['another_field'] == 42