Example No. 1
def test_replicate_file(app):
    """Replicating a file initiates copy operations from regions where the file
    exists to regions where it does not."""
    with app.app_context():
        file = add_file_row(len(DATA), DATA_DIGEST, instances=['us-east-1'])
        make_key(app, 'us-east-1', 'tt-use1', util.keyname(DATA_DIGEST), DATA)
        make_bucket(app, 'us-west-2', 'tt-usw2')
        grooming.replicate_file(app.db.session('relengapi'), file)
    assert_file_instances(app, DATA_DIGEST, ['us-east-1', 'us-west-2'])
    assert key_exists(app, 'us-east-1', 'tt-use1', util.keyname(DATA_DIGEST))
    assert key_exists(app, 'us-west-2', 'tt-usw2', util.keyname(DATA_DIGEST))
Example No. 2
def test_replicate_file(app):
    """Replicating a file initiates copy operations from regions where the file
    exists to regions where it does not."""
    with app.app_context():
        file = add_file_row(len(DATA), DATA_DIGEST, instances=['us-east-1'])
        make_key(app, 'us-east-1', 'tt-use1', util.keyname(DATA_DIGEST), DATA)
        make_bucket(app, 'us-west-2', 'tt-usw2')
        grooming.replicate_file(app.db.session('relengapi'), file)
    assert_file_instances(app, DATA_DIGEST, ['us-east-1', 'us-west-2'])
    assert key_exists(app, 'us-east-1', 'tt-use1', util.keyname(DATA_DIGEST))
    assert key_exists(app, 'us-west-2', 'tt-usw2', util.keyname(DATA_DIGEST))
Example No. 3
def test_replicate_file_already_exists(app):
    """If a target object already exists in S3 during replication, it is
    deleted rather than being trusted to be correct."""
    with app.app_context():
        file = add_file_row(len(DATA), DATA_DIGEST, instances=['us-east-1'])
        make_key(app, 'us-east-1', 'tt-use1', util.keyname(DATA_DIGEST), DATA)
        make_key(app, 'us-west-2', 'tt-usw2', util.keyname(DATA_DIGEST), "BAD")
        grooming.replicate_file(app.db.session('relengapi'), file)
    assert_file_instances(app, DATA_DIGEST, ['us-east-1', 'us-west-2'])
    assert key_exists(app, 'us-east-1', 'tt-use1', util.keyname(DATA_DIGEST))
    k = key_exists(app, 'us-west-2', 'tt-usw2', util.keyname(DATA_DIGEST))
    eq_(k.get_contents_as_string(), DATA)  # not "BAD"
Example No. 4
def test_replicate_file_already_exists(app):
    """If a target object already exists in S3 during replication, it is
    deleted rather than being trusted to be correct."""
    with app.app_context():
        file = add_file_row(len(DATA), DATA_DIGEST, instances=['us-east-1'])
        make_key(app, 'us-east-1', 'tt-use1', util.keyname(DATA_DIGEST), DATA)
        make_key(app, 'us-west-2', 'tt-usw2', util.keyname(DATA_DIGEST), "BAD")
        grooming.replicate_file(app.db.session('relengapi'), file)
    assert_file_instances(app, DATA_DIGEST, ['us-east-1', 'us-west-2'])
    assert key_exists(app, 'us-east-1', 'tt-use1', util.keyname(DATA_DIGEST))
    k = key_exists(app, 'us-west-2', 'tt-usw2', util.keyname(DATA_DIGEST))
    eq_(k.get_contents_as_string(), DATA)  # not "BAD"
Example No. 5
def replicate_file(session, file):
    config = current_app.config['TOOLTOOL_REGIONS']
    regions = set(config)
    file_regions = set([i.region for i in file.instances])
    # only use configured source regions; if a region is removed
    # from the configuration, we can't copy from it.
    source_regions = file_regions & regions
    if not source_regions:
        # this should only happen when the only region containing a
        # file is removed from the configuration
        log.warning("no source regions for {}".format(file.sha512))
        return
    source_region = source_regions.pop()
    source_bucket = config[source_region]
    target_regions = regions - file_regions
    log.info("replicating {} from {} to {}".format(
        file.sha512, source_region, ', '.join(target_regions)))

    key_name = util.keyname(file.sha512)
    for target_region in target_regions:
        target_bucket = config[target_region]
        conn = current_app.aws.connect_to('s3', target_region)
        bucket = conn.get_bucket(target_bucket)

        # commit the session before replicating, since the DB connection may
        # otherwise go away while we're distracted.
        session.commit()
        bucket.copy_key(new_key_name=key_name,
                        src_key_name=key_name,
                        src_bucket_name=source_bucket,
                        storage_class='STANDARD',
                        preserve_acl=False)
        session.add(tables.FileInstance(file=file, region=target_region))
        session.commit()
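
The region bookkeeping above is just set arithmetic over the TOOLTOOL_REGIONS mapping (region name to bucket name). A minimal, self-contained sketch of that selection step, using the same tt-use1/tt-usw2 test configuration that appears later in this listing; the helper name is illustrative and not part of the grooming module:

# Sketch of the source/target selection performed by replicate_file.
# pick_replication_targets is a hypothetical name for illustration only.
def pick_replication_targets(config, file_regions):
    regions = set(config)                 # all configured regions
    sources = file_regions & regions      # can only copy from configured regions
    targets = regions - file_regions      # regions still missing the file
    return sources, targets

config = {'us-east-1': 'tt-use1', 'us-west-2': 'tt-usw2'}
sources, targets = pick_replication_targets(config, {'us-east-1'})
assert sources == {'us-east-1'}
assert targets == {'us-west-2'}   # replicate_file would copy into tt-usw2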
Example No. 6
def check_pending_upload(session, pu, _test_shim=lambda: None):
    # we can check the upload any time between the expiration of the URL
    # (after which the user can't make any more changes, but the upload
    # may yet be incomplete) and 1 day afterward (ample time for the upload
    # to complete)
    sha512 = pu.file.sha512
    size = pu.file.size

    log = logger.bind(tooltool_sha512=sha512)

    if time.now() < pu.expires:
        # URL is not expired yet
        return
    elif time.now() > pu.expires + timedelta(days=1):
        # Upload will probably never complete
        log.info(
            "Deleting abandoned pending upload for {}".format(sha512))
        session.delete(pu)
        return

    # connect and see if the file exists..
    s3 = current_app.aws.connect_to('s3', pu.region)
    cfg = current_app.config.get('TOOLTOOL_REGIONS')
    if not cfg or pu.region not in cfg:
        log.warning("Pending upload for {} was to an un-configured "
                    "region".format(sha512))
        session.delete(pu)
        return

    bucket = s3.get_bucket(cfg[pu.region], validate=False)
    key = bucket.get_key(util.keyname(sha512))
    if not key:
        # not uploaded yet
        return

    # commit the session before verifying the file instance, since the
    # DB connection may otherwise go away while we're distracted.
    session.commit()
    _test_shim()

    if not verify_file_instance(sha512, size, key):
        log.warning(
            "Upload of {} was invalid; deleting key".format(sha512))
        key.delete()
        session.delete(pu)
        session.commit()
        return

    log.info("Upload of {} considered valid".format(sha512))
    # add a file instance, but it's OK if it already exists
    try:
        tables.FileInstance(file=pu.file, region=pu.region)
        session.commit()
    except sa.exc.IntegrityError:
        session.rollback()

    # and delete the pending upload
    session.delete(pu)
    session.commit()
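
The timing comments at the top of check_pending_upload reduce to one window test: act only after the signed URL has expired, and give up one day later. A standalone sketch of that predicate, with an illustrative function name:

from datetime import datetime, timedelta

# Sketch of the window described above: expires <= now <= expires + 1 day.
# Outside the window the pending upload is either left alone (too early)
# or abandoned (too late).
def upload_check_action(now, expires):
    if now < expires:
        return 'wait'        # URL still valid; upload may still be in progress
    if now > expires + timedelta(days=1):
        return 'abandon'     # ample time has passed; delete the pending upload
    return 'check'           # look in S3 and verify the file

assert upload_check_action(datetime(2015, 3, 5, 12), datetime(2015, 3, 5, 13)) == 'wait'
assert upload_check_action(datetime(2015, 3, 7), datetime(2015, 3, 5, 13)) == 'abandon'
assert upload_check_action(datetime(2015, 3, 5, 14), datetime(2015, 3, 5, 13)) == 'check'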
Example No. 7
def check_pending_upload(session, pu, _test_shim=lambda: None):
    # we can check the upload any time between the expiration of the URL
    # (after which the user can't make any more changes, but the upload
    # may yet be incomplete) and 1 day afterward (ample time for the upload
    # to complete)
    sha512 = pu.file.sha512
    size = pu.file.size

    log = logger.bind(tooltool_sha512=sha512, mozdef=True)

    if time.now() < pu.expires:
        # URL is not expired yet
        return
    elif time.now() > pu.expires + timedelta(days=1):
        # Upload will probably never complete
        log.info(
            "Deleting abandoned pending upload for {}".format(sha512))
        session.delete(pu)
        return

    # connect and see if the file exists..
    s3 = current_app.aws.connect_to('s3', pu.region)
    cfg = current_app.config.get('TOOLTOOL_REGIONS')
    if not cfg or pu.region not in cfg:
        log.warning("Pending upload for {} was to an un-configured "
                    "region".format(sha512))
        session.delete(pu)
        return

    bucket = s3.get_bucket(cfg[pu.region], validate=False)
    key = bucket.get_key(util.keyname(sha512))
    if not key:
        # not uploaded yet
        return

    # commit the session before verifying the file instance, since the
    # DB connection may otherwise go away while we're distracted.
    session.commit()
    _test_shim()

    if not verify_file_instance(sha512, size, key):
        log.warning(
            "Upload of {} was invalid; deleting key".format(sha512))
        key.delete()
        session.delete(pu)
        session.commit()
        return

    log.info("Upload of {} considered valid".format(sha512))
    # add a file instance, but it's OK if it already exists
    try:
        tables.FileInstance(file=pu.file, region=pu.region)
        session.commit()
    except sa.exc.IntegrityError:
        session.rollback()

    # and delete the pending upload
    session.delete(pu)
    session.commit()
Example No. 8
def add_file_to_s3(app, content, region='us-east-1'):
    with app.app_context():
        conn = app.aws.connect_to('s3', region)
        bucket_name = cfg['TOOLTOOL_REGIONS'][region]
        try:
            conn.head_bucket(bucket_name)
        except boto.exception.S3ResponseError:
            conn.create_bucket(bucket_name)
        bucket = conn.get_bucket(bucket_name)
        key_name = util.keyname(hashlib.sha512(content).hexdigest())
        key = bucket.new_key(key_name)
        key.set_contents_from_string(content)
Example No. 9
def add_file_to_s3(app, content, region='us-east-1'):
    with app.app_context():
        conn = app.aws.connect_to('s3', region)
        bucket_name = cfg['TOOLTOOL_REGIONS'][region]
        try:
            conn.head_bucket(bucket_name)
        except boto.exception.S3ResponseError:
            conn.create_bucket(bucket_name)
        bucket = conn.get_bucket(bucket_name)
        key_name = util.keyname(hashlib.sha512(content).hexdigest())
        key = bucket.new_key(key_name)
        key.set_contents_from_string(content)
Example No. 10
def test_delete_instances_success(app, client):
    """A PATCH with op=delete_instances deletes its instances."""
    add_file_to_db(app, ONE, regions=["us-east-1"])
    add_file_to_s3(app, ONE, region="us-east-1")
    resp = do_patch(client, "sha512", ONE_DIGEST, [{"op": "delete_instances"}])
    assert_file_response(resp, ONE, instances=[])
    with app.app_context():
        # ensure instances are gone from the DB
        f = tables.File.query.first()
        eq_(f.instances, [])

        # and from S3
        conn = app.aws.connect_to("s3", "us-east-1")
        key = conn.get_bucket("tt-use1").get_key(util.keyname(ONE_DIGEST))
        assert not key, "key still exists"
Example No. 11
def test_delete_instances_success(app, client):
    """A PATCH with op=delete_instances deletes its instances."""
    add_file_to_db(app, ONE, regions=['us-east-1'])
    add_file_to_s3(app, ONE, region='us-east-1')
    resp = do_patch(client, 'sha512', ONE_DIGEST, [{'op': 'delete_instances'}])
    assert_file_response(resp, ONE, instances=[])
    with app.app_context():
        # ensure instances are gone from the DB
        f = tables.File.query.first()
        eq_(f.instances, [])

        # and from S3
        conn = app.aws.connect_to('s3', 'us-east-1')
        key = conn.get_bucket('tt-use1').get_key(util.keyname(ONE_DIGEST))
        assert not key, "key still exists"
Example No. 12
def download_file(digest, region=None):
    """Fetch a link to the file with the given sha512 digest.  The response
    is a 302 redirect to a signed download URL.

    The query argument ``region=us-west-1`` indicates a preference for a URL in
    that region, although if the file is not available in that region then a URL
    from another region may be returned."""
    log = logger.bind(tooltool_sha512=digest, tooltool_operation='download')
    if not is_valid_sha512(digest):
        raise BadRequest("Invalid sha512 digest")

    # see where the file is..
    tbl = tables.File
    file_row = tbl.query.filter(tbl.sha512 == digest).first()
    if not file_row or not file_row.instances:
        raise NotFound

    # check visibility
    allow_pub_dl = current_app.config.get(
        'TOOLTOOL_ALLOW_ANONYMOUS_PUBLIC_DOWNLOAD')
    if file_row.visibility != 'public' or not allow_pub_dl:
        if not p.get('tooltool.download.{}'.format(file_row.visibility)).can():
            raise Forbidden

    # figure out which region to use, and from there which bucket
    cfg = current_app.config['TOOLTOOL_REGIONS']
    selected_region = None
    for inst in file_row.instances:
        if inst.region == region:
            selected_region = inst.region
            break
    else:
        # preferred region not found, so pick one from the available set
        selected_region = random.choice(
            [inst.region for inst in file_row.instances])
    bucket = cfg[selected_region]

    key = util.keyname(digest)

    s3 = current_app.aws.connect_to('s3', selected_region)
    log.info("generating signed S3 GET URL for {}.. expiring in {}s".format(
        digest[:10], GET_EXPIRES_IN))
    signed_url = s3.generate_url(method='GET',
                                 expires_in=GET_EXPIRES_IN,
                                 bucket=bucket,
                                 key=key)

    return redirect(signed_url)
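
From a client's point of view, download_file is an endpoint that answers with a 302 to a signed S3 URL. A hedged sketch of consuming it with requests; the base URL and the sha512/<digest> path are assumptions for illustration, not taken from this listing:

import hashlib
import requests

# Hypothetical client: request a file by sha512 digest, let requests follow
# the 302 to the signed S3 URL, then verify the digest locally.
BASE_URL = 'https://example.com/tooltool'

def fetch(digest, region=None):
    params = {'region': region} if region else {}
    resp = requests.get('{}/sha512/{}'.format(BASE_URL, digest),
                        params=params, allow_redirects=True)
    resp.raise_for_status()
    if hashlib.sha512(resp.content).hexdigest() != digest:
        raise RuntimeError('digest mismatch')
    return resp.content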
Example No. 13
def assert_signed_url(url, digest, method="GET", region=None, expires_in=60, bucket=None):
    region = region or "us-east-1"
    bucket = bucket or cfg["TOOLTOOL_REGIONS"][region]
    if region == "us-east-1":
        host = "{}.s3.amazonaws.com".format(bucket)
    else:
        host = "{}.s3-{}.amazonaws.com".format(bucket, region)
    url = urlparse.urlparse(url)
    eq_(url.scheme, "https")
    eq_(url.netloc, host)
    eq_(url.path, "/" + util.keyname(digest))
    query = urlparse.parse_qs(url.query)
    assert "Signature" in query
    # sadly, headers are not represented in the URL
    eq_(query["AWSAccessKeyId"][0], "aa")
    eq_(int(query["Expires"][0]), time.time() + expires_in)
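
The host check above encodes boto's legacy endpoint naming: us-east-1 buckets sign as bucket.s3.amazonaws.com, while other regions get a region-qualified host. A tiny illustration using the tt-use1/tt-usw2 bucket names from the test configuration:

# Expected virtual-hosted endpoints for the two test buckets, matching the
# branches in assert_signed_url above (boto's legacy host naming).
def expected_host(bucket, region):
    if region == 'us-east-1':
        return '{}.s3.amazonaws.com'.format(bucket)
    return '{}.s3-{}.amazonaws.com'.format(bucket, region)

assert expected_host('tt-use1', 'us-east-1') == 'tt-use1.s3.amazonaws.com'
assert expected_host('tt-usw2', 'us-west-2') == 'tt-usw2.s3-us-west-2.amazonaws.com'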
Example No. 14
def test_replicate_file_race(app):
    """If, while replicating a file, another replication completes and the
    subsequent database insert fails, the replication function nonetheless
    succeeds."""
    with app.app_context():
        file = add_file_row(len(DATA), DATA_DIGEST, instances=['us-east-1'])
        make_key(app, 'us-east-1', 'tt-use1', util.keyname(DATA_DIGEST), DATA)
        make_bucket(app, 'us-west-2', 'tt-usw2')

        def test_shim():
            session = app.db.session('relengapi')
            session.add(tables.FileInstance(file=file, region='us-west-2'))
            session.commit()
        grooming.replicate_file(app.db.session('relengapi'), file,
                                _test_shim=test_shim)
    assert_file_instances(app, DATA_DIGEST, ['us-east-1', 'us-west-2'])
Example No. 15
def assert_signed_url(url, digest, method='GET', region=None,
                      expires_in=60, bucket=None):
    region = region or 'us-east-1'
    bucket = bucket or cfg['TOOLTOOL_REGIONS'][region]
    if region == 'us-east-1':
        host = '{}.s3.amazonaws.com'.format(bucket)
    else:
        host = '{}.s3-{}.amazonaws.com'.format(bucket, region)
    url = urlparse.urlparse(url)
    eq_(url.scheme, 'https')
    eq_(url.netloc, host)
    eq_(url.path, '/' + util.keyname(digest))
    query = urlparse.parse_qs(url.query)
    assert 'Signature' in query
    # sadly, headers are not represented in the URL
    eq_(query['AWSAccessKeyId'][0], 'aa')
    eq_(int(query['Expires'][0]), time.time() + expires_in)
Example No. 16
def assert_signed_url(url, digest, method='GET', region=None,
                      expires_in=60, bucket=None):
    region = region or 'us-east-1'
    bucket = bucket or cfg['TOOLTOOL_REGIONS'][region]
    if region == 'us-east-1':
        host = '{}.s3.amazonaws.com'.format(bucket)
    else:
        host = '{}.s3-{}.amazonaws.com'.format(bucket, region)
    url = urlparse.urlparse(url)
    eq_(url.scheme, 'https')
    eq_(url.netloc, host)
    eq_(url.path, '/' + util.keyname(digest))
    query = urlparse.parse_qs(url.query)
    assert 'Signature' in query
    # sadly, headers are not represented in the URL
    eq_(query['AWSAccessKeyId'][0], 'aa')
    eq_(int(query['Expires'][0]), time.time() + expires_in)
Example No. 17
def test_replicate_file_race(app):
    """If, while replicating a file, another replication completes and the
    subsequent database insert fails, the replication function nonetheless
    succeeds."""
    with app.app_context():
        file = add_file_row(len(DATA), DATA_DIGEST, instances=['us-east-1'])
        make_key(app, 'us-east-1', 'tt-use1', util.keyname(DATA_DIGEST), DATA)
        make_bucket(app, 'us-west-2', 'tt-usw2')

        def test_shim():
            session = app.db.session('relengapi')
            session.add(tables.FileInstance(file=file, region='us-west-2'))
            session.commit()

        grooming.replicate_file(app.db.session('relengapi'),
                                file,
                                _test_shim=test_shim)
    assert_file_instances(app, DATA_DIGEST, ['us-east-1', 'us-west-2'])
Example No. 18
def download_file(digest, region=None):
    """Fetch a link to the file with the given sha512 digest.  The response
    is a 302 redirect to a signed download URL.

    The query argument ``region=us-west-1`` indicates a preference for a URL in
    that region, although if the file is not available in that region then a URL
    from another region may be returned."""
    log = logger.bind(tooltool_sha512=digest, tooltool_operation='download')
    if not is_valid_sha512(digest):
        raise BadRequest("Invalid sha512 digest")

    # see where the file is..
    tbl = tables.File
    file_row = tbl.query.filter(tbl.sha512 == digest).first()
    if not file_row or not file_row.instances:
        raise NotFound

    # check visibility
    allow_pub_dl = current_app.config.get('TOOLTOOL_ALLOW_ANONYMOUS_PUBLIC_DOWNLOAD')
    if file_row.visibility != 'public' or not allow_pub_dl:
        if not p.get('tooltool.download.{}'.format(file_row.visibility)).can():
            raise Forbidden

    # figure out which region to use, and from there which bucket
    cfg = current_app.config['TOOLTOOL_REGIONS']
    selected_region = None
    for inst in file_row.instances:
        if inst.region == region:
            selected_region = inst.region
            break
    else:
        # preferred region not found, so pick one from the available set
        selected_region = random.choice([inst.region for inst in file_row.instances])
    bucket = cfg[selected_region]

    key = util.keyname(digest)

    s3 = current_app.aws.connect_to('s3', selected_region)
    log.info("generating signed S3 GET URL for {}.. expiring in {}s".format(
        digest[:10], GET_EXPIRES_IN))
    signed_url = s3.generate_url(
        method='GET', expires_in=GET_EXPIRES_IN, bucket=bucket, key=key)

    return redirect(signed_url)
Example No. 19
def patch_file(digest, body):
    """Make administrative changes to an existing file.  The body is a list of
    changes to apply, each represented by a JSON object.

    The object ``{"op": "delete_instances"}`` will cause all instances of the
    file to be deleted.  The file record itself will not be deleted, as it is
    still a part of one or more upload batches, but until and unless someone
    uploads a new copy, the content will not be available for download.

    If the change has op ``"set_visibility"``, then the file's visibility will
    be set to the value given by the change's ``visibility`` attribute.  For
    example, ``{"op": "set_visibility", "visibility": "internal"}`` will mark a
    file as "internal" after someone has accidentally uploaded it with public
    visibility.

    The returned File instance contains an ``instances`` attribute showing any
    changes."""
    session = current_app.db.session('relengapi')
    file = session.query(
        tables.File).filter(tables.File.sha512 == digest).first()
    if not file:
        raise NotFound

    for change in body:
        if 'op' not in change:
            raise BadRequest("no op")
        if change['op'] == 'delete_instances':
            key_name = util.keyname(digest)
            cfg = current_app.config.get('TOOLTOOL_REGIONS')
            for i in file.instances:
                conn = current_app.aws.connect_to('s3', i.region)
                bucket = conn.get_bucket(cfg[i.region])
                bucket.delete_key(key_name)
                session.delete(i)
        elif change['op'] == 'set_visibility':
            if change['visibility'] not in ('internal', 'public'):
                raise BadRequest("bad visibility level")
            file.visibility = change['visibility']
        else:
            raise BadRequest("unknown op")
    session.commit()
    return file.to_json(include_instances=True)
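
Concretely, the body that patch_file expects is a JSON list of op objects. A hedged sketch of driving the endpoint with requests; the URL path is an assumption for illustration, while the op payloads are the ones documented in the docstring above:

import requests

# Hypothetical client call: mark a file internal and drop its S3 instances.
# The /file/sha512/<digest> path is assumed; auth handling is omitted.
def patch_tooltool_file(base_url, digest):
    body = [
        {'op': 'set_visibility', 'visibility': 'internal'},
        {'op': 'delete_instances'},
    ]
    resp = requests.patch('{}/file/sha512/{}'.format(base_url, digest), json=body)
    resp.raise_for_status()
    return resp.json()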
Example No. 20
def patch_file(digest, body):
    """Make administrative changes to an existing file.  The body is a list of
    changes to apply, each represented by a JSON object.

    The object ``{"op": "delete_instances"}`` will cause all instances of the
    file to be deleted.  The file record itself will not be deleted, as it is
    still a part of one or more upload batches, but until and unless someone
    uploads a new copy, the content will not be available for download.

    If the change has op ``"set_visibility"``, then the file's visibility will
    be set to the value given by the change's ``visibility`` attribute.  For
    example, ``{"op": "set_visibility", "visibility": "internal"}`` will mark a
    file as "internal" after someone has accidentally uploaded it with public
    visibility.

    The returned File instance contains an ``instances`` attribute showing any
    changes."""
    session = current_app.db.session('relengapi')
    file = session.query(tables.File).filter(tables.File.sha512 == digest).first()
    if not file:
        raise NotFound

    for change in body:
        if 'op' not in change:
            raise BadRequest("no op")
        if change['op'] == 'delete_instances':
            key_name = util.keyname(digest)
            cfg = current_app.config.get('TOOLTOOL_REGIONS')
            for i in file.instances:
                conn = current_app.aws.connect_to('s3', i.region)
                bucket = conn.get_bucket(cfg[i.region])
                bucket.delete_key(key_name)
                session.delete(i)
        elif change['op'] == 'set_visibility':
            if change['visibility'] not in ('internal', 'public'):
                raise BadRequest("bad visibility level")
            file.visibility = change['visibility']
        else:
            raise BadRequest("unknown op")
    session.commit()
    return file.to_json(include_instances=True)
Example No. 21
def replicate_file(session, file, _test_shim=lambda: None):
    log = logger.bind(tooltool_sha512=file.sha512)
    config = current_app.config["TOOLTOOL_REGIONS"]
    regions = set(config)
    file_regions = set([i.region for i in file.instances])
    # only use configured source regions; if a region is removed
    # from the configuration, we can't copy from it.
    source_regions = file_regions & regions
    if not source_regions:
        # this should only happen when the only region containing a
        # file is removed from the configuration
        log.warning("no source regions for {}".format(file.sha512))
        return
    source_region = source_regions.pop()
    source_bucket = config[source_region]
    target_regions = regions - file_regions
    log.info("replicating {} from {} to {}".format(file.sha512, source_region, ", ".join(target_regions)))

    key_name = util.keyname(file.sha512)
    for target_region in target_regions:
        target_bucket = config[target_region]
        conn = current_app.aws.connect_to("s3", target_region)
        bucket = conn.get_bucket(target_bucket)

        # commit the session before replicating, since the DB connection may
        # otherwise go away while we're distracted.
        session.commit()
        _test_shim()
        bucket.copy_key(
            new_key_name=key_name,
            src_key_name=key_name,
            src_bucket_name=source_bucket,
            storage_class="STANDARD",
            preserve_acl=False,
        )
        try:
            session.add(tables.FileInstance(file=file, region=target_region))
            session.commit()
        except sa.exc.IntegrityError:
            session.rollback()
Example No. 22
def replicate_file(session, file, _test_shim=lambda: None):
    log = logger.bind(tooltool_sha512=file.sha512, mozdef=True)
    config = current_app.config['TOOLTOOL_REGIONS']
    regions = set(config)
    file_regions = set([i.region for i in file.instances])
    # only use configured source regions; if a region is removed
    # from the configuration, we can't copy from it.
    source_regions = file_regions & regions
    if not source_regions:
        # this should only happen when the only region containing a
        # file is removed from the configuration
        log.warning("no source regions for {}".format(file.sha512))
        return
    source_region = source_regions.pop()
    source_bucket = config[source_region]
    target_regions = regions - file_regions
    log.info("replicating {} from {} to {}".format(file.sha512, source_region,
                                                   ', '.join(target_regions)))

    key_name = util.keyname(file.sha512)
    for target_region in target_regions:
        target_bucket = config[target_region]
        conn = current_app.aws.connect_to('s3', target_region)
        bucket = conn.get_bucket(target_bucket)

        # commit the session before replicating, since the DB connection may
        # otherwise go away while we're distracted.
        session.commit()
        _test_shim()
        bucket.copy_key(new_key_name=key_name,
                        src_key_name=key_name,
                        src_bucket_name=source_bucket,
                        storage_class='STANDARD',
                        preserve_acl=False)
        try:
            session.add(tables.FileInstance(file=file, region=target_region))
            session.commit()
        except sa.exc.IntegrityError:
            session.rollback()
Example No. 23
def upload_batch(region=None, body=None):
    """Create a new upload batch.  The response object will contain a
    ``put_url`` for each file which needs to be uploaded -- which may not be
    all!  The caller is then responsible for uploading to those URLs.  The
    resulting signed URLs are valid for one hour, so uploads should begin
    within that timeframe.  Consider using Amazon's MD5-verification
    capabilities to ensure that the uploaded files are transferred correctly,
    although the tooltool server will verify the integrity anyway.  The
    upload must have the header ``Content-Type: application/octet-stream``.

    The query argument ``region=us-west-1`` indicates a preference for URLs
    in that region, although if the region is not available then URLs in
    other regions may be returned.

    The returned URLs are only valid for 60 seconds, so all upload requests
    must begin within that timeframe.  Clients should therefore perform all
    uploads in parallel, rather than sequentially.  This limitation is in
    place to prevent malicious modification of files after they have been
    verified."""
    region, bucket = get_region_and_bucket(region)

    if not body.message:
        raise BadRequest("message must be non-empty")

    if not body.files:
        raise BadRequest("a batch must include at least one file")

    if body.author:
        raise BadRequest("Author must not be specified for upload")
    try:
        body.author = current_user.authenticated_email
    except AttributeError:
        raise BadRequest("Could not determine authenticated username")

    # verify permissions based on visibilities
    visibilities = set(f.visibility for f in body.files.itervalues())
    for v in visibilities:
        prm = p.get('tooltool.upload.{}'.format(v))
        if not prm or not prm.can():
            raise Forbidden("no permission to upload {} files".format(v))

    session = g.db.session('relengapi')
    batch = tables.Batch(
        uploaded=time.now(),
        author=body.author,
        message=body.message)

    s3 = current_app.aws.connect_to('s3', region)
    for filename, info in body.files.iteritems():
        log = logger.bind(tooltool_sha512=info.digest, tooltool_operation='upload',
                          tooltool_batch_id=batch.id)
        if info.algorithm != 'sha512':
            raise BadRequest("'sha512' is the only allowed digest algorithm")
        if not is_valid_sha512(info.digest):
            raise BadRequest("Invalid sha512 digest")
        digest = info.digest
        file = tables.File.query.filter(tables.File.sha512 == digest).first()
        if file and file.visibility != info.visibility:
            raise BadRequest("Cannot change a file's visibility level")
        if file and file.instances != []:
            if file.size != info.size:
                raise BadRequest("Size mismatch for {}".format(filename))
        else:
            if not file:
                file = tables.File(
                    sha512=digest,
                    visibility=info.visibility,
                    size=info.size)
                session.add(file)
            log.info("generating signed S3 PUT URL to {} for {}; expiring in {}s".format(
                info.digest[:10], current_user, UPLOAD_EXPIRES_IN))
            info.put_url = s3.generate_url(
                method='PUT', expires_in=UPLOAD_EXPIRES_IN, bucket=bucket,
                key=util.keyname(info.digest),
                headers={'Content-Type': 'application/octet-stream'})
            # The PendingUpload row needs to reflect the updated expiration
            # time, even if there's an existing pending upload that expires
            # earlier.  The `merge` method does a SELECT and then either UPDATEs
            # or INSERTs the row.  However, merge needs the file_id, rather than
            # just a reference to the file object; and for that, we need to flush
            # the inserted file.
            session.flush()
            pu = tables.PendingUpload(
                file_id=file.id,
                region=region,
                expires=time.now() + datetime.timedelta(seconds=UPLOAD_EXPIRES_IN))
            session.merge(pu)
        session.add(tables.BatchFile(filename=filename, file=file, batch=batch))
    session.add(batch)
    session.commit()

    body.id = batch.id
    return body
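
The docstring above describes the client's side of the contract: post a batch, then PUT each returned put_url with Content-Type: application/octet-stream before it expires. A hedged sketch of that flow; the /upload path and the exact response shape are assumptions based on the docstring and the body fields read above:

import hashlib
import requests

# Hypothetical upload client. The algorithm/digest/size/visibility fields and
# the per-file put_url mirror what upload_batch reads from and writes to
# body.files; files maps filename -> (content bytes, visibility).
def upload_batch_client(base_url, message, files):
    batch = {'message': message, 'files': {}}
    for name, (content, visibility) in files.items():
        batch['files'][name] = {
            'algorithm': 'sha512',
            'digest': hashlib.sha512(content).hexdigest(),
            'size': len(content),
            'visibility': visibility,
        }
    resp = requests.post('{}/upload'.format(base_url), json=batch)
    resp.raise_for_status()
    result = resp.json()
    # PUT every file that still needs uploading; the signed URLs expire
    # quickly, so a real client would do these in parallel.
    for name, info in result['files'].items():
        put_url = info.get('put_url')
        if put_url:
            put = requests.put(put_url, data=files[name][0],
                               headers={'Content-Type': 'application/octet-stream'})
            put.raise_for_status()
    return result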
Example No. 24
import hashlib
import os

import boto
import mock
import moto
from flask import current_app
from nose.tools import eq_

from relengapi.blueprints.tooltool import grooming
from relengapi.blueprints.tooltool import tables
from relengapi.blueprints.tooltool import util
from relengapi.lib import time
from relengapi.lib.testing.context import TestContext

DATA = os.urandom(10240)
DATA_DIGEST = hashlib.sha512(DATA).hexdigest()
DATA_KEY = util.keyname(DATA_DIGEST)

NOW = 1425592922

cfg = {
    'AWS': {
        'access_key_id': 'aa',
        'secret_access_key': 'ss',
    },
    'TOOLTOOL_REGIONS': {
        'us-east-1': 'tt-use1',
        'us-west-2': 'tt-usw2',
    }
}
test_context = TestContext(config=cfg, databases=['relengapi'])
Example No. 25
def test_keyname():
    eq_(util.keyname(ONE_DIGEST), 'sha512/' + ONE_DIGEST)
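
The assertion pins down the key layout: objects live under a sha512/ prefix. A minimal sketch of what util.keyname must do to satisfy this test; the real implementation may differ:

# Sketch consistent with test_keyname: S3 keys are 'sha512/<hex digest>'.
def keyname(digest):
    return 'sha512/' + digest

assert keyname('abc123') == 'sha512/abc123'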
Example No. 26
def upload_batch(region=None, body=None):
    """Create a new upload batch.  The response object will contain a
    ``put_url`` for each file which needs to be uploaded -- which may not be
    all!  The caller is then responsible for uploading to those URLs.  The
    resulting signed URLs are valid for one hour, so uploads should begin
    within that timeframe.  Consider using Amazon's MD5-verification
    capabilities to ensure that the uploaded files are transferred correctly,
    although the tooltool server will verify the integrity anyway.  The
    upload must have the header ``Content-Type: application/octet-stream``.

    The query argument ``region=us-west-1`` indicates a preference for URLs
    in that region, although if the region is not available then URLs in
    other regions may be returned.

    The returned URLs are only valid for 60 seconds, so all upload requests
    must begin within that timeframe.  Clients should therefore perform all
    uploads in parallel, rather than sequentially.  This limitation is in
    place to prevent malicious modification of files after they have been
    verified."""
    region, bucket = get_region_and_bucket(region)

    if not body.message:
        raise BadRequest("message must be non-empty")

    if not body.files:
        raise BadRequest("a batch must include at least one file")

    if body.author:
        raise BadRequest("Author must not be specified for upload")
    try:
        body.author = current_user.authenticated_email
    except AttributeError:
        raise BadRequest("Could not determine authenticated username")

    # verify permissions based on visibilities
    visibilities = set(f.visibility for f in body.files.itervalues())
    for v in visibilities:
        prm = p.get('tooltool.upload.{}'.format(v))
        if not prm or not prm.can():
            raise Forbidden("no permission to upload {} files".format(v))

    session = g.db.session('relengapi')
    batch = tables.Batch(uploaded=time.now(),
                         author=body.author,
                         message=body.message)

    s3 = current_app.aws.connect_to('s3', region)
    for filename, info in body.files.iteritems():
        log = logger.bind(tooltool_sha512=info.digest,
                          tooltool_operation='upload',
                          tooltool_batch_id=batch.id,
                          mozdef=True)
        if info.algorithm != 'sha512':
            raise BadRequest("'sha512' is the only allowed digest algorithm")
        if not is_valid_sha512(info.digest):
            raise BadRequest("Invalid sha512 digest")
        digest = info.digest
        file = tables.File.query.filter(tables.File.sha512 == digest).first()
        if file and file.visibility != info.visibility:
            raise BadRequest("Cannot change a file's visibility level")
        if file and file.instances != []:
            if file.size != info.size:
                raise BadRequest("Size mismatch for {}".format(filename))
        else:
            if not file:
                file = tables.File(sha512=digest,
                                   visibility=info.visibility,
                                   size=info.size)
                session.add(file)
            log.info(
                "generating signed S3 PUT URL to {} for {}; expiring in {}s".
                format(info.digest[:10], current_user, UPLOAD_EXPIRES_IN))
            info.put_url = s3.generate_url(
                method='PUT',
                expires_in=UPLOAD_EXPIRES_IN,
                bucket=bucket,
                key=util.keyname(info.digest),
                headers={'Content-Type': 'application/octet-stream'})
            # The PendingUpload row needs to reflect the updated expiration
            # time, even if there's an existing pending upload that expires
            # earlier.  The `merge` method does a SELECT and then either UPDATEs
            # or INSERTs the row.  However, merge needs the file_id, rather than
            # just a reference to the file object; and for that, we need to flush
            # the inserted file.
            session.flush()
            pu = tables.PendingUpload(
                file_id=file.id,
                region=region,
                expires=time.now() +
                datetime.timedelta(seconds=UPLOAD_EXPIRES_IN))
            session.merge(pu)
        session.add(tables.BatchFile(filename=filename, file=file,
                                     batch=batch))
    session.add(batch)
    session.commit()

    body.id = batch.id
    return body
Example No. 27
import hashlib
import os

from contextlib import contextmanager
from datetime import datetime
from datetime import timedelta
from flask import current_app
from nose.tools import eq_
from relengapi.blueprints.tooltool import grooming
from relengapi.blueprints.tooltool import tables
from relengapi.blueprints.tooltool import util
from relengapi.lib import time
from relengapi.lib.testing.context import TestContext

DATA = os.urandom(10240)
DATA_DIGEST = hashlib.sha512(DATA).hexdigest()
DATA_KEY = util.keyname(DATA_DIGEST)

NOW = 1425592922

cfg = {
    'AWS': {
        'access_key_id': 'aa',
        'secret_access_key': 'ss',
    },
    'TOOLTOOL_REGIONS': {
        'us-east-1': 'tt-use1',
        'us-west-2': 'tt-usw2',
    }
}
test_context = TestContext(config=cfg, databases=['relengapi'])