def add_file_row(size, sha512, instances=[]):
    session = current_app.db.session('relengapi')
    file_row = tables.File(size=size, visibility='public', sha512=sha512)
    session.add(file_row)
    for region in instances:
        session.add(tables.FileInstance(file=file_row, region=region))
    session.commit()
    return file_row
def add_pending_upload_and_file_row(size, sha512, expires, region):
    session = current_app.db.session('tooltool')
    file_row = tables.File(size=size, visibility='public', sha512=sha512)
    pu_row = tables.PendingUpload(
        file=file_row, expires=expires, region=region)
    session.add(file_row)
    session.commit()
    return pu_row, file_row
def add_file_to_db(app, content, regions=['us-east-1'], pending_regions=[],
                   visibility='public'):
    with app.app_context():
        session = app.db.session('relengapi')
        file_row = tables.File(size=len(content),
                               visibility=visibility,
                               sha512=hashlib.sha512(content).hexdigest())
        session.add(file_row)
        session.commit()
        for region in regions:
            session.add(tables.FileInstance(
                file_id=file_row.id, region=region))
        for region in pending_regions:
            session.add(tables.PendingUpload(
                file=file_row,
                region=region,
                expires=relengapi_time.now() + datetime.timedelta(seconds=60)))
        session.commit()
        return file_row
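
# A minimal usage sketch of the helper above (a hypothetical test, not part of
# the original suite); it assumes the same `app` fixture and `eq_` assertion
# helper used by the tests below.
def test_add_file_to_db_creates_instances(app):
    content = 'hello world'
    digest = hashlib.sha512(content).hexdigest()
    add_file_to_db(app, content, regions=['us-east-1', 'us-west-2'])
    with app.app_context():
        # one FileInstance row per requested region
        row = tables.File.query.filter_by(sha512=digest).one()
        eq_(len(row.instances), 2)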
def test_file_batches_relationship(app):
    with app.app_context():
        session = app.db.session('tooltool')
        file = tables.File(size=100, sha512='abcd', visibility='internal')
        session.add(file)
        batch = tables.Batch(
            uploaded=time.now(), author="dustin", message="hi")
        session.add(batch)
        bf = tables.BatchFile(batch=batch, file=file, filename="foo.txt")
        session.add(bf)
        session.commit()
    with app.app_context():
        file = tables.File.query.first()
        eq_(file.batches['foo.txt'].message, 'hi')
    with app.app_context():
        batch = tables.Batch.query.first()
        eq_(batch.files['foo.txt'].sha512, 'abcd')
def upload_batch(region=None, body=None):
    """Create a new upload batch.  The response object will contain a
    ``put_url`` for each file which needs to be uploaded -- which may not be
    all!  The caller is then responsible for uploading to those URLs.
    Consider using Amazon's MD5-verification capabilities to ensure that the
    uploaded files are transferred correctly, although the tooltool server
    will verify the integrity anyway.  The upload must have the header
    ``Content-Type: application/octet-stream``.

    The query argument ``region=us-west-1`` indicates a preference for URLs
    in that region, although if the region is not available then URLs in
    other regions may be returned.

    The returned signed URLs are only valid for 60 seconds, so all uploads
    must begin within that timeframe.  Clients should therefore perform all
    uploads in parallel, rather than sequentially.  This limitation is in
    place to prevent malicious modification of files after they have been
    verified."""
    region, bucket = get_region_and_bucket(region)

    if not body.message:
        raise BadRequest("message must be non-empty")

    if not body.files:
        raise BadRequest("a batch must include at least one file")

    if body.author:
        raise BadRequest("Author must not be specified for upload")
    try:
        body.author = current_user.authenticated_email
    except AttributeError:
        raise BadRequest("Could not determine authenticated username")

    # verify permissions based on visibilities
    visibilities = set(f.visibility for f in body.files.itervalues())
    for v in visibilities:
        prm = p.get('tooltool.upload.{}'.format(v))
        if not prm or not prm.can():
            raise Forbidden("no permission to upload {} files".format(v))

    session = g.db.session('relengapi')
    batch = tables.Batch(uploaded=time.now(),
                         author=body.author,
                         message=body.message)

    s3 = current_app.aws.connect_to('s3', region)
    for filename, info in body.files.iteritems():
        log = logger.bind(tooltool_sha512=info.digest,
                          tooltool_operation='upload',
                          tooltool_batch_id=batch.id,
                          mozdef=True)
        if info.algorithm != 'sha512':
            raise BadRequest("'sha512' is the only allowed digest algorithm")
        if not is_valid_sha512(info.digest):
            raise BadRequest("Invalid sha512 digest")
        digest = info.digest
        file = tables.File.query.filter(tables.File.sha512 == digest).first()
        if file and file.visibility != info.visibility:
            raise BadRequest("Cannot change a file's visibility level")
        if file and file.instances != []:
            if file.size != info.size:
                raise BadRequest("Size mismatch for {}".format(filename))
        else:
            if not file:
                file = tables.File(sha512=digest,
                                   visibility=info.visibility,
                                   size=info.size)
                session.add(file)
            log.info(
                "generating signed S3 PUT URL to {} for {}; expiring in {}s"
                .format(info.digest[:10], current_user, UPLOAD_EXPIRES_IN))
            info.put_url = s3.generate_url(
                method='PUT',
                expires_in=UPLOAD_EXPIRES_IN,
                bucket=bucket,
                key=util.keyname(info.digest),
                headers={'Content-Type': 'application/octet-stream'})
            # The PendingUpload row needs to reflect the updated expiration
            # time, even if there's an existing pending upload that expires
            # earlier.  The `merge` method does a SELECT and then either
            # UPDATEs or INSERTs the row.  However, merge needs the file_id,
            # rather than just a reference to the file object; and for that,
            # we need to flush the inserted file.
            session.flush()
            pu = tables.PendingUpload(
                file_id=file.id,
                region=region,
                expires=time.now() + datetime.timedelta(
                    seconds=UPLOAD_EXPIRES_IN))
            session.merge(pu)
        session.add(tables.BatchFile(filename=filename, file=file,
                                     batch=batch))
    session.add(batch)
    session.commit()

    body.id = batch.id
    return body
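
# Hypothetical client-side sketch of the upload flow documented in the
# docstring above: POST a batch, then PUT each returned ``put_url`` with the
# required Content-Type header (shown sequentially here for brevity; real
# clients should upload in parallel given the 60-second window).  The
# endpoint path, the ``result`` response envelope, the use of `requests`,
# and the omission of authentication are assumptions, not part of this
# module.
import requests


def upload_batch_example(base_url, message, files):
    """files: dict of filename -> file content (str)."""
    body = {
        'message': message,
        'files': {
            name: {'algorithm': 'sha512',
                   'digest': hashlib.sha512(content).hexdigest(),
                   'size': len(content),
                   'visibility': 'public'}
            for name, content in files.iteritems()
        },
    }
    resp = requests.post(base_url + '/tooltool/upload', json=body)
    resp.raise_for_status()
    result = resp.json()['result']
    for name, info in result['files'].iteritems():
        # only files the server does not already have come back with a put_url
        if 'put_url' in info:
            requests.put(info['put_url'], data=files[name],
                         headers={'Content-Type': 'application/octet-stream'}
                         ).raise_for_status()
    return result['id']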