def upload_hash_content_to_blobstore(
    generate_upload_url, data, hash_key, content):
  """Uploads the given hash contents directly to the blobstore via a generated
  url.

  Arguments:
    generate_upload_url: The url to get the new upload url from.
    data: extra POST data.
    hash_key: sha1 of the uncompressed version of content.
    content: The contents to upload. Must fit in memory for now.
  """
  logging.debug('Generating url to directly upload file to blobstore')
  assert isinstance(hash_key, str), hash_key
  assert isinstance(content, str), (hash_key, content)
  # TODO(maruel): Support large files. This would require streaming support.
  content_type, body = encode_multipart_formdata(
      data, [('content', hash_key, content)])
  for _ in range(run_isolated.MAX_URL_OPEN_ATTEMPTS):
    # Retry HTTP 50x here.
    response = run_isolated.url_open(generate_upload_url, data=data)
    if not response:
      raise run_isolated.MappingError(
          'Unable to connect to server %s' % generate_upload_url)
    upload_url = response.read()

    # Do not retry this request on HTTP 50x. Regenerate an upload url each
    # time since uploading "consumes" the upload url.
    result = run_isolated.url_open(
        upload_url, data=body, content_type=content_type, retry_50x=False)
    if result:
      return result.read()
  raise run_isolated.MappingError(
      'Unable to connect to server %s' % generate_upload_url)
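# A minimal usage sketch, assuming a hypothetical isolate server endpoint and
# POST token (the real generate_upload_url and data come from the caller), and
# that hashlib is imported at module level:
#
#   payload = open('out.bin', 'rb').read()
#   upload_hash_content_to_blobstore(
#       'https://isolate.example.com/content/generate_blobstore_url/default',
#       data=[('token', 'xyz')],
#       hash_key=hashlib.sha1(payload).hexdigest(),
#       content=payload)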
def update_files_to_upload(query_url, queries, upload):
  """Queries the server to see which files from this batch already exist there.

  Arguments:
    queries: The hash files to potentially upload to the server.
    upload: Any new files that need to be uploaded are passed to this
        function.
  """
  body = ''.join(
      (binascii.unhexlify(meta_data['h']) for (_, meta_data) in queries))
  assert (len(body) % 20) == 0, repr(body)
  response = url_open(
      query_url, data=body, content_type='application/octet-stream').read()
  if len(queries) != len(response):
    raise run_isolated.MappingError(
        'Got an incorrect number of responses from the server. Expected %d, '
        'but got %d' % (len(queries), len(response)))
  hit = 0
  for i in range(len(response)):
    # The server replies with one byte per query: chr(0) means the file is
    # missing and must be uploaded, any other value is a cache hit.
    if response[i] == chr(0):
      upload(queries[i])
    else:
      hit += 1
  logging.info('Queried %d files, %d cache hit', len(queries), hit)
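# A minimal usage sketch for the callback style above (hypothetical URL and
# entries; each query is a (key, meta_data) pair whose meta_data['h'] is a
# hex sha1 digest, 40 chars that unhexlify to the 20 raw bytes sent per file):
#
#   queries = [
#       ('a.txt', {'h': hashlib.sha1('a contents').hexdigest()}),
#       ('b.txt', {'h': hashlib.sha1('b contents').hexdigest()}),
#   ]
#   update_files_to_upload(
#       'https://isolate.example.com/content/contains/default', queries,
#       lambda item: logging.info('needs upload: %s', item[0]))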
def url_open(url, **kwargs):
  """Wraps run_isolated.url_open() and raises on failure to connect."""
  result = run_isolated.url_open(url, **kwargs)
  if not result:
    # If we get no response from the server, assume it is down and raise an
    # exception.
    raise run_isolated.MappingError('Unable to connect to server %s' % url)
  return result
def check_files_exist_on_server(query_url, queries):
  """Queries the server to see which files from this batch already exist there.

  Arguments:
    queries: The hash files to potentially upload to the server.

  Returns:
    missing_files: list of files that are missing on the server.
  """
  logging.info('Checking existence of %d files...', len(queries))
  body = ''.join(
      (binascii.unhexlify(meta_data['h']) for (_, meta_data) in queries))
  assert (len(body) % 20) == 0, repr(body)
  response = url_open(
      query_url, data=body, content_type='application/octet-stream').read()
  if len(queries) != len(response):
    raise run_isolated.MappingError(
        'Got an incorrect number of responses from the server. Expected %d, '
        'but got %d' % (len(queries), len(response)))
  # One byte per query: chr(0) flags a file as missing from the server.
  missing_files = [
      queries[i] for i, flag in enumerate(response) if flag == chr(0)
  ]
  logging.info('Queried %d files, %d cache hit',
               len(queries), len(queries) - len(missing_files))
  return missing_files
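# The same query expressed with the list-returning variant (hypothetical URL;
# queries built as in the sketch above, upload being any caller-supplied
# function that pushes one file to the server):
#
#   missing = check_files_exist_on_server(
#       'https://isolate.example.com/content/contains/default', queries)
#   for item in missing:
#     upload(item)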