def multi_download(self):
    """
    Perform a multipart download of a single s3 object.

    Byte ranges of the object are placed on ``part_queue``, which is
    directly responsible for controlling the progress of the multipart
    download.  A ``DownloadPartTask`` is submitted to the ``executer``
    for each part so worker threads can process them.  This function
    waits for all of the parts to finish, then sets the destination
    file's modification time to the s3 object's last-modified time.

    Because this method waits on its parts to finish, worker threads
    must be running for it to complete.
    """
    part_queue = NoBlockQueue(self.interrupt)
    dest_queue = NoBlockQueue(self.interrupt)
    part_counter = MultiCounter()
    write_lock = threading.Lock()
    counter_lock = threading.Lock()
    d = os.path.dirname(self.dest)
    try:
        if not os.path.exists(d):
            os.makedirs(d)
    except OSError:
        # Best effort only: another thread/process may have created the
        # directory between the exists() check and makedirs().  Narrow
        # to OSError so real programming errors are not swallowed.
        pass
    size_uploads = self.chunksize
    num_uploads = int(self.size / size_uploads)
    with open(self.dest, 'wb') as f:
        for i in range(num_uploads):
            part = (self, i, size_uploads)
            part_queue.put(part)
            task = DownloadPartTask(session=self.session,
                                    executer=self.executer,
                                    part_queue=part_queue,
                                    dest_queue=dest_queue, f=f,
                                    region=self.region,
                                    printQueue=self.printQueue,
                                    write_lock=write_lock,
                                    part_counter=part_counter,
                                    counter_lock=counter_lock)
            self.executer.submit(task)
        part_queue.join()
        # The following ensures that if the multipart download is
        # in progress, all part downloads finish before releasing the
        # file handle.  This really only applies when an interrupt
        # signal is sent because the ``part_queue.join()`` ensures this
        # if the process is not interrupted.
        while part_counter.count:
            time.sleep(0.1)
    part_list = []
    while not dest_queue.empty():
        part = dest_queue.get()
        part_list.append(part)
    if len(part_list) != num_uploads:
        raise Exception("Download failed: %d of %d parts completed" %
                        (len(part_list), num_uploads))
    last_update_tuple = self.last_update.timetuple()
    mod_timestamp = time.mktime(last_update_tuple)
    os.utime(self.dest, (int(mod_timestamp), int(mod_timestamp)))
def test_max_size(self):
    """A queue created with ``maxsize=3`` rejects a non-blocking 4th put."""
    bounded_queue = NoBlockQueue(maxsize=3)
    for item in (1, 2, 3):
        bounded_queue.put(item)
    with self.assertRaises(queue.Full):
        bounded_queue.put(4, block=False)
def __init__(self, session, params, multi_threshold=MULTI_THRESHOLD,
             chunksize=CHUNKSIZE):
    """
    Store the session, merge caller-supplied options over the defaults,
    and build the task ``Executer``.
    """
    self.session = session
    self.done = threading.Event()
    self.interrupt = threading.Event()
    self.printQueue = NoBlockQueue()
    # Start from the defaults, pull the required region straight from
    # the caller, then let any caller-supplied option override.
    merged = {'dryrun': False, 'quiet': False, 'acl': None,
              'guess_mime_type': True}
    merged['region'] = params['region']
    for option in list(merged):
        if option in params:
            merged[option] = params[option]
    self.params = merged
    self.multi_threshold = multi_threshold
    self.chunksize = chunksize
    self.executer = Executer(
        done=self.done, num_threads=NUM_THREADS,
        timeout=QUEUE_TIMEOUT_GET, printQueue=self.printQueue,
        quiet=self.params['quiet'], interrupt=self.interrupt,
        max_multi=NUM_MULTI_THREADS)
def __init__(self, session, params, multi_threshold=MULTI_THRESHOLD,
             chunksize=CHUNKSIZE):
    """
    Store the session, set up the result and write queues, merge
    caller-supplied options over the defaults, and build the
    ``Executor`` plus the bookkeeping lists for in-flight multipart
    transfers.
    """
    self.session = session
    self.done = threading.Event()
    self.interrupt = threading.Event()
    self.result_queue = NoBlockQueue()
    # The write_queue has potential for optimizations, so the constant
    # for maxsize is scoped to this class (as opposed to constants.py)
    # so we have the ability to change this value later.
    self.write_queue = NoBlockQueue(self.interrupt,
                                    maxsize=self.MAX_IO_QUEUE_SIZE)
    # Defaults first; the required region comes straight from the
    # caller, then any caller-supplied option overrides its default.
    merged = {'dryrun': False, 'quiet': False, 'acl': None,
              'guess_mime_type': True, 'sse': False,
              'storage_class': None, 'website_redirect': None,
              'content_type': None, 'cache_control': None,
              'content_disposition': None, 'content_encoding': None,
              'content_language': None, 'expires': None,
              'grants': None}
    merged['region'] = params['region']
    for option in list(merged):
        if option in params:
            merged[option] = params[option]
    self.params = merged
    self.multi_threshold = multi_threshold
    self.chunksize = chunksize
    self.executor = Executor(
        done=self.done, num_threads=NUM_THREADS,
        result_queue=self.result_queue, quiet=self.params['quiet'],
        interrupt=self.interrupt, max_queue_size=MAX_QUEUE_SIZE,
        write_queue=self.write_queue)
    self._multipart_uploads = []
    self._multipart_downloads = []
def test_no_max_size(self):
    """An unbounded queue accepts puts freely and preserves FIFO order."""
    unbounded_queue = NoBlockQueue()
    items = [1, 2, 3, 4]
    for item in items:
        unbounded_queue.put(item)
    for expected in items:
        self.assertEqual(unbounded_queue.get(), expected)
def start(self):
    """
    Create and start the print, i/o writer, and worker threads.

    The i/o writer thread is started first so writes can be drained
    immediately; the print thread and workers are daemons so they do
    not keep the process alive.  Every thread is recorded in
    ``threads_list`` so it can be joined later.
    """
    self.print_thread = PrintThread(self.result_queue, self.done,
                                    self.quiet, self.interrupt)
    self.print_thread.daemon = True
    self.io_thread = IOWriterThread(self.write_queue, self.done)
    self.io_thread.start()
    self.threads_list.append(self.io_thread)
    self.queue = NoBlockQueue(self.interrupt,
                              maxsize=self._max_queue_size)
    self.threads_list.append(self.print_thread)
    self.print_thread.start()
    for i in range(self.num_threads):
        worker = Worker(queue=self.queue, done=self.done)
        # Use the ``daemon`` attribute (matching print_thread above)
        # rather than the deprecated setDaemon() method.
        worker.daemon = True
        self.threads_list.append(worker)
        worker.start()
def start(self):
    """
    Create the task queue and start the print and worker threads.

    Resets the multipart counter, marks the print thread and workers
    as daemons so they do not keep the process alive, and records
    every thread in ``threads_list`` so it can be joined later.
    """
    self.queue = NoBlockQueue(self.interrupt)
    self.multi_counter.count = 0
    self.print_thread = PrintThread(self.printQueue, self.done,
                                    self.quiet, self.interrupt,
                                    self.timeout)
    # Use the ``daemon`` attribute rather than the deprecated
    # setDaemon() method.
    self.print_thread.daemon = True
    self.threads_list.append(self.print_thread)
    self.print_thread.start()
    for i in range(self.num_threads):
        worker = Worker(queue=self.queue, done=self.done,
                        timeout=self.timeout,
                        multi_lock=self.multi_lock,
                        multi_counter=self.multi_counter,
                        max_multi=self.max_multi)
        worker.daemon = True
        self.threads_list.append(worker)
        worker.start()
def __init__(self, session, params, multi_threshold=MULTI_THRESHOLD,
             chunksize=CHUNKSIZE):
    """
    Store the session, set up the result queue, merge caller-supplied
    options over the defaults, and build the ``Executer`` plus the
    bookkeeping lists for in-flight multipart transfers.
    """
    self.session = session
    self.done = threading.Event()
    self.interrupt = threading.Event()
    self.result_queue = NoBlockQueue()
    # Defaults first; the required region comes straight from the
    # caller, then any caller-supplied option overrides its default.
    merged = {'dryrun': False, 'quiet': False, 'acl': None,
              'guess_mime_type': True, 'sse': False,
              'storage_class': None, 'website_redirect': None,
              'content_type': None, 'cache_control': None,
              'content_disposition': None, 'content_encoding': None,
              'content_language': None, 'expires': None,
              'grants': None}
    merged['region'] = params['region']
    for option in list(merged):
        if option in params:
            merged[option] = params[option]
    self.params = merged
    self.multi_threshold = multi_threshold
    self.chunksize = chunksize
    self.executer = Executer(
        done=self.done, num_threads=NUM_THREADS,
        timeout=QUEUE_TIMEOUT_GET, result_queue=self.result_queue,
        quiet=self.params['quiet'], interrupt=self.interrupt,
        max_queue_size=MAX_QUEUE_SIZE,
    )
    self._multipart_uploads = []
    self._multipart_downloads = []
def multi_upload(self):
    """
    Perform a multipart upload of a single file.

    After initiating the multipart upload, part descriptors are placed
    on ``part_queue``, which is directly responsible for controlling
    the progress of the upload.  An ``UploadPartTask`` is submitted to
    the ``executer`` for each part so worker threads can process them.
    This function waits for all of the parts to finish and then
    completes the multipart upload; if any part is missing, the upload
    is aborted instead and an exception is raised.

    Because this method waits on its parts to finish, worker threads
    must be running for it to complete.
    """
    part_queue = NoBlockQueue(self.interrupt)
    complete_upload_queue = Queue.PriorityQueue()
    part_counter = MultiCounter()
    counter_lock = threading.Lock()
    bucket, key = find_bucket_key(self.dest)
    params = {'endpoint': self.endpoint, 'bucket': bucket, 'key': key}
    if self.parameters['acl']:
        params['acl'] = self.parameters['acl'][0]
    if self.parameters['guess_mime_type']:
        self._inject_content_type(params, self.src)
    response_data, http = operate(self.service, 'CreateMultipartUpload',
                                  params)
    upload_id = response_data['UploadId']
    size_uploads = self.chunksize
    # Round up so the final, possibly smaller, part is included.
    num_uploads = int(math.ceil(self.size / float(size_uploads)))
    for i in range(1, (num_uploads + 1)):
        part_info = (self, upload_id, i, size_uploads)
        part_queue.put(part_info)
        task = UploadPartTask(session=self.session,
                              executer=self.executer,
                              part_queue=part_queue,
                              dest_queue=complete_upload_queue,
                              region=self.region,
                              printQueue=self.printQueue,
                              interrupt=self.interrupt,
                              part_counter=part_counter,
                              counter_lock=counter_lock)
        self.executer.submit(task)
    part_queue.join()
    # The following ensures that if the multipart upload is in progress,
    # all part uploads finish before aborting or completing.  This
    # really only applies when an interrupt signal is sent because the
    # ``part_queue.join()`` ensures this if the process is not
    # interrupted.
    while part_counter.count:
        time.sleep(0.1)
    parts_list = []
    while not complete_upload_queue.empty():
        part = complete_upload_queue.get()
        # Queue entries are (priority, part_info) pairs; keep only the
        # part metadata needed by CompleteMultipartUpload.
        parts_list.append(part[1])
    if len(parts_list) == num_uploads:
        parts = {'Parts': parts_list}
        params = {'endpoint': self.endpoint, 'bucket': bucket,
                  'key': key, 'upload_id': upload_id,
                  'multipart_upload': parts}
        operate(self.service, 'CompleteMultipartUpload', params)
    else:
        abort_params = {'endpoint': self.endpoint, 'bucket': bucket,
                        'key': key, 'upload_id': upload_id}
        operate(self.service, 'AbortMultipartUpload', abort_params)
        raise Exception("Upload aborted: %d of %d parts completed" %
                        (len(parts_list), num_uploads))