Example #1
 def multi_download(self):
     """
     This performs the multipart download.  It assigns ranges to get from
     s3 of particular object to a task.It creates a queue ``part_queue``
     which is directly responsible with controlling the progress of the
     multipart download.  It then creates ``DownloadPartTasks`` for
     threads to run via the ``executer``. This fucntion waits
     for all of the parts in the multipart download to finish, and then
     the last modification time is changed to the last modified time
     of the s3 object.  This method waits on its parts to finish.
     So, threads are required to process the parts for this function
     to complete.
     """
     part_queue = NoBlockQueue(self.interrupt)
     dest_queue = NoBlockQueue(self.interrupt)
     part_counter = MultiCounter()
     write_lock = threading.Lock()
     counter_lock = threading.Lock()
     d = os.path.dirname(self.dest)
     try:
         if not os.path.exists(d):
             os.makedirs(d)
     except Exception:
         # The directory may already exist (e.g. another thread created
         # it between the ``exists`` check and ``makedirs``).
         pass
     size_uploads = self.chunksize
     num_uploads = int(self.size / size_uploads)
     with open(self.dest, 'wb') as f:
         for i in range(num_uploads):
             part = (self, i, size_uploads)
             part_queue.put(part)
             task = DownloadPartTask(session=self.session,
                                     executer=self.executer,
                                     part_queue=part_queue,
                                     dest_queue=dest_queue,
                                     f=f,
                                     region=self.region,
                                     printQueue=self.printQueue,
                                     write_lock=write_lock,
                                     part_counter=part_counter,
                                     counter_lock=counter_lock)
             self.executer.submit(task)
         part_queue.join()
         # The following ensures that if the multipart download is
         # in progress, all part downloads finish before releasing
         # the file handle.  This really only applies when an interrupt
         # signal is sent because the ``part_queue.join()`` ensures this
         # if the process is not interrupted.
         while part_counter.count:
             time.sleep(0.1)
     part_list = []
     while not dest_queue.empty():
         part = dest_queue.get()
         part_list.append(part)
     if len(part_list) != num_uploads:
         raise Exception("Multipart download failed: not all parts "
                         "were downloaded.")
     last_update_tuple = self.last_update.timetuple()
     mod_timestamp = time.mktime(last_update_tuple)
     os.utime(self.dest, (int(mod_timestamp), int(mod_timestamp)))
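None of these examples show ``NoBlockQueue`` itself. For reference, here is a minimal sketch of what an interrupt-aware queue with this interface could look like; the class name, timeout value, and ``put`` semantics are assumptions for illustration, not the actual awscli implementation.

 import queue
 import threading

 class InterruptibleQueue(queue.Queue):
     """Sketch of a queue whose blocking ``put`` gives up once an
     interrupt event is set, so a Ctrl-C can unwind producer threads.
     (Hypothetical; the real ``NoBlockQueue`` may differ.)"""

     def __init__(self, interrupt=None, maxsize=0):
         queue.Queue.__init__(self, maxsize=maxsize)
         self.interrupt = interrupt

     def put(self, item, block=True, timeout=None):
         if not block:
             # Non-blocking puts keep stdlib semantics: raise queue.Full.
             return queue.Queue.put(self, item, block=False)
         while True:
             if self.interrupt is not None and self.interrupt.is_set():
                 return  # abandon the put once an interrupt is signaled
             try:
                 # Wake periodically to re-check the interrupt flag.
                 return queue.Queue.put(self, item, block=True, timeout=0.2)
             except queue.Full:
                 continue

 interrupt = threading.Event()
 part_queue = InterruptibleQueue(interrupt)
 part_queue.put('part-0')  # returns immediately; this queue is unbounded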
Example #2
 def test_max_size(self):
     q = NoBlockQueue(maxsize=3)
     q.put(1)
     q.put(2)
     q.put(3)
     with self.assertRaises(queue.Full):
         q.put(4, block=False)
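The ``maxsize``/``Full`` behavior this test exercises comes straight from the standard library's ``queue.Queue``, which ``NoBlockQueue`` presumably builds on; a standalone demonstration of that baseline behavior:

 import queue

 # A bounded queue.Queue raises queue.Full on a non-blocking put at
 # capacity; one way a producer can recover is to drain an item first.
 q = queue.Queue(maxsize=1)
 q.put('a')
 try:
     q.put('b', block=False)
 except queue.Full:
     q.get()                  # free a slot...
     q.put('b', block=False)  # ...then the put succeeds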
Example #3
 def __init__(self,
              session,
              params,
              multi_threshold=MULTI_THRESHOLD,
              chunksize=CHUNKSIZE):
     self.session = session
     self.done = threading.Event()
     self.interrupt = threading.Event()
     self.printQueue = NoBlockQueue()
     self.params = {
         'dryrun': False,
         'quiet': False,
         'acl': None,
         'guess_mime_type': True
     }
     self.params['region'] = params['region']
     for key in self.params.keys():
         if key in params:
             self.params[key] = params[key]
     self.multi_threshold = multi_threshold
     self.chunksize = chunksize
     self.executer = Executer(done=self.done,
                              num_threads=NUM_THREADS,
                              timeout=QUEUE_TIMEOUT_GET,
                              printQueue=self.printQueue,
                              quiet=self.params['quiet'],
                              interrupt=self.interrupt,
                              max_multi=NUM_MULTI_THREADS)
Example #4
 def __init__(self,
              session,
              params,
              multi_threshold=MULTI_THRESHOLD,
              chunksize=CHUNKSIZE):
     self.session = session
     self.done = threading.Event()
     self.interrupt = threading.Event()
     self.result_queue = NoBlockQueue()
     # The write_queue has potential for optimizations, so the constant
     # for maxsize is scoped to this class (as opposed to constants.py)
     # so we have the ability to change this value later.
     self.write_queue = NoBlockQueue(self.interrupt,
                                     maxsize=self.MAX_IO_QUEUE_SIZE)
     self.params = {
         'dryrun': False,
         'quiet': False,
         'acl': None,
         'guess_mime_type': True,
         'sse': False,
         'storage_class': None,
         'website_redirect': None,
         'content_type': None,
         'cache_control': None,
         'content_disposition': None,
         'content_encoding': None,
         'content_language': None,
         'expires': None,
         'grants': None
     }
     self.params['region'] = params['region']
     for key in self.params.keys():
         if key in params:
             self.params[key] = params[key]
     self.multi_threshold = multi_threshold
     self.chunksize = chunksize
     self.executor = Executor(done=self.done,
                              num_threads=NUM_THREADS,
                              result_queue=self.result_queue,
                              quiet=self.params['quiet'],
                              interrupt=self.interrupt,
                              max_queue_size=MAX_QUEUE_SIZE,
                              write_queue=self.write_queue)
     self._multipart_uploads = []
     self._multipart_downloads = []
Example #5
 def test_no_max_size(self):
     q = NoBlockQueue()
     q.put(1)
     q.put(2)
     q.put(3)
     q.put(4)
     self.assertEqual(q.get(), 1)
     self.assertEqual(q.get(), 2)
     self.assertEqual(q.get(), 3)
     self.assertEqual(q.get(), 4)
Example #6
 def start(self):
     self.print_thread = PrintThread(self.result_queue, self.done,
                                     self.quiet, self.interrupt)
     self.print_thread.daemon = True
     self.io_thread = IOWriterThread(self.write_queue, self.done)
     self.io_thread.start()
     self.threads_list.append(self.io_thread)
     self.queue = NoBlockQueue(self.interrupt, maxsize=self._max_queue_size)
     self.threads_list.append(self.print_thread)
     self.print_thread.start()
     for i in range(self.num_threads):
         worker = Worker(queue=self.queue, done=self.done)
         worker.daemon = True
         self.threads_list.append(worker)
         worker.start()
Example #7
 def start(self):
     self.queue = NoBlockQueue(self.interrupt)
     self.multi_counter.count = 0
     self.print_thread = PrintThread(self.printQueue, self.done, self.quiet,
                                     self.interrupt, self.timeout)
     self.print_thread.daemon = True
     self.threads_list.append(self.print_thread)
     self.print_thread.start()
     for i in range(self.num_threads):
         worker = Worker(queue=self.queue,
                         done=self.done,
                         timeout=self.timeout,
                         multi_lock=self.multi_lock,
                         multi_counter=self.multi_counter,
                         max_multi=self.max_multi)
         worker.daemon = True
         self.threads_list.append(worker)
         worker.start()
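The ``submit`` calls in Examples #1 and #9 presumably feed these workers through the queue that ``start`` builds. A toy, self-contained version of that Executer/Worker wiring (all names here are illustrative, not awscli's code):

 import queue
 import threading

 class MiniExecuter:
     """Toy version of the worker-pool pattern above (hypothetical)."""

     def __init__(self, num_threads=2):
         self.queue = queue.Queue()
         for _ in range(num_threads):
             worker = threading.Thread(target=self._run)
             worker.daemon = True
             worker.start()

     def submit(self, task):
         # Hand the callable to a worker via the shared queue.
         self.queue.put(task)

     def _run(self):
         while True:
             task = self.queue.get()
             try:
                 task()
             finally:
                 # ``task_done`` is what lets ``queue.join()`` unblock.
                 self.queue.task_done()

 ex = MiniExecuter()
 ex.submit(lambda: print('part done'))
 ex.queue.join()  # block until all submitted tasks have completed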
Example #8
 def __init__(self,
              session,
              params,
              multi_threshold=MULTI_THRESHOLD,
              chunksize=CHUNKSIZE):
     self.session = session
     self.done = threading.Event()
     self.interrupt = threading.Event()
     self.result_queue = NoBlockQueue()
     self.params = {
         'dryrun': False,
         'quiet': False,
         'acl': None,
         'guess_mime_type': True,
         'sse': False,
         'storage_class': None,
         'website_redirect': None,
         'content_type': None,
         'cache_control': None,
         'content_disposition': None,
         'content_encoding': None,
         'content_language': None,
         'expires': None,
         'grants': None
     }
     self.params['region'] = params['region']
     for key in self.params.keys():
         if key in params:
             self.params[key] = params[key]
     self.multi_threshold = multi_threshold
     self.chunksize = chunksize
     self.executer = Executer(
         done=self.done,
         num_threads=NUM_THREADS,
         timeout=QUEUE_TIMEOUT_GET,
         result_queue=self.result_queue,
         quiet=self.params['quiet'],
         interrupt=self.interrupt,
         max_queue_size=MAX_QUEUE_SIZE,
     )
     self._multipart_uploads = []
     self._multipart_downloads = []
Example #9
 def multi_upload(self):
     """
     Performs multipart uploads.  It initiates the multipart upload.
     It creates a queue ``part_queue`` which is directly responsible
     with controlling the progress of the multipart upload.  It then
     creates ``UploadPartTasks`` for threads to run via the
     ``executer``.  This fucntion waits for all of the parts in the
     multipart upload to finish, and then it completes the multipart
     upload.  This method waits on its parts to finish.  So, threads
     are required to process the parts for this function to complete.
     """
     part_queue = NoBlockQueue(self.interrupt)
     complete_upload_queue = Queue.PriorityQueue()
     part_counter = MultiCounter()
     counter_lock = threading.Lock()
     bucket, key = find_bucket_key(self.dest)
     params = {'endpoint': self.endpoint, 'bucket': bucket, 'key': key}
     if self.parameters['acl']:
         params['acl'] = self.parameters['acl'][0]
     if self.parameters['guess_mime_type']:
         self._inject_content_type(params, self.src)
     response_data, http = operate(self.service, 'CreateMultipartUpload',
                                   params)
     upload_id = response_data['UploadId']
     size_uploads = self.chunksize
     num_uploads = int(math.ceil(self.size / float(size_uploads)))
     for i in range(1, (num_uploads + 1)):
         part_info = (self, upload_id, i, size_uploads)
         part_queue.put(part_info)
         task = UploadPartTask(session=self.session,
                               executer=self.executer,
                               part_queue=part_queue,
                               dest_queue=complete_upload_queue,
                               region=self.region,
                               printQueue=self.printQueue,
                               interrupt=self.interrupt,
                               part_counter=part_counter,
                               counter_lock=counter_lock)
         self.executer.submit(task)
     part_queue.join()
     # The following ensures that if the multipart upload is in progress,
     # all part uploads finish before aborting or completing.  This
     # really only applies when an interrupt signal is sent because the
     # ``part_queue.join()`` ensures this if the process is not
     # interrupted.
     while part_counter.count:
         time.sleep(0.1)
     parts_list = []
     while not complete_upload_queue.empty():
         part = complete_upload_queue.get()
         parts_list.append(part[1])
     if len(parts_list) == num_uploads:
         parts = {'Parts': parts_list}
         params = {
             'endpoint': self.endpoint,
             'bucket': bucket,
             'key': key,
             'upload_id': upload_id,
             'multipart_upload': parts
         }
         operate(self.service, 'CompleteMultipartUpload', params)
     else:
         abort_params = {
             'endpoint': self.endpoint,
             'bucket': bucket,
             'key': key,
             'upload_id': upload_id
         }
         operate(self.service, 'AbortMultipartUpload', abort_params)
         raise Exception("Multipart upload failed: not all parts were "
                         "uploaded, so the upload was aborted.")
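The ceiling division above is what guarantees that a final, short chunk still gets its own part. A small standalone check of that arithmetic (the helper is ours, purely illustrative):

 import math

 def compute_parts(size, chunksize):
     # Number of parts needed to cover ``size`` bytes, rounding up so
     # the final (possibly short) chunk gets its own part.
     num_uploads = int(math.ceil(size / float(chunksize)))
     # (part_number, byte_offset, bytes_in_part); S3 part numbers
     # start at 1, matching the loop above.
     return [(i + 1, i * chunksize, min(chunksize, size - i * chunksize))
             for i in range(num_uploads)]

 # e.g. a 10 MB object with 3 MB chunks needs 4 parts; the last is 1 MB.
 parts = compute_parts(10 * 1024 ** 2, 3 * 1024 ** 2)
 assert len(parts) == 4 and parts[-1][2] == 1024 ** 2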