def append_object(self, obj, source_file, upload_cb=None):
    """Append the contents of a local file to a remote object.

    The file is read in blocks whose size is the container's
    ``x-container-block-size`` and every block is posted by its own
    ``SilentEvent`` (``object_post`` with ``update=True`` and
    ``content_range='bytes */*'``).

    :param obj: (str) remote object path

    :param source_file: open file descriptor

    :param upload_cb: progress.bar for uploading

    :returns: (list) response headers, one item per posted block

    :raises: the exception stored on a block thread that failed

    A KeyboardInterrupt is not propagated: after waiting for the active
    threads, the headers collected so far are returned.
    """
    self._assert_container()
    meta = self.get_container_info()
    blocksize = int(meta['x-container-block-size'])
    filesize = fstat(source_file.fileno()).st_size
    # Ceiling division; an empty file gives zero blocks.
    nblocks = 1 + (filesize - 1) // blocksize
    offset = 0
    headers = {}
    if upload_cb:
        self.progress_bar_gen = upload_cb(nblocks)
        self._cb_next()
    flying = {}
    self._init_thread_limit()
    try:
        for i in range(nblocks):
            block = source_file.read(min(blocksize, filesize - offset))
            offset += len(block)

            self._watch_thread_limit(list(flying.values()))
            unfinished = {}
            flying[i] = SilentEvent(
                method=self.object_post,
                obj=obj,
                update=True,
                content_range='bytes */*',
                content_type='application/octet-stream',
                content_length=len(block),
                data=block,
            )
            flying[i].start()

            is_last_block = i == nblocks - 1
            for key, thread in flying.items():
                # is_alive() replaces isAlive(), which Python 3.9 removed.
                if thread.is_alive():
                    if not is_last_block:
                        # Still running: look at it again on a later pass.
                        unfinished[key] = thread
                        continue
                    # Nothing more to start, so wait for the stragglers.
                    thread.join()
                if thread.exception:
                    raise thread.exception
                headers[key] = thread.value.headers
                self._cb_next()
            flying = unfinished
    except KeyboardInterrupt:
        sendlog.info('- - - wait for threads to finish')
        for thread in activethreads():
            thread.join()
    finally:
        from time import sleep
        # Two seconds per thread still listed by activethreads(); presumably
        # a grace period for lingering requests -- TODO confirm the intent.
        sleep(2 * len(activethreads()))
        self._cb_next()
    # A real list on both Python 2 and 3 (not a dict view).
    return list(headers.values())
def download_to_string(
        self, obj,
        download_cb=None,
        version=None,
        range_str=None,
        if_match=None,
        if_none_match=None,
        if_modified_since=None,
        if_unmodified_since=None):
    """Download an object to a string (multiple connections).

    This method uses threads for http requests, but stores all content
    in memory.

    :param obj: (str) remote object path

    :param download_cb: optional progress.bar object for downloading

    :param version: (str) file version

    :param range_str: (str) from, to are file positions (int) in bytes

    :param if_match: (str)

    :param if_none_match: (str)

    :param if_modified_since: (str) formatted date

    :param if_unmodified_since: (str) formatted date

    :returns: (str) the whole object contents; None if the download was
        interrupted with KeyboardInterrupt (the interrupt is not re-raised)

    :raises: the exception stored on a block thread that failed
    """
    restargs = dict(
        version=version,
        data_range=None if range_str is None else 'bytes=%s' % range_str,
        if_match=if_match,
        if_none_match=if_none_match,
        if_modified_since=if_modified_since,
        if_unmodified_since=if_unmodified_since)

    # The second item (the hash algorithm name) is not needed here.
    (
        blocksize, _, total_size, hash_list, remote_hashes
    ) = self._get_remote_blocks_info(obj, **restargs)
    assert total_size >= 0

    if download_cb:
        self.progress_bar_gen = download_cb(len(hash_list))
        self._cb_next()

    num_of_blocks = len(remote_hashes)
    ret = [''] * num_of_blocks
    self._init_thread_limit()
    flying = dict()
    try:
        for blockid in range(num_of_blocks):
            start = blocksize * blockid
            is_last = start + blocksize > total_size
            end = (total_size - 1) if is_last else (start + blocksize - 1)
            data_range_str = _range_up(start, end, end, range_str)
            if data_range_str:
                self._watch_thread_limit(list(flying.values()))
                restargs['data_range'] = 'bytes=%s' % data_range_str
                flying[blockid] = self._get_block_async(obj, **restargs)

            final_round = (blockid + 1) == num_of_blocks
            # Iterate over a snapshot: finished entries are popped from
            # `flying` inside the loop, and resizing a dict while iterating
            # its live view raises RuntimeError on Python 3.
            for runid, thread in list(flying.items()):
                if final_round:
                    # No more blocks will be requested; wait for every one.
                    thread.join()
                elif thread.is_alive():
                    continue
                if thread.exception:
                    raise thread.exception
                ret[runid] = thread.value.content
                self._cb_next()
                flying.pop(runid)
        return ''.join(ret)
    except KeyboardInterrupt:
        sendlog.info('- - - wait for threads to finish')
        for thread in activethreads():
            thread.join()
def upload_from_string(
        self, obj, input_str,
        hash_cb=None,
        upload_cb=None,
        etag=None,
        if_etag_match=None,
        if_not_exist=None,
        content_encoding=None,
        content_disposition=None,
        content_type=None,
        sharing=None,
        public=None,
        container_info_cache=None):
    """Upload an object using multiple connections (threads)

    :param obj: (str) remote object path

    :param input_str: (str) upload content

    :param hash_cb: optional progress.bar object for calculating hashes
        (accepted but not used by this method)

    :param upload_cb: optional progress.bar object for uploading

    :param etag: (str)

    :param if_etag_match: (str) Push that value to if-match header at file
        creation

    :param if_not_exist: (bool) If true, the file will be uploaded ONLY if
        it does not exist remotely, otherwise the operation will fail.
        Involves the case of an object with the same path is created while
        the object is being uploaded.

    :param content_encoding: (str)

    :param content_disposition: (str)

    :param content_type: (str)

    :param sharing: {'read':[user and/or grp names],
        'write':[usr and/or grp names]}

    :param public: (bool)

    :param container_info_cache: (dict) if given, avoid redundant calls to
        server for container info (block size and hash information)

    :returns: the headers returned by the object creation attempt when no
        block is reported missing, else the headers of the final
        ``object_put`` response

    :raises ClientError: if blocks still fail once the retries are used up
    """
    self._assert_container()

    blocksize, blockhash, size, nblocks = self._get_file_block_info(
        fileobj=None, size=len(input_str), cache=container_info_cache)
    if not content_type:
        content_type = 'application/octet-stream'

    # Hash every block; hmap maps a block hash to (offset, block content).
    hashes = []
    hmap = {}
    for blockid in range(nblocks):
        start = blockid * blocksize
        block = input_str[start: (start + blocksize)]
        block_hash = _pithos_hash(block, blockhash)
        hashes.append(block_hash)
        hmap[block_hash] = (start, block)

    hashmap = dict(bytes=size, hashes=hashes)
    missing, obj_headers = self._create_object_or_get_missing_hashes(
        obj, hashmap,
        content_type=content_type,
        size=size,
        if_etag_match=if_etag_match,
        if_etag_not_match='*' if if_not_exist else None,
        content_encoding=content_encoding,
        content_disposition=content_disposition,
        permissions=sharing,
        public=public)
    if missing is None:
        return obj_headers
    num_of_missing = len(missing)

    if upload_cb:
        self.progress_bar_gen = upload_cb(nblocks)
        # Pre-advance the bar; presumably this accounts for the blocks that
        # need no upload -- TODO confirm.
        for _ in range(nblocks + 1 - num_of_missing):
            self._cb_next()

    tries = 7
    old_failures = 0
    try:
        while tries and missing:
            flying = []
            failures = []
            for block_hash in missing:
                _, block = hmap[block_hash]
                flying.append(self._put_block_async(block, block_hash))
                unfinished = self._watch_thread_limit(flying)
                still_running = []
                for thread in set(flying).difference(unfinished):
                    if thread.is_alive():
                        # Keep it in flight so its outcome is collected
                        # later instead of being lost.
                        still_running.append(thread)
                        continue
                    if thread.exception:
                        failures.append(thread.kwargs['hash'])
                    self._cb_next()
                flying = list(unfinished) + still_running
            for thread in flying:
                thread.join()
                if thread.exception:
                    failures.append(thread.kwargs['hash'])
                self._cb_next()
            missing = failures
            # A round only costs a try when it made no progress, i.e. the
            # number of failed blocks is the same as after the last round.
            if missing and len(missing) == old_failures:
                tries -= 1
            old_failures = len(missing)
        if missing:
            raise ClientError('%s blocks failed to upload' % len(missing))
    except KeyboardInterrupt:
        sendlog.info('- - - wait for threads to finish')
        for thread in activethreads():
            thread.join()
        raise
    self._cb_next()

    r = self.object_put(
        obj,
        format='json',
        hashmap=True,
        content_type=content_type,
        content_encoding=content_encoding,
        if_etag_match=if_etag_match,
        if_etag_not_match='*' if if_not_exist else None,
        etag=etag,
        json=hashmap,
        permissions=sharing,
        public=public,
        success=201)
    return r.headers
def upload_object(
        self, obj, f,
        size=None,
        hash_cb=None,
        upload_cb=None,
        etag=None,
        if_etag_match=None,
        if_not_exist=None,
        content_encoding=None,
        content_disposition=None,
        content_type=None,
        sharing=None,
        public=None,
        container_info_cache=None,
):
    """Upload an object using multiple connections (threads)

    :param obj: (str) remote object path

    :param f: open file descriptor (rb)

    :param size: size to upload, handed to ``_get_file_block_info``, which
        supplies the value actually used

    :param hash_cb: optional progress.bar object for calculating hashes

    :param upload_cb: optional progress.bar object for uploading

    :param etag: (str)

    :param if_etag_match: (str) Push that value to if-match header at file
        creation

    :param if_not_exist: (bool) If true, the file will be uploaded ONLY if
        it does not exist remotely, otherwise the operation will fail.
        Involves the case of an object with the same path is created while
        the object is being uploaded.

    :param content_encoding: (str)

    :param content_disposition: (str)

    :param content_type: (str)

    :param sharing: {'read':[user and/or grp names],
        'write':[usr and/or grp names]}

    :param public: (bool)

    :param container_info_cache: (dict) if given, avoid redundant calls to
        server for container info (block size and hash information)

    :returns: the headers returned by the object creation attempt when no
        block is reported missing, else the headers of the final
        ``object_put`` response

    :raises ClientError: if blocks still fail once the retries are used up
    """
    self._assert_container()

    # block_info is forwarded untouched; only the size is needed locally.
    block_info = self._get_file_block_info(f, size, container_info_cache)
    _, _, size, _ = block_info
    hashes, hmap = [], {}
    if not content_type:
        content_type = 'application/octet-stream'

    # Fills `hashes` and `hmap` in place.
    self._calculate_blocks_for_upload(
        *block_info,
        hashes=hashes,
        hmap=hmap,
        fileobj=f,
        hash_cb=hash_cb)

    hashmap = dict(bytes=size, hashes=hashes)
    missing, obj_headers = self._create_object_or_get_missing_hashes(
        obj, hashmap,
        content_type=content_type,
        size=size,
        if_etag_match=if_etag_match,
        if_etag_not_match='*' if if_not_exist else None,
        content_encoding=content_encoding,
        content_disposition=content_disposition,
        permissions=sharing,
        public=public,
    )

    if missing is None:
        return obj_headers

    upload_gen = None
    if upload_cb:
        upload_gen = upload_cb(len(missing))
        for _ in range(len(missing), len(hashmap['hashes']) + 1):
            try:
                # next() works on Python 2.6+ and 3, unlike gen.next().
                next(upload_gen)
            except Exception:
                # Progress reporting is best effort: drop a bar that cannot
                # be advanced, but let KeyboardInterrupt/SystemExit through.
                upload_gen = None
                break

    retries = 7
    try:
        while retries:
            sendlog.info('%s blocks missing' % len(missing))
            num_of_blocks = len(missing)
            missing = self._upload_missing_blocks(
                missing, hmap, f, upload_gen)
            if not missing:
                break
            # Only a round without progress uses up a retry.
            if num_of_blocks == len(missing):
                retries -= 1
        if missing:
            try:
                details = ['%s' % thread.exception for thread in missing]
            except Exception:
                details = ['Also, failed to read thread exceptions']
            raise ClientError(
                '%s blocks failed to upload' % len(missing),
                details=details)
    except KeyboardInterrupt:
        sendlog.info('- - - wait for threads to finish')
        for thread in activethreads():
            thread.join()
        raise

    r = self.object_put(
        obj,
        format='json',
        hashmap=True,
        content_type=content_type,
        content_encoding=content_encoding,
        if_etag_match=if_etag_match,
        if_etag_not_match='*' if if_not_exist else None,
        etag=etag,
        json=hashmap,
        permissions=sharing,
        public=public,
        success=201,
    )
    return r.headers