def _get_hashes(self, policy, path, recalculate=None, do_listdir=False): df_mgr = self._df_router[policy] hashed, suffix_hashes = tpool_reraise( df_mgr._get_hashes, path, recalculate=recalculate, do_listdir=do_listdir, reclaim_age=self.reclaim_age) self.logger.update_stats('suffix.hashes', hashed) return suffix_hashes
def _get_hashes(self, device, partition, policy, recalculate=None, do_listdir=False): df_mgr = self._df_router[policy] hashed, suffix_hashes = tpool_reraise( df_mgr._get_hashes, device, partition, policy, recalculate=recalculate, do_listdir=do_listdir) self.logger.update_stats('suffix.hashes', hashed) return suffix_hashes
def update(self, job): """ High-level method that replicates a single partition. :param job: a dict containing info about the partition to be replicated """ self.replication_count += 1 self.logger.increment('partition.update.count.%s' % (job['device'], )) headers = dict(self.default_headers) headers['X-Backend-Storage-Policy-Index'] = int(job['policy']) target_devs_info = set() failure_devs_info = set() begin = time.time() try: hashed, local_hash = tpool_reraise(self._diskfile_mgr._get_hashes, job['path'], do_listdir=_do_listdir( int(job['partition']), self.replication_cycle), reclaim_age=self.reclaim_age) self.suffix_hash += hashed self.logger.update_stats('suffix.hashes', hashed) attempts_left = len(job['nodes']) synced_remote_regions = set() random.shuffle(job['nodes']) nodes = itertools.chain( job['nodes'], job['policy'].object_ring.get_more_nodes( int(job['partition']))) while attempts_left > 0: # If this throws StopIteration it will be caught way below node = next(nodes) target_devs_info.add((node['replication_ip'], node['device'])) attempts_left -= 1 # if we have already synced to this remote region, # don't sync again on this replication pass if node['region'] in synced_remote_regions: continue try: with Timeout(self.http_timeout): resp = http_connect(node['replication_ip'], node['replication_port'], node['device'], job['partition'], 'REPLICATE', '', headers=headers).getresponse() if resp.status == HTTP_INSUFFICIENT_STORAGE: self.logger.error( _('%(replication_ip)s/%(device)s ' 'responded as unmounted'), node) attempts_left += 1 failure_devs_info.add( (node['replication_ip'], node['device'])) continue if resp.status != HTTP_OK: self.logger.error( _("Invalid response %(resp)s " "from %(ip)s"), { 'resp': resp.status, 'ip': node['replication_ip'] }) failure_devs_info.add( (node['replication_ip'], node['device'])) continue remote_hash = pickle.loads(resp.read()) del resp suffixes = [ suffix for suffix in local_hash if local_hash[suffix] != remote_hash.get(suffix, -1) ] if not suffixes: self.stats['hashmatch'] += 1 continue hashed, recalc_hash = tpool_reraise( self._diskfile_mgr._get_hashes, job['path'], recalculate=suffixes, reclaim_age=self.reclaim_age) self.logger.update_stats('suffix.hashes', hashed) local_hash = recalc_hash suffixes = [ suffix for suffix in local_hash if local_hash[suffix] != remote_hash.get(suffix, -1) ] self.stats['rsync'] += 1 success, _junk = self.sync(node, job, suffixes) with Timeout(self.http_timeout): conn = http_connect(node['replication_ip'], node['replication_port'], node['device'], job['partition'], 'REPLICATE', '/' + '-'.join(suffixes), headers=headers) conn.getresponse().read() if not success: failure_devs_info.add( (node['replication_ip'], node['device'])) # add only remote region when replicate succeeded if success and node['region'] != job['region']: synced_remote_regions.add(node['region']) self.suffix_sync += len(suffixes) self.logger.update_stats('suffix.syncs', len(suffixes)) except (Exception, Timeout): failure_devs_info.add( (node['replication_ip'], node['device'])) self.logger.exception( _("Error syncing with node: %s") % node) self.suffix_count += len(local_hash) except (Exception, Timeout): failure_devs_info.update(target_devs_info) self._add_failure_stats(failure_devs_info) self.logger.exception(_("Error syncing partition")) finally: self.stats['success'] += len(target_devs_info - failure_devs_info) self.partition_times.append(time.time() - begin) self.logger.timing_since('partition.update.timing', begin)
def update(self, job): """ High-level method that replicates a single partition. :param job: a dict containing info about the partition to be replicated """ self.replication_count += 1 self.logger.increment('partition.update.count.%s' % (job['device'],)) headers = dict(self.default_headers) headers['X-Backend-Storage-Policy-Index'] = int(job['policy']) target_devs_info = set() failure_devs_info = set() begin = time.time() df_mgr = self._df_router[job['policy']] try: hashed, local_hash = tpool_reraise( df_mgr._get_hashes, job['device'], job['partition'], job['policy'], do_listdir=_do_listdir( int(job['partition']), self.replication_cycle)) self.suffix_hash += hashed self.logger.update_stats('suffix.hashes', hashed) attempts_left = len(job['nodes']) synced_remote_regions = set() random.shuffle(job['nodes']) nodes = itertools.chain( job['nodes'], job['policy'].object_ring.get_more_nodes( int(job['partition']))) while attempts_left > 0: # If this throws StopIteration it will be caught way below node = next(nodes) target_devs_info.add((node['replication_ip'], node['device'])) attempts_left -= 1 # if we have already synced to this remote region, # don't sync again on this replication pass if node['region'] in synced_remote_regions: continue try: with Timeout(self.http_timeout): resp = http_connect( node['replication_ip'], node['replication_port'], node['device'], job['partition'], 'REPLICATE', '', headers=headers).getresponse() if resp.status == HTTP_INSUFFICIENT_STORAGE: self.logger.error( _('%(replication_ip)s/%(device)s ' 'responded as unmounted'), node) attempts_left += 1 failure_devs_info.add((node['replication_ip'], node['device'])) continue if resp.status != HTTP_OK: self.logger.error(_("Invalid response %(resp)s " "from %(ip)s"), {'resp': resp.status, 'ip': node['replication_ip']}) failure_devs_info.add((node['replication_ip'], node['device'])) continue remote_hash = pickle.loads(resp.read()) del resp suffixes = [suffix for suffix in local_hash if local_hash[suffix] != remote_hash.get(suffix, -1)] if not suffixes: self.stats['hashmatch'] += 1 continue hashed, recalc_hash = tpool_reraise( df_mgr._get_hashes, job['device'], job['partition'], job['policy'], recalculate=suffixes) self.logger.update_stats('suffix.hashes', hashed) local_hash = recalc_hash suffixes = [suffix for suffix in local_hash if local_hash[suffix] != remote_hash.get(suffix, -1)] self.stats['rsync'] += 1 success, _junk = self.sync(node, job, suffixes) with Timeout(self.http_timeout): conn = http_connect( node['replication_ip'], node['replication_port'], node['device'], job['partition'], 'REPLICATE', '/' + '-'.join(suffixes), headers=headers) conn.getresponse().read() if not success: failure_devs_info.add((node['replication_ip'], node['device'])) # add only remote region when replicate succeeded if success and node['region'] != job['region']: synced_remote_regions.add(node['region']) self.suffix_sync += len(suffixes) self.logger.update_stats('suffix.syncs', len(suffixes)) except (Exception, Timeout): failure_devs_info.add((node['replication_ip'], node['device'])) self.logger.exception(_("Error syncing with node: %s") % node) self.suffix_count += len(local_hash) except StopIteration: self.logger.error('Ran out of handoffs while replicating ' 'partition %s of policy %d', job['partition'], int(job['policy'])) except (Exception, Timeout): failure_devs_info.update(target_devs_info) self.logger.exception(_("Error syncing partition")) finally: self._add_failure_stats(failure_devs_info) self.stats['success'] += len(target_devs_info - failure_devs_info) self.partition_times.append(time.time() - begin) self.logger.timing_since('partition.update.timing', begin)
def update(self, job): """ High-level method that replicates a single partition. :param job: a dict containing info about the partition to be replicated """ self.replication_count += 1 self.logger.increment('partition.update.count.%s' % (job['device'], )) self.headers[POLICY_INDEX] = job['policy_idx'] begin = time.time() try: hashed, local_hash = tpool_reraise( get_hashes, job['path'], do_listdir=(self.replication_count % 10) == 0, reclaim_age=self.reclaim_age) self.suffix_hash += hashed self.logger.update_stats('suffix.hashes', hashed) attempts_left = len(job['nodes']) nodes = itertools.chain( job['nodes'], job['object_ring'].get_more_nodes(int(job['partition']))) while attempts_left > 0: # If this throws StopIterator it will be caught way below node = next(nodes) attempts_left -= 1 try: with Timeout(self.http_timeout): resp = http_connect( node['replication_ip'], node['replication_port'], node['device'], job['partition'], 'REPLICATE', '', headers=self.headers).getresponse() if resp.status == HTTP_INSUFFICIENT_STORAGE: self.logger.error( _('%(ip)s/%(device)s responded' ' as unmounted'), node) attempts_left += 1 continue if resp.status != HTTP_OK: self.logger.error( _("Invalid response %(resp)s " "from %(ip)s"), { 'resp': resp.status, 'ip': node['replication_ip'] }) continue remote_hash = pickle.loads(resp.read()) del resp suffixes = [ suffix for suffix in local_hash if local_hash[suffix] != remote_hash.get(suffix, -1) ] if not suffixes: continue hashed, recalc_hash = tpool_reraise( get_hashes, job['path'], recalculate=suffixes, reclaim_age=self.reclaim_age) self.logger.update_stats('suffix.hashes', hashed) local_hash = recalc_hash suffixes = [ suffix for suffix in local_hash if local_hash[suffix] != remote_hash.get(suffix, -1) ] self.sync(node, job, suffixes) with Timeout(self.http_timeout): conn = http_connect(node['replication_ip'], node['replication_port'], node['device'], job['partition'], 'REPLICATE', '/' + '-'.join(suffixes), headers=self.headers) conn.getresponse().read() self.suffix_sync += len(suffixes) self.logger.update_stats('suffix.syncs', len(suffixes)) except (Exception, Timeout): self.logger.exception( _("Error syncing with node: %s") % node) self.suffix_count += len(local_hash) except (Exception, Timeout): self.logger.exception(_("Error syncing partition")) finally: self.partition_times.append(time.time() - begin) self.logger.timing_since('partition.update.timing', begin)
by the object replicator to get hashes for directories. """ try: device, partition, suffix = split_path( unquote(request.path), 2, 3, True) validate_device_partition(device, partition) except ValueError, e: return HTTPBadRequest(body=str(e), request=request, content_type='text/plain') if self.mount_check and not check_mount(self.devices, device): return HTTPInsufficientStorage(drive=device, request=request) path = os.path.join(self.devices, device, DATADIR, partition) if not os.path.exists(path): mkdirs(path) suffixes = suffix.split('-') if suffix else [] _junk, hashes = tpool_reraise(get_hashes, path, recalculate=suffixes) return Response(body=pickle.dumps(hashes)) def __call__(self, env, start_response): """WSGI Application entry point for the Swift Object Server.""" start_time = time.time() req = Request(env) self.logger.txn_id = req.headers.get('x-trans-id', None) if not check_utf8(req.path_info): res = HTTPPreconditionFailed(body='Invalid UTF8 or contains NULL') else: try: # disallow methods which have not been marked 'public' try: method = getattr(self, req.method)
def update(self, job): """ High-level method that replicates a single partition. :param job: a dict containing info about the partition to be replicated """ self.replication_count += 1 self.logger.increment('partition.update.count.%s' % (job['device'],)) begin = time.time() try: #MODIFIED LightSync local_hash = None part_hash_local = tpool_reraise(get_part_hash, job['path']) #Partition has been modified if part_hash_local is None: hashed, local_hash = tpool_reraise( get_hashes, job['path'], do_listdir=(self.replication_count % 10) == 0, reclaim_age=self.reclaim_age) self.suffix_hash += hashed self.logger.update_stats('suffix.hashes', hashed) part_hash_local = tpool_reraise(get_part_hash, job['path']) """hashed, local_hash = tpool_reraise( get_hashes, job['path'], do_listdir=(self.replication_count % 10) == 0, reclaim_age=self.reclaim_age) self.suffix_hash += hashed self.logger.update_stats('suffix.hashes', hashed)""" attempts_left = True nodes = itertools.chain(job['nodes']) while (True): ## # If this throws StopIterator it will be caught way below node = next(nodes) try: #MODIFIED LightSync req_suff = '' if part_hash_local is None else '/_SHORTREP_-'\ +part_hash_local with Timeout(self.http_timeout): resp = http_connect( node['replication_ip'], node['replication_port'], node['device'], job['partition'], 'REPLICATE', req_suff, headers=self.headers).getresponse() if resp.status == HTTP_INSUFFICIENT_STORAGE: self.logger.error(_('%(ip)s/%(device)s responded' ' as unmounted'), node) if(attempts_left): attempts_left = False ########To modify to start from current node's hand-off: Hash node info to get hand-off position nodes = itertools.chain( self.object_ring.get_more_nodes(int(job['partition'])), nodes) continue if resp.status != HTTP_OK: self.logger.error(_("Invalid response %(resp)s " "from %(ip)s"), {'resp': resp.status, 'ip': node['replication_ip']}) continue part_hash_remote = pickle.loads(resp.read()) del resp if part_hash_remote == "OK": break remote_hash = part_hash_remote if local_hash is None: hashed, local_hash = tpool_reraise( get_hashes, job['path'], do_listdir=(self.replication_count % 10) == 0, reclaim_age=self.reclaim_age) self.suffix_hash += hashed self.logger.update_stats('suffix.hashes', hashed) ''' with Timeout(self.http_timeout): resp = http_connect( node['replication_ip'], node['replication_port'], node['device'], job['partition'], 'REPLICATE', '', headers=self.headers).getresponse() if resp.status == HTTP_INSUFFICIENT_STORAGE: self.logger.error(_('%(ip)s/%(device)s responded' ' as unmounted'), node) attempts_left += 1 continue if resp.status != HTTP_OK: self.logger.error(_("Invalid response %(resp)s " "from %(ip)s"), {'resp': resp.status, 'ip': node['replication_ip']}) continue remote_hash = pickle.loads(resp.read()) del resp ''' ## suffixes = [suffix for suffix in local_hash if local_hash[suffix] != remote_hash.get(suffix, -1)] if not suffixes: #MODIFIED LightSync break ## hashed, recalc_hash = tpool_reraise( get_hashes, job['path'], recalculate=suffixes, reclaim_age=self.reclaim_age) self.logger.update_stats('suffix.hashes', hashed) local_hash = recalc_hash suffixes = [suffix for suffix in local_hash if local_hash[suffix] != remote_hash.get(suffix, -1)] self.sync(node, job, suffixes) with Timeout(self.http_timeout): conn = http_connect( node['replication_ip'], node['replication_port'], node['device'], job['partition'], 'REPLICATE', '/' + '-'.join(suffixes), headers=self.headers) conn.getresponse().read() self.suffix_sync += len(suffixes) self.logger.update_stats('suffix.syncs', len(suffixes)) #MODIFIED LightSync break ## except (Exception, Timeout): self.logger.exception(_("Error syncing with node: %s") % node) #MODIFIED LightSync (after if) self.suffix_count += len(local_hash) if local_hash is not None else 0 ## except (Exception, Timeout): self.logger.exception(_("Error syncing partition")) finally: self.partition_times.append(time.time() - begin) self.logger.timing_since('partition.update.timing', begin)
def update(self, job): """ High-level method that replicates a single partition. :param job: a dict containing info about the partition to be replicated """ self.replication_count += 1 self.logger.increment('partition.update.count.%s' % (job['device'],)) self.headers['X-Backend-Storage-Policy-Index'] = job['policy_idx'] begin = time.time() try: hashed, local_hash = tpool_reraise( get_hashes, job['path'], do_listdir=(self.replication_count % 10) == 0, reclaim_age=self.reclaim_age) self.suffix_hash += hashed self.logger.update_stats('suffix.hashes', hashed) attempts_left = len(job['nodes']) nodes = itertools.chain( job['nodes'], job['object_ring'].get_more_nodes(int(job['partition']))) while attempts_left > 0: # If this throws StopIterator it will be caught way below node = next(nodes) attempts_left -= 1 try: with Timeout(self.http_timeout): resp = http_connect( node['replication_ip'], node['replication_port'], node['device'], job['partition'], 'REPLICATE', '', headers=self.headers).getresponse() if resp.status == HTTP_INSUFFICIENT_STORAGE: self.logger.error(_('%(ip)s/%(device)s responded' ' as unmounted'), node) attempts_left += 1 continue if resp.status != HTTP_OK: self.logger.error(_("Invalid response %(resp)s " "from %(ip)s"), {'resp': resp.status, 'ip': node['replication_ip']}) continue remote_hash = pickle.loads(resp.read()) del resp suffixes = [suffix for suffix in local_hash if local_hash[suffix] != remote_hash.get(suffix, -1)] if not suffixes: continue hashed, recalc_hash = tpool_reraise( get_hashes, job['path'], recalculate=suffixes, reclaim_age=self.reclaim_age) self.logger.update_stats('suffix.hashes', hashed) local_hash = recalc_hash suffixes = [suffix for suffix in local_hash if local_hash[suffix] != remote_hash.get(suffix, -1)] self.sync(node, job, suffixes) with Timeout(self.http_timeout): conn = http_connect( node['replication_ip'], node['replication_port'], node['device'], job['partition'], 'REPLICATE', '/' + '-'.join(suffixes), headers=self.headers) conn.getresponse().read() self.suffix_sync += len(suffixes) self.logger.update_stats('suffix.syncs', len(suffixes)) except (Exception, Timeout): self.logger.exception(_("Error syncing with node: %s") % node) self.suffix_count += len(local_hash) except (Exception, Timeout): self.logger.exception(_("Error syncing partition")) finally: self.partition_times.append(time.time() - begin) self.logger.timing_since('partition.update.timing', begin)
def update(self, job): """ High-level method that replicates a single partition. :param job: a dict containing info about the partition to be replicated """ self.replication_count += 1 self.logger.increment("partition.update.count.%s" % (job["device"],)) headers = dict(self.default_headers) headers["X-Backend-Storage-Policy-Index"] = int(job["policy"]) target_devs_info = set() failure_devs_info = set() begin = time.time() try: hashed, local_hash = tpool_reraise( self._diskfile_mgr._get_hashes, job["path"], do_listdir=(self.replication_count % 10) == 0, reclaim_age=self.reclaim_age, ) self.suffix_hash += hashed self.logger.update_stats("suffix.hashes", hashed) attempts_left = len(job["nodes"]) synced_remote_regions = set() random.shuffle(job["nodes"]) nodes = itertools.chain(job["nodes"], job["policy"].object_ring.get_more_nodes(int(job["partition"]))) while attempts_left > 0: # If this throws StopIteration it will be caught way below node = next(nodes) target_devs_info.add((node["replication_ip"], node["device"])) attempts_left -= 1 # if we have already synced to this remote region, # don't sync again on this replication pass if node["region"] in synced_remote_regions: continue try: with Timeout(self.http_timeout): resp = http_connect( node["replication_ip"], node["replication_port"], node["device"], job["partition"], "REPLICATE", "", headers=headers, ).getresponse() if resp.status == HTTP_INSUFFICIENT_STORAGE: self.logger.error(_("%(ip)s/%(device)s responded" " as unmounted"), node) attempts_left += 1 failure_devs_info.add((node["replication_ip"], node["device"])) continue if resp.status != HTTP_OK: self.logger.error( _("Invalid response %(resp)s " "from %(ip)s"), {"resp": resp.status, "ip": node["replication_ip"]}, ) failure_devs_info.add((node["replication_ip"], node["device"])) continue remote_hash = pickle.loads(resp.read()) del resp suffixes = [suffix for suffix in local_hash if local_hash[suffix] != remote_hash.get(suffix, -1)] if not suffixes: self.stats["hashmatch"] += 1 continue hashed, recalc_hash = tpool_reraise( self._diskfile_mgr._get_hashes, job["path"], recalculate=suffixes, reclaim_age=self.reclaim_age ) self.logger.update_stats("suffix.hashes", hashed) local_hash = recalc_hash suffixes = [suffix for suffix in local_hash if local_hash[suffix] != remote_hash.get(suffix, -1)] self.stats["rsync"] += 1 success, _junk = self.sync(node, job, suffixes) with Timeout(self.http_timeout): conn = http_connect( node["replication_ip"], node["replication_port"], node["device"], job["partition"], "REPLICATE", "/" + "-".join(suffixes), headers=headers, ) conn.getresponse().read() if not success: failure_devs_info.add((node["replication_ip"], node["device"])) # add only remote region when replicate succeeded if success and node["region"] != job["region"]: synced_remote_regions.add(node["region"]) self.suffix_sync += len(suffixes) self.logger.update_stats("suffix.syncs", len(suffixes)) except (Exception, Timeout): failure_devs_info.add((node["replication_ip"], node["device"])) self.logger.exception(_("Error syncing with node: %s") % node) self.suffix_count += len(local_hash) except (Exception, Timeout): failure_devs_info.update(target_devs_info) self.logger.exception(_("Error syncing partition")) finally: self.stats["success"] += len(target_devs_info - failure_devs_info) self._add_failure_stats(failure_devs_info) self.partition_times.append(time.time() - begin) self.logger.timing_since("partition.update.timing", begin)
""" try: device, partition, suffix = split_path(unquote(request.path), 2, 3, True) validate_device_partition(device, partition) except ValueError, e: return HTTPBadRequest(body=str(e), request=request, content_type='text/plain') if self.mount_check and not check_mount(self.devices, device): return HTTPInsufficientStorage(drive=device, request=request) path = os.path.join(self.devices, device, DATADIR, partition) if not os.path.exists(path): mkdirs(path) suffixes = suffix.split('-') if suffix else [] _junk, hashes = tpool_reraise(get_hashes, path, recalculate=suffixes) return Response(body=pickle.dumps(hashes)) def __call__(self, env, start_response): """WSGI Application entry point for the Swift Object Server.""" start_time = time.time() req = Request(env) self.logger.txn_id = req.headers.get('x-trans-id', None) if not check_utf8(req.path_info): res = HTTPPreconditionFailed(body='Invalid UTF8 or contains NULL') else: try: # disallow methods which have not been marked 'public' try: method = getattr(self, req.method)
def update(self, job): """ High-level method that replicates a single partition. :param job: a dict containing info about the partition to be replicated """ self.replication_count += 1 self.logger.increment("partition.update.count.%s" % (job["device"],)) begin = time.time() try: hashed, local_hash = tpool_reraise( get_hashes, job["path"], do_listdir=(self.replication_count % 10) == 0, reclaim_age=self.reclaim_age ) self.suffix_hash += hashed self.logger.update_stats("suffix.hashes", hashed) attempts_left = len(job["nodes"]) nodes = itertools.chain(job["nodes"], self.object_ring.get_more_nodes(int(job["partition"]))) while attempts_left > 0: # If this throws StopIterator it will be caught way below node = next(nodes) attempts_left -= 1 try: with Timeout(self.http_timeout): resp = http_connect( node["replication_ip"], node["replication_port"], node["device"], job["partition"], "REPLICATE", "", headers=self.headers, ).getresponse() if resp.status == HTTP_INSUFFICIENT_STORAGE: self.logger.error(_("%(ip)s/%(device)s responded" " as unmounted"), node) attempts_left += 1 continue if resp.status != HTTP_OK: self.logger.error( _("Invalid response %(resp)s " "from %(ip)s"), {"resp": resp.status, "ip": node["replication_ip"]}, ) continue remote_hash = pickle.loads(resp.read()) del resp suffixes = [suffix for suffix in local_hash if local_hash[suffix] != remote_hash.get(suffix, -1)] if not suffixes: continue hashed, recalc_hash = tpool_reraise( get_hashes, job["path"], recalculate=suffixes, reclaim_age=self.reclaim_age ) self.logger.update_stats("suffix.hashes", hashed) local_hash = recalc_hash suffixes = [suffix for suffix in local_hash if local_hash[suffix] != remote_hash.get(suffix, -1)] self.sync(node, job, suffixes) with Timeout(self.http_timeout): conn = http_connect( node["replication_ip"], node["replication_port"], node["device"], job["partition"], "REPLICATE", "/" + "-".join(suffixes), headers=self.headers, ) conn.getresponse().read() self.suffix_sync += len(suffixes) self.logger.update_stats("suffix.syncs", len(suffixes)) except (Exception, Timeout): self.logger.exception(_("Error syncing with node: %s") % node) self.suffix_count += len(local_hash) except (Exception, Timeout): self.logger.exception(_("Error syncing partition")) finally: self.partition_times.append(time.time() - begin) self.logger.timing_since("partition.update.timing", begin)