def remote_advise_corrupt_share(self, share_type, storage_index, shnum, reason): fileutil.make_dirs(self.corruption_advisory_dir) now = time_format.iso_utc(sep="T") si_s = si_b2a(storage_index) # windows can't handle colons in the filename fn = os.path.join(self.corruption_advisory_dir, "%s--%s-%d" % (now, si_s, shnum)).replace(":", "") f = open(fn, "w") f.write("report: Share Corruption\n") f.write("type: %s\n" % share_type) f.write("storage_index: %s\n" % si_s) f.write("share_number: %d\n" % shnum) f.write("\n") f.write(reason) f.write("\n") f.close() log.msg(format=("client claims corruption in (%(share_type)s) " + "%(si)s-%(shnum)d: %(reason)s"), share_type=share_type, si=si_s, shnum=shnum, reason=reason, level=log.SCARY, umid="SGx2fA") return None
def remote_slot_readv(self, storage_index, shares, readv): start = time.time() self.count("readv") si_s = si_b2a(storage_index) lp = log.msg("storage: slot_readv %s %s" % (si_s, shares), facility="tahoe.storage", level=log.OPERATIONAL) si_dir = storage_index_to_dir(storage_index) # shares exist if there is a file for them bucketdir = os.path.join(self.sharedir, si_dir) if not os.path.isdir(bucketdir): self.add_latency("readv", time.time() - start) return {} datavs = {} for sharenum_s in os.listdir(bucketdir): try: sharenum = int(sharenum_s) except ValueError: continue if sharenum in shares or not shares: filename = os.path.join(bucketdir, sharenum_s) msf = MutableShareFile(filename, self) datavs[sharenum] = msf.readv(readv) log.msg("returning shares %s" % (datavs.keys(),), facility="tahoe.storage", level=log.NOISY, parent=lp) self.add_latency("readv", time.time() - start) return datavs
def remote_advise_corrupt_share(self, share_type, storage_index, shnum, reason): # This is a remote API, I believe, so this has to be bytes for legacy # protocol backwards compatibility reasons. assert isinstance(share_type, bytes) assert isinstance(reason, bytes), "%r is not bytes" % (reason, ) fileutil.make_dirs(self.corruption_advisory_dir) now = time_format.iso_utc(sep="T") si_s = si_b2a(storage_index) # windows can't handle colons in the filename fn = os.path.join(self.corruption_advisory_dir, "%s--%s-%d" % (now, si_s, shnum)).replace(":", "") with open(fn, "w") as f: f.write("report: Share Corruption\n") f.write("type: %s\n" % bytes_to_native_str(share_type)) f.write("storage_index: %s\n" % bytes_to_native_str(si_s)) f.write("share_number: %d\n" % shnum) f.write("\n") f.write(bytes_to_native_str(reason)) f.write("\n") log.msg(format=("client claims corruption in (%(share_type)s) " + "%(si)s-%(shnum)d: %(reason)s"), share_type=share_type, si=si_s, shnum=shnum, reason=reason, level=log.SCARY, umid="SGx2fA") return None
def remote_slot_readv(self, storage_index, shares, readv): start = time.time() self.count("readv") si_s = si_b2a(storage_index) lp = log.msg("storage: slot_readv %s %s" % (si_s, shares), facility="tahoe.storage", level=log.OPERATIONAL) si_dir = storage_index_to_dir(storage_index) # shares exist if there is a file for them bucketdir = os.path.join(self.sharedir, si_dir) if not os.path.isdir(bucketdir): self.add_latency("readv", time.time() - start) return {} datavs = {} for sharenum_s in os.listdir(bucketdir): try: sharenum = int(sharenum_s) except ValueError: continue if sharenum in shares or not shares: filename = os.path.join(bucketdir, sharenum_s) msf = MutableShareFile(filename, self) datavs[sharenum] = msf.readv(readv) log.msg("returning shares %s" % (datavs.keys(), ), facility="tahoe.storage", level=log.NOISY, parent=lp) self.add_latency("readv", time.time() - start) return datavs
def remote_get_buckets(self, storage_index): start = time.time() self.count("get") si_s = si_b2a(storage_index) log.msg("storage: get_buckets %s" % si_s) bucketreaders = {} # k: sharenum, v: BucketReader for shnum, filename in self._get_bucket_shares(storage_index): bucketreaders[shnum] = BucketReader(self, filename, storage_index, shnum) self.add_latency("get", time.time() - start) return bucketreaders
def get_buckets(self, storage_index): """ Get ``BucketReaders`` for an immutable. """ start = self._clock.seconds() self.count("get") si_s = si_b2a(storage_index) log.msg("storage: get_buckets %r" % si_s) bucketreaders = {} # k: sharenum, v: BucketReader for shnum, filename in self.get_shares(storage_index): bucketreaders[shnum] = BucketReader(self, filename, storage_index, shnum) self.add_latency("get", self._clock.seconds() - start) return bucketreaders
def advise_corrupt_share(self, share_type, storage_index, shnum, reason): # Previously this had to be bytes for legacy protocol backwards # compatibility reasons. Now that Foolscap layer has been abstracted # out, we can probably refactor this to be unicode... assert isinstance(share_type, bytes) assert isinstance(reason, bytes), "%r is not bytes" % (reason, ) si_s = si_b2a(storage_index) if not self._share_exists(storage_index, shnum): log.msg( format=( "discarding client corruption claim for %(si)s/%(shnum)d " "which I do not have"), si=si_s, shnum=shnum, ) return log.msg(format=("client claims corruption in (%(share_type)s) " + "%(si)s-%(shnum)d: %(reason)s"), share_type=share_type, si=si_s, shnum=shnum, reason=reason, level=log.SCARY, umid="SGx2fA") report = render_corruption_report(share_type, si_s, shnum, reason) if len(report) > self.get_available_space(): return None now = time_format.iso_utc(sep="T") report_path = get_corruption_report_path( self.corruption_advisory_dir, now, si_s, shnum, ) with open(report_path, "w", encoding="utf-8") as f: f.write(report) return None
def __init__(self, server, statefile, allowed_cpu_percentage=None): service.MultiService.__init__(self) if allowed_cpu_percentage is not None: self.allowed_cpu_percentage = allowed_cpu_percentage self.server = server self.sharedir = server.sharedir self.statefile = statefile self.prefixes = [si_b2a(struct.pack(">H", i << (16-10)))[:2] for i in range(2**10)] self.prefixes.sort() self.timer = None self.bucket_cache = (None, []) self.current_sleep_time = None self.next_wake_time = None self.last_prefix_finished_time = None self.last_prefix_elapsed_time = None self.last_cycle_started_time = None self.last_cycle_elapsed_time = None self.load_state()
def __init__(self, server, statefile, allowed_cpu_percentage=None): service.MultiService.__init__(self) if allowed_cpu_percentage is not None: self.allowed_cpu_percentage = allowed_cpu_percentage self.server = server self.sharedir = server.sharedir self.statefile = statefile self.prefixes = [ si_b2a(struct.pack(">H", i << (16 - 10)))[:2] for i in range(2**10) ] self.prefixes.sort() self.timer = None self.bucket_cache = (None, []) self.current_sleep_time = None self.next_wake_time = None self.last_prefix_finished_time = None self.last_prefix_elapsed_time = None self.last_cycle_started_time = None self.last_cycle_elapsed_time = None self.load_state()
def remote_advise_corrupt_share(self, share_type, storage_index, shnum, reason): fileutil.make_dirs(self.corruption_advisory_dir) now = time_format.iso_utc(sep="T") si_s = si_b2a(storage_index) # windows can't handle colons in the filename fn = os.path.join(self.corruption_advisory_dir, "%s--%s-%d" % (now, si_s, shnum)).replace(":","") f = open(fn, "w") f.write("report: Share Corruption\n") f.write("type: %s\n" % share_type) f.write("storage_index: %s\n" % si_s) f.write("share_number: %d\n" % shnum) f.write("\n") f.write(reason) f.write("\n") f.close() log.msg(format=("client claims corruption in (%(share_type)s) " + "%(si)s-%(shnum)d: %(reason)s"), share_type=share_type, si=si_s, shnum=shnum, reason=reason, level=log.SCARY, umid="SGx2fA") return None
def __init__(self, server, statefile, allowed_cpu_percentage=None): service.MultiService.__init__(self) if allowed_cpu_percentage is not None: self.allowed_cpu_percentage = allowed_cpu_percentage self.server = server self.sharedir = server.sharedir self.statefile = statefile self.prefixes = [ si_b2a(struct.pack(">H", i << (16 - 10)))[:2] for i in range(2**10) ] if PY3: # On Python 3 we expect the paths to be unicode, not bytes. self.prefixes = [p.decode("ascii") for p in self.prefixes] self.prefixes.sort() self.timer = None self.bucket_cache = (None, []) self.current_sleep_time = None self.next_wake_time = None self.last_prefix_finished_time = None self.last_prefix_elapsed_time = None self.last_cycle_started_time = None self.last_cycle_elapsed_time = None self.load_state()
def allocate_buckets(self, storage_index, renew_secret, cancel_secret, sharenums, allocated_size, owner_num=0, renew_leases=True): """ Generic bucket allocation API. :param bool renew_leases: If and only if this is ``True`` then renew a secret-matching lease on (or, if none match, add a new lease to) existing shares in this bucket. Any *new* shares are given a new lease regardless. """ # owner_num is not for clients to set, but rather it should be # curried into the PersonalStorageServer instance that is dedicated # to a particular owner. start = self._clock.seconds() self.count("allocate") alreadygot = {} bucketwriters = {} # k: shnum, v: BucketWriter si_dir = storage_index_to_dir(storage_index) si_s = si_b2a(storage_index) log.msg("storage: allocate_buckets %r" % si_s) # in this implementation, the lease information (including secrets) # goes into the share files themselves. It could also be put into a # separate database. Note that the lease should not be added until # the BucketWriter has been closed. expire_time = self._clock.seconds() + DEFAULT_RENEWAL_TIME lease_info = LeaseInfo(owner_num, renew_secret, cancel_secret, expire_time, self.my_nodeid) max_space_per_bucket = allocated_size remaining_space = self.get_available_space() limited = remaining_space is not None if limited: # this is a bit conservative, since some of this allocated_size() # has already been written to disk, where it will show up in # get_available_space. remaining_space -= self.allocated_size() # self.readonly_storage causes remaining_space <= 0 # fill alreadygot with all shares that we have, not just the ones # they asked about: this will save them a lot of work. Add or update # leases for all of them: if they want us to hold shares for this # file, they'll want us to hold leases for this file. for (shnum, fn) in self.get_shares(storage_index): alreadygot[shnum] = ShareFile(fn) if renew_leases: self._add_or_renew_leases(alreadygot.values(), lease_info) for shnum in sharenums: incominghome = os.path.join(self.incomingdir, si_dir, "%d" % shnum) finalhome = os.path.join(self.sharedir, si_dir, "%d" % shnum) if os.path.exists(finalhome): # great! we already have it. easy. pass elif os.path.exists(incominghome): # For Foolscap we don't create BucketWriters for shnums that # have a partial share (in incoming/), so if a second upload # occurs while the first is still in progress, the second # uploader will use different storage servers. pass elif (not limited) or (remaining_space >= max_space_per_bucket): # ok! we need to create the new share file. bw = BucketWriter(self, incominghome, finalhome, max_space_per_bucket, lease_info, clock=self._clock) if self.no_storage: # Really this should be done by having a separate class for # this situation; see # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3862 bw.throw_out_all_data = True bucketwriters[shnum] = bw self._bucket_writers[incominghome] = bw if limited: remaining_space -= max_space_per_bucket else: # bummer! not enough space to accept this bucket pass if bucketwriters: fileutil.make_dirs(os.path.join(self.sharedir, si_dir)) self.add_latency("allocate", self._clock.seconds() - start) return set(alreadygot), bucketwriters
def remote_slot_testv_and_readv_and_writev(self, storage_index, secrets, test_and_write_vectors, read_vector): start = time.time() self.count("writev") si_s = si_b2a(storage_index) log.msg("storage: slot_writev %s" % si_s) si_dir = storage_index_to_dir(storage_index) (write_enabler, renew_secret, cancel_secret) = secrets # shares exist if there is a file for them bucketdir = os.path.join(self.sharedir, si_dir) shares = {} if os.path.isdir(bucketdir): for sharenum_s in os.listdir(bucketdir): try: sharenum = int(sharenum_s) except ValueError: continue filename = os.path.join(bucketdir, sharenum_s) msf = MutableShareFile(filename, self) msf.check_write_enabler(write_enabler, si_s) shares[sharenum] = msf # write_enabler is good for all existing shares. # Now evaluate test vectors. testv_is_good = True for sharenum in test_and_write_vectors: (testv, datav, new_length) = test_and_write_vectors[sharenum] if sharenum in shares: if not shares[sharenum].check_testv(testv): self.log("testv failed: [%d]: %r" % (sharenum, testv)) testv_is_good = False break else: # compare the vectors against an empty share, in which all # reads return empty strings. if not EmptyShare().check_testv(testv): self.log("testv failed (empty): [%d] %r" % (sharenum, testv)) testv_is_good = False break # now gather the read vectors, before we do any writes read_data = {} for sharenum, share in shares.items(): read_data[sharenum] = share.readv(read_vector) ownerid = 1 # TODO expire_time = time.time() + 31*24*60*60 # one month lease_info = LeaseInfo(ownerid, renew_secret, cancel_secret, expire_time, self.my_nodeid) if testv_is_good: # now apply the write vectors for sharenum in test_and_write_vectors: (testv, datav, new_length) = test_and_write_vectors[sharenum] if new_length == 0: if sharenum in shares: shares[sharenum].unlink() else: if sharenum not in shares: # allocate a new share allocated_size = 2000 # arbitrary, really share = self._allocate_slot_share(bucketdir, secrets, sharenum, allocated_size, owner_num=0) shares[sharenum] = share shares[sharenum].writev(datav, new_length) # and update the lease shares[sharenum].add_or_renew_lease(lease_info) if new_length == 0: # delete empty bucket directories if not os.listdir(bucketdir): os.rmdir(bucketdir) # all done self.add_latency("writev", time.time() - start) return (testv_is_good, read_data)
def slot_testv_and_readv_and_writev( # type: ignore # warner/foolscap#78 self, storage_index, secrets, test_and_write_vectors, read_vector, renew_leases, ): """ Read data from shares and conditionally write some data to them. :param bool renew_leases: If and only if this is ``True`` and the test vectors pass then shares in this slot will also have an updated lease applied to them. See ``allmydata.interfaces.RIStorageServer`` for details about other parameters and return value. """ start = time.time() self.count("writev") si_s = si_b2a(storage_index) log.msg("storage: slot_writev %s" % si_s) si_dir = storage_index_to_dir(storage_index) (write_enabler, renew_secret, cancel_secret) = secrets bucketdir = os.path.join(self.sharedir, si_dir) # If collection succeeds we know the write_enabler is good for all # existing shares. shares = self._collect_mutable_shares_for_storage_index( bucketdir, write_enabler, si_s, ) # Now evaluate test vectors. testv_is_good = self._evaluate_test_vectors( test_and_write_vectors, shares, ) # now gather the read vectors, before we do any writes read_data = self._evaluate_read_vectors( read_vector, shares, ) if testv_is_good: # now apply the write vectors remaining_shares = self._evaluate_write_vectors( bucketdir, secrets, test_and_write_vectors, shares, ) if renew_leases: lease_info = self._make_lease_info(renew_secret, cancel_secret) self._add_or_renew_leases(remaining_shares, lease_info) # all done self.add_latency("writev", time.time() - start) return (testv_is_good, read_data)
def remote_allocate_buckets(self, storage_index, renew_secret, cancel_secret, sharenums, allocated_size, canary, owner_num=0): # owner_num is not for clients to set, but rather it should be # curried into the PersonalStorageServer instance that is dedicated # to a particular owner. start = time.time() self.count("allocate") alreadygot = set() bucketwriters = {} # k: shnum, v: BucketWriter si_dir = storage_index_to_dir(storage_index) si_s = si_b2a(storage_index) log.msg("storage: allocate_buckets %s" % si_s) # in this implementation, the lease information (including secrets) # goes into the share files themselves. It could also be put into a # separate database. Note that the lease should not be added until # the BucketWriter has been closed. expire_time = time.time() + 31*24*60*60 lease_info = LeaseInfo(owner_num, renew_secret, cancel_secret, expire_time, self.my_nodeid) max_space_per_bucket = allocated_size remaining_space = self.get_available_space() limited = remaining_space is not None if limited: # this is a bit conservative, since some of this allocated_size() # has already been written to disk, where it will show up in # get_available_space. remaining_space -= self.allocated_size() # self.readonly_storage causes remaining_space <= 0 # fill alreadygot with all shares that we have, not just the ones # they asked about: this will save them a lot of work. Add or update # leases for all of them: if they want us to hold shares for this # file, they'll want us to hold leases for this file. for (shnum, fn) in self._get_bucket_shares(storage_index): alreadygot.add(shnum) sf = ShareFile(fn) sf.add_or_renew_lease(lease_info) for shnum in sharenums: incominghome = os.path.join(self.incomingdir, si_dir, "%d" % shnum) finalhome = os.path.join(self.sharedir, si_dir, "%d" % shnum) if os.path.exists(finalhome): # great! we already have it. easy. pass elif os.path.exists(incominghome): # Note that we don't create BucketWriters for shnums that # have a partial share (in incoming/), so if a second upload # occurs while the first is still in progress, the second # uploader will use different storage servers. pass elif (not limited) or (remaining_space >= max_space_per_bucket): # ok! we need to create the new share file. bw = BucketWriter(self, incominghome, finalhome, max_space_per_bucket, lease_info, canary) if self.no_storage: bw.throw_out_all_data = True bucketwriters[shnum] = bw self._active_writers[bw] = 1 if limited: remaining_space -= max_space_per_bucket else: # bummer! not enough space to accept this bucket pass if bucketwriters: fileutil.make_dirs(os.path.join(self.sharedir, si_dir)) self.add_latency("allocate", time.time() - start) return alreadygot, bucketwriters
def remote_allocate_buckets(self, storage_index, renew_secret, cancel_secret, sharenums, allocated_size, canary, owner_num=0): # owner_num is not for clients to set, but rather it should be # curried into the PersonalStorageServer instance that is dedicated # to a particular owner. start = time.time() self.count("allocate") alreadygot = set() bucketwriters = {} # k: shnum, v: BucketWriter si_dir = storage_index_to_dir(storage_index) si_s = si_b2a(storage_index) log.msg("storage: allocate_buckets %s" % si_s) # in this implementation, the lease information (including secrets) # goes into the share files themselves. It could also be put into a # separate database. Note that the lease should not be added until # the BucketWriter has been closed. expire_time = time.time() + 31 * 24 * 60 * 60 lease_info = LeaseInfo(owner_num, renew_secret, cancel_secret, expire_time, self.my_nodeid) max_space_per_bucket = allocated_size remaining_space = self.get_available_space() limited = remaining_space is not None if limited: # this is a bit conservative, since some of this allocated_size() # has already been written to disk, where it will show up in # get_available_space. remaining_space -= self.allocated_size() # self.readonly_storage causes remaining_space <= 0 # fill alreadygot with all shares that we have, not just the ones # they asked about: this will save them a lot of work. Add or update # leases for all of them: if they want us to hold shares for this # file, they'll want us to hold leases for this file. for (shnum, fn) in self._get_bucket_shares(storage_index): alreadygot.add(shnum) sf = ShareFile(fn) sf.add_or_renew_lease(lease_info) for shnum in sharenums: incominghome = os.path.join(self.incomingdir, si_dir, "%d" % shnum) finalhome = os.path.join(self.sharedir, si_dir, "%d" % shnum) if os.path.exists(finalhome): # great! we already have it. easy. pass elif os.path.exists(incominghome): # Note that we don't create BucketWriters for shnums that # have a partial share (in incoming/), so if a second upload # occurs while the first is still in progress, the second # uploader will use different storage servers. pass elif (not limited) or (remaining_space >= max_space_per_bucket): # ok! we need to create the new share file. bw = BucketWriter(self, incominghome, finalhome, max_space_per_bucket, lease_info, canary) if self.no_storage: bw.throw_out_all_data = True bucketwriters[shnum] = bw self._active_writers[bw] = 1 if limited: remaining_space -= max_space_per_bucket else: # bummer! not enough space to accept this bucket pass if bucketwriters: fileutil.make_dirs(os.path.join(self.sharedir, si_dir)) self.add_latency("allocate", time.time() - start) return alreadygot, bucketwriters
def remote_slot_testv_and_readv_and_writev(self, storage_index, secrets, test_and_write_vectors, read_vector): start = time.time() self.count("writev") si_s = si_b2a(storage_index) log.msg("storage: slot_writev %s" % si_s) si_dir = storage_index_to_dir(storage_index) (write_enabler, renew_secret, cancel_secret) = secrets # shares exist if there is a file for them bucketdir = os.path.join(self.sharedir, si_dir) shares = {} if os.path.isdir(bucketdir): for sharenum_s in os.listdir(bucketdir): try: sharenum = int(sharenum_s) except ValueError: continue filename = os.path.join(bucketdir, sharenum_s) msf = MutableShareFile(filename, self) msf.check_write_enabler(write_enabler, si_s) shares[sharenum] = msf # write_enabler is good for all existing shares. # Now evaluate test vectors. testv_is_good = True for sharenum in test_and_write_vectors: (testv, datav, new_length) = test_and_write_vectors[sharenum] if sharenum in shares: if not shares[sharenum].check_testv(testv): self.log("testv failed: [%d]: %r" % (sharenum, testv)) testv_is_good = False break else: # compare the vectors against an empty share, in which all # reads return empty strings. if not EmptyShare().check_testv(testv): self.log("testv failed (empty): [%d] %r" % (sharenum, testv)) testv_is_good = False break # now gather the read vectors, before we do any writes read_data = {} for sharenum, share in shares.items(): read_data[sharenum] = share.readv(read_vector) ownerid = 1 # TODO expire_time = time.time() + 31 * 24 * 60 * 60 # one month lease_info = LeaseInfo(ownerid, renew_secret, cancel_secret, expire_time, self.my_nodeid) if testv_is_good: # now apply the write vectors for sharenum in test_and_write_vectors: (testv, datav, new_length) = test_and_write_vectors[sharenum] if new_length == 0: if sharenum in shares: shares[sharenum].unlink() else: if sharenum not in shares: # allocate a new share allocated_size = 2000 # arbitrary, really share = self._allocate_slot_share(bucketdir, secrets, sharenum, allocated_size, owner_num=0) shares[sharenum] = share shares[sharenum].writev(datav, new_length) # and update the lease shares[sharenum].add_or_renew_lease(lease_info) if new_length == 0: # delete empty bucket directories if not os.listdir(bucketdir): os.rmdir(bucketdir) # all done self.add_latency("writev", time.time() - start) return (testv_is_good, read_data)