def _satisfy_block_hash_tree(self, needed_hashes):
    """Hand the needed block hashes to the common share, once all are present.

    Each hash number in ``needed_hashes`` is looked up in ``self._received``
    at ``actual_offsets["block_hashes"] + hashnum * HASH_SIZE``.

    Returns False as soon as one of the hashes has not been received yet
    (nothing is submitted or removed in that case). Returns True after the
    whole set was accepted by ``self._commonshare.process_block_hashes`` and
    the consumed byte ranges were removed from ``self._received``.

    If the hash tree rejects the set (BadHashError / NotEnoughHashesError),
    the failure is logged, reported through ``self._signal_corruption``,
    ``self.had_corruption`` is set, and the exception is re-raised.
    """
    base = self.actual_offsets["block_hashes"]

    collected = {}
    for hashnum in needed_hashes:
        chunk = self._received.get(base + hashnum * HASH_SIZE, HASH_SIZE)
        if not chunk:
            # missing some hashes
            return False
        collected[hashnum] = chunk

    # note that we don't submit any hashes to the block_hash_tree until
    # we've gotten them all, because the hash tree will throw an
    # exception if we only give it a partial set (which it therefore
    # cannot validate)
    try:
        self._commonshare.process_block_hashes(collected)
    except (BadHashError, NotEnoughHashesError) as e:
        failure = Failure(e)
        hashnums = ",".join(str(n) for n in sorted(collected))
        log.msg(format="hash failure in block_hashes=(%(hashnums)s),"
                       " from %(share)s",
                hashnums=hashnums, shnum=self._shnum, share=repr(self),
                failure=failure, level=log.WEIRD, parent=self._lp,
                umid="yNyFdA")
        # NOTE(review): the reported length stops at the *start* of the
        # highest-numbered hash; (max + 1) * HASH_SIZE may have been
        # intended -- confirm before changing what is reported.
        corrupt_len = max(0, max(needed_hashes)) * HASH_SIZE
        self._signal_corruption(failure, base, corrupt_len)
        self.had_corruption = True
        raise

    # everything validated: the raw bytes are no longer needed
    for hashnum in needed_hashes:
        self._received.remove(base + hashnum * HASH_SIZE, HASH_SIZE)
    return True
def _satisfy_ciphertext_hash_tree(self, needed_hashes):
    """Hand the needed ciphertext hashes to the node, once all are present.

    Hashes are read from ``self._received`` at
    ``actual_offsets["crypttext_hash_tree"] + hashnum * HASH_SIZE``.

    Returns False if any needed hash has not been received yet. Returns True
    after ``self._node.process_ciphertext_hashes`` accepted the full set and
    the consumed ranges were removed from ``self._received``.

    On BadHashError / NotEnoughHashesError the failure is logged, reported
    via ``self._signal_corruption``, ``self.had_corruption`` is set, and the
    exception is re-raised.
    """
    tree_offset = self.actual_offsets["crypttext_hash_tree"]

    def _position(hashnum):
        # byte offset of one hash inside the share
        return tree_offset + hashnum * HASH_SIZE

    found = {}
    for hashnum in needed_hashes:
        hashdata = self._received.get(_position(hashnum), HASH_SIZE)
        if not hashdata:
            return False  # missing some hashes
        found[hashnum] = hashdata

    # we don't submit any hashes to the ciphertext_hash_tree until we've
    # gotten them all
    try:
        self._node.process_ciphertext_hashes(found)
    except (BadHashError, NotEnoughHashesError) as e:
        failure = Failure(e)
        hashnums = ",".join(str(n) for n in sorted(found))
        log.msg(format="hash failure in ciphertext_hashes=(%(hashnums)s),"
                       " from %(share)s",
                hashnums=hashnums, share=repr(self), failure=failure,
                level=log.WEIRD, parent=self._lp, umid="iZI0TA")
        # NOTE(review): this length excludes the highest-numbered hash
        # itself; (max + 1) * HASH_SIZE may have been intended -- confirm.
        reported_len = max(0, max(needed_hashes)) * HASH_SIZE
        self._signal_corruption(failure, tree_offset, reported_len)
        self.had_corruption = True
        raise

    for hashnum in needed_hashes:
        self._received.remove(_position(hashnum), HASH_SIZE)
    return True
def write_results(self, data):
    """Print the collected statistics to ``self.options.stdout``.

    ``data`` is a dict of statistics. Only the well-known count/size keys
    listed below are printed, and only when present. Non-count values above
    1000 are followed by a human-readable abbreviation from
    ``abbreviate_space_both``.

    If ``data`` holds a non-empty ``"size-files-histogram"`` (an iterable of
    ``(minsize, maxsize, count)`` tuples), a histogram table follows, with a
    ``...`` line wherever two consecutive buckets are not adjacent. A missing
    histogram key is treated like an empty histogram.
    """
    stdout = self.options.stdout
    keys = (
        "count-immutable-files",
        "count-mutable-files",
        "count-literal-files",
        "count-files",
        "count-directories",
        "size-immutable-files",
        "size-mutable-files",
        "size-literal-files",
        "size-directories",
        "largest-directory",
        "largest-immutable-file",
    )
    # right-align every label to the longest key; built once, not per key
    width = max(len(k) for k in keys)
    fmt = "%" + str(width) + "s: %d"

    print("Counts and Total Sizes:", file=stdout)
    for k in keys:
        if k not in data:
            continue
        value = data[k]
        if not k.startswith("count-") and value > 1000:
            absize = abbreviate_space_both(value)
            print(fmt % (k, value), " ", absize, file=stdout)
        else:
            print(fmt % (k, value), file=stdout)

    # every other key is optional, so the histogram is treated the same way
    # instead of raising KeyError after the counts were already printed
    histogram = data.get("size-files-histogram")
    if not histogram:
        return

    print("Size Histogram:", file=stdout)
    maxlen = max(len(str(maxsize)) for (_minsize, maxsize, _count) in histogram)
    maxcountlen = max(len(str(count)) for (_minsize, _maxsize, count) in histogram)
    minfmt = "%" + str(maxlen) + "d"
    maxfmt = "%-" + str(maxlen) + "d"
    countfmt = "%-" + str(maxcountlen) + "d"
    linefmt = minfmt + "-" + maxfmt + " : " + countfmt + " %s"

    prevmax = None
    for (minsize, maxsize, count) in histogram:
        if prevmax is not None and minsize != prevmax + 1:
            # gap between buckets
            print(" " * (maxlen - 1) + "...", file=stdout)
        prevmax = maxsize
        print(linefmt % (minsize, maxsize, count,
                         abbreviate_space_both(maxsize)),
              file=stdout)
def list_aliases(options):
    """
    Show aliases that exist.

    Reads the alias details for ``options['node-directory']`` and prints them
    to ``options.stdout``: as indented JSON when ``options['json']`` is set,
    otherwise as one right-aligned ``name: cap`` line per alias (the
    read-only cap when ``options['readonly-uri']`` is set, else the
    read-write cap). Nothing is printed when there is no output. Always
    returns 0.
    """
    details_by_alias = _get_alias_details(options['node-directory'])

    if options['json']:
        text = json.dumps(details_by_alias, indent=4)
        if isinstance(text, bytes):
            text = text.decode("utf-8")
        output = _escape_format(text)
    else:
        # the trailing 0 keeps max() happy when there are no aliases
        widths = [len(quote_output(alias)) for alias in details_by_alias.keys()]
        widths.append(0)
        template = "%" + str(max(widths)) + "s: %s"

        lines = []
        for alias, details in details_by_alias.items():
            if options['readonly-uri']:
                cap = details['readonly']
            else:
                cap = details['readwrite']
            lines.append(template % (alias, cap.decode("utf-8")))
        output = "\n".join(lines)

    if output:
        # Show whatever we computed.  Skip this if there is no output to avoid
        # a spurious blank line.
        show_output(options.stdout, output)

    return 0
def _describe_immutable_share(abs_sharefile, now, si_s, out):
    """Print a one-line ``CHK ...`` summary of an immutable share to ``out``.

    The line holds the storage index string ``si_s``, k/N, the file size, the
    UEB hash, the number of seconds until the earliest lease expires
    (clamped at 0, relative to ``now``) and the quoted share path.
    """
    class ImmediateReadBucketProxy(ReadBucketProxy):
        """ReadBucketProxy that reads straight from the local share file."""

        def __init__(self, sf):
            self.sf = sf
            ReadBucketProxy.__init__(self, None, None, "")

        def __repr__(self):
            return "<ImmediateReadBucketProxy>"

        def _read(self, offset, size):
            # `sf` is the enclosing function's share file (bound below,
            # before any read happens)
            return defer.succeed(sf.read_share_data(offset, size))

    # use a ReadBucketProxy to parse the bucket and find the uri extension
    sf = ShareFile(abs_sharefile)
    proxy = ImmediateReadBucketProxy(sf)

    earliest_expiry = min(lease.get_expiration_time()
                          for lease in sf.get_leases())
    expiration = max(0, earliest_expiry - now)

    ueb = uri.unpack_extension_readable(call(proxy.get_uri_extension))
    fields = (
        si_s,
        ueb["needed_shares"],
        ueb["total_shares"],
        ueb["size"],
        str(ueb["UEB_hash"], "utf-8"),
        expiration,
        quote_output(abs_sharefile),
    )
    print("CHK %s %d/%d %d %s %d %s" % fields, file=out)
def _operation_complete(self, res, ophandle):
    """Arm an expiry timer for a finished operation that has none yet.

    Does nothing when ``ophandle`` is unknown or already has a timer.
    Otherwise ``self._set_timer`` is called with the larger of
    ``UNCOLLECTED_HANDLE_LIFETIME`` and the time elapsed since the handle
    was added.

    NOTE(review): ``res`` is accepted but not returned. If this is used as
    a Deferred callback the result would be replaced by None -- confirm
    against the caller.
    """
    if ophandle not in self.handles:
        return
    if ophandle in self.timers:
        return
    # the client has not provided a retain-for= value for this
    # handle, so we set our own.
    elapsed = time.time() - self.handles[ophandle][WHEN_ADDED]
    self._set_timer(ophandle, max(self.UNCOLLECTED_HANDLE_LIFETIME, elapsed))
def read(self, consumer, offset, size):
    """I am the main entry point, from which FileNode.read() can get
    data. I feed the consumer with the desired range of ciphertext. I
    return a Deferred that fires (with the consumer) when the read is
    finished.

    ``size=None`` means "to the end of the file". The requested range is
    clipped to the file, so a read starting at or past EOF delivers nothing.

    Note that there is no notion of a 'file pointer': each call to read()
    uses an independent offset= value.
    """
    file_size = self._verifycap.size
    if size is None:
        size = file_size
    # ignore overruns: clip size so offset+size does not go past EOF, and
    # so size is not negative (which indicates that offset >= EOF)
    size = min(size, file_size - offset)
    size = max(0, size)

    read_ev = self._download_status.add_read_event(offset, size, now())
    if IDownloadStatusHandlingConsumer.providedBy(consumer):
        consumer.set_download_status_read_event(read_ev)
        consumer.set_download_status(self._download_status)

    lp = log.msg(format="imm Node(%(si)s).read(%(offset)d, %(size)d)",
                 si=base32.b2a(self._verifycap.storage_index)[:8],
                 offset=offset, size=size,
                 level=log.OPERATIONAL, parent=self._lp, umid="l3j3Ww")

    if self._history:
        stats = self._history.stats_provider
        stats.count("downloader.files_downloaded", 1)  # really read() calls
        stats.count("downloader.bytes_downloaded", size)

    if size == 0:
        read_ev.finished(now())
        # no data, so no producer, so no register/unregisterProducer
        return defer.succeed(consumer)

    # for concurrent operations, each read() gets its own Segmentation
    # manager
    segmentation = Segmentation(self, offset, size, consumer, read_ev, lp)

    # this raises an interesting question: what segments to fetch? if
    # offset=0, always fetch the first segment, and then allow
    # Segmentation to be responsible for pulling the subsequent ones if
    # the first wasn't large enough. If offset>0, we're going to need an
    # extra roundtrip to get the UEB (and therefore the segment size)
    # before we can figure out which segment to get. TODO: allow the
    # offset-table-guessing code (which starts by guessing the segsize)
    # to assist the offset>0 process.
    d = segmentation.start()

    def _mark_finished(res):
        # runs on success and on failure; passes the result through
        read_ev.finished(now())
        return res

    d.addBoth(_mark_finished)
    return d
def read_share_data(self, offset, length):
    """Return up to ``length`` bytes of share data starting at ``offset``.

    ``offset`` must be non-negative. Reads beyond the end of the data are
    truncated. Reads that start beyond the end of the data return an empty
    string. The data region ends where the leases begin
    (``self._lease_offset``).
    """
    precondition(offset >= 0)
    start = self._data_offset + offset
    readable = self._lease_offset - start
    nbytes = max(0, min(length, readable))
    if not nbytes:
        return b""
    with open(self.home, 'rb') as share_file:
        share_file.seek(start)
        return share_file.read(nbytes)
def check_directory(self, contents):
    """I will tell you if a new directory needs to be created for a given
    set of directory contents, or if I know of an existing (immutable)
    directory that can be used instead.

    'contents' should be a dictionary that maps from child name (a single
    unicode string) to immutable childcap (filecap or dircap).

    I return a DirectoryResult object, synchronously. If r.was_created()
    returns False, you should create the directory (with
    t=mkdir-immutable). When you are finished, call r.did_create(dircap)
    so I can update my database.

    If was_created() returns a dircap, you might be able to avoid the
    mkdir. Call r.should_check(), and if it says False, you can skip the
    mkdir and use the dircap returned by was_created().

    If should_check() returns True, you should perform a check operation
    on the dircap returned by was_created(). If the check indicates the
    directory is healthy, please call
    r.did_check_healthy(checker_results) so I can update the database,
    using the de-JSONized response from the webapi t=check call for
    'checker_results'. If the check indicates the directory is not
    healthy, please repair or re-create the directory and call
    r.did_create(dircap) when you're done.
    """
    now = time.time()

    # canonical serialization: children sorted by UTF-8 name, each name and
    # cap wrapped as a netstring
    entries = sorted([name.encode("utf-8"), contents[name]]
                     for name in contents)
    serialized = b"".join(netstring(name_utf8) + netstring(cap)
                          for (name_utf8, cap) in entries)
    dirhash_s = base32.b2a(backupdb_dirhash(serialized))

    cursor = self.cursor
    cursor.execute("SELECT dircap, last_checked FROM directories WHERE dirhash=?",
                   (dirhash_s,))
    row = cursor.fetchone()
    if not row:
        return DirectoryResult(self, dirhash_s, None, False)

    dircap, last_checked = row
    # the chance of asking for a check grows linearly from 0 at
    # NO_CHECK_BEFORE to 1 at ALWAYS_CHECK_AFTER
    age = now - last_checked
    window = self.ALWAYS_CHECK_AFTER - self.NO_CHECK_BEFORE
    probability = (age - self.NO_CHECK_BEFORE) / window
    probability = min(max(probability, 0.0), 1.0)
    should_check = bool(random.random() < probability)

    return DirectoryResult(self, dirhash_s, to_bytes(dircap), should_check)
def _read_share_data(self, f, offset, length):
    """Read up to ``length`` bytes of share data at ``offset`` from ``f``.

    ``f`` is the already-open share file; ``offset`` must be non-negative.
    Reads beyond the end of the data are truncated. Reads that start beyond
    the end of the data return an empty string.
    """
    precondition(offset >= 0)
    available = self._read_data_length(f)
    if offset + length > available:
        # clip to what is actually there (never negative)
        length = max(0, available - offset)
    if length == 0:
        return b""
    precondition(offset + length <= available)
    f.seek(self.DATA_OFFSET + offset)
    return f.read(length)
def lease_last_cycle_results(self, req, tag):
    """Render a summary of the most recently completed lease-checker cycle.

    Returns "" when the lease checker has no history yet. Otherwise fills
    ``tag`` with how long the newest cycle took, how long ago it finished
    and the space it recovered, followed by a bullet list (what was
    examined, what would have been recovered had expiration been enabled,
    and any corrupt shares), and returns the tag.
    """
    history = self._storage.lease_checker.get_state()["history"]
    if not history:
        return ""

    # history keys are cycle numbers stored as strings
    newest_key = str(max(int(k) for k in history.keys()))
    last = history[newest_key]
    recovered = last["space-recovered"]

    start, end = last["cycle-start-finish-times"]
    tag("Last complete cycle (which took %s and finished %s ago)"
        " recovered: " % (abbreviate_time(end - start),
                          abbreviate_time(time.time() - end)),
        self.format_recovered(recovered, "actual"))

    bullets = T.ul()

    def add(*pieces):
        bullets(T.li(pieces))

    add("and saw a total of ", self.format_recovered(recovered, "examined"))

    if not last["expiration-enabled"]:
        add("but expiration was not enabled. If it had been, "
            "it would have recovered: ",
            self.format_recovered(recovered, "configured"))

    if last["corrupt-shares"]:
        descriptions = ["SI %s shnum %d" % (si, shnum)
                        for si, shnum in last["corrupt-shares"]]
        add("Corrupt shares:", T.ul((T.li(descriptions))))

    return tag(bullets)
def ls(options):
    """List a Tahoe directory (or describe a single file) on stdout.

    ``options`` supplies 'node-url', the boolean flags 'json', 'long',
    'classify', 'uri' and 'readonly-uri' by key, and ``aliases``, ``where``,
    ``stdout`` and ``stderr`` as attributes.

    Returns 0 on success; 1 for an unknown alias, an unusable JSON response,
    or when some name could not be encoded for the terminal; 2 when the
    target does not exist; 3 when the HTTP layer reports status 0; and the
    HTTP status itself for any other non-200 response.

    Children without metadata, or without the cap asked for by --uri /
    --readonly-uri, are listed with "-" placeholders rather than aborting
    the whole listing.
    """
    nodeurl = options['node-url']
    aliases = options.aliases
    where = options.where
    stdout = options.stdout
    stderr = options.stderr

    if not nodeurl.endswith("/"):
        nodeurl += "/"
    if where.endswith("/"):
        where = where[:-1]
    try:
        rootcap, path = get_alias(aliases, where, DEFAULT_ALIAS)
    except UnknownAliasError as e:
        e.display(stderr)
        return 1

    path = str(path, "utf-8")
    url = nodeurl + "uri/%s" % url_quote(rootcap)
    if path:
        # move where.endswith check here?
        url += "/" + escape_path(path)
    assert not url.endswith("/")
    url += "?t=json"
    resp = do_http("GET", url)
    if resp.status == 404:
        print("No such file or directory", file=stderr)
        return 2
    if resp.status != 200:
        print(format_http_error("Error during GET", resp), file=stderr)
        if resp.status == 0:
            return 3
        else:
            return resp.status

    data = resp.read()
    if options['json']:
        # The webapi server should always output printable ASCII.
        if is_printable_ascii(data):
            data = str(data, "ascii")
            print(data, file=stdout)
            return 0
        else:
            print("The JSON response contained unprintable characters:",
                  file=stderr)
            print(quote_output(data, quotemarks=False), file=stderr)
            return 1

    try:
        parsed = json.loads(data)
    except Exception as e:
        print("error: %s" % quote_output(e.args[0], quotemarks=False),
              file=stderr)
        print("Could not parse JSON response:", file=stderr)
        print(quote_output(data, quotemarks=False), file=stderr)
        return 1

    nodetype, d = parsed
    children = {}
    if nodetype == "dirnode":
        children = d['children']
    else:
        # paths returned from get_alias are always valid UTF-8
        childname = path.split("/")[-1]
        children = {childname: (nodetype, d)}
        if "metadata" not in d:
            d["metadata"] = {}
    childnames = sorted(children.keys())
    now = time.time()

    # we build up a series of rows, then we loop through them to compute a
    # maxwidth so we can format them tightly. Size, filename, and URI are the
    # variable-width ones.
    rows = []
    has_unknowns = False

    for name in childnames:
        child = children[name]
        name = str(name)
        childtype = child[0]
        info = child[1]

        # See webapi.txt for a discussion of the meanings of unix local
        # filesystem mtime and ctime, Tahoe mtime and ctime, and Tahoe
        # linkmotime and linkcrtime.
        # A child without a "metadata" entry is treated as having none,
        # instead of raising KeyError on the fallback lookup.
        metadata = info.get("metadata", {})
        ctime = metadata.get('tahoe', {}).get("linkcrtime")
        if not ctime:
            ctime = metadata.get("ctime")

        rw_uri = to_bytes(info.get("rw_uri"))
        ro_uri = to_bytes(info.get("ro_uri"))
        if ctime:
            # match for formatting that GNU 'ls' does
            if (now - ctime) > 6 * 30 * 24 * 60 * 60:
                # old files
                fmt = "%b %d %Y"
            else:
                fmt = "%b %d %H:%M"
            ctime_s = time.strftime(fmt, time.localtime(ctime))
        else:
            ctime_s = "-"

        if childtype == "dirnode":
            t0 = "d"
            size = "-"
            classify = "/"
        elif childtype == "filenode":
            t0 = "-"
            size = str(info.get("size", "?"))
            classify = ""
            if rw_uri:
                classify = "*"
        else:
            has_unknowns = True
            t0 = "?"
            size = "?"
            classify = "?"
        t1 = "-"
        if ro_uri:
            t1 = "r"
        t2 = "-"
        if rw_uri:
            t2 = "w"
        t3 = "-"
        if childtype == "dirnode":
            t3 = "x"

        uri = rw_uri or ro_uri

        line = []
        if options["long"]:
            line.append(t0 + t1 + t2 + t3)
            line.append(size)
            line.append(ctime_s)
        if not options["classify"]:
            classify = ""

        line.append(name + classify)

        if options["uri"]:
            # every cell must be text (its len() is taken below), so a
            # child with no cap at all gets a placeholder
            line.append(ensure_text(uri) if uri else "-")
        if options["readonly-uri"]:
            # decide on the raw value: the missing cap must never reach
            # ensure_text()
            ro_text = ensure_text(ro_uri) if ro_uri else "-"
            line.append(quote_output(ro_text, quotemarks=False))

        rows.append(line)

    max_widths = []
    left_justifys = []
    for row in rows:
        for i, cell in enumerate(row):
            while len(max_widths) <= i:
                max_widths.append(0)
            while len(left_justifys) <= i:
                left_justifys.append(False)
            max_widths[i] = max(max_widths[i], len(cell))
            # URI columns read better left-justified
            if ensure_text(cell).startswith("URI"):
                left_justifys[i] = True
    if len(left_justifys) == 1:
        left_justifys[0] = True

    fmt_pieces = []
    for width, left in zip(max_widths, left_justifys):
        piece = "%"
        if left:
            piece += "-"
        piece += str(width)
        piece += "s"
        fmt_pieces.append(piece)
    fmt = " ".join(fmt_pieces)

    rc = 0
    for row in rows:
        row = (fmt % tuple(row)).rstrip()
        encoding_error = False
        try:
            row = unicode_to_output(row)
        except UnicodeEncodeError:
            encoding_error = True
            row = quote_output(row)
        if encoding_error:
            print(row, file=stderr)
            rc = 1
        else:
            print(row, file=stdout)

    if rc == 1:
        print("\nThis listing included files whose names could not be converted to the terminal"
              "\noutput encoding. Their names are shown using backslash escapes and in quotes.",
              file=stderr)
    if has_unknowns:
        print("\nThis listing included unknown objects. Using a webapi server that supports"
              "\na later version of Tahoe may help.",
              file=stderr)

    return rc
def _describe_mutable_share(abs_sharefile, f, now, si_s, out):
    """Print a one-line summary of a mutable share to ``out``.

    ``f`` is the already-open share file. The first data byte selects the
    format: b"\\x00" is SDMF, b"\\x01" is MDMF, anything else is reported as
    ``UNKNOWN mutable``. SDMF/MDMF lines carry ``si_s``, k/N, the data
    length, the sequence number and root hash, the seconds until the
    earliest lease expires (clamped at 0, relative to ``now``) and the
    quoted share path.
    """
    # mutable share
    m = MutableShareFile(abs_sharefile)
    WE, nodeid = m._read_write_enabler_and_nodeid(f)
    data_length = m._read_data_length(f)
    earliest_expiry = min([lease.get_expiration_time()
                           for (i, lease) in m._enumerate_leases(f)])
    expiration = max(0, earliest_expiry - now)

    f.seek(m.DATA_OFFSET)
    version = f.read(1)

    if version == b"\x00":
        # this slot contains an SDMF share
        f.seek(m.DATA_OFFSET)

        # Read at least the mutable header length, if possible.  If there's
        # less data than that in the share, don't try to read more (we won't
        # be able to unpack the header in this case but we surely don't want
        # to try to unpack bytes *following* the data section as if they were
        # header data).  Rather than 2000 we could use HEADER_LENGTH from
        # allmydata/mutable/layout.py, probably.
        data = f.read(min(data_length, 2000))

        try:
            pieces = unpack_share(data)
        except NeedMoreDataError as e:
            # retry once with the larger size
            f.seek(m.DATA_OFFSET)
            data = f.read(min(data_length, e.needed_bytes))
            pieces = unpack_share(data)
        (seqnum, root_hash, IV, k, N, segsize, datalen,
         pubkey, signature, share_hash_chain, block_hash_tree,
         share_data, enc_privkey) = pieces

        print("SDMF %s %d/%d %d #%d:%s %d %s" %
              (si_s, k, N, datalen,
               seqnum, str(base32.b2a(root_hash), "utf-8"),
               expiration, quote_output(abs_sharefile)), file=out)
        return

    if version == b"\x01":
        fake_shnum = 0

        # TODO: factor this out with dump_MDMF_share()
        class ShareDumper(MDMFSlotReadProxy):
            def _read(self, readvs, force_remote=False, queue=False):
                chunks = []
                for (where, length) in readvs:
                    f.seek(m.DATA_OFFSET + where)
                    chunks.append(f.read(length))
                return defer.succeed({fake_shnum: chunks})

        proxy = ShareDumper(None, "fake-si", fake_shnum)

        def extract(func):
            # these methods return Deferreds, but we happen to know that
            # they run synchronously when not actually talking to a
            # remote server
            stash = []
            func().addCallback(stash.append)
            return stash[0]

        verinfo = extract(proxy.get_verinfo)
        (seqnum, root_hash, salt_to_use, segsize, datalen, k, N, prefix,
         offsets) = verinfo
        print("MDMF %s %d/%d %d #%d:%s %d %s" %
              (si_s, k, N, datalen,
               seqnum, str(base32.b2a(root_hash), "utf-8"),
               expiration, quote_output(abs_sharefile)), file=out)
        return

    print("UNKNOWN mutable %s" % quote_output(abs_sharefile), file=out)
def get_disk_stats(whichdir, reserved_space=0):
    """Return disk statistics for the storage disk, in the form of a dict
    with the following fields.
      total:            total bytes on disk
      free_for_root:    bytes actually free on disk
      free_for_nonroot: bytes free for "a non-privileged user" [Unix] or
                          the current user [Windows]; might take into
                          account quotas depending on platform
      used:             bytes used on disk
      avail:            bytes available excluding reserved space
    An AttributeError can occur if the OS has no API to get disk information.
    An EnvironmentError can occur if the OS call fails.

    whichdir is a directory on the filesystem in question -- the
    answer is about the filesystem, not about the directory, so the
    directory is used only to specify which filesystem.

    reserved_space is how many bytes to subtract from the answer, so
    you can pass how many bytes you would like to leave unused on this
    filesystem as reserved_space.
    """
    if have_GetDiskFreeSpaceExW:
        # If this is a Windows system and GetDiskFreeSpaceExW is available, use it.
        # (This might put up an error dialog unless
        # SetErrorMode(SEM_FAILCRITICALERRORS | SEM_NOOPENFILEERRORBOX) has been called,
        # which we do in allmydata.windows.fixups.initialize().)
        nonroot_out = c_ulonglong(0)
        total_out = c_ulonglong(0)
        root_out = c_ulonglong(0)
        succeeded = GetDiskFreeSpaceExW(whichdir, byref(nonroot_out),
                                        byref(total_out),
                                        byref(root_out))
        if succeeded == 0:
            raise OSError("WinError: %s\n attempting to get disk statistics for %r"
                          % (WinError(get_last_error()), whichdir))
        free_for_nonroot = nonroot_out.value
        total = total_out.value
        free_for_root = root_out.value
    else:
        # For Unix-like systems.
        # <http://docs.python.org/library/os.html#os.statvfs>
        # <http://opengroup.org/onlinepubs/7990989799/xsh/fstatvfs.html>
        # <http://opengroup.org/onlinepubs/7990989799/xsh/sysstatvfs.h.html>
        vfs = os.statvfs(whichdir)

        # on my mac laptop:
        #  statvfs(2) is a wrapper around statfs(2).
        #    statvfs.f_frsize = statfs.f_bsize :
        #     "minimum unit of allocation" (statvfs)
        #     "fundamental file system block size" (statfs)
        #    statvfs.f_bsize = statfs.f_iosize = stat.st_blocks : preferred IO size
        # on an encrypted home directory ("FileVault"), it gets f_blocks
        # wrong, and s.f_blocks*s.f_frsize is twice the size of my disk,
        # but s.f_bavail*s.f_frsize is correct
        total = vfs.f_frsize * vfs.f_blocks
        free_for_root = vfs.f_frsize * vfs.f_bfree
        free_for_nonroot = vfs.f_frsize * vfs.f_bavail

    # valid for all platforms:
    stats = {
        'total': total,
        'free_for_root': free_for_root,
        'free_for_nonroot': free_for_nonroot,
    }
    stats['used'] = total - free_for_root
    # never report a negative amount of available space
    stats['avail'] = max(free_for_nonroot - reserved_space, 0)
    return stats
def describe_share(abs_sharefile, si_s, shnum_s, now, out):
    """Print a one-line description of the share file to ``out``.

    The first 32 bytes select the container type:

    * mutable container magic: the first data byte picks SDMF (b"\\x00") or
      MDMF (b"\\x01"), printed as ``SDMF ...`` / ``MDMF ...``; any other
      version byte prints ``UNKNOWN mutable``.
    * a big-endian 32-bit 1 in the first four bytes: immutable share,
      printed as ``CHK ...``.
    * anything else: ``UNKNOWN really-unknown``.

    ``si_s`` is the storage index string shown in the output and ``now`` is
    the time against which the earliest lease expiration is measured (the
    printed value is clamped at 0). ``shnum_s`` is not used here.

    The share file is opened through a context manager, so it is closed
    even when parsing a damaged share raises.
    """
    from allmydata import uri
    from allmydata.storage.mutable import MutableShareFile
    from allmydata.storage.immutable import ShareFile
    from allmydata.mutable.layout import unpack_share
    from allmydata.mutable.common import NeedMoreDataError
    from allmydata.immutable.layout import ReadBucketProxy
    from allmydata.util import base32
    from allmydata.util.encodingutil import quote_output
    import struct

    with open(abs_sharefile, "rb") as f:
        prefix = f.read(32)

        if prefix == MutableShareFile.MAGIC:
            # mutable share
            m = MutableShareFile(abs_sharefile)
            WE, nodeid = m._read_write_enabler_and_nodeid(f)
            data_length = m._read_data_length(f)
            expiration_time = min([lease.expiration_time
                                   for (i, lease) in m._enumerate_leases(f)])
            expiration = max(0, expiration_time - now)

            share_type = "unknown"
            f.seek(m.DATA_OFFSET)
            version = f.read(1)
            if version == b"\x00":
                # this slot contains an SDMF share
                share_type = "SDMF"
            elif version == b"\x01":
                share_type = "MDMF"

            if share_type == "SDMF":
                f.seek(m.DATA_OFFSET)
                data = f.read(min(data_length, 2000))

                try:
                    pieces = unpack_share(data)
                except NeedMoreDataError as e:
                    # retry once with the larger size
                    size = e.needed_bytes
                    f.seek(m.DATA_OFFSET)
                    data = f.read(min(data_length, size))
                    pieces = unpack_share(data)
                (seqnum, root_hash, IV, k, N, segsize, datalen,
                 pubkey, signature, share_hash_chain, block_hash_tree,
                 share_data, enc_privkey) = pieces

                print("SDMF %s %d/%d %d #%d:%s %d %s" %
                      (si_s, k, N, datalen,
                       seqnum, str(base32.b2a(root_hash), "utf-8"),
                       expiration, quote_output(abs_sharefile)), file=out)
            elif share_type == "MDMF":
                from allmydata.mutable.layout import MDMFSlotReadProxy
                fake_shnum = 0

                # TODO: factor this out with dump_MDMF_share()
                class ShareDumper(MDMFSlotReadProxy):
                    def _read(self, readvs, force_remote=False, queue=False):
                        data = []
                        for (where, length) in readvs:
                            f.seek(m.DATA_OFFSET + where)
                            data.append(f.read(length))
                        return defer.succeed({fake_shnum: data})

                p = ShareDumper(None, "fake-si", fake_shnum)

                def extract(func):
                    stash = []
                    # these methods return Deferreds, but we happen to know
                    # that they run synchronously when not actually talking
                    # to a remote server
                    d = func()
                    d.addCallback(stash.append)
                    return stash[0]

                verinfo = extract(p.get_verinfo)
                (seqnum, root_hash, salt_to_use, segsize, datalen, k, N,
                 prefix, offsets) = verinfo
                print("MDMF %s %d/%d %d #%d:%s %d %s" %
                      (si_s, k, N, datalen,
                       seqnum, str(base32.b2a(root_hash), "utf-8"),
                       expiration, quote_output(abs_sharefile)), file=out)
            else:
                print("UNKNOWN mutable %s" % quote_output(abs_sharefile),
                      file=out)

        elif struct.unpack(">L", prefix[:4]) == (1,):
            # immutable

            class ImmediateReadBucketProxy(ReadBucketProxy):
                def __init__(self, sf):
                    self.sf = sf
                    ReadBucketProxy.__init__(self, None, None, "")

                def __repr__(self):
                    return "<ImmediateReadBucketProxy>"

                def _read(self, offset, size):
                    return defer.succeed(sf.read_share_data(offset, size))

            # use a ReadBucketProxy to parse the bucket and find the uri
            # extension
            sf = ShareFile(abs_sharefile)
            bp = ImmediateReadBucketProxy(sf)

            expiration_time = min([lease.expiration_time
                                   for lease in sf.get_leases()])
            expiration = max(0, expiration_time - now)

            UEB_data = call(bp.get_uri_extension)
            unpacked = uri.unpack_extension_readable(UEB_data)

            k = unpacked["needed_shares"]
            N = unpacked["total_shares"]
            filesize = unpacked["size"]
            ueb_hash = unpacked["UEB_hash"]

            print("CHK %s %d/%d %d %s %d %s" %
                  (si_s, k, N, filesize,
                   str(ueb_hash, "utf-8"), expiration,
                   quote_output(abs_sharefile)), file=out)

        else:
            print("UNKNOWN really-unknown %s" % quote_output(abs_sharefile),
                  file=out)
""" # The second field -- the four-byte share data length -- is no longer # used as of Tahoe v1.3.0, but we continue to write it in there in # case someone downgrades a storage server from >= Tahoe-1.3.0 to < # Tahoe-1.3.0, or moves a share file from one server to another, # etc. We do saturation -- a share data length larger than 2**32-1 # (what can fit into the field) is marked as the largest length that # can fit into the field. That way, even if this does happen, the old # < v1.3.0 server will still allow clients to read the first part of # the share. return struct.pack(">LLL", self.version, min(2**32 - 1, max_size), 0) ALL_SCHEMAS = { _Schema(version=2, lease_serializer=v2_immutable), _Schema(version=1, lease_serializer=v1_immutable), } ALL_SCHEMA_VERSIONS = {schema.version for schema in ALL_SCHEMAS} NEWEST_SCHEMA_VERSION = max(ALL_SCHEMAS, key=lambda schema: schema.version) def schema_from_version(version): # (int) -> Optional[type] """ Find the schema object that corresponds to a certain version number. """ for schema in ALL_SCHEMAS: if schema.version == version: return schema return None
def max(self, key, value):
    """Raise ``self.stats[key]`` to ``value`` if ``value`` is larger.

    The entry must already exist (a missing key raises KeyError). The entry
    is always written back, even when it does not change.
    """
    current = self.stats[key]
    # same choice the builtin max(current, value) makes: the new value wins
    # only when it is strictly greater
    self.stats[key] = value if value > current else current
def check_file(self, path, use_timestamps=True):
    """I will tell you if a given local file needs to be uploaded or not,
    by looking in a database and seeing if I have a record of this file
    having been uploaded earlier.

    I return a FileResults object, synchronously. If r.was_uploaded()
    returns False, you should upload the file. When you are finished
    uploading it, call r.did_upload(filecap), so I can update my
    database.

    If was_uploaded() returns a filecap, you might be able to avoid an
    upload. Call r.should_check(), and if it says False, you can skip the
    upload and use the filecap returned by was_uploaded().

    If should_check() returns True, you should perform a filecheck on the
    filecap returned by was_uploaded(). If the check indicates the file
    is healthy, please call r.did_check_healthy(checker_results) so I can
    update the database, using the de-JSONized response from the webapi
    t=check call for 'checker_results'. If the check indicates the file
    is not healthy, please upload the file and call r.did_upload(filecap)
    when you're done.

    If use_timestamps=True (the default), I will compare ctime and mtime
    of the local file against an entry in my database, and consider the
    file to be unchanged if ctime, mtime, and filesize are all the same
    as the earlier version. If use_timestamps=False, I will not trust the
    timestamps, so more files (perhaps all) will be marked as needing
    upload. A future version of this database may hash the file to make
    equality decisions, in which case use_timestamps=False will not
    always imply r.must_upload()==True.

    'path' points to a local file on disk, possibly relative to the
    current working directory. The database stores absolute pathnames.
    """
    path = abspath_expanduser_unicode(path)

    # TODO: consider using get_pathinfo.
    st = os.stat(path)
    size = st[stat.ST_SIZE]
    ctime = st[stat.ST_CTIME]
    mtime = st[stat.ST_MTIME]

    now = time.time()
    c = self.cursor

    def _must_upload():
        # result meaning "no usable earlier upload is known"
        return FileResult(self, None, False, path, mtime, ctime, size)

    c.execute("SELECT size,mtime,ctime,fileid FROM local_files WHERE path=?",
              (path,))
    row = self.cursor.fetchone()
    if not row:
        return _must_upload()
    (last_size, last_mtime, last_ctime, last_fileid) = row

    c.execute("SELECT caps.filecap, last_upload.last_checked"
              " FROM caps,last_upload"
              " WHERE caps.fileid=? AND last_upload.fileid=?",
              (last_fileid, last_fileid))
    row2 = c.fetchone()

    # the file has been changed
    changed = (last_size != size
               or not use_timestamps
               or last_mtime != mtime
               or last_ctime != ctime)
    # or we somehow forgot where we put the file last time
    forgotten = not row2
    if changed or forgotten:
        c.execute("DELETE FROM local_files WHERE path=?", (path,))
        self.connection.commit()
        return _must_upload()

    # at this point, we're allowed to assume the file hasn't been changed
    (filecap, last_checked) = row2
    age = now - last_checked

    # check probability ramps linearly from 0 at NO_CHECK_BEFORE to 1 at
    # ALWAYS_CHECK_AFTER
    window = self.ALWAYS_CHECK_AFTER - self.NO_CHECK_BEFORE
    probability = (age - self.NO_CHECK_BEFORE) / window
    probability = min(max(probability, 0.0), 1.0)
    should_check = bool(random.random() < probability)

    return FileResult(self, to_bytes(filecap), should_check,
                      path, mtime, ctime, size)
def render(self, req):
    """Stream the file node's contents into the HTTP request ``req``.

    Sets content-type / content-encoding from the filename, an optional
    content-disposition when the ``save`` argument is true, and honours the
    first range of a parsable ``Range`` header (responding 206, or raising
    a WebError with REQUESTED_RANGE_NOT_SATISFIABLE when it starts at or
    past EOF). A HEAD request returns b"" right after the headers are set.
    Otherwise returns the Deferred of ``self.filenode.read``, which fires
    with None on success.
    """
    ctype, encoding = static.getTypeAndEncoding(
        self.filename,
        static.File.contentTypes,
        static.File.contentEncodings,
        defaultType="text/plain")
    req.setHeader("content-type", ctype)
    if encoding:
        req.setHeader("content-encoding", encoding)

    if boolean_of_arg(get_arg(req, "save", "False")):
        # tell the browser to save the file rather than display it. We
        # don't try to encode the filename, instead we echo back the exact
        # same bytes we were given in the URL. See the comment in
        # FileNodeHandler.render_GET for the sad details.
        req.setHeader("content-disposition",
                      b'attachment; filename="%s"' % self.filename)

    filesize = self.filenode.get_size()
    # NOTE(review): `long` must be supplied by the module (presumably a
    # py2/py3 compatibility alias) -- confirm.
    assert isinstance(filesize, (int, long)), filesize

    first = 0
    size = None
    contentsize = filesize
    req.setHeader("accept-ranges", "bytes")

    # TODO: for mutable files, use the roothash. For LIT, hash the data.
    # or maybe just use the URI for CHK and LIT.
    rangeheader = req.getHeader('range')
    # ranges = None means the header didn't parse, so ignore
    # the header as if it didn't exist.  If there is more than one
    # range, then just return the first for now, until we can
    # generate multipart/byteranges.
    ranges = self.parse_range_header(rangeheader) if rangeheader else None
    if ranges is not None:
        first, last = ranges[0]
        if first >= filesize:
            raise WebError('First beyond end of file',
                           http.REQUESTED_RANGE_NOT_SATISFIABLE)
        first = max(0, first)
        last = min(filesize - 1, last)

        req.setResponseCode(http.PARTIAL_CONTENT)
        req.setHeader('content-range', "bytes %s-%s/%s" %
                      (str(first), str(last), str(filesize)))
        contentsize = last - first + 1
        size = contentsize

    req.setHeader("content-length", b"%d" % contentsize)
    if req.method == b"HEAD":
        return b""

    d = self.filenode.read(req, first, size)

    def _error(f):
        if f.check(defer.CancelledError):
            # The HTTP connection was lost and we no longer have anywhere
            # to send our result.  Let this pass through.
            return f
        if not req.startedWriting:
            # We haven't written anything yet, so we can provide a
            # sensible error message.
            return f
        # The content-type is already set, and the response code has
        # already been sent, so we can't provide a clean error
        # indication. We can emit text (which a browser might
        # interpret as something else), and if we sent a Size header,
        # they might notice that we've truncated the data. Keep the
        # error message small to improve the chances of having our
        # error response be shorter than the intended results.
        #
        # We don't have a lot of options, unfortunately.
        return b"problem during download\n"

    d.addCallbacks(
        lambda ignored: None,
        _error,
    )
    return d