def do_test_bound_flat_dirs(self, hashtype):
    """test directory is single level, with four data files"""
    check_hashtype(hashtype)
    dir_name1, dir_path1, dir_name2, dir_path2 = \
        self.make_two_test_directories(ONE, FOUR)

    def build_and_check(dir_name, dir_path):
        # build a MerkleTree from the directory and verify its hashes
        tree = MerkleTree.create_from_file_system(dir_path, hashtype)
        self.assertEqual(dir_name, tree.name)
        nodes = tree.nodes
        self.assertTrue(nodes is not None)
        self.assertEqual(FOUR, len(nodes))
        self.verify_tree_sha(tree, dir_path, hashtype)
        return tree

    tree1 = build_and_check(dir_name1, dir_path1)
    tree2 = build_and_check(dir_name2, dir_path2)

    # distinct directories must yield unequal trees
    self.assertFalse(tree1 is None)
    self.assertTrue(tree1 == tree1)
    self.assertFalse(tree1 == tree2)

    # round-trip through the string serialization
    tree1_str = tree1.to_string(0)
    tree1_rebuilt = MerkleTree.create_from_serialization(
        tree1_str, hashtype)
    self.assertTrue(tree1 == tree1_rebuilt)
def __init__(self, u_path='/var/U', limit=64, start_at='00',
             just_keys=False, hashtype=HashTypes.SHA2,
             dir_struc=DirStruc.DIR256x256, verbose=False):
    """
    Validate arguments and record walker configuration.

    Raises UpaxError if u_path does not exist; exits the process if
    start_at is not two hex digits.
    """
    _ = dir_struc  # UNUSED, SUPPRESS WARNING
    if not os.path.exists(u_path):
        raise UpaxError("directory '%s' does not exist" % str(u_path))
    check_hashtype(hashtype)
    self._u_path = u_path
    self._count = 0
    # a non-positive limit falls back to the default of 64
    self._limit = limit if limit > 0 else 64
    # we constrain startAt to be two lowercase hex digits
    start_at = start_at.lower()
    if not TWO_HEX_RE.match(start_at):
        print("startAt = '%s' is not valid hex" % start_at)
        sys.exit(-1)
    self._just_keys = just_keys
    self._start_at = start_at
    self._hashtype = hashtype
    self._verbose = verbose
    self._keys = []
def do_test_log_without_entries(self, hashtype):
    """Verify a BoundLog built from a header-only (empty) log string."""
    check_hashtype(hashtype)
    (goodkey_1, goodkey_2, _, _, _, _, _, _) = self.get_good(hashtype)
    time0 = 1000 * (int(time.time()) - 10000)
    # the first line of an otherwise empty log file
    empty_log = "%013u %s %s\n" % (time0, goodkey_1, goodkey_2)
    reader = StringReader(empty_log, hashtype)
    log = BoundLog(reader, hashtype, self.u_dir, 'L')  # will default to 'L'
    assert log is not None
    self.assertEqual(time0, log.timestamp)
    self.assertEqual(goodkey_1, log.prev_hash)
    self.assertEqual(goodkey_2, log.prev_master)

    # only first line should appear, because there are no entries
    expected = empty_log
    actual = log.__str__()
    self.assertEqual(expected, actual)
    self.assertEqual(0, len(log))

    # disk file must exist and must contain just the one line.
    # FIX: derive the path from self.u_dir (where the log was bound
    # above) instead of the hard-coded "%s/L" % "dev0/U/", which both
    # duplicated the fixture path and produced a doubled slash.
    path_to_log = os.path.join(self.u_dir, 'L')
    assert os.path.exists(path_to_log)
    with open(path_to_log, "r") as file:
        contents = file.read()
    self.assertEqual(empty_log, contents)
    log.close()
def verify_tree_sha(self, node, path_to_tree, hashtype):
    """Recursively check that node's bin_hash matches its children."""
    # we assume that the node is a MerkleTree
    check_hashtype(hashtype)
    if node.nodes is None:
        # a tree without children has no hash at all
        self.assertEqual(None, node.bin_hash)
        return
    hash_count = 0
    if hashtype == HashTypes.SHA1:
        sha = hashlib.sha1()
    elif hashtype == HashTypes.SHA2:
        sha = hashlib.sha256()
    elif hashtype == HashTypes.SHA3:
        # pylint: disable=no-member
        sha = hashlib.sha3_256()
    for child in node.nodes:
        path_to_node = os.path.join(path_to_tree, child.name)
        if isinstance(child, MerkleLeaf):
            self.verify_leaf_sha(child, path_to_node, hashtype)
        elif isinstance(child, MerkleTree):
            self.verify_tree_sha(child, path_to_node, hashtype)
        else:
            print("DEBUG: unknown node type!")
            self.fail("unknown node type!")
        if child.bin_hash is not None:
            hash_count += 1
            sha.update(child.bin_hash)
    # the tree's hash is the hash over its children's hashes, or None
    # if no child contributed one
    if hash_count == 0:
        self.assertEqual(None, node.bin_hash)
    else:
        self.assertEqual(sha.digest(), node.bin_hash)
def get_good(self, hashtype):
    """Return eight well-formed content keys for the given hash type."""
    check_hashtype(hashtype)
    if hashtype == HashTypes.SHA1:
        # 40 hex digits each
        keys = (
            '0123456789012345678901234567890123456789',
            'fedcba9876543210fedcba9876543210fedcba98',
            '1234567890123456789012345678901234567890',
            'edcba9876543210fedcba9876543210fedcba98f',
            '2345678901234567890123456789012345678901',
            'dcba9876543210fedcba9876543210fedcba98fe',
            '3456789012345678901234567890123456789012',
            'cba9876543210fedcba9876543210fedcba98fed',
        )
    else:
        # 64 hex digits each, suitable for the 32-byte hash types
        keys = (
            '0123456789012345678901234567890123'
            '456789abcdef3330123456789abcde',
            'fedcba9876543210fedcba9876543210fe'
            'dcba98012345678901234567890123',
            '1234567890123456789012345678901234'
            '567890abcdef697698768696969696',
            'edcba9876543210fedcba9876543210fed'
            'cba98f012345678901234567890123',
            '2345678901234567890123456789012345'
            '678901654654647645647654754757',
            'dcba9876543210fedcba9876543210fedc'
            'ba98fe453254323243253274754777',
            '3456789012345678901234567890123456'
            '789012abcdef696878687686999987',
            'cba9876543210fedcba9876543210fedcb'
            'a98fedfedcab698687669676999988',
        )
    return keys
def create_from_file_system(path_to_dir, hashtype=HashTypes.SHA2,
                            exclusions=None, matches=None):
    """
    Create a MerkleDoc based on the information in the directory
    at pathToDir.  The name of the directory will be the last
    component of pathToDir.  Return the MerkleTree.
    """
    check_hashtype(hashtype)
    if not path_to_dir:
        raise RuntimeError("cannot create a MerkleTree, no path set")
    if not os.path.exists(path_to_dir):
        raise RuntimeError(
            "MerkleTree: directory '%s' does not exist" % path_to_dir)
    # split off the parent path; the doc is rooted in the parent
    path, _, _ = path_to_dir.rpartition('/')
    if path == '':
        raise RuntimeError("cannot parse inclusive path " + path_to_dir)
    path += '/'
    # compile any exclusion/match patterns into regular expressions
    ex_re = make_ex_re(exclusions) if exclusions else None
    match_re = make_match_re(matches) if matches else None
    tree = MerkleTree.create_from_file_system(
        path_to_dir, hashtype, ex_re, match_re)   # creates the hash
    doc = MerkleDoc(path, hashtype, False, tree, ex_re, match_re)
    doc.bound = True
    return doc
def do_test_insert_4_leafs(self, hashtype):
    """
    Create 4 leaf nodes with random but unique names.  Insert
    them into a tree, verifying that the resulting sort is correct.
    """
    check_hashtype(hashtype)
    name = self.rng.next_file_name(8)
    tree = NLHTree(name, hashtype)
    leaf_names = set()
    leaves = [self.make_leaf(leaf_names, hashtype) for _ in range(4)]
    self.assertEqual(len(tree.nodes), 0)
    # insert one leaf at a time, checking the node count each time
    for count, leaf in enumerate(leaves, start=1):
        tree.insert(leaf)
        self.assertEqual(len(tree.nodes), count)
    # we expect the nodes to be sorted by name
    for ndx in range(3):
        self.assertTrue(tree.nodes[ndx].name < tree.nodes[ndx + 1].name)
    matches = tree.list('*')
    for ndx, node in enumerate(tree.nodes):
        self.assertEqual(matches[ndx], ' ' + node.name)
    self.assertEqual(tree, tree)
    tree2 = tree.clone()
    self.assertEqual(tree2, tree)
def do_test_simple_constructor(self, hashtype):
    """ Test constructor for specific SHA type. """
    check_hashtype(hashtype)
    if hashtype == HashTypes.SHA1:
        sha = hashlib.sha1()
    elif hashtype == HashTypes.SHA2:
        sha = hashlib.sha256()
    elif hashtype == HashTypes.SHA3:
        sha = hashlib.sha3_256()

    # first leaf: random name and hash over random data
    file_name = self.rng.next_file_name(8)
    data = self.rng.some_bytes(8)
    sha.update(data)
    hash0 = sha.digest()
    leaf0 = MerkleLeaf(file_name, hashtype, hash0)
    self.assertEqual(file_name, leaf0.name)
    self.assertEqual(hash0, leaf0.bin_hash)

    # second leaf: name guaranteed to differ from the first
    file_name2 = file_name
    while file_name2 == file_name:
        file_name2 = self.rng.next_file_name(8)
    data = self.rng.some_bytes(8)
    self.rng.next_bytes(data)
    sha.update(data)
    hash1 = sha.digest()
    leaf1 = MerkleLeaf(file_name2, hashtype, hash1)
    self.assertEqual(file_name2, leaf1.name)
    self.assertEqual(hash1, leaf1.bin_hash)

    # leaves compare equal to themselves but not to each other
    self.assertTrue(leaf0.equal(leaf0))
    self.assertFalse(leaf0.equal(leaf1))
def __init__(self, hash_types=HashTypes.SHA2, sk_=None, ck_=None,
             node_id=None):
    """
    Record hash type and keys; if no node_id is supplied, derive one
    by hashing the PEM serialization of the public key sk_.
    """
    check_hashtype(hash_types)
    self._hash_types = hash_types
    if node_id is None:
        # we arbitrarily use sk_ to calculate a unique node ID
        if sk_:
            if hash_types == HashTypes.SHA1:
                sha_ = hashes.SHA1
            elif hash_types == HashTypes.SHA2:
                sha_ = hashes.SHA256
            else:
                # FIX: previously an unmatched hash type fell through
                # and raised an obscure NameError on `sha_` below
                raise NotImplementedError(
                    "unsupported hash type for node ID calculation")
            sha = hashes.Hash(sha_(), backend=default_backend())
            pem = sk_.public_bytes(
                encoding=serialization.Encoding.PEM,
                format=serialization.PublicFormat.PKCS1)
            sha.update(pem)
            node_id = sha.finalize()        # a binary value
            # DEBUG
            # AbstractNode.dump_hex(
            #     "SHA%d Abs Calc ID" % hash_types, node_id)
            # END
        else:
            raise ValueError(
                'cannot calculate nodeID without public key sk_')
    self._node_id = node_id
    self._sk = sk_
    self._ck = ck_
def pbkdf2(passwd, salt, hashtype=HashTypes.SHA2, iterations=10000,
           dklen=None):
    """
    Derive a bytes-like key from a string password using the specified
    hash.  salt is bytes-like.  Greater iterations provide greater
    security.  If dklen is specified, it must be an int.
    """
    if isinstance(passwd, str):
        passwd = passwd.encode()
    check_hashtype(hashtype)
    if hashtype == HashTypes.SHA1:
        hash_name = 'sha1'
    elif hashtype == HashTypes.SHA2:
        hash_name = 'sha256'
    elif hashtype == HashTypes.SHA3:
        hash_name = 'sha3_256'
    elif hashtype == HashTypes.BLAKE2B:
        hash_name = 'blake2b'   # blake2b gets "unsupported hash type"
    else:
        # FIX: previously an unmatched hash type fell through silently
        # and raised a NameError on `hash_name` below
        raise NotImplementedError("unsupported hash type %s" % hashtype)
    if sys.version_info >= (3, 6):
        # hash_name is str like 'sha1' or 'sha256'
        # LIMITATION: 'sha3', variations, and 'blake2b' are NOT SUPPORTED
        # passwd must be bytes-like
        return _pbkdf2(hash_name, passwd, salt, iterations, dklen)
    else:
        if not dklen:
            dklen = 32          # just playing around
        return PBKDF2(passwd, salt, iterations=iterations).read(dklen)
def do_test_placeholder_unused(self):
    pass
def create_from_file_system(path_to_dir, hashtype=HashTypes.SHA2,
                            ex_re=None, match_re=None):
    """
    Create a MerkleTree based on the information in the directory
    at pathToDir.  The name of the directory will be the last
    component of pathToDir.  Return the MerkleTree.
    """
    check_hashtype(hashtype)
    if not path_to_dir:
        raise RuntimeError("cannot create a MerkleTree, no path set")
    if not os.path.exists(path_to_dir):
        raise RuntimeError(
            "MerkleTree: directory '%s' does not exist" % path_to_dir)
    (path, _, name) = path_to_dir.rpartition('/')
    if not path:
        raise RuntimeError("can't parse inclusive path '%s'" % path_to_dir)

    tree = MerkleTree(name, hashtype, ex_re, match_re)
    tree.bin_hash = None
    sha = get_hash_func(hashtype)

    # Create data structures for constituent files and subdirectories.
    # These MUST BE SORTED by the bare name to meet specs.
    sha_count = 0
    for entry in sorted(os.listdir(path_to_dir)):
        # exclusions take priority over matches
        if ex_re and ex_re.search(entry):
            continue
        if match_re and not match_re.search(entry):
            continue
        node = None
        path_to_entry = os.path.join(path_to_dir, entry)
        stat_info = os.lstat(path_to_entry)     # ignores symlinks
        # os.path.isdir(path) follows symbolic links
        if S_ISDIR(stat_info.st_mode):
            node = MerkleTree.create_from_file_system(
                path_to_entry, hashtype, ex_re, match_re)
        # S_ISLNK(mode) is true if symbolic link
        # isfile(path) follows symbolic links
        elif os.path.isfile(path_to_entry):     # S_ISREG(mode):
            node = MerkleLeaf.create_from_file_system(
                path_to_entry, entry, hashtype)
        # otherwise, just ignore it ;-)
        if node:
            # update tree-level hash
            if node.bin_hash:
                # note empty file has null hash  XXX NOT TRUE
                sha_count += 1
                sha.update(node.bin_hash)
            tree.nodes.append(node)
    if sha_count:
        tree.bin_hash = bytes(sha.digest())
    return tree
def create_from_file_system(path_to_dir, hashtype=HashTypes.SHA2,
                            exclusions=None, matches=None):
    """
    Create a MerkleDoc based on the information in the directory
    at pathToDir.  The name of the directory will be the last
    component of pathToDir.  Return the MerkleTree.
    """
    check_hashtype(hashtype)
    if not path_to_dir:
        raise RuntimeError("cannot create a MerkleTree, no path set")
    if not os.path.exists(path_to_dir):
        raise RuntimeError(
            "MerkleTree: directory '%s' does not exist" % path_to_dir)
    # the doc is rooted in the parent of the named directory
    path, _, _ = path_to_dir.rpartition('/')
    if path == '':
        raise RuntimeError("cannot parse inclusive path " + path_to_dir)
    path += '/'
    # compile exclusion/match patterns, if any, into regexes
    ex_re = util.make_ex_re(exclusions) if exclusions else None
    match_re = util.make_match_re(matches) if matches else None
    tree = MerkleTree.create_from_file_system(
        path_to_dir, hashtype, ex_re, match_re)   # creates the hash
    doc = MerkleDoc(path, hashtype, False, tree, ex_re, match_re)
    doc.bound = True
    return doc
def do_test_multi_entry_log(self, hashtype):
    """Build a BoundLog from a three-entry log string and verify it."""
    check_hashtype(hashtype)
    (goodkey_1, goodkey_2, goodkey_3, _,
     goodkey_5, _, goodkey_7, _,) = self.get_good(hashtype)
    (time0, time1, _, _, entry1, entry2, entry3, _,
     log_w_three) = self.setup_the_server(hashtype)
    reader = StringReader(log_w_three, hashtype)
    log = BoundLog(reader, hashtype, self.u_dir, 'L')
    assert log is not None
    # header fields must match the first line of the log string
    self.assertEqual(time0, log.timestamp)
    self.assertEqual(goodkey_1, log.prev_hash)
    self.assertEqual(goodkey_2, log.prev_master)
    self.assertEqual(3, len(log))
    # each key must be present and map back to its expected entry
    for key, expected in ((goodkey_3, entry1),
                          (goodkey_5, entry2),
                          (goodkey_7, entry3)):
        self.assertTrue(key in log)
        self.assertEqual(expected, log.get_entry(key))
    # the on-disk copy must match the original serialization
    with open(self.path_to_log, 'r') as file:
        log_contents = file.read()
    self.assertEqual(log_w_three, log_contents)
    log.close()
def do_test_serialization(self, hashtype):
    """
    Verify that the serialization of the NLHTree is correct using
    a specific hash type.
    """
    check_hashtype(hashtype)
    if hashtype == HashTypes.SHA1:
        tree = NT.create_from_string_array(self.EXAMPLE1, hashtype)
    elif hashtype == HashTypes.SHA2:
        tree = NT.create_from_string_array(self.EXAMPLE2, hashtype)
    elif hashtype == HashTypes.SHA3:
        tree = NT.create_from_string_array(self.EXAMPLE3, hashtype)
    else:
        # FIX: without this clause an unexpected hash type fell
        # through and produced a confusing NameError on `tree` below
        raise NotImplementedError
    self.assertEqual(tree.hashtype, hashtype)

    # round-trip via a list of strings
    strings = []
    tree.to_strings(strings, 0)
    tree2 = NT.create_from_string_array(strings, hashtype)
    self.assertEqual(tree, tree2)

    # round-trip via a single newline-terminated string
    string = '\n'.join(strings) + '\n'
    tree3 = NT.parse(string, hashtype)
    serial3 = tree3.__str__()
    self.assertEqual(serial3, string)
    self.assertEqual(tree3, tree)

    dupe3 = tree3.clone()
    self.assertEqual(dupe3, tree3)
def create_from_serialization(string, hashtype=HashTypes.SHA2):
    """ Create a MerkleDoc from string serialization (such as a file). """
    check_hashtype(hashtype)
    if string is None:
        raise RuntimeError("MerkleDoc.createFromSerialization: no input")
    lines = string.split('\n')          # note CR-LF
    return MerkleDoc.create_from_string_array(lines, hashtype)
def do_test_random_dir(self, hashtype):
    """ Test building random directories with specific SHA hash type. """
    check_hashtype(hashtype)
    depth = 1 + self.rng.next_int16(3)          # so 1 to 3
    width = 1 + self.rng.next_int16(16)         # so 1 to 16
    blk_count = 1 + self.rng.next_int16(3)      # so 1 to 3
    # last block will usually be only partially populated
    max_len = BuildList.BLOCK_SIZE * (blk_count - 1) + \
        self.rng.next_int16(BuildList.BLOCK_SIZE)
    min_len = 1

    # pick a directory name that does not already exist
    path_to_dir = os.path.join('tmp', self.rng.next_file_name(8))
    while os.path.exists(path_to_dir):
        path_to_dir = os.path.join('tmp', self.rng.next_file_name(8))
    self.rng.next_data_dir(path_to_dir, depth, width, max_len, min_len)

    data = bytearray(max_len)       # that many null bytes
    self.rng.next_bytes(data)       # fill with random data
    if hashtype == HashTypes.SHA1:
        digest = hashlib.sha1()
    elif hashtype == HashTypes.SHA2:
        digest = hashlib.sha256()
    elif hashtype == HashTypes.SHA3:
        # pylint:disable=no-member
        digest = hashlib.sha3_256()
    elif hashtype == HashTypes.BLAKE2B:
        digest = hashlib.blake2b(digest_size=32)
    else:
        raise NotImplementedError
    digest.update(data)
    expected_hash = digest.hexdigest()

    # write the same data to a fresh file under tmp/
    file_name = self.rng.next_file_name(8)
    path_to_file = os.path.join('tmp', file_name)
    while os.path.exists(path_to_file):
        file_name = self.rng.next_file_name(8)
        path_to_file = os.path.join('tmp', file_name)
    with open(path_to_file, 'wb') as file:
        file.write(data)

    # hashing the file on disk must produce the same digest
    if hashtype == HashTypes.SHA1:
        file_hash = file_sha1hex(path_to_file)
    elif hashtype == HashTypes.SHA2:
        file_hash = file_sha2hex(path_to_file)
    elif hashtype == HashTypes.SHA3:
        file_hash = file_sha3hex(path_to_file)
    elif hashtype == HashTypes.BLAKE2B:
        file_hash = file_blake2b_hex(path_to_file)
    else:
        raise NotImplementedError
    self.assertEqual(expected_hash, file_hash)
def walk_string(string, hashtype=HashTypes.SHA2):
    """
    string is an NLHTree listing in the form of a single string with
    lines ending with newlines.  There is a newline at the end of
    the listing.
    """
    check_hashtype(hashtype)
    lines = string.split('\n')
    # a trailing newline yields an empty final element; drop it
    if lines and lines[-1] == '':
        lines.pop()
    return NLHTree._walk_strings(lines, hashtype)
def __init__(self, name, is_leaf=False, hashtype=HashTypes.SHA2):
    """Validate arguments; record name, leaf flag, and hash type."""
    check_hashtype(hashtype)
    if name is None:
        raise RuntimeError("MerkleNode: null MerkleNode name")
    stripped = name.strip()
    if len(stripped) == 0:
        raise RuntimeError("MerkleNode: null or empty name")
    self._bin_hash = None       # no hash computed yet
    self._name = stripped
    self._is_leaf = is_leaf
    self._hashtype = hashtype
def __init__(self, name, is_leaf=False, hashtype=HashTypes.SHA2):
    """Check the name and hash type, then record node attributes."""
    check_hashtype(hashtype)
    self._bin_hash = None       # computed later, if ever
    if name is None:
        raise RuntimeError("MerkleNode: null MerkleNode name")
    name = name.strip()
    if not name:
        raise RuntimeError("MerkleNode: null or empty name")
    self._name = name
    self._is_leaf = is_leaf
    self._hashtype = hashtype
def __init__(self, path, hashtype=HashTypes.SHA2, binding=False,
             tree=None,
             ex_re=None,    # exclusions, which are Regular Expressions
             match_re=None):  # matches, also Regular Expressions
    """
    Create a MerkleDoc rooted at path.

    If a tree is supplied, the doc's name is taken from the tree and
    the doc's binary hash is computed over the tree's hash plus the
    (slash-terminated) path.  If binding is True, the directory
    path/tree.name must exist on disk.

    Raises RuntimeError on a null path, a tree that is not a
    MerkleTree, or a missing bound directory.
    """
    check_hashtype(hashtype)
    if path is None:
        raise RuntimeError("null MerkleDoc path")
    if tree:
        if not isinstance(tree, MerkleTree):
            raise RuntimeError('tree is not a MerkleTree')
        self._name = name = tree.name
    elif not binding:
        raise RuntimeError('null MerkleTree and not binding')
    else:
        # binding without a tree is not supported yet
        raise RuntimeError("MerkleDoc binding not yet implemented")
    super().__init__(name, is_leaf=False, hashtype=hashtype)

    # normalize the path: stripped and slash-terminated
    path = path.strip()
    if len(path) == 0:
        raise RuntimeError("empty path")
    if not path.endswith('/'):
        path += '/'
    self._path = path
    self._tree = tree
    if tree:
        # DEBUG
        #print("MerkleDoc.__init__: usingSHA = %s" % str(usingSHA))
        # END
        # pylint:disable=redefined-variable-type
        if hashtype == HashTypes.SHA1:
            sha = hashlib.sha1()
        elif hashtype == HashTypes.SHA2:
            sha = hashlib.sha256()
        elif hashtype == HashTypes.SHA3:
            # pylint: disable=no-member
            sha = hashlib.sha3_256()
        # doc hash = hash(tree hash || utf-8 path)
        sha.update(bytes(tree.bin_hash))
        sha.update(path.encode('utf-8'))
        self._bin_hash = bytes(sha.digest())    # a binary value
    self._ex_re = ex_re
    self._match_re = match_re

    if binding:
        # the bound directory must already exist on disk
        path_to_dir = os.path.join(path, tree.name)
        if not os.path.exists(path_to_dir):
            raise RuntimeError('no directory found at ' + path_to_dir)
        else:
            # XXX STUB: BIND THE TREE
            self._bound = True
def do_build_test(self, title, hashtype):
    """ Test buildlist functionality for specific hash type. """
    check_hashtype(hashtype)
    sk_priv = RSA.generate(1024)
    sk_ = sk_priv.publickey()

    # each hash type has its own example data directory
    example_dirs = {
        HashTypes.SHA1: 'example1',
        HashTypes.SHA2: 'example2',
        HashTypes.SHA3: 'example3',
        HashTypes.BLAKE2B: 'example4',
    }
    if hashtype not in example_dirs:
        raise NotImplementedError
    path_to_data = os.path.join(example_dirs[hashtype], 'dataDir')
    blist = BuildList.create_from_file_system(
        'a trial list', path_to_data, sk_, hashtype=hashtype)

    # check properties ------------------------------------------
    self.assertEqual(blist.title, 'a trial list')
    self.assertEqual(blist.public_key, sk_)
    self.assertEqual(blist.timestamp, timestamp(0))
    self.assertEqual(blist.hashtype, hashtype)

    # check sign() and verify() ---------------------------------
    self.assertEqual(blist, blist)
    self.assertFalse(blist.verify())    # not signed yet
    blist.sign(sk_priv)
    sig = blist.dig_sig                 # the base64-encoded value
    self.assertTrue(sig is not None)
    self.assertTrue(blist.verify())     # it has been signed

    # equality, serialization, deserialization ------------------
    self.assertEqual(blist, blist)
    bl_string = blist.__str__()
    tree_string = blist.tree.__str__()
    # DEBUG
    # print("SIGNED BUILD LIST:\n%s" % bl_string)
    # END
    bl2 = BuildList.parse(bl_string, hashtype)
    tree_string2 = bl2.tree.__str__()
    # DEBUG
    # print("ROUNDTRIPPED:\n%s" % bl2.__str__())
    # END
    self.assertEqual(tree_string, tree_string2)
    self.assertEqual(blist, blist)      # same list, but signed now
def walk_strings(strings, hashtype=HashTypes.SHA2):
    """
    For each line in the NLHTree listing, return either the relative
    path to a directory (including the directory name) or the relative
    path to a file plus its SHA1 hex hash.  Each of these is a tuple:
    the former is a singleton, and the latter is a 2-tuple.

    The NLHTree listing is in the form of a list of lines.

    COMMENTS AND BLANK LINES ARE NOT YET SUPPORTED.
    """
    # validate the hash type, then delegate to the internal walker
    check_hashtype(hashtype)
    return NLHTree._walk_strings(strings, hashtype)
def do_test_with_opens_and_closes(self, hashtype):
    """Exercise BoundLog persistence across repeated open/close cycles."""
    check_hashtype(hashtype)
    (goodkey_1, goodkey_2, goodkey_3, goodkey_4,
     goodkey_5, goodkey_6, goodkey_7, goodkey_8,) = self.get_good(hashtype)
    (time0, time1, time2, time3, entry1, entry2, entry3,
     empty_log, log_w_three) = self.setup_the_server(hashtype)

    # open a log on the empty header and check the header fields
    reader = StringReader(empty_log, hashtype)
    log = BoundLog(reader, hashtype, self.u_dir)
    assert log is not None
    self.assertEqual(time0, log.timestamp)
    self.assertEqual(goodkey_1, log.prev_hash)
    self.assertEqual(goodkey_2, log.prev_master)
    self.assertEqual(0, len(log))
    log.close()

    # reopen the log from disk three times, adding one entry per cycle
    cycles = (
        (time1, goodkey_3, goodkey_4, 'e@document1', entry1),
        (time2, goodkey_5, goodkey_6, 'e@document2', entry2),
        (time3, goodkey_7, goodkey_8, 'e@document3', entry3),
    )
    for count, (when, key, master, doc, expected) in enumerate(cycles, 1):
        reader = FileReader(self.u_dir, hashtype)
        log = BoundLog(reader, hashtype)
        log.add_entry(when, key, master, 'jdd', doc)
        self.assertEqual(count, len(log))
        self.assertEqual(expected, log.get_entry(key))
        self.assertTrue(key in log)
        if count == 1:
            # second key has not been added yet
            self.assertFalse(goodkey_5 in log)
        log.close()

    # the on-disk log must equal the full three-entry serialization
    with open(self.path_to_log, 'r') as file:
        log_contents = file.read()
    self.assertEqual(log_w_three, log_contents)
def do_test_spot_check_tree(self, hashtype):
    """
    Run spot checks on the example files for the specified hash type.
    """
    check_hashtype(hashtype)
    # DEBUG
    # print("\nSPOT CHECKS")
    # END
    if hashtype == HashTypes.SHA1:
        rel_path_to_data = 'example1/dataDir'
    else:
        rel_path_to_data = 'example2/dataDir'
    tree = NLHTree.create_from_file_system(rel_path_to_data, hashtype)
    self.assertIsNotNone(tree)
    self.assertEqual(len(tree.nodes), 6)
    self.assertEqual(tree.name, 'dataDir')

    # the first two children are the leaf files data1 and data2
    for ndx, leaf_name in ((0, 'data1'), (1, 'data2')):
        leaf = tree.nodes[ndx]
        self.assertTrue(isinstance(leaf, NLHLeaf))
        self.assertEqual(leaf.name, leaf_name)

    sub_dir1 = tree.nodes[2]
    self.assertFalse(isinstance(sub_dir1, NLHLeaf))
    self.assertEqual(sub_dir1.name, 'subDir1')
    self.assertEqual(len(sub_dir1.nodes), 2)

    # walk the single-child chain subDir4 -> subDir41 -> subDir411
    node = tree.nodes[5]
    for expected_name in ('subDir4', 'subDir41', 'subDir411'):
        self.assertFalse(isinstance(node, NLHLeaf))
        self.assertEqual(node.name, expected_name)
        self.assertEqual(len(node.nodes), 1)
        node = node.nodes[0]
    # the chain bottoms out in the leaf data31
    self.assertTrue(isinstance(node, NLHLeaf))
    self.assertEqual(node.name, 'data31')
def do_test_consructor(self, hashtype):
    """ Test the LogEntry constructor using a specific SHA hash type. """
    # NOTE(review): the method name is misspelled ("consructor") but is
    # kept as-is because callers reference it by name.
    check_hashtype(hashtype)
    goodkey_1, goodkey_2 = self.get_keys(hashtype)
    entry = LogEntry(time.time(), goodkey_1, goodkey_2, 'jdd', 'document1')
    assert entry is not None
    # the entry must have been timestamped within the last few seconds
    delta_t = time.time() - entry.timestamp
    self.assertTrue(0 <= delta_t <= 5)
    self.assertEqual(goodkey_1, entry.key)
    self.assertEqual(goodkey_2, entry.node_id)
    self.assertEqual('jdd', entry.src)
    self.assertEqual('document1', entry.path)
def do_test_equals(self, hashtype):
    """ Test the LogEntry __eq__ function for a specific SHA hash type."""
    check_hashtype(hashtype)
    goodkey_1, goodkey_2 = self.get_keys(hashtype)
    # two timestamps 500 seconds apart, both in the past
    time1 = time.time() - 1000
    time2 = time1 + 500
    entry1 = LogEntry(time1, goodkey_1, goodkey_2, 'jdd', 'document1')
    entry2 = LogEntry(time2, goodkey_1, goodkey_2, 'jdd', 'document1')
    entry3 = LogEntry(time1, goodkey_1, goodkey_2, 'jdd', 'document1')
    self.assertTrue(entry1 == entry1)       # identical object
    self.assertTrue(entry1 == entry3)       # same attributes
    self.assertFalse(entry1 == entry2)      # times differ
    self.assertFalse(entry2 == entry3)      # times differ
def get_keys(self, hashtype):
    """
    Return a pair of content keys useful in tests involving a
    specific SHA hash type.
    """
    check_hashtype(hashtype)
    if hashtype == HashTypes.SHA1:
        goodkey_1 = '0123456789012345678901234567890123456789'
        goodkey_2 = 'fedcba9876543210fedcba9876543210fedcba98'
    else:
        # dummy data good for any of SHA2, SHA3, BLAKE2B
        # NOTE(review): goodkey_1 contains 'ghi', which is not valid
        # hex -- presumably deliberate dummy data, but verify in case
        # keys are ever validated as hex strings.
        goodkey_1 = ('0123456789012345678901234567890123'
                     '456789abcdefghi0123456789abcde')
        goodkey_2 = ('fedcba9876543210fedcba9876543210fe'
                     'dcba98012345678901234567890123')
    return goodkey_1, goodkey_2
def verify_leaf_sha(self, node, path_to_file, hashtype):
    """Recompute the file's digest and compare with the leaf's hash."""
    check_hashtype(hashtype)
    self.assertTrue(os.path.exists(path_to_file))
    with open(path_to_file, "rb") as file:
        data = file.read()
    self.assertFalse(data is None)
    if hashtype == HashTypes.SHA1:
        sha = hashlib.sha1()
    elif hashtype == HashTypes.SHA2:
        sha = hashlib.sha256()
    elif hashtype == HashTypes.SHA3:
        # pylint: disable=no-member
        sha = hashlib.sha3_256()
    sha.update(data)
    self.assertEqual(sha.digest(), node.bin_hash)
def do_test_import(self, hashtype):
    """Import a populated uDir into an empty one; verify the contents."""
    check_hashtype(hashtype)
    # pick unused source and destination directory names
    src_path = os.path.join(DATA_PATH, RNG.next_file_name(16))
    while os.path.exists(src_path):
        src_path = os.path.join(DATA_PATH, RNG.next_file_name(16))
    dest_path = os.path.join(DATA_PATH, RNG.next_file_name(16))
    while os.path.exists(dest_path):
        dest_path = os.path.join(DATA_PATH, RNG.next_file_name(16))

    # create a collection of data files
    file_map = self.make_some_files(hashtype)
    # file_count = len(file_map)

    # create an empty source directory, populate it, shut down server.
    # FIX: construct u_dir0 OUTSIDE the try block -- previously, if
    # construct_empty_u_dir() itself raised, the finally clause hit a
    # NameError on the still-unbound u_dir0, masking the real error.
    u_dir0 = self.construct_empty_u_dir(src_path, hashtype)
    try:
        self.populate_empty(u_dir0, file_map, hashtype)
    finally:
        u_dir0.close()

    # create an empty destination dir
    u_dir1 = self.construct_empty_u_dir(dest_path, hashtype)
    u_dir1.close()

    # create and invoke the importer
    importer = Importer(src_path, dest_path,
                        'testImport ' + __version__, hashtype)
    importer.do_import_u_dir()

    # verify that the files got there
    server2 = BlockingServer(dest_path, hashtype)
    self.assertIsNotNone(server2)
    self.assertTrue(os.path.exists(server2.u_path))
    self.assertEqual(server2.hashtype, hashtype)
    log = server2.log
    for key in file_map:
        server2.exists(key)
        entry = log.get_entry(key)
        self.assertIsNotNone(entry)
    server2.close()
    self.assertTrue(os.path.exists(os.path.join(dest_path, 'L')))
def do_test_simple_constructor(self, hashtype):
    """ Test constructor for specific hash. """
    check_hashtype(hashtype)
    if hashtype == HashTypes.SHA1:
        digest = hashlib.sha1()
    elif hashtype == HashTypes.SHA2:
        digest = hashlib.sha256()
    elif hashtype == HashTypes.SHA3:
        digest = hashlib.sha3_256()
    elif hashtype == HashTypes.BLAKE2B:
        digest = hashlib.blake2b(digest_size=32)
    else:
        raise NotImplementedError

    # first leaf: random name and a digest over random bytes
    name = self.rng.next_file_name(8)
    data = self.rng.some_bytes(8)
    self.rng.next_bytes(data)
    digest.update(data)
    hash0 = digest.digest()
    leaf0 = NLHLeaf(name, hash0, hashtype)
    self.assertEqual(name, leaf0.name)
    self.assertEqual(hash0, leaf0.bin_hash)

    # second leaf: name guaranteed to differ from the first
    name2 = name
    while name2 == name:
        name2 = self.rng.next_file_name(8)
    data = self.rng.some_bytes(8)
    self.rng.next_bytes(data)
    digest.update(data)
    hash1 = digest.digest()
    leaf1 = NLHLeaf(name2, hash1, hashtype)
    self.assertEqual(name2, leaf1.name)
    self.assertEqual(hash1, leaf1.bin_hash)

    # leaves equal themselves, differ from each other, and clone cleanly
    self.assertEqual(leaf0, leaf0)
    self.assertEqual(leaf1, leaf1)
    self.assertFalse(leaf0 == leaf1)
    leaf0c = leaf0.clone()
    self.assertEqual(leaf0c, leaf0)
    leaf1c = leaf1.clone()
    self.assertEqual(leaf1c, leaf1)
def do_test_pathless_unbound(self, hashtype):
    """
    Test basic characteristics of very simple MerkleTrees created
    using a specific SHA hash type.
    """
    (dir_name1, dir_name2) = self.get_two_unique_directory_names()
    check_hashtype(hashtype)

    tree1 = MerkleTree(dir_name1, hashtype)
    self.assertEqual(dir_name1, tree1.name)
    # an empty tree's hex hash is the "hash of nothing" for that type
    if hashtype == HashTypes.SHA1:
        expected_hex = SHA1_HEX_NONE
    elif hashtype == HashTypes.SHA2:
        expected_hex = SHA2_HEX_NONE
    elif hashtype == HashTypes.SHA3:
        expected_hex = SHA3_HEX_NONE
    elif hashtype == HashTypes.BLAKE2B_256:
        # NOTE(review): other test modules use HashTypes.BLAKE2B;
        # confirm the BLAKE2B_256 member name against the HashTypes
        # enum this module imports.
        expected_hex = BLAKE2B_256_HEX_NONE
    else:
        raise NotImplementedError
    self.assertEqual(expected_hex, tree1.hex_hash)

    tree2 = MerkleTree(dir_name2, hashtype)
    self.assertEqual(dir_name2, tree2.name)

    # these tests remain skimpy
    self.assertFalse(tree1 is None)
    self.assertTrue(tree1 == tree1)
    self.assertFalse(tree1 == tree2)

    tree1_str = tree1.to_string(0)
    # there should be no indent on the first line
    self.assertFalse(tree1_str[0] == ' ')
    # the serialization ends with CR-LF, so splitting on '\n' adds an
    # extra blank line which we drop before counting
    lines = tree1_str.split('\n')
    if lines[-1] == '':
        lines = lines[:-1]
    self.assertEqual(1, len(lines))

    tree1_rebuilt = MerkleTree.create_from_serialization(
        tree1_str, hashtype)
    self.assertTrue(tree1 == tree1_rebuilt)
def do_test_add_entry(self, hashtype):
    """ Add sample entries to log using selected SHA hash type. """
    check_hashtype(hashtype)
    (goodkey_1, goodkey_2, goodkey_3, goodkey_4,
     goodkey_5, goodkey_6, goodkey_7, goodkey_8,) = self.get_good(hashtype)
    # four timestamps, 100s apart, all in the past
    time0 = int(time.time()) - 10000
    time1 = time0 + 100
    time2 = time1 + 100
    time3 = time2 + 100
    empty_log = "%013u %s %s\n" % (time0, goodkey_1, goodkey_2)
    entry1 = LogEntry(time1, goodkey_3, goodkey_4, 'jdd', 'document1')
    entry2 = LogEntry(time2, goodkey_5, goodkey_6, 'jdd', 'document2')
    entry3 = LogEntry(time3, goodkey_7, goodkey_8, 'jdd', 'document3')
    # test_log = empty_log + str(entry1) + str(entry2) + str(entry3)
    reader = StringReader(empty_log, hashtype)
    log = Log(reader, hashtype)
    assert log is not None
    self.assertEqual(time0, log.timestamp)
    self.assertEqual(goodkey_1, log.prev_hash)
    self.assertEqual(goodkey_2, log.prev_master)
    self.assertEqual(0, len(log))
    # add the three entries one at a time, verifying after each
    for count, (when, key, master, doc, expected) in enumerate(
            ((time1, goodkey_3, goodkey_4, 'document1', entry1),
             (time2, goodkey_5, goodkey_6, 'document2', entry2),
             (time3, goodkey_7, goodkey_8, 'document3', entry3)),
            start=1):
        log.add_entry(when, key, master, 'jdd', doc)
        self.assertEqual(count, len(log))
        self.assertEqual(expected, log.get_entry(key))
        self.assertTrue(key in log)
        if count == 1:
            # the second key has not been added yet
            self.assertFalse(goodkey_5 in log)
def __init__(self, path, hashtype=HashTypes.SHA2, binding=False, tree=None,
             ex_re=None,      # exclusions, which are Regular Expressions
             match_re=None):  # matches, also Regular Expressions
    """
    Wrap a MerkleTree in a document bound to a file-system path.

    path     -- directory path the tree describes; must be non-null and,
                after stripping, non-empty; a trailing '/' is appended
                if missing
    hashtype -- member of HashTypes selecting the digest algorithm
    binding  -- if True, verify that path/tree.name exists on disk
    tree     -- the MerkleTree being wrapped; required (constructing a
                bound doc without a tree is not yet implemented)

    Raises RuntimeError on a null/empty path, a non-MerkleTree tree,
    a missing tree when not binding, or a missing bound directory.
    """
    check_hashtype(hashtype)
    if path is None:
        raise RuntimeError("null MerkleDoc path")
    if tree:
        if not isinstance(tree, MerkleTree):
            raise RuntimeError('tree is not a MerkleTree')
        # bind name here: super().__init__ below requires it
        self._name = name = tree.name
    elif not binding:
        raise RuntimeError('null MerkleTree and not binding')
    else:
        raise RuntimeError("MerkleDoc binding not yet implemented")
    super().__init__(name, is_leaf=False, hashtype=hashtype)

    path = path.strip()
    if not path:
        raise RuntimeError("empty path")
    # normalize so the hash input always ends with a path separator
    if not path.endswith('/'):
        path += '/'
    self._path = path
    self._tree = tree
    if tree:
        # DEBUG
        # print("MerkleDoc.__init__: usingSHA = %s" % str(usingSHA))
        # END
        # doc hash = H(tree hash || utf-8 path), binding tree to location
        sha = get_hash_func(hashtype)
        sha.update(bytes(tree.bin_hash))
        sha.update(path.encode('utf-8'))
        self._bin_hash = bytes(sha.digest())  # a binary value

    self._ex_re = ex_re
    self._match_re = match_re

    if binding:
        path_to_dir = os.path.join(path, tree.name)
        if not os.path.exists(path_to_dir):
            raise RuntimeError('no directory found at ' + path_to_dir)
        else:
            # XXX STUB: BIND THE TREE
            self._bound = True
def do_test_peer(self, hashtype):
    """
    Simple integrity checks on the Peer type.

    Verifies construction, the write-once node_ndx property, the three
    initially-empty collections, and that node_ndx rejects non-integer
    and negative values.
    """
    check_hashtype(hashtype)
    if hashtype == HashTypes.SHA1:
        node_id = bytearray(20)
    else:
        # 32-byte key
        node_id = bytearray(32)
    RNG.next_bytes(node_id)
    pub_key = bytearray(162)        # number not to be taken seriously
    RNG.next_bytes(pub_key)
    peer = Peer(node_id, pub_key, hashtype)
    self.assertEqual(node_id, peer.node_id)
    self.assertEqual(pub_key, peer.rsa_pub_key)
    self.assertIsNone(peer.node_ndx)
    peer.node_ndx = 42

    # FIX: the old "try / self.fail / except BaseException: pass" pattern
    # swallowed the AssertionError raised by self.fail(), so these negative
    # tests could never actually fail.  assertRaises reports correctly.
    with self.assertRaises(BaseException):
        # node_ndx is write-once; a second assignment must be rejected
        peer.node_ndx = 43
    self.assertEqual(42, peer.node_ndx)

    # expect three empty lists
    self.assertEqual(0, len(peer.cnx))
    self.assertEqual(0, len(peer.ip_addr))
    self.assertEqual(0, len(peer.fqdn))

    # verify that nodeNdx must be non-negative integer
    peer2 = Peer(node_id, pub_key, hashtype)
    with self.assertRaises(BaseException):
        peer2.node_ndx = 'sugar'

    peer3 = Peer(node_id, pub_key, hashtype)
    with self.assertRaises(BaseException):
        peer3.node_ndx = -19
def verify_tree_hash(self, node, path_to_tree, hashtype):
    """
    Given a MerkleTree, verify that it correctly describes the
    directory whose path is passed.
    """
    # we assume that the node is a MerkleTree
    check_hashtype(hashtype)
    if node.nodes is None:
        self.assertEqual(None, node.bin_hash)
        return

    # pylint: disable=no-member
    digest_cls_for = {
        HashTypes.SHA1: XLSHA1,
        HashTypes.SHA2: XLSHA2,
        HashTypes.SHA3: XLSHA3,
        HashTypes.BLAKE2B_256: XLBLAKE2B_256,
    }
    if hashtype not in digest_cls_for:
        raise NotImplementedError
    sha = digest_cls_for[hashtype]()

    hash_count = 0
    for child in node.nodes:
        child_path = os.path.join(path_to_tree, child.name)
        if isinstance(child, MerkleLeaf):
            self.verify_leaf_hash(child, child_path, hashtype)
        elif isinstance(child, MerkleTree):
            self.verify_tree_hash(child, child_path, hashtype)
        else:
            print("DEBUG: unknown node type!")
            self.fail("unknown node type!")
        if child.bin_hash is not None:
            hash_count += 1
            sha.update(child.bin_hash)

    # a tree whose children contributed no hashes has no hash itself
    if hash_count == 0:
        self.assertEqual(None, node.bin_hash)
    else:
        self.assertEqual(sha.digest(), node.bin_hash)
def calc_id_and_pub_keys_for_node(hash_types, sk_priv, ck_priv):
    """
    Calculate the nodeID from the ck_ public key.

    hash_types -- member of HashTypes; only SHA1 and SHA2 are supported
    sk_priv    -- private signing key; its public half is returned
    ck_priv    -- private comms key; its public half is returned

    Returns (node_id, sk_, ck_) where node_id is the 160/256-bit binary
    digest and sk_/ck_ are the corresponding public keys.

    Raises NotImplementedError for unsupported hash types.
    """
    check_hashtype(hash_types)
    sk_ = sk_priv.public_key()
    ck_ = ck_priv.public_key()
    # NOTE(review): the docstring says the nodeID comes from the ck_ key,
    # but the bytes hashed here are sk_'s PEM serialization -- confirm
    # which key nodeIDs are meant to be bound to before changing.
    pem_ck = sk_.public_bytes(
        encoding=serialization.Encoding.PEM,
        format=serialization.PublicFormat.PKCS1)
    if hash_types == HashTypes.SHA1:
        sha_ = hashes.SHA1
    elif hash_types == HashTypes.SHA2:
        sha_ = hashes.SHA256
    else:
        # FIX: previously fell through and raised NameError on unbound
        # sha_; fail explicitly for SHA3/BLAKE2B_256 and any new types
        raise NotImplementedError(hash_types)
    sha = hashes.Hash(sha_(), backend=default_backend())
    sha.update(pem_ck)
    node_id = sha.finalize()
    # DEBUG
    # Node.dump_hex("Get SHA%d Node" % hash_types, node_id)
    # END
    return (node_id,    # nodeID = 160/256 bit BINARY value
            sk_, ck_)   # public keys, from private keys
def do_test_bound_needle_dirs(self, hashtype):
    """ Run tests on two deeper directories. """
    check_hashtype(hashtype)
    (dir_name1, dir_path1, dir_name2, dir_path2) = \
        self.make_two_test_directories(FOUR, ONE)

    # identical checks for each of the two directories
    docs = []
    for dir_name, dir_path in ((dir_name1, dir_path1),
                               (dir_name2, dir_path2)):
        doc = MerkleDoc.create_from_file_system(dir_path, hashtype)
        tree = doc.tree                     # pylint: disable=no-member
        self.assertEqual(dir_name, tree.name)
        self.assertTrue(doc.bound)
        self.assertEqual(("tmp/%s" % dir_name), dir_path)
        # pylint: disable=no-member
        children = tree.nodes
        self.assertTrue(children is not None)
        self.assertEqual(ONE, len(children))
        self.verify_tree_hash(tree, dir_path, hashtype)
        docs.append(doc)
    doc1, doc2 = docs

    self.assertTrue(doc1 == doc1)
    self.assertFalse(doc1 == doc2)

    serialized = doc1.to_string()
    rebuilt = MerkleDoc.create_from_serialization(serialized, hashtype)
    # # DEBUG
    # print "needle doc:\n" + doc1Str
    # print "rebuilt needle doc:\n" + doc1Rebuilt.toString()
    # # END
    self.assertTrue(doc1 == rebuilt)
def create_from_string_array(string, hashtype=HashTypes.SHA2):
    """
    The string array is expected to follow conventional indentation
    rules, with zero indentation on the first line and some number
    of leading spaces on all successive lines.
    """
    check_hashtype(hashtype)
    if string is None:
        raise RuntimeError('null argument')
    # XXX check TYPE - must be array of strings
    if not string:
        raise RuntimeError("empty string array")

    (doc_hash, doc_path) = \
        MerkleDoc.parse_first_line(string[0].rstrip())
    len_hash = len(doc_hash)

    # a SHA1-length hash must be paired with SHA1; anything else must
    # at least be the SHA2/SHA3/BLAKE2B length
    bad_combo = (
        (len_hash == SHA1_BIN_LEN and hashtype != HashTypes.SHA1) or
        (len_hash != SHA1_BIN_LEN and len_hash != SHA2_BIN_LEN))
    if bad_combo:
        raise RuntimeError("hash length %d inconsistent with %s" % (
            len_hash, hashtype))

    # DEBUG
    # print("MerkleDoc.createFromStringArray:")
    # print("  docHash = %s" % str(binascii.b2a_hex(docHash),'ascii'))
    # print("  docPath = %s" % docPath)
    # print("  usingSHA=%s" % str(usingSHA))
    # END
    tree = MerkleTree.create_from_string_array(string[1:], hashtype)
    return MerkleDoc(doc_path, hashtype=hashtype, tree=tree)
def do_test_simple_constructor(self, hashtype):
    """
    Test MerkleLeaf constructor for a specific SHA type: two leaves
    with distinct names and hashes must compare unequal.
    """
    check_hashtype(hashtype)
    if hashtype == HashTypes.SHA1:
        sha = XLSHA1()
    elif hashtype == HashTypes.SHA2:
        sha = XLSHA2()
    elif hashtype == HashTypes.SHA3:
        sha = XLSHA3()
    elif hashtype == HashTypes.BLAKE2B_256:
        # FIX: was HashTypes.BLAKE2B, inconsistent with the
        # HashTypes.BLAKE2B_256 member used everywhere else in this file
        sha = XLBLAKE2B_256()
    else:
        raise NotImplementedError

    file_name = self.rng.next_file_name(8)
    nnn = self.rng.some_bytes(8)
    sha.update(nnn)
    hash0 = sha.digest()

    leaf0 = MerkleLeaf(file_name, hashtype, hash0)
    self.assertEqual(file_name, leaf0.name)
    self.assertEqual(hash0, leaf0.bin_hash)

    # pick a second, different file name
    file_name2 = file_name
    while file_name2 == file_name:
        file_name2 = self.rng.next_file_name(8)
    nnn = self.rng.some_bytes(8)
    # NOTE(review): this extra next_bytes call looks redundant after
    # some_bytes, but it feeds the digest input, so it is kept as-is
    self.rng.next_bytes(nnn)
    sha.update(nnn)
    hash1 = sha.digest()
    leaf1 = MerkleLeaf(file_name2, hashtype, hash1)
    self.assertEqual(file_name2, leaf1.name)
    self.assertEqual(hash1, leaf1.bin_hash)

    self.assertTrue(leaf0 == leaf0)
    self.assertFalse(leaf0 == leaf1)
def verify_leaf_hash(self, node, path_to_file, hashtype):
    """
    Verify that a MerkleLeaf correctly describes a file, given a hash
    type.
    """
    check_hashtype(hashtype)
    self.assertTrue(os.path.exists(path_to_file))
    with open(path_to_file, "rb") as file:
        data = file.read()
    self.assertFalse(data is None)

    # pylint: disable=no-member
    digest_cls_for = {
        HashTypes.SHA1: XLSHA1,
        HashTypes.SHA2: XLSHA2,
        HashTypes.SHA3: XLSHA3,
        HashTypes.BLAKE2B_256: XLBLAKE2B_256,
    }
    if hashtype not in digest_cls_for:
        raise NotImplementedError
    sha = digest_cls_for[hashtype]()
    sha.update(data)
    self.assertEqual(sha.digest(), node.bin_hash)