def testCreateAndAppendSinglePathImage(self):
    try:
        try:
            os.unlink(self.containerName)
        except OSError:
            pass

        container_urn = rdfvalue.URN.FromFileName(self.containerName)
        resolver = data_store.MemoryDataStore()
        urn = None

        frag1path = os.path.join(
            self.testImagesPath,
            "paper-hash_based_disk_imaging_using_aff4.pdf.frag.1")

        with container.Container.createURN(resolver, container_urn) as volume:
            with open(frag1path, "rb") as src:
                stream = linear_hasher.StreamHasher(src, [lexicon.HASH_SHA1])
                urn = volume.writeLogicalStreamHashBased(
                    frag1path, stream, 32768, False)
                for h in stream.hashes:
                    hh = hashes.newImmutableHash(
                        h.hexdigest(), stream.hashToType[h])
                    self.assertEqual(
                        "deb3fa3b60c6107aceb97f684899387c78587eae", hh.value)
                    resolver.Add(volume.urn, urn,
                                 rdfvalue.URN(lexicon.standard.hash), hh)

        frag2path = os.path.join(
            self.testImagesPath,
            "paper-hash_based_disk_imaging_using_aff4.pdf.frag.2")

        with container.Container.openURNtoContainer(
                container_urn, mode="+") as volume:
            with open(frag2path, "rb") as src:
                stream = linear_hasher.StreamHasher(
                    src, [lexicon.HASH_SHA1, lexicon.HASH_MD5])
                urn = volume.writeLogicalStreamHashBased(
                    frag2path, stream, 2 * 32768, False)
                for h in stream.hashes:
                    hh = hashes.newImmutableHash(
                        h.hexdigest(), stream.hashToType[h])
                    resolver.Add(volume.urn, urn,
                                 rdfvalue.URN(lexicon.standard.hash), hh)

        with container.Container.openURNtoContainer(container_urn) as volume:
            images = sorted(volume.images(),
                            key=lambda x: utils.SmartUnicode(x.pathName))
            self.assertEqual(2, len(images), "Only two logical images")

            fragmentA = escaping.member_name_for_urn(
                images[0].urn.value, volume.version,
                base_urn=volume.urn, use_unicode=True)
            fragmentB = escaping.member_name_for_urn(
                images[1].urn.value, volume.version,
                base_urn=volume.urn, use_unicode=True)
            self.assertTrue(fragmentA.endswith(
                "paper-hash_based_disk_imaging_using_aff4.pdf.frag.1"))
            self.assertTrue(fragmentB.endswith(
                "paper-hash_based_disk_imaging_using_aff4.pdf.frag.2"))

            hasher = linear_hasher.LinearHasher2(volume.resolver, self)
            for image in volume.images():
                print("\t%s <%s>" % (image.name(), image.urn))
                hasher.hash(image)
    except Exception:
        traceback.print_exc()
        self.fail()
    finally:
        # os.unlink(containerName)
        pass
def calculateBlockMapHash(self, mapStreamURI, imageStreamURI, storedHashDataType):
    # Sort the stored block hashes deterministically before folding them into
    # the composite hash. functools.cmp_to_key adapts the Python 2 style
    # comparator for Python 3 (functools must be imported at module level).
    storedBlockHashesHash = sorted(
        self.getStoredBlockHashes(str(imageStreamURI)),
        key=functools.cmp_to_key(hashLengthComparator))

    calculatedHash = hashes.new(storedHashDataType)
    for storedHash in storedBlockHashesHash:
        calculatedHash.update(storedHash.digest())

    calculatedHash.update(next(self.resolver.QuerySubjectPredicate(
        mapStreamURI, self.lexicon.mapPointHash)).digest())

    calculatedHash.update(next(self.resolver.QuerySubjectPredicate(
        mapStreamURI, self.lexicon.mapIdxHash)).digest())

    # the mapPath segment is optional, so its hash may be absent
    try:
        calculatedHash.update(next(self.resolver.QuerySubjectPredicate(
            mapStreamURI, self.lexicon.mapPathHash)).digest())
    except Exception:
        pass

    return hashes.newImmutableHash(calculatedHash.hexdigest(), storedHashDataType)
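# Illustrative sketch only: hashLengthComparator is defined elsewhere in this
# module and is not shown here. Assuming it orders stored block hashes by
# digest length (shortest first), an equivalent cmp-style comparator would
# look like this hypothetical helper:
#
# def hashLengthComparator(a, b):
#     # negative if a sorts before b, positive if after, zero if equal
#     return len(a.digest()) - len(b.digest())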
def calculateBlockMapHash(self, mapStreamURI, imageStreamURI, storedHashDataType):
    # Order the stored block hashes by algorithm so the composite hash is
    # computed over a stable sequence.
    storedBlockHashesHash = sorted(
        self.getStoredBlockHashes(str(imageStreamURI)),
        key=lambda x: hashOrderingMap[x.blockHashAlgo])

    calculatedHash = hashes.new(storedHashDataType)
    for storedHash in storedBlockHashesHash:
        calculatedHash.update(storedHash.digest())

    for storedHash in self.resolver.QuerySubjectPredicate(
            self.volume_arn, mapStreamURI, self.lexicon.mapPointHash):
        calculatedHash.update(storedHash.digest())

    for storedHash in self.resolver.QuerySubjectPredicate(
            self.volume_arn, mapStreamURI, self.lexicon.mapIdxHash):
        calculatedHash.update(storedHash.digest())

    for storedHash in self.resolver.QuerySubjectPredicate(
            self.volume_arn, mapStreamURI, self.lexicon.mapPathHash):
        calculatedHash.update(storedHash.digest())

    return hashes.newImmutableHash(calculatedHash.hexdigest(), storedHashDataType)
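# Illustrative sketch only: hashOrderingMap is defined elsewhere and is not
# shown here. It is assumed to map each block-hash algorithm to a stable sort
# rank so the composite hash is order-independent of storage, e.g.:
#
# hashOrderingMap = {lexicon.HASH_MD5: 1,
#                    lexicon.HASH_SHA1: 2,
#                    lexicon.HASH_SHA256: 3}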
def createAndReadSinglePathImagePush(self, containerName, pathName,
                                     arnPathFragment, minImageStreamSize):
    try:
        hasher = linear_hasher.PushHasher(
            [lexicon.HASH_SHA1, lexicon.HASH_MD5])

        container_urn = rdfvalue.URN.FromFileName(containerName)
        with data_store.MemoryDataStore() as resolver:
            with container.Container.createURN(resolver, container_urn) as volume:
                volume.maxSegmentResidentSize = minImageStreamSize
                with volume.newLogicalStream(pathName, 20) as writer:
                    writer_arn = writer.urn

                    # add in some data using the Push API, hashing while we go
                    data = u"helloworld"
                    data_bytes = data.encode("utf-8")
                    writer.Write(data_bytes)
                    hasher.update(data_bytes)
                    writer.Write(data_bytes)
                    hasher.update(data_bytes)

                # write in the hashes before auto-close
                for h in hasher.hashes:
                    hh = hashes.newImmutableHash(
                        h.hexdigest(), hasher.hashToType[h])
                    volume.resolver.Add(
                        volume.urn, writer_arn,
                        rdfvalue.URN(lexicon.standard.hash), hh)

            with container.Container.openURNtoContainer(
                    container_urn) as volume:
                images = list(volume.images())
                self.assertEqual(1, len(images), "Only one logical image")
                self.assertEqual(pathName, images[0].name(),
                                 "unicode filename should be preserved")

                fragment = escaping.member_name_for_urn(
                    images[0].urn.value, volume.version,
                    base_urn=volume.urn, use_unicode=True)
                self.assertEqual(arnPathFragment, fragment)

                try:
                    with volume.resolver.AFF4FactoryOpen(images[0].urn) as fd:
                        txt = fd.ReadAll()
                        self.assertEqual(b"helloworldhelloworld", txt,
                                         "content should be same")
                except Exception:
                    traceback.print_exc()
                    self.fail()
    except Exception:
        traceback.print_exc()
        self.fail()
    finally:
        os.unlink(containerName)
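# Illustrative usage sketch, kept commented out so it is not collected as a
# real test. The container name, logical path, and expected ARN fragment are
# hypothetical values; the correct fragment depends on the escaping rules in
# escaping.arnPathFragment_from_path.
#
# def testPushSmallImageStream(self):
#     self.createAndReadSinglePathImagePush(
#         os.path.join(tempfile.gettempdir(), "test-push-example.aff4"),
#         u"/foo/bar.txt", u"foo/bar.txt", 4096)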
def calculateSegmentHash(self, parentURI, subSegment, hashDataType):
    calculatedHash = hashes.new(hashDataType)

    data = self.readSegment(parentURI, subSegment)
    if data is None:
        raise Exception("Unable to read segment %s in %s" % (subSegment, parentURI))

    calculatedHash.update(data)
    return hashes.newImmutableHash(calculatedHash.hexdigest(), hashDataType)
def ingestZipfile(container_name, zipfiles, append, check_bytes):
    # TODO: check that the input paths exist
    start = time.time()
    with data_store.MemoryDataStore() as resolver:
        container_urn = rdfvalue.URN.FromFileName(container_name)
        urn = None
        if not os.path.exists(container_name):
            volume = container.Container.createURN(resolver, container_urn)
            print("Creating AFF4Container: file://%s <%s>" % (container_name, volume.urn))
        else:
            volume = container.Container.openURNtoContainer(container_urn, mode="+")
            print("Appending to AFF4Container: file://%s <%s>" % (container_name, volume.urn))
            resolver = volume.resolver

        with volume as volume:
            for zipfile in zipfiles:
                basefilename = os.path.basename(zipfile)
                if basefilename.endswith(".bag.zip"):
                    basefilename = basefilename[:-len(".bag.zip")]

                filename_arn = rdfvalue.URN.FromFileName(zipfile)

                # the following coaxes our ZIP implementation to treat this
                # file as a regular old zip
                result = zip.BasicZipFile(resolver, urn=None, version=version.basic_zip)
                resolver.Set(lexicon.transient_graph, result.urn, lexicon.AFF4_TYPE,
                             rdfvalue.URN("StandardZip"))
                resolver.Set(lexicon.transient_graph, result.urn, lexicon.AFF4_STORED,
                             rdfvalue.URN(filename_arn))

                with resolver.AFF4FactoryOpen(result.urn, version=version.basic_zip) as zip_file:
                    for member in zip_file.members:
                        info = zip_file.members[member]
                        pathname = basefilename + member.SerializeToString()[
                            len(result.urn.SerializeToString()):]
                        print(pathname)

                        with resolver.AFF4FactoryOpen(member, version=version.aff4v10) as src:
                            hasher = linear_hasher.StreamHasher(
                                src, [lexicon.HASH_SHA1, lexicon.HASH_MD5])
                            if volume.containsLogicalImage(pathname):
                                print("\tCollision: this ARN is already present in this volume.")
                                continue

                            urn = volume.writeLogicalStreamRabinHashBased(
                                pathname, hasher, info.file_size, check_bytes)
                            # fsmeta.urn = urn
                            # fsmeta.store(resolver)
                            for h in hasher.hashes:
                                hh = hashes.newImmutableHash(
                                    h.hexdigest(), hasher.hashToType[h])
                                resolver.Add(container_urn, urn,
                                             rdfvalue.URN(lexicon.standard.hash), hh)

        print("Finished in %d (s)" % int(time.time() - start))
        return urn
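# Illustrative usage sketch (hypothetical paths): ingesting two zip files into
# a container. Note the function decides create-vs-append from whether the
# container already exists on disk, not from the append argument.
#
# ingestZipfile("/tmp/evidence.aff4",
#               ["/cases/a.bag.zip", "/cases/b.zip"],
#               append=True, check_bytes=False)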
def addPathNamesToVolume(resolver, volume, pathnames, recursive, hashbased):
    for pathname in pathnames:
        if not os.path.exists(pathname):
            print("Path %s not found. Skipping." % pathname)
            continue
        pathname = utils.SmartUnicode(pathname)
        print("\tAdding: %s" % pathname)
        fsmeta = logical.FSMetadata.create(pathname)
        if os.path.isdir(pathname):
            image_urn = None
            if volume.isAFF4Collision(pathname):
                image_urn = rdfvalue.URN("aff4://%s" % uuid.uuid4())
            else:
                image_urn = volume.urn.Append(
                    escaping.arnPathFragment_from_path(pathname), quote=False)

            fsmeta.urn = image_urn
            fsmeta.store(resolver)
            resolver.Set(volume.urn, image_urn,
                         rdfvalue.URN(lexicon.standard11.pathName),
                         rdfvalue.XSDString(pathname))
            resolver.Add(volume.urn, image_urn, rdfvalue.URN(lexicon.AFF4_TYPE),
                         rdfvalue.URN(lexicon.standard11.FolderImage))
            resolver.Add(volume.urn, image_urn, rdfvalue.URN(lexicon.AFF4_TYPE),
                         rdfvalue.URN(lexicon.standard.Image))
            if recursive:
                for child in os.listdir(pathname):
                    pathnames.append(os.path.join(pathname, child))
        else:
            with open(pathname, "rb") as src:
                hasher = linear_hasher.StreamHasher(
                    src, [lexicon.HASH_SHA1, lexicon.HASH_MD5, lexicon.HASH_SHA256])
                if not hashbased:
                    urn = volume.writeLogicalStream(pathname, hasher, fsmeta.length)
                else:
                    urn = volume.writeLogicalStreamRabinHashBased(
                        pathname, hasher, fsmeta.length)
                fsmeta.urn = urn
                fsmeta.store(resolver)

                bc_writer = blockchain.BlockChainWriter.getBlockchainWriter()
                hash_dict = {}
                for h in hasher.hashes:
                    hh = hashes.newImmutableHash(h.hexdigest(), hasher.hashToType[h])
                    resolver.Add(urn, urn, rdfvalue.URN(lexicon.standard.hash), hh)
                    hash_dict[h.name] = hh

                if bc_writer:
                    bc_writer.Set_hash(hash_dict["md5"],
                                       hash_dict["sha1"],
                                       hash_dict["sha256"])
def calculateMapHash(self, mapURI, storedHashDataType):
    calculatedHash = hashes.new(storedHashDataType)

    calculatedHash.update(self.readSegment(mapURI, "map"))
    calculatedHash.update(self.readSegment(mapURI, "idx"))

    # the mapPath segment is optional
    try:
        calculatedHash.update(self.readSegment(mapURI, "mapPath"))
    except Exception:
        pass

    return hashes.newImmutableHash(calculatedHash.hexdigest(), storedHashDataType)
def readBlockHash(self, chunk_id, hash_datatype):
    bevy_id = old_div(chunk_id, self.chunks_per_segment)
    bevy_blockHash_urn = self._get_block_hash_urn(bevy_id, hash_datatype)
    blockLength = hashes.length(hash_datatype)

    with self.resolver.AFF4FactoryOpen(bevy_blockHash_urn) as bevy_blockHashes:
        idx = chunk_id * blockLength
        bevy_blockHashes.SeekRead(idx)
        hash_value = bevy_blockHashes.Read(blockLength)
        return hashes.newImmutableHash(binascii.hexlify(hash_value),
                                       hash_datatype)
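# Illustrative sketch only: one way a caller might verify a chunk against its
# stored block hash using readBlockHash above. chunk_id, chunk_data and the
# choice of lexicon.HASH_SHA1 are assumptions for the example.
#
# def _verifyChunkAgainstBlockHash(self, chunk_id, chunk_data):
#     stored = self.readBlockHash(chunk_id, lexicon.HASH_SHA1)
#     h = hashes.new(lexicon.HASH_SHA1)
#     h.update(chunk_data)
#     return utils.SmartUnicode(h.hexdigest()) == utils.SmartUnicode(stored.value)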
def addPathNames(container_name, pathnames, recursive, append, hashbased):
    with data_store.MemoryDataStore() as resolver:
        container_urn = rdfvalue.URN.FromFileName(container_name)
        urn = None
        if not append:
            volume = container.Container.createURN(resolver, container_urn)
            print("Creating AFF4Container: file://%s <%s>" % (container_name, volume.urn))
        else:
            volume = container.Container.openURNtoContainer(
                container_urn, mode="+", resolver=resolver)
            print("Appending to AFF4Container: file://%s <%s>" % (container_name, volume.urn))

        with volume as volume:
            for pathname in pathnames:
                if not os.path.exists(pathname):
                    print("Path %s not found. Skipping." % pathname)
                    continue
                pathname = utils.SmartUnicode(pathname)
                print("\tAdding: %s" % pathname)
                fsmeta = logical.FSMetadata.create(pathname)
                if os.path.isdir(pathname):
                    image_urn = None
                    if volume.isAFF4Collision(pathname):
                        image_urn = rdfvalue.URN("aff4://%s" % uuid.uuid4())
                    else:
                        image_urn = volume.urn.Append(
                            escaping.arnPathFragment_from_path(pathname), quote=False)

                    fsmeta.urn = image_urn
                    fsmeta.store(resolver)
                    resolver.Set(volume.urn, image_urn,
                                 rdfvalue.URN(lexicon.standard11.pathName),
                                 rdfvalue.XSDString(pathname))
                    resolver.Add(volume.urn, image_urn, rdfvalue.URN(lexicon.AFF4_TYPE),
                                 rdfvalue.URN(lexicon.standard11.FolderImage))
                    resolver.Add(volume.urn, image_urn, rdfvalue.URN(lexicon.AFF4_TYPE),
                                 rdfvalue.URN(lexicon.standard.Image))
                    if recursive:
                        for child in os.listdir(pathname):
                            pathnames.append(os.path.join(pathname, child))
                else:
                    with open(pathname, "rb") as src:
                        hasher = linear_hasher.StreamHasher(
                            src, [lexicon.HASH_SHA1, lexicon.HASH_MD5])
                        if not hashbased:
                            urn = volume.writeLogicalStream(pathname, hasher, fsmeta.length)
                        else:
                            urn = volume.writeLogicalStreamRabinHashBased(
                                pathname, hasher, fsmeta.length)
                        fsmeta.urn = urn
                        fsmeta.store(resolver)
                        for h in hasher.hashes:
                            hh = hashes.newImmutableHash(h.hexdigest(), hasher.hashToType[h])
                            resolver.Add(urn, urn, rdfvalue.URN(lexicon.standard.hash), hh)
        return urn
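# Illustrative usage sketch (hypothetical paths): adding two files to a new
# container with hash-based imaging enabled.
#
# addPathNames("/tmp/logical.aff4", ["/etc/hosts", "/etc/hostname"],
#              recursive=False, append=False, hashbased=True)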
def readBlockHash(self, chunk_id, hash_datatype):
    bevy_id = old_div(chunk_id, self.chunks_per_segment)
    bevy_blockHash_urn = self.urn.Append(
        "%08d.blockHash.%s" % (bevy_id, hashes.toShortAlgoName(hash_datatype)))
    blockLength = hashes.length(hash_datatype)

    with self.resolver.AFF4FactoryOpen(bevy_blockHash_urn) as bevy_blockHashes:
        idx = chunk_id * blockLength
        bevy_blockHashes.Seek(idx)
        hash_value = bevy_blockHashes.Read(blockLength)
        # str.encode('hex') is Python 2 only; binascii.hexlify works on both
        return hashes.newImmutableHash(binascii.hexlify(hash_value),
                                       hash_datatype)
def doHash(self, mapURI, hashDataType):
    if not self.isMap(mapURI):
        raise Exception("IllegalState")

    calculatedHash = hashes.new(hashDataType)
    with self.resolver.AFF4FactoryOpen(mapURI) as mapStream:
        remaining = mapStream.Size()
        while remaining > 0:
            toRead = min(32 * 1024, remaining)
            data = mapStream.Read(toRead)
            assert len(data) == toRead
            remaining -= len(data)
            calculatedHash.update(data)

    return hashes.newImmutableHash(calculatedHash.hexdigest(), hashDataType)
def testFuzz(self):
    chunksize = 512
    for length in [chunksize - 1, chunksize, chunksize + 1,
                   chunksize * 2 - 1, chunksize * 2, chunksize * 2 + 1,
                   chunksize * 1000, 0]:
        for maxSegmentResidentSize in [0, 1, chunksize - 1, chunksize,
                                       chunksize + 1]:
            try:
                containerName = tempfile.gettempdir() + \
                    "/testfuzz-length-%d-maxresident%d.aff4" % (
                        length, maxSegmentResidentSize)
                print(containerName)
                hasher = linear_hasher.PushHasher(
                    [lexicon.HASH_SHA1, lexicon.HASH_MD5])

                container_urn = rdfvalue.URN.FromFileName(containerName)
                with data_store.MemoryDataStore() as resolver:
                    with container.Container.createURN(
                            resolver, container_urn) as volume:
                        volume.maxSegmentResidentSize = maxSegmentResidentSize
                        with volume.newLogicalStream("/foo", length) as writer:
                            with open("/dev/random", "rb") as randomStream:
                                writer.chunk_size = chunksize
                                writer_arn = writer.urn
                                pos = 0
                                while pos < length:
                                    toread = int(min(
                                        math.ceil(1024 * random.random()),
                                        length - pos))
                                    data = randomStream.read(toread)
                                    writer.Write(data)
                                    hasher.update(data)
                                    # /dev/random may return fewer bytes than
                                    # requested; advance by what we actually got
                                    pos += len(data)

                        # write in the hashes before auto-close
                        for h in hasher.hashes:
                            hh = hashes.newImmutableHash(
                                h.hexdigest(), hasher.hashToType[h])
                            volume.resolver.Add(
                                volume.urn, writer_arn,
                                rdfvalue.URN(lexicon.standard.hash), hh)
                    print()

                with container.Container.openURNtoContainer(
                        container_urn) as volume:
                    images = list(volume.images())
                    self.assertEqual(1, len(images), "Only one logical image")
                    self.assertEqual("/foo", images[0].name(),
                                     "unicode filename should be preserved")

                    fragment = escaping.member_name_for_urn(
                        images[0].urn.value, volume.version,
                        base_urn=volume.urn, use_unicode=True)

                    hasher = linear_hasher.LinearHasher2(volume.resolver, self)
                    for image in volume.images():
                        hasher.hash(image)

                os.unlink(containerName)
            except Exception:
                traceback.print_exc()
                self.fail()
            continue