def rawHashFile(archPath):
    """Hash every member of the archive at ``archPath`` and print the results.

    For each ``(name, file-object)`` pair yielded by ``uar.ArchiveReader`` the
    member is read fully, passed to ``hasher.hashFile`` and a small dict with
    the internal path, content hash and perceptual hash is printed.

    Args:
        archPath: Path of the archive to open.

    Raises:
        ValueError: If the archive yields the same internal name more than once.
        Exception: Anything raised while reading or hashing a member is
            re-raised after ``archPath`` has been printed.
    """
    archIterator = uar.ArchiveReader(archPath)
    try:
        # First pass: collect member names so duplicates can be detected.
        fnames = [item[0] for item in archIterator]
        fset = set(fnames)
        if len(fnames) != len(fset):
            print(fnames)
            print(fset)
            raise ValueError("Wat?")

        try:
            # Second pass: hash the contents of every member.
            for fName, fp in archIterator:
                fCont = fp.read()
                # Image dimensions are returned too, but are not reported here.
                fName, hexHash, pHash, _imX, _imY = hasher.hashFile(
                    archPath, fName, fCont)

                # rjust() pads the fields to a fixed width for the printout.
                insertArgs = {
                    "internalPath": fName.rjust(25),
                    "itemHash": hexHash,
                    "pHash": str(pHash).rjust(25),
                }
                print(insertArgs)
        except BaseException:
            # Same reach as the original bare ``except:``: name the archive
            # that failed, then let the error propagate unchanged.
            print(archPath)
            raise
    finally:
        # Release the archive handle on every exit path, not just on success.
        archIterator.close()
def test_hashImage2_c(self):
    # Hash the "_small" JPEG fixture and compare every returned field against
    # the recorded reference values.
    here = os.path.dirname(os.path.realpath(__file__))
    fixture_dir = os.path.join(here, 'testimages')
    fixture = os.path.join(fixture_dir, 'Lolcat_this_is_mah_job_small.jpg')

    with open(fixture, "rb") as handle:
        payload = handle.read()

    basePath = "LOL"
    intName = "WAT.jpg"
    result = hashFile.hashFile(basePath, intName, payload)
    fname, hexHash, pHash, imX, imY = result

    # The internal name must be handed back untouched.
    self.assertEqual(intName, fname)
    self.assertEqual(hexHash, "40d39c436e14282dcda06e8aff367307")
    self.assertEqual(pHash, -4992890192511777340)
    self.assertEqual(imX, 300)
    self.assertEqual(imY, 237)
def test_hashImage3(self):
    # Hash the 'lolcat-crocs.jpg' fixture and compare every returned field
    # against the recorded reference values.
    here = os.path.dirname(os.path.realpath(__file__))
    fixture_dir = os.path.join(here, 'testimages')
    fixture = os.path.join(fixture_dir, 'lolcat-crocs.jpg')

    with open(fixture, "rb") as handle:
        payload = handle.read()

    basePath = "LOL"
    intName = "WAT.jpg"
    result = hashFile.hashFile(basePath, intName, payload)
    fname, hexHash, pHash, imX, imY = result

    # The internal name must be handed back untouched.
    self.assertEqual(intName, fname)
    self.assertEqual(hexHash, "6d0a977694630ac9d1d33a7f068e10f8")
    self.assertEqual(pHash, -7472365462264617431)
    self.assertEqual(imX, 500)
    self.assertEqual(imY, 363)
def test_hashImage2(self):
    # Hash the 'Lolcat_this_is_mah_job.jpg' fixture and compare every returned
    # field against the recorded reference values.
    here = os.path.dirname(os.path.realpath(__file__))
    fixture_dir = os.path.join(here, 'testimages')
    fixture = os.path.join(fixture_dir, 'Lolcat_this_is_mah_job.jpg')

    with open(fixture, "rb") as handle:
        payload = handle.read()

    basePath = "LOL"
    intName = "WAT.jpg"
    result = hashFile.hashFile(basePath, intName, payload)
    fname, hexHash, pHash, imX, imY = result

    # The internal name must be handed back untouched.
    self.assertEqual(intName, fname)
    self.assertEqual(hexHash, "d9ceeb6b43c2d7d096532eabfa6cf482")
    self.assertEqual(pHash, -4992890192511777340)
    self.assertEqual(imX, 493)
    self.assertEqual(imY, 389)
def test_hashImage2_b(self):
    # Hash the PNG fixture while presenting it under a ".jpg" internal name,
    # then compare every returned field against the recorded reference values.
    here = os.path.dirname(os.path.realpath(__file__))
    fixture_dir = os.path.join(here, 'testimages')
    fixture = os.path.join(fixture_dir, 'Lolcat_this_is_mah_job.png')

    with open(fixture, "rb") as handle:
        payload = handle.read()

    basePath = "LOL"
    intName = "WAT.jpg"
    result = hashFile.hashFile(basePath, intName, payload)
    fname, hexHash, pHash, imX, imY = result

    # The internal name must be handed back untouched.
    self.assertEqual(intName, fname)
    self.assertEqual(hexHash, "1268e704908cc39299d73d6caafc23a0")
    self.assertEqual(pHash, -4992890192511777340)
    self.assertEqual(imX, 493)
    self.assertEqual(imY, 389)
def test_hashImage1(self):
    # Hash the 'dangerous-to-go-alone.jpg' fixture and compare every returned
    # field against the recorded reference values.
    here = os.path.dirname(os.path.realpath(__file__))
    fixture_dir = os.path.join(here, 'testimages')
    fixture = os.path.join(fixture_dir, 'dangerous-to-go-alone.jpg')

    with open(fixture, "rb") as handle:
        payload = handle.read()

    basePath = "LOL"
    intName = "WAT.jpg"
    result = hashFile.hashFile(basePath, intName, payload)
    fname, hexHash, pHash, imX, imY = result

    # The internal name must be handed back untouched.
    self.assertEqual(intName, fname)
    self.assertEqual(hexHash, "dcd6097eeac911efed3124374f44085b")
    self.assertEqual(pHash, -7813072021139921681)
    self.assertEqual(imX, 325)
    self.assertEqual(imY, 307)
def test_hashImage6(self):
    # With shouldPhash=False the content hash is still expected, while the
    # perceptual hash and both image dimensions are expected to be None.
    here = os.path.dirname(os.path.realpath(__file__))
    fixture_dir = os.path.join(here, 'testimages')
    fixture = os.path.join(fixture_dir, 'lolcat-oregon-trail.jpg')

    with open(fixture, "rb") as handle:
        payload = handle.read()

    basePath = "LOL"
    intName = "WAT.jpg"
    result = hashFile.hashFile(basePath, intName, payload, shouldPhash=False)
    fname, hexHash, pHash, imX, imY = result

    # The internal name must be handed back untouched.
    self.assertEqual(intName, fname)
    self.assertEqual(hexHash, "7227289a017988b6bdcf61fd4761f6b9")
    self.assertEqual(pHash, None)
    self.assertEqual(imX, None)
    self.assertEqual(imY, None)
def test_hashImage4(self):
    # Hash the 'lolcat-oregon-trail.jpg' fixture and compare every returned
    # field, including a dHash value, against the recorded reference values.
    #
    # NOTE(review): this test unpacks SIX values (with dHash) from hashFile,
    # whereas the other callers visible in this file unpack five. Confirm which
    # hashFile signature this test is meant to target.
    here = os.path.dirname(os.path.realpath(__file__))
    fixture_dir = os.path.join(here, 'testimages')
    fixture = os.path.join(fixture_dir, 'lolcat-oregon-trail.jpg')

    with open(fixture, "rb") as handle:
        payload = handle.read()

    basePath = "LOL"
    intName = "WAT.jpg"
    result = hashFile.hashFile(basePath, intName, payload)
    fname, hexHash, pHash, dHash, imX, imY = result

    # The internal name must be handed back untouched.
    self.assertEqual(intName, fname)
    self.assertEqual(hexHash, "7227289a017988b6bdcf61fd4761f6b9")
    self.assertEqual(pHash, -4955310669995365332)
    self.assertEqual(dHash, -8660145558008088574)
    self.assertEqual(imX, 501)
    self.assertEqual(imY, 356)
def test_hashImage5(self):
    # The internal name here is "WAT" with no extension. The content hash is
    # still expected, but the perceptual hash and dimensions are expected to
    # be None (presumably because the name does not look like an image --
    # confirm against hashFile).
    here = os.path.dirname(os.path.realpath(__file__))
    fixture_dir = os.path.join(here, 'testimages')
    fixture = os.path.join(fixture_dir, 'lolcat-oregon-trail.jpg')

    with open(fixture, "rb") as handle:
        payload = handle.read()

    basePath = "LOL"
    intName = "WAT"
    result = hashFile.hashFile(basePath, intName, payload)
    fname, hexHash, pHash, imX, imY = result

    # The internal name must be handed back untouched.
    self.assertEqual(intName, fname)
    self.assertEqual(hexHash, "7227289a017988b6bdcf61fd4761f6b9")
    self.assertEqual(pHash, None)
    self.assertEqual(imX, None)
    self.assertEqual(imY, None)
def hashBareFile(self, wholePath, dbPath, doPhash=True):
    """Hash a single non-archive file and insert one row for it into the DB.

    The file is read fully, hashed via ``hasher.hashFile`` with an empty
    internal path, inserted through ``self.dbApi.insertIntoDb`` and reported
    on the progress queue as "processed".

    Args:
        wholePath: Filesystem path of the file to read; also stored as
            ``fsPath`` in the inserted row.
        dbPath: Not used by this method.
            NOTE(review): the row is keyed on ``wholePath``, not ``dbPath`` --
            confirm that this is intended.
        doPhash: Forwarded to ``hasher.hashFile`` as ``shouldPhash``.
    """
    with open(wholePath, "rb") as fp:
        fCont = fp.read()

    # Honour the caller's choice: ``doPhash`` used to be accepted but ignored.
    fName, hexHash, pHash, imX, imY = hasher.hashFile(
        wholePath, "", fCont, shouldPhash=doPhash)

    insertArgs = {
        "fsPath": wholePath,
        "internalPath": fName,  # fName == '' in this case
        "itemHash": hexHash,
        "pHash": pHash,
        "imgX": imX,
        "imgY": imY,
    }
    self.dbApi.insertIntoDb(**insertArgs)

    self.putProgQueue("processed")
def scanArchive(self, archPath, archData):
    """Hash every member of an archive and insert the rows in one transaction.

    The archive is opened with ``uar.ArchiveReader`` and iterated twice: once
    to collect member names for a duplicate check, and once to hash each
    member and insert a row via ``self.dbApi.insertIntoDb``. The inserts are
    bracketed by ``self.dbApi.begin()`` and ``self.dbApi.commit()``.

    If ``scanner.runState.run`` becomes false the loop stops early, and the
    rows inserted so far are still committed.

    Args:
        archPath: Path of the archive; stored as ``fsPath`` for every member.
        archData: Passed to ``uar.ArchiveReader`` as ``fileContents``.

    Raises:
        ValueError: If the archive yields the same internal name more than once.
        Exception: Any error raised while hashing or inserting is re-raised
            after the transaction has been rolled back.
    """
    archIterator = uar.ArchiveReader(archPath, fileContents=archData)
    try:
        # First pass: collect member names so duplicates can be detected.
        fnames = [item[0] for item in archIterator]
        fset = set(fnames)
        if len(fnames) != len(fset):
            print(fnames)
            print(fset)
            raise ValueError("Wat?")

        self.dbApi.begin()
        try:
            # Second pass: hash and insert each member.
            for fName, fp in archIterator:
                fCont = fp.read()
                fName, hexHash, pHash, imX, imY = hasher.hashFile(
                    archPath, fName, fCont)

                insertArgs = {
                    "fsPath": archPath,
                    "internalPath": fName,
                    "itemHash": hexHash,
                    "pHash": pHash,
                    "imgX": imX,
                    "imgY": imY,
                }
                self.dbApi.insertIntoDb(**insertArgs)

                self.putProgQueue("processed")

                # Stop early if the scanner has been asked to shut down.
                if not scanner.runState.run:
                    break
        except BaseException:
            # Same reach as the original bare ``except:``: undo the partial
            # inserts for this archive, then propagate the error unchanged.
            print(archPath)
            self.dbApi.rollback()
            raise

        self.dbApi.commit()
    finally:
        # Release the archive on every exit path; previously it was only
        # closed after a successful commit.
        archIterator.close()
def processImageFile(self, wholePath, dbFilePath):
    """Hash a bare image file and upsert its row unless the DB already has it.

    ``self.dbApi.getItem`` is asked for the ``phash``, ``imgx`` and ``imgy``
    columns of ``dbFilePath``. When a row comes back and every requested value
    is truthy, the file is not read and "skipped" is put on ``self.outQ``.
    Otherwise the file at ``wholePath`` is read, hashed with an empty internal
    path, upserted via ``self.dbApi.upsert`` and "processed" is put on the
    queue.

    ``IndexError`` and ``UnboundLocalError`` raised while hashing or
    upserting are logged through ``self.tlog`` and swallowed; nothing is put
    on the queue in that case.
    """
    existing = self.dbApi.getItem(fspath=dbFilePath, wantCols=['phash', 'imgx', 'imgy'])

    # Guard clause: a complete row means there is nothing left to compute.
    if existing and all(existing):
        self.outQ.put("skipped")
        return

    with open(wholePath, "rb") as handle:
        payload = handle.read()

    try:
        fName, hexHash, pHash, imX, imY = hasher.hashFile(wholePath, "", payload)

        # Insert or update the data row; fName == '' for a bare file.
        self.dbApi.upsert(
            fsPath=wholePath,
            internalPath=fName,
            itemHash=hexHash,
            pHash=pHash,
            imgX=imX,
            imgY=imY,
        )
        self.outQ.put("processed")
    except (IndexError, UnboundLocalError):
        self.tlog.error("Error while processing fileN")
        self.tlog.error("%s", wholePath)
        self.tlog.error("%s", traceback.format_exc())