Пример #1
0
 def addFiles(self, files):
     """Turn every entry of *files* into a datapoint and store them in the MVP tree.

     When ``self.__DbExists()`` is false the datapoints are written with
     ``pHash.ph_save_mvptree``; otherwise they are appended with
     ``pHash.ph_add_mvptree``.

     Args:
         files: sequence of items accepted by ``self.makeDatapoint``.
             It must support ``len()``.

     Raises:
         MemoryError: if ``pHash.DPptrArray`` yields ``None``.
         PHashException: if ``ph_add_mvptree`` returns a bare integer
             instead of a ``(retcode, nbsaved)`` pair, or if the return
             code is neither ``PH_SUCCESS`` nor ``PH_ERRCAP``.
     """
     nbfiles = len(files)
     # Array of datapoint pointers handed to the pHash calls below.
     hashlist = pHash.DPptrArray(nbfiles)
     if hashlist is None:
         self.log.error("mem alloc error")
         raise MemoryError("mem alloc error")

     # One datapoint per file, stored at the file's position in the array.
     for index, path in enumerate(files):
         datapoint = self.makeDatapoint(path)
         # Drop Python ownership so the wrapper does not release the
         # datapoint while the array still points at it.
         # NOTE(review): nothing in this method releases the datapoints
         # afterwards -- confirm whether ph_free_datapoint is needed.
         datapoint.thisown = 0
         hashlist[index] = datapoint
         self.log.debug("file[%d] = %s", index, path)
     count = nbfiles
     self.log.debug("add %d files to file %s", count, self.db)

     if not self.__DbExists():
         # No database yet: build a new tree from all datapoints.
         retcode = pHash.ph_save_mvptree(self.mvpfile, hashlist.cast(), count)
         nbsaved = count
     else:
         # Database present: append the datapoints to the existing tree.
         ret = pHash.ph_add_mvptree(self.mvpfile, hashlist.cast(), count)
         # A bare integer (rather than a pair) is treated as a failure.
         if isinstance(ret, int):
             self.log.error("error on ph_add_mvptree")
             raise PHashException("error on ph_add_mvptree")
         retcode, nbsaved = ret

     # Common error handling: PH_ERRCAP is tolerated alongside PH_SUCCESS.
     if retcode not in (pHash.PH_SUCCESS, pHash.PH_ERRCAP):
         self.log.warning("could not complete query, %d", retcode)
         raise PHashException("could not complete query, %d" % (retcode))
     self.log.debug("number saved %d out of %d, ret code %d",
                    nbsaved, count, retcode)
     return
Пример #2
0
 def addFiles(self,files):
   """Turn every entry of *files* into a datapoint and store them in the MVP tree.

   When ``self.__DbExists()`` is false the datapoints are written with
   ``pHash.ph_save_mvptree``; otherwise they are appended with
   ``pHash.ph_add_mvptree``.

   Args:
     files: sequence of items accepted by ``self.makeDatapoint``.
       It must support ``len()``.

   Raises:
     MemoryError: if ``pHash.DPptrArray`` yields ``None``.
     PHashException: if ``ph_add_mvptree`` returns a bare integer
       instead of a ``(retcode, nbsaved)`` pair, or if the return
       code is neither ``PH_SUCCESS`` nor ``PH_ERRCAP``.
   """
   nbfiles = len(files)
   # Array of datapoint pointers handed to the pHash calls below.
   hashlist = pHash.DPptrArray(nbfiles)
   if hashlist is None:
     self.log.error("mem alloc error")
     raise MemoryError("mem alloc error")

   # One datapoint per file, stored at the file's position in the array.
   for index, path in enumerate(files):
     datapoint = self.makeDatapoint(path)
     # Drop Python ownership so the wrapper does not release the
     # datapoint while the array still points at it.
     # NOTE(review): nothing in this method releases the datapoints
     # afterwards -- confirm whether ph_free_datapoint is needed.
     datapoint.thisown = 0
     hashlist[index] = datapoint
     self.log.debug("file[%d] = %s", index, path)
   count = nbfiles
   self.log.debug("add %d files to file %s", count, self.db)

   if not self.__DbExists():
     # No database yet: build a new tree from all datapoints.
     retcode = pHash.ph_save_mvptree(self.mvpfile, hashlist.cast(), count)
     nbsaved = count
   else:
     # Database present: append the datapoints to the existing tree.
     ret = pHash.ph_add_mvptree(self.mvpfile, hashlist.cast(), count)
     # A bare integer (rather than a pair) is treated as a failure.
     if isinstance(ret, int):
       self.log.error("error on ph_add_mvptree")
       raise PHashException("error on ph_add_mvptree")
     retcode, nbsaved = ret

   # Common error handling: PH_ERRCAP is tolerated alongside PH_SUCCESS.
   if retcode not in (pHash.PH_SUCCESS, pHash.PH_ERRCAP):
     self.log.warning("could not complete query, %d", retcode)
     raise PHashException("could not complete query, %d"%(retcode))
   self.log.debug("number saved %d out of %d, ret code %d", nbsaved, count, retcode)
   return
Пример #3
0
def main(argv):
    """Hash every image under a directory and save the hashes as an MVP tree.

    Args:
        argv: command-line arguments without the program name.
            ``argv[0]`` is the directory the image files are read from and
            ``argv[1]`` is the file the MVP tree is saved to.

    Returns:
        -1 when fewer than two arguments are given, -4 when a datapoint
        cannot be allocated, otherwise None.
    """
    logging.basicConfig(level=logging.DEBUG)
    print(pHash.ph_about())

    if len(argv) < 2:
        print("not enough input args")
        print("usage: %s dirname filename" % (sys.argv[0]))
        return -1

    dir_name = argv[0]  # directory the image files are read from
    filename = argv[1]  # file the MVP tree is saved to

    mvpfile = pHash.MVPFile()
    mvpfile.branchfactor = 2
    mvpfile.pathlength = 5
    mvpfile.leafcapacity = 23
    mvpfile.pgsize = 4096
    mvpfile.filename = filename
    # The distance function is registered through the wrapper helper
    # rather than by assigning mvpfile.hashdist directly.
    pHash.my_set_callback(mvpfile, distancefunc)
    mvpfile.hash_type = pHash.UINT64ARRAY

    print("dir name: %s" % (dir_name))

    # Collect every file path first so that one array holds the datapoints
    # of the whole walk. Allocating the array per directory would keep only
    # the last directory visited and leave it undefined for an empty walk.
    paths = []
    for root, _dirs, files in os.walk(dir_name):
        for name in files:
            paths.append(os.path.normpath(os.path.join(root, name)))
    nbfiles = len(paths)
    print("nbfiles = %d" % nbfiles)

    # Array of datapoint pointers (a DP ** on the C side).
    hashlist = pHash.DPptrArray(nbfiles)
    count = 0
    for i, path in enumerate(paths):
        # Hash before allocating, so a file that cannot be hashed does not
        # leave behind a disowned datapoint that nothing releases.
        ret, tmphash = pHash.ph_dct_imagehash(path)
        if ret < 0:
            print("unable to get hash")
            continue
        tmp = pHash.DP()
        if tmp is None:
            print("mem alloc error")
            return -4
        tmp.hash_type = mvpfile.hash_type
        # Tell Python NOT to garbage collect the DP; the array refers to it.
        tmp.thisown = 0
        hashlist[count] = tmp
        # .hash is a pointer, so the value is copied into a ulong64 pointer
        # instead of being assigned directly.
        hashlist[count].hash = pHash.copy_ulong64Ptr(tmphash)
        print("files[%d]: %s hash = %x" % (i, path, tmphash))
        hashlist[count].id = path
        hashlist[count].hash_length = 1
        count += 1

    # DPptrArray.cast() yields the DP ** expected by ph_save_mvptree.
    hashlistf = hashlist.cast()
    ret = pHash.ph_save_mvptree(mvpfile, hashlistf, count)
    # Original author's notes: ret code 11 is a null hash distance function;
    # ret code 17 was seen with a callback function set.
    print("save: ret code %d" % (ret))

    # The datapoints are not released by the garbage collector (thisown is
    # 0), so free exactly the slots that were filled.
    for i in range(count):
        pHash.ph_free_datapoint(hashlist[i])
Пример #4
0
def main(argv):
  """Hash every image under a directory and save the hashes as an MVP tree.

  Args:
    argv: command-line arguments without the program name.
      ``argv[0]`` is the directory the image files are read from and
      ``argv[1]`` is the file the MVP tree is saved to.

  Returns:
    -1 when fewer than two arguments are given, -4 when a datapoint
    cannot be allocated, otherwise None.
  """
  logging.basicConfig(level=logging.DEBUG)
  print(pHash.ph_about())

  if len(argv) < 2:
    print("not enough input args")
    print("usage: %s dirname filename"% (sys.argv[0]))
    return -1

  dir_name = argv[0]  # directory the image files are read from
  filename = argv[1]  # file the MVP tree is saved to

  mvpfile = pHash.MVPFile()
  mvpfile.branchfactor = 2
  mvpfile.pathlength = 5
  mvpfile.leafcapacity = 23
  mvpfile.pgsize = 4096
  mvpfile.filename = filename
  # The distance function is registered through the wrapper helper
  # rather than by assigning mvpfile.hashdist directly.
  pHash.my_set_callback(mvpfile, distancefunc)
  mvpfile.hash_type = pHash.UINT64ARRAY

  print("dir name: %s"%( dir_name))

  # Collect every file path first so that one array holds the datapoints
  # of the whole walk. Allocating the array per directory would keep only
  # the last directory visited and leave it undefined for an empty walk.
  paths = []
  for root, _dirs, files in os.walk(dir_name):
    for name in files:
      paths.append(os.path.normpath(os.path.join(root, name)))
  nbfiles = len(paths)
  print("nbfiles = %d"% nbfiles)

  # Array of datapoint pointers (a DP ** on the C side).
  hashlist = pHash.DPptrArray(nbfiles)
  count = 0
  for i, path in enumerate(paths):
    # Hash before allocating, so a file that cannot be hashed does not
    # leave behind a disowned datapoint that nothing releases.
    ret, tmphash = pHash.ph_dct_imagehash(path)
    if ret < 0:
      print("unable to get hash")
      continue
    tmp = pHash.DP()
    if tmp is None:
      print("mem alloc error")
      return -4
    tmp.hash_type = mvpfile.hash_type
    # Tell Python NOT to garbage collect the DP; the array refers to it.
    tmp.thisown = 0
    hashlist[count] = tmp
    # .hash is a pointer, so the value is copied into a ulong64 pointer
    # instead of being assigned directly.
    hashlist[count].hash = pHash.copy_ulong64Ptr(tmphash)
    print("files[%d]: %s hash = %x"%( i, path, tmphash ))
    hashlist[count].id = path
    hashlist[count].hash_length = 1
    count += 1

  # DPptrArray.cast() yields the DP ** expected by ph_save_mvptree.
  hashlistf = hashlist.cast()
  ret = pHash.ph_save_mvptree(mvpfile, hashlistf, count)
  # Original author's notes: ret code 11 is a null hash distance function;
  # ret code 17 was seen with a callback function set.
  print("save: ret code %d"%(ret))

  # The datapoints are not released by the garbage collector (thisown is
  # 0), so free exactly the slots that were filled.
  for i in range(count):
    pHash.ph_free_datapoint(hashlist[i])