def serve(self, host, path):
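     """Serves the specified mirrored URL, returning a (url, contentType, contentLength, contentStream) tuple, or all Nones if the URL is unknown."""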
     (hostName, urlHash) = self.processMirrorURL(host, path)
     if hostName:
         self.database.setLocation(hostName)
         (url, contentType, contentLength, contentHash) = self.database.loadURL(urlHash)
         if url:
             contentLength = int(contentLength)
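             # Empty content is stored under the reserved ZERO_HASH and served with no stream.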
             if contentLength == 0:
                 assert contentHash == self.ZERO_HASH
                 return (url, contentType, 0, None)
             else:
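                 # A GZIP_SUFFIX on the stored hash marks content that was saved gzip-compressed.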
                 gzipped = contentHash.endswith(self.GZIP_SUFFIX)
                 assert len(contentHash) == 2 * dbHash().digest_size + len(self.GZIP_SUFFIX) * gzipped # pylint: disable=E1101
                 (contentSize, contentStream) = self.database.loadData(contentHash)
                 if gzipped:
                     assert contentSize < contentLength
                     contentStream = GzipFile(fileobj = contentStream)
                 else:
                     assert contentSize == contentLength
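                 # Small documents of the types listed in TYPES_TO_PROCESS are run through processContent() before serving.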
                 if contentLength < DATA_CHUNK and contentType.lower().split(';')[0] in self.TYPES_TO_PROCESS:
                     contentStream = BytesIO(self.processContent(hostName, contentStream.read()))
                 return (url, contentType, contentLength, contentStream)
     return (None, None, None, None)
def downloadURL(self, url):
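     """Downloads the specified URL, saves its content to the database and records the URL metadata."""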
     try:
         print(url, end = ' ', flush = True)
         response = requests.get(url, stream = True)
         contentType = response.headers.get('content-type', '')
         contentLength = response.headers.get('content-length', '')
         print(':: %s :: %s ::' % (contentType, ('%s bytes' % contentLength) if contentLength else 'no content-length'), end = ' ', flush = True)
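         # Stream the response body into a spooled temporary file, hashing it chunk by chunk.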
         tempHash = dbHash()
         with SpooledTemporaryFile(DATA_CHUNK) as tempFile:
             for chunk in response.iter_content(DATA_CHUNK):
                 tempFile.write(chunk)
                 tempHash.update(chunk)
             size = tempFile.tell()
             if contentLength:
                 if size != int(contentLength):
                     print("ACTUALLY %d bytes ::" % size, end = ' ', flush = True)
             else:
                 print("%d bytes ::" % size, end = ' ', flush = True)
             contentLength = size
             if contentLength:
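                 # Content is keyed by its hash; identical data already in the database is not saved again.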
                 contentHash = self.dataHash(tempHash)
                 (dataSize, _dataStream) = self.database.loadData(contentHash)
                 if contentLength == dataSize:
                     print("exists, match", end = ' ', flush = True)
                 else:
                     print("DAMAGED, OVERWRITING" if dataSize else "new, saving", end = ' ', flush = True)
                     gzipped = False
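                     # Try gzip for large content; the compressed copy is kept only if it is effective enough.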
                     if contentLength >= self.MIN_SIZE_FOR_GZIP:
                         tempFile.seek(0)
                         with SpooledTemporaryFile(DATA_CHUNK) as gzipFile:
                             with GzipFile(contentHash, 'wb', fileobj = gzipFile) as gzip:
                                 while True:
                                     data = tempFile.read(DATA_CHUNK)
                                     if not data:
                                         break
                                     gzip.write(data)
                             zipLength = gzipFile.tell()
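                             # Keep the compressed copy only if it is smaller than MIN_GZIP_EFFECTIVENESS percent of the original.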
                             if zipLength * 100 < contentLength * self.MIN_GZIP_EFFECTIVENESS:
                                 contentHash += self.GZIP_SUFFIX
                                 gzipFile.seek(0)
                                 written = self.database.saveData(contentHash, gzipFile)
                                 assert written == zipLength
                                 gzipped = True
                     if not gzipped:
                         tempFile.seek(0)
                         written = self.database.saveData(contentHash, tempFile)
                         assert written == contentLength
             else:
                 contentHash = self.ZERO_HASH
         print("OK")
         urlHash = self.processOriginalURL(url)
         (oldURL, oldContentType, oldContentLength, oldContentHash) = self.database.loadURL(urlHash)
         if oldURL:
             print("Previous URL %s :: %s :: %s bytes :: content %s" % (oldURL, oldContentType, oldContentLength, 'matches' if contentHash == oldContentHash else 'DIFFERENT'))
             if oldContentHash != self.ZERO_HASH or contentHash == self.ZERO_HASH:
                 return
             print("Previous URL contained empty page, overwriting")
         self.database.saveURL(urlHash, url, contentType, str(contentLength), contentHash)
     except Exception as e:
         print("\nERROR: %s" % e)
         print(format_exc())
         raise
@staticmethod
def dataHash(data):
     """Returns a hexlified hash digest for the specified block of data or already existing hash object."""
     ret = (data if hasattr(data, 'digest') else dbHash(data.encode(UTF8))).hexdigest()
     assert len(ret) == 2 * dbHash().digest_size # pylint: disable=E1101
     return ret