Exemplo n.º 1
0
    def digest(self):
        """Digest of the data read from the source file.

        return (string): hex digest of the bytes hashed so far.

        """
        # hexdigest() yields the lowercase hex string directly, so the
        # hand-rolled bin_to_hex(digest()) round-trip is unnecessary.
        return self.hasher.hexdigest()
Exemplo n.º 2
0
    def digest(self):
        """Digest of the data written in the file.

        return (string): hex digest of the bytes hashed so far.

        """
        # hexdigest() yields the lowercase hex string directly, so the
        # hand-rolled bin_to_hex(digest()) round-trip is unnecessary.
        return self.hasher.hexdigest()
Exemplo n.º 3
0
    def check_backend_integrity(self, delete=False):
        """Check the integrity of the backend.

        Request all the files from the backend. For each of them the
        digest is recomputed and checked against the one recorded in
        the backend.

        If mismatches are found, they are reported with ERROR
        severity. The method returns False if at least a mismatch is
        found, True otherwise.

        delete (bool): if True, files with wrong digest are deleted.

        return (bool): True if every stored digest matched, False
            otherwise.

        """
        clean = True
        for digest, _ in self.list():
            fobj = self.backend.get_file(digest)
            hasher = hashlib.sha1()
            try:
                # Stream in CHUNK_SIZE pieces so arbitrarily large
                # files never have to fit in memory at once.
                buf = fobj.read(self.CHUNK_SIZE)
                while len(buf) > 0:
                    hasher.update(buf)
                    buf = fobj.read(self.CHUNK_SIZE)
            finally:
                # Always release the backend file handle, even if the
                # read fails midway.
                fobj.close()
            # hexdigest() produces the lowercase hex string directly,
            # no separate binary-to-hex helper needed.
            computed_digest = hasher.hexdigest()
            if digest != computed_digest:
                logger.error("File with hash %s actually has hash %s", digest,
                             computed_digest)
                if delete:
                    self.delete(digest)
                clean = False

        return clean
Exemplo n.º 4
0
    def put_file_from_fobj(self, src, desc=""):
        """Store a file in the storage.

        If it's already (for some reason...) in the cache send that
        copy to the backend. Otherwise store it in the file-system
        cache first.

        The file is obtained from a file-object. Other interfaces are
        available as `put_file_content', `put_file_from_path'.

        src (fileobj): a readable binary file-like object from which
            to read the contents of the file.
        desc (unicode): the (optional) description to associate to the
            file.

        return (unicode): the digest of the stored file.

        """
        logger.debug("Reading input file to store on the database.")

        # Unfortunately, we have to read the whole file-obj to compute
        # the digest but we take that chance to save it to a temporary
        # path so that we then just need to move it. Hoping that both
        # locations will be on the same filesystem, that should be way
        # faster than reading the whole file-obj again (as it could be
        # compressed or require network communication).
        # XXX We're *almost* reimplementing copyfileobj.
        with tempfile.NamedTemporaryFile('wb', delete=False,
                                         dir=self.temp_dir) as dst:
            hasher = hashlib.sha1()
            buf = src.read(self.CHUNK_SIZE)
            while len(buf) > 0:
                hasher.update(buf)
                # Inner loop: keep writing until the whole chunk has
                # been pushed to the temporary file.
                while len(buf) > 0:
                    written = dst.write(buf)
                    # Cooperative yield.
                    gevent.sleep(0)
                    if written is None:
                        # NOTE(review): a None return (legacy file
                        # objects) carries no byte count, so partial
                        # writes cannot be tracked; the code assumes
                        # the whole chunk went through -- TODO confirm.
                        break
                    buf = buf[written:]
                buf = src.read(self.CHUNK_SIZE)
            # hexdigest() gives the hex string directly, avoiding the
            # bin_to_hex(digest()) round-trip.
            digest = hasher.hexdigest()
            dst.flush()

            logger.debug("File has digest %s.", digest)

            cache_file_path = os.path.join(self.file_dir, digest)

            # Move the temporary file into the cache only if no copy
            # is already there; otherwise discard the duplicate.
            if not os.path.exists(cache_file_path):
                move(dst.name, cache_file_path)
            else:
                os.unlink(dst.name)

        # Store the file in the backend. We do that even if the file
        # was already in the cache (that is, we ignore the check above)
        # because there's a (small) chance that the file got removed
        # from the backend but somehow remained in the cache.
        self.save(digest, desc)

        return digest
Exemplo n.º 5
0
Arquivo: __init__.py Projeto: fph/cms
def sha1sum(path):
    """Calculate the SHA1 sum of a file, given by its path.

    path (string): path of the file we are interested in.

    return (string): SHA1 hex digest of the file in path.

    """
    buffer_length = 8192
    hasher = hashlib.sha1()
    with io.open(path, 'rb') as fin:
        # Read in fixed-size chunks so arbitrarily large files do not
        # have to fit in memory.
        buf = fin.read(buffer_length)
        while len(buf) > 0:
            hasher.update(buf)
            buf = fin.read(buffer_length)
    # hexdigest() returns the lowercase hex string directly, with no
    # need for a hand-rolled binary-to-hex helper.
    return hasher.hexdigest()
Exemplo n.º 6
0
    def test_file_duplicates(self):
        """Send multiple copies of the same file into FileCacher.

        Generates a random file and attempts to store them into the
        FileCacher. FC should handle this gracefully and only end up
        with one copy.

        """
        content = os.urandom(100)
        # hexdigest() gives the hex string directly, no separate
        # binary-to-hex conversion needed.
        digest = hashlib.sha1(content).hexdigest()

        # Test writing the same file to the DB in parallel.
        # Create empty files.
        num_files = 4
        fobjs = []
        for _ in range(num_files):
            fobj = self.file_cacher.backend.create_file(digest)
            # As the file contains random data, we don't expect to have put
            # this into the DB previously.
            assert fobj is not None
            fobjs.append(fobj)

        # Close them in a different order. Seed to make the shuffle
        # deterministic.
        r = random.Random()
        r.seed(num_files)
        r.shuffle(fobjs)

        # Write the files and commit them.
        for i, fobj in enumerate(fobjs):
            fobj.write(content)
            # Ensure that only one copy made it into the database.
            commit_ok = \
                self.file_cacher.backend.commit_file(fobj,
                                                     digest,
                                                     desc='Copy %d' % i)
            # Only the first commit should succeed.
            assert commit_ok == (i == 0), \
                "Commit of %d was %s unexpectedly" % (i, commit_ok)

        # Check that the file was stored correctly.
        self.check_stored_file(digest)
Exemplo n.º 7
0
def get_hex_random_key():
    """Generate 16 random bytes, safe to be used as AES key.

    return: the key, encoded as a hexadecimal string.

    """
    key = get_random_key()
    return bin_to_hex(key)
Exemplo n.º 8
0
def unique_digest():
    """Return a unique digest-like string.

    return (string): SHA1 hex digest of a fresh unique id.

    """
    hasher = hashlib.sha1()
    hasher.update(str(unique_long_id()).encode('ascii'))
    # hexdigest() returns the hex string directly, with no need for a
    # separate binary-to-hex helper.
    return hasher.hexdigest()
Exemplo n.º 9
0
 def test_string(self):
     """A text (non-binary) string must be rejected with TypeError."""
     self.assertRaises(TypeError, bin_to_hex, "cms")
Exemplo n.º 10
0
 def test_success(self):
     """Binary strings are converted to their lowercase hex form."""
     cases = [
         (b"\x32\x00\xa0", "3200a0"),
         (b"\xFF\xFF\xFF\xFF", "ffffffff"),
         (b"\x00" * 1000, "0" * 2000),
     ]
     for binary, expected in cases:
         self.assertEqual(bin_to_hex(binary), expected)
Exemplo n.º 11
0
Arquivo: digest.py Projeto: Nyrio/cms
 def digest(self):
     """Return the digest as an hex string."""
     # hexdigest() already produces the hex form of the digest, so no
     # separate binary-to-hex conversion is needed.
     return self._hasher.hexdigest()
Exemplo n.º 12
0
def get_hex_random_key():
    """Generate 16 random bytes, safe to be used as AES key.

    return: the key, encoded as a hexadecimal string.

    """
    key = get_random_key()
    return bin_to_hex(key)
Exemplo n.º 13
0
 def digest(self):
     """Return the digest as an hex string."""
     # hexdigest() already produces the hex form of the digest, so no
     # separate binary-to-hex conversion is needed.
     return self._hasher.hexdigest()
Exemplo n.º 14
0
def unique_digest():
    """Return a unique digest-like string.

    return (string): SHA1 hex digest of a fresh unique id.

    """
    hasher = hashlib.sha1()
    hasher.update(str(unique_long_id()).encode('ascii'))
    # hexdigest() returns the hex string directly, with no need for a
    # separate binary-to-hex helper.
    return hasher.hexdigest()