def test_register_with_checksum(self):
    """Register a server-side file with ``kw.VERIFY_CHKSUM_KW`` and check its checksum.

    A random 4K file is written under ``/tmp``, registered into the test
    collection, and the checksum reported for the data object is compared
    with ``"sha2:" + helpers.compute_sha256_digest(...)`` of the replica's
    physical file.

    The test is skipped unless the session host is ``localhost`` or this
    machine's hostname, because it reads and writes server-side files.
    """
    # skip if server is remote
    if self.sess.host not in ('localhost', socket.gethostname()):
        self.skipTest('Requires access to server-side file(s)')

    # test vars
    test_dir = '/tmp'
    filename = 'register_test_file'
    test_file = os.path.join(test_dir, filename)
    obj_path = '{}/{}'.format(self.coll.path, filename)

    # make random 4K binary file
    with open(test_file, 'wb') as f:
        f.write(os.urandom(1024 * 4))

    try:
        # register file in test collection
        options = {kw.VERIFY_CHKSUM_KW: ''}
        self.sess.data_objects.register(test_file, obj_path, **options)

        # confirm object presence
        obj = self.sess.data_objects.get(obj_path)
        try:
            # verify checksum; don't use obj.path (aka logical path)
            phys_path = obj.replicas[0].path
            digest = helpers.compute_sha256_digest(phys_path)
            self.assertEqual(obj.checksum, "sha2:{}".format(digest))
        finally:
            # Unregister even when the assertion fails so a re-run does not
            # collide with a stale catalog entry; leaves physical file on disk.
            obj.unregister()
    finally:
        # delete file, whether or not the checks above passed
        os.remove(test_file)
def test_register_collection_with_checksums(self):
    """Register a directory with ``kw.VERIFY_CHKSUM_KW`` and check every checksum.

    A flat directory of ``file_count`` files is created under the directory
    returned by ``helpers.irods_shared_tmp_dir()`` (falling back to ``/tmp``),
    registered as a collection, and then:

    * the number of data objects in the collection must equal ``file_count``;
    * each object's checksum must equal ``"sha2:"`` plus the SHA-256 digest
      that ``helpers.compute_sha256_digest`` computes for its physical file.

    The test is skipped when there is neither a shared tmp dir nor a local
    server, since it needs access to server-side files.
    """
    tmp_dir = helpers.irods_shared_tmp_dir()
    loc_server = self.sess.host in ('localhost', socket.gethostname())
    if not (tmp_dir or loc_server):
        self.skipTest('Requires access to server-side file(s)')

    # test vars
    file_count = 10
    dir_name = 'register_test_dir_with_chksums'
    dir_path = os.path.join((tmp_dir or '/tmp'), dir_name)
    coll_path = '{}/{}'.format(self.test_coll.path, dir_name)

    # make test dir
    helpers.make_flat_test_dir(dir_path, file_count)

    try:
        # register test dir
        options = {kw.VERIFY_CHKSUM_KW: ''}
        self.sess.collections.register(dir_path, coll_path, **options)

        # confirm collection presence
        coll = self.sess.collections.get(coll_path)
        try:
            # confirm object count in collection
            query = self.sess.query().count(DataObject.id).filter(
                Collection.name == coll_path)
            obj_count = next(query.get_results())[DataObject.id]
            self.assertEqual(file_count, int(obj_count))

            # confirm object checksums
            objs = next(coll.walk())[2]
            for obj in objs:
                # don't use obj.path (aka logical path)
                phys_path = obj.replicas[0].path
                digest = helpers.compute_sha256_digest(phys_path)
                self.assertEqual(obj.checksum, "sha2:{}".format(digest))
        finally:
            # Unregister even on failure so a re-run does not collide with a
            # stale collection; removes coll but leaves directory on disk.
            coll.unregister()
    finally:
        # delete test dir, whether or not the checks above passed
        shutil.rmtree(dir_path)
def test_register_collection_with_checksums(self):
    """Register a ``/tmp`` directory with ``kw.VERIFY_CHKSUM_KW`` and check checksums.

    A flat directory of ``file_count`` files is created under ``/tmp``,
    registered as a collection, and then:

    * the number of data objects in the collection must equal ``file_count``;
    * each object's checksum must equal ``"sha2:"`` plus the SHA-256 digest
      that ``helpers.compute_sha256_digest`` computes for its physical file.

    The test is skipped unless the session host is ``localhost`` or this
    machine's hostname, because it reads and writes server-side files.
    """
    # skip if server is remote
    if self.sess.host not in ('localhost', socket.gethostname()):
        self.skipTest('Requires access to server-side file(s)')

    # test vars
    file_count = 10
    dir_name = 'register_test_dir'
    dir_path = os.path.join('/tmp', dir_name)
    coll_path = '{}/{}'.format(self.test_coll.path, dir_name)

    # make test dir
    helpers.make_flat_test_dir(dir_path, file_count)

    try:
        # register test dir
        options = {kw.VERIFY_CHKSUM_KW: ''}
        self.sess.collections.register(dir_path, coll_path, **options)

        # confirm collection presence
        coll = self.sess.collections.get(coll_path)
        try:
            # confirm object count in collection
            query = self.sess.query().count(DataObject.id).filter(
                Collection.name == coll_path)
            obj_count = next(query.get_results())[DataObject.id]
            self.assertEqual(file_count, int(obj_count))

            # confirm object checksums
            objs = next(coll.walk())[2]
            for obj in objs:
                # don't use obj.path (aka logical path)
                phys_path = obj.replicas[0].path
                digest = helpers.compute_sha256_digest(phys_path)
                self.assertEqual(obj.checksum, "sha2:{}".format(digest))
        finally:
            # Unregister even on failure so a re-run does not collide with a
            # stale collection; removes coll but leaves directory on disk.
            coll.unregister()
    finally:
        # delete test dir, whether or not the checks above passed
        shutil.rmtree(dir_path)
def doPut(self, dirname, collname, filename, purge_cache=True,
          register_checksum=False, check='ausent'):
    """Put a local file into iRODS.

    Puts the file in the compound resource ``compResc`` and, optionally,
    purges the cache and registers the checksum in iCAT at the same time.
    The local checksum used for comparison is a SHA256 digest prefixed
    with ``"sha2:"``.

    Errors raised by the put itself are logged and suppressed; the method
    always returns ``None``.

    Parameters
    ----------
    dirname : `str`
        Full path of the file's directory in the local filesystem.
    collname : `str`
        iRODS collection where the file should be put.
    filename : `str`
        The name of both the local file and the iRODS data object.
    purge_cache : `bool`, optional
        Whether or not to purge the cache, equivalent to the `--purgec`
        option of iput (default True).
    register_checksum : `bool`, optional
        Whether or not to register the SHA256 checksum in iRODS along
        the data object (default False).
    check : {'none', 'ausent', 'updated'}, optional
        If 'none', always puts the file. When set to 'ausent', check first
        if a data object with this name is registered in `collname`, and
        only put the file when it is not. When set to 'updated', put the
        file if it is absent or if its SHA256 checksum is different from
        the one in iCAT. Make sure to only use 'updated' if the data
        objects' checksums are correctly registered (default 'ausent').
        Any other value behaves like 'none'.
    """
    # Imported locally so the collection filter below does not depend on
    # what the module-level import block happens to provide.
    from irods.models import Collection

    self._irodsConnect()
    obj_file = os.path.join(dirname, filename)
    obj_path = '{}/{}'.format(collname, filename)
    self._checkCollExist(collname)
    self.log.info("check obj_file : "+obj_file)
    self.log.info("check obj_path : "+obj_path)

    if check in ('ausent', 'updated'):
        # Check whether the file is already in the *target* collection.
        # Matching on the data object name alone would also hit same-named
        # objects in unrelated collections and wrongly cancel the put.
        # NOTE(review): assumes catalog collection names carry no trailing
        # slash, so one is stripped from `collname` for the comparison.
        coll_name = collname.rstrip('/') or '/'
        query = (self.session.query(DataObject.name, DataObject.checksum)
                 .filter(Collection.name == coll_name)
                 .filter(DataObject.name == filename))
        # Run the query once and reuse the rows for both the existence
        # test and the checksum comparison.
        results = list(query)
        if results:
            if check == 'ausent':
                self.log.info("File already in iRODS. Put canceled.")
                return
            # Compare checksums, exit if they're equal. The local digest
            # does not depend on the row, so compute it only once.
            file_hash = "sha2:" + helpers.compute_sha256_digest(obj_file)
            for result in results:
                obj_hash = result[DataObject.checksum]
                # str() keeps logging from raising if no checksum string
                # is stored for the object.
                self.log.info("DataObject.checksum: " + str(obj_hash))
                self.log.info("File checksum: " + file_hash)
                if obj_hash == file_hash:
                    self.log.info("File already in iRODS. Put canceled.")
                    return

    # Set put options
    options = {kw.RESC_NAME_KW: "compResc"}
    if purge_cache:
        options[kw.PURGE_CACHE_KW] = 1
    if register_checksum:
        options[kw.REG_CHKSUM_KW] = ''

    try:
        self.session.data_objects.put(obj_file, obj_path, **options)
        self.log.info("file put! : "+obj_path)
    except Exception as ex:
        # Best-effort upload: report the failure but do not propagate it.
        self.log.error("Could not put a file_obj ")
        self.log.error(ex)