def test_constructor(sips):
    """Test the archiver constructor.

    Both a raw SIP model and its API wrapper must be accepted, and the
    stored ``sip`` / ``patch_of`` attributes must always be ``SIPApi``.
    """
    # Construct from the model and from the API object alike.
    from_model = BaseArchiver(sips[0].model).sip
    from_api = BaseArchiver(sips[0]).sip
    assert isinstance(from_model, SIPApi)
    assert isinstance(from_api, SIPApi)

    # Same duality for BagItArchiver, including the 'patch_of' argument.
    bagit_api = BagItArchiver(sips[1], patch_of=sips[0])
    bagit_model = BagItArchiver(sips[1].model, patch_of=sips[0].model)
    for archiver in (bagit_api, bagit_model):
        assert isinstance(archiver.sip, SIPApi)
        assert isinstance(archiver.patch_of, SIPApi)
def transfer_demo(uuid, config):
    """Transfer the files contained in the sip to the destination.

    Very similar to the rsync transfer. However, because of time, I use the
    VERY UNSECURE sshpass package for rsync authentication.
    DO NOT USE IN PROD!!!

    :param str uuid: the id of the sip containing files to transfer
    :param dict config: here config must be a dict with the following keys:

        - user - the SSH user
        - password_file - a path where the password is stored
        - remote - the URL or IP of the remote
        - remote_path - where to store files on the remote
        - args - the args for rsync
    :returns: the return code of the ``rsync`` subprocess call
    :rtype: int
    """
    # we retrieve the archive and the SIP associated
    sip = SIP.get_sip(uuid)
    ark = Archive.get_from_sip(uuid)
    # we export it to the temp folder
    archiver = BaseArchiver(sip)
    archiver.write_all_files()
    # we rsync it to the remote
    src_path = archiver.get_fullpath('')
    dest_path = join(config['remote_path'], ark.accession_id)
    dest_path = '{}:{}'.format(config['remote'], dest_path)
    # BUGFIX: the format string was missing the '{filename}' placeholder,
    # so the password file path was never handed to sshpass even though it
    # was supplied to .format() below.
    ssh_command = 'sshpass -f {filename} ssh -l {user}'.format(
        filename=config['password_file'],
        user=config['user'])
    return call([
        'rsync',
        config['args'],
        '--rsh={}'.format(ssh_command),
        src_path,
        dest_path
    ])
def test_getters(db, sips, sip_metadata_types, locations):
    """Test the constructor and the getters."""
    sip = sips[0]
    archiver = BaseArchiver(sip)
    assert archiver.get_archive_base_uri() == locations['archive'].uri
    assert archiver.sip is sip

    # The archive path is chunked from the SIP UUID: root/ab/cd/rest/...
    sip_id = str(sip.id)

    def full_path(filepath):
        # Absolute path of a file inside this SIP's archive directory.
        return "{root}/{c1}/{c2}/{cn}/{filepath}".format(
            root=locations['archive'].uri, c1=sip_id[:2], c2=sip_id[2:4],
            cn=sip_id[4:], filepath=filepath)

    # Data (SIPFile) information
    data_files_info = archiver._get_data_files()
    expected_file = {
        'file_uuid': str(sip.files[0].file_id),
        'filepath': 'files/foobar.txt',
        'filename': 'foobar.txt',
        'sipfilepath': 'foobar.txt',
        'size': 4,
        'fullpath': full_path('files/foobar.txt'),
        'checksum': 'md5:098f6bcd4621d373cade4e832627b4f6'
    }
    assert data_files_info == [expected_file]

    # Metadata (SIPMetadata) information
    metafiles_info = archiver._get_metadata_files()
    assert len(metafiles_info) == 2
    expected_json = {
        'checksum': 'md5:da4ab7e4c4b762d8e2f3ec3b9f801b1f',
        'fullpath': full_path('metadata/json-test.json'),
        'metadata_id': sip_metadata_types['json-test'].id,
        'filepath': 'metadata/json-test.json',
        'size': 19
    }
    expected_xml = {
        'checksum': 'md5:498d1ce86c2e9b9eb85f1e8105affdf6',
        'fullpath': full_path('metadata/marcxml-test.xml'),
        'metadata_id': sip_metadata_types['marcxml-test'].id,
        'filepath': 'metadata/marcxml-test.xml',
        'size': 12
    }
    assert expected_json in metafiles_info
    assert expected_xml in metafiles_info

    # get_all_files() is the union of data and metadata file info.
    all_files_info = archiver.get_all_files()
    assert len(all_files_info) == 3
    for info in (expected_file, expected_json, expected_xml):
        assert info in all_files_info
def transfer_rsync(uuid, config):
    """Transfer the files contained in the sip to the destination.

    The transfer is done with a rsync. If transfer to remote, you need a
    valid ssh setup.

    This method is automatically called by the module to transfer the files.
    Depending on your installation, you may want to have a different behavior
    (copy among servers...). Then, you can create your own factory and link
    it into the config variable
    :py:data:`invenio_archivematica.config.ARCHIVEMATICA_TRANSFER_FACTORY`.

    The config needs to include at least the destination folder. If transfer
    to remote, it needs to include the user and the server. In either cases,
    you can include usual rsync parameters. See
    :py:data:`invenio_archivematica.config.ARCHIVEMATICA_TRANSFER_FOLDER`:

    .. code-block:: python

        ARCHIVEMATICA_TRANSFER_FOLDER = {
            'server': 'localhost',
            'user': '******',
            'destination': '/tmp',
            'args': '-az'
        }

    :param str uuid: the id of the sip containing files to transfer
    :param config: the config for rsync
    """
    sip = SIP.get_sip(uuid)
    # Export everything into a temporary folder first.
    archiver = BaseArchiver(sip)
    archiver.write_all_files()
    src_path = archiver.get_fullpath('')

    # Destination is either a plain path, or user@server:path for remotes.
    dest_path = config['destination']
    server = config.get('server', None)
    user = config.get('user', None)
    if server and user:
        dest_path = '{user}@{server}:{dest}'.format(
            user=user, server=server, dest=dest_path)

    try:
        # Run the transfer; the temp folder is removed no matter the outcome.
        return call(['rsync', config['args'], src_path, dest_path])
    finally:
        rmtree(src_path)
def test_write_all(db, sips, sip_metadata_types, locations, archive_fs):
    """Test the public "write_all_files" method."""
    archiver = BaseArchiver(sips[0])
    # The archive filesystem starts out empty.
    assert not archive_fs.listdir()

    archiver.write_all_files()

    # One top-level directory for the SIP, containing metadata/ and files/.
    assert len(archive_fs.listdir()) == 1
    fs = archive_fs.opendir(archiver.get_archive_subpath())
    assert len(fs.listdir()) == 2
    assert len(fs.listdir('metadata')) == 2
    assert len(fs.listdir('files')) == 1

    # Every written file must carry the original content.
    expected_contents = {
        'metadata/marcxml-test.xml': '<p>XML 1</p>',
        'metadata/json-test.json': '{"title": "JSON 1"}',
        'files/foobar.txt': 'test',
    }
    for filename, expected in expected_contents.items():
        with fs.open(filename, 'r') as fp:
            assert fp.read() == expected
def transfer_cp(uuid, config):
    """Transfer the files contained in the sip to a local destination.

    The transfer is done with a simple copy of files.

    This method is automatically called by the module to transfer the files.
    Depending on your installation, you may want to have a different behavior
    (copy among servers...). Then, you can create your own factory and link
    it into the config variable
    :py:data:`invenio_archivematica.config.ARCHIVEMATICA_TRANSFER_FACTORY`.

    :param str uuid: the id of the sip containing files to transfer
    :param config: can be empty. It will have the content of the variable
        :py:data:`invenio_archivematica.config.ARCHIVEMATICA_TRANSFER_FOLDER`.
        However, it will use the export folder set in
        :py:data:`invenio_sipstore.config.SIPSTORE_ARCHIVER_LOCATION_NAME`
    """
    # The archiver already knows the export location; writing is the copy.
    archiver = BaseArchiver(SIP.get_sip(uuid))
    archiver.write_all_files()
    return 0
def test_write(db, sips, sip_metadata_types, locations, archive_fs):
    """Test writing of the SIPFiles and SIPMetadata files to archive."""
    sip = sips[0]
    archiver = BaseArchiver(sip)
    data_files_info = archiver._get_data_files()
    assert not archive_fs.listdir()  # Empty archive
    archiver._write_sipfile(data_files_info[0])
    assert len(archive_fs.listdir()) == 1
    fs = archive_fs.opendir(archiver.get_archive_subpath())
    # Only the data file should exist so far.
    assert fs.isfile('files/foobar.txt')
    assert not fs.isfile('metadata/json-test.json')
    assert not fs.isfile('metadata/marcxml-test.xml')
    metadata_files_info = archiver._get_metadata_files()
    archiver._write_sipmetadata(metadata_files_info[0])
    archiver._write_sipmetadata(metadata_files_info[1])
    assert fs.isfile('metadata/json-test.json')
    assert fs.isfile('metadata/marcxml-test.xml')
    # _write_extra with raw content + filename
    assert not fs.isfile('test.txt')
    archiver._write_extra(content='test raw content', filename='test.txt')
    assert fs.isfile('test.txt')
    with fs.open('test.txt', 'r') as fp:
        cnt = fp.read()
    assert cnt == 'test raw content'
    # _write_extra with a pre-built fileinfo dictionary
    assert not fs.isfile('test2.txt')
    extra_file_info = dict(
        checksum=('md5:' + str(md5('test'.encode('utf-8')).hexdigest())),
        size=len('test'),
        filepath='test2.txt',
        fullpath=fs.getsyspath('test2.txt'),
        content='test')
    archiver._write_extra(fileinfo=extra_file_info)
    # BUGFIX: the original re-checked 'test.txt' here (copy-paste error),
    # leaving the fileinfo-based _write_extra path unverified. Verify the
    # file actually described by extra_file_info instead.
    assert fs.isfile('test2.txt')
    with fs.open('test2.txt', 'r') as fp:
        cnt = fp.read()
    assert cnt == 'test'
def test_name_formatters(db, app, sips, sip_metadata_types, locations,
                         archive_fs, secure_sipfile_name_formatter,
                         custom_sipmetadata_name_formatter):
    """Test archiving with custom filename formatter."""
    sip = sips[3]  # SIP with some naughty filenames
    archiver = BaseArchiver(sip, filenames_mapping_file='files/filenames.txt')
    assert not archive_fs.listdir()
    archiver.write_all_files()
    assert len(archive_fs.listdir()) == 1
    fs = archive_fs.opendir(archiver.get_archive_subpath())
    assert set(fs.listdir()) == set(['metadata', 'files'])
    assert len(fs.listdir('metadata')) == 2
    # inside 'files/' there should be 'filenames.txt' file with the mappings
    assert len(fs.listdir('files')) == 4

    def uuid_for(suffix):
        # File UUID of the single SIPFile whose path ends with 'suffix'.
        return next(f.file.id for f in sip.files
                    if f.filepath.endswith(suffix))

    uuid_txt = uuid_for('txt')
    uuid_js = uuid_for('js')
    uuid_dat = uuid_for('dat')

    # (path, expected content); a set value means compare line-by-line
    # because the mapping file's line order is not guaranteed.
    cases = [
        ('metadata/marcxml-test-metadata.xml', '<p>XML 4 żółć</p>'),
        ('metadata/json-test-metadata.json', '{"title": "JSON 4 żółć"}'),
        ('files/{0}-foobar.txt'.format(uuid_txt), 'test-fourth żółć'),
        ('files/{0}-http_maliciouswebsite.com_hack.js'.format(uuid_js),
         'test-fifth ąęćźə'),
        ('files/{0}-ozzcae.dat'.format(uuid_dat), 'test-sixth π'),
        ('files/filenames.txt', set([
            '{0}-foobar.txt ../../foobar.txt'.format(uuid_txt),
            '{0}-http_maliciouswebsite.com_hack.js '
            'http://maliciouswebsite.com/hack.js'.format(uuid_js),
            '{0}-ozzcae.dat łóżźćąę.dat'.format(uuid_dat),
        ])),
    ]
    for path, expected in cases:
        with fs.open(path, 'r') as fp:
            if isinstance(expected, set):
                assert set(fp.read().splitlines()) == expected
            else:
                assert fp.read() == expected