示例#1
0
def CopyFile(self, src, dst, remote_credentials=None, block_size=DEFAULT_BLOCK_SIZE):
    """
    Copy one file to a destination and record a success event.

    The source and destination are first normalised through
    ``self.parse_params``. An authenticated ``requests`` session is only
    built, and handed to ``copy_file``, when ``remote_credentials`` is
    truthy; otherwise ``copy_file`` is called without a session.

    Args:
        src: The file to copy
        dst: Where the file should be copied to
        remote_credentials: Value passed to ``decrypt_remote_credentials``,
            which yields the (user, password) pair used for the session
        block_size: Size of each block to copy
    Returns:
        None
    """

    src, dst = self.parse_params(src, dst)

    if remote_credentials:
        username, password = decrypt_remote_credentials(remote_credentials)
        session = requests.Session()
        session.verify = settings.REQUESTS_VERIFY
        session.auth = (username, password)
    else:
        session = None

    copy_file(src, dst, requests_session=session, block_size=block_size)

    self.create_success_event("Copied %s to %s" % (src, dst))
示例#2
0
    def run(self, delete_source=False, update_path=True):
        """
        Copy the package and its ``.xml`` file to ingest reception or to a
        remote receiver.

        The remote receiver is read from the ``transfer_project`` profile
        data as a ``url,user,password`` string. When present, both files are
        uploaded to the same ``api/ip-reception/upload/`` endpoint using an
        authenticated session; otherwise they are copied into the
        ``ingest_reception`` directory.

        Args:
            delete_source: Delete the source package and XML after copying
            update_path: Store the new locations on the information package;
                not allowed together with a remote receiver
        Raises:
            ValueError: If a remote receiver is configured while
                ``update_path`` is true, or the remote string does not
                consist of exactly three comma separated parts
        """
        ip = InformationPackage.objects.get(pk=self.ip)

        reception = Path.objects.get(entity="ingest_reception").value
        container_format = ip.get_container_format()
        package_src = ip.object_path

        try:
            profile_data = ip.get_profile_data('transfer_project')
            remote = profile_data.get('preservation_organization_receiver_url')
        except AttributeError:
            # No usable profile data means there is no remote receiver
            remote = None

        if not remote:
            session = None
            package_dst = os.path.join(
                reception,
                ip.object_identifier_value + ".%s" % container_format)
        elif update_path:
            raise ValueError('Cannot update path when submitting to remote host')
        else:
            host, remote_user, remote_pass = remote.split(',')
            package_dst = urljoin(host, 'api/ip-reception/upload/')

            session = requests.Session()
            session.verify = settings.REQUESTS_VERIFY
            session.auth = (remote_user, remote_pass)

        block_size = 8 * 1000000  # 8MB
        copy_file(package_src,
                  package_dst,
                  requests_session=session,
                  block_size=block_size)

        mets_src = os.path.join(os.path.dirname(package_src),
                                ip.object_identifier_value + ".xml")
        # Remote uploads reuse the upload endpoint for the XML file as well
        mets_dst = package_dst if remote else os.path.join(
            reception, ip.object_identifier_value + ".xml")
        copy_file(mets_src,
                  mets_dst,
                  requests_session=session,
                  block_size=block_size)

        if update_path:
            ip.object_path = package_dst
            ip.package_mets_path = mets_dst
            ip.save()

        if delete_source:
            for stale in (package_src, mets_src):
                delete_path(stale)

        self.set_progress(100, total=100)
示例#3
0
    def test_copy_file_locally(self):
        """A local copy keeps the source and produces an identical destination file."""
        src = os.path.join(self.datadir, 'foo.txt')
        dst = os.path.join(self.datadir, 'bar.txt')

        with open(src, 'w') as source_file:
            source_file.write('test')

        copy_file(src, dst)

        # Both files must exist afterwards and have the same content
        for path in (src, dst):
            self.assertTrue(os.path.isfile(path))
        self.assertTrue(cmp(src, dst, shallow=False))
示例#4
0
    def test_copy_file(self, mock_local, mock_remote):
        """``copy_file`` calls the local mock without a session and the remote mock with one."""
        src, dst = 'foo', 'bar'

        # Without a session only the local implementation is expected
        copy_file(src, dst)
        mock_local.assert_called_once_with(src, dst)

        # With a session the remote implementation receives it positionally
        session = requests.Session()
        copy_file(src, dst, requests_session=session)
        mock_remote.assert_called_once_with(
            src, dst, session, block_size=mock.ANY,
        )
示例#5
0
    def run(self, src, dst, requests_session=None, block_size=65536):
        """
        Delegate to ``copy_file`` with the given arguments.

        Args:
            src: The file to copy
            dst: Where the file should be copied to
            requests_session: The request session to be used, forwarded
                unchanged to ``copy_file``
            block_size: Size of each block to copy
        Returns:
            None
        """

        options = {
            'requests_session': requests_session,
            'block_size': block_size,
        }
        copy_file(src, dst, **options)
示例#6
0
    def test_copy_with_not_enough_space_at_dst(self):
        """``copy_file`` raises ``NoSpaceLeftError`` when the reported free space is below the reported size."""
        src = os.path.join(self.datadir, 'foo.txt')
        dst = os.path.join(self.datadir, 'bar.txt')

        with open(src, 'w') as source_file:
            source_file.write('test')

        # Report a size of 10 with only 5 free at the destination
        size_patch = mock.patch(
            'ESSArch_Core.storage.copy.get_tree_size_and_count',
            return_value=(10, 1))

        usage = namedtuple('usage', 'free')
        free_patch = mock.patch(
            'ESSArch_Core.storage.copy.shutil.disk_usage',
            return_value=usage(free=5))

        with size_patch, free_patch, self.assertRaises(NoSpaceLeftError):
            copy_file(src, dst)
示例#7
0
    def run(self):
        """
        Copy the package container and its ``.xml`` file from pre-ingest
        reception to ingest reception or to a remote receiver.

        The remote receiver is read from the ``transfer_project`` profile's
        specification data as a ``url,user,password`` string. When it is
        present and well formed, both files are uploaded to
        ``api/ip-reception/upload/`` on that host using an authenticated
        session. When it is missing or malformed, both files are copied into
        the ``path_ingest_reception`` directory instead.

        Returns:
            None
        """
        ip = InformationPackage.objects.get(pk=self.ip)

        srcdir = Path.objects.get(entity="path_preingest_reception").value
        reception = Path.objects.get(entity="path_ingest_reception").value
        container_format = ip.get_container_format()
        src = os.path.join(srcdir, ip.object_identifier_value + ".%s" % container_format)

        try:
            remote = ip.get_profile('transfer_project').specification_data.get(
                'preservation_organization_receiver_url'
            )
        except AttributeError:
            # No transfer project profile (or no specification data)
            remote = None

        session = None

        if remote:
            try:
                host, remote_user, remote_pass = remote.split(',')
                dst = urljoin(host, 'api/ip-reception/upload/')
            except ValueError:
                # Malformed remote specification: fall back to a local copy
                remote = None
            else:
                session = requests.Session()
                # Honour the project-wide TLS verification setting instead of
                # unconditionally disabling certificate checks
                session.verify = settings.REQUESTS_VERIFY
                session.auth = (remote_user, remote_pass)

        if not remote:
            # Also reached after a malformed remote specification, so that
            # ``dst`` is always bound before the first copy
            dst = os.path.join(reception, ip.object_identifier_value + ".%s" % container_format)

        block_size = 8 * 1000000  # 8MB
        copy_file(src, dst, requests_session=session, block_size=block_size)

        src = os.path.join(srcdir, ip.object_identifier_value + ".xml")
        if not remote:
            # Remote uploads reuse the same endpoint; local copies need the
            # XML file's own path in reception
            dst = os.path.join(reception, ip.object_identifier_value + ".xml")
        copy_file(src, dst, requests_session=session, block_size=block_size)

        self.set_progress(100, total=100)
示例#8
0
    def test_copy_file_remotely(self, mock_copy, _mock_req):
        """A 4 byte file copied remotely with ``block_size=1`` is sent as calls at offsets 0 through 4."""
        src = os.path.join(self.datadir, 'foo.txt')
        with open(src, 'w') as source_file:
            source_file.write('test')
        dst = 'bar'
        session = requests.Session()

        copy_file(src, dst, requests_session=session, block_size=1)

        # The first call carries no upload id; every later one reuses it
        first_call = mock.call(src,
                               dst,
                               0,
                               block_size=1,
                               file_size=4,
                               requests_session=session)
        following_calls = [
            mock.call(src,
                      dst,
                      offset,
                      block_size=1,
                      file_size=4,
                      requests_session=session,
                      upload_id='test_upload_id')
            for offset in range(1, 5)
        ]
        mock_copy.assert_has_calls([first_call] + following_calls)
示例#9
0
    def run(self):
        """
        Copy the package, its events file (if any) and its ``.xml`` file to
        gate reception or to a remote receiver.

        The remote receiver is read from the ``transfer_project`` profile
        data as a ``url,user,password`` string. In remote mode every file is
        sent to that same URL using an authenticated session; otherwise the
        files are copied relative to the ``gate_reception`` path.

        Returns:
            The destination used for the package itself
        """
        ip = InformationPackage.objects.get(pk=self.ip)
        src = ip.object_path
        srcdir, srcfile = os.path.split(src)
        dst = Path.objects.get(entity="gate_reception").value

        try:
            remote = ip.get_profile_data('transfer_project').get(
                'preservation_organization_receiver_url_epp'
            )
        except AttributeError:
            remote = None

        session = None

        if remote:
            try:
                dst, remote_user, remote_pass = remote.split(',')

                session = requests.Session()
                session.verify = settings.REQUESTS_VERIFY
                session.auth = (remote_user, remote_pass)
            except ValueError:
                # Malformed remote specification: continue in local mode.
                # NOTE(review): ``dst`` stays the bare reception directory
                # here (``srcfile`` is not appended) - confirm that
                # ``copy_file`` accepts a directory as destination.
                remote = None
        else:
            dst = os.path.join(dst, srcfile)

        block_size = 8 * 1000000  # 8MB

        copy_file(src, dst, requests_session=session, block_size=block_size)

        self.set_progress(50, total=100)

        objid = ip.object_identifier_value
        src = ip.get_events_file_path()
        if os.path.isfile(src):
            if not remote:
                xml_dst = os.path.join(os.path.dirname(dst), "%s_ipevents.xml" % objid)
            else:
                xml_dst = dst
            copy_file(src, xml_dst, requests_session=session, block_size=block_size)

        self.set_progress(75, total=100)

        src = os.path.join(srcdir, "%s.xml" % objid)
        if remote:
            # Must be set explicitly: when no events file exists the branch
            # above never runs and ``xml_dst`` would otherwise be unbound
            xml_dst = dst
        else:
            # NOTE(review): ``dst`` already ends with ``srcfile`` in the
            # regular local case, so this joins onto the package path -
            # confirm this is the intended location.
            xml_dst = os.path.join(dst, "%s.xml" % objid)
        copy_file(src, xml_dst, requests_session=session, block_size=block_size)

        self.set_progress(100, total=100)

        return dst
示例#10
0
def TransferIP(self):
    """
    Transfer the package, its events file (if any) and its ``.xml`` file.

    The destination is the ``transfer_destination_url`` entry of the
    ``transfer_project`` profile data (a ``url,user,password`` string) when
    set, otherwise the ``ingest_transfer`` path. In remote mode every file
    is sent to the same URL using an authenticated session.

    Returns:
        The destination used for the package itself
    """
    ip = InformationPackage.objects.get(pk=self.ip)
    package_src = ip.object_path
    srcdir = os.path.dirname(package_src)

    remote = ip.get_profile_data('transfer_project').get(
        'transfer_destination_url')

    if remote:
        dst, remote_user, remote_pass = remote.split(',')

        session = requests.Session()
        session.verify = settings.REQUESTS_VERIFY
        session.auth = (remote_user, remote_pass)
    else:
        session = None
        dst = Path.objects.get(entity="ingest_transfer").value

    block_size = 8 * 1000000  # 8MB
    copy_file(package_src, dst, requests_session=session, block_size=block_size)

    self.set_progress(50, total=100)

    objid = ip.object_identifier_value
    events_src = ip.get_events_file_path()
    if os.path.isfile(events_src):
        # The events file is optional and only copied when it exists
        events_dst = dst if remote else os.path.join(
            os.path.dirname(dst), "%s_ipevents.xml" % objid)
        copy_file(events_src,
                  events_dst,
                  requests_session=session,
                  block_size=block_size)

    self.set_progress(75, total=100)

    mets_src = os.path.join(srcdir, "%s.xml" % objid)
    mets_dst = dst if remote else os.path.join(dst, "%s.xml" % objid)

    copy_file(mets_src, mets_dst, requests_session=session, block_size=block_size)
    self.set_progress(100, total=100)
    self.create_success_event("Transferred IP")
    return dst
示例#11
0
 def test_copy_file(self, mock_copy_file_locally):
     """Without a session, ``copy_file`` calls the local mock once with a block size."""
     src, dst = self.src, self.dst
     copy_file(src, dst)
     mock_copy_file_locally.assert_called_once_with(
         src, dst, block_size=mock.ANY,
     )
示例#12
0
def StorageMigration(self, storage_method, temp_path):
    """
    Migrate the task's information package to the enabled target of a
    storage method.

    The package is read into ``temp_path`` (unless this host was asked by a
    master server), converted between directory and container form when the
    source and destination differ, optionally uploaded to a remote server,
    and finally handed to ``ip.preserve``. A notification is created for the
    responsible user afterwards.

    Args:
        storage_method: Primary key of the ``StorageMethod`` to migrate to
        temp_path: Directory in which the package directory, container and
            XML file paths are built
    Returns:
        The value returned by ``ip.preserve``
    Raises:
        ValueError: If the storage method has no enabled target, or a
            conversion is needed and the container format is neither
            ``tar`` nor ``zip``
    """
    ip = self.get_information_package()
    container_format = ip.get_container_format()
    # Rebind the primary key argument to the actual model instance
    storage_method = StorageMethod.objects.get(pk=storage_method)

    try:
        storage_target = storage_method.enabled_target
    except StorageTarget.DoesNotExist:
        raise ValueError('No writeable target available for {}'.format(storage_method))

    # All working paths live directly under temp_path and are named after
    # the package (or its AIC) identifier
    dir_path = os.path.join(temp_path, ip.object_identifier_value)
    container_path = os.path.join(temp_path, ip.object_identifier_value + '.{}'.format(container_format))
    aip_xml_path = os.path.join(temp_path, ip.object_identifier_value + '.xml')
    aic_xml_path = os.path.join(temp_path, ip.aic.object_identifier_value + '.xml')

    if storage_target.master_server and not storage_target.remote_server:
        # we are on remote host
        # NOTE(review): nothing is read here; presumably the container has
        # already been delivered to temp_path by the master - confirm
        src_container = True
    else:
        # we are not on master, access from existing storage object
        storage_object = ip.get_fastest_readable_storage_object()
        if storage_object.container:
            storage_object.read(container_path, self.get_processtask())
        else:
            storage_object.read(dir_path, self.get_processtask())

        src_container = storage_object.container

    dst_container = storage_method.containers

    # If storage_object is "long term" and storage_method is not (or vice versa),
    # then we have to do some "conversion" before we go any further

    if src_container and not dst_container:
        # extract container
        if container_format == 'tar':
            with tarfile.open(container_path) as tar:
                tar.extractall(temp_path)
        elif container_format == 'zip':
            with zipfile.ZipFile(container_path) as zipf:
                zipf.extractall(temp_path)
        else:
            raise ValueError('Invalid container format: {}'.format(container_format))

    elif not src_container and dst_container:
        # create container, aip xml and aic xml
        if container_format == 'tar':
            with tarfile.open(container_path, 'w') as new_tar:
                new_tar.format = settings.TARFILE_FORMAT
                new_tar.add(dir_path)
        elif container_format == 'zip':
            zip_directory(dirname=dir_path, zipname=container_path, compress=False)
        else:
            raise ValueError('Invalid container format: {}'.format(container_format))

        generate_package_mets(ip, container_path, aip_xml_path)
        generate_aic_mets(ip, aic_xml_path)

    # Container targets and remote targets get the container plus both XML
    # files; a local directory target gets the extracted directory only
    if dst_container or storage_target.remote_server:
        src = [
            container_path,
            aip_xml_path,
            aic_xml_path,
        ]
    else:
        src = [dir_path]

    if storage_target.remote_server:
        # we are on master, copy files to remote

        # remote_server is a "host,user,password" string
        host, user, passw = storage_target.remote_server.split(',')
        dst = urljoin(host, reverse('informationpackage-add-file-from-master'))
        requests_session = requests.Session()
        requests_session.verify = settings.REQUESTS_VERIFY
        requests_session.auth = (user, passw)

        for s in src:
            copy_file(s, dst, requests_session=requests_session)

    obj_id = ip.preserve(src, storage_target, dst_container, self.get_processtask())

    Notification.objects.create(
        message="Migrated {} to {}".format(ip.object_identifier_value, storage_method.name),
        level=logging.INFO,
        user_id=self.responsible,
        refresh=True,
    )

    return obj_id
示例#13
0
    def read(self, dst, task, extract=False):
        """
        Read this storage object, either locally or through a remote server.

        Three cases are handled, based on the storage target of this
        object's medium:

        * ``remote_server`` is set: the read is delegated to the remote
          host through ``task``'s remote-copy methods, and the local task
          is polled until it reaches a ready state.
        * ``master_server`` is set (and ``remote_server`` is not): the
          object is read into the local ``temp`` path and the resulting
          files are uploaded to the master.
        * neither is set: the storage backend reads directly into ``dst``.

        Args:
            dst: Destination of the read. In the master-server case it is
                sent as the ``dst`` query parameter of the upload session
            task: Task object used to create, run, poll and retry the
                remote copy
            extract: Forwarded to the storage backend's ``read``
        """
        ip = self.ip
        is_cached_storage_object = self.is_cache_for_ip(ip)

        storage_medium = self.storage_medium
        storage_target = storage_medium.storage_target

        if storage_target.remote_server:
            # remote_server is a "host,user,password" string
            host, user, passw = storage_target.remote_server.split(',')
            session = requests.Session()
            session.verify = settings.REQUESTS_VERIFY
            session.auth = (user, passw)

            # if the remote server already has completed
            # then we only want to get the result from it,
            # not run it again. If it has failed then
            # we want to retry it

            r = task.get_remote_copy(session, host)
            if r.status_code == 404:
                # the task does not exist
                task.create_remote_copy(session, host)
                task.run_remote_copy(session, host)
            else:
                # Mirror the remote task's state onto the local task
                remote_data = r.json()
                task.status = remote_data['status']
                task.progress = remote_data['progress']
                task.result = remote_data['result']
                task.traceback = remote_data['traceback']
                task.exception = remote_data['exception']
                task.save()

                if task.status in celery_states.EXCEPTION_STATES:
                    task.retry_remote_copy(session, host)

            # Poll the remote copy every 5 seconds until the mirrored
            # status is a ready state
            while task.status not in celery_states.READY_STATES:
                r = task.get_remote_copy(session, host)

                remote_data = r.json()
                task.status = remote_data['status']
                task.progress = remote_data['progress']
                task.result = remote_data['result']
                task.traceback = remote_data['traceback']
                task.exception = remote_data['exception']
                task.save()

                sleep(5)

            if task.status in celery_states.EXCEPTION_STATES:
                task.reraise()
        else:
            storage_backend = self.get_storage_backend()
            storage_medium.prepare_for_read()

            if storage_target.master_server:
                # we are on a remote host that has been requested
                # by master to write to its temp directory
                temp_dir = Path.objects.get(entity='temp').value

                # NOTE(review): the field order here (user, password, host)
                # differs from the remote_server split above
                # (host, user, password) - confirm the master_server format
                user, passw, host = storage_target.master_server.split(',')
                session = requests.Session()
                session.verify = settings.REQUESTS_VERIFY
                session.auth = (user, passw)
                # The caller's dst is captured as a query parameter before
                # dst is rebound to the upload URL below
                session.params = {'dst': dst}

                temp_object_path = ip.get_temp_object_path()
                temp_container_path = ip.get_temp_container_path()
                temp_mets_path = ip.get_temp_container_xml_path()
                temp_aic_mets_path = ip.get_temp_container_aic_xml_path()
                dst = urljoin(
                    host, reverse('informationpackage-add-file-from-master'))

                storage_backend.read(self, temp_dir, extract=extract)

                if is_cached_storage_object or not self.container:
                    # Not stored as a container: pack the object into a tar
                    # file and upload only that
                    with tarfile.open(temp_container_path, 'w') as new_tar:
                        new_tar.format = settings.TARFILE_FORMAT
                        new_tar.add(temp_object_path)
                    copy_file(temp_container_path,
                              dst,
                              requests_session=session)

                else:
                    # Stored as a container: upload it together with its
                    # package XML and AIC XML files
                    copy_file(temp_container_path,
                              dst,
                              requests_session=session)
                    copy_file(temp_mets_path, dst, requests_session=session)
                    copy_file(temp_aic_mets_path,
                              dst,
                              requests_session=session)

            else:
                storage_backend.read(self, dst, extract=extract)