示例#1
0
def _copy_from_transfer_sources(paths, relative_destination):
    """Copy files from source locations to the currently processing location.

    Any files in locations not associated with this pipeline will be ignored.

    :param list paths: List of paths. Each path should be formatted
                       <uuid of location>:<full path in location>
    :param str relative_destination: Path relative to the currently processing
                                     space to move the files to.
    :raises Exception: if a path references a location that is not associated
                       with this pipeline, or if any of the storage-service
                       copy requests fail.
    """
    # First "CP" (currently processing) location is the copy destination.
    processing_location = storage_service.get_location(purpose="CP")[0]
    transfer_sources = storage_service.get_location(purpose="TS")
    # Map each transfer-source UUID to its location record plus the list of
    # files to be copied out of it (filled in below, one request per source).
    files = {l["uuid"]: {"location": l, "files": []} for l in transfer_sources}

    for item in paths:
        location, path = LocationPath(item).parts()
        if location is None:
            # No explicit "<uuid>:" prefix: fall back to the default source.
            location = _default_transfer_source_location_uuid()
        if location not in files:
            raise Exception(
                "Location %(location)s is not associated"
                " with this pipeline" % {"location": location}
            )

        # ``path`` will be a UTF-8 bytestring but the replacement pattern path
        # from ``files`` will be a Unicode object. Therefore, the latter must
        # be UTF-8 encoded prior. Same reasoning applies to ``destination``
        # below. This allows transfers to be started on UTF-8-encoded directory
        # names.
        source = path.replace(
            files[location]["location"]["path"].encode("utf8"), "", 1
        ).lstrip("/")
        # Use the last segment of the path for the destination - basename for a
        # file, or the last folder if not. Keep the trailing / for folders.
        last_segment = (
            os.path.basename(source.rstrip("/")) + "/"
            if source.endswith("/")
            else os.path.basename(source)
        )
        # Strip the "%sharedPath%" placeholder from the joined path before
        # handing it to the storage service.
        destination = os.path.join(
            processing_location["path"].encode("utf8"),
            relative_destination,
            last_segment,
        ).replace("%sharedPath%", "")
        files[location]["files"].append({"source": source, "destination": destination})
        logger.debug("source: %s, destination: %s", source, destination)

    # One copy request per source location; collect failures and raise a
    # single aggregated exception at the end.
    message = []
    for item in files.values():
        reply, error = storage_service.copy_files(
            item["location"], processing_location, item["files"]
        )
        if reply is None:
            message.append(str(error))
    if message:
        raise Exception(
            "The following errors occurred: %(message)s"
            % {"message": ", ".join(message)}
        )
示例#2
0
def copy_from_transfer_sources(paths, relative_destination):
    """
    Helper to copy files from transfer source locations to the currently processing location.

    Any files in locations not associated with this pipeline will be ignored.

    :param list paths: List of paths.  Each path should be formatted <uuid of location>:<full path in location>
    :param str relative_destination: Path relative to the currently processing space to move the files to.
    :returns: Tuple of (boolean error, message)
    """
    processing_location = storage_service.get_location(purpose='CP')[0]
    transfer_sources = storage_service.get_location(purpose='TS')
    files = {l['uuid']: {'location': l, 'files': []} for l in transfer_sources}

    for p in paths:
        try:
            location, path = p.split(':', 1)
        except ValueError:
            logger.warning('Path %s cannot be split into location:path', p)
            # Fix: the previous ``'Path' + p + 'cannot...'`` concatenation
            # produced a message with no spaces around the path.
            return True, 'Path {} cannot be split into location:path'.format(p)
        if location not in files:
            logger.warning('Location %s is not associated with this pipeline.',
                           location)
            # Fix: same missing-spaces problem as above.
            return True, 'Location {} is not associated with this pipeline'.format(location)

        # ``path`` will be a UTF-8 bytestring but the replacement pattern path
        # from ``files`` will be a Unicode object. Therefore, the latter must
        # be UTF-8 encoded prior. Same reasoning applies to ``destination``
        # below. This allows transfers to be started on UTF-8-encoded directory
        # names.
        source = path.replace(
            files[location]['location']['path'].encode('utf8'), '',
            1).lstrip('/')
        # Use the last segment of the path for the destination - basename for a
        # file, or the last folder if not. Keep the trailing / for folders.
        last_segment = os.path.basename(source.rstrip(
            '/')) + '/' if source.endswith('/') else os.path.basename(source)
        destination = os.path.join(processing_location['path'].encode('utf8'),
                                   relative_destination,
                                   last_segment).replace('%sharedPath%', '')
        files[location]['files'].append({
            'source': source,
            'destination': destination
        })
        logger.debug('source: %s, destination: %s', source, destination)

    message = []
    for pl in files.values():
        reply, error = storage_service.copy_files(pl['location'],
                                                  processing_location,
                                                  pl['files'])
        if reply is None:
            message.append(str(error))
    if message:
        # "occured" -> "occurred" (spelling fix in user-facing message).
        return True, 'The following errors occurred: {}'.format(
            ', '.join(message))
    else:
        return False, 'Files added successfully.'
示例#3
0
def ingest_grid(request):
    """Render the ingest grid page.

    Warns the user when the storage service cannot be reached.
    """
    polling_interval = django_settings.POLLING_INTERVAL
    microservices_help = django_settings.MICROSERVICES_HELP
    uid = request.user.id

    try:
        # Probe the storage service; only the warning matters, the result is
        # unused here.
        storage_service.get_location(purpose="BL")
    except Exception:
        # Narrowed from a bare ``except:`` so SystemExit/KeyboardInterrupt
        # are no longer swallowed.
        messages.warning(request, 'Error retrieving originals/arrange directory locations: is the storage server running? Please contact an administrator.')

    return render(request, 'ingest/grid.html', locals())
示例#4
0
def _copy_from_transfer_sources(paths, relative_destination):
    """Copy files from source locations to the currently processing location.

    Any files in locations not associated with this pipeline will be ignored.

    :param list paths: List of paths. Each path should be formatted
                       <uuid of location>:<full path in location>
    :param str relative_destination: Path relative to the currently processing
                                     space to move the files to.
    :raises Exception: if a location is not associated with this pipeline or
                       any storage-service copy request fails.
    """
    processing_location = storage_service.get_location(purpose='CP')[0]
    transfer_sources = storage_service.get_location(purpose='TS')
    files = {l['uuid']: {'location': l, 'files': []} for l in transfer_sources}

    for item in paths:
        location, path = Path(item).parts()
        if location is None:
            location = _default_transfer_source_location_uuid()
        if location not in files:
            raise Exception('Location %(location)s is not associated'
                            ' with this pipeline' % {'location': location})

        # ``path`` will be a UTF-8 bytestring but the replacement pattern path
        # from ``files`` will be a Unicode object. Therefore, the latter must
        # be UTF-8 encoded prior. Same reasoning applies to ``destination``
        # below. This allows transfers to be started on UTF-8-encoded directory
        # names.
        source = path.replace(
            files[location]['location']['path'].encode('utf8'), '',
            1).lstrip('/')
        # Use the last segment of the path for the destination - basename for a
        # file, or the last folder if not. Keep the trailing / for folders.
        last_segment = os.path.basename(source.rstrip('/')) + '/' \
            if source.endswith('/') else os.path.basename(source)
        destination = os.path.join(processing_location['path'].encode('utf8'),
                                   relative_destination,
                                   last_segment).replace('%sharedPath%', '')
        files[location]['files'].append({
            'source': source,
            'destination': destination
        })
        logger.debug('source: %s, destination: %s', source, destination)

    message = []
    for item in files.values():
        reply, error = storage_service.copy_files(item['location'],
                                                  processing_location,
                                                  item['files'])
        if reply is None:
            message.append(str(error))
    if message:
        # "occured" -> "occurred" (spelling fix in user-facing message).
        raise Exception('The following errors occurred: %(message)s' %
                        {'message': ', '.join(message)})
示例#5
0
def ingest_grid(request):
    """Render the ingest grid page with polling/help settings and job statuses.

    Warns the user when the storage service cannot be reached.
    """
    try:
        # Probe the storage service; the result itself is unused here.
        storage_service.get_location(purpose="BL")
    except Exception:  # narrowed from bare except: don't trap SystemExit
        messages.warning(
            request,
            _('Error retrieving originals/arrange directory locations: is the storage server running? Please contact an administrator.'
              ))
    return render(
        request, 'ingest/grid.html', {
            "polling_interval": django_settings.POLLING_INTERVAL,
            "microservices_help": django_settings.MICROSERVICES_HELP,
            "job_statuses": dict(models.Job.STATUS),
        })
示例#6
0
def main(job, transfer_id, transfer_path, created_at):
    """Record events, bag, index, and move a transfer into the backlog.

    Once the transfer has been registered with the storage service, the copy
    in the processing space is deleted.
    """
    current_location = storage_service.get_location(purpose="CP")[0]
    backlog = storage_service.get_location(purpose="BL")[0]

    logger.info("Creating events...")
    _record_backlog_event(transfer_id, transfer_path, created_at)

    logger.info("Creating bag...")
    _create_bag(transfer_id, transfer_path)

    logger.info("Indexing the transfer...")
    _index_transfer(job, transfer_id, transfer_path)

    logger.info("Calculating size...")
    # Total on-disk size of every file under the transfer directory.
    total_size = sum(
        os.path.getsize(os.path.join(root, name))
        for root, _, names in os.walk(transfer_path)
        for name in names
    )

    # Make Transfer path relative to Location
    location_prefix = os.path.join(current_location["path"], "")
    relative_transfer_path = transfer_path.replace(location_prefix, "")

    # TODO this should use the same value as
    # dashboard/src/components/filesystem_ajax/views.py DEFAULT_BACKLOG_PATH
    transfer_name = os.path.basename(transfer_path.rstrip("/"))
    backlog_path = os.path.join("originals", transfer_name)

    logger.info("Moving transfer to backlog...")
    try:
        stored_file = _create_file(
            transfer_id,
            current_location,
            relative_transfer_path,
            backlog,
            backlog_path,
            total_size,
        )
    except StorageServiceCreateFileError as exc:
        error_message = "Moving to backlog failed: {}.".format(exc)
        logger.warning(error_message)
        raise Exception(error_message + " See logs for more details.")
    logger.info("Transfer moved (%s).", pprint.pformat(stored_file))

    logger.info("Deleting transfer from processing space (%s)...", transfer_path)
    shutil.rmtree(transfer_path)
示例#7
0
def main(transfer_uuid, transfer_path):
    """Register a transfer with the storage service backlog, then delete it
    from the processing space.

    :returns: 0 on success, 1 on failure.
    """
    current_location = storage_service.get_location(purpose="CP")[0]
    backlog = storage_service.get_location(purpose="BL")[0]

    # Get size recursively for Transfer
    size = sum(
        os.path.getsize(os.path.join(root, name))
        for root, _, names in os.walk(transfer_path)
        for name in names
    )

    # Make Transfer path relative to Location
    shared_path = os.path.join(current_location['path'], '')
    relative_transfer_path = transfer_path.replace(shared_path, '')

    # TODO this should use the same value as
    # dashboard/src/components/filesystem_ajax/views.py DEFAULT_BACKLOG_PATH
    transfer_name = os.path.basename(transfer_path.rstrip('/'))
    backlog_path = os.path.join('originals', transfer_name)

    new_file, error_msg = storage_service.create_file(
        uuid=transfer_uuid,
        origin_location=current_location['resource_uri'],
        origin_path=relative_transfer_path,
        current_location=backlog['resource_uri'],
        current_path=backlog_path,
        package_type='transfer',  # TODO use constant from storage service
        size=size,
    )

    # Failure path first: report to stderr and the log, then bail out.
    if new_file is None or new_file.get('status', '') == "FAIL":
        print(
            "Moving to backlog failed.  See Storage Service logs for more details",
            file=sys.stderr)
        print(error_msg or "Package status: Failed", file=sys.stderr)
        logging.warning(
            "Moving to backlog failed: {}.  See logs for more details.".format(
                error_msg))
        return 1

    message = "Transfer moved to backlog: {}".format(new_file)
    logging.info(message)
    print(message)
    # TODO update transfer location?  Files location?

    # Delete transfer from processing space
    shutil.rmtree(transfer_path)
    return 0
def get_aip_storage_locations(purpose):
    """Print a dict mapping AIP storage location descriptions to resource URIs.

    (The previous docstring claimed a dict was returned; the function only
    prints it.)

    :param str purpose: Storage service location purpose code to query.
    """
    storage_directories = storage_service.get_location(purpose=purpose)
    logging.debug("Storage Directories: {}".format(storage_directories))
    choices = {
        storage_dir['description']: storage_dir['resource_uri']
        for storage_dir in storage_directories
    }
    # Fix: ``print choices`` is Python-2-only statement syntax and a
    # SyntaxError on Python 3; use the print() function.
    print(choices)
示例#9
0
def sources(request):
    """Render the transfer-source locations page.

    Falls back to an empty location list (plus a warning message) when the
    storage service cannot be reached.
    """
    locations = []  # ensure defined for the template even on failure
    try:
        locations = storage_service.get_location(purpose="TS")
    except Exception:  # narrowed from bare except
        messages.warning(request, _('Error retrieving locations: is the storage server running? Please contact an administrator.'))

    system_directory_description = 'Available transfer source'
    return render(request, 'administration/locations.html', locals())
示例#10
0
def transfer_source_locations(request):
    """Return transfer source locations as JSON, or a 500 response on failure."""
    try:
        return helpers.json_response(storage_service.get_location(purpose="TS"))
    except Exception:  # narrowed from bare except: don't trap SystemExit
        message = _("Error retrieving source directories")
        logger.exception(message)
        response = {"message": message, "status": "Failure"}
        return helpers.json_response(response, status_code=500)
示例#11
0
def get_storage_locations(purpose):
    """Return locations with the given purpose, or ``[]`` on any failure.

    An empty result from the storage service is treated as a
    misconfiguration and likewise yields ``[]``.
    """
    try:
        dirs = storage_service.get_location(purpose=purpose)
        if not dirs:  # idiomatic emptiness test (was ``len(dirs) == 0``)
            raise Exception("Storage server improperly configured.")
    except Exception:
        dirs = []
    return dirs
示例#12
0
def storage(request):
    """Return storage service locations related with this pipeline.

    Exclude locations for currently processing, AIP recovery and SS internal
    purposes and disabled locations. Format used, quota and purpose values to
    human readable form.
    """
    try:
        response_locations = storage_service.get_location()
    except Exception:  # narrowed from bare except: don't trap SystemExit
        messages.warning(
            request,
            _(
                "Error retrieving locations: is the storage server running? "
                "Please contact an administrator."
            ),
        )
        return render(request, "administration/locations.html")

    # Currently processing, AIP recovery and SS internal locations
    # are intentionally not included to not display them in the table.
    purposes = {
        "AS": _("AIP Storage"),
        "DS": _("DIP Storage"),
        "SD": _("FEDORA Deposits"),
        "BL": _("Transfer Backlog"),
        "TS": _("Transfer Source"),
        "RP": _("Replicator"),
    }

    # Filter and format locations
    locations = []
    for loc in response_locations:
        # Skip disabled locations
        if not loc["enabled"]:
            continue
        # Skip unwanted purposes (membership test directly on the dict,
        # instead of materializing ``list(purposes.keys())``)
        if not loc["purpose"] or loc["purpose"] not in purposes:
            continue
        # Only show usage of AS and DS locations
        loc["show_usage"] = loc["purpose"] in ["AS", "DS"]
        if loc["show_usage"]:
            # Show unlimited for unset quotas
            if not loc["quota"]:
                loc["quota"] = _("unlimited")
            # Format bytes to human readable filesize
            else:
                loc["quota"] = filesizeformat(loc["quota"])
            if loc["used"]:
                loc["used"] = filesizeformat(loc["used"])
        # Format purpose
        loc["purpose"] = purposes[loc["purpose"]]
        locations.append(loc)

    # Sort by purpose
    locations.sort(key=lambda loc: loc["purpose"])

    return render(request, "administration/locations.html", {"locations": locations})
def get_aip_storage_locations(purpose, job):
    """Print the AIP storage locations for *purpose* via the job's printer.

    The printed dict maps each location's description to its resource URI,
    plus a 'Default location' entry pointing at the SS default endpoint.
    """
    storage_directories = storage_service.get_location(purpose=purpose)
    logger.debug("Storage Directories: {}".format(storage_directories))
    # Build the description -> URI mapping in one pass.
    choices = {
        directory['description']: directory['resource_uri']
        for directory in storage_directories
    }
    choices['Default location'] = '/api/v2/location/default/{}/'.format(
        purpose)
    job.pyprint(choices)
示例#14
0
def copy_from_transfer_sources(paths, relative_destination):
    """
    Helper to copy files from transfer source locations to the currently processing location.

    Any files in locations not associated with this pipeline will be ignored.

    :param list paths: List of paths.  Each path should be formatted <uuid of location>:<full path in location>
    :param str relative_destination: Path relative to the currently processing space to move the files to.
    :returns: Tuple of (boolean error, message)
    """
    processing_location = storage_service.get_location(purpose='CP')[0]
    transfer_sources = storage_service.get_location(purpose='TS')
    files = {l['uuid']: {'location': l, 'files': []} for l in transfer_sources}

    for p in paths:
        try:
            location, path = p.split(':', 1)
        except ValueError:
            logging.debug('Path %s cannot be split into location:path', p)
            continue
        if location not in files:
            logging.debug('Location %s is not associated with this pipeline.', location)
            continue

        source = path.replace(files[location]['location']['path'], '', 1).lstrip('/')
        # Use the last segment of the path for the destination - basename for a
        # file, or the last folder if not. Keep the trailing / for folders.
        last_segment = os.path.basename(source.rstrip('/')) + '/' if source.endswith('/') else os.path.basename(source)
        destination = os.path.join(processing_location['path'],
            relative_destination, last_segment).replace('%sharedPath%', '')
        files[location]['files'].append({'source': source, 'destination': destination})
        logging.debug('source: %s, destination: %s', source, destination)

    message = []
    # ``values()`` instead of Python-2-only ``itervalues()`` so this also
    # runs on Python 3.
    for pl in files.values():
        reply, error = storage_service.copy_files(pl['location'], processing_location, pl['files'])
        if reply is None:
            message.append(str(error))
    if message:
        # "occured" -> "occurred" (spelling fix in user-facing message).
        return True, 'The following errors occurred: {}'.format(', '.join(message))
    else:
        return False, 'Files added successfully.'
示例#15
0
def main(transfer_uuid, transfer_path):
    """Move a transfer into the backlog location and remove the local copy.

    :returns: 0 on success, 1 on failure.
    """
    current_location = storage_service.get_location(purpose="CP")[0]
    backlog = storage_service.get_location(purpose="BL")[0]

    # Get size recursively for Transfer
    size = sum(
        os.path.getsize(os.path.join(root, name))
        for root, _, names in os.walk(transfer_path)
        for name in names
    )

    # Make Transfer path relative to Location
    shared_path = os.path.join(current_location['path'], '')
    relative_transfer_path = transfer_path.replace(shared_path, '')

    # TODO this should use the same value as
    # dashboard/src/components/filesystem_ajax/views.py DEFAULT_BACKLOG_PATH
    transfer_name = os.path.basename(transfer_path.rstrip('/'))
    backlog_path = os.path.join('originals', transfer_name)

    try:
        new_file = _create_file(transfer_uuid, current_location,
                                relative_transfer_path, backlog, backlog_path,
                                size)
    except Exception as err:
        print('Moving to backlog failed. See Storage Service logs for more details',
              file=sys.stderr)
        print(err, file=sys.stderr)
        logging.warning(
            'Moving to backlog failed: {}. See Storage Service logs for more details.'.format(err))
        return 1

    message = "Transfer moved to backlog: {}".format(new_file)
    logging.info(message)
    print(message)
    # TODO update transfer location?  Files location?

    # Delete transfer from processing space
    shutil.rmtree(transfer_path)
    return 0
示例#16
0
def transfer_source_locations(request):
    """Return transfer source locations as JSON, or a 500 response on failure."""
    try:
        return helpers.json_response(
            storage_service.get_location(purpose="TS"))
    except Exception:  # narrowed from bare except: don't trap SystemExit
        message = _('Error retrieving source directories')
        logger.exception(message)
        response = {
            'message': message,
            'status': 'Failure',
        }
        return helpers.json_response(response, status_code=500)
示例#17
0
def storage(request):
    """Render the AIP storage locations page.

    Falls back to an empty location list (plus a warning message) when the
    storage service cannot be reached.
    """
    locations = []  # ensure defined for the template even on failure
    try:
        locations = storage_service.get_location(purpose="AS")
    except Exception:  # narrowed from bare except
        messages.warning(
            request,
            _(
                "Error retrieving locations: is the storage server running? Please contact an administrator."
            ),
        )

    system_directory_description = "Available storage"
    return render(request, "administration/locations.html", locals())
示例#18
0
def ingest_metadata_add_files(request, sip_uuid):
    """Render the "add metadata files" page for the SIP with ``sip_uuid``."""
    try:
        source_directories = storage_service.get_location(purpose="TS")
    except Exception:  # narrowed from bare except: don't trap SystemExit
        messages.warning(request, 'Error retrieving source directories: is the storage server running? Please contact an administrator.')
    else:
        logging.debug("Source directories found: {}".format(source_directories))
        if not source_directories:
            msg = "No transfer source locations are available. Please contact an administrator."
            messages.warning(request, msg)
    # Get name of SIP from directory name of most recent job
    # Making list and slicing for speed: http://stackoverflow.com/questions/5123839/fastest-way-to-get-the-first-object-from-a-queryset-in-django
    # NOTE(review): ``jobs[0]`` raises IndexError when the SIP has no jobs —
    # confirm callers guarantee at least one job exists.
    jobs = list(models.Job.objects.filter(sipuuid=sip_uuid, subjobof='')[:1])
    name = utils.get_directory_name_from_job(jobs[0])

    return render(request, 'ingest/metadata_add_files.html', locals())
def get_aip_storage_locations(purpose, job):
    """Print AIP storage locations for *purpose* as pretty-printed JSON.

    Each location UUID maps to its description (falling back to the relative
    path when the description is empty) and resource URI; a 'default' entry
    pointing at the SS default-location endpoint is always appended.
    """
    storage_directories = storage_service.get_location(purpose=purpose)
    logger.debug("Storage Directories: {}".format(
        json.dumps(storage_directories, indent=4, sort_keys=True)))
    choices = {
        directory['uuid']: {
            # Empty descriptions fall back to the relative path.
            "description": directory['description'] or directory['relative_path'],
            "uri": directory['resource_uri'],
        }
        for directory in storage_directories
    }
    choices['default'] = {
        "description": "Default Location",
        "uri": "/api/v2/location/default/{}/".format(purpose),
    }
    job.pyprint(json.dumps(choices, indent=4, sort_keys=True))
示例#20
0
def ingest_metadata_add_files(request, sip_uuid):
    """Render the "add metadata files" page for the SIP with ``sip_uuid``."""
    try:
        source_directories = storage_service.get_location(purpose="TS")
    except Exception:  # narrowed from bare except: don't trap SystemExit
        messages.warning(
            request,
            _('Error retrieving source directories: is the storage server running? Please contact an administrator.'
              ))
    else:
        logging.debug(
            "Source directories found: {}".format(source_directories))
        if not source_directories:
            messages.warning(
                request,
                _("No transfer source locations are available. Please contact an administrator."
                  ))
    # Get name of SIP from directory name of most recent job
    jobs = models.Job.objects.filter(sipuuid=sip_uuid, subjobof='')
    name = jobs.get_directory_name()

    return render(request, 'ingest/metadata_add_files.html', locals())
示例#21
0
def grid(request):
    """Render the transfer grid page.

    Warns when the storage service is unreachable or when no transfer source
    locations are configured.
    """
    try:
        source_directories = storage_service.get_location(purpose="TS")
    except Exception:  # narrowed from bare except: don't trap SystemExit
        messages.warning(
            request,
            'Error retrieving source directories: is the storage server running? Please contact an administrator.'
        )
    else:
        logging.debug(
            "Source directories found: {}".format(source_directories))
        if not source_directories:
            msg = "No <a href='{source_admin}'>transfer source locations</a> are available. Please contact an administrator.".format(
                source_admin=reverse(
                    'components.administration.views.sources'))
            messages.warning(request, mark_safe(msg))

    polling_interval = django_settings.POLLING_INTERVAL
    microservices_help = django_settings.MICROSERVICES_HELP
    uid = request.user.id
    hide_features = helpers.hidden_features()
    return render(request, 'transfer/grid.html', locals())
示例#22
0
def _copy_files_to_arrange(
    sourcepath, destination, fetch_children=False, backlog_uuid=None
):
    """Build SIPArrange entries for copying backlog content into arrange.

    :param str sourcepath: Path relative to this pipeline's backlog; any
        leading '/'s are stripped.
    :param str destination: Path within the arrange folder; must start with
        DEFAULT_ARRANGE_PATH.
    :param bool fetch_children: When sourcepath is a directory, also fetch
        its contents from the storage service and include them.
    :param backlog_uuid: UUID of the backlog location; looked up from the
        storage service when not provided.
    :returns: List of dicts with original_path/arrange_path/file_uuid/
        transfer_uuid keys.
    :raises ValueError: on blank/invalid paths or storage-service failures.
    """
    sourcepath = sourcepath.lstrip("/")  # starts with 'originals/', not '/originals/'
    # Insert each file into the DB

    # Lots of error checking:
    if not sourcepath or not destination:
        raise ValueError(_("GET parameter 'filepath' or 'destination' was blank."))
    if not destination.startswith(DEFAULT_ARRANGE_PATH):
        raise ValueError(
            _("%(path)s must be in arrange directory.") % {"path": destination}
        )

    try:
        leaf_dir = sourcepath.split("/")[-2]
    except IndexError:
        leaf_dir = ""
    # Files cannot go into the top level folder,
    # and neither can the "objects" directory
    if destination == DEFAULT_ARRANGE_PATH and not (
        sourcepath.endswith("/") or leaf_dir == "objects"
    ):
        raise ValueError(
            _("%(path1)s must go in a SIP, cannot be dropped onto %(path2)s")
            % {"path1": sourcepath, "path2": DEFAULT_ARRANGE_PATH}
        )

    # Create new SIPArrange entry for each object being copied over
    if not backlog_uuid:
        backlog_uuid = storage_service.get_location(purpose="BL")[0]["uuid"]
    to_add = []

    # Construct the base arrange_path differently for files vs folders
    if sourcepath.endswith("/"):
        # If dragging objects/ folder, actually move the contents of (not
        # the folder itself)
        if leaf_dir == "objects":
            arrange_path = os.path.join(destination, "")
        else:
            # Strip UUID from transfer name
            uuid_regex = r"-[\w]{8}(-[\w]{4}){3}-[\w]{12}$"
            leaf_dir = re.sub(uuid_regex, "", leaf_dir)
            arrange_path = os.path.join(destination, leaf_dir) + "/"
            to_add.append(
                {
                    "original_path": None,
                    "arrange_path": arrange_path,
                    "file_uuid": None,
                    "transfer_uuid": None,
                }
            )
        if fetch_children:
            try:
                to_add.extend(
                    _get_arrange_directory_tree(backlog_uuid, sourcepath, arrange_path)
                )
            except storage_service.ResourceNotFound as e:
                # Fix: the placeholder was misspelled ``%(messsage)s`` while
                # the mapping key is "message", so this line itself raised
                # KeyError instead of the intended ValueError.
                raise ValueError(
                    _("Storage Service failed with the message: %(message)s")
                    % {"message": str(e)}
                )
    else:
        if destination.endswith("/"):
            arrange_path = os.path.join(destination, os.path.basename(sourcepath))
        else:
            arrange_path = destination
        relative_path = sourcepath.replace(DEFAULT_BACKLOG_PATH, "", 1)
        try:
            file_info = storage_service.get_file_metadata(relative_path=relative_path)[
                0
            ]
        except storage_service.ResourceNotFound:
            raise ValueError(
                _(
                    "No file information returned from the Storage Service for file at relative_path: %(path)s"
                )
                % {"path": relative_path}
            )
        file_uuid = file_info.get("fileuuid")
        transfer_uuid = file_info.get("sipuuid")
        to_add.append(
            {
                "original_path": sourcepath,
                "arrange_path": arrange_path,
                "file_uuid": file_uuid,
                "transfer_uuid": transfer_uuid,
            }
        )

    logger.info("arrange_path: %s", arrange_path)
    logger.debug("files to be added: %s", to_add)

    return to_add
示例#23
0
def copy_to_arrange(request):
    """ Add files from backlog to in-progress SIPs being arranged.

    sourcepath: GET parameter, path relative to this pipelines backlog. Leading
        '/'s are stripped
    destination: GET parameter, path within arrange folder, should start with
        DEFAULT_ARRANGE_PATH ('/arrange/')
    """
    # Insert each file into the DB

    error = None
    # Parameters arrive base64-encoded in the POST body (despite the
    # docstring saying GET).
    sourcepath  = base64.b64decode(request.POST.get('filepath', '')).lstrip('/')
    destination = base64.b64decode(request.POST.get('destination', ''))
    logging.info('copy_to_arrange: sourcepath: {}'.format(sourcepath))
    logging.info('copy_to_arrange: destination: {}'.format(destination))

    # Lots of error checking:
    # NOTE(review): these checks do not short-circuit — a later failing check
    # overwrites ``error`` from an earlier one, so only the last failure is
    # reported.
    if not sourcepath or not destination:
        error = "GET parameter 'filepath' or 'destination' was blank."
    if not destination.startswith(DEFAULT_ARRANGE_PATH):
        error = '{} must be in arrange directory.'.format(destination)
    # If drop onto a file, drop it into its parent directory instead
    if not destination.endswith('/'):
        destination = os.path.dirname(destination)
    # Files cannot go into the top level folder
    if destination == DEFAULT_ARRANGE_PATH and not sourcepath.endswith('/'):
        error = '{} must go in a SIP, cannot be dropped onto {}'.format(
            sourcepath, DEFAULT_ARRANGE_PATH)

    # Create new SIPArrange entry for each object being copied over
    if not error:
        # IDEA memoize the backlog location?
        backlog_uuid = storage_service.get_location(purpose='BL')[0]['uuid']
        to_add = []

        # Construct the base arrange_path differently for files vs folders
        if sourcepath.endswith('/'):
            # Second-to-last path segment: the directory being dragged.
            leaf_dir = sourcepath.split('/')[-2]
            # If dragging objects/ folder, actually move the contents of (not
            # the folder itself)
            if leaf_dir == 'objects':
                arrange_path = os.path.join(destination, '')
            else:
                # Strip UUID from transfer name
                uuid_regex = r'-[\w]{8}(-[\w]{4}){3}-[\w]{12}$'
                leaf_dir = re.sub(uuid_regex, '', leaf_dir)
                arrange_path = os.path.join(destination, leaf_dir) + '/'
                # Placeholder row representing the directory itself.
                to_add.append({'original_path': None,
                   'arrange_path': arrange_path,
                   'file_uuid': None,
                   'transfer_uuid': None
                })
            # Add every file under the dragged directory as well.
            to_add.extend(_get_arrange_directory_tree(backlog_uuid, sourcepath, arrange_path))
        else:
            arrange_path = os.path.join(destination, os.path.basename(sourcepath))
            relative_path = sourcepath.replace(DEFAULT_BACKLOG_PATH, '', 1)
            try:
                file_info = storage_service.get_file_metadata(relative_path=relative_path)[0]
            except storage_service.ResourceNotFound:
                # Re-raised: a single missing file aborts the whole request.
                logging.warning('No file information returned from the Storage Service for file at relative_path: %s', relative_path)
                raise
            file_uuid = file_info.get('fileuuid')
            transfer_uuid = file_info.get('sipuuid')
            to_add.append({'original_path': sourcepath,
               'arrange_path': arrange_path,
               'file_uuid': file_uuid,
               'transfer_uuid': transfer_uuid
            })

        logging.info('copy_to_arrange: arrange_path: {}'.format(arrange_path))
        logging.debug('copy_to_arrange: files to be added: {}'.format(to_add))

        for entry in to_add:
            try:
                # TODO enforce uniqueness on arrange panel?
                models.SIPArrange.objects.create(
                    original_path=entry['original_path'],
                    arrange_path=entry['arrange_path'],
                    file_uuid=entry['file_uuid'],
                    transfer_uuid=entry['transfer_uuid'],
                )
            except IntegrityError:
                # FIXME Expecting this to catch duplicate original_paths, which
                # we want to ignore since a file can only be in one SIP.  Needs
                # to be updated not to ignore other classes of IntegrityErrors.
                logging.exception('Integrity error inserting: %s', entry)

    if error is not None:
        response = {
            'message': error,
            'error': True,
        }
    else:
        response = {'message': 'Files added to the SIP.'}

    return helpers.json_response(response)
示例#24
0
def store_aip(job, aip_destination_uri, aip_path, sip_uuid, sip_name,
              sip_type):
    """Store an AIP (or AIC/DIP) with the storage service.

    :param job: Job object used for printing user-facing output.
    :param str aip_destination_uri: storage service destination URI, should be
        of purpose AIP Store (AS).
    :param str aip_path: Full absolute path to the AIP's current location on
        the local filesystem.
    :param str sip_uuid: UUID of the SIP, which will become the UUID of the
        AIP (a new UUID is generated for DIPs).
    :param str sip_name: SIP name. Not used directly, but part of the AIP name.
    :param str sip_type: One of the SIP/AIP/AIC/DIP type strings (REIN
        variants included).
    :returns: 0 on success.
    :raises ValueError: if ``sip_type`` is not a recognized package type.
    :raises Exception: if the storage service fails to create the file.

    Example inputs:
    storeAIP.py
        "/api/v1/location/9c2b5bb7-abd6-477b-88e0-57107219dace/"
        "/var/archivematica/sharedDirectory/currentlyProcessing/ep6-0737708e-9b99-471a-b331-283e2244164f/ep6-0737708e-9b99-471a-b331-283e2244164f.7z"
        "0737708e-9b99-471a-b331-283e2244164f"
        "ep6"
    """

    # FIXME Assume current Location is the one set up by default until location
    # is passed in properly, or use Agent to make sure is correct CP
    current_location = storage_service.get_location(purpose="CP")[0]

    # If ``aip_path`` does not exist, this may be a DIP that was not uploaded.
    # In that case, it will be in the uploadDIP/ directory instead of the
    # uploadedDIPs/ directory.
    if not os.path.exists(aip_path):
        aip_path = get_upload_dip_path(aip_path)

    # Make aip_path relative to the Location
    shared_path = os.path.join(current_location["path"],
                               "")  # Ensure ends with /
    relative_aip_path = aip_path.replace(shared_path, "")

    # Get the package type: AIP, AIC or DIP
    if "SIP" in sip_type or "AIP" in sip_type:  # Also matches AIP-REIN
        package_type = "AIP"
    elif "AIC" in sip_type:  # Also matches AIC-REIN
        package_type = "AIC"
    elif "DIP" in sip_type:
        package_type = "DIP"
    else:
        # Previously an unrecognized sip_type fell through silently and
        # surfaced later as a NameError when package_type was first used;
        # fail fast with a clear message instead.
        raise ValueError("Unexpected sip_type: {}".format(sip_type))

    # Uncompressed directory AIPs must be terminated in a /,
    # otherwise the storage service will place the directory
    # inside another directory of the same name.
    current_path = os.path.basename(aip_path)
    if os.path.isdir(aip_path) and not aip_path.endswith("/"):
        relative_aip_path = relative_aip_path + "/"

    # DIPs cannot share the AIP UUID, as the storage service depends on
    # having a unique UUID; assign a new one before uploading.
    # TODO allow mapping the AIP UUID to the DIP UUID for retrieval.
    related_package_uuid = None
    if sip_type == "DIP":
        uuid = str(uuid4())
        job.pyprint(
            "Checking if DIP {} parent AIP has been created...".format(uuid))

        # Set related package UUID, so a relationship to the parent AIP can be
        # created if the AIP has been stored. If the AIP hasn't yet been
        # stored, take note of the DIP's UUID so the relationship can later be
        # created when the AIP is stored.
        try:
            storage_service.get_file_info(uuid=sip_uuid)[0]  # Check existence
            related_package_uuid = sip_uuid
            job.pyprint("Parent AIP exists so relationship can be created.")
        except IndexError:
            UnitVariable.objects.create(
                unittype="SIP",
                unituuid=sip_uuid,
                variable="relatedPackage",
                variablevalue=uuid,
            )
            job.pyprint(
                "Noting DIP UUID {} related to AIP so relationship can be created when AIP is stored."
                .format(uuid))
    else:
        uuid = sip_uuid
        # A DIP stored earlier may have recorded its UUID against this SIP;
        # pick it up so the storage service can link the two packages.
        try:
            related_package = UnitVariable.objects.get(
                unituuid=sip_uuid, variable="relatedPackage")
        except UnitVariable.DoesNotExist:
            pass
        else:
            related_package_uuid = related_package.variablevalue

    # If AIP is a directory, calculate size recursively
    if os.path.isdir(aip_path):
        size = 0
        for dirpath, _, filenames in os.walk(aip_path):
            for filename in filenames:
                file_path = os.path.join(dirpath, filename)
                size += os.path.getsize(file_path)
    else:
        size = os.path.getsize(aip_path)

    # Get the AIP subtype from any DC type attribute supplied by the user for
    # the AIP. If found, this will replace 'Archival Information Package' in
    # ``<mets:div TYPE='Archival Information Package'>`` in the pointer file.
    sip_metadata_uuid = "3e48343d-e2d2-4956-aaa3-b54d26eb9761"
    try:
        dc = DublinCore.objects.get(metadataappliestotype_id=sip_metadata_uuid,
                                    metadataappliestoidentifier=uuid)
    except DublinCore.DoesNotExist:
        aip_subtype = "Archival Information Package"
    else:
        aip_subtype = dc.type

    # Store the AIP
    try:
        new_file = _create_file(
            uuid,
            current_location,
            relative_aip_path,
            aip_destination_uri,
            current_path,
            package_type,
            aip_subtype,
            size,
            sip_type,
            related_package_uuid,
        )
    except StorageServiceCreateFileError as err:
        errmsg = "{} creation failed: {}.".format(sip_type, err)
        logger.warning(errmsg)
        raise Exception(errmsg + " See logs for more details.")

    message = "Storage Service created {}:\n{}".format(sip_type,
                                                       pformat(new_file))
    logger.info(message)
    job.pyprint(message)

    # Once the DIP is stored, remove it from the uploadDIP watched directory as
    # it will no longer need to be referenced from there by the user or the
    # system.
    rmtree_upload_dip_transitory_loc(package_type, aip_path)
    return 0
示例#25
0
def _copy_from_transfer_sources(paths, relative_destination):
    """
    Helper to copy files from transfer source locations to the currently processing location.

    Any files in locations not associated with this pipeline will be ignored.

    :param list paths: List of paths.  Each path should be formatted <uuid of location>:<full path in location>
    :param str relative_destination: Path relative to the currently processing space to move the files to.
    :returns: Tuple of (boolean error, message)
    """
    processing_location = storage_service.get_location(purpose="CP")[0]
    transfer_sources = storage_service.get_location(purpose="TS")
    files = {l["uuid"]: {"location": l, "files": []} for l in transfer_sources}

    for p in paths:
        try:
            location, path = p.split(":", 1)
        except ValueError:
            logger.warning("Path %s cannot be split into location:path", p)
            # Previously this concatenated without spaces, producing a
            # garbled message like "Path<p>cannot be split ...".
            return (
                True,
                _("Path %(path)s cannot be split into location:path")
                % {"path": p},
            )
        if location not in files:
            logger.warning(
                "Location %s is not associated with this pipeline.", location
            )
            return (
                True,
                _("Location %(location)s is not associated with this pipeline")
                % {"location": location},
            )

        # ``path`` will be a UTF-8 bytestring but the replacement pattern path
        # from ``files`` will be a Unicode object. Therefore, the latter must
        # be UTF-8 encoded prior. Same reasoning applies to ``destination``
        # below. This allows transfers to be started on UTF-8-encoded directory
        # names.
        source = path.replace(
            files[location]["location"]["path"].encode("utf8"), "", 1
        ).lstrip("/")
        # Use the last segment of the path for the destination - basename for a
        # file, or the last folder if not. Keep the trailing / for folders.
        last_segment = (
            os.path.basename(source.rstrip("/")) + "/"
            if source.endswith("/")
            else os.path.basename(source)
        )
        destination = os.path.join(
            processing_location["path"].encode("utf8"),
            relative_destination,
            last_segment,
        ).replace("%sharedPath%", "")
        files[location]["files"].append({"source": source, "destination": destination})
        logger.debug("source: %s, destination: %s", source, destination)

    # Issue one copy request per transfer source location, collecting any
    # errors so the caller gets a single aggregated message.
    message = []
    for pl in files.values():
        reply, error = storage_service.copy_files(
            pl["location"], processing_location, pl["files"]
        )
        if reply is None:
            message.append(str(error))
    if message:
        return (
            True,
            _("The following errors occured: %(message)s")
            % {"message": ", ".join(message)},
        )
    else:
        return False, _("Files added successfully.")
示例#26
0
def store_aip(aip_destination_uri, aip_path, sip_uuid, sip_name, sip_type):
    """Store an AIP (or AIC/DIP) with the storage service, then exit.

    aip_destination_uri = storage service destination URI, should be of purpose
        AIP Store (AS)
    aip_path = Full absolute path to the AIP's current location on the local
        filesystem
    sip_uuid = UUID of the SIP, which will become the UUID of the AIP
    sip_name = SIP name.  Not used directly, but part of the AIP name

    Calls ``sys.exit(0)`` on success and ``sys.exit(1)`` on failure.
    Raises ValueError if ``sip_type`` is not a recognized package type.

    Example inputs:
    storeAIP.py
        "/api/v1/location/9c2b5bb7-abd6-477b-88e0-57107219dace/"
        "/var/archivematica/sharedDirectory/currentlyProcessing/ep6-0737708e-9b99-471a-b331-283e2244164f/ep6-0737708e-9b99-471a-b331-283e2244164f.7z"
        "0737708e-9b99-471a-b331-283e2244164f"
        "ep6"
    """

    # FIXME Assume current Location is the one set up by default until location
    # is passed in properly, or use Agent to make sure is correct CP
    current_location = storage_service.get_location(purpose="CP")[0]

    # If ``aip_path`` does not exist, this may be a DIP that was not uploaded.
    # In that case, it will be in the uploadDIP/ directory instead of the
    # uploadedDIPs/ directory.
    if not os.path.exists(aip_path):
        aip_path = get_upload_dip_path(aip_path)

    # Make aip_path relative to the Location
    shared_path = os.path.join(current_location['path'],
                               '')  # Ensure ends with /
    relative_aip_path = aip_path.replace(shared_path, '')

    # Get the package type: AIP, AIC or DIP
    if 'SIP' in sip_type or 'AIP' in sip_type:  # Also matches AIP-REIN
        package_type = "AIP"
    elif 'AIC' in sip_type:  # Also matches AIC-REIN
        package_type = 'AIC'
    elif 'DIP' in sip_type:
        package_type = 'DIP'
    else:
        # Previously an unrecognized sip_type fell through silently and
        # surfaced later as a NameError when package_type was first used;
        # fail fast with a clear message instead.
        raise ValueError("Unexpected sip_type: {}".format(sip_type))

    # Uncompressed directory AIPs must be terminated in a /,
    # otherwise the storage service will place the directory
    # inside another directory of the same name.
    current_path = os.path.basename(aip_path)
    if os.path.isdir(aip_path) and not aip_path.endswith('/'):
        relative_aip_path = relative_aip_path + '/'

    # DIPs cannot share the AIP UUID, as the storage service depends on
    # having a unique UUID; assign a new one before uploading.
    # TODO allow mapping the AIP UUID to the DIP UUID for retrieval.
    related_package_uuid = None
    if sip_type == 'DIP':
        uuid = str(uuid4())
        print('Checking if DIP {} parent AIP has been created...'.format(uuid))

        # Set related package UUID, so a relationship to the parent AIP can be
        # created if the AIP has been stored. If the AIP hasn't yet been
        # stored, take note of the DIP's UUID so the relationship can later be
        # created when the AIP is stored.
        try:
            storage_service.get_file_info(uuid=sip_uuid)[0]  # Check existence
            related_package_uuid = sip_uuid
            print('Parent AIP exists so relationship can be created.')
        except IndexError:
            UnitVariable.objects.create(unittype='SIP',
                                        unituuid=sip_uuid,
                                        variable='relatedPackage',
                                        variablevalue=uuid)
            print(
                'Noting DIP UUID {} related to AIP so relationship can be created when AIP is stored.'
                .format(uuid))
    else:
        uuid = sip_uuid
        # A DIP stored earlier may have recorded its UUID against this SIP;
        # pick it up so the storage service can link the two packages.
        related_package = get_object_or_None(UnitVariable,
                                             unituuid=sip_uuid,
                                             variable='relatedPackage')
        related_package_uuid = related_package.variablevalue if related_package is not None else None

    # If AIP is a directory, calculate size recursively
    if os.path.isdir(aip_path):
        size = 0
        for dirpath, _, filenames in os.walk(aip_path):
            for filename in filenames:
                file_path = os.path.join(dirpath, filename)
                size += os.path.getsize(file_path)
    else:
        size = os.path.getsize(aip_path)

    # Get the AIP subtype from any DC type attribute supplied by the user for
    # the AIP. If found, this will replace 'Archival Information Package' in
    # ``<mets:div TYPE='Archival Information Package'>`` in the pointer file.
    sip_metadata_uuid = '3e48343d-e2d2-4956-aaa3-b54d26eb9761'
    try:
        dc = DublinCore.objects.get(metadataappliestotype_id=sip_metadata_uuid,
                                    metadataappliestoidentifier=uuid)
    except DublinCore.DoesNotExist:
        aip_subtype = 'Archival Information Package'
    else:
        aip_subtype = dc.type

    # Store the AIP
    (new_file, error_msg) = storage_service.create_file(
        uuid=uuid,
        origin_location=current_location['resource_uri'],
        origin_path=relative_aip_path,
        current_location=aip_destination_uri,
        current_path=current_path,
        package_type=package_type,
        aip_subtype=aip_subtype,
        size=size,
        update='REIN' in sip_type,
        related_package_uuid=related_package_uuid,
        events=get_events_from_db(uuid),
        agents=get_agents_from_db(uuid))

    if new_file is not None and new_file.get('status', '') != "FAIL":
        message = "Storage service created {}: {}".format(sip_type, new_file)
        LOGGER.info(message)
        print(message)
        sys.exit(0)
    else:
        print("{} creation failed.  See Storage Service logs for more details".
              format(sip_type),
              file=sys.stderr)
        print(error_msg or "Package status: Failed", file=sys.stderr)
        LOGGER.warning(
            "{} unable to be created: {}.  See logs for more details.".format(
                sip_type, error_msg))
        sys.exit(1)
示例#27
0
def copy_files_to_arrange(sourcepath,
                          destination,
                          fetch_children=False,
                          backlog_uuid=None):
    """Create SIPArrange entries for files/folders copied into arrangement.

    :param str sourcepath: Path relative to this pipeline's backlog
        (starts with 'originals/', not '/originals/').
    :param str destination: Path within the arrange folder; must start with
        DEFAULT_ARRANGE_PATH.
    :param bool fetch_children: If True, also record all children of a folder.
    :param str backlog_uuid: UUID of the backlog location; looked up from the
        storage service when not provided.
    :raises ValueError: on blank/invalid paths or storage-service failures.
    """
    sourcepath = sourcepath.lstrip(
        '/')  # starts with 'originals/', not '/originals/'
    # Insert each file into the DB

    # Lots of error checking:
    if not sourcepath or not destination:
        raise ValueError(
            _("GET parameter 'filepath' or 'destination' was blank."))
    if not destination.startswith(DEFAULT_ARRANGE_PATH):
        raise ValueError(
            _('%(path)s must be in arrange directory.') %
            {'path': destination})

    try:
        leaf_dir = sourcepath.split('/')[-2]
    except IndexError:
        leaf_dir = ''
    # Files cannot go into the top level folder,
    # and neither can the "objects" directory
    if destination == DEFAULT_ARRANGE_PATH and not (sourcepath.endswith('/')
                                                    or leaf_dir == 'objects'):
        raise ValueError(
            _('%(path1)s must go in a SIP, cannot be dropped onto %(path2)s') %
            {
                'path1': sourcepath,
                'path2': DEFAULT_ARRANGE_PATH
            })

    # Create new SIPArrange entry for each object being copied over
    if not backlog_uuid:
        backlog_uuid = storage_service.get_location(purpose='BL')[0]['uuid']
    to_add = []

    # Construct the base arrange_path differently for files vs folders
    if sourcepath.endswith('/'):
        # If dragging objects/ folder, actually move the contents of (not
        # the folder itself)
        if leaf_dir == 'objects':
            arrange_path = os.path.join(destination, '')
        else:
            # Strip UUID from transfer name
            uuid_regex = r'-[\w]{8}(-[\w]{4}){3}-[\w]{12}$'
            leaf_dir = re.sub(uuid_regex, '', leaf_dir)
            arrange_path = os.path.join(destination, leaf_dir) + '/'
            to_add.append({
                'original_path': None,
                'arrange_path': arrange_path,
                'file_uuid': None,
                'transfer_uuid': None,
            })
        if fetch_children:
            try:
                to_add.extend(
                    _get_arrange_directory_tree(backlog_uuid, sourcepath,
                                                arrange_path))
            except storage_service.ResourceNotFound as e:
                # The format-string key was previously misspelled
                # ('%(messsage)s'), which masked the real error with a
                # formatting error at raise time.
                raise ValueError(
                    _('Storage Service failed with the message: %(message)s')
                    % {'message': str(e)})
    else:
        if destination.endswith('/'):
            arrange_path = os.path.join(destination,
                                        os.path.basename(sourcepath))
        else:
            arrange_path = destination
        relative_path = sourcepath.replace(DEFAULT_BACKLOG_PATH, '', 1)
        try:
            file_info = storage_service.get_file_metadata(
                relative_path=relative_path)[0]
        except storage_service.ResourceNotFound:
            raise ValueError(
                _('No file information returned from the Storage Service for file at relative_path: %(path)s'
                  ) % {'path': relative_path})
        file_uuid = file_info.get('fileuuid')
        transfer_uuid = file_info.get('sipuuid')
        to_add.append({
            'original_path': sourcepath,
            'arrange_path': arrange_path,
            'file_uuid': file_uuid,
            'transfer_uuid': transfer_uuid
        })

    logger.info('arrange_path: %s', arrange_path)
    logger.debug('files to be added: %s', to_add)

    for entry in to_add:
        try:
            # TODO enforce uniqueness on arrange panel?
            models.SIPArrange.objects.create(
                original_path=entry['original_path'],
                arrange_path=entry['arrange_path'],
                file_uuid=entry['file_uuid'],
                transfer_uuid=entry['transfer_uuid'],
            )
        except IntegrityError:
            # FIXME Expecting this to catch duplicate original_paths, which
            # we want to ignore since a file can only be in one SIP.  Needs
            # to be updated not to ignore other classes of IntegrityErrors.
            logger.exception('Integrity error inserting: %s', entry)
示例#28
0
def copy_to_arrange(request,
                    sources=None,
                    destinations=None,
                    fetch_children=False):
    """
    Add files to in-progress SIPs being arranged.

    Files being copied can be located in either the backlog or in another SIP being arranged.

    If sources or destinations are strs not a list, they will be converted into a list and fetch_children will be set to True.

    :param list sources: List of paths relative to this pipelines backlog. If None, will look for filepath[] or filepath
    :param list destinations: List of paths within arrange folder. All paths should start with DEFAULT_ARRANGE_PATH
    :param bool fetch_children: If True, will fetch all children of the provided path(s) to copy to the destination.
    :returns: JSON response with a 'message' (and 'error' on failure).
    """
    if isinstance(sources, six.string_types) or isinstance(
            destinations, six.string_types):
        fetch_children = True
        sources = [sources]
        destinations = [destinations]

    if sources is None or destinations is None:
        # List of sources & destinations
        if 'filepath[]' in request.POST or 'destination[]' in request.POST:
            # Wrap map() in list() so the results are subscriptable
            # (sources[0] below) on Python 3, where map returns an iterator.
            sources = list(map(base64.b64decode,
                               request.POST.getlist('filepath[]', [])))
            destinations = list(map(base64.b64decode,
                                    request.POST.getlist('destination[]', [])))
        # Single path representing tree
        else:
            fetch_children = True
            sources = [base64.b64decode(request.POST.get('filepath', ''))]
            destinations = [
                base64.b64decode(request.POST.get('destination', ''))
            ]
    logger.info('sources: %s', sources)
    logger.info('destinations: %s', destinations)

    # The DEFAULT_BACKLOG_PATH constant is missing a leading slash for
    # historical reasons; TODO change this at some point.
    # External paths passed into these views are in the format
    # /originals/, whereas copy_from_arrange_to_completed constructs
    # paths without a leading slash as an implementation detail
    # (to communicate with the Storage Service).
    # Possibly the constant used to refer to externally-constructed
    # paths and the one used solely internally should be two different
    # constants.
    if sources[0].startswith('/' + DEFAULT_BACKLOG_PATH):
        action = 'copy'
        backlog_uuid = storage_service.get_location(purpose='BL')[0]['uuid']
    elif sources[0].startswith(DEFAULT_ARRANGE_PATH):
        action = 'move'
    else:
        logger.error(
            'Filepath %s is not in base backlog path nor arrange path',
            sources[0])
        return helpers.json_response({
            'error': True,
            'message':
            _('%(path)s is not in base backlog path nor arrange path') % {
                'path': sources[0]
            }
        })

    # Defaults also cover the degenerate case of empty source/destination
    # lists, where the loop body never runs.
    response = {'message': _('Files added to the SIP.')}
    status_code = 201
    try:
        for source, dest in zip(sources, destinations):
            if action == 'copy':
                copy_files_to_arrange(source,
                                      dest,
                                      fetch_children=fetch_children,
                                      backlog_uuid=backlog_uuid)
                response = {'message': _('Files added to the SIP.')}
                status_code = 201
            elif action == 'move':
                move_files_within_arrange(source, dest)
                response = {'message': _('SIP files successfully moved.')}
                status_code = 200
    except ValueError as e:
        logger.exception('Failed copying %s to %s', source, dest)
        response = {
            'message': str(e),
            'error': True,
        }
        status_code = 400
    # NOTE: the previous ``else`` clause on this try unconditionally
    # overwrote the response, so successful moves wrongly reported
    # "Files added to the SIP." with status 201 instead of 200.

    return helpers.json_response(response, status_code=status_code)
示例#29
0
def index(request):
    """Render and save the default processing configuration.

    On POST, serializes the submitted form choices to the processing
    configuration XML file and redirects back to the admin view. On GET,
    parses the existing XML to pre-populate the form.

    NOTE: ``locals()`` is passed as the template context at the bottom of
    this view, so every local name defined here is (potentially) visible to
    the template. Do not rename or remove locals without checking
    administration/processing.html.
    """
    file_path = helpers.default_processing_config_path()

    # Lists of dicts declare what options to display, and where to look for
    # the options
    # name: Value of the `name` attribute in the <input> HTML element
    # choice_uuid: UUID of the microservice chainlink at which the choice occurs
    # label: Human-readable label to be displayed to the user
    # yes_option and no_option: UUIDs for the yes and no choice chains, respectively
    boolean_select_fields = [
        {
            "name": "quarantine_transfer",
            "choice_uuid":
            "755b4177-c587-41a7-8c52-015277568302",  # Workflow decision - send transfer to quarantine
            "label": "Send transfer to quarantine",
            "yes_option": "97ea7702-e4d5-48bc-b4b5-d15d897806ab",  # Quarantine
            "no_option":
            "d4404ab1-dc7f-4e9e-b1f8-aa861e766b8e"  # Skip quarantine
        },
        {
            "name": "normalize_transfer",
            "choice_uuid": "de909a42-c5b5-46e1-9985-c031b50e9d30",
            "label": "Approve normalization",
            "yes_option": "1e0df175-d56d-450d-8bee-7df1dc7ae815",  # Approve
            "action": "Approve"
        },
        {
            "name": "store_aip",
            "choice_uuid": "2d32235c-02d4-4686-88a6-96f4d6c7b1c3",
            "label": "Store AIP",
            "yes_option": "9efab23c-31dc-4cbd-a39d-bb1665460cbe",  # Store AIP
            "action": "Store AIP"
        },
        {
            "name": "transcribe_file",
            "choice_uuid": "7079be6d-3a25-41e6-a481-cee5f352fe6e",
            "label": "Transcribe files (OCR)",
            "yes_option": "5a9985d3-ce7e-4710-85c1-f74696770fa9",
            "no_option": "1170e555-cd4e-4b2f-a3d6-bfb09e8fcc53",
        },
        {
            "name": "tree",
            "choice_uuid": "56eebd45-5600-4768-a8c2-ec0114555a3d",
            "label": "Generate transfer structure report",
            "yes_option":
            "df54fec1-dae1-4ea6-8d17-a839ee7ac4a7",  # Generate transfer structure report
            "no_option": "e9eaef1e-c2e0-4e3b-b942-bfb537162795",
            "action": "Generate transfer structure report"
        },
    ]

    # name: Value of the `name` attribute in the <input> HTML element
    # label: Human-readable label to be displayed to the user
    # choice_uuid: UUID of the microservice chainlink at which the choice occurs
    chain_choice_fields = [
        {
            "name": "create_sip",
            "label": "Create SIP(s)",
            "choice_uuid": "bb194013-597c-4e4a-8493-b36d190f8717"
        },
        {
            "name": "extract_packages",
            "label": "Extract packages",
            "choice_uuid": "dec97e3c-5598-4b99-b26e-f87a435a6b7f",
            "uuid": "01d80b27-4ad1-4bd1-8f8d-f819f18bf685"
        },
        {
            "name": "normalize",
            "label": "Normalize",
            "choice_uuid": "cb8e5706-e73f-472f-ad9b-d1236af8095f",
        },
        {
            "name": "reminder",
            "label": "Reminder: add metadata if desired",
            "choice_uuid": "eeb23509-57e2-4529-8857-9d62525db048",
        },
        {
            "name": "examine",
            "label": "Examine contents",
            "choice_uuid": "accea2bf-ba74-4a3a-bb97-614775c74459"
        },
    ]

    populate_select_fields_with_chain_choice_options(chain_choice_fields)

    # name: Value of the `name` attribute in the <input> HTML element
    # choice_uuid: UUID of the microservice chainlink at which the choice occurs
    replace_dict_fields = [{
        "name":
        "select_format_id_tool_transfer",
        "label":
        "Select file format identification command (Transfer)",
        "choice_uuid":
        'f09847c2-ee51-429a-9478-a860477f6b8d'
    }, {
        "name":
        "select_format_id_tool_ingest",
        "label":
        "Select file format identification command (Ingest)",
        "choice_uuid":
        '7a024896-c4f7-4808-a240-44c87c762bc5'
    }, {
        "name":
        "select_format_id_tool_submissiondocs",
        "label":
        "Select file format identification command (Submission documentation & metadata)",
        "choice_uuid":
        '087d27be-c719-47d8-9bbb-9a7d8b609c44'
    }, {
        "name": "delete_packages",
        "label": "Delete packages after extraction",
        "choice_uuid": "f19926dd-8fb5-4c79-8ade-c83f61f55b40",
        "uuid": "85b1e45d-8f98-4cae-8336-72f40e12cbef"
    }, {
        "name":
        "compression_algo",
        "label":
        "Select compression algorithm",
        "choice_uuid":
        "01d64f58-8295-4b7b-9cab-8f1b153a504f"
    }, {
        "name":
        "compression_level",
        "label":
        "Select compression level",
        "choice_uuid":
        "01c651cb-c174-4ba4-b985-1d87a44d6754"
    }]

    def storage_dir_cb(storage_dir):
        # Map a storage service location dict to a <select> option dict.
        return {
            'value': storage_dir['resource_uri'],
            'label': storage_dir['description']
        }

    # Build lists of AIP/DIP storage locations and their descriptions for the
    # location <select> widgets.
    storage_directory_options = [{'value': '', 'label': '--Actions--'}]
    dip_directory_options = [{'value': '', 'label': '--Actions--'}]
    try:
        storage_directories = storage_service.get_location(purpose="AS")
        dip_directories = storage_service.get_location(purpose="DS")
        if None in (storage_directories, dip_directories):
            raise Exception("Storage server improperly configured.")
    except Exception:
        messages.warning(
            request,
            'Error retrieving AIP/DIP storage locations: is the storage server running? Please contact an administrator.'
        )
    else:
        storage_directory_options += [
            storage_dir_cb(d) for d in storage_directories
        ]
        dip_directory_options += [storage_dir_cb(d) for d in dip_directories]

    storage_service_options = [
        {
            "name": "store_aip_location",
            "label": "Store AIP location",
            "choice_uuid": "b320ce81-9982-408a-9502-097d0daa48fa",
            "options": storage_directory_options,
            # Unlike other options, the correct value here is a literal string,
            # not a pointer to a chain or dict in the database.
            "do_not_lookup": True
        },
        {
            "name": "store_dip_location",
            "label": "Store DIP location",
            "choice_uuid": "b7a83da6-ed5a-47f7-a643-1e9f9f46e364",
            "options": dip_directory_options,
            # Unlike other options, the correct value here is a literal string,
            # not a pointer to a chain or dict in the database.
            "do_not_lookup": True
        }
    ]

    populate_select_fields_with_replace_dict_options(replace_dict_fields)

    select_fields = chain_choice_fields + replace_dict_fields + storage_service_options

    if request.method == 'POST':
        # render XML using request data
        xmlChoices = PreconfiguredChoices()

        # use toggle field submissions to add to XML
        for field in boolean_select_fields:
            enabled = request.POST.get(field['name'])
            if enabled == 'yes':
                if 'yes_option' in field:
                    # can be set to either yes or no
                    toggle = request.POST.get(field['name'] + '_toggle', '')
                    if toggle == 'yes':
                        go_to_chain_text = field['yes_option']
                    elif 'no_option' in field:
                        go_to_chain_text = field['no_option']

                    if 'no_option' in field:
                        xmlChoices.add_choice(field['choice_uuid'],
                                              go_to_chain_text)
                    else:
                        if toggle == 'yes':
                            xmlChoices.add_choice(field['choice_uuid'],
                                                  go_to_chain_text)

        # set quarantine duration if applicable
        quarantine_expiry_enabled = request.POST.get(
            'quarantine_expiry_enabled', '')
        quarantine_expiry = request.POST.get('quarantine_expiry', '')
        if quarantine_expiry_enabled == 'yes' and quarantine_expiry != '':
            xmlChoices.add_choice(
                '19adb668-b19a-4fcb-8938-f49d7485eaf3',  # Remove from quarantine
                '333643b7-122a-4019-8bef-996443f3ecc5',  # Unquarantine
                str(float(quarantine_expiry) * (24 * 60 * 60)))  # days -> seconds

        # use select field submissions to add to XML
        for field in select_fields:
            enabled = request.POST.get(field['name'] + '_enabled')
            if enabled == 'yes':
                field_value = request.POST.get(field['name'], '')
                if field_value != '':
                    if field.get('do_not_lookup', False):
                        target = field_value
                    else:
                        target = uuid_from_description(field_value,
                                                       field['choice_uuid'])

                    xmlChoices.add_choice(field['choice_uuid'], target)

        xmlChoices.write_to_file(file_path)

        messages.info(request, 'Saved!')

        return redirect('components.administration.views.processing')
    else:
        debug = request.GET.get('debug', '')
        quarantine_expiry = ''

        # The handle was previously never closed (a resource leak); the local
        # name ``file`` is kept because locals() is the template context.
        file = open(file_path, 'r')
        xml = file.read()
        file.close()

        # parse XML to work out locals()
        root = etree.fromstring(xml)
        choices = root.findall('.//preconfiguredChoice')

        for item in boolean_select_fields:
            item['checked'] = ''
            item['yes_checked'] = ''
            item['no_checked'] = ''

        for choice in choices:
            applies_to = choice.find('appliesTo').text
            go_to_chain = choice.find('goToChain').text

            # use toggle field submissions to add to XML
            for field in boolean_select_fields:
                if applies_to == field['choice_uuid']:
                    set_field_property_by_name(boolean_select_fields,
                                               field['name'], 'checked',
                                               'checked')

                    if 'yes_option' in field:
                        if go_to_chain == field['yes_option']:
                            set_field_property_by_name(boolean_select_fields,
                                                       field['name'],
                                                       'yes_checked',
                                                       'selected')
                        else:
                            set_field_property_by_name(boolean_select_fields,
                                                       field['name'],
                                                       'no_checked',
                                                       'selected')

            # a quarantine expiry was found
            if applies_to == 'Remove from quarantine':
                quarantine_expiry_enabled_checked = 'checked'
                quarantine_expiry = float(
                    choice.find('delay').text) / (24 * 60 * 60)

            # check select fields for defaults
            for field in select_fields:
                if applies_to == field['choice_uuid'] and go_to_chain:
                    try:
                        # NOTE: rebinding the loop variable ``choice`` here is
                        # intentional in the original code; it holds the
                        # human-readable description for the rest of this
                        # iteration.
                        chain = models.MicroServiceChain.objects.get(
                            pk=go_to_chain)
                        choice = chain.description
                    except models.MicroServiceChain.DoesNotExist:
                        try:
                            choice = models.MicroServiceChoiceReplacementDic.objects.get(
                                pk=go_to_chain).description
                        except models.MicroServiceChoiceReplacementDic.DoesNotExist:
                            # fallback for storage service options, which are
                            # strings that don't map to chains or dicts in
                            # the database
                            choice = go_to_chain

                    field['selected'] = choice
                    field['checked'] = 'checked'

    hide_features = hidden_features()
    return render(request, 'administration/processing.html', locals())
示例#30
0
def copy_to_arrange(request, sources=None, destinations=None, fetch_children=False):
    """
    Add files to in-progress SIPs being arranged.

    Files being copied can be located in either the backlog or in another SIP
    being arranged.

    If sources or destinations are strs not a list, they will be converted
    into a list and fetch_children will be set to True.

    :param list sources: List of paths relative to this pipelines backlog.
        If None, will look for filepath[] or filepath in the POST data.
    :param list destinations: List of paths within arrange folder. All paths
        should start with DEFAULT_ARRANGE_PATH.
    :param bool fetch_children: If True, will fetch all children of the
        provided path(s) to copy to the destination.
    :returns: JSON response; 201 for a successful copy, 200 for a successful
        move, 400 on invalid input.
    """
    if isinstance(sources, six.string_types) or isinstance(
        destinations, six.string_types
    ):
        fetch_children = True
        sources = [sources]
        destinations = [destinations]

    if sources is None or destinations is None:
        # List of sources & destinations
        if "filepath[]" in request.POST or "destination[]" in request.POST:
            # Wrap map() in list(): on Python 3 map() returns a one-shot
            # iterator, which is not subscriptable (sources[0] below) and
            # would be exhausted by the first iteration.
            sources = list(
                map(base64.b64decode, request.POST.getlist("filepath[]", []))
            )
            destinations = list(
                map(base64.b64decode, request.POST.getlist("destination[]", []))
            )
        # Single path representing tree
        else:
            fetch_children = True
            sources = [base64.b64decode(request.POST.get("filepath", ""))]
            destinations = [base64.b64decode(request.POST.get("destination", ""))]
    logger.info("sources: %s", sources)
    logger.info("destinations: %s", destinations)

    # Guard against empty input; otherwise sources[0] below raises IndexError
    # (an unhandled 500) instead of a clean client error.
    if not sources or not destinations:
        return helpers.json_response(
            {
                "error": True,
                "message": _("No files or destinations were provided."),
            },
            status_code=400,
        )

    # The DEFAULT_BACKLOG_PATH constant is missing a leading slash for
    # historical reasons; TODO change this at some point.
    # External paths passed into these views are in the format
    # /originals/, whereas copy_from_arrange_to_completed constructs
    # paths without a leading slash as an implementation detail
    # (to communicate with the Storage Service).
    # Possibly the constant used to refer to externally-constructed
    # paths and the one used solely internally should be two different
    # constants.
    if sources[0].startswith("/" + DEFAULT_BACKLOG_PATH):
        action = "copy"
        backlog_uuid = storage_service.get_location(purpose="BL")[0]["uuid"]
    elif sources[0].startswith(DEFAULT_ARRANGE_PATH):
        action = "move"
    else:
        logger.error(
            "Filepath %s is not in base backlog path nor arrange path", sources[0]
        )
        return helpers.json_response(
            {
                "error": True,
                "message": _("%(path)s is not in base backlog path nor arrange path")
                % {"path": sources[0]},
            }
        )

    entries_to_copy = []
    # Pre-bind so the except-branch log below cannot raise NameError when
    # zip(sources, destinations) is empty.
    source = dest = None
    try:
        for source, dest in zip(sources, destinations):
            if action == "copy":
                entries = _copy_files_to_arrange(
                    source,
                    dest,
                    fetch_children=fetch_children,
                    backlog_uuid=backlog_uuid,
                )
                for entry in entries:
                    entries_to_copy.append(
                        models.SIPArrange(
                            original_path=entry["original_path"],
                            arrange_path=entry["arrange_path"],
                            file_uuid=entry["file_uuid"],
                            transfer_uuid=entry["transfer_uuid"],
                        )
                    )
            elif action == "move":
                _move_files_within_arrange(source, dest)
        if entries_to_copy:
            models.SIPArrange.objects.bulk_create(
                entries_to_copy, BULK_CREATE_BATCH_SIZE
            )
    except ValueError as e:
        logger.exception("Failed copying %s to %s", source, dest)
        response = {"message": str(e), "error": True}
        status_code = 400
    else:
        # The success response depends on the action taken. Previously the
        # move response assigned inside the loop was unconditionally
        # overwritten here, so moves wrongly reported "Files added to the
        # SIP." with a 201.
        if action == "move":
            response = {"message": _("SIP files successfully moved.")}
            status_code = 200
        else:
            response = {"message": _("Files added to the SIP.")}
            status_code = 201

    return helpers.json_response(response, status_code=status_code)