Пример #1
0
    def find_by_collection_number(num, parent=None):
        '''Look up :class:`CollectionObject` records in Fedora by collection
        number (source id), using the Solr index.

        :param num: collection number / source id; converted with ``int()``
        :param parent: optional pid (or ``info:fedora/`` URI) of the archive
            the collection must belong to
        :return: generator yielding a :class:`CollectionObject` for each
            matching Solr result
        '''
        solr = solr_interface()
        search_opts = {
            'content_model': CollectionObject.COLLECTION_CONTENT_MODEL,
            'pid': '%s:*' % settings.FEDORA_PIDSPACE,
            'source_id': int(num),
        }
        matches = solr.query(**search_opts)

        if parent is not None:
            # archive ids are indexed as bare pids, so drop any URI prefix
            fedora_prefix = 'info:fedora/'
            if parent.startswith(fedora_prefix):
                parent = parent[len(fedora_prefix):]
            matches = matches.query(archive_id=parent)

        # Solr returns only 10 rows unless told otherwise; request up to 1000.
        # Each result is a dictionary of collection info.
        results = matches.paginate(start=0, rows=1000).execute()

        # initialize each hit as a CollectionObject from the repository
        repo = Repository()
        for hit in results:
            yield repo.get_object(hit['pid'], type=CollectionObject)
Пример #2
0
    def archives(format=None):
        """Find Archive objects, i.e. the top-level collections to which
        CollectionObjects belong.

        The Solr response is cached on ``CollectionObject._archives`` the
        first time this is called and reused afterwards.

        :param format: pass ``dict`` to get the cached Solr response as-is
            instead of repository objects
        :returns: list of :class:`CollectionObject`, or the Solr response
            when ``format`` is ``dict``
        """
        # NOTE: formerly called top-level collections or Repository /
        # Owning Repository; should now be called archive and labeled
        # as such anywhere user-facing

        # TODO: search logic very similar to item_collections and
        # subcollections methods; consider refactoring search logic
        # into a common search method.

        if CollectionObject._archives is None:
            solr = solr_interface()
            # NOTE: no pidspace filter here; top-level objects are loaded as
            # fixtures and may not match the configured pidspace in a dev
            # environment
            query = solr.query(
                content_model=CollectionObject.COLLECTION_CONTENT_MODEL)
            # archives are collection objects with NO parent collection id
            query = query.exclude(archive_id__any=True)
            query = query.sort_by('title_exact')
            # cache the solr response itself
            CollectionObject._archives = query.execute()

        cached = CollectionObject._archives
        if format == dict:
            return cached

        # otherwise, initialize as instances of CollectionObject
        repo = Repository()
        archive_objects = []
        for arch in cached:
            archive_objects.append(
                repo.get_object(arch['pid'], type=CollectionObject))
        return archive_objects
Пример #3
0
def view(request, pid):
    '''View a single :class:`~keep.video.models.Video`.

    The user must either have the general view-video permission, or have
    the researcher view permission while the object is researcher
    accessible (``obj.researcher_access`` is truthy).

    :param request: current :class:`~django.http.HttpRequest`
    :param pid: pid of the video object to display
    :returns: rendered ``video/view.html``, or the result of
        ``prompt_login_or_403`` when the user may not view the object
    :raises Http404: if the object lacks the required content models, or
        if checking them fails
    '''
    repo = Repository(request=request)
    obj = repo.get_object(pid=pid, type=Video)

    user = request.user
    viewable = user.has_perm('video.view_video') or \
        (user.has_perm('video.view_researcher_video') and
         bool(obj.researcher_access))
    if not viewable:
        return prompt_login_or_403(request)

    try:
        is_video = obj.has_requisite_content_models
    except Exception:
        # Any error while checking content models is treated as not found.
        # Deliberately not a bare except, so SystemExit / KeyboardInterrupt
        # still propagate instead of becoming a 404.
        raise Http404
    if not is_video:
        raise Http404

    return render(request, 'video/view.html', {"resource": obj})
Пример #4
0
def view_audit_trail(request, pid):
    'Return the XML audit trail for an audio object.'
    # FIXME: redundant across collection/arrangement/audio apps; consolidate?
    # build a repository connection from the current request and hand off
    # to the generic audit trail view
    user_repo = Repository(request=request)
    return raw_audit_trail(request, pid, type=AudioObject, repo=user_repo)
Пример #5
0
    def setUp(self):
        '''Create a repository connection and an unsaved test
        :class:`EmailMessage` with basic CERP header values.'''
        self.repo = Repository()
        self.pids = []

        # new (unsaved) EmailMessage used as the test fixture
        self.email = self.repo.get_object(type=EmailMessage)
        cerp_message = self.email.cerp.content
        cerp_message.from_list = ['*****@*****.**']
        cerp_message.to_list = ['*****@*****.**']
        cerp_message.subject_list = ['Interesting Subject']
Пример #6
0
 def all():
     'Find every Audio object, by content model, in the configured pidspace.'
     # only objects whose pid is in the configured pidspace
     pidspace_pattern = '%s:*' % settings.FEDORA_PIDSPACE
     # content model is matched against dc:format
     audio_cmodel = AudioObject.AUDIO_CONTENT_MODEL
     repo = Repository()
     return repo.find_objects(type=AudioObject,
                              pid__contains=pidspace_pattern,
                              format__contains=audio_cmodel)
Пример #7
0
 def simple_collection(label=None, status=None, pid=None):
     '''Build a new :class:`SimpleCollection` fixture object.

     :param label: optional object label
     :param status: optional text for the MODS restrictions-on-access field
     :param pid: optional pid to assign to the object
     :returns: the initialized :class:`SimpleCollection`
     '''
     collection = Repository().get_object(type=SimpleCollection)
     if label is not None:
         collection.label = label

     # the restrictions-on-access node is always created, even when no
     # status text is supplied
     mods_record = collection.mods.content
     mods_record.create_restrictions_on_access()
     if status is not None:
         mods_record.restrictions_on_access.text = status

     if pid is not None:
         collection.pid = pid
     return collection
Пример #8
0
def download(request, pid):
    'Download the content datastream of a disk image.'
    repo = Repository(request=request)
    obj = repo.get_object(pid, type=DiskImage)

    # suggested download filename is the object noid plus the name of the
    # latest format recorded in the provenance metadata
    noid = obj.noid
    format_name = obj.provenance.content.object.latest_format.name
    disposition = "attachment; filename=%s.%s" % (noid, format_name)

    return raw_datastream(request, pid, DiskImage.content.id, repo=repo,
                          headers={'Content-Disposition': disposition})
Пример #9
0
 def englishdocs_collection():
     '''Build an unsaved :class:`CollectionObject` fixture for the
     "English documents collection" (source id 309, dated 1509-1805).'''
     title = 'English documents collection'
     repo = Repository()
     coll = repo.get_object(type=CollectionObject)
     coll.label = title

     record = coll.mods.content
     record.title = title
     record.source_id = '309'
     # parent archive is the second entry in the fixture archive list
     coll.collection = repo.get_object(FedoraFixtures.archives()[1].uri)

     record.create_origin_info()
     for year, point in ((1509, 'start'), (1805, 'end')):
         record.origin_info.created.append(
             mods.DateCreated(date=year, point=point))
     return coll
Пример #10
0
def _objects_by_type(type_uri, type=None):
    """
    Generator of repository objects whose RDF type is ``type_uri``.

    :param type_uri: the uri of the RDF type being searched for
    :param type: the class each matching object should be initialized as
    """
    repo = Repository()

    # collect all matching subjects from the resource index before
    # initializing any objects
    matching_pids = list(repo.risearch.get_subjects(RDF.type, type_uri))

    for matched in matching_pids:
        yield repo.get_object(pid=matched, type=type)
Пример #11
0
def tasks(request, pid):
    '''Manage tasks associated with an :class:`~keep.audio.models.AudioObject`.
    The only supported functionality is queueing access copy conversion,
    requested by POSTing the task name, i.e. **generate access copy**.

    Supported tasks:

        * **generate access copy** - queue access copy conversion for an audio
            item by pid.  Returns a status message as the body of a plain/text
            response

    Any other task name gets a plain-text error response with status 500.

    :param pid: the pid of the object for which tasks should be queued
    :raises Http404: if the object does not exist or is not an audio item

    '''
    # NOTE(review): nothing is returned for non-POST requests in this view;
    # presumably the method is restricted elsewhere -- confirm
    if request.method == 'POST':
        task_type = request.POST.get('task', None)

        # TODO May want to prevent queuing of more than one at a time or within a time period.
        # TODO For now javascript disables the link until the page is refreshed.

        # reject anything other than the single supported task
        if task_type != 'generate access copy':
            return HttpResponse('Task "%s" is not supported' % task_type,
                                content_type='text/plain',
                                status=500)

        try:
            repo = Repository(request=request)
            obj = repo.get_object(pid, type=AudioObject)

            # if object doesn't exist or isn't an audio item, 404
            if not (obj.exists and obj.has_requisite_content_models):
                raise Http404

            queue_access_copy(obj)
        except Http404:
            # let not-found propagate untouched
            raise
        except Exception as err:
            logger.error('Error queueing access copy conversion for %s : %s' % \
                (pid, err))
            status = 'Error queueing access copy conversion (%s)' % err
        else:
            status = 'Successfully queued access copy conversion'

        return HttpResponse(status, content_type='text/plain')
Пример #12
0
    def archives(format=None):
        '''Return the fixture archives configured in ``settings.PID_ALIASES``.

        :param format: pass ``dict`` to get a list of ``title``/``pid``
            dictionaries built from the alias mapping
        :returns: list of dictionaries when ``format`` is ``dict``; otherwise
            a list of :class:`CollectionObject`, cached on
            ``FedoraFixtures._archives`` after the first call
        '''
        if format == dict:
            summaries = []
            for nick, pid in settings.PID_ALIASES.iteritems():
                summaries.append({'title': nick, 'pid': pid})
            return summaries

        # reuse the cached objects when they have already been initialized
        if hasattr(FedoraFixtures, '_archives'):
            return FedoraFixtures._archives

        repo = Repository()
        archive_objects = []
        for pid in settings.PID_ALIASES.itervalues():
            archive_objects.append(repo.get_object(pid, type=CollectionObject))
        FedoraFixtures._archives = archive_objects
        return FedoraFixtures._archives
Пример #13
0
def create_from_findingaid(request):
    '''Find an existing collection, or create a new one from a finding aid.

    Validates the posted :class:`FindCollection` form.  If a collection
    with the requested archive and collection number is already indexed,
    redirects to its view page.  Otherwise looks up the finding aid,
    generates and saves a new collection, and redirects to its edit page.
    On any failure a message is added and the user is redirected to the
    admin dashboard.

    :param request: current request; form data is read from ``request.POST``
    :returns: :class:`HttpResponseSeeOtherRedirect`
    '''
    # function-scope import: the module-level imports are outside this block
    import logging

    form = FindCollection(request.POST)
    if not form.is_valid():
        messages.error(request, 'Form is not valid; please try again.')
        return HttpResponseSeeOtherRedirect(reverse('repo-admin:dashboard'))

    data = form.cleaned_data
    coll_id = data['collection']
    archive_label = data['archive'].upper()
    # submitted value is pid alias; lookup pid for solr query
    archive_id = settings.PID_ALIASES[data['archive']]
    q = CollectionObject.item_collection_query()
    q = q.query(archive_id=archive_id, source_id=coll_id)

    # count once and reuse the value rather than re-evaluating it for
    # every use in the message below
    found = q.count()
    if found:
        # collection exists: redirect to collection view with message
        messages.info(request, 'Found %d collection%s for %s %s.' %
                      (found, 's' if found != 1 else '',
                       archive_label, coll_id))
        return HttpResponseSeeOtherRedirect(
            reverse('collection:view', kwargs={'pid': q[0]['pid']}))

    # otherwise, create the new record and redirect to the new
    # collection edit page
    repo = Repository(request=request)
    try:
        archive = repo.get_object(archive_id, type=CollectionObject)
        fa = FindingAid.find_by_unitid(unicode(coll_id),
                                       archive.mods.content.title)
        coll = fa.generate_collection()
        coll.collection = archive
        coll.save()
        messages.info(request, 'Added %s for collection %s: %s'
                      % (coll, coll_id, coll.mods.content.title))

        return HttpResponseSeeOtherRedirect(
            reverse('collection:edit', kwargs={'pid': coll.pid}))

    except DoesNotExist:
        messages.error(request, 'No EAD found for %s in %s' %
                       (coll_id, archive_label))
    except ReturnedMultiple:
        messages.error(request, 'Multiple EADs found for %s in %s' %
                       (coll_id, archive_label))
    except RequestFailed as err:
        # record the failure in the log instead of printing to stdout
        logging.getLogger(__name__).error(
            'Failed to save new collection %s: %s', coll_id, err)
        messages.error(request, 'Failed to save new collection')

    return HttpResponseSeeOtherRedirect(reverse('repo-admin:dashboard'))
Пример #14
0
    def setUp(self):
        '''Save a parent test collection and build an unsaved arrangement
        object that belongs to it.'''
        self.repo = Repository()
        self.pids = []

        # parent collection is saved, so track its pid
        parent = self.repo.get_object(type=CollectionObject)
        parent.pid = '%s:parent-1' % settings.FEDORA_PIDSPACE
        parent.mods.content.source_id = '12345'
        parent.save()
        self.pids.append(parent.pid)

        # arrangement object is only initialized here, not saved
        self.arr = arrangement = self.repo.get_object(type=ArrangementObject)
        arrangement.pid = 'foo:1'
        arrangement.collection = parent
Пример #15
0
 def esterbrook_collection():
     '''Build an unsaved :class:`CollectionObject` fixture for the
     "Thomas Esterbrook letter books" (source id 123, dated 1855-1861).'''
     title = 'Thomas Esterbrook letter books'
     repo = Repository()
     coll = repo.get_object(type=CollectionObject)
     coll.label = title

     record = coll.mods.content
     record.title = title
     record.source_id = '123'
     # parent archive is the third entry in the fixture archive list
     coll.collection = repo.get_object(FedoraFixtures.archives()[2].uri)

     record.create_origin_info()
     for year, point in ((1855, 'start'), (1861, 'end')):
         record.origin_info.created.append(
             mods.DateCreated(date=year, point=point))

     record.create_name()
     record.name.name_parts.append(mods.NamePart(text='Thomas Esterbrook'))
     return coll
Пример #16
0
 def rushdie_collection():
     '''Build an unsaved :class:`CollectionObject` fixture for the
     "Salman Rushdie Collection" (source id 1000, dated 1947-2008).'''
     title = 'Salman Rushdie Collection'
     repo = Repository()
     coll = repo.get_object(type=CollectionObject)
     coll.label = title

     record = coll.mods.content
     record.title = title
     record.source_id = '1000'
     # parent archive is the second entry in the fixture archive list
     coll.collection = repo.get_object(FedoraFixtures.archives()[1].uri)

     record.create_origin_info()
     for year, point in ((1947, 'start'), (2008, 'end')):
         record.origin_info.created.append(
             mods.DateCreated(date=year, point=point))

     record.create_name()
     record.name.name_parts.append(mods.NamePart(text='Salman Rushdie'))
     return coll
Пример #17
0
    def init_from_file(filename,
                       initial_label=None,
                       request=None,
                       checksum=None,
                       mimetype=None):
        '''Static method to create a new :class:`AudioObject` instance from
        a file.  Sets the object label and metadata title based on the initial
        label specified, or file basename.  Calculates and stores the duration
        based on the file. Also sets the following default metadata values:

            * mods:typeOfResource = "sound recording"
            * dt:codecQuality = "lossless"

        :param filename: full path to the audio file, as a string
        :param initial_label: optional initial label to use; if not specified,
            the base name of the specified file will be used
        :param request: :class:`django.http.HttpRequest` passed into a view method;
            must be passed in order to connect to Fedora as the currently-logged
            in user
        :param checksum: the checksum of the file being sent to fedora.
        :param mimetype: optional mimetype to set on the audio datastream
        :returns: :class:`AudioObject` initialized from the file
        '''
        if initial_label is None:
            initial_label = os.path.basename(filename)
        repo = Repository(request=request)
        obj = repo.get_object(type=AudioObject)
        # set initial object label from the base filename
        obj.label = initial_label
        obj.dc.content.title = obj.mods.content.title = obj.label
        # Audio is binary data, so open in binary mode: text mode can mangle
        # the bytes (newline translation) on some platforms.
        # FIXME: at what point does/should this get closed?  It cannot be
        # closed here because the returned object still holds the handle.
        obj.audio.content = open(filename, 'rb')
        # Set the file checksum, if set.
        obj.audio.checksum = checksum
        # set content datastream mimetype if passed in
        if mimetype is not None:
            obj.audio.mimetype = mimetype
        # Datastream label is the initial label minus a trailing ".wav"
        # (mimetype already indicates that).  Only strip when the suffix is
        # really there, so labels without it are not truncated.
        if initial_label.lower().endswith('.wav'):
            obj.audio.label = initial_label[:-len('.wav')]
        else:
            obj.audio.label = initial_label
        # set initial mods:typeOfResource - all AudioObjects default to sound recording
        obj.mods.content.resource_type = 'sound recording'
        # set codec quality to lossless in digital tech metadata
        # - default for AudioObjects, should only accept lossless audio for master file
        obj.digitaltech.content.codec_quality = 'lossless'
        # get wav duration and store in digital tech metadata
        obj.digitaltech.content.duration = '%d' % round(wav_duration(filename))

        return obj
Пример #18
0
    def disk_images(self):
        '''Write to stdout the pids of up to ten of the smallest disk
        images of each format, skipping the restricted collections.'''
        self.stderr.write('Disk images')
        ### disk images
        # representative sample of aff and ad1
        # DO NOT include anything in these collections:
        # Trethewey (ghsdj), Rushdie (94k9k), Mackey (g1btw),
        # Clifton (94kf4), and Grennan (9k0st)
        restricted = ('trethewey', 'rushdie', 'mackey', 'clifton', 'grennan')
        verbose = self.verbosity >= self.v_normal

        solr = solr_interface()
        repo = Repository()
        q = solr.query(content_model=DiskImage.DISKIMAGE_CONTENT_MODEL)
        for name in restricted:
            q = q.exclude(collection_id=self.collections[name])
        q = q.field_limit('pid')
        if verbose:
            self.stderr.write(
                'Found %d disk images not in restricted collections' %
                q.count())

        # currently there is no way to filter on format or size in either
        # solr or fedora risearch
        # so, go through individually and group them by type,
        # then sort by size and pick the smallest ones
        by_format = defaultdict(list)
        for result in q:
            candidate = repo.get_object(result['pid'], type=DiskImage)
            if candidate.exists:
                fmt = candidate.provenance.content.object.format.name
                by_format[fmt].append(candidate)
            elif verbose:
                self.stderr.write('Referenced disk image %s does not exist or is inaccessible' \
                    % result['pid'])

        for fmt, images in by_format.iteritems():
            if verbose:
                self.stderr.write('Selecting %s disk images' % fmt)
            # order by binary file size so the smallest ones are synced
            smallest_first = sorted(images, key=lambda img: img.content.size)
            # use the first 10 of each type
            for img in smallest_first[:10]:
                self.stdout.write(img.pid)
Пример #19
0
    def setUp(self):
        '''Create the fixture objects (simple collection, master collection
        and one arrangement file with two XML datastreams) and configure a
        ``migrate_rushdie`` command instance for testing.'''
        self.repo = Repository()
        self.pids = []

        # simple collection
        self.sc = simple = self.repo.get_object(type=SimpleCollection)
        simple.label = "SimpleCollection For Test"
        simple.save()
        self.pids.append(simple.pid)

        # master collection
        self.mc = master = self.repo.get_object(type=CollectionObject)
        master.label = "MasterCollection For Test"
        master.save()
        self.pids.append(master.pid)

        # digital object with the two MARBL datastreams
        self.digObj = self.repo.get_object(type=RushdieArrangementFile)
        self.digObj.label = "Object For Test"
        self.digObj.save()
        self.pids.append(self.digObj.pid)
        datastreams = (("MARBL-MACTECH", self.MM_FIXTURE),
                       ("MARBL-ANALYSIS", self.MA_FIXTURE))
        for dsid, fixture_xml in datastreams:
            # datastream id doubles as its label
            self.digObj.api.addDatastream(self.digObj.pid,
                                          dsid,
                                          dsid,
                                          mimeType="application/xml",
                                          content=fixture_xml)

        # remove Arrangement model so it can be added later
        arrangement_model = (self.digObj.uriref, modelns.hasModel,
                             "info:fedora/emory-control:Arrangement-1.0")
        self.digObj.rels_ext.content.remove(arrangement_model)
        self.digObj.save()

        # command under test, wired to the fixtures above
        self.cmd = command = migrate_rushdie.Command()
        command.verbosity = 1
        command.v_normal = 1
        command.v_none = 0
        command.simple_collection = self.sc
        command.stdout = sys.stdout
        command.CONTENT_MODELS = CONTENT_MODELS
        command.repo = self.repo
Пример #20
0
def view(request, pid):
    '''Display a single :class:`~keep.collection.models.CollectionObject`
    together with a paginated list (30 per page) of the items in it.

    :raises Http404: if the pid does not exist or is not a collection
    '''
    repo = Repository(request=request)
    obj = repo.get_object(pid, type=CollectionObject)
    # if pid doesn't exist or isn't a collection, 404
    if not (obj.exists and obj.has_requisite_content_models):
        raise Http404

    # all items that belong to this collection, in display order
    q = obj.solr_items_query()
    for sort_field in ('date_created', 'date_issued', 'title_exact'):
        q = q.sort_by(sort_field)
    # filter by logged-in user permissions
    # (includes researcher-accessible content filter when appropriate)
    q = filter_by_perms(q, request.user)

    # a user who can only view researcher-accessible collections and gets
    # no items back does not have permission to view this collection
    user = request.user
    researcher_only = \
        not user.has_perm('collection.view_collection') and \
        user.has_perm('collection.view_researcher_collection')
    if researcher_only and q.count() == 0:
        return prompt_login_or_403(request)

    # paginate the solr result set; non-numeric page falls back to 1 and
    # an empty/invalid page number falls back to the last page
    paginator = Paginator(q, 30)
    try:
        page_number = int(request.GET.get('page', '1'))
    except ValueError:
        page_number = 1
    try:
        items = paginator.page(page_number)
    except (EmptyPage, InvalidPage):
        items = paginator.page(paginator.num_pages)

    # url parameters for pagination links: everything except the page
    url_params = request.GET.copy()
    if 'page' in url_params:
        del url_params['page']

    context = {
        'collection': obj,
        'items': items,
        'url_params': urlencode(url_params),
    }
    return TemplateResponse(request, 'collection/view.html', context)
Пример #21
0
    def find_by_field(field, value, repo=None):
        '''
        Static method to find a single :class:`EmailMessage` by an indexed
        value.  Searches Solr for records with the requested field and
        value (restricted to the arrangement content model) and, when
        exactly one is found, returns it as an :class:`EmailMessage`
        initialized from the repository.

        :param field: solr field to search
        :param value: value to search on in the specified field
        :param repo: optional :class:`eulfedora.server.Repository`
            to use an existing connection with specific credentials

        :returns: :class:`EmailMessage`
        :raises django.core.exceptions.MultipleObjectsReturned: if more
            than one match is found
        :raises django.core.exceptions.ObjectDoesNotExist: if no match is
            found in the Solr index
        '''
        solr = solr_interface()
        criteria = {field: value}
        criteria['content_model'] = ArrangementObject.ARRANGEMENT_CONTENT_MODEL
        q = solr.query(**criteria).field_limit('pid')

        # require one and only one match; django's exceptions are borrowed
        # for the not found / too many matches cases
        match_count = len(q)
        if match_count > 1:
            raise MultipleObjectsReturned('Found %d records with %s %s' % \
                                          (match_count, field, value))
        if not match_count:
            raise ObjectDoesNotExist('No record found with %s %s' %
                                     (field, value))

        # fall back to a default connection when none was supplied
        connection = Repository() if repo is None else repo
        return connection.get_object(q[0]['pid'], type=EmailMessage)
Пример #22
0
def playlist(request, pid):
    '''Return a JSON playlist of the audio items with access copies in a
    collection.

    :raises Http404: if the pid does not exist or is not a collection
    '''
    # FIXME: this needs last-modified so browser can cache!!!

    # NOTE: preliminary logic duplicated from view above
    repo = Repository(request=request)
    obj = repo.get_object(pid, type=CollectionObject)
    # if pid doesn't exist or isn't a collection, 404
    if not (obj.exists and obj.has_requisite_content_models):
        raise Http404

    # all items that belong to this collection, in display order
    q = obj.solr_items_query()
    for sort_field in ('date_created', 'date_issued', 'title_exact'):
        q = q.sort_by(sort_field)
    # filter by logged-in user permissions
    # (includes researcher-accessible content filter when appropriate)
    q = filter_by_perms(q, request.user)

    # a user who can only view researcher-accessible collections and gets
    # no items back does not have permission to view this collection
    user = request.user
    researcher_only = \
        not user.has_perm('collection.view_collection') and \
        user.has_perm('collection.view_researcher_collection')
    if researcher_only and q.count() == 0:
        return prompt_login_or_403(request)

    tracks = []
    for result in q:
        # only audio items that have an access copy are included
        if result['object_type'] == 'audio' and result['has_access_copy']:
            track = {
                'title': result['title'],
                'free': False  # explicitly mark as not downloadable
            }
            # the download url is keyed by the audio type
            is_mp4 = result['access_copy_mimetype'] == 'audio/mp4'
            audio_type = 'm4a' if is_mp4 else 'mp3'
            track[audio_type] = reverse(
                'audio:download-compressed-audio',
                kwargs={'pid': result['pid'], 'extension': audio_type})
            tracks.append(track)

    return HttpResponse(json.dumps(tracks), content_type='application/json')
Пример #23
0
    def handle(self, batch_id=None, folder_path=None, verbosity=1, noact=False,
               max_ingest=None, skip_purge=False, purge_only=False, *args, **options):
        '''Purge old arrangement email objects for a processing batch and
        ingest new email objects from a Eudora folder.

        :param batch_id: pid of the :class:`ProcessingBatch` (required)
        :param folder_path: base path of the Eudora folder (required; must
            be a directory)
        :param verbosity: output level; stored as an int
        :param noact: stored on the command as ``self.noact``
        :param max_ingest: optional limit, stored as ``self.max_ingest``
            only when given
        :param skip_purge: when true, do not remove existing arrangement
            emails
        :param purge_only: when true, do not ingest new email
        :raises CommandError: on missing/invalid arguments, unknown batch,
            or failure to initialize the PID manager client
        '''
        # check batch object
        if batch_id is None:
            raise CommandError('Processing batch id is required')
        self.verbosity = int(verbosity)  # ensure we compare int to int
        if max_ingest is not None:
            self.max_ingest = int(max_ingest)

        # check folder path
        if folder_path is None:
            raise CommandError('Eudora folder base path is required')
        if not os.path.isdir(folder_path):
            raise CommandError('Eudora folder path "%s" is not a directory' % folder_path)
        self.noact = noact

        # pass along any fedora credentials that were specified
        fedora_opts = {}
        for credential in ('username', 'password'):
            if credential in options:
                fedora_opts[credential] = options[credential]
        self.repo = Repository(**fedora_opts)
        batch = self.repo.get_object(batch_id, type=ProcessingBatch)
        if not batch.exists:
            raise CommandError('Processing batch %s not found' % batch_id)
        # parenthesized single-argument form is valid in Python 2 and 3
        print('Looking for email messages in processing batch "%s"'
              % batch.label)

        # Check that the PID manager client can be initialized; the client
        # is not used further in this method.  Catch Exception rather than
        # using a bare except so Ctrl-C / SystemExit are not reported as a
        # configuration problem.
        try:
            DjangoPidmanRestClient()
        except Exception:
            raise CommandError('Error initializing PID manager client; ' +
                               'please check settings.')

        self.stats = defaultdict(int)
        # purge old metadata email 'arrangement' objects that belong to this batch
        if not skip_purge:
            self.remove_arrangement_emails(batch)
        # ingest new objects for email mailboxes & messages
        if not purge_only:
            self.ingest_email(folder_path)
Пример #24
0
def simple_edit(request, pid=None):
    ''' Edit an existing Fedora
    :class:`~keep.collection.models.SimpleCollection`.  If a pid is
    specified, attempts to retrieve an existing object.

    On a valid POST, compares the submitted status with the object's
    current MODS restrictions-on-access text and queues a batch status
    update only when it differs.

    :raises Http404: if Fedora responds with a 404 while accessing the
        object
    '''
    # NOTE(review): no response is returned in the visible portion of this
    # view -- confirm against the complete source
    repo = Repository(request=request)

    try:
        obj = repo.get_object(pid=pid, type=SimpleCollection)

        if request.method == 'POST':
            form = SimpleCollectionEditForm(request.POST)
            if form.is_valid():
                status = form.cleaned_data['status']

                if status == obj.mods.content.restrictions_on_access.text:
                    # don't queue job if there is no change
                    messages.info(request, 'Status is unchanged')

                else:
                    # queue celery task to update items in this batch
                    queue_batch_status_update(obj, status)
                    messages.info(
                        request,
                        'Batch status update has been queued; ' +
                        'please check later via <a href="%s">recent tasks</a> page' %
                        reverse('tasks:recent')
                    )

        else:
            # just display the form, pre-populated with the current status
            form = SimpleCollectionEditForm(initial={'status': obj.mods.content.restrictions_on_access.text})

    # "as" form of except works on Python 2.6+ and Python 3; the comma form
    # is a syntax error on Python 3
    except RequestFailed as e:
        # if there was a 404 accessing objects, raise http404
        # NOTE: this probably doesn't distinguish between object exists with
        # no MODS and object does not exist at all
        if e.code == 404:
            raise Http404
        # otherwise, re-raise and handle as a common fedora connection error
        else:
            raise
Пример #25
0
    def by_arrangement_id(id, repo=None):
        '''
        Static method to find an :class:`ArrangementObject` by its
        local or arrangement id.  Searches Solr for the id (restricted to
        the arrangement content model) and, when exactly one record is
        found, returns it as an :class:`ArrangementObject` initialized
        from the repository.

        :param id: arrangement id or local id
        :param repo: optional :class:`eulfedora.server.Repository`
            to use an existing connection with specific credentials

        :returns: :class:`ArrangementObject`
        :raises django.core.exceptions.MultipleObjectsReturned: if more
            than one match is found
        :raises django.core.exceptions.ObjectDoesNotExist: if no match is
            found in the Solr index
        '''
        solr = solr_interface()
        q = solr.query(
            arrangement_id=id,
            content_model=ArrangementObject.ARRANGEMENT_CONTENT_MODEL)
        q = q.field_limit('pid')

        # require one and only one match; django's exceptions are borrowed
        # for the not found / too many matches cases
        match_count = len(q)
        if match_count > 1:
            raise MultipleObjectsReturned('Found %d records with arrangement id %s' % \
                                          (match_count, id))
        if not match_count:
            raise ObjectDoesNotExist('No record found with arrangement id %s' %
                                     id)

        # fall back to a default connection when none was supplied
        connection = Repository() if repo is None else repo
        return connection.get_object(q[0]['pid'], type=ArrangementObject)
Пример #26
0
    def handle(self, *args, **options):
        '''Repair ARKs for the objects named by pid in ``args``, or for
        every collection in the repository when no pids are given, then
        log a summary of repaired / unrepaired counts.'''
        self.options = options
        self.repaired_count = 0
        self.unrepaired_count = 0

        repo = Repository()
        self.pidman = DjangoPidmanRestClient()

        # Populate the list of objects to be processed: each pid is tried
        # as a collection first and then as an audio object, keeping the
        # first type whose content models match.
        objects = []
        for pid in args:
            try:
                for object_type in (CollectionObject, AudioObject):
                    candidate = repo.get_object(pid=pid, type=object_type)
                    if candidate.has_requisite_content_models:
                        objects.append(candidate)
                        break
            except Exception:
                self.log(
                    message="Could not find Collection or Audio object for: %s"
                    % pid)

        # With no pids specified, process all collections in the repository,
        # found by COLLECTION_CONTENT_MODEL and returned as Keep-specific
        # collection objects.
        if not args:
            objects = repo.get_objects_with_cmodel(
                CollectionObject.COLLECTION_CONTENT_MODEL,
                type=CollectionObject)

        if not objects:
            self.log(message="No Collections were found.")

        for target in objects:
            self.repair_ark(target)

        summary = "\n\n%s ARKs repaired\n%s ARKs were not repaired" % \
            (self.repaired_count, self.unrepaired_count)
        self.log(message=summary, no_label=True)
Пример #27
0
def view(request, pid):
    '''View a single :class:`~keep.audio.models.AudioObject`.
    User must either have general view audio permissions, or if they have
    view researcher audio, the object must be researcher accessible
    (based on rights codes).

    :param request: current request; its user is checked for
        permissions and it is used to initialize the repository
    :param pid: pid of the object to display
    :returns: rendered ``audio/view.html`` response, or the result of
        ``prompt_login_or_403`` when the user lacks permission
    :raises Http404: if the object does not have the required content
        models, or if checking them fails
    '''
    repo = Repository(request=request)
    obj = repo.get_object(pid, type=AudioObject)
    # user either needs view audio permissions OR
    # if they can view researcher audio and object must be researcher-accessible
    if not request.user.has_perm('audio.view_audio') and \
       not (request.user.has_perm('audio.view_researcher_audio') and \
       bool(obj.researcher_access)):
        return prompt_login_or_403(request)

    # Only the content model check is guarded: any error raised while
    # checking is reported as "not found".  Catch Exception rather than
    # using a bare except so KeyboardInterrupt / SystemExit still propagate.
    try:
        is_audio = obj.has_requisite_content_models
    except Exception:
        raise Http404

    if not is_audio:
        raise Http404

    return TemplateResponse(request, 'audio/view.html', {'resource': obj})
Пример #28
0
def batch_set_status(pid, status):
    '''Set the status of a :class:`SimpleCollection` and all its members.

    Every object related to the collection via ``hasMember`` is loaded
    as an :class:`ArrangementObject`, its state is set to the code that
    corresponds to ``status``, and it is saved.  If all members are
    updated successfully, the collection's own MODS access restriction
    text is set to ``status`` and the collection is saved.

    :param pid: pid of the SimpleCollection to update
    :param status: status label; must be ``'Processed'`` or
        ``'Accessioned'``
    :returns: success message with the number of items updated
    :raises Exception: if ``status`` is unknown, if any member item
        could not be updated (the collection itself is then left
        unchanged), or if saving the collection fails
    '''
    repo = Repository()
    batch = repo.get_object(pid, type=SimpleCollection)
    # keep track of totals for success and failure
    success = 0
    error = 0

    # translate form status codes to fedora state code
    # TODO: shift this logic to arrangement object for re-use ?
    codes = {'Processed': 'A', 'Accessioned': 'I'}

    # target state for every object in the collection
    if status not in codes:
        err_msg = 'Status %s unknown' % status
        logger.error(err_msg)
        raise Exception(err_msg)
    state = codes[status]

    # find all pids associated with this object
    member_pids = list(
        batch.rels_ext.content.objects(batch.uriref, relsextns.hasMember))

    # use a distinct loop name so the ``pid`` argument is not clobbered
    for member_pid in member_pids:
        try:
            # pass in api from batch object to retain user credentials
            obj = ArrangementObject(batch.api, member_pid)
            obj.state = state
            obj.save('Marking as %s via SimpleCollection %s' %
                     (status, batch.pid))
            success += 1
        except Exception as e:
            # keep going so a single failure does not block other items
            logger.error('Failed to update %s : %s' % (member_pid, e))
            error += 1

    info = {
        'success': success,
        'error': error,
        'success_plural': '' if success == 1 else 's',
        'error_plural': '' if error == 1 else 's',
        'status': status
    }

    summary_msg = "Successfully updated %(success)s item%(success_plural)s; error updating %(error)s" % info

    # if not all objects were updated correctly, exit with error
    if error > 0:
        raise Exception(summary_msg)

    # FIXME: this is based on the current form logic, but could leave
    # some member items stranded in a different status than the parent object

    batch.mods.content.create_restrictions_on_access()
    batch.mods.content.restrictions_on_access.text = status  # Change collection status
    try:
        batch.save(
            'Marking as %(status)s; updated %(success)s member item%(success_plural)s'
            % info)

    except Exception as e:
        # report the collection's own pid; the member loop variable may
        # be unset here (empty collection) and would name the wrong object
        save_err = "Error updating SimpleCollection %s - %s" % (batch.pid, e)
        logger.error(save_err)
        raise Exception('%s; %s' % (save_err, summary_msg))

    # success
    return 'Successfully updated %(success)s item%(success_plural)s' % info
Пример #29
0
def migrate_aff_diskimage(self, pid):
    '''Migrate an AFF :class:`DiskImage` to a new E01 disk image object.

    Steps, in order:

    1. check that the object exists, is a DiskImage in AFF format and
       has not already been migrated
    2. download the AFF content to a temporary directory
    3. run ``ftkimager`` to generate an E01, then verify both files
       with ``ftkimager_verify`` and compare the resulting checksums
    4. ingest a new DiskImage from the E01, copying descriptive and
       rights metadata from the original and attaching the ftkimager
       output files as supplemental datastreams
    5. update the original object with a reference to the migrated
       object plus a premis migration event and relationship
    6. remove the temporary files and directory

    :param self: not used in this function; presumably supplied by a
        bound task decorator -- TODO confirm against the caller
    :param pid: pid of the AFF disk image to migrate
    :returns: summary string naming the original and migrated pids
    :raises Exception: if any of the checks, the download, the
        conversion, the checksum comparisons or the duplicate-content
        check fails
    '''
    creating_application = 'AccessData FTK Imager'
    application_version = 'v3.1.1 CLI'
    migration_event_detail = 'program="%s"; version="%s"' % \
        (creating_application, application_version)
    migration_event_outcome = 'AFF reformatted as E01 using command line ' + \
        'FTK program with settings: --e01 --compress 0 --frag 100T --quiet'

    # use the configured ingest staging area as the base tmp dir
    # for all temporary files
    staging_dir = getattr(settings, 'LARGE_FILE_STAGING_DIR', None)
    # create a tempdir within the large file staging area
    tmpdir = tempfile.mkdtemp(suffix='-aff-migration', dir=staging_dir)
    logger.debug('Using tmpdir %s', tmpdir)

    # Retrieve the object to be migrated
    repo = Repository()
    original = repo.get_object(pid, type=DiskImage)

    # check object before migrating
    # - exists in fedora
    if not original.exists:
        raise Exception('%s not found in Fedora' % original.pid)
    # - is a disk image
    if not original.has_requisite_content_models:
        raise Exception('%s is not a DiskImage object' % original.pid)
    # - is an AFF disk image
    if original.provenance.content.object.format.name != 'AFF':
        raise Exception('%s DiskImage format is not AFF' % original.pid)
    # - has not already been migrated
    if original.migrated is not None:
        raise Exception('%s has already been migrated' % original.pid)

    # download the aff disk image to a tempfile
    aff_file = tempfile.NamedTemporaryFile(suffix='.aff',
                                           prefix='keep-%s_' % original.noid,
                                           dir=tmpdir,
                                           delete=False)
    logger.debug('Saving AFF as %s for conversion (datastream size: %s)' \
        % (aff_file.name, filesizeformat(original.content.size)))
    try:
        for chunk in original.content.get_chunked_content():
            aff_file.write(chunk)
    except Exception as err:
        # record the underlying cause before raising the summary error
        logger.error('Error downloading %s AFF for conversion: %s',
                     original.pid, err)
        raise Exception('Error downloading %s AFF for conversion' %
                        original.pid)
    finally:
        # always close the handle, both to avoid leaking it on failure
        # and in case of weird interactions with ftkimager
        aff_file.close()

    aff_size = os.path.getsize(aff_file.name)
    logger.debug('Downloaded %s' % filesizeformat(aff_size))

    # run ftkimager to generate the E01 version
    logger.debug('Running ftkimager to generate E01')
    e01_file = tempfile.NamedTemporaryFile(suffix='.E01',
                                           prefix='keep-%s_' % original.noid,
                                           dir=tmpdir,
                                           delete=False)
    # close the file handle in case of weird interactions with ftkimager
    e01_file.close()
    # file handle to capture console output from ftkimager
    # NOTE(review): created without delete=False but also removed
    # explicitly further down, so the tempfile wrapper may try to remove
    # it again when it is closed -- TODO confirm this is harmless
    ftk_output = tempfile.NamedTemporaryFile(suffix='.txt',
                                             prefix='keep-%s-ftkimager_' %
                                             original.noid,
                                             dir=tmpdir)
    logger.debug('E01 temp file is %s' % e01_file.name)
    logger.debug('ftkimager output temp file is %s' % ftk_output.name)
    # ftkimager adds .E01 to the specified filename, so pass in filename without
    e01_file_basename, ext = os.path.splitext(e01_file.name)

    convert_command = [
        'ftkimager', aff_file.name, e01_file_basename, '--e01', '--compress',
        '0', '--frag', '100T', '--quiet'
    ]
    # quiet simply suppresses progress output, which is not meaningful
    # in a captured text file
    logger.debug('conversion command is %s' % ' '.join(convert_command))
    return_val = subprocess.call(convert_command,
                                 stdout=ftk_output,
                                 stderr=subprocess.STDOUT)
    logger.debug('ftkimager return value is %s' % return_val)
    # text file expected next to the E01 (used below as the ftkimager summary)
    ftk_detail_output = '%s.txt' % e01_file.name

    # the return value is only logged; an E01 that is still empty is
    # what is treated as a failed conversion
    e01_size = os.path.getsize(e01_file.name)
    if e01_size == 0:
        raise Exception('Generated E01 file is 0 size')

    logger.info('Generated E01 (%s) from %s AFF (%s)' % \
        (filesizeformat(e01_size), original.pid, filesizeformat(aff_size)))

    # use ftkimager to verify aff and e01 and compare checksums
    aff_checksums = ftkimager_verify(aff_file.name)
    if not aff_checksums:
        raise Exception('Error running ftkimager verify on AFF for %s' %
                        original.pid)
    e01_checksums = ftkimager_verify(e01_file.name)
    if not e01_checksums:
        raise Exception('Error running ftkimager verify on E01 for %s' %
                        original.pid)

    logger.debug('AFF verify checksums: %s' % \
        ', '.join('%s: %s' % (k, v) for k, v in aff_checksums.iteritems()))
    logger.debug('E01 verify checksums: %s' % \
        ', '.join('%s: %s' % (k, v) for k, v in e01_checksums.iteritems()))
    if aff_checksums != e01_checksums:
        raise Exception('AFF and E01 ftkimager verify checksums do not match')

    # create a new diskimage object from the file
    # - calculate file uri for content location
    e01_file_uri = fedora_file_uri(e01_file.name)
    logger.debug('E01 fedora file URI is %s', e01_file_uri)

    # change permissions on tmpdir + files to ensure fedora can access them
    # (0o-prefixed octal literals are valid on Python 2.6+ and Python 3)
    os.chmod(tmpdir, 0o775)
    os.chmod(e01_file.name, 0o666)
    os.chmod(ftk_output.name, 0o666)
    os.chmod(ftk_detail_output, 0o666)

    migrated = DiskImage.init_from_file(e01_file.name,
                                        initial_label=original.label,
                                        content_location=e01_file_uri)

    # add ftkimager text output & details as supplemental files
    # - console output captured from subprocess call
    dsobj = migrated.getDatastreamObject('supplement0',
                                         dsobj_type=FileDatastreamObject)
    dsobj.label = 'ftkimager_output.txt'
    dsobj.mimetype = 'text/plain'
    dsobj.checksum = md5sum(ftk_output.name)
    logger.debug('Adding ftkimager console output as supplemental dastream %s label=%s mimetype=%s checksum=%s' % \
                (dsobj.id, dsobj.label, dsobj.mimetype, dsobj.checksum))
    # read via a context manager so the file handle is not leaked
    with open(ftk_output.name) as console_output:
        dsobj.content = console_output.read()
    # - text file generated by ftkimager alongside the E01
    dsobj2 = migrated.getDatastreamObject('supplement1',
                                          dsobj_type=FileDatastreamObject)
    dsobj2.label = 'ftkimager_summary.txt'
    dsobj2.mimetype = 'text/plain'
    dsobj2.checksum = md5sum(ftk_detail_output)
    logger.debug('Adding ftkimager summary as supplemental dastream %s label=%s mimetype=%s checksum=%s' % \
                (dsobj2.id, dsobj2.label, dsobj2.mimetype, dsobj2.checksum))
    with open(ftk_detail_output) as summary_output:
        dsobj2.content = summary_output.read()

    # set metadata based on original disk image
    # - associate with original
    migrated.original = original
    # copy over descriptive & rights metadata
    # - collection membership
    migrated.collection = original.collection
    # - mods title, covering dates, abstract
    migrated.mods.content.title = original.mods.content.title
    migrated.mods.content.abstract = original.mods.content.abstract
    migrated.mods.content.coveringdate_start = original.mods.content.coveringdate_start
    migrated.mods.content.coveringdate_end = original.mods.content.coveringdate_end
    # - entire rights datastream
    migrated.rights.content = original.rights.content

    ### Update generated premis to describe migration.
    premis_ds = migrated.provenance.content
    premis_ds.object.composition_level = 0
    # these values are the same for all migrated AFFs
    premis_ds.object.create_creating_application()
    premis_ds.object.creating_application.name = creating_application
    premis_ds.object.creating_application.version = application_version
    premis_ds.object.creating_application.date = date.today()

    # add relationship to the original object
    rel = PremisRelationship(type='derivation')
    rel.subtype = 'has source'
    rel.related_object_type = 'ark'
    rel.related_object_id = original.mods.content.ark
    # relationship must also reference the migration event on the
    # original, which doesn't exist yet.  Generate a migration event
    # id now to use for both
    migration_event_id = uuid.uuid1()
    rel.related_event_type = 'UUID'
    rel.related_event_id = migration_event_id
    premis_ds.object.relationships.append(rel)

    ## NOTE: Due to a Fedora bug with checksums and file uri ingest,
    ## content datastream checksum must be cleared out before ingest
    ## and manually checked after.

    # store datastream checksum that would be sent to fedora
    e01_checksum = migrated.content.checksum
    # clear it out so Fedora can ingest without erroring
    migrated.content.checksum = None

    # ingest
    try:
        migrated.save('Ingest migrated version of %s' % original.pid)
        logger.debug('Migrated object ingested as %s' % migrated.pid)
    except DuplicateContent as err:
        # interpolate the values into the message; passing them as extra
        # Exception arguments leaves the format string unformatted
        raise Exception('Duplicate content detected for %s: %s %s' %
                        (original.pid, err, ', '.join(err.pids)))
    # would probably be good to catch other fedora errors

    # remove temporary files
    for tmpfilename in [
            aff_file.name, e01_file.name, ftk_output.name, ftk_detail_output
    ]:
        os.remove(tmpfilename)

    # reinitialize migrated object, just to avoid any issues
    # with accessing ark uri for use in original object premis
    migrated = repo.get_object(migrated.pid, type=DiskImage)
    # verify checksum
    if migrated.content.checksum != e01_checksum:
        raise Exception('Checksum mismatch detected on E01 for %s' %
                        migrated.pid)

    # once migrated object has been ingested,
    # update original object with migration information
    # - add rels-ext reference to migrated object
    original.migrated = migrated
    # - update premis with migration event and relationship
    migration_event = PremisEvent()
    migration_event.id_type = 'UUID'
    migration_event.id = migration_event_id
    migration_event.type = 'migration'
    migration_event.date = datetime.now().isoformat()
    migration_event.detail = migration_event_detail
    migration_event.outcome = 'Pass'
    migration_event.outcome_detail = migration_event_outcome
    migration_event.agent_type = 'fedora user'
    migration_event.agent_id = repo.username
    # premis wants both source and outcome objects linked in the event
    link_source = PremisLinkingObject(id_type='ark')
    link_source.id = original.mods.content.ark
    link_source.role = 'source'
    link_outcome = PremisLinkingObject(id_type='ark')
    link_outcome.id = migrated.mods.content.ark
    link_outcome.role = 'outcome'
    migration_event.linked_objects.extend([link_source, link_outcome])
    original.provenance.content.events.append(migration_event)
    # add relation to migrated object in to premis object
    rel = PremisRelationship(type='derivation')
    rel.subtype = 'is source of'
    rel.related_object_type = 'ark'
    rel.related_object_id = migrated.mods.content.ark
    rel.related_event_type = 'UUID'
    rel.related_event_id = migration_event.id
    original.provenance.content.object.relationships.append(rel)
    original.save()
    logger.debug('Original disk image updated with migration data')

    # remove aff migration temp dir and any remaining contents
    try:
        shutil.rmtree(tmpdir)
    except OSError:
        # tempdir removal could fail due to nfs files
        # wait a few seconds and try again
        time.sleep(3)
        try:
            shutil.rmtree(tmpdir)
        except OSError as os_err:
            # best effort only: a leftover tmpdir does not fail the migration
            logger.warning('Failed to remove tmpdir %s : %s', tmpdir, os_err)

    logger.info('Migrated %s AFF to %s E01' % (original.pid, migrated.pid))
    return 'Migrated %s to %s' % (original.pid, migrated.pid)
Пример #30
0
def view_audit_trail(request, pid):
    'Return the XML audit trail for the object identified by ``pid``.'
    # build a repository connection with the logged-in user's credentials
    # and hand off to the eulfedora view; object type is irrelevant
    # for the audit trail
    user_repo = Repository(request=request)
    return raw_audit_trail(request, pid, repo=user_repo)