Пример #1
0
    def test_ingest_utf8(self):
        # Ingest objects carrying non-ASCII text and check that the text
        # comes back intact: first as the ingest log message (read from the
        # object XML), then as the object label (read from the profile).

        # ingest with unicode log message
        obj = self.loadFixtureData('basic-object.foxml')
        response = self.rest_api.ingest(obj, logMessage=self.unicode_test_str)
        pid = response.text
        self.assertTrue(pid)

        response = self.rest_api.getObjectXML(pid)
        response.encoding = 'utf-8'  # ensure requests decodes as utf-8
        # assertIn replaces the deprecated assert_ alias and reports both
        # operands when the check fails
        self.assertIn(
            u'<audit:justification>%s</audit:justification>' %
            self.unicode_test_str,
            response.text)
        self.rest_api.purgeObject(force_text(pid))

        # ingest with unicode object label
        # convert to text to replace string, then convert back to bytes
        obj = force_bytes(force_text(obj).replace(
            u"A test object", self.unicode_test_str))
        response = self.rest_api.ingest(obj)
        pid = response.text
        self.assertTrue(pid)

        # object label in profile should match the unicode sent
        response = self.rest_api.getObjectProfile(pid)
        response.encoding = 'utf-8'  # ensure requests decodes as utf-8
        self.assertIn(u'<objLabel>%s</objLabel>' % self.unicode_test_str,
                      response.text)
        self.rest_api.purgeObject(force_text(pid))
Пример #2
0
    def get_datastream_info(self, dsinfo):
        '''Use regular expressions to pull datastream [version]
        details (id, mimetype, size, and checksum) for binary content,
        in order to sanity check the decoded data.

        :param dsinfo: text content just before a binaryContent tag
        :returns: dict with keys for id, mimetype, size, type and digest,
            or None if no match is found
        :raises UnicodeDecodeError: if the content cannot be decoded, even
            after dropping a partial character from the start
        '''
        # we only need to look at the end of this section of content
        dsinfo = dsinfo[-750:]
        # if not enough content is present, include the end of
        # the last read chunk, if available
        if len(dsinfo) < 750 and self.end_of_last_chunk is not None:
            dsinfo = self.end_of_last_chunk + dsinfo

        # force text needed for python 3 compatibility (in python 3
        # dsinfo is bytes instead of a string)
        try:
            text = force_text(dsinfo)
        except UnicodeDecodeError as err:
            if 'invalid start byte' not in force_text(err):
                # bare raise preserves the original traceback
                raise
            # it's possible to see a unicode character split across read
            # blocks, which leaves stray continuation bytes at the start.
            # A UTF-8 character is at most four bytes, so up to three
            # leading bytes may need to be dropped (assumes force_text
            # decodes as UTF-8); they are not needed for datastream context.
            for skip in range(1, 4):
                try:
                    text = force_text(dsinfo[skip:])
                    break
                except UnicodeDecodeError:
                    continue
            else:
                # the problem was not a partial leading character
                raise

        infomatch = self.dsinfo_regex.search(text)
        if infomatch:
            return infomatch.groupdict()
        return None
Пример #3
0
    def test_ingest_utf8(self):
        # Round-trip non-ASCII text through ingest twice: once as the log
        # message (verified in the object XML) and once as the object label
        # (verified in the object profile).

        # ingest with unicode log message
        obj = self.loadFixtureData('basic-object.foxml')
        response = self.rest_api.ingest(obj, logMessage=self.unicode_test_str)
        pid = response.text
        self.assertTrue(pid)

        response = self.rest_api.getObjectXML(pid)
        response.encoding = 'utf-8'  # ensure requests decodes as utf-8
        # use assertIn rather than the deprecated assert_ alias so a
        # failure shows the expected fragment and the actual response
        expected_audit = (u'<audit:justification>%s</audit:justification>'
                          % self.unicode_test_str)
        self.assertIn(expected_audit, response.text)
        self.rest_api.purgeObject(force_text(pid))

        # ingest with unicode object label
        # convert to text to replace string, then convert back to bytes
        obj = force_bytes(force_text(obj).replace(
            u"A test object", self.unicode_test_str))
        response = self.rest_api.ingest(obj)
        pid = response.text
        self.assertTrue(pid)

        # object label in profile should match the unicode sent
        response = self.rest_api.getObjectProfile(pid)
        response.encoding = 'utf-8'  # ensure requests decodes as utf-8
        expected_label = u'<objLabel>%s</objLabel>' % self.unicode_test_str
        self.assertIn(expected_label, response.text)
        self.rest_api.purgeObject(force_text(pid))
Пример #4
0
 def test_encrypt_decrypt(text):
     # Helper: encrypt ``text``, check the ciphertext differs from the
     # input, then check that decrypting it gives the input back.
     # ``self`` comes from the enclosing scope.
     ciphertext = cryptutil.encrypt(text)
     self.assertNotEqual(
         text, ciphertext, "encrypted text should not match original")
     # decrypt output is passed through force_text before comparing
     plaintext = force_text(cryptutil.decrypt(ciphertext))
     self.assertEqual(
         text, plaintext,
         "decrypted text (%s) should match original encrypted text (%s)"
         % (plaintext, text))
Пример #5
0
    def get_datastream_info(self, dsinfo):
        '''Use regular expressions to pull datastream [version]
        details (id, mimetype, size, and checksum) for binary content,
        in order to sanity check the decoded data.

        :param dsinfo: text content just before a binaryContent tag
        :returns: dict with keys for id, mimetype, size, type and digest,
            or None if no match is found
        :raises UnicodeDecodeError: if the content still cannot be decoded
            once a partial leading character has been discarded
        '''
        # we only need to look at the end of this section of content
        dsinfo = dsinfo[-750:]
        # if not enough content is present, include the end of
        # the last read chunk, if available
        if len(dsinfo) < 750 and self.end_of_last_chunk is not None:
            dsinfo = self.end_of_last_chunk + dsinfo

        # force text needed for python 3 compatibility (in python 3
        # dsinfo is bytes instead of a string)
        try:
            text = force_text(dsinfo)
        except UnicodeDecodeError as err:
            if 'invalid start byte' not in force_text(err):
                # re-raise unchanged, keeping the original traceback
                raise
            # a unicode character can be split across read blocks; the
            # tail of that character then sits at the start of dsinfo.
            # One multi-byte UTF-8 character can leave as many as three
            # continuation bytes behind, so try dropping one, two, then
            # three leading bytes (assumes force_text decodes as UTF-8).
            # Those bytes are not needed for datastream context.
            text = None
            for skip in (1, 2, 3):
                try:
                    text = force_text(dsinfo[skip:])
                except UnicodeDecodeError:
                    continue
                break
            if text is None:
                # not a partial leading character after all
                raise

        infomatch = self.dsinfo_regex.search(text)
        if infomatch:
            return infomatch.groupdict()
        return None
Пример #6
0
    def test_purgeRelationship(self):
        # Purging an existing relationship reports True, purging one that
        # does not exist reports a false value, and an unknown pid raises
        # RequestFailed.
        # (leftover debugging print() calls were removed from this test)

        # add relation to purge
        self.rest_api.addRelationship(
            self.pid, "info:fedora/%s" % self.pid, predicate=force_text(modelns.hasModel), object="info:fedora/pid:123"
        )

        purged = self.rest_api.purgeRelationship(
            self.pid, "info:fedora/%s" % self.pid, force_text(modelns.hasModel), "info:fedora/pid:123"
        )
        self.assertEqual(purged, True)

        # purge non-existent rel on valid pid
        purged = self.rest_api.purgeRelationship(
            self.pid, "info:fedora/%s" % self.pid, self.rel_owner, "johndoe", isLiteral=True
        )
        self.assertFalse(purged)

        # bogus pid
        self.assertRaises(
            RequestFailed,
            self.rest_api.purgeRelationship,
            "bogus:pid",
            "info:fedora/bogus:pid",
            self.rel_owner,
            "johndoe",
            True,
        )
Пример #7
0
    def test_purgeRelationship(self):
        # Checks three outcomes of purgeRelationship: True for a
        # relationship that was just added, a false value for one that was
        # never added, and RequestFailed for a pid that does not exist.
        # (stray debugging print() calls have been dropped)

        # add relation to purge
        self.rest_api.addRelationship(self.pid,
                                      'info:fedora/%s' % self.pid,
                                      predicate=force_text(modelns.hasModel),
                                      object='info:fedora/pid:123')

        purged = self.rest_api.purgeRelationship(self.pid,
                                                 'info:fedora/%s' % self.pid,
                                                 force_text(modelns.hasModel),
                                                 'info:fedora/pid:123')
        self.assertEqual(purged, True)

        # purge non-existent rel on valid pid
        purged = self.rest_api.purgeRelationship(self.pid,
                                                 'info:fedora/%s' % self.pid,
                                                 self.rel_owner,
                                                 'johndoe',
                                                 isLiteral=True)
        self.assertFalse(purged)

        # bogus pid
        self.assertRaises(RequestFailed, self.rest_api.purgeRelationship,
                          "bogus:pid", 'info:fedora/bogus:pid', self.rel_owner,
                          "johndoe", True)
Пример #8
0
 def test_encrypt_decrypt(text):
     # Encrypt/decrypt round trip for a single value; ``self`` is taken
     # from the enclosing scope.
     scrambled = cryptutil.encrypt(text)
     self.assertNotEqual(text, scrambled,
                         "encrypted text should not match original")

     recovered = force_text(cryptutil.decrypt(scrambled))
     failure_note = (
         "decrypted text (%s) should match original encrypted text (%s)"
         % (recovered, text))
     self.assertEqual(text, recovered, failure_note)
Пример #9
0
    def test_ingest_without_pid(self):
        # Ingest a fixture without specifying a pid, with and without a
        # log message, purging the created object each time.
        obj = self.loadFixtureData('basic-object.foxml')
        pid = self.repo.ingest(force_bytes(obj))
        self.assertTrue(pid)
        self.repo.purge_object(force_text(pid))

        # test ingesting with log message
        pid = self.repo.ingest(obj, "this is my test ingest message")
        # ingest message is stored in AUDIT datastream
        # - can currently only be accessed by retrieving entire object xml
        r = self.repo.api.getObjectXML(force_text(pid))
        # assertIn shows the searched text and the response on failure,
        # unlike assertTrue(... in ...)
        self.assertIn("this is my test ingest message", r.text)
        purged = self.repo.purge_object(force_text(pid), "removing test ingest object")
        self.assertTrue(purged)
Пример #10
0
    def test_getRelationships(self):
        # Add one resource relationship and one literal relationship, then
        # check they are returned both when listing all relationships and
        # when filtering by a single predicate.

        # add relations to retrieve
        self.rest_api.addRelationship(
            self.pid, "info:fedora/%s" % self.pid, force_text(modelns.hasModel), "info:fedora/pid:123", False
        )
        self.rest_api.addRelationship(self.pid, "info:fedora/%s" % self.pid, self.rel_owner, "johndoe", True)

        r = self.rest_api.getRelationships(self.pid)
        graph = parse_rdf(r.content, r.url)

        # check total number: fedora-system cmodel + two just added
        self.assertEqual(3, len(list(graph)))
        # newly added triples should be included in the graph
        # (assertIn replaces the deprecated assert_ alias)
        self.assertIn(
            (URIRef("info:fedora/%s" % self.pid), modelns.hasModel, URIRef("info:fedora/pid:123")), graph
        )

        self.assertEqual(
            "johndoe", str(graph.value(subject=URIRef("info:fedora/%s" % self.pid), predicate=URIRef(self.rel_owner)))
        )

        # get rels for a single predicate
        r = self.rest_api.getRelationships(self.pid, predicate=self.rel_owner)
        graph = parse_rdf(r.content, r.url)
        # should include just the one we asked for
        self.assertEqual(1, len(list(graph)))

        self.assertEqual(
            "johndoe", str(graph.value(subject=URIRef("info:fedora/%s" % self.pid), predicate=URIRef(self.rel_owner)))
        )
Пример #11
0
def curl_upload_file(filename):
    """Upload a file to the Fedora upload URL with pycurl, showing progress.

    Posts ``filename`` as a multipart form field named ``file`` to
    ``testsettings.FEDORA_ROOT_NONSSL + "upload"`` using HTTP Basic
    credentials from ``testsettings``, then prints the response status
    code and total transfer time.

    :param filename: path of the file to upload
    """
    print("curl upload")
    conn = pycurl.Curl()
    # close the curl handle even if setup or the transfer raises
    try:
        auth = base64.b64encode(force_bytes("%s:%s" % (testsettings.FEDORA_USER, testsettings.FEDORA_PASSWORD)))
        headers = {"Authorization": "Basic %s" % force_text(auth)}
        conn.setopt(conn.URL, "%supload" % testsettings.FEDORA_ROOT_NONSSL)
        conn.setopt(pycurl.VERBOSE, 1)
        conn.setopt(pycurl.HTTPHEADER, ["%s: %s" % t for t in headers.items()])

        filesize = os.path.getsize(filename)
        widgets = [
            "Upload: ",
            progressbar.widgets.Percentage(),
            " ",
            progressbar.widgets.Bar(),
            " ",
            progressbar.widgets.ETA(),
            " ",
            progressbar.widgets.FileTransferSpeed(),
        ]
        # set initial progressbar size based on file; will be slightly larger because
        # of multipart boundary content
        pbar = progressbar.ProgressBar(widgets=widgets, maxval=filesize).start()

        def progress(dl_total, dl, up_total, up):
            # update the progressbar to actual maxval (content + boundary)
            pbar.max_value = up_total
            # update current status
            pbar.update(up)

        conn.setopt(
            conn.HTTPPOST,
            [
                (
                    "file",
                    (
                        # upload the contents of this file
                        conn.FORM_FILE,
                        filename,
                        # specify a different file name for the upload
                        conn.FORM_FILENAME,
                        "file",
                    ),
                )
            ],
        )
        conn.setopt(conn.XFERINFOFUNCTION, progress)
        # progress callbacks are only invoked when NOPROGRESS is off
        conn.setopt(conn.NOPROGRESS, False)

        conn.perform()

        # HTTP response code, e.g. 200.
        print("Status: %d" % conn.getinfo(conn.RESPONSE_CODE))
        # Elapsed time for the transfer.
        print("Time: %f" % conn.getinfo(conn.TOTAL_TIME))
    finally:
        conn.close()
Пример #12
0
    def test_getRelationships(self):
        # Adds a resource relationship and a literal relationship, then
        # verifies getRelationships returns them, both unfiltered and
        # filtered to a single predicate.

        # add relations to retrieve
        self.rest_api.addRelationship(self.pid, 'info:fedora/%s' % self.pid,
                                      force_text(modelns.hasModel),
                                      "info:fedora/pid:123", False)
        self.rest_api.addRelationship(self.pid, 'info:fedora/%s' % self.pid,
                                      self.rel_owner, "johndoe", True)

        r = self.rest_api.getRelationships(self.pid)
        graph = parse_rdf(r.content, r.url)

        # check total number: fedora-system cmodel + two just added
        self.assertEqual(3, len(list(graph)))
        # newly added triples should be included in the graph
        # (assertIn is used in place of the deprecated assert_ alias)
        self.assertIn((URIRef('info:fedora/%s' % self.pid), modelns.hasModel,
                       URIRef('info:fedora/pid:123')), graph)

        self.assertEqual(
            'johndoe',
            str(
                graph.value(subject=URIRef('info:fedora/%s' % self.pid),
                            predicate=URIRef(self.rel_owner))))

        # get rels for a single predicate
        r = self.rest_api.getRelationships(self.pid, predicate=self.rel_owner)
        graph = parse_rdf(r.content, r.url)
        # should include just the one we asked for
        self.assertEqual(1, len(list(graph)))

        self.assertEqual(
            'johndoe',
            str(
                graph.value(subject=URIRef('info:fedora/%s' % self.pid),
                            predicate=URIRef(self.rel_owner))))
Пример #13
0
def index_data(request, id, repo=None):
    '''Return the fields and values to be indexed for a single object
    as JSON.  Index content is generated via
    :meth:`eulfedora.models.DigitalObject.index_data`.

    :param request: the incoming HTTP request; its ``HTTP_AUTHORIZATION``
        header, when it carries Basic credentials, is used for Fedora access
    :param id: id of the object to be indexed; in this case a Fedora pid
    :param repo: optional repository object to use; when None a
        :class:`TypeInferringRepository` is created
    :returns: JSON response with the index data, or a forbidden response
        when the permission check denies access
    :raises Http404: if retrieving the object from Fedora fails
    '''

    # Ensure permission to this resource is allowed. Currently based on IP only.
    if _permission_denied_check(request):
        return HttpResponseForbidden('Access to this web service was denied.', content_type='text/html')

    if repo is None:
        repo_opts = {}
        # if credentials are specified via Basic Auth, use them for Fedora access
        auth_info = request.META.get('HTTP_AUTHORIZATION', None)
        basic = 'Basic '
        if auth_info and auth_info.startswith(basic):
            basic_info = auth_info[len(basic):]
            basic_info_decoded = base64.b64decode(force_bytes(basic_info))
            # NOTE: codecs.decode works everywhere but python 3.3. which
            # complains about an unknown encoding
            # basic_info_decoded = codecs.decode(force_bytes(basic_info), 'base64')
            # split on the FIRST colon only: a password may itself contain
            # colons, and a plain split(':') would then fail to unpack
            u, sep, p = force_text(basic_info_decoded).partition(':')
            repo_opts.update({'username': u, 'password': p})

        repo = TypeInferringRepository(**repo_opts)
    try:
        obj = repo.get_object(id)
        return HttpResponse(json.dumps(obj.index_data()),
                            content_type='application/json')
    except RequestFailed:
        # for now, treat any failure getting the object from Fedora as a 404
        # (could also potentially be a permission error)
        raise Http404
Пример #14
0
    def test_login_and_store_credentials_in_session(self):
        # only testing custom logic, which happens on POST
        # everything else is handled by django.contrib.auth
        mockrequest = Mock()
        mockrequest.method = 'POST'

        # side effects for the patched login view: they mark the mock
        # request's user as logged out / logged in
        def not_logged_in(rqst):
            rqst.user.is_authenticated.return_value = False

        def set_logged_in(rqst):
            rqst.user.is_authenticated.return_value = True
            rqst.POST.get.return_value = "TEST_PASSWORD"

        # failed login
        with patch('eulfedora.views.authviews.login',
                   new=Mock(side_effect=not_logged_in)):
            mockrequest.session = dict()
            login_and_store_credentials_in_session(mockrequest)
            # assertNotIn/assertIn replace the deprecated assert_ alias
            self.assertNotIn(FEDORA_PASSWORD_SESSION_KEY, mockrequest.session,
                             'user password for fedora should not be stored in session on failed login')

        # successful login
        with patch('eulfedora.views.authviews.login',
                   new=Mock(side_effect=set_logged_in)):
            login_and_store_credentials_in_session(mockrequest)
            self.assertIn(FEDORA_PASSWORD_SESSION_KEY, mockrequest.session,
                          'user password for fedora should be stored in session on successful login')
            # test password stored in the mock request
            pwd = mockrequest.POST.get()
            # encrypted password stored in session
            sessionpwd = mockrequest.session[FEDORA_PASSWORD_SESSION_KEY]
            self.assertNotEqual(pwd, sessionpwd,
                                'password should not be stored in the session without encryption')
            self.assertEqual(pwd, force_text(cryptutil.decrypt(sessionpwd)),
                             'user password stored in session is encrypted')
Пример #15
0
def to_blocksize(password):
    """Right-pad ``password`` to a multiple of the cipher block size.

    The padded width is the input length plus
    ``block_size - len(password) % block_size``, so an input that is
    already block-aligned gains one full extra block of padding.

    :param password: text to pad
    :returns: ``password`` left-justified to the computed width
    """
    block_size = EncryptionAlgorithm.block_size
    remainder = len(password) % block_size
    width = len(password) + (block_size - remainder)
    # on python 3 the pad character is converted to text before padding
    if six.PY3:
        pad = force_text(ENCRYPT_PAD_CHARACTER)
    else:
        pad = ENCRYPT_PAD_CHARACTER
    return password.ljust(width, pad)
Пример #16
0
    def test_addRelationship(self):
        # Add a resource relationship and a literal relationship, checking
        # each shows up in the RELS-EXT datastream; an unknown pid must
        # raise RequestFailed.

        # rel to resource
        added = self.rest_api.addRelationship(
            self.pid, "info:fedora/%s" % self.pid, force_text(modelns.hasModel), "info:fedora/pid:123", False
        )
        self.assertTrue(added)
        r = self.rest_api.getDatastreamDissemination(self.pid, "RELS-EXT")
        # assertIn replaces the deprecated assert_ alias
        self.assertIn("<hasModel", r.text)
        self.assertIn('rdf:resource="info:fedora/pid:123"', r.text)

        # literal
        added = self.rest_api.addRelationship(self.pid, "info:fedora/%s" % self.pid, self.rel_owner, "johndoe", True)
        self.assertTrue(added)
        r = self.rest_api.getDatastreamDissemination(self.pid, "RELS-EXT")
        self.assertIn("<owner", r.text)
        self.assertIn(">johndoe<", r.text)

        # bogus pid
        self.assertRaises(
            RequestFailed,
            self.rest_api.addRelationship,
            "bogus:pid",
            "info:fedora/bogus:pid",
            self.rel_owner,
            "johndoe",
            True,
        )
Пример #17
0
    def test_addDatastream(self):
        # Add a text datastream and verify it via the object XML, the
        # datastream listing, the datastream profile and the content itself.

        # returns result from addDatastream call and info used for add
        ((added, msg), ds) = self._add_text_datastream()

        self.assertTrue(added)  # response from addDatastream
        r = self.rest_api.getObjectXML(self.pid)
        message = r.content
        # assertIn replaces the deprecated assert_ alias throughout
        self.assertIn(ds['logMessage'], force_text(message))
        r = self.rest_api.listDatastreams(self.pid)
        self.assertIn(
            '<datastream dsid="%(id)s" label="%(label)s" mimeType="%(mimeType)s" />'
            % ds, r.text)
        r = self.rest_api.getDatastream(self.pid, ds['id'])
        ds_profile = r.text
        self.assertIn('dsID="%s"' % ds['id'], ds_profile)
        self.assertIn('<dsLabel>%s</dsLabel>' % ds['label'], ds_profile)
        self.assertIn('<dsVersionID>%s.0</dsVersionID>' % ds['id'], ds_profile)
        self.assertIn('<dsCreateDate>%s' % self.today, ds_profile)
        self.assertIn('<dsState>A</dsState>', ds_profile)
        self.assertIn('<dsMIME>%s</dsMIME>' % ds['mimeType'], ds_profile)
        self.assertIn('<dsControlGroup>%s</dsControlGroup>' % ds['controlGroup'], ds_profile)
        self.assertIn('<dsVersionable>true</dsVersionable>', ds_profile)

        # content returned from fedora should be exactly what we started with
        r = self.rest_api.getDatastreamDissemination(self.pid, ds['id'])
        self.assertEqual(self.TEXT_CONTENT, r.text)
Пример #18
0
def index_data(request, id, repo=None):
    '''Return the fields and values to be indexed for a single object
    as JSON.  Index content is generated via
    :meth:`eulfedora.models.DigitalObject.index_data`.

    :param request: the incoming HTTP request; Basic credentials in its
        ``HTTP_AUTHORIZATION`` header, if any, are used for Fedora access
    :param id: id of the object to be indexed; in this case a Fedora pid
    :param repo: optional repository object; when None a
        :class:`TypeInferringRepository` is created
    :returns: JSON response with the index data, or a forbidden response
        when the permission check denies access
    :raises Http404: if retrieving the object from Fedora fails
    '''

    # Ensure permission to this resource is allowed. Currently based on IP only.
    if _permission_denied_check(request):
        return HttpResponseForbidden('Access to this web service was denied.',
                                     content_type='text/html')

    if repo is None:
        repo_opts = {}
        # if credentials are specified via Basic Auth, use them for Fedora access
        auth_info = request.META.get('HTTP_AUTHORIZATION', None)
        basic = 'Basic '
        if auth_info and auth_info.startswith(basic):
            basic_info = auth_info[len(basic):]
            basic_info_decoded = codecs.decode(force_bytes(basic_info),
                                               'base64')
            # only the first colon separates username from password; the
            # password may contain further colons, which a plain
            # split(':') would turn into an unpacking error
            u, sep, p = force_text(basic_info_decoded).partition(':')
            repo_opts.update({'username': u, 'password': p})

        repo = TypeInferringRepository(**repo_opts)
    try:
        obj = repo.get_object(id)
        return HttpResponse(json.dumps(obj.index_data()),
                            content_type='application/json')
    except RequestFailed:
        # for now, treat any failure getting the object from Fedora as a 404
        # (could also potentially be a permission error)
        raise Http404
Пример #19
0
def index_data(request, id, repo=None):
    """Return the fields and values to be indexed for a single object
    as JSON.  Index content is generated via
    :meth:`eulfedora.models.DigitalObject.index_data`.

    :param request: the incoming HTTP request; when its
        ``HTTP_AUTHORIZATION`` header holds Basic credentials they are
        passed on for Fedora access
    :param id: id of the object to be indexed; in this case a Fedora pid
    :param repo: optional repository object; when None a
        :class:`TypeInferringRepository` is created
    :returns: JSON response with the index data, or a forbidden response
        when the permission check denies access
    :raises Http404: if retrieving the object from Fedora fails
    """

    # Ensure permission to this resource is allowed. Currently based on IP only.
    if _permission_denied_check(request):
        return HttpResponseForbidden("Access to this web service was denied.", content_type="text/html")

    if repo is None:
        repo_opts = {}
        # if credentials are specified via Basic Auth, use them for Fedora access
        auth_info = request.META.get("HTTP_AUTHORIZATION", None)
        basic = "Basic "
        if auth_info and auth_info.startswith(basic):
            basic_info = auth_info[len(basic) :]
            basic_info_decoded = codecs.decode(force_bytes(basic_info), "base64")
            # partition on the first colon so that passwords containing
            # colons are kept whole instead of breaking the unpacking
            u, sep, p = force_text(basic_info_decoded).partition(":")
            repo_opts.update({"username": u, "password": p})

        repo = TypeInferringRepository(**repo_opts)
    try:
        obj = repo.get_object(id)
        return HttpResponse(json.dumps(obj.index_data()), content_type="application/json")
    except RequestFailed:
        # for now, treat any failure getting the object from Fedora as a 404
        # (could also potentially be a permission error)
        raise Http404
Пример #20
0
 def ingestFixture(self, fname):
     """Load the named fixture, ingest it as text, and record its pid.

     :param fname: fixture name passed to ``loadFixtureData``
     """
     fixture_text = force_text(self.loadFixtureData(fname))
     pid = self.repo.ingest(fixture_text)
     # ideally a failed ingest would always raise rather than hand back
     # an empty pid; that has not been thoroughly tested yet, so keep
     # guarding against a falsy pid until it has been.
     if not pid:
         return
     self.append_pid(pid)
Пример #21
0
def curl_upload_file(filename):
    '''Upload a file to the Fedora upload URL with pycurl, showing progress.

    Posts ``filename`` as a multipart form field named ``file`` to
    ``testsettings.FEDORA_ROOT_NONSSL + 'upload'`` using HTTP Basic
    credentials from ``testsettings``, then prints the response status
    code and total transfer time.

    :param filename: path of the file to upload
    '''
    print('curl upload')
    conn = pycurl.Curl()
    # make sure the curl handle is released even when setup or the
    # transfer raises
    try:
        auth = base64.b64encode(
            force_bytes("%s:%s" %
                        (testsettings.FEDORA_USER, testsettings.FEDORA_PASSWORD)))
        headers = {'Authorization': 'Basic %s' % force_text(auth)}
        conn.setopt(conn.URL, '%supload' % testsettings.FEDORA_ROOT_NONSSL)
        conn.setopt(pycurl.VERBOSE, 1)
        conn.setopt(pycurl.HTTPHEADER, ["%s: %s" % t for t in headers.items()])

        filesize = os.path.getsize(filename)
        widgets = [
            'Upload: ',
            progressbar.widgets.Percentage(), ' ',
            progressbar.widgets.Bar(), ' ',
            progressbar.widgets.ETA(), ' ',
            progressbar.widgets.FileTransferSpeed()
        ]
        # set initial progressbar size based on file; will be slightly larger because
        # of multipart boundary content
        pbar = progressbar.ProgressBar(widgets=widgets, maxval=filesize).start()

        def progress(dl_total, dl, up_total, up):
            # update the progressbar to actual maxval (content + boundary)
            pbar.max_value = up_total
            # update current status
            pbar.update(up)

        conn.setopt(
            conn.HTTPPOST,
            [
                (
                    'file',
                    (
                        # upload the contents of this file
                        conn.FORM_FILE,
                        filename,
                        # specify a different file name for the upload
                        conn.FORM_FILENAME,
                        'file',
                    )),
            ])
        conn.setopt(conn.XFERINFOFUNCTION, progress)
        # progress callbacks are only invoked when NOPROGRESS is off
        conn.setopt(conn.NOPROGRESS, False)

        conn.perform()

        # HTTP response code, e.g. 200.
        print('Status: %d' % conn.getinfo(conn.RESPONSE_CODE))
        # Elapsed time for the transfer.
        print('Time: %f' % conn.getinfo(conn.TOTAL_TIME))
    finally:
        conn.close()
Пример #22
0
def to_blocksize(password):
    '''Right-pad ``password`` with the encryption pad character so that its
    length is a multiple of ``EncryptionAlgorithm.block_size``.

    When the length is already an exact multiple, one full extra block of
    padding is added.

    :param password: text to be padded
    :returns: the padded text
    '''
    current_length = len(password)
    # distance to the next block boundary (a whole block if already aligned)
    padding = (EncryptionAlgorithm.block_size
               - current_length % EncryptionAlgorithm.block_size)
    # on python 3 the pad character has to be converted to text
    if six.PY3:
        pad_char = force_text(ENCRYPT_PAD_CHARACTER)
    else:
        pad_char = ENCRYPT_PAD_CHARACTER
    return password.ljust(current_length + padding, pad_char)
Пример #23
0
    def test_purgeRelationship(self):
        '''Check purgeRelationship for an existing relation, a relation
        that does not exist, and a bogus pid.'''
        subject = 'info:fedora/%s' % self.pid
        has_model = force_text(modelns.hasModel)
        target = 'info:fedora/pid:123'

        # add a relation so there is something to purge
        self.rest_api.addRelationship(self.pid, subject,
                                      predicate=has_model, object=target)

        # purging the relation that was just added should succeed
        result = self.rest_api.purgeRelationship(self.pid, subject,
                                                 has_model, target)
        self.assertEqual(result, True)

        # purge non-existent rel on valid pid
        result = self.rest_api.purgeRelationship(
            self.pid, subject, self.rel_owner, 'johndoe', isLiteral=True)
        self.assertFalse(result)

        # bogus pid
        with self.assertRaises(RequestFailed):
            self.rest_api.purgeRelationship(
                "bogus:pid", 'info:fedora/bogus:pid', self.rel_owner,
                "johndoe", True)
Пример #24
0
    def test_addDatastream(self):
        '''Add a text datastream and check it through the object XML, the
        datastream listing, the datastream profile and the returned
        content; then check an invalid checksum, a checksum supplied
        without a checksum type, and adding to a non-existent object.'''
        # returns result from addDatastream call and info used for add
        ((added, _msg), ds) = self._add_text_datastream()

        self.assertTrue(added)  # response from addDatastream
        r = self.rest_api.getObjectXML(self.pid)
        self.assertIn(ds['logMessage'], force_text(r.content))
        r = self.rest_api.listDatastreams(self.pid)
        self.assertIn(
            '<datastream dsid="%(id)s" label="%(label)s" mimeType="%(mimeType)s" />'
            % ds, r.text)
        r = self.rest_api.getDatastream(self.pid, ds['id'])
        ds_profile = r.text
        self.assertIn('dsID="%s"' % ds['id'], ds_profile)
        self.assertIn('<dsLabel>%s</dsLabel>' % ds['label'], ds_profile)
        self.assertIn('<dsVersionID>%s.0</dsVersionID>' % ds['id'], ds_profile)
        self.assertIn('<dsCreateDate>%s' % self.today, ds_profile)
        self.assertIn('<dsState>A</dsState>', ds_profile)
        self.assertIn('<dsMIME>%s</dsMIME>' % ds['mimeType'], ds_profile)
        self.assertIn('<dsControlGroup>%s</dsControlGroup>' % ds['controlGroup'],
                      ds_profile)
        self.assertIn('<dsVersionable>true</dsVersionable>', ds_profile)

        # content returned from fedora should be exactly what we started with
        r = self.rest_api.getDatastreamDissemination(self.pid, ds['id'])
        self.assertEqual(self.TEXT_CONTENT, r.text)

        # invalid checksum
        self.assertRaises(
            ChecksumMismatch, self.rest_api.addDatastream, self.pid,
            "TEXT2", "text datastream", mimeType="text/plain",
            logMessage="creating TEXT2", content='<some> text content</some>',
            checksum='totally-bogus-not-even-an-MD5', checksumType='MD5')

        # invalid checksum without a checksum type - warning, but no checksum mismatch
        with warnings.catch_warnings(record=True) as w:
            self.rest_api.addDatastream(
                self.pid, "TEXT2", "text datastream", mimeType="text/plain",
                logMessage="creating TEXT2", content='<some> text content</some>',
                checksum='totally-bogus-not-even-an-MD5', checksumType=None)
            self.assertEqual(1, len(w),
                'calling addDatastream with checksum but no checksum type should generate a warning')
            self.assertIn('Fedora will ignore the checksum', str(w[0].message))

        # attempt to add to a non-existent object; the context manager
        # closes the temporary file even if the assertion below fails
        with tempfile.NamedTemporaryFile(mode="w", suffix=".txt") as tmp:
            tmp.write("bogus")
            # flush so the content is on disk before the file is reopened
            tmp.flush()

            with open(tmp.name) as textfile:
                self.assertRaises(RequestFailed, self.rest_api.addDatastream, 'bogus:pid',
                  'TEXT', 'text datastream',
                  mimeType='text/plain', logMessage='creating new datastream',
                  controlGroup='M', content=textfile)
Пример #25
0
    def test_addDatastream(self):
        '''Add a text datastream and verify the result in the object XML,
        datastream list, datastream profile and datastream content, then
        exercise the error cases: bad checksum, checksum without a
        checksum type, and a non-existent object.'''
        # returns result from addDatastream call and info used for add
        ((added, _msg), ds) = self._add_text_datastream()

        self.assertTrue(added)  # response from addDatastream
        r = self.rest_api.getObjectXML(self.pid)
        self.assertIn(ds['logMessage'], force_text(r.content))
        r = self.rest_api.listDatastreams(self.pid)
        self.assertIn(
            '<datastream dsid="%(id)s" label="%(label)s" mimeType="%(mimeType)s" />'
            % ds, r.text)
        r = self.rest_api.getDatastream(self.pid, ds['id'])
        ds_profile = r.text
        self.assertIn('dsID="%s"' % ds['id'], ds_profile)
        self.assertIn('<dsLabel>%s</dsLabel>' % ds['label'], ds_profile)
        self.assertIn('<dsVersionID>%s.0</dsVersionID>' % ds['id'], ds_profile)
        self.assertIn('<dsCreateDate>%s' % self.today, ds_profile)
        self.assertIn('<dsState>A</dsState>', ds_profile)
        self.assertIn('<dsMIME>%s</dsMIME>' % ds['mimeType'], ds_profile)
        self.assertIn('<dsControlGroup>%s</dsControlGroup>' % ds['controlGroup'],
                      ds_profile)
        self.assertIn('<dsVersionable>true</dsVersionable>', ds_profile)

        # content returned from fedora should be exactly what we started with
        r = self.rest_api.getDatastreamDissemination(self.pid, ds['id'])
        self.assertEqual(self.TEXT_CONTENT, r.text)

        # invalid checksum
        self.assertRaises(ChecksumMismatch, self.rest_api.addDatastream, self.pid,
            "TEXT2", "text datastream",  mimeType="text/plain", logMessage="creating TEXT2",
            content='<some> text content</some>', checksum='totally-bogus-not-even-an-MD5',
            checksumType='MD5')

        # invalid checksum without a checksum type - warning, but no checksum mismatch
        with warnings.catch_warnings(record=True) as w:
            self.rest_api.addDatastream(self.pid,
                "TEXT2", "text datastream",  mimeType="text/plain", logMessage="creating TEXT2",
                content='<some> text content</some>', checksum='totally-bogus-not-even-an-MD5',
                checksumType=None)
            self.assertEqual(1, len(w),
                'calling addDatastream with checksum but no checksum type should generate a warning')
            self.assertIn('Fedora will ignore the checksum', str(w[0].message))

        # attempt to add to a non-existent object; using the temporary file
        # as a context manager closes it even when the assertion fails
        with tempfile.NamedTemporaryFile(mode="w", suffix=".txt") as tmp:
            tmp.write("bogus")
            # flush so the content is on disk before the file is reopened
            tmp.flush()

            with open(tmp.name) as textfile:
                self.assertRaises(RequestFailed, self.rest_api.addDatastream, 'bogus:pid',
                  'TEXT', 'text datastream',
                  mimeType='text/plain', logMessage='creating new datastream',
                  controlGroup='M', content=textfile)
Пример #26
0
    def test_getDissemination(self):
        '''Check that getDissemination returns a response object whose
        content includes the expected title and the object pid.'''
        # testing with built-in fedora dissemination
        r = self.rest_api.getDissemination(self.pid, "fedora-system:3", "viewItemIndex")
        self.assertIn("<title>Object Items HTML Presentation</title>", r.text)
        self.assertIn(self.pid, r.text)

        # return_http_response
        response = self.rest_api.getDissemination(self.pid, "fedora-system:3", "viewItemIndex")
        self.assertIsInstance(response, requests.Response,
                              "getDissemination should return a response object")
        # datastream content should still be accessible
        self.assertIn(self.pid, force_text(response.content))
Пример #27
0
    def test_getDissemination(self):
        '''Check that the built-in fedora dissemination is returned as a
        response object containing the expected title and the object pid.'''
        # testing with built-in fedora dissemination
        r = self.rest_api.getDissemination(self.pid, "fedora-system:3", "viewItemIndex")
        self.assertIn('<title>Object Items HTML Presentation</title>', r.text)
        self.assertIn(self.pid, r.text)

        # return_http_response
        response = self.rest_api.getDissemination(self.pid, "fedora-system:3", "viewItemIndex")
        self.assertIsInstance(response, requests.Response,
                              'getDissemination should return a response object')
        # datastream content should still be accessible
        self.assertIn(self.pid, force_text(response.content))
Пример #28
0
 def testGetPredicates(self):
     '''Check get_predicates for the test object: with no object given,
     with a resource object, and with a literal object.'''
     # get all predicates for test object
     predicates = list(self.risearch.get_predicates(self.object.uri, None))
     self.assertIn(force_text(modelns.hasModel), predicates)
     self.assertIn(self.rel_isMemberOf, predicates)
     self.assertIn(self.rel_owner, predicates)
     # resource: compare the whole list so that an empty result is
     # reported as a test failure rather than an IndexError
     predicates = list(self.risearch.get_predicates(self.object.uri, self.related.uri))
     self.assertEqual(predicates, [self.rel_isMemberOf])
     # literal
     predicates = list(self.risearch.get_predicates(self.object.uri, "'testuser'"))
     self.assertEqual(predicates, [self.rel_owner])
Пример #29
0
    def test_index_data(self):
        '''Exercise the index_data view: IP restriction, JSON response,
        basic auth credentials, and a non-existent pid.'''
        # create a test object for testing index data view
        obj = Repository().get_object(type=SimpleObject)
        obj.label = 'test object'
        obj.owner = 'tester'
        obj.save()
        self.pids.append(obj.pid)

        # test with request IP not allowed to access the service
        with override_settings(EUL_INDEXER_ALLOWED_IPS=['0.13.23.134']):
            status = index_data(self.request, obj.pid).status_code
            self.assertEqual(
                403, status,
                'Expected %s but returned %s for index_data view with request IP not in configured list'
                % (403, status))

        # test with request IP allowed to hit the service
        with override_settings(EUL_INDEXER_ALLOWED_IPS=[self.request_ip]):
            response = index_data(self.request, obj.pid)
            status = response.status_code
            self.assertEqual(
                200, status,
                'Expected %s but returned %s for index_data view'
                % (200, status))
            mimetype = response['Content-Type']
            self.assertEqual(
                'application/json', mimetype,
                'Expected %s but returned %s for mimetype on index_data view'
                % ('application/json', mimetype))
            payload = json.loads(response.content.decode('utf-8'))
            self.assertEqual(
                obj.index_data(), payload,
                'Response content loaded from JSON should be equal to object indexdata')

            # test with basic auth
            username, password = '******', 'testpass'
            credentials = force_bytes('%s:%s' % (username, password))
            encoded = base64.b64encode(credentials)
            self.request.META['HTTP_AUTHORIZATION'] = \
                'Basic %s' % force_text(encoded)
            with patch('eulfedora.indexdata.views.TypeInferringRepository') \
                    as typerepo:
                typerepo.return_value.get_object.return_value.index_data.return_value = {}
                index_data(self.request, obj.pid)
                # the repository should be initialized with the credentials
                # from the authorization header
                typerepo.assert_called_with(username=username,
                                            password=password)

            # non-existent pid should generate a 404
            with self.assertRaises(Http404):
                index_data(self.request, 'bogus:testpid')
Пример #30
0
 def testGetPredicates(self):
     '''Check the predicates returned by get_predicates when no object
     is given, for a resource object, and for a literal object.'''
     # get all predicates for test object
     predicates = list(self.risearch.get_predicates(self.object.uri, None))
     self.assertIn(force_text(modelns.hasModel), predicates)
     self.assertIn(self.rel_isMemberOf, predicates)
     self.assertIn(self.rel_owner, predicates)
     # resource: comparing the full list means an empty result fails the
     # assertion instead of raising an IndexError
     predicates = list(self.risearch.get_predicates(self.object.uri, self.related.uri))
     self.assertEqual(predicates, [self.rel_isMemberOf])
     # literal
     predicates = list(self.risearch.get_predicates(self.object.uri, "'testuser'"))
     self.assertEqual(predicates, [self.rel_owner])
Пример #31
0
    def test_ingest(self):
        '''Ingest a fixture object with and without a log message, purging
        the ingested object each time.'''
        foxml = self.loadFixtureData('basic-object.foxml')

        # plain ingest: the response content is used as the new pid
        response = self.rest_api.ingest(foxml)
        new_pid = response.content
        self.assertTrue(new_pid)
        self.rest_api.purgeObject(force_text(new_pid))

        # test ingesting with log message
        log_message = "this is my test ingest message"
        response = self.rest_api.ingest(foxml, log_message)
        new_pid = response.text
        # ingest message is stored in AUDIT datastream
        # - can currently only be accessed by retrieving entire object xml
        object_xml = self.rest_api.getObjectXML(new_pid)
        self.assertTrue(log_message in object_xml.text)
        self.rest_api.purgeObject(new_pid, "removing test ingest object")
Пример #32
0
    def test_ingest(self):
        """Ingest a fixture object, first without and then with a log
        message, purging the ingested object after each ingest."""
        fixture = self.loadFixtureData("basic-object.foxml")

        # ingest without a message; response content is used as the pid
        ingest_response = self.rest_api.ingest(fixture)
        ingested_pid = ingest_response.content
        self.assertTrue(ingested_pid)
        self.rest_api.purgeObject(force_text(ingested_pid))

        # test ingesting with log message
        message = "this is my test ingest message"
        ingest_response = self.rest_api.ingest(fixture, message)
        ingested_pid = ingest_response.text
        # ingest message is stored in AUDIT datastream
        # - can currently only be accessed by retrieving entire object xml
        xml_response = self.rest_api.getObjectXML(ingested_pid)
        self.assertTrue(message in xml_response.text)
        self.rest_api.purgeObject(ingested_pid, "removing test ingest object")
Пример #33
0
def curl_download_file(pid, dsid):
    '''Download the content of a single datastream to a temporary file
    using pycurl, displaying a progress bar while the transfer runs.

    The temporary file is created with ``delete=False``, so it is left on
    disk after the download; its name is printed before the transfer
    starts.  The HTTP status code and total transfer time are printed
    when the transfer completes.

    :param pid: pid of the object the datastream belongs to
    :param dsid: id of the datastream to download
    '''
    repo = Repository(testsettings.FEDORA_ROOT_NONSSL,
                      testsettings.FEDORA_USER, testsettings.FEDORA_PASSWORD)
    obj = repo.get_object(pid)
    ds = obj.getDatastreamObject(dsid)

    # the context manager closes (and thereby flushes) the output file,
    # even if the transfer fails; delete=False keeps the file on disk
    with tempfile.NamedTemporaryFile(prefix='%s-%s_' % (pid, dsid),
                                     delete=False) as tmpfile:
        print('writing to ', tmpfile.name)

        widgets = [
            'Download: ',
            progressbar.widgets.Percentage(), ' ',
            progressbar.widgets.Bar(), ' ',
            progressbar.widgets.ETA(), ' ',
            progressbar.widgets.FileTransferSpeed()
        ]
        # set progressbar size based on the datastream size
        pbar = progressbar.ProgressBar(widgets=widgets,
                                       max_value=ds.size).start()

        def progress(dl_total, dl, up_total, up):
            # update current status
            pbar.update(dl)

        c = pycurl.Curl()
        # make sure the curl handle is released even if the transfer fails
        try:
            auth = base64.b64encode(
                force_bytes("%s:%s" %
                            (testsettings.FEDORA_USER, testsettings.FEDORA_PASSWORD)))
            headers = {'Authorization': 'Basic %s' % force_text(auth)}
            c.setopt(pycurl.VERBOSE, 1)
            c.setopt(pycurl.HTTPHEADER, ["%s: %s" % t for t in headers.items()])

            # /objects/{pid}/datastreams/{dsID}/content ? [asOfDateTime] [download]
            c.setopt(c.URL, '%sobjects/%s/datastreams/%s/content' %
                     (testsettings.FEDORA_ROOT_NONSSL, pid, dsid))
            # write response data to the temporary file as it arrives
            c.setopt(c.WRITEFUNCTION, tmpfile.write)
            c.setopt(c.XFERINFOFUNCTION, progress)
            # progress callbacks are only invoked when NOPROGRESS is disabled
            c.setopt(c.NOPROGRESS, False)
            c.perform()

            # HTTP response code, e.g. 200.
            print('Status: %d' % c.getinfo(c.RESPONSE_CODE))
            # Elapsed time for the transfer.
            print('Time: %f' % c.getinfo(c.TOTAL_TIME))
        finally:
            c.close()
Пример #34
0
    def test_index_data(self):
        """Check the index_data view for a disallowed IP, an allowed IP
        (status, mimetype and JSON content), basic auth credentials, and
        a non-existent pid."""
        # create a test object for testing index data view
        obj = Repository().get_object(type=SimpleObject)
        obj.label = "test object"
        obj.owner = "tester"
        obj.save()
        self.pids.append(obj.pid)

        # test with request IP not allowed to access the service
        with override_settings(EUL_INDEXER_ALLOWED_IPS=["0.13.23.134"]):
            status = index_data(self.request, obj.pid).status_code
            failure = (
                "Expected %s but returned %s for index_data view with request IP not in configured list"
                % (403, status)
            )
            self.assertEqual(403, status, failure)

        # test with request IP allowed to hit the service
        with override_settings(EUL_INDEXER_ALLOWED_IPS=[self.request_ip]):
            response = index_data(self.request, obj.pid)

            status = response.status_code
            failure = "Expected %s but returned %s for index_data view" % (200, status)
            self.assertEqual(200, status, failure)

            mimetype = response["Content-Type"]
            failure = "Expected %s but returned %s for mimetype on index_data view" % (
                "application/json",
                mimetype,
            )
            self.assertEqual("application/json", mimetype, failure)

            payload = json.loads(response.content.decode("utf-8"))
            self.assertEqual(
                obj.index_data(),
                payload,
                "Response content loaded from JSON should be equal to object indexdata",
            )

            # test with basic auth
            username, password = "******", "testpass"
            credentials = force_bytes("%s:%s" % (username, password))
            encoded = base64.b64encode(credentials)
            self.request.META["HTTP_AUTHORIZATION"] = "Basic %s" % force_text(encoded)
            with patch("eulfedora.indexdata.views.TypeInferringRepository") as typerepo:
                typerepo.return_value.get_object.return_value.index_data.return_value = {}
                index_data(self.request, obj.pid)
                # credentials from the header should be passed to the repository
                typerepo.assert_called_with(username=username, password=password)

            # non-existent pid should generate a 404
            with self.assertRaises(Http404):
                index_data(self.request, "bogus:testpid")
Пример #35
0
    def get_datastream_info(self, dsinfo):
        '''Search the text preceding a binaryContent tag for datastream
        [version] details, so the decoded data can be sanity checked.

        :param dsinfo: text content just before a binaryContent tag
        :returns: dict of the groups matched by ``self.dsinfo_regex``
            (documented as id, mimetype, size, type and digest),
            or None if no match is found
        '''
        window = 250
        # only the tail end of this section of content is relevant
        tail = dsinfo[-window:]
        # when the tail is short, prepend the end of the previously
        # read chunk, if there is one
        if len(tail) < window and self.end_of_last_chunk is not None:
            tail = self.end_of_last_chunk + tail

        found = self.dsinfo_regex.search(force_text(tail))
        if not found:
            return None
        return found.groupdict()
Пример #36
0
    def get_datastream_info(self, dsinfo):
        '''Pull datastream [version] details out of the content that
        comes just before a binaryContent tag, for use in sanity
        checking the decoded data.

        :param dsinfo: text content just before a binaryContent tag
        :returns: dict of the groups matched by ``self.dsinfo_regex``
            (documented as id, mimetype, size, type and digest),
            or None if no match is found
        '''
        lookback = 250
        # only the last part of this content needs to be searched
        snippet = dsinfo[-lookback:]
        # too little content: include the end of the last read chunk,
        # when one is available
        if len(snippet) < lookback and self.end_of_last_chunk is not None:
            snippet = self.end_of_last_chunk + snippet

        match = self.dsinfo_regex.search(force_text(snippet))
        return match.groupdict() if match else None
Пример #37
0
def curl_download_file(pid, dsid):
    '''Download one datastream's content to a temporary file with pycurl,
    showing a progress bar during the transfer.

    The temporary file is created with ``delete=False`` and therefore
    remains on disk afterwards; its name is printed before the transfer
    starts.  The HTTP status code and total transfer time are printed
    once the transfer completes.

    :param pid: pid of the object the datastream belongs to
    :param dsid: id of the datastream to download
    '''
    repo = Repository(testsettings.FEDORA_ROOT_NONSSL, testsettings.FEDORA_USER,
                      testsettings.FEDORA_PASSWORD)
    obj = repo.get_object(pid)
    ds = obj.getDatastreamObject(dsid)

    # using the file as a context manager closes (and flushes) it even if
    # the transfer fails; delete=False keeps the downloaded file on disk
    with tempfile.NamedTemporaryFile(
            prefix='%s-%s_' % (pid, dsid), delete=False) as tmpfile:
        print('writing to ', tmpfile.name)

        widgets = ['Download: ', progressbar.widgets.Percentage(), ' ',
                   progressbar.widgets.Bar(), ' ', progressbar.widgets.ETA(),
                   ' ', progressbar.widgets.FileTransferSpeed()]
        # set progressbar size based on the datastream size
        pbar = progressbar.ProgressBar(widgets=widgets,
                                       max_value=ds.size).start()

        def progress(dl_total, dl, up_total, up):
            # update current status
            pbar.update(dl)

        c = pycurl.Curl()
        # release the curl handle even when the transfer raises
        try:
            auth = base64.b64encode(force_bytes(
                "%s:%s" % (testsettings.FEDORA_USER, testsettings.FEDORA_PASSWORD)))
            headers = {'Authorization': 'Basic %s' % force_text(auth)}
            c.setopt(pycurl.VERBOSE, 1)
            c.setopt(pycurl.HTTPHEADER, ["%s: %s" % t for t in headers.items()])

            # /objects/{pid}/datastreams/{dsID}/content ? [asOfDateTime] [download]
            c.setopt(c.URL, '%sobjects/%s/datastreams/%s/content' %
                     (testsettings.FEDORA_ROOT_NONSSL, pid, dsid))
            # write response data to the temporary file as it arrives
            c.setopt(c.WRITEFUNCTION, tmpfile.write)
            c.setopt(c.XFERINFOFUNCTION, progress)
            # progress callbacks are only invoked when NOPROGRESS is disabled
            c.setopt(c.NOPROGRESS, False)
            c.perform()

            # HTTP response code, e.g. 200.
            print('Status: %d' % c.getinfo(c.RESPONSE_CODE))
            # Elapsed time for the transfer.
            print('Time: %f' % c.getinfo(c.TOTAL_TIME))
        finally:
            c.close()
Пример #38
0
    def test_login_and_store_credentials_in_session(self):
        '''Check that the password is stored in the session, encrypted,
        only when login succeeds.'''
        # only testing custom logic, which happens on POST
        # everything else is handled by django.contrib.auth
        mockrequest = Mock()
        mockrequest.method = 'POST'

        # side effects for the patched login view: mark the request user
        # as not authenticated / authenticated
        def not_logged_in(rqst):
            rqst.user.is_authenticated.return_value = False

        def set_logged_in(rqst):
            rqst.user.is_authenticated.return_value = True
            rqst.POST.get.return_value = "TEST_PASSWORD"

        # failed login
        with patch('eulfedora.views.authviews.login',
                   new=Mock(side_effect=not_logged_in)):
            mockrequest.session = dict()
            login_and_store_credentials_in_session(mockrequest)
            self.assertNotIn(
                FEDORA_PASSWORD_SESSION_KEY, mockrequest.session,
                'user password for fedora should not be stored in session on failed login'
            )

        # successful login
        with patch('eulfedora.views.authviews.login',
                   new=Mock(side_effect=set_logged_in)):
            login_and_store_credentials_in_session(mockrequest)
            self.assertIn(
                FEDORA_PASSWORD_SESSION_KEY, mockrequest.session,
                'user password for fedora should be stored in session on successful login'
            )
            # test password stored in the mock request
            pwd = mockrequest.POST.get()
            # encrypted password stored in session
            sessionpwd = mockrequest.session[FEDORA_PASSWORD_SESSION_KEY]
            self.assertNotEqual(
                pwd, sessionpwd,
                'password should not be stored in the session without encryption'
            )
            self.assertEqual(pwd, force_text(cryptutil.decrypt(sessionpwd)),
                             'user password stored in session is encrypted')
Пример #39
0
    def test_addRelationship(self):
        '''Check addRelationship for a resource object, a literal object,
        and a bogus pid, verifying the result in RELS-EXT.'''
        # rel to resource
        added = self.rest_api.addRelationship(self.pid, 'info:fedora/%s' % self.pid,
                                              force_text(modelns.hasModel),
                                              'info:fedora/pid:123', False)
        self.assertTrue(added)
        r = self.rest_api.getDatastreamDissemination(self.pid, 'RELS-EXT')
        self.assertIn('<hasModel', r.text)
        self.assertIn('rdf:resource="info:fedora/pid:123"', r.text)

        # literal
        added = self.rest_api.addRelationship(self.pid, 'info:fedora/%s' % self.pid,
                                              self.rel_owner, "johndoe", True)
        self.assertTrue(added)
        r = self.rest_api.getDatastreamDissemination(self.pid, 'RELS-EXT')
        self.assertIn('<owner', r.text)
        self.assertIn('>johndoe<', r.text)

        # bogus pid
        self.assertRaises(RequestFailed, self.rest_api.addRelationship,
            'bogus:pid', 'info:fedora/bogus:pid', self.rel_owner, 'johndoe', True)
Пример #40
0
    def test_addRelationship(self):
        '''Add a relationship to a resource and to a literal and check
        that each shows up in RELS-EXT; a bogus pid should fail.'''
        # rel to resource
        added = self.rest_api.addRelationship(self.pid, 'info:fedora/%s' % self.pid,
                                              force_text(modelns.hasModel),
                                              'info:fedora/pid:123', False)
        self.assertTrue(added)
        r = self.rest_api.getDatastreamDissemination(self.pid, 'RELS-EXT')
        self.assertIn('<hasModel', r.text)
        self.assertIn('rdf:resource="info:fedora/pid:123"', r.text)

        # literal
        added = self.rest_api.addRelationship(self.pid, 'info:fedora/%s' % self.pid,
                                              self.rel_owner, "johndoe", True)
        self.assertTrue(added)
        r = self.rest_api.getDatastreamDissemination(self.pid, 'RELS-EXT')
        self.assertIn('<owner', r.text)
        self.assertIn('>johndoe<', r.text)

        # bogus pid
        self.assertRaises(RequestFailed, self.rest_api.addRelationship,
            'bogus:pid', 'info:fedora/bogus:pid', self.rel_owner, 'johndoe', True)
Пример #41
0
 def append_pid(self, pid):
         '''Add ``pid``, converted to text, to the list of ingested
         fixture pids.'''
         ingested = self.fedora_fixtures_ingested
         ingested.append(force_text(pid))
Пример #42
0
def sync_object(
    src_obj, dest_repo, export_context="migrate", overwrite=False, show_progress=False, requires_auth=False
):
    """Copy an object from one repository to another using the Fedora
    export functionality.

    :param src_obj: source :class:`~eulfedora.models.DigitalObject` to
        be copied
    :param dest_repo: destination  :class:`~eulfedora.server.Repository`
        where the object will be copied to
    :param export_context: Fedora export format to use, one of "migrate",
        "archive" or "archive-xml"; migrate is generally faster, but
        requires access from destination repository to source and may
        result in checksum errors for some content; archive exports take
        longer to process (default: migrate)
    :param overwrite: if an object with the same pid is already present
        in the destination repository, it will be removed only if
        overwrite is set to true (default: false)
    :param show_progress: if True, displays a progress bar with content size,
        progress, speed, and ETA (only applicable to archive exports)
    :param requires_auth: content datastreams require authentication,
        and should have credentials patched in (currently only supported
        in archive-xml export mode)  (default: False)
    :returns: result of Fedora ingest on the destination repository on
        success; False if the object already exists in the destination
        repository and overwrite is not set
    :raises Exception: if ``export_context`` is not a supported value
    """

    # NOTE: currently exceptions are expected to be handled by the
    # calling method; see repo-cp script for an example

    if show_progress and progressbar:
        # calculate rough estimate of object size
        size_estimate = estimate_object_size(src_obj, archive=(export_context in ["archive", "archive-xml"]))
        # create a new progress bar with current pid and size
        widgets = [
            src_obj.pid,
            " Estimated size: %s // " % humanize_file_size(size_estimate),
            "Read: ",
            progressbar.widgets.DataSize(),
            " ",
            progressbar.widgets.AdaptiveTransferSpeed(),
            " ",
            "| Uploaded: ",
            progressbar.widgets.DataSize(value="upload"),
            " // ",
            # FileTransferSpeed('upload'), currently no way to track upload speed...
            progressbar.widgets.Timer(),
            " | ",
            progressbar.widgets.AdaptiveETA(),
        ]

        # progress bar that also exposes an "upload" value to its widgets
        class DownUpProgressBar(progressbar.ProgressBar):
            upload = 0

            def data(self):
                data = super(DownUpProgressBar, self).data()
                data["upload"] = self.upload
                return data

        pbar = DownUpProgressBar(widgets=widgets, max_value=size_estimate)
    else:
        pbar = None

    # migrate export can simply be read and uploaded to dest fedora
    if export_context == "migrate":
        response = src_obj.api.export(src_obj, context=export_context, stream=True)
        export_data = response.iter_content(4096 * 1024)

    # archive export needs additional processing to handle large binary content
    elif export_context in ["archive", "archive-xml"]:
        export = ArchiveExport(
            src_obj,
            dest_repo,
            progress_bar=pbar,
            requires_auth=requires_auth,
            xml_only=(export_context == "archive-xml"),
        )
        # NOTE: should be possible to pass BytesIO to be read, but that is failing
        export_data = export.object_data().getvalue()

    else:
        # interpolate the context into the message; passing it as a second
        # argument would leave the "%s" placeholder unformatted
        raise Exception("Unsupported export context %s" % export_context)

    dest_obj = dest_repo.get_object(src_obj.pid)
    if dest_obj.exists:
        if overwrite:
            dest_repo.purge_object(src_obj.pid)
        else:
            # exception ?
            return False

    result = dest_repo.ingest(export_data)
    if pbar:
        pbar.finish()
    return force_text(result)
Пример #43
0
def sync_object(src_obj,
                dest_repo,
                export_context='migrate',
                overwrite=False,
                show_progress=False,
                requires_auth=False):
    '''Copy an object from one repository to another using the Fedora
    export functionality.

    :param src_obj: source :class:`~eulfedora.models.DigitalObject` to
        be copied
    :param dest_repo: destination  :class:`~eulfedora.server.Repository`
        where the object will be copied to
    :param export_context: Fedora export format to use, one of "migrate",
        "archive" or "archive-xml"; migrate is generally faster, but
        requires access from destination repository to source and may
        result in checksum errors for some content; archive exports take
        longer to process (default: migrate)
    :param overwrite: if an object with the same pid is already present
        in the destination repository, it will be removed only if
        overwrite is set to true (default: false)
    :param show_progress: if True, displays a progress bar with content size,
        progress, speed, and ETA (only applicable to archive exports)
    :param requires_auth: content datastreams require authentication,
        and should have credentials patched in (currently only supported
        in archive-xml export mode)  (default: False)
    :returns: result of Fedora ingest on the destination repository on
        success; False if the object already exists in the destination
        repository and overwrite is not set
    :raises Exception: if ``export_context`` is not a supported value
    '''

    # NOTE: currently exceptions are expected to be handled by the
    # calling method; see repo-cp script for an example

    if show_progress and progressbar:
        # calculate rough estimate of object size
        size_estimate = estimate_object_size(
            src_obj, archive=(export_context in ['archive', 'archive-xml']))
        # create a new progress bar with current pid and size
        widgets = [
            src_obj.pid,
            ' Estimated size: %s // ' % humanize_file_size(size_estimate),
            'Read: ',
            progressbar.widgets.DataSize(),
            ' ',
            progressbar.widgets.AdaptiveTransferSpeed(),
            ' ',
            '| Uploaded: ',
            progressbar.widgets.DataSize(value='upload'),
            ' // ',
            # FileTransferSpeed('upload'), currently no way to track upload speed...
            progressbar.widgets.Timer(),
            ' | ',
            progressbar.widgets.AdaptiveETA()
        ]

        # progress bar that also exposes an "upload" value to its widgets
        class DownUpProgressBar(progressbar.ProgressBar):
            upload = 0

            def data(self):
                data = super(DownUpProgressBar, self).data()
                data['upload'] = self.upload
                return data

        pbar = DownUpProgressBar(widgets=widgets, max_value=size_estimate)
    else:
        pbar = None

    # migrate export can simply be read and uploaded to dest fedora
    if export_context == 'migrate':
        response = src_obj.api.export(src_obj,
                                      context=export_context,
                                      stream=True)
        export_data = response.iter_content(4096 * 1024)

    # archive export needs additional processing to handle large binary content
    elif export_context in ['archive', 'archive-xml']:
        export = ArchiveExport(src_obj,
                               dest_repo,
                               progress_bar=pbar,
                               requires_auth=requires_auth,
                               xml_only=(export_context == 'archive-xml'))
        # NOTE: should be possible to pass BytesIO to be read, but that is failing
        export_data = export.object_data().getvalue()

    else:
        # interpolate the context into the message; passing it as a second
        # argument would leave the "%s" placeholder unformatted
        raise Exception('Unsupported export context %s' % export_context)

    dest_obj = dest_repo.get_object(src_obj.pid)
    if dest_obj.exists:
        if overwrite:
            dest_repo.purge_object(src_obj.pid)
        else:
            # exception ?
            return False

    result = dest_repo.ingest(export_data)
    if pbar:
        pbar.finish()
    return force_text(result)
Пример #44
0
def sync_object(src_obj, dest_repo, export_context='migrate',
                overwrite=False, show_progress=False,
                requires_auth=False, omit_checksums=False):
    '''Copy an object from one repository to another using the Fedora
    export functionality.

    :param src_obj: source :class:`~eulfedora.models.DigitalObject` to
        be copied
    :param dest_repo: destination  :class:`~eulfedora.server.Repository`
        where the object will be copied to
    :param export_context: Fedora export format to use, one of "migrate",
        "archive", or "archive-xml" (archive export with ``xml_only``
        set); migrate is generally faster, but requires access
        from destination repository to source and may result in checksum
        errors for some content; archive exports take longer to process
        (default: migrate)
    :param overwrite: if an object with the same pid is already present
        in the destination repository, it will be removed only if
        overwrite is set to true (default: false)
    :param show_progress: if True, displays a progress bar with content size,
        progress, speed, and ETA (only applicable to archive exports)
    :param requires_auth: content datastreams require authentication,
        and should have credentials patched in (currently only supported
        in archive-xml export mode)  (default: False)
    :param omit_checksums: scrubs contentDigest -- aka checksums -- from datastreams;
        helpful for datastreams with Redirect (R) or External (E) contexts
        (default: False)
    :returns: result of Fedora ingest on the destination repository on
        success; False if the object is already present in the
        destination repository and overwrite is not set
    :raises Exception: if export_context is not a supported value
    '''

    # NOTE: currently exceptions are expected to be handled by the
    # calling method; see repo-cp script for an example

    archive_contexts = ('archive', 'archive-xml')

    if show_progress and progressbar:
        # calculate rough estimate of object size
        size_estimate = estimate_object_size(
            src_obj, archive=(export_context in archive_contexts))
        # create a new progress bar with current pid and size
        widgets = [
            src_obj.pid,
            ' Estimated size: %s // ' % humanize_file_size(size_estimate),
            'Read: ', progressbar.widgets.DataSize(), ' ',
            progressbar.widgets.AdaptiveTransferSpeed(), ' ',
            '| Uploaded: ', progressbar.widgets.DataSize(value='upload'),
            ' // ',
            # FileTransferSpeed('upload'), currently no way to track upload speed...
            progressbar.widgets.Timer(), ' | ',
            progressbar.widgets.AdaptiveETA(),
        ]

        class DownUpProgressBar(progressbar.ProgressBar):
            # progress bar that also exposes an upload count to its widgets
            upload = 0

            def data(self):
                data = super(DownUpProgressBar, self).data()
                data['upload'] = self.upload
                return data

        pbar = DownUpProgressBar(widgets=widgets, max_value=size_estimate)
    else:
        pbar = None

    # migrate export can simply be read and uploaded to dest fedora
    if export_context == 'migrate':
        response = src_obj.api.export(src_obj, context=export_context,
                                      stream=True)
        export_data = response.iter_content(4096 * 1024)

    # archive export needs additional processing to handle large binary content
    elif export_context in archive_contexts:
        export = ArchiveExport(src_obj, dest_repo,
                               progress_bar=pbar,
                               requires_auth=requires_auth,
                               xml_only=(export_context == 'archive-xml'))
        # NOTE: should be possible to pass BytesIO to be read, but that is failing
        export_data = export.object_data().getvalue()

    else:
        # interpolate the context into the message (passing it as a second
        # argument to Exception leaves the %s placeholder unformatted)
        raise Exception('Unsupported export context %s' % export_context)

    # wipe checksums from FOXML if flagged in options
    if omit_checksums:
        checksum_re = r'<foxml:contentDigest.+?/>'
        text_type = type(u'')

        def scrub(content):
            # the regex pattern and the replacement must be the same
            # type (text or bytes) as the content being scrubbed
            if isinstance(content, text_type):
                return re.sub(checksum_re, u'', content)
            return re.sub(checksum_re.encode('ascii'), b'', content)

        if isinstance(export_data, (bytes, bytearray, text_type)):
            # export data is either a single string
            export_data = scrub(export_data)
        else:
            # or a generator of chunks
            # NOTE: chunks are scrubbed one at a time, so an element that
            # is split across a chunk boundary is not removed
            export_data = (scrub(chunk) for chunk in export_data)

    dest_obj = dest_repo.get_object(src_obj.pid)
    if dest_obj.exists:
        if overwrite:
            dest_repo.purge_object(src_obj.pid)
        else:
            # exception ?
            return False

    result = dest_repo.ingest(export_data)
    if pbar:
        pbar.finish()
    return force_text(result)
Пример #45
0
 def test_upload_string(self):
     # upload string content and check the format of the returned id
     data = "Here is some temporary content to upload to fedora."
     upload_id = self.rest_api.upload(data)
     # current format looks like uploaded://####
     pattern = re.compile("uploaded://[0-9]+")
     # assertTrue rather than the deprecated assert_ alias
     self.assertTrue(pattern.match(force_text(upload_id)))
Пример #46
0
    def test_raw_datastream_old(self):
        # exercises the raw_datastream_old view: status code and headers
        # for xml, text and binary datastreams, streaming responses, 404
        # handling, extra response headers, and allowed request methods
        rqst = Mock()
        rqst.method = 'GET'
        # return empty headers for ETag condition check
        rqst.META = {}

        def check(expected, got, description):
            # assert equality, reporting both values in the failure message
            self.assertEqual(
                expected, got,
                'Expected %s but returned %s for %s'
                % (expected, got, description))

        # DC
        response = raw_datastream_old(rqst, self.obj.pid, 'DC')
        content = force_text(response.content)
        check(200, response.status_code, 'raw_datastream_old view of DC')
        check('text/xml', response['Content-Type'],
              'mimetype on raw_datastream_old view of DC')
        self.assertEqual(
            self.obj.dc.checksum, response['ETag'],
            'datastream checksum should be set as ETag header in the response')
        self.assertEqual(self.obj.dc.checksum, response['Content-MD5'])
        self.assertIn(
            '<dc:title>%s</dc:title>' % self.obj.dc.content.title, content)

        # RELS-EXT
        response = raw_datastream_old(rqst, self.obj.pid, 'RELS-EXT')
        check(200, response.status_code,
              'raw_datastream_old view of RELS-EXT')
        check('application/rdf+xml', response['Content-Type'],
              'mimetype on raw_datastream_old view of RELS-EXT')

        # TEXT  (non-xml content)
        response = raw_datastream_old(rqst, self.obj.pid, 'TEXT')
        check(200, response.status_code, 'raw_datastream_old view of TEXT')
        check('text/plain', response['Content-Type'],
              'mimetype on raw_datastream_old view of TEXT')
        # non-xml datastreams should have content-md5 & content-length headers
        self.assertEqual(
            self.obj.text.checksum, response['Content-MD5'],
            'datastream checksum should be set as Content-MD5 header in the response')
        self.assertEqual(len(self.obj.text.content),
                         int(response['Content-Length']))

        # IMAGE (binary content)
        response = raw_datastream_old(rqst, self.obj.pid, 'IMAGE')
        check(200, response.status_code, 'raw_datastream_old view of IMAGE')
        check('image/png', response['Content-Type'],
              'mimetype on raw_datastream_old view of IMAGE')
        # non-xml datastreams should have content-md5 & content-length headers
        self.assertEqual(
            self.obj.image.checksum, response['Content-MD5'],
            'datastream checksum should be set as Content-MD5 header in the response')
        self.assertTrue(
            response.has_header('Content-Length'),
            'content-length header should be set in the response for binary datastreams')
        self.assertIsInstance(response, HttpResponse)

        # streaming
        response = raw_datastream_old(rqst, self.obj.pid, 'IMAGE',
                                      streaming=True)
        self.assertIsInstance(response, StreamingHttpResponse)

        # non-existent datastream should 404
        self.assertRaises(Http404, raw_datastream_old, rqst, self.obj.pid,
                          'BOGUS-DSID')

        # non-existent record should 404
        self.assertRaises(Http404, raw_datastream_old, rqst, 'bogus-pid:1',
                          'DC')

        # check type handling?

        # set extra headers in the response
        extra_headers = {'Content-Disposition': 'attachment; filename=foo.txt'}
        response = raw_datastream_old(rqst, self.obj.pid, 'TEXT',
                                      headers=extra_headers)
        self.assertTrue(response.has_header('Content-Disposition'))
        self.assertEqual(response['Content-Disposition'],
                         extra_headers['Content-Disposition'])

        # explicitly support GET and HEAD requests only
        rqst.method = 'POST'
        response = raw_datastream_old(rqst, self.obj.pid, 'DC')
        expected, got = 405, response.status_code
        self.assertEqual(
            expected, got,
            'Expected %s (Method not Allowed) but returned %s for POST to raw_datastream view'
            % (expected, got))

        # HEAD request is handled internally, for efficiency
        rqst.method = 'HEAD'
        response = raw_datastream_old(rqst, self.obj.pid, 'DC')
        check(200, response.status_code,
              'HEAD request on raw_datastream_old view')
        self.assertEqual(b'', response.content)
Пример #47
0
    def test_raw_datastream(self):
        # tests for new version of raw_datastream introduced in 1.5,
        # based on old raw_datastream tests

        rqst = Mock()
        rqst.method = 'GET'
        # return empty headers for ETag condition check
        rqst.META = {}

        def check(expected, got, description):
            # assert equality, reporting both values in the failure message
            self.assertEqual(
                expected, got,
                'Expected %s but returned %s for %s'
                % (expected, got, description))

        # DC
        response = raw_datastream(rqst, self.obj.pid, 'DC')
        content = b''.join(response.streaming_content)
        check(200, response.status_code, 'raw_datastream view of DC')
        check('text/xml', response['Content-Type'],
              'mimetype on raw_datastream view of DC')
        self.assertEqual(
            self.obj.dc.checksum, response['ETag'],
            'datastream checksum should be set as ETag header in the response')
        self.assertEqual(self.obj.dc.checksum, response['Content-MD5'])
        self.assertIn(
            '<dc:title>%s</dc:title>' % self.obj.dc.content.title,
            force_text(content))

        # RELS-EXT
        response = raw_datastream(rqst, self.obj.pid, 'RELS-EXT')
        check(200, response.status_code, 'raw_datastream view of RELS-EXT')
        check('application/rdf+xml', response['Content-Type'],
              'mimetype on raw_datastream view of RELS-EXT')

        # TEXT  (non-xml content)
        response = raw_datastream(rqst, self.obj.pid, 'TEXT')
        check(200, response.status_code, 'raw_datastream view of TEXT')
        check('text/plain', response['Content-Type'],
              'mimetype on raw_datastream view of TEXT')
        # non-xml datastreams should have content-md5 & content-length headers
        self.assertEqual(
            self.obj.text.checksum, response['Content-MD5'],
            'datastream checksum should be set as Content-MD5 header in the response')
        self.assertEqual(len(self.obj.text.content),
                         int(response['Content-Length']))

        # IMAGE (binary content)
        response = raw_datastream(rqst, self.obj.pid, 'IMAGE')
        check(200, response.status_code, 'raw_datastream view of IMAGE')
        check('image/png', response['Content-Type'],
              'mimetype on raw_datastream view of IMAGE')
        # non-xml datastreams should have content-md5 & content-length headers
        self.assertEqual(
            self.obj.image.checksum, response['Content-MD5'],
            'datastream checksum should be set as Content-MD5 header in the response')
        self.assertTrue(
            response.has_header('Content-Length'),
            'content-length header should be set in the response for binary datastreams')
        self.assertIsInstance(response, StreamingHttpResponse)

        # non-existent datastream should 404
        self.assertRaises(Http404, raw_datastream, rqst, self.obj.pid,
                          'BOGUS-DSID')

        # non-existent record should 404
        self.assertRaises(Http404, raw_datastream, rqst, 'bogus-pid:1', 'DC')

        # set extra headers in the response
        # NOTE(review): this check calls raw_datastream_old rather than
        # raw_datastream -- confirm whether that is intentional
        extra_headers = {'Content-Disposition': 'attachment; filename=foo.txt'}
        response = raw_datastream_old(rqst, self.obj.pid, 'TEXT',
                                      headers=extra_headers)
        self.assertTrue(response.has_header('Content-Disposition'))
        self.assertEqual(response['Content-Disposition'],
                         extra_headers['Content-Disposition'])

        # explicitly support GET and HEAD requests only
        rqst.method = 'POST'
        response = raw_datastream(rqst, self.obj.pid, 'DC')
        expected, got = 405, response.status_code
        self.assertEqual(
            expected, got,
            'Expected %s (Method not Allowed) but returned %s for POST to raw_datastream view'
            % (expected, got))

        # test HEAD request
        rqst.method = 'HEAD'
        response = raw_datastream(rqst, self.obj.pid, 'DC')
        check(200, response.status_code,
              'HEAD request on raw_datastream view')
        self.assertIsInstance(response, HttpResponse)
        self.assertEqual(b'', response.content)

        # test that range requests are passed through to fedora

        # use IMAGE for testing since it is binary content
        # set range header in the request; bytes=0- : entire datastream
        rqst.META['HTTP_RANGE'] = 'bytes=0-'
        rqst.method = 'GET'

        response = raw_datastream(rqst, self.obj.pid, 'IMAGE')
        check(206, response.status_code, 'raw_datastream range request')
        content = b''.join(response.streaming_content)
        image_size = self.obj.image.size
        self.assertEqual(
            image_size, len(content),
            'range request of bytes=0- should return entire content (expected %d, got %d)'
            % (image_size, len(content)))
        self.assertEqual(
            image_size, int(response['Content-Length']),
            'content-length header should be size of entire content (expected %d, got %d)'
            % (image_size, int(response['Content-Length'])))
        expected = 'bytes 0-%d/%d' % (image_size - 1, image_size)
        self.assertEqual(
            expected, response['Content-Range'],
            'content range response header should indicate bytes returned (expected %s, got %s)'
            % (expected, response['Content-Range']))
        del response

        # set range request for partial beginning content; bytes=0-150
        bytes_requested = 'bytes=0-150'
        rqst.META['HTTP_RANGE'] = bytes_requested
        response = raw_datastream(rqst, self.obj.pid, 'IMAGE')
        check(206, response.status_code, 'raw_datastream range request')
        # range bounds are inclusive, so 0-150 is 151 bytes
        content_len = 151
        content = b''.join(response.streaming_content)
        self.assertEqual(
            content_len, len(content),
            'range request of %s should return %d bytes, got %d'
            % (bytes_requested, content_len, len(content)))
        self.assertEqual(
            content_len, int(response['Content-Length']),
            'content-length header should be set to partial size %d (got %d)'
            % (content_len, int(response['Content-Length'])))
        expected = 'bytes 0-150/%d' % self.obj.image.size
        self.assertEqual(
            expected, response['Content-Range'],
            'content range response header should indicate bytes returned (expected %s, got %s)'
            % (expected, response['Content-Range']))

        # complex ranges not yet supported
        bytes_requested = 'bytes=1-10,30-50'
        rqst.META['HTTP_RANGE'] = bytes_requested
        response = raw_datastream_old(rqst, self.obj.pid, 'IMAGE',
                                      accept_range_request=True)
        check(416, response.status_code,
              'raw_datastream_old invalid range request %s' % bytes_requested)
Пример #48
0
    def test_raw_datastream_old(self):
        # exercises the raw_datastream_old view: status code and headers
        # for xml, text and binary datastreams, streaming responses, 404
        # handling, extra response headers, and allowed request methods
        rqst = Mock()
        rqst.method = 'GET'
        # return empty headers for ETag condition check
        rqst.META = {}

        def check(expected, got, description):
            # assert equality, reporting both values in the failure message
            self.assertEqual(
                expected, got,
                'Expected %s but returned %s for %s'
                % (expected, got, description))

        # DC
        response = raw_datastream_old(rqst, self.obj.pid, 'DC')
        content = force_text(response.content)
        check(200, response.status_code, 'raw_datastream_old view of DC')
        check('text/xml', response['Content-Type'],
              'mimetype on raw_datastream_old view of DC')
        self.assertEqual(
            self.obj.dc.checksum, response['ETag'],
            'datastream checksum should be set as ETag header in the response')
        self.assertEqual(self.obj.dc.checksum, response['Content-MD5'])
        self.assertIn(
            '<dc:title>%s</dc:title>' % self.obj.dc.content.title, content)

        # RELS-EXT
        response = raw_datastream_old(rqst, self.obj.pid, 'RELS-EXT')
        check(200, response.status_code,
              'raw_datastream_old view of RELS-EXT')
        check('application/rdf+xml', response['Content-Type'],
              'mimetype on raw_datastream_old view of RELS-EXT')

        # TEXT  (non-xml content)
        response = raw_datastream_old(rqst, self.obj.pid, 'TEXT')
        check(200, response.status_code, 'raw_datastream_old view of TEXT')
        check('text/plain', response['Content-Type'],
              'mimetype on raw_datastream_old view of TEXT')
        # non-xml datastreams should have content-md5 & content-length headers
        self.assertEqual(
            self.obj.text.checksum, response['Content-MD5'],
            'datastream checksum should be set as Content-MD5 header in the response')
        self.assertEqual(len(self.obj.text.content),
                         int(response['Content-Length']))

        # IMAGE (binary content)
        response = raw_datastream_old(rqst, self.obj.pid, 'IMAGE')
        check(200, response.status_code, 'raw_datastream_old view of IMAGE')
        check('image/png', response['Content-Type'],
              'mimetype on raw_datastream_old view of IMAGE')
        # non-xml datastreams should have content-md5 & content-length headers
        self.assertEqual(
            self.obj.image.checksum, response['Content-MD5'],
            'datastream checksum should be set as Content-MD5 header in the response')
        self.assertTrue(
            response.has_header('Content-Length'),
            'content-length header should be set in the response for binary datastreams')
        self.assertIsInstance(response, HttpResponse)

        # streaming
        response = raw_datastream_old(rqst, self.obj.pid, 'IMAGE',
                                      streaming=True)
        self.assertIsInstance(response, StreamingHttpResponse)

        # non-existent datastream should 404
        self.assertRaises(Http404, raw_datastream_old, rqst, self.obj.pid,
                          'BOGUS-DSID')

        # non-existent record should 404
        self.assertRaises(Http404, raw_datastream_old, rqst, 'bogus-pid:1',
                          'DC')

        # check type handling?

        # set extra headers in the response
        extra_headers = {'Content-Disposition': 'attachment; filename=foo.txt'}
        response = raw_datastream_old(rqst, self.obj.pid, 'TEXT',
                                      headers=extra_headers)
        self.assertTrue(response.has_header('Content-Disposition'))
        self.assertEqual(response['Content-Disposition'],
                         extra_headers['Content-Disposition'])

        # explicitly support GET and HEAD requests only
        rqst.method = 'POST'
        response = raw_datastream_old(rqst, self.obj.pid, 'DC')
        expected, got = 405, response.status_code
        self.assertEqual(
            expected, got,
            'Expected %s (Method not Allowed) but returned %s for POST to raw_datastream view'
            % (expected, got))

        # HEAD request is handled internally, for efficiency
        rqst.method = 'HEAD'
        response = raw_datastream_old(rqst, self.obj.pid, 'DC')
        check(200, response.status_code,
              'HEAD request on raw_datastream_old view')
        self.assertEqual(b'', response.content)
Пример #49
0
 def test_upload_string(self):
     # upload string content and check the format of the returned id
     data = "Here is some temporary content to upload to fedora."
     upload_id = self.rest_api.upload(data)
     # current format looks like uploaded://####
     pattern = re.compile('uploaded://[0-9]+')
     # assertTrue rather than the deprecated assert_ alias
     self.assertTrue(pattern.match(force_text(upload_id)))
Пример #50
0
    def test_raw_datastream(self):
        # tests for new version of raw_datastream introduced in 1.5,
        # based on old raw_datastream tests

        rqst = Mock()
        rqst.method = 'GET'
        # return empty headers for ETag condition check
        rqst.META = {}

        def check(expected, got, description):
            # assert equality, reporting both values in the failure message
            self.assertEqual(
                expected, got,
                'Expected %s but returned %s for %s'
                % (expected, got, description))

        # DC
        response = raw_datastream(rqst, self.obj.pid, 'DC')
        content = b''.join(response.streaming_content)
        check(200, response.status_code, 'raw_datastream view of DC')
        check('text/xml', response['Content-Type'],
              'mimetype on raw_datastream view of DC')
        self.assertEqual(
            self.obj.dc.checksum, response['ETag'],
            'datastream checksum should be set as ETag header in the response')
        self.assertEqual(self.obj.dc.checksum, response['Content-MD5'])
        self.assertIn(
            '<dc:title>%s</dc:title>' % self.obj.dc.content.title,
            force_text(content))

        # RELS-EXT
        response = raw_datastream(rqst, self.obj.pid, 'RELS-EXT')
        check(200, response.status_code, 'raw_datastream view of RELS-EXT')
        check('application/rdf+xml', response['Content-Type'],
              'mimetype on raw_datastream view of RELS-EXT')

        # TEXT  (non-xml content)
        response = raw_datastream(rqst, self.obj.pid, 'TEXT')
        check(200, response.status_code, 'raw_datastream view of TEXT')
        check('text/plain', response['Content-Type'],
              'mimetype on raw_datastream view of TEXT')
        # non-xml datastreams should have content-md5 & content-length headers
        self.assertEqual(
            self.obj.text.checksum, response['Content-MD5'],
            'datastream checksum should be set as Content-MD5 header in the response')
        self.assertEqual(len(self.obj.text.content),
                         int(response['Content-Length']))

        # IMAGE (binary content)
        response = raw_datastream(rqst, self.obj.pid, 'IMAGE')
        check(200, response.status_code, 'raw_datastream view of IMAGE')
        check('image/png', response['Content-Type'],
              'mimetype on raw_datastream view of IMAGE')
        # non-xml datastreams should have content-md5 & content-length headers
        self.assertEqual(
            self.obj.image.checksum, response['Content-MD5'],
            'datastream checksum should be set as Content-MD5 header in the response')
        self.assertTrue(
            response.has_header('Content-Length'),
            'content-length header should be set in the response for binary datastreams')
        self.assertIsInstance(response, StreamingHttpResponse)

        # non-existent datastream should 404
        self.assertRaises(Http404, raw_datastream, rqst, self.obj.pid,
                          'BOGUS-DSID')

        # non-existent record should 404
        self.assertRaises(Http404, raw_datastream, rqst, 'bogus-pid:1', 'DC')

        # set extra headers in the response
        # NOTE(review): this check calls raw_datastream_old rather than
        # raw_datastream -- confirm whether that is intentional
        extra_headers = {'Content-Disposition': 'attachment; filename=foo.txt'}
        response = raw_datastream_old(rqst, self.obj.pid, 'TEXT',
                                      headers=extra_headers)
        self.assertTrue(response.has_header('Content-Disposition'))
        self.assertEqual(response['Content-Disposition'],
                         extra_headers['Content-Disposition'])

        # explicitly support GET and HEAD requests only
        rqst.method = 'POST'
        response = raw_datastream(rqst, self.obj.pid, 'DC')
        expected, got = 405, response.status_code
        self.assertEqual(
            expected, got,
            'Expected %s (Method not Allowed) but returned %s for POST to raw_datastream view'
            % (expected, got))

        # test HEAD request
        rqst.method = 'HEAD'
        response = raw_datastream(rqst, self.obj.pid, 'DC')
        check(200, response.status_code,
              'HEAD request on raw_datastream view')
        self.assertIsInstance(response, HttpResponse)
        self.assertEqual(b'', response.content)

        # test that range requests are passed through to fedora

        # use IMAGE for testing since it is binary content
        # set range header in the request; bytes=0- : entire datastream
        rqst.META['HTTP_RANGE'] = 'bytes=0-'
        rqst.method = 'GET'

        response = raw_datastream(rqst, self.obj.pid, 'IMAGE')
        check(206, response.status_code, 'raw_datastream range request')
        content = b''.join(response.streaming_content)
        image_size = self.obj.image.size
        self.assertEqual(
            image_size, len(content),
            'range request of bytes=0- should return entire content (expected %d, got %d)'
            % (image_size, len(content)))
        self.assertEqual(
            image_size, int(response['Content-Length']),
            'content-length header should be size of entire content (expected %d, got %d)'
            % (image_size, int(response['Content-Length'])))
        expected = 'bytes 0-%d/%d' % (image_size - 1, image_size)
        self.assertEqual(
            expected, response['Content-Range'],
            'content range response header should indicate bytes returned (expected %s, got %s)'
            % (expected, response['Content-Range']))
        del response

        # set range request for partial beginning content; bytes=0-150
        bytes_requested = 'bytes=0-150'
        rqst.META['HTTP_RANGE'] = bytes_requested
        response = raw_datastream(rqst, self.obj.pid, 'IMAGE')
        check(206, response.status_code, 'raw_datastream range request')
        # range bounds are inclusive, so 0-150 is 151 bytes
        content_len = 151
        content = b''.join(response.streaming_content)
        self.assertEqual(
            content_len, len(content),
            'range request of %s should return %d bytes, got %d'
            % (bytes_requested, content_len, len(content)))
        self.assertEqual(
            content_len, int(response['Content-Length']),
            'content-length header should be set to partial size %d (got %d)'
            % (content_len, int(response['Content-Length'])))
        expected = 'bytes 0-150/%d' % self.obj.image.size
        self.assertEqual(
            expected, response['Content-Range'],
            'content range response header should indicate bytes returned (expected %s, got %s)'
            % (expected, response['Content-Range']))

        # complex ranges not yet supported
        bytes_requested = 'bytes=1-10,30-50'
        rqst.META['HTTP_RANGE'] = bytes_requested
        response = raw_datastream_old(rqst, self.obj.pid, 'IMAGE',
                                      accept_range_request=True)
        check(416, response.status_code,
              'raw_datastream_old invalid range request %s' % bytes_requested)