Python Statement примеры использования

Язык программирования: Python

Пространство имен/Пакет: sss

Класс/Тип: Statement

Примеров на hotexamples.com: 11

Найдено 11 примеров использования Python-класса sss.Statement. Это лучшие примеры кода на Python для sss.Statement, полученные из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

Statement(6)

serialise_rdf(4)

add_state(1)

original_deposit(1)

set_state(1)

Пример #1

Показать файл

    def test_01_init_statement(self):
        """Build a Statement from keyword arguments and check each attribute."""
        deposited_at = datetime.now()
        deposits = [
            ("http://od1/", deposited_at, "http://package/", "sword", "obo"),
            ("http://od2/", deposited_at, "http://package/", "bob", None),
        ]
        aggregate_uris = [
            "http://od1/",
            "http://od2/",
            "http://agg1/",
            "http://agg2/",
        ]
        statement = Statement(
            aggregation_uri="http://aggregation/",
            rem_uri="http://rem/",
            original_deposits=deposits,
            aggregates=aggregate_uris,
            states=[("http://state/", "everything is groovy")],
        )

        # the two URIs identifying the statement
        assert statement.aggregation_uri == "http://aggregation/"
        assert statement.rem_uri == "http://rem/"

        # original deposits are kept in the order they were supplied
        assert len(statement.original_deposits) == 2
        for position, deposit_uri in enumerate(("http://od1/", "http://od2/")):
            assert deposit_uri in statement.original_deposits[position]

        # every aggregated resource is present, and nothing else
        for expected_uri in ("http://od1/", "http://od2/",
                             "http://agg1/", "http://agg2/"):
            assert expected_uri in statement.aggregates
        assert len(statement.aggregates) == 4

        # exactly one (uri, description) state pair
        assert len(statement.states) == 1
        uri, description = statement.states[0]
        assert uri == "http://state/"
        assert description == "everything is groovy"

Пример #2

Показать файл

Файл: test_statement.py Проект: HackODLRC/Simple-Swift-Sword-Server

 def test_02_modify_statement(self):
     """Check the state list after calling set_state and then add_state."""
     deposited_at = datetime.now()
     deposits = [
         ("http://od1/", deposited_at, "http://package/", "sword", "obo"),
         ("http://od2/", deposited_at, "http://package/", "bob", None),
     ]
     statement = Statement(
         aggregation_uri="http://aggregation/",
         rem_uri="http://rem/",
         original_deposits=deposits,
         aggregates=["http://od1/", "http://od2/", "http://agg1/", "http://agg2/"],
         states=[("http://state/", "everything is groovy")],
     )

     # after set_state there must still be a single state: the new one
     statement.set_state("http://new/state/", "still good, though")
     assert len(statement.states) == 1
     uri, description = statement.states[0]
     assert uri == "http://new/state/"
     assert description == "still good, though"

     # after add_state there must be two states
     statement.add_state("http://another/state", "also, this")
     assert len(statement.states) == 2

Пример #3

Показать файл

    def test_02_modify_statement(self):
        """Check the state list after calling set_state and then add_state."""
        timestamp = datetime.now()
        first_deposit = ("http://od1/", timestamp, "http://package/", "sword", "obo")
        second_deposit = ("http://od2/", timestamp, "http://package/", "bob", None)
        stmt = Statement(
            aggregation_uri="http://aggregation/",
            rem_uri="http://rem/",
            original_deposits=[first_deposit, second_deposit],
            aggregates=["http://od1/", "http://od2/", "http://agg1/", "http://agg2/"],
            states=[("http://state/", "everything is groovy")],
        )

        # set_state: the list must hold only the newly supplied state
        stmt.set_state("http://new/state/", "still good, though")
        assert len(stmt.states) == 1
        current_uri, current_description = stmt.states[0]
        assert current_uri == "http://new/state/"
        assert current_description == "still good, though"

        # add_state: the list must grow to two entries
        stmt.add_state("http://another/state", "also, this")
        assert len(stmt.states) == 2

Пример #4

Показать файл

Файл: test_statement.py Проект: HackODLRC/Simple-Swift-Sword-Server

 def test_03_rdf_serialise(self):
     """
     Serialise a Statement to RDF/XML, parse it back and verify its content.

     Every rdf:Description element is walked; each recognised child element
     is checked against the values used to build the Statement and counted.
     The counters are then compared with the expected totals.
     """
     n = datetime.now()
     ods = [
         ("http://od1/", n, "http://package/", "sword", "obo"),
         ("http://od2/", n, "http://package/", "bob", None)
     ]
     od_uris = ["http://od1/", "http://od2/"]
     s = Statement(aggregation_uri="http://aggregation/", rem_uri="http://rem/",
                     original_deposits=ods,
                     aggregates=["http://od1/", "http://od2/", "http://agg1/", "http://agg2/"],
                     states=[("http://state/", "everything is groovy")])

     rdf_string = s.serialise_rdf()

     # first try the round trip
     rdf = etree.fromstring(rdf_string)

     # here are some counters/switches which will help us test that everything
     # is good within the statement
     descriptions = 0
     states = 0
     state_descriptions = 0
     original_deposits = 0
     aggregated_resources = 0
     packaging = 0
     dep_on = 0
     dep_by = 0
     dep_obo = 0

     has_rem_description = False
     has_agg_description = False

     # now go through the rdf and check that everything is as expected
     for desc in rdf.findall(RDF + "Description"):
         descriptions += 1
         about = desc.get(RDF + "about")
         # Iterate the element directly: getchildren() is deprecated in lxml
         # and no longer exists in xml.etree.ElementTree (removed in 3.9).
         for element in desc:
             # an element has exactly one tag, so the checks are exclusive
             if element.tag == ORE + "describes":
                 resource = element.get(RDF + "resource")
                 assert about == s.rem_uri
                 assert resource == s.aggregation_uri
                 has_rem_description = True
             elif element.tag == ORE + "isDescribedBy":
                 resource = element.get(RDF + "resource")
                 assert about == s.aggregation_uri
                 assert resource == s.rem_uri
                 has_agg_description = True
             elif element.tag == ORE + "aggregates":
                 resource = element.get(RDF + "resource")
                 assert resource in s.aggregates or resource in od_uris
                 aggregated_resources += 1
             elif element.tag == SWORD + "originalDeposit":
                 resource = element.get(RDF + "resource")
                 assert resource in od_uris
                 original_deposits += 1
             elif element.tag == SWORD + "state":
                 resource = element.get(RDF + "resource")
                 assert resource == "http://state/"
                 states += 1
             elif element.tag == SWORD + "stateDescription":
                 assert element.text.strip() == "everything is groovy"
                 assert about == "http://state/"
                 state_descriptions += 1
             elif element.tag == SWORD + "packaging":
                 resource = element.get(RDF + "resource")
                 assert resource == "http://package/"
                 assert about in od_uris
                 packaging += 1
             elif element.tag == SWORD + "depositedOn":
                 assert about in od_uris
                 dep_on += 1
             elif element.tag == SWORD + "depositedBy":
                 assert element.text in ["sword", "bob"]
                 assert about in od_uris
                 dep_by += 1
             elif element.tag == SWORD + "depositedOnBehalfOf":
                 assert element.text == "obo"
                 assert about in od_uris
                 dep_obo += 1

     # now check that our counters/switches were flipped appropriately
     assert descriptions == 5
     assert states == 1
     assert state_descriptions == 1
     assert original_deposits == 2
     assert aggregated_resources == 4
     assert packaging == 2
     assert dep_on == 2
     assert dep_by == 2
     assert dep_obo == 1
     assert has_rem_description
     assert has_agg_description

Пример #5

Показать файл

Файл: test_statement.py Проект: HackODLRC/Simple-Swift-Sword-Server

 def test_04_rdf_aggregation_uri_exists(self):
     """
     Serialise a Statement into an existing RDF document (RDF_DOC) and verify
     both the statement content and the survival of the document's own data.

     The statement checks match those of the plain serialisation test; in
     addition one element from each of the OX, DC and RDF namespaces is
     checked to confirm the passed-in document was not overwritten.
     """
     n = datetime.now()
     ods = [
         ("http://od1/", n, "http://package/", "sword", "obo"),
         ("http://192.168.23.133/asdfasd/datasets/mydataset6/example.zip", n, "http://package/", "bob", None)
     ]
     od_uris = ["http://od1/", "http://192.168.23.133/asdfasd/datasets/mydataset6/example.zip"]
     s = Statement(aggregation_uri="http://192.168.23.133/asdfasd/datasets/mydataset6", rem_uri="http://rem/",
                     original_deposits=ods,
                     aggregates=["http://od1/", "http://192.168.23.133/asdfasd/datasets/mydataset6/example.zip", "http://agg1/", "http://agg2/"],
                     states=[("http://state/", "everything is groovy")])

     rdf_string = s.serialise_rdf(RDF_DOC)

     # first try the round trip
     rdf = etree.fromstring(rdf_string)

     # here are some counters/switches which will help us test that everything
     # is good within the statement
     descriptions = 0
     states = 0
     state_descriptions = 0
     original_deposits = 0
     aggregated_resources = 0
     packaging = 0
     dep_on = 0
     dep_by = 0
     dep_obo = 0

     has_rem_description = False
     has_agg_description = False
     ox_tag = False
     dc_tag = False
     rdf_tag = False

     # now go through the rdf and check that everything is as expected
     for desc in rdf.findall(RDF + "Description"):
         descriptions += 1
         about = desc.get(RDF + "about")
         # Iterate the element directly: getchildren() is deprecated in lxml
         # and no longer exists in xml.etree.ElementTree (removed in 3.9).
         for element in desc:
             # we expect all of the same things to be true as in the previous
             # test; an element has exactly one tag, so the checks are exclusive
             if element.tag == ORE + "describes":
                 resource = element.get(RDF + "resource")
                 assert about == s.rem_uri
                 assert resource == s.aggregation_uri
                 has_rem_description = True
             elif element.tag == ORE + "isDescribedBy":
                 resource = element.get(RDF + "resource")
                 assert about == s.aggregation_uri
                 assert resource == s.rem_uri
                 has_agg_description = True
             elif element.tag == ORE + "aggregates":
                 resource = element.get(RDF + "resource")
                 assert resource in s.aggregates or resource in od_uris
                 aggregated_resources += 1
             elif element.tag == SWORD + "originalDeposit":
                 resource = element.get(RDF + "resource")
                 assert resource in od_uris
                 original_deposits += 1
             elif element.tag == SWORD + "state":
                 resource = element.get(RDF + "resource")
                 assert resource == "http://state/"
                 states += 1
             elif element.tag == SWORD + "stateDescription":
                 assert element.text.strip() == "everything is groovy"
                 assert about == "http://state/"
                 state_descriptions += 1
             elif element.tag == SWORD + "packaging":
                 resource = element.get(RDF + "resource")
                 assert resource == "http://package/"
                 assert about in od_uris
                 packaging += 1
             elif element.tag == SWORD + "depositedOn":
                 assert about in od_uris
                 dep_on += 1
             elif element.tag == SWORD + "depositedBy":
                 assert element.text in ["sword", "bob"]
                 assert about in od_uris
                 dep_by += 1
             elif element.tag == SWORD + "depositedOnBehalfOf":
                 assert element.text == "obo"
                 assert about in od_uris
                 dep_obo += 1

             # and we must verify that we didn't overwrite anything in the
             # passed in RDF document (don't check everything, but let's pick
             # one thing from each namespace)
             elif element.tag == OX + "currentVersion":
                 assert element.text == "6"
                 ox_tag = True
             elif element.tag == DC + "identifier":
                 assert element.text == "mydataset6"
                 dc_tag = True
             elif element.tag == RDF + "type":
                 resource = element.get(RDF + "resource")
                 assert resource == "http://vocab.ox.ac.uk/dataset/schema#DataSet"
                 rdf_tag = True

     # now check that our counters/switches were flipped appropriately
     assert descriptions == 5
     assert states == 1
     assert state_descriptions == 1
     assert original_deposits == 2
     assert aggregated_resources == 4
     assert packaging == 2
     assert dep_on == 2
     assert dep_by == 2
     assert dep_obo == 1
     assert has_rem_description
     assert has_agg_description

     assert ox_tag
     assert dc_tag
     assert rdf_tag

Пример #6

Показать файл

Файл: sword_server.py Проект: anusharanganathan/django-databank

    def replace(self, path, deposit):
        """
        Replace the content of an existing dataset with the supplied deposit.

        Args:
        - path:     the request path; self.um.interpret_path resolves it to the
                    silo and the dataset id
        - deposit:  a DepositRequest object
        Returns a DepositResponse carrying the serialised Deposit Receipt and
        the Edit-URI as its location.
        Raises SwordError (bad_request) when no filename is supplied or when
        the filename matches JAILBREAK.
        """
        silo, dataset_id, accept_parameters = self.um.interpret_path(path)
        rdf_silo = self._get_authorised_rdf_silo(silo)

        # now get the dataset object itself
        dataset = rdf_silo.get_item(dataset_id)

        # deal with possible problems with the filename
        if deposit.filename is None or deposit.filename == "":
            raise SwordError(error_uri=Errors.bad_request, msg="You must supply a filename to unpack")
        if JAILBREAK.search(deposit.filename) is not None:
            raise SwordError(error_uri=Errors.bad_request, msg="'..' cannot be used in the path or as a filename")

        # FIXME: at the moment this metadata operation is not supported by DataBank
        #
        # first figure out what to do about the metadata
        # metadata_state will be used to store any state information associated
        # with a metadata update.  It gets tied up with the content state
        # and any pre-existing states further down
        metadata_state = None
        #if deposit.atom is not None:
        #    ssslog.info("Replace request has ATOM part - updating")
        #    entry_ingester = self.configuration.get_entry_ingester()(self.dao)
        #    entry_ingester.ingest(collection, id, deposit.atom)
        #    keep_atom = True

        content_state = None
        deposit_uri = None
        derived_resource_uris = []
        if deposit.content is not None:
            ssslog.info("Replace request has file content - updating")

            # remove all the old files before adding the new.  We always leave
            # behind the metadata; this will be overwritten later if necessary
            #self.dao.remove_content(collection, id, True, keep_atom)
            # Increment the version, carrying manifest.rdf across.
            # NOTE(review): an earlier comment here said the previous version is
            # NOT cloned, yet the call passes clone_previous_version=True -
            # confirm which is intended.
            # An update will replace the entire contents of the container (if previously unpacked) with the bagit file
            dataset.increment_version_delta(clone_previous_version=True, copy_filenames=['manifest.rdf'])

            # store the content file
            dataset.put_stream(deposit.filename, deposit.content)
            ssslog.debug("New incoming file stored with filename " + deposit.filename)

            # FIXME: unpacking doesn't happen here ... (keeping for the time being for reference)
            # Broadcast to unpack and add sword:state in manifest
            # <sword:state rdf:resource="http://purl.org/net/sword/state/queuedForUnpacking"/>

            # now that we have stored the atom and the content, we can invoke a package ingester over the top to extract
            # all the metadata and any files we want.  Notice that we pass in the metadata_relevant flag, so the
            # packager won't overwrite the existing metadata if it isn't supposed to
            #packager = self.configuration.get_package_ingester(deposit.packaging)(self.dao)
            #derived_resources = packager.ingest(collection, id, fn, deposit.metadata_relevant)
            #ssslog.debug("Resources derived from deposit: " + str(derived_resources))

            # a list of identifiers which will resolve to the derived resources
            #derived_resource_uris = self.get_derived_resource_uris(collection, id, derived_resources)

            # An identifier which will resolve to the package just deposited
            deposit_uri = self.um.file_uri(silo, dataset_id, deposit.filename)
            ssslog.debug("Incoming file has been stored at URI " + deposit_uri)

            # register a new content state to be used
            content_state = DataBankStates.zip_file_added

        # Taken from dataset.py, seems to be the done thing when adding an item.
        # NOTE: confirmed with Anusha that this is correct
        dataset.del_triple(dataset.uri, u"dcterms:modified")
        dataset.add_triple(dataset.uri, u"dcterms:modified", datetime.now())
        dataset.del_triple(dataset.uri, u"oxds:currentVersion")
        dataset.add_triple(dataset.uri, u"oxds:currentVersion", dataset.currentversion)

        # before we do any state management, we have to be sure that the sword namespace
        # is registered
        dataset.get_rdf_manifest().add_namespace("sword", "http://purl.org/net/sword/terms/")
        dataset.sync()

        # sort out the new list of states for the item
        current_states = self._extract_states(dataset)
        new_states = []

        # for each existing state, consider whether to carry it over
        ssslog.info("new content state: " + str(content_state))
        for state_uri, state_desc in current_states:
            keep = True
            if metadata_state is not None and state_uri in DataBankStates.metadata_states:
                # we do not want the state if it is a metadata state and we have been given
                # a new metadata state
                keep = False
            if content_state is not None and state_uri in DataBankStates.content_states:
                ssslog.debug("Removing state: " + state_uri)
                # we do not want the state if it is a content state and we have been given
                # a new content state
                keep = False
            if keep:
                ssslog.debug("carrying over state: " + state_uri)
                new_states.append((state_uri, state_desc))

        # add the new metadata and content states provided from above
        if metadata_state is not None:
            new_states.append(metadata_state)
        if content_state is not None:
            ssslog.debug("adding new content state: " + str(content_state))
            new_states.append(content_state)

        ssslog.debug("New Dataset States: " + str(new_states))

        # FIXME: how safe is this?  What other ore:aggregates might there be?
        # we need to back out some of the triples in preparation to update the
        # statement
        # NOTE AR: For aggregates this is not needed. put_stream will add the aggregate into the URI.
        #       Why delete other triples in the manifest - ??
        # sword:originalDeposit point to isVersionOf
        aggregates = dataset.list_rdf_objects(dataset.uri, u"ore:aggregates")
        original_deposits = dataset.list_rdf_objects(dataset.uri, u"sword:originalDeposit")
        states = dataset.list_rdf_objects(dataset.uri, u"sword:state")

        # loop variables are named so that none of them collides with the
        # Statement object created further down
        for aggregate_uri in aggregates:
            dataset.del_triple(aggregate_uri, "*")
        for original_deposit_uri in original_deposits:
            dataset.del_triple(original_deposit_uri, "*")
        for old_state_uri in states:
            dataset.del_triple(old_state_uri, "*")
        dataset.del_triple(dataset.uri, u"ore:aggregates")
        dataset.del_triple(dataset.uri, u"sword:originalDeposit")
        dataset.del_triple(dataset.uri, u"sword:state")

        # FIXME: also unsafe in the same way as above
        # Write the md5 checksum into the manifest
        # A deposit contains just the new stuff so no harm in deleting all triples
        dataset.del_triple("*", u"oxds:hasMD5")
        #dataset.del_triple(deposit_uri, u"oxds:hasMD5")
        # deposit_uri is only set when the request carried content; without it
        # there is no subject to attach the checksum to
        if deposit.content_md5 is not None and deposit_uri is not None:
            dataset.add_triple(deposit_uri, u"oxds:hasMD5", deposit.content_md5)

        dataset.sync()

        # the aggregation uri
        agg_uri = self.um.agg_uri(silo, dataset_id)

        # the Edit-URI
        edit_uri = self.um.edit_uri(silo, dataset_id)

        # FIXME: here we also need to keep existing states where relevant.
        #   A state will continue to be relevant if it applies to an area of the
        #   item (i.e. the container or the media resource) for which this operation
        #   has no effect.
        #   for example:
        #   this is a metadata replace, but a status on the item is set to say that
        #   the item's zip file is corrupt and needs replacing.  The new status
        #   should leave this alone (and probably not do anything, tbh), no matter
        #   what else it does
        # create the statement outline
        # FIXME: there is something weird going on with instantiating this object without the original_deposits argument
        # apparently if I don't explicitly say there are no original deposits, then it "remembers" original deposits
        # from previous uses of the object
        # NOTE(review): that symptom looks like a shared mutable default in
        # Statement's constructor - confirm, and check whether `aggregates`
        # needs the same explicit empty list.
        statement = Statement(aggregation_uri=agg_uri, rem_uri=edit_uri, states=new_states, original_deposits=[])

        # set the original deposit (which sorts out the aggregations for us too)
        by = deposit.auth.username if deposit.auth is not None else None
        obo = deposit.auth.on_behalf_of if deposit.auth is not None else None
        if deposit_uri is not None:
            statement.original_deposit(deposit_uri, datetime.now(), deposit.packaging, by, obo)

        # create the new manifest and store it; the context manager makes sure
        # the manifest file handle is closed before the manifest is rewritten
        manifest = dataset.get_rdf_manifest()
        with open(manifest.filepath, "r") as manifest_file:
            rdf_string = manifest_file.read()

        new_manifest = statement.serialise_rdf(rdf_string)
        dataset.put_stream("manifest.rdf", new_manifest)

        # FIXME: add in proper treatment here
        # now generate a receipt.
        # TODO: Include audit log instead of 'added zip to dataset'
        receipt = self.deposit_receipt(silo, dataset_id, dataset, "added zip to dataset")

        # now augment the receipt with the details of this particular deposit
        # this handles None arguments, and converts the xml receipt into a string
        receipt = self.augmented_receipt(receipt, deposit_uri, derived_resource_uris)

        # finally, assemble the deposit response and return
        dr = DepositResponse()
        dr.receipt = receipt.serialise()
        dr.location = receipt.edit_uri
        return dr

Пример #7

Показать файл

Файл: sword_server.py Проект: anusharanganathan/django-databank

    def deposit_new(self, silo, deposit):
        """
        Take the supplied deposit and create a new dataset for it in the given silo.

        Args:
        - silo:     the name of the silo to deposit into; checked through
                    self._get_authorised_rdf_silo
        - deposit:  the DepositRequest object to be processed; if it has no
                    slug, a random UUID is assigned as the dataset name
        Returns a DepositResponse carrying the serialised Deposit Receipt and
        the Edit-URI as its location.
        Raises SwordError (dataset_conflict) if a dataset with that name already
        exists, or SwordError (bad_request) if the name fails allowable_id2.
        """
        # check against the authorised list of silos
        rdf_silo = self._get_authorised_rdf_silo(silo)

        # ensure that we have a slug
        if deposit.slug is None:
            deposit.slug = str(uuid.uuid4())

        # weed out unacceptable deposits
        if rdf_silo.exists(deposit.slug):
            raise SwordError(error_uri=DataBankErrors.dataset_conflict, msg="A Dataset with the name " + deposit.slug + " already exists")
        if not allowable_id2(deposit.slug):
            raise SwordError(error_uri=Errors.bad_request, msg="Dataset name can contain only the following characters - " +
                                                                ag.naming_rule_humanized + " and has to be more than 1 character")

        # NOTE: we pass in an empty dictionary of metadata on create, and then run
        # _ingest_metadata to augment the item from the deposit
        item = create_new(rdf_silo, deposit.slug, self.auth_credentials.username, {})
        add_dataset(silo, deposit.slug)
        self._ingest_metadata(item, deposit)

        # NOTE: left in for reference for the time being, but deposit_new
        # only support entry only deposits in databank.  This will need to be
        # re-introduced for full sword support
        # store the content file if one exists, and do some processing on it
        #deposit_uri = None
        #derived_resource_uris = []
        #if deposit.content is not None:
        #
        #     if deposit.filename is None:
        #         deposit.filename = "unnamed.file"
        #     fn = self.dao.store_content(collection, id, deposit.content, deposit.filename)
        #
        #     # now that we have stored the atom and the content, we can invoke a package ingester over the top to extract
        #     # all the metadata and any files we want
        #
        #     # FIXME: because the deposit interpreter doesn't deal with multipart properly
        #     # we don't get the correct packaging format here if the package is anything
        #     # other than Binary
        #     ssslog.info("attempting to load ingest packager for format " + str(deposit.packaging))
        #     packager = self.configuration.get_package_ingester(deposit.packaging)(self.dao)
        #     derived_resources = packager.ingest(collection, id, fn, deposit.metadata_relevant)
        #
        #     # An identifier which will resolve to the package just deposited
        #     deposit_uri = self.um.part_uri(collection, id, fn)
        #
        #     # a list of identifiers which will resolve to the derived resources
        #     derived_resource_uris = self.get_derived_resource_uris(collection, id, derived_resources)

        # the aggregation uri
        agg_uri = self.um.agg_uri(silo, deposit.slug)

        # the Edit-URI
        edit_uri = self.um.edit_uri(silo, deposit.slug)

        # create the initial statement.  original_deposits is passed explicitly,
        # as replace() does: its FIXME reports that a Statement built without
        # this argument "remembers" original deposits from previous uses, and a
        # brand-new dataset must not inherit any.
        s = Statement(aggregation_uri=agg_uri, rem_uri=edit_uri,
                      states=[DataBankStates.initial_state], original_deposits=[])

        # FIXME: need to sort out authentication before we can do this ...
        # FIXME: also, it's not relevant unless we take a binary-only deposit, which
        # we currently don't
        # User already authorized to deposit in this silo (_get_authorised_rdf_silo).
        # This is to augment metadata with details like who created, on behalf of, when
        #
        #by = deposit.auth.username if deposit.auth is not None else None
        #obo = deposit.auth.on_behalf_of if deposit.auth is not None else None
        #if deposit_uri is not None:
        #    s.original_deposit(deposit_uri, datetime.now(), deposit.packaging, by, obo)
        #s.aggregates = derived_resource_uris

        # In creating the statement we use the existing manifest.rdf file in the
        # item; the context manager makes sure the file handle is closed before
        # the manifest is rewritten
        manifest = item.get_rdf_manifest()
        with open(manifest.filepath, "r") as manifest_file:
            rdf_string = manifest_file.read()

        # create the new manifest and store it
        #Serialize rdf adds the sword statement - state, depositedOn, by, onBehalfOf, stateDesc
        new_manifest = s.serialise_rdf(rdf_string)
        item.put_stream("manifest.rdf", new_manifest)

        # FIXME: here is where we have to put the correct treatment in
        # now generate a receipt for the deposit
        # TODO: Add audit log from item.manifest in place of  "created new item"
        receipt = self.deposit_receipt(silo, deposit.slug, item, "created new item")

        # FIXME: while we don't have full text deposit, we don't need to augment
        # the deposit receipt

        # now augment the receipt with the details of this particular deposit
        # this handles None arguments, and converts the xml receipt into a string
        # receipt = self.augmented_receipt(receipt, deposit_uri, derived_resource_uris)

        # finally, assemble the deposit response and return
        dr = DepositResponse()
        dr.receipt = receipt.serialise()
        dr.location = receipt.edit_uri

        # Broadcast change as message
        ag.b.creation(silo, deposit.slug, ident=self.auth_credentials.username)

        return dr

Пример #8

Показать файл

    def replace(self, path, deposit):
        """
        Replace all the content represented by the supplied id with the supplied deposit
        Args:
        - oid:  the object ID in the store
        - deposit:  a DepositRequest object
        Return a DepositResponse containing the Deposit Receipt or a SWORD Error
        """
        silo, dataset_id, accept_parameters = self.um.interpret_path(path)
        rdf_silo = self._get_authorised_rdf_silo(silo)

        # now get the dataset object itself
        dataset = rdf_silo.get_item(dataset_id)

        # deal with possible problems with the filename
        if deposit.filename is None or deposit.filename == "":
            raise SwordError(error_uri=Errors.bad_request,
                             msg="You must supply a filename to unpack")
        if JAILBREAK.search(deposit.filename) != None:
            raise SwordError(
                error_uri=Errors.bad_request,
                msg="'..' cannot be used in the path or as a filename")

        # FIXME: at the moment this metadata operation is not supported by DataBank
        #
        # first figure out what to do about the metadata
        keep_atom = False
        metadata_state = None  # This will be used to store any state information associated
        # with a metadata update.  It gets tied up with the content state
        # and any pre-existing states further down
        #if deposit.atom is not None:
        #    ssslog.info("Replace request has ATOM part - updating")
        #    entry_ingester = self.configuration.get_entry_ingester()(self.dao)
        #    entry_ingester.ingest(collection, id, deposit.atom)
        #    keep_atom = True

        content_state = None
        deposit_uri = None
        derived_resource_uris = []
        if deposit.content is not None:
            ssslog.info("Replace request has file content - updating")

            # remove all the old files before adding the new.  We always leave
            # behind the metadata; this will be overwritten later if necessary
            #self.dao.remove_content(collection, id, True, keep_atom)
            #Increment the version, but do not clone the previous version.
            # An update will replace the entire contents of the container (if previously unpacked) with the bagit file
            dataset.increment_version_delta(clone_previous_version=True,
                                            copy_filenames=['manifest.rdf'])

            # store the content file
            dataset.put_stream(deposit.filename, deposit.content)
            ssslog.debug("New incoming file stored with filename " +
                         deposit.filename)

            # FIXME: unpacking doesn't happen here ... (keeping for the time being for reference)
            # Broadcast to unpack and add sword:state in manifest
            # <sword:state rdf:resource="http://purl.org/net/sword/state/queuedForUnpacking"/>

            # now that we have stored the atom and the content, we can invoke a package ingester over the top to extract
            # all the metadata and any files we want.  Notice that we pass in the metadata_relevant flag, so the
            # packager won't overwrite the existing metadata if it isn't supposed to
            #packager = self.configuration.get_package_ingester(deposit.packaging)(self.dao)
            #derived_resources = packager.ingest(collection, id, fn, deposit.metadata_relevant)
            #ssslog.debug("Resources derived from deposit: " + str(derived_resources))

            # a list of identifiers which will resolve to the derived resources
            #derived_resource_uris = self.get_derived_resource_uris(collection, id, derived_resources)

            # An identifier which will resolve to the package just deposited
            deposit_uri = self.um.file_uri(silo, dataset_id, deposit.filename)
            ssslog.debug("Incoming file has been stored at URI " + deposit_uri)

            # register a new content state to be used
            content_state = DataBankStates.zip_file_added

        # Taken from dataset.py, seems to be the done thing when adding an item.
        # NOTE: confirmed with Anusha that this is correct
        dataset.del_triple(dataset.uri, u"dcterms:modified")
        dataset.add_triple(dataset.uri, u"dcterms:modified", datetime.now())
        dataset.del_triple(dataset.uri, u"oxds:currentVersion")
        dataset.add_triple(dataset.uri, u"oxds:currentVersion",
                           dataset.currentversion)

        # before we do any state management, we have to be sure that the sword namespace
        # is registered
        dataset.get_rdf_manifest().add_namespace(
            "sword", "http://purl.org/net/sword/terms/")
        dataset.sync()

        # sort out the new list of states for the item
        current_states = self._extract_states(dataset)
        new_states = []

        # for each existing state, consider whether to carry it over
        ssslog.info("new content state: " + str(content_state))
        for state_uri, state_desc in current_states:
            keep = True
            if metadata_state is not None and state_uri in DataBankStates.metadata_states:
                # we do not want the state if it is a metadata state and we have been given
                # a new metadata state
                keep = False
            if content_state is not None and state_uri in DataBankStates.content_states:
                ssslog.debug("Removing state: " + state_uri)
                # we do not want the state if it is a content state and we have been given
                # a new content state
                keep = False
            if keep:
                ssslog.debug("carrying over state: " + state_uri)
                new_states.append((state_uri, state_desc))

        # add the new metadata and content states provided from above
        if metadata_state is not None:
            new_states.append(metadata_state)
        if content_state is not None:
            ssslog.debug("adding new content state: " + str(content_state))
            new_states.append(content_state)

        ssslog.debug("New Dataset States: " + str(new_states))

        # FIXME: how safe is this?  What other ore:aggregates might there be?
        # we need to back out some of the triples in preparation to update the
        # statement
        # NOTE AR: I have commented the following lines.
        #       For aggregates this is not needed. put_stream will add the aggregate into the URI.
        #       Why delete other triples in the manifest - ??
        # sword:originalDeposit point to isVersionOf

        aggregates = dataset.list_rdf_objects(dataset.uri, u"ore:aggregates")
        original_deposits = dataset.list_rdf_objects(dataset.uri,
                                                     u"sword:originalDeposit")
        states = dataset.list_rdf_objects(dataset.uri, u"sword:state")

        for a in aggregates:
            dataset.del_triple(a, "*")
        for od in original_deposits:
            dataset.del_triple(od, "*")
        for s in states:
            dataset.del_triple(s, "*")
        dataset.del_triple(dataset.uri, u"ore:aggregates")
        dataset.del_triple(dataset.uri, u"sword:originalDeposit")
        dataset.del_triple(dataset.uri, u"sword:state")

        # FIXME: also unsafe in the same way as above
        # Write the md5 checksum into the manifest
        # A deposit contains just the new stuff so no harm in deleting all triples
        dataset.del_triple("*", u"oxds:hasMD5")
        #dataset.del_triple(deposit_uri, u"oxds:hasMD5")
        if deposit.content_md5 is not None:
            dataset.add_triple(deposit_uri, u"oxds:hasMD5",
                               deposit.content_md5)

        dataset.sync()

        # the aggregation uri
        agg_uri = self.um.agg_uri(silo, dataset_id)

        # the Edit-URI
        edit_uri = self.um.edit_uri(silo, dataset_id)

        # FIXME: here we also need to keep existing states where relevant.
        #   A state will continue to be relevant if it applies to an area of the
        #   item (i.e. the container or the media resource) for which this operation
        #   has no effect.
        #   for example:
        #   this is a metadata replace, but a status on the item is set to say that
        #   the item's zip file is corrupt and needs replacing.  The new status
        #   should leave this alone (and probably not do anything, tbh), no matter
        #   what else it does
        # create the statement outline
        # FIXME: there is something weird going on with instantiating this object without the original_deposits argument
        # apparently if I don't explicitly say there are no original deposits, then it "remembers" original deposits
        # from previous uses of the object
        s = Statement(aggregation_uri=agg_uri,
                      rem_uri=edit_uri,
                      states=new_states,
                      original_deposits=[])

        # set the original deposit (which sorts out the aggregations for us too)
        by = deposit.auth.username if deposit.auth is not None else None
        obo = deposit.auth.on_behalf_of if deposit.auth is not None else None
        if deposit_uri is not None:
            s.original_deposit(deposit_uri, datetime.now(), deposit.packaging,
                               by, obo)

        # create the new manifest and store it
        manifest = dataset.get_rdf_manifest()
        f = open(manifest.filepath, "r")
        rdf_string = f.read()

        new_manifest = s.serialise_rdf(rdf_string)
        dataset.put_stream("manifest.rdf", new_manifest)

        # FIXME: add in proper treatment here
        # now generate a receipt.
        # TODO: Include audit log instead of 'added zip to dataset'
        receipt = self.deposit_receipt(silo, dataset_id, dataset,
                                       "added zip to dataset")

        # now augment the receipt with the details of this particular deposit
        # this handles None arguments, and converts the xml receipt into a string
        receipt = self.augmented_receipt(receipt, deposit_uri,
                                         derived_resource_uris)

        # finally, assemble the deposit response and return
        dr = DepositResponse()
        dr.receipt = receipt.serialise()
        dr.location = receipt.edit_uri
        return dr

Пример #9

Показать файл

    def deposit_new(self, silo, deposit):
        """
        Take the supplied deposit and treat it as a new dataset to be created
        in the specified silo.

        Args:
        -silo:      the name of the silo to be deposited into; it is resolved
                    through _get_authorised_rdf_silo before anything is created
        -deposit:   the DepositRequest object to be processed.  If it carries
                    no slug, a random UUID is assigned to deposit.slug and used
                    as the dataset name
        Returns a DepositResponse object whose receipt is the serialised
        Deposit Receipt and whose location is the receipt's Edit-URI.
        Raises SwordError if a dataset with that name already exists in the
        silo, or if the name is rejected by allowable_id2.
        """
        # check against the authorised list of silos
        rdf_silo = self._get_authorised_rdf_silo(silo)

        # ensure that we have a slug
        if deposit.slug is None:
            deposit.slug = str(uuid.uuid4())

        # weed out unacceptable deposits
        if rdf_silo.exists(deposit.slug):
            raise SwordError(error_uri=DataBankErrors.dataset_conflict,
                             msg="A Dataset with the name " + deposit.slug +
                             " already exists")
        if not allowable_id2(deposit.slug):
            raise SwordError(
                error_uri=Errors.bad_request,
                msg="Dataset name can contain only the following characters - "
                + ag.naming_rule_humanized +
                " and has to be more than 1 character")

        # NOTE: we pass in an empty dictionary of metadata on create, and then run
        # _ingest_metadata to augment the item from the deposit
        item = create_new(rdf_silo, deposit.slug,
                          self.auth_credentials.username, {})
        add_dataset(silo, deposit.slug)
        self._ingest_metadata(item, deposit)

        # NOTE: left in for reference for the time being, but deposit_new
        # only support entry only deposits in databank.  This will need to be
        # re-introduced for full sword support
        # store the content file if one exists, and do some processing on it
        #deposit_uri = None
        #derived_resource_uris = []
        #if deposit.content is not None:

        #     if deposit.filename is None:
        #         deposit.filename = "unnamed.file"
        #     fn = self.dao.store_content(collection, id, deposit.content, deposit.filename)

        # now that we have stored the atom and the content, we can invoke a package ingester over the top to extract
        # all the metadata and any files we want

        # FIXME: because the deposit interpreter doesn't deal with multipart properly
        # we don't get the correct packaging format here if the package is anything
        # other than Binary
        #     ssslog.info("attempting to load ingest packager for format " + str(deposit.packaging))
        #     packager = self.configuration.get_package_ingester(deposit.packaging)(self.dao)
        #     derived_resources = packager.ingest(collection, id, fn, deposit.metadata_relevant)

        # An identifier which will resolve to the package just deposited
        #     deposit_uri = self.um.part_uri(collection, id, fn)

        # a list of identifiers which will resolve to the derived resources
        #     derived_resource_uris = self.get_derived_resource_uris(collection, id, derived_resources)

        # the aggregation uri
        agg_uri = self.um.agg_uri(silo, deposit.slug)

        # the Edit-URI
        edit_uri = self.um.edit_uri(silo, deposit.slug)

        # create the initial statement
        s = Statement(aggregation_uri=agg_uri,
                      rem_uri=edit_uri,
                      states=[DataBankStates.initial_state])

        # FIXME: need to sort out authentication before we can do this ...
        # FIXME: also, it's not relevant unless we take a binary-only deposit, which
        # we currently don't
        # User already authorized to deposit in this silo (_get_authorised_rdf_silo).
        # This is to augment metadata with details like who created, on behalf of, when
        #
        #by = deposit.auth.username if deposit.auth is not None else None
        #obo = deposit.auth.on_behalf_of if deposit.auth is not None else None
        #if deposit_uri is not None:
        #    s.original_deposit(deposit_uri, datetime.now(), deposit.packaging, by, obo)
        #s.aggregates = derived_resource_uris

        # In creating the statement we use the existing manifest.rdf file in the
        # item.  Read it inside a context manager so the file handle is closed
        # before the manifest is overwritten below.
        manifest = item.get_rdf_manifest()
        with open(manifest.filepath, "r") as f:
            rdf_string = f.read()

        # create the new manifest and store it
        # serialise_rdf adds the sword statement - state, depositedOn, by, onBehalfOf, stateDesc
        new_manifest = s.serialise_rdf(rdf_string)
        item.put_stream("manifest.rdf", new_manifest)

        # FIXME: here is where we have to put the correct treatment in
        # now generate a receipt for the deposit
        # TODO: Add audit log from item.manifest in place of  "created new item"
        receipt = self.deposit_receipt(silo, deposit.slug, item,
                                       "created new item")

        # FIXME: while we don't have full text deposit, we don't need to augment
        # the deposit receipt

        # now augment the receipt with the details of this particular deposit
        # this handles None arguments, and converts the xml receipt into a string
        # receipt = self.augmented_receipt(receipt, deposit_uri, derived_resource_uris)

        # finally, assemble the deposit response and return
        dr = DepositResponse()
        dr.receipt = receipt.serialise()
        dr.location = receipt.edit_uri

        # Broadcast change as message
        ag.b.creation(silo, deposit.slug, ident=self.auth_credentials.username)

        return dr

Пример #10

Показать файл

    def test_03_rdf_serialise(self):
        """
        Serialise a fully populated Statement without a base RDF document and
        check the output, one rdf:Description at a time.

        The statement carries two original deposits, four aggregated resources
        and one state; the counters below must match those numbers exactly.
        """
        n = datetime.now()
        ods = [("http://od1/", n, "http://package/", "sword", "obo"),
               ("http://od2/", n, "http://package/", "bob", None)]
        od_uris = ["http://od1/", "http://od2/"]
        s = Statement(aggregation_uri="http://aggregation/",
                      rem_uri="http://rem/",
                      original_deposits=ods,
                      aggregates=[
                          "http://od1/", "http://od2/", "http://agg1/",
                          "http://agg2/"
                      ],
                      states=[("http://state/", "everything is groovy")])

        rdf_string = s.serialise_rdf()

        # first try the round trip
        rdf = etree.fromstring(rdf_string)

        # here are some counters/switches which will help us test that everything
        # is good within the statement
        descriptions = 0
        states = 0
        state_descriptions = 0
        original_deposits = 0
        aggregated_resources = 0
        packaging = 0
        dep_on = 0
        dep_by = 0
        dep_obo = 0

        has_rem_description = False
        has_agg_description = False

        # now go through the rdf and check that everything is as expected
        for desc in rdf.findall(RDF + "Description"):
            descriptions += 1
            about = desc.get(RDF + "about")
            # iterate the element directly: getchildren() is deprecated
            for element in desc:
                if element.tag == ORE + "describes":
                    resource = element.get(RDF + "resource")
                    assert about == s.rem_uri
                    assert resource == s.aggregation_uri
                    has_rem_description = True
                elif element.tag == ORE + "isDescribedBy":
                    resource = element.get(RDF + "resource")
                    assert about == s.aggregation_uri
                    assert resource == s.rem_uri
                    has_agg_description = True
                elif element.tag == ORE + "aggregates":
                    resource = element.get(RDF + "resource")
                    assert resource in s.aggregates or resource in od_uris
                    aggregated_resources += 1
                elif element.tag == SWORD + "originalDeposit":
                    resource = element.get(RDF + "resource")
                    assert resource in od_uris
                    original_deposits += 1
                elif element.tag == SWORD + "state":
                    resource = element.get(RDF + "resource")
                    assert resource == "http://state/"
                    states += 1
                elif element.tag == SWORD + "stateDescription":
                    assert element.text.strip() == "everything is groovy"
                    assert about == "http://state/"
                    state_descriptions += 1
                elif element.tag == SWORD + "packaging":
                    resource = element.get(RDF + "resource")
                    assert resource == "http://package/"
                    assert about in od_uris
                    packaging += 1
                elif element.tag == SWORD + "depositedOn":
                    assert about in od_uris
                    dep_on += 1
                elif element.tag == SWORD + "depositedBy":
                    assert element.text in ["sword", "bob"]
                    assert about in od_uris
                    dep_by += 1
                elif element.tag == SWORD + "depositedOnBehalfOf":
                    # only the first original deposit has an on-behalf-of value
                    assert element.text == "obo"
                    assert about in od_uris
                    dep_obo += 1

        # now check that our counters/switches were flipped appropriately
        assert descriptions == 5
        assert states == 1
        assert state_descriptions == 1
        assert original_deposits == 2
        assert aggregated_resources == 4
        assert packaging == 2
        assert dep_on == 2
        assert dep_by == 2
        assert dep_obo == 1
        assert has_rem_description
        assert has_agg_description

Пример #11

Показать файл

    def test_04_rdf_aggregation_uri_exists(self):
        """
        Serialise a Statement on top of the existing RDF_DOC document and check
        both the statement triples and a sample of the pre-existing content.

        The statement checks are the same as for serialising without a base
        document; in addition one element from each of the OX, DC and RDF
        namespaces of the passed-in document must still be present afterwards.
        """
        n = datetime.now()
        ods = [
            ("http://od1/", n, "http://package/", "sword", "obo"),
            ("http://192.168.23.133/asdfasd/datasets/mydataset6/example.zip",
             n, "http://package/", "bob", None)
        ]
        od_uris = [
            "http://od1/",
            "http://192.168.23.133/asdfasd/datasets/mydataset6/example.zip"
        ]
        s = Statement(
            aggregation_uri="http://192.168.23.133/asdfasd/datasets/mydataset6",
            rem_uri="http://rem/",
            original_deposits=ods,
            aggregates=[
                "http://od1/",
                "http://192.168.23.133/asdfasd/datasets/mydataset6/example.zip",
                "http://agg1/", "http://agg2/"
            ],
            states=[("http://state/", "everything is groovy")])

        rdf_string = s.serialise_rdf(RDF_DOC)

        # first try the round trip
        rdf = etree.fromstring(rdf_string)

        # here are some counters/switches which will help us test that everything
        # is good within the statement
        descriptions = 0
        states = 0
        state_descriptions = 0
        original_deposits = 0
        aggregated_resources = 0
        packaging = 0
        dep_on = 0
        dep_by = 0
        dep_obo = 0

        has_rem_description = False
        has_agg_description = False
        ox_tag = False
        dc_tag = False
        rdf_tag = False

        # now go through the rdf and check that everything is as expected
        for desc in rdf.findall(RDF + "Description"):
            descriptions += 1
            about = desc.get(RDF + "about")
            # iterate the element directly: getchildren() is deprecated
            for element in desc:
                # we expect all of the same things to be true as in the previous
                # test
                if element.tag == ORE + "describes":
                    resource = element.get(RDF + "resource")
                    assert about == s.rem_uri
                    assert resource == s.aggregation_uri
                    has_rem_description = True
                elif element.tag == ORE + "isDescribedBy":
                    resource = element.get(RDF + "resource")
                    assert about == s.aggregation_uri
                    assert resource == s.rem_uri
                    has_agg_description = True
                elif element.tag == ORE + "aggregates":
                    resource = element.get(RDF + "resource")
                    assert resource in s.aggregates or resource in od_uris
                    aggregated_resources += 1
                elif element.tag == SWORD + "originalDeposit":
                    resource = element.get(RDF + "resource")
                    assert resource in od_uris
                    original_deposits += 1
                elif element.tag == SWORD + "state":
                    resource = element.get(RDF + "resource")
                    assert resource == "http://state/"
                    states += 1
                elif element.tag == SWORD + "stateDescription":
                    assert element.text.strip() == "everything is groovy"
                    assert about == "http://state/"
                    state_descriptions += 1
                elif element.tag == SWORD + "packaging":
                    resource = element.get(RDF + "resource")
                    assert resource == "http://package/"
                    assert about in od_uris
                    packaging += 1
                elif element.tag == SWORD + "depositedOn":
                    assert about in od_uris
                    dep_on += 1
                elif element.tag == SWORD + "depositedBy":
                    assert element.text in ["sword", "bob"]
                    assert about in od_uris
                    dep_by += 1
                elif element.tag == SWORD + "depositedOnBehalfOf":
                    # only the first original deposit has an on-behalf-of value
                    assert element.text == "obo"
                    assert about in od_uris
                    dep_obo += 1

                # and we must verify that we didn't overwrite anything in the
                # passed in RDF document (don't check everything, but let's pick
                # one thing from each namespace)
                elif element.tag == OX + "currentVersion":
                    assert element.text == "6"
                    ox_tag = True
                elif element.tag == DC + "identifier":
                    assert element.text == "mydataset6"
                    dc_tag = True
                elif element.tag == RDF + "type":
                    resource = element.get(RDF + "resource")
                    assert resource == "http://vocab.ox.ac.uk/dataset/schema#DataSet"
                    rdf_tag = True

        # now check that our counters/switches were flipped appropriately
        assert descriptions == 5
        assert states == 1
        assert state_descriptions == 1
        assert original_deposits == 2
        assert aggregated_resources == 4
        assert packaging == 2
        assert dep_on == 2
        assert dep_by == 2
        assert dep_obo == 1
        assert has_rem_description
        assert has_agg_description

        assert ox_tag
        assert dc_tag
        assert rdf_tag