示例#1
0
    def __init__(self, project,uuid=None,attributes=None,add_default_type=True):
        """
        Create a session activity in the project's graph and register it with the project.

        :param project: project object; its ``graph`` and ``add_sessions`` are used here
        :param uuid: optional identifier to reuse; getUUID() supplies one when omitted
        :param attributes: optional attributes handed to the parent class constructor
        :param add_default_type: when true, add Constants.NIDM_SESSION as the record's prov:type
        :return: none

        """
        # store the identifier before get_uuid() is called below
        # (presumably get_uuid() returns self._uuid -- confirm on the class)
        self._uuid = getUUID() if uuid is None else uuid

        # generated and supplied identifiers both end up in the niiri namespace
        niiri = pm.Namespace("niiri", Constants.NIIRI)
        record_id = pm.QualifiedName(niiri, self.get_uuid())

        # execute default parent class constructor
        super(Session, self).__init__(project.graph, record_id, attributes)

        project.graph._add_record(self)

        if add_default_type:
            self.add_attributes({pm.PROV_TYPE: Constants.NIDM_SESSION})

        # carry the graph around, then let the project record this session
        self.graph = project.graph
        project.add_sessions(self)

        # acquisitions associated with this session
        self._acquisitions = []
示例#2
0
    def __init__(self, acquisition, attributes=None, uuid=None):
        """
        Create an acquisition object in the graph of the given acquisition activity
        and attach it to that activity.

        :param acquisition: acquisition activity object; its ``graph`` and
            ``add_acquisition_object`` are used here
        :param attributes: optional attributes handed to the parent class constructor
        :param uuid: optional uuid...used mostly for reading in existing NIDM document
        :return: none

        """
        # a fresh identifier is generated only when the caller did not supply one
        local_id = getUUID() if uuid is None else uuid
        graph = acquisition.graph

        # execute default parent class constructor
        super(AcquisitionObject, self).__init__(
            graph,
            pm.QualifiedName(pm.Namespace("nidm", Constants.NIDM), local_id),
            attributes)

        graph._add_record(self)

        # carry graph object around
        self.graph = graph
        # create link to acquisition activity
        acquisition.add_acquisition_object(self)
示例#3
0
    def __init__(self, session, attributes=None, uuid=None):
        """
        Create an acquisition activity in the session's graph and add it to the session.

        :param session: a session object; its ``graph`` and ``add_acquisition`` are used here
        :param uuid: optional uuid...used mostly for reading in existing NIDM document
        :param attributes: optional dictionary of attributes to add qname:value

        """
        # a fresh identifier is generated only when the caller did not supply one
        local_id = getUUID() if uuid is None else uuid
        record_id = pm.QualifiedName(pm.Namespace("niiri", Constants.NIIRI), local_id)

        # execute default parent class constructor
        super(Acquisition, self).__init__(session.graph, record_id, attributes)

        session.graph._add_record(self)

        self.add_attributes({pm.PROV_TYPE: Constants.NIDM_ACQUISITION_ACTIVITY})

        # acquisition objects associated with this activity
        self._acquisition_objects = []

        # carry graph object around
        self.graph = session.graph

        # add acquisition to session
        session.add_acquisition(self)
示例#4
0
文件: Project.py 项目: mih/PyNIDM
    def __init__(self, attributes=None, empty_graph=False, uuid=None):
        """
        Create the document graph and add this project record to it.

        :param attributes: optional dictionary of attributes handed to the parent class constructor
        :param empty_graph: if true, start from a bare pm.ProvDocument(); otherwise build a
            Constants.NIDMDocument with Constants.namespaces
        :param uuid: if not None, use the supplied uuid for the project instead of
            generating one (for reading nidm docs)

        """
        if empty_graph:
            document = pm.ProvDocument()
        else:
            document = Constants.NIDMDocument(namespaces=Constants.namespaces)
        self.graph = document

        # a fresh identifier is generated only when the caller did not supply one
        local_id = getUUID() if uuid is None else uuid

        # execute default parent class constructor
        super(Project, self).__init__(
            self.graph,
            pm.QualifiedName(pm.Namespace("nidm", Constants.NIDM), local_id),
            attributes)

        # add record to graph
        self.graph._add_record(self)
        # create empty sessions list
        self._sessions = []

        self.add_attributes({pm.PROV_TYPE: Constants.NIDM_PROJECT})
示例#5
0
def instantiate_template(prov_doc,instance_dict):
	'''
	Instantiate a prov template based on a dictionary setting for
	the prov template variables

	Side effects:
		instance_dict is modified in place: the keys tmpl:startTime,
		tmpl:endTime and tmpl:time are overwritten with the matching
		prov: qualified names.
		The module-level GLOBAL_UUID_DEF_NS is rebound when the template
		declares a namespace whose prefix equals GLOBAL_UUID_DEF_NS_PREFIX.

	Supported:
		entity and attribute var: matching
		multiple entity expansion

	Unsupported by now:
		linked entities
		multiple attribute expansion

	To Do: Handle core template expansion rules as described in
		https://ieeexplore.ieee.org/document/7909036/
		and maybe add additional expansion/composition rules for
		templates useful to compose ENES community workflow templates

	Args:
		prov_doc (ProvDocument): input prov document template
		instance_dict (dict): match dictionary

	Returns:
		the new ProvDocument: it receives the template's namespaces, the
		result of add_records() for the top-level records, and one bundle
		(filled by add_records()) per bundle of the template
	'''
	# NOTE: the docstring has to be the first statement of the body; it used to
	# follow the global declaration and was therefore not picked up as __doc__.
	global GLOBAL_UUID_DEF_NS

	#instance dict override: replace tmpl:startTime and tmpl:endTime with prov:startTime and prov:endTime
	prov_ns = prov.Namespace("prov", "http://www.w3.org/ns/prov#")
	instance_dict["tmpl:startTime"]=prov.QualifiedName(prov_ns, "startTime")
	instance_dict["tmpl:endTime"]=prov.QualifiedName(prov_ns, "endTime")
	instance_dict["tmpl:time"]=prov.QualifiedName(prov_ns, "time")

	#CHECK FOR NAMESPACE FOR VARGEN UUID
	for ns in prov_doc.namespaces:
		if ns.prefix==GLOBAL_UUID_DEF_NS_PREFIX:
			#uuid namespace defined in template? Use this one
			#(no break: the last namespace with that prefix wins, as before)
			GLOBAL_UUID_DEF_NS=ns

	new_doc = set_namespaces(prov_doc.namespaces,prov.ProvDocument())
	new_doc = add_records(prov_doc,new_doc,instance_dict)

	#snapshot the bundles before iterating over them
	for bundle in list(prov_doc.bundles):
		#the bundle identifier is matched as a node, so vargen: identifiers get a generated id
		bundle_id=match(bundle.identifier, instance_dict, True)
		new_bundle = new_doc.bundle(bundle_id)
		add_records(bundle, new_bundle,instance_dict)

	return new_doc
示例#6
0
文件: Utils.py 项目: khelm/PyNIDM
def add_metadata_for_subject(rdf_graph, subject_uri, namespaces, nidm_obj):
    """
    Cycles through triples for a particular subject and adds them to the nidm_obj

    For every (predicate, object) pair of the subject:

    * a prov:qualifiedAssociation predicate adds each prov:wasAssociatedWith agent
      as a person (recursing to copy that agent's own metadata) and then adds one
      qualified association per prov:hadRole found on the association nodes;
    * any other predicate is copied as an attribute, either as a qualified name
      (object passes validators.url and its namespace is in ``namespaces``) or as
      whatever get_RDFliteral_type returns for the object.

    :param rdf_graph: RDF graph object
    :param subject_uri: URI of subject to query for additional metadata
    :param namespaces: Namespaces in NIDM document; each item is read for ``.uri``
    :param nidm_obj: NIDM object to add metadata
    :return: None

    """
    #Cycle through remaining metadata and add attributes
    for predicate, objects in rdf_graph.predicate_objects(subject=subject_uri):
        #if find qualified association
        if predicate == URIRef(Constants.PROV['qualifiedAssociation']):
            #need to get associated prov:Agent uri, add person information to graph
            for agent in rdf_graph.objects(
                    subject=subject_uri,
                    predicate=Constants.PROV['wasAssociatedWith']):
                #add person to graph and also add all metadata
                person = nidm_obj.add_person(uuid=agent)
                #now add metadata for person (recursive call with the agent as subject)
                add_metadata_for_subject(rdf_graph=rdf_graph,
                                         subject_uri=agent,
                                         namespaces=namespaces,
                                         nidm_obj=person)

            #get role information
            # NOTE(review): `person` below is whatever the agent loop bound last; if the
            # subject has no prov:wasAssociatedWith triple it is unbound here, and with
            # several agents every role is attached to the last one -- confirm intent.
            for bnode in rdf_graph.objects(
                    subject=subject_uri,
                    predicate=Constants.PROV['qualifiedAssociation']):
                #create temporary resource for this association node
                r = Resource(rdf_graph, bnode)
                #get the object for this bnode with predicate Constants.PROV['hadRole']
                for r_obj in r.objects(predicate=Constants.PROV['hadRole']):
                    #create qualified names for objects
                    obj_nm, obj_term = split_uri(r_obj._identifier)
                    #only namespaces known to the document produce an association
                    for uris in namespaces:
                        if uris.uri == URIRef(obj_nm):
                            #create qualified association in graph
                            nidm_obj.add_qualified_association(
                                person=person,
                                role=pm.QualifiedName(uris, obj_term))

        else:
            if validators.url(objects):
                #create qualified names for objects
                obj_nm, obj_term = split_uri(objects)
                #a URL whose namespace is not in `namespaces` adds nothing
                for uris in namespaces:
                    if uris.uri == URIRef(obj_nm):
                        nidm_obj.add_attributes(
                            {predicate: pm.QualifiedName(uris, obj_term)})
            else:
                #not a URL: store the value produced by get_RDFliteral_type

                nidm_obj.add_attributes(
                    {predicate: get_RDFliteral_type(objects)})
示例#7
0
    def __init__(self, parentDoc=None, attributes=None):
        """
        Attach a new project record to a document graph.

        :param parentDoc: optional ProvDocument to use as graph; Constants.p_graph is
            used when it is missing or falsy
        :param attributes: optional dictionary of attributes handed to the parent class constructor

        """
        # set graph document
        self.graph = parentDoc if parentDoc else Constants.p_graph

        # every project gets a freshly generated identifier in the nidm namespace
        project_id = pm.QualifiedName(pm.Namespace("nidm", Constants.NIDM), getUUID())

        # execute default parent class constructor
        super(Project, self).__init__(self.graph, project_id, attributes)
        self.graph._add_record(self)

        # create empty sessions list
        self._sessions = []

        # original author's note: prov toolbox doesn't like 2 attributes with PROV_TYPE
        # in 1 add_attributes call, hence one call per type
        for prov_type in (Constants.NIDM_PROJECT, Constants.NIDM_PROJECT_TYPE):
            self.add_attributes({pm.PROV_TYPE: prov_type})
示例#8
0
文件: provrdf.py 项目: vreuter/prov
 def decode_rdf_representation(self, literal, graph):
     """
     Turn an RDF term into the value stored on a prov record.

     RDFLiteral: xsd:QName and xsd:dateTime get dedicated conversions, anything
     else is wrapped in a Literal carrying its datatype and language tag.
     URIRef: resolved through valid_identifier(); if that yields None, a namespace
     is registered on self.document and a qualified name is built from it.
     Any other value is returned unchanged.

     :param literal: the RDF term (or plain Python value) to decode
     :param graph: graph whose namespace_manager is used to split unknown URIs
     """
     if isinstance(literal, RDFLiteral):
         datatype = getattr(literal, 'datatype', None)
         langtag = getattr(literal, 'language', None)

         # prefer the parsed Python value, fall back to the literal itself
         value = literal.value
         if value is None:
             value = literal
         if datatype and 'XMLLiteral' in datatype:
             value = literal
         if datatype and 'base64Binary' in datatype:
             value = base64.standard_b64encode(value)

         if datatype == XSD['QName']:
             return pm.Literal(literal, datatype=XSD_QNAME)
         if datatype == XSD['dateTime']:
             return dateutil.parser.parse(literal)
         # The literal of standard Python types is not converted here
         # It will be automatically converted when added to a record by _auto_literal_conversion()
         return Literal(value, self.valid_identifier(datatype), langtag)

     if not isinstance(literal, URIRef):
         # simple type, just return it
         return literal

     rval = self.valid_identifier(literal)
     if rval is None:
         # unknown namespace: register it on the document, then strip it off the URI
         prefix, iri, _ = graph.namespace_manager.compute_qname(literal)
         ns = self.document.add_namespace(prefix, iri)
         rval = pm.QualifiedName(ns, literal.replace(ns.uri, ''))
     return rval
示例#9
0
 def add_acquisition(self, acquisition):
     """
     Remember the acquisition on this object and mark it as dct:isPartOf this object.

     :param acquisition: object exposing ``add_attributes``
     """
     self._acquisitions.append(acquisition)
     # create links in graph
     is_part_of = pm.QualifiedName(pm.Namespace("dct", Constants.DCT), 'isPartOf')
     acquisition.add_attributes({is_part_of: self})
示例#10
0
    def __init__(self, project, attributes=None, uuid=None):
        """
        Create a derivative activity in the project's graph and register it with the project.

        :param project: project object; its ``graph`` and ``add_derivatives`` are used here
        :param uuid: optional uuid...used mostly for reading in existing NIDM document
        :param attributes: optional dictionary of attributes to add qname:value

        """
        if uuid is None:
            # generated identifiers are qualified names in the niiri namespace
            self._uuid = getUUID()
            record_id = pm.QualifiedName(pm.Namespace("niiri", Constants.NIIRI), self.get_uuid())
        else:
            # supplied identifiers are used verbatim as a plain pm.Identifier
            self._uuid = uuid
            record_id = pm.Identifier(uuid)

        # execute default parent class constructor
        super(Derivative, self).__init__(project.graph, record_id, attributes)

        project.graph._add_record(self)

        # derivative objects associated with this activity
        self._derivative_objects = []

        # carry graph object around
        self.graph = project.graph

        project.add_derivatives(self)
示例#11
0
    def __init__(self, project, attributes=None, uuid=None, add_default_type=True):
        """
        Create a data element entity in the project's graph and register it with the project.

        :param project: NIDM project to add data element entity to
        :param attributes: optional attributes to add to entity
        :param uuid: optional uuid...used mostly for reading in existing NIDM document
        :param add_default_type: when true, add Constants.NIDM_DATAELEMENT as the record's prov:type
        :return: none

        """
        if uuid is None:
            # generated identifiers are qualified names in the niiri namespace
            record_id = pm.QualifiedName(pm.Namespace("niiri", Constants.NIIRI), getUUID())
        else:
            # supplied identifiers are used verbatim as a plain pm.Identifier
            record_id = pm.Identifier(uuid)

        # execute default parent class constructor
        super(DataElement, self).__init__(project.graph, record_id, attributes)

        project.graph._add_record(self)

        if add_default_type:
            self.add_attributes({pm.PROV_TYPE: Constants.NIDM_DATAELEMENT})

        # register with the project first, then carry the graph object around
        project.add_dataelements(self)
        self.graph = project.graph

        # list kept on the element (named after derivative objects)
        self._derivative_objects = []
示例#12
0
    def __init__(self,
                 attributes=None,
                 empty_graph=False,
                 uuid=None,
                 add_default_type=True):
        """
        Create the NIDM document graph and add this project record to it.

        :param attributes: optional dictionary of attributes handed to the parent class constructor
        :param empty_graph: if true, the NIDMDocument is built with namespaces=None,
            otherwise with Constants.namespaces
        :param uuid: if not None, use the supplied uuid for the project instead of
            generating one (for reading nidm docs)
        :param add_default_type: when true, add Constants.NIDM_PROJECT as the record's prov:type

        """
        namespaces = None if empty_graph else Constants.namespaces
        self.graph = Constants.NIDMDocument(namespaces=namespaces)

        # store the identifier before get_uuid() is called below
        # (presumably get_uuid() returns self._uuid -- confirm on the class)
        self._uuid = getUUID() if uuid is None else uuid

        # execute default parent class constructor
        project_id = pm.QualifiedName(pm.Namespace("niiri", Constants.NIIRI),
                                      self.get_uuid())
        super(Project, self).__init__(self.graph, project_id, attributes)

        # add record to graph
        self.graph._add_record(self)

        # containers for the objects attached to this project later on
        self._sessions = []
        self._derivatives = []
        self._dataelements = []

        if add_default_type:
            self.add_attributes({pm.PROV_TYPE: Constants.NIDM_PROJECT})
示例#13
0
def match(eid, mdict, node, numEntries=1):
    '''
    Look an identifier up in the match dictionary.

    A prov.QualifiedName is looked up as "prefix:localpart"; any other value is
    used as the key directly.

    When ``node`` is true and the key starts with "vargen:", ``numEntries`` new
    qualified names (GLOBAL_UUID_NS + uuid4) are generated, stored in ``mdict``
    under that key and returned: a single name for one entry on a new key, a
    list once more than one name is held.

    Args:
        eid (string or prov.QualifiedName): input identifier
        mdict (dict): match dictionary, updated in place for vargen keys
        node (bool): enables identifier generation for vargen keys
        numEntries (int): how many identifiers to generate for a vargen key
    Returns:
        the generated identifier(s) for a vargen node, otherwise the value
        stored for the key in mdict, or eid itself when there is none
    '''
    key = eid
    if isinstance(key, prov.QualifiedName):
        key = key.namespace.prefix + ":" + key.localpart

    # not optimal, need ability to provide custom namespace for generated uuids
    if node and str(key).startswith("vargen:"):
        # NOTE(review): if the key is already in mdict when this call starts, the
        # result starts from None (a leading None entry, or an AttributeError when
        # the stored value is already a list) -- confirm whether callers rely on it.
        generated = None
        for _ in range(numEntries):
            fresh = str(uuid.uuid4())
            if key in mdict:
                if not isinstance(mdict[key], list):
                    # second identifier for this key: promote both holders to lists
                    mdict[key] = [mdict[key]]
                    generated = [generated]
                new_name = prov.QualifiedName(GLOBAL_UUID_NS, fresh)
                mdict[key].append(new_name)
                generated.append(new_name)
            else:
                generated = prov.QualifiedName(GLOBAL_UUID_NS, fresh)
                mdict[key] = generated
        return generated

    return mdict[key] if key in mdict else eid
示例#14
0
def test_GetProjectInstruments():
    """
    Build a project with two assessment acquisitions, write it to a turtle file
    and check that Query.GetProjectInstruments reports both assessment types.
    """
    kwargs = {
        Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseII",
        Constants.NIDM_PROJECT_IDENTIFIER: 9610,
        Constants.NIDM_PROJECT_DESCRIPTION: "Test investigation"
    }
    proj_uuid = "_123456gpi"
    project = Project(uuid=proj_uuid, attributes=kwargs)

    session = Session(project)
    acq = AssessmentAcquisition(session)

    kwargs = {
        pm.PROV_TYPE:
        pm.QualifiedName(pm.Namespace("nidm", Constants.NIDM),
                         "NorthAmericanAdultReadingTest")
    }
    # constructed for its side effect of attaching the object to the acquisition
    AssessmentObject(acq, attributes=kwargs)

    acq2 = AssessmentAcquisition(session)

    kwargs = {
        pm.PROV_TYPE:
        pm.QualifiedName(pm.Namespace("nidm", Constants.NIDM),
                         "PositiveAndNegativeSyndromeScale")
    }
    AssessmentObject(acq2, attributes=kwargs)

    #save a turtle file
    ttl_path = "test_gpi.ttl"
    turtle = project.serializeTurtle()
    with open(ttl_path, 'w') as f:
        f.write(turtle)

    # remove the file even when the query raises, so a failure leaves nothing behind
    try:
        assessment_list = Query.GetProjectInstruments([ttl_path], proj_uuid)
    finally:
        remove(ttl_path)

    assessment_types = [
        str(x) for x in assessment_list['assessment_type'].to_list()
    ]
    assert Constants.NIDM + "NorthAmericanAdultReadingTest" in assessment_types
    assert Constants.NIDM + "PositiveAndNegativeSyndromeScale" in assessment_types
示例#15
0
    def add_sessions(self,session):
        """
        Attach a session to this project unless it is already attached.

        A newly attached session is stored in the sessions list and receives a
        dct:isPartOf attribute pointing at this project.

        :param session: object of type "Session" from nidm API
        :return true if session object added to project, false if session object is already in project

        """
        # nothing to do for a session we already hold
        if session in self._sessions:
            return False

        self._sessions.append(session)
        # create links in graph
        is_part_of = pm.QualifiedName(pm.Namespace("dct",Constants.DCT),'isPartOf')
        session.add_attributes({is_part_of: self})
        return True
示例#16
0
def setEntry(rec, regNS):
	"""
	interpret value provided via v3 bindings,
	check if qualified name or value,
	handle datatypes accordingly

	Recognised keys of rec:
		@id	qualified name "prefix:local"; converted to a prov.QualifiedName
			when the prefix belongs to one of the namespaces in regNS
		@value	plain value; takes precedence over @id when both are present
		@type	datatype for @value; a string is first translated with
			xsd_datype_to_prov_datatype, the pair is returned as prov.Literal

	Args:
		rec : a key value pair read from v3 bindings file
		regNS: the namespaces read from the context section of the v3 bindings file
	Returns:
		"prov-ified" value, value as-is as fallback
	Raises:
		BindingFileException: when @id contains more than one ":" (message names
			the offending qualified name), or when any other error occurs
			while interpreting rec
	"""

	# basestring only exists on Python 2; on Python 3 plain str is the text type
	try:
		string_types = basestring
	except NameError:
		string_types = str

	out=rec
	try:
		if "@id" in rec:
			toks=rec["@id"].split(":")
			if len(toks) > 2:
				raise BindingFileException( "Invalid Qualified Name " + rec["@id"] + " found in V3 Json Binding " +  repr(rec))
			for ns in regNS:
				if ns.prefix==toks[0]:
					out=prov.QualifiedName(ns, toks[1])
		if "@value" in rec:
			if "@type" in rec:
				dt=rec["@type"]
				if isinstance(dt, string_types):
					dt=xsd_datype_to_prov_datatype(dt, regNS)
				out=prov.Literal(rec["@value"], datatype=dt)
			else:
				out=rec["@value"]
	except BindingFileException:
		# keep the specific message instead of replacing it with the generic one below
		raise
	except Exception:
		# only real errors are wrapped; KeyboardInterrupt/SystemExit pass through
		raise BindingFileException("Error parsing " + repr(rec))
	return out
示例#17
0
 def add_derivatives(self, derivative):
     """
     Attach a derivative to this project unless it is already attached.

     A newly attached derivative is stored in the derivatives list and receives
     a dct:isPartOf attribute pointing at this project.
     :param derivative: object of type "Derivative" from nidm API
     :return true if derivative object added to project, false if derivative object is already in project
     """
     # nothing to do for a derivative we already hold
     if derivative in self._derivatives:
         return False

     self._derivatives.append(derivative)
     # create links in graph
     is_part_of = pm.QualifiedName(pm.Namespace("dct", Constants.DCT), 'isPartOf')
     derivative.add_attributes({is_part_of: self})
     return True
示例#18
0
文件: Session.py 项目: glatard/PyNIDM
    def __init__(self, project, attributes=None):
        """
        Create a session activity in the project's graph.

        :param project: a project object; only its ``graph`` is used here
        :param attributes: optional attributes handed to the parent class constructor
        :return: none

        """
        graph = project.graph
        # every session gets a freshly generated identifier in the nidm namespace
        session_id = pm.QualifiedName(pm.Namespace("nidm", Constants.NIDM), getUUID())

        # execute default parent class constructor
        super(Session, self).__init__(graph, session_id, attributes)
        graph._add_record(self)

        self.add_attributes({pm.PROV_TYPE: Constants.NIDM_SESSION})
        self.graph = graph

        # acquisitions associated with this session
        self._acquisitions = []
示例#19
0
 def decode_rdf_representation(self, literal, graph):
     """
     Turn an RDF term into the value stored on a prov record.

     RDFLiteral: xsd:QName, xsd:dateTime, xsd:gYear and xsd:gYearMonth get
     dedicated conversions, anything else is wrapped in a pm.Literal carrying
     its datatype and language tag.
     URIRef: resolved through valid_identifier(); if that yields None, a namespace
     is registered on self.document and a qualified name is built from it.
     Any other value is returned unchanged.

     :param literal: the RDF term (or plain Python value) to decode
     :param graph: graph whose namespace_manager is used to split unknown URIs
     """
     if isinstance(literal, RDFLiteral):
         datatype = getattr(literal, "datatype", None)
         langtag = getattr(literal, "language", None)

         # prefer the parsed Python value, fall back to the literal itself
         value = literal.value
         if value is None:
             value = literal
         if datatype and "XMLLiteral" in datatype:
             value = literal
         if datatype and "base64Binary" in datatype:
             value = base64.standard_b64encode(value)

         if datatype == XSD["QName"]:
             return pm.Literal(literal, datatype=XSD_QNAME)
         if datatype == XSD["dateTime"]:
             return dateutil.parser.parse(literal)
         if datatype == XSD["gYear"]:
             year = dateutil.parser.parse(literal).year
             return pm.Literal(year, datatype=self.valid_identifier(datatype))
         if datatype == XSD["gYearMonth"]:
             moment = dateutil.parser.parse(literal)
             year_month = "{0}-{1:02d}".format(moment.year, moment.month)
             return pm.Literal(year_month,
                               datatype=self.valid_identifier(datatype))
         # The literal of standard Python types is not converted here
         # It will be automatically converted when added to a record by
         # _auto_literal_conversion()
         return pm.Literal(value, self.valid_identifier(datatype), langtag)

     if not isinstance(literal, URIRef):
         # simple type, just return it
         return literal

     rval = self.valid_identifier(literal)
     if rval is None:
         # unknown namespace: register it on the document, then strip it off the URI
         prefix, iri, _ = graph.namespace_manager.compute_qname(literal)
         ns = self.document.add_namespace(prefix, iri)
         rval = pm.QualifiedName(ns, literal.replace(ns.uri, ""))
     return rval
示例#20
0
    def __init__(self, acquisition, attributes=None):
        """
        Create an acquisition object in the graph of the given acquisition activity
        and attach it to that activity.

        :param acquisition: acquisition activity object; its ``graph`` and
            ``add_acquisition_object`` are used here
        :param attributes: optional attributes to add to entity
        :return: none

        """
        graph = acquisition.graph
        # every object gets a freshly generated identifier in the nidm namespace
        object_id = pm.QualifiedName(pm.Namespace("nidm", Constants.NIDM), getUUID())

        # execute default parent class constructor
        super(AcquisitionObject, self).__init__(graph, object_id, attributes)
        graph._add_record(self)

        # carry graph object around
        self.graph = graph
        # create link to acquisition activity
        acquisition.add_acquisition_object(self)
示例#21
0
    def __init__(self, derivative,attributes=None, uuid=None):
        """
        Create a derivative object in the graph of the given derivative activity
        and attach it to that activity.

        :param derivative: a Derivative activity object; its ``graph`` and
            ``add_derivative_object`` are used here
        :param attributes: optional attributes to add to entity
        :param uuid: optional uuid...used mostly for reading in existing NIDM document
        :return: none

        """
        if uuid is None:
            # generated identifiers are qualified names in the niiri namespace
            record_id = pm.QualifiedName(pm.Namespace("niiri", Constants.NIIRI), getUUID())
        else:
            # supplied identifiers are used verbatim as a plain pm.Identifier
            record_id = pm.Identifier(uuid)

        # execute default parent class constructor
        super(DerivativeObject, self).__init__(derivative.graph, record_id, attributes)

        derivative.graph._add_record(self)

        # carry graph object around
        self.graph = derivative.graph
        # create link to derivative activity
        derivative.add_derivative_object(self)
示例#22
0
    def __init__(self, session, attributes=None):
        """
        Create an acquisition activity in the session's graph and mark it as part of the session.

        :param session: a session object; its ``graph`` is used and it becomes the
            value of the "dct:isPartOf" attribute
        :param attributes: optional attributes handed to the parent class constructor

        """
        graph = session.graph
        # every acquisition gets a freshly generated identifier in the nidm namespace
        acquisition_id = pm.QualifiedName(pm.Namespace("nidm", Constants.NIDM), getUUID())

        # execute default parent class constructor
        super(Acquisition, self).__init__(graph, acquisition_id, attributes)
        graph._add_record(self)

        # type first, then the link back to the owning session
        self.add_attributes({pm.PROV_TYPE: Constants.NIDM_ACQUISITION_ACTIVITY})
        self.add_attributes({str("dct:isPartOf"): session})

        # acquisition objects associated with this activity
        self._acquisition_objects = []

        # carry graph object around
        self.graph = graph
示例#23
0
def checkLinked(nodes, instance_dict):
    """
    Order template nodes by their tmpl:linked hierarchy and count bound instances.

    Each node carrying a tmpl:linked attribute is treated as a dependent of the
    attribute's value. Starting from every root (a link target that is not itself
    a dependent), levels are assigned depth-first; nodes outside any link group
    all get the next free level. Nodes are then sorted by level.

    Args:
        nodes: iterable of records exposing ``identifier`` and ``attributes``
            (attributes are read as (name, value) pairs)
        instance_dict (dict): match dictionary handed to match()
    Returns:
        dict with keys
            "nodes": the nodes sorted by assigned level,
            "numInstances": identifier -> number of bound instances,
            "linkedGroups": list of {identifier: level} dicts, one per root and
                one per unlinked node
    Raises:
        IncorrectNumberOfBindingsForGroupVariable: a list-valued binding inside a
            link group has a length different from the non-zero count already
            seen for that group
    """

    tmpl_linked_qn = prov.QualifiedName(
        prov.Namespace("tmpl", "http://openprovenance.org/tmpl#"), "linked")
    #make tmpl:linked sweep and determine order
    # ASSUMPTION (original author): Each entity can only be linked to one "ancestor" entity,
    #   one ancestor entity can be linked to by multiple "successor" entities, NO CYCLES!
    # -> This implies: There is only one root and the network of linked rels is a directed acyclic graph

    # dependent identifier -> identifier it is linked to (a later attribute overwrites an earlier one)
    linkedDict = dict()
    linkedGroups = list()
    for rec in nodes:
        eid = rec.identifier
        for attr in rec.attributes:
            if tmpl_linked_qn == attr[0]:
                linkedDict[eid] = attr[1]

    dependents = []
    roots = []
    # NOTE(review): intermediates is filled below but not read again in this function
    intermediates = []
    # dependents ends up holding the keys of linkedDict
    for id in linkedDict:
        if id not in dependents:
            dependents.append(id)

    # a link target that is not itself a dependent is a root
    # NOTE(review): a root linked to by several dependents is appended once per
    # dependent, so it is traversed more than once below -- confirm intended
    for id in linkedDict:
        if linkedDict[id] not in dependents:
            roots.append(linkedDict[id])
        else:
            intermediates.append(linkedDict[id])

    def dfs_levels(node, links, level):
        # Return {identifier: level} for node and everything linked to it,
        # children (entries of links whose value is node) sitting one level deeper.
        lower = dict()
        for k in [k for k, v in links.items() if v == node]:
            ret = dfs_levels(k, links, level + 1)
            if ret != None:
                lower.update(ret)
        myval = {node: level}
        lower.update(myval)
        return (lower)

    numInstances = dict()
    # identifier -> level, accumulated over all link groups
    combRoot = dict()
    # traverse from root; offset is the level at which the next group starts
    offset = 0
    for r in roots:
        retval = dfs_levels(r, linkedDict, offset)
        #get max rank
        maxr = max(retval.values())

        # we need to check how many entries we have
        maxEntries = 0
        for rec in nodes:
            if rec.identifier in retval:
                eid = rec.identifier
                # node=False: plain lookup, no identifiers are generated
                neid = match(eid, instance_dict, False)
                #assume single instance bound to this node
                length = 0
                if not isinstance(neid, list):
                    length = 1
                if neid == eid:
                    # no match: if unassigned var or vargen variable, assume length 0
                    length = 0
                if length > maxEntries:
                    maxEntries = length
                if isinstance(neid, list):
                    # list is assigned to node, now all lengths must be equal
                    length = len(neid)
                    if length != maxEntries:
                        if maxEntries > 0:
                            raise IncorrectNumberOfBindingsForGroupVariable(
                                "Linked entities must have same number of bound instances!"
                            )
                        # first non-zero count seen for this group
                        maxEntries = length

        # every member of the group shares the group's instance count
        for n in retval:
            numInstances[n] = maxEntries
        combRoot.update(retval)
        linkedGroups.append(retval)
        offset = maxr + 1

    # nodes outside every link group: each forms its own group at level `offset`
    for rec in nodes:
        if rec.identifier not in combRoot:
            combRoot[rec.identifier] = offset
            linkedGroups.append({rec.identifier: offset})
            eid = rec.identifier
            # NOTE(review): the lookup here uses eid._str while the group loop
            # above passes eid itself -- confirm both are meant to hit the same key
            neid = match(eid._str, instance_dict, False)
            if isinstance(neid, list):
                numInstances[eid] = len(neid)
            else:
                numInstances[eid] = 1
        #need to remember number of instances for each var
        # when multiple link groups rank accordingly

    # reorder nodes based on the level assigned through the tmpl:linked hierarchy
    fnc = lambda x: combRoot[x.identifier]
    nodes_sorted = sorted(nodes, key=fnc)

    return {
        "nodes": nodes_sorted,
        "numInstances": numInstances,
        "linkedGroups": linkedGroups
    }
示例#24
0
def set_rel(new_entity, rel, idents, expAttr, linkedRelAttrs, otherAttrs):
    '''
    Helper function that adds the expanded instances of one relation.

    The values of the formal attributes in "expAttr" are sorted into the
    groups given by "linkedRelAttrs". Values inside one group are combined
    position-wise (zip); the groups themselves are combined through a
    cartesian product. "make_rel" is called once per resulting combination.

    Args:
        new_entity:     handed unchanged to "make_rel" as first argument
        rel:            relation being expanded; handed to "make_rel" and
                        used in the error message
        idents:         identifier(s) for the relation. May be falsy (no
                        identifier), a list (one identifier per expanded
                        relation), or a single object exposing "_str":
                        a "vargen:" prefix yields a fresh uuid identifier
                        per relation, a "var:" prefix yields no identifier
        expAttr:        mapping formal attribute -> sequence of values; its
                        iteration order is the order in which the values are
                        handed to "make_rel"
        linkedRelAttrs: iterable of groups (anything supporting "in") that
                        name the attributes belonging together
        otherAttrs:     handed unchanged to every "make_rel" call
    Returns:
        None
    Raises:
        IncorrectNumberOfBindingsForStatementVariableException: "idents" is
            a list whose length differs from the number of expanded relations
    '''

    def _flatten(seq):
        # recursively unnest lists/tuples into one flat list
        flat = []
        for item in seq:
            if isinstance(item, (list, tuple)):
                flat.extend(_flatten(item))
            else:
                flat.append(item)
        return flat

    attr_names = list(expAttr)

    #create groups, remembering the original position of every attribute
    attrlists = []
    indexlists = []
    visited = []
    for g in linkedRelAttrs:
        alist = []
        ilist = []
        for pos, a in enumerate(attr_names):
            if a in g:
                visited.append(a)
                alist.append(expAttr[a])
                ilist.append(pos)
        attrlists.append(alist)
        indexlists.append(ilist)

    #attributes that are in no group at all form a group of their own,
    #otherwise their values would be missing from the expanded relation
    for pos, a in enumerate(attr_names):
        if a not in visited:
            attrlists.append([expAttr[a]])
            indexlists.append([pos])
            visited.append(a)

    #combine the values inside each group position-wise
    outLists = [list(zip(*a)) for a in attrlists]

    idx = _flatten(indexlists)

    #materialise the product: its length is needed for the idents check,
    #and itertools.product objects do not support len()
    relList = list(itertools.product(*outLists))

    #check identifier
    # if var: namespace: unbound variable, ignore
    # if vargen: namespace : create uuid for each ele
    # if not var and not vargen: if same number of idents as elements: iterate, else: fail
    getIdent = False
    makeUUID = False
    if idents:
        if isinstance(idents, list):
            if len(idents) != len(relList):
                raise IncorrectNumberOfBindingsForStatementVariableException(
                    "Wrong number of idents for expanded rel " + repr(rel))
            getIdent = True
        elif idents._str[:7] == "vargen:":
            #make uuid for each
            makeUUID = True
        elif idents._str[:4] == "var:":
            #unbound variable: expand without identifier
            idents = None

    for cnt, element in enumerate(relList):
        out = _flatten(element)
        #out[j] belongs to original position idx[j]; sorting the pairs by
        #position restores the order of the formal attributes
        outordered = [
            val for _, val in sorted(zip(idx, out), key=lambda pair: pair[0])
        ]
        if getIdent:
            make_rel(new_entity, rel, idents[cnt], outordered, otherAttrs)
        elif makeUUID:
            make_rel(new_entity, rel,
                     prov.QualifiedName(GLOBAL_UUID_NS, str(uuid.uuid4())),
                     outordered, otherAttrs)
        else:
            make_rel(new_entity, rel, idents, outordered, otherAttrs)
示例#25
0
def checkLinked(nodes, instance_dict):
    """
    Identify groups of "tmpl:linked" variables in the current template.

    Arguments:
        nodes:          list of all variables in the template; every element
                        must expose "identifier" and "attributes"
        instance_dict:  lookup table with substitutes from bindings, handed
                        to "match"
    Returns:
        dict with following keys:
            "nodes":    template variables sorted so that each link group
                        forms a contiguous sequence ordered by "direction"
                        of tmpl:linked, eg for

                            "var b tmpl:linked to var a
                             var c tmpl:linked to var b
                             var e tmpl:linked to var d"

                        we get the order var a, var b, var c, var d, var e.
                        Variables without any link come last.

            "numInstances": dict identifier -> number of instances.
                        All members of a link group get the same number;
                        a variable outside any group gets the length of its
                        bound list, or 1 if "match" returns no list.

            "linkedGroups": list of dicts, one per link group, each mapping
                        the identifiers of the group to their sort level;
                        every unlinked variable forms a one-entry group.
    Raises:
        IncorrectNumberOfBindingsForGroupVariable: a list-bound variable of
            a link group has a different number of instances than the
            number already established for that group
    """

    #we need that for lookup
    tmpl_linked_qn = prov.QualifiedName(
        prov.Namespace("tmpl", "http://openprovenance.org/tmpl#"), "linked")

    #make tmpl:linked sweep and determine order
    #
    # we essentially create a graph containing all "tmpl:linked" ties and
    # the involved nodes
    #
    # ASSUMPTION: Each entity can only be linked to one "ancestor" entity,
    #             one ancestor entity can be linked to by multiple
    #             "successor" entities. NO CYCLES!
    # -> This implies: There is only one root in each link group and
    #    the network of linked rels is a directed acyclic graph
    linkedDict = dict()  # child identifier -> ancestor it is linked to
    linkedGroups = list()
    for rec in nodes:
        eid = rec.identifier
        for attr in rec.attributes:
            if tmpl_linked_qn == attr[0]:
                linkedDict[eid] = attr[1]

    # determine which of the variables is a "root", i.e. only linked to by
    # other vars. A root with several direct children shows up once per
    # child in linkedDict, so it is de-duplicated here; otherwise its group
    # would be traversed and appended to linkedGroups several times.
    roots = []
    for ancestor in linkedDict.values():
        if ancestor not in linkedDict and ancestor not in roots:
            roots.append(ancestor)

    def dfs_levels(node, links, level):
        """
        Recursive depth first search: return a dict that maps "node" and
        everything linked to it (directly or indirectly) to its level.
        """
        lower = dict()
        for child in [k for k, v in links.items() if v == node]:
            ret = dfs_levels(child, links, level + 1)
            if ret is not None:
                lower.update(ret)
        lower[node] = level
        return lower

    numInstances = dict()
    combRoot = dict()  # identifier -> sort level over all groups
    #traverse from root
    offset = 0
    for r in roots:
        retval = dfs_levels(r, linkedDict, offset)
        #get max rank
        maxr = max(retval.values())

        # we need to check how many entries we have
        maxEntries = 0
        for rec in nodes:
            if rec.identifier not in retval:
                continue
            eid = rec.identifier
            neid = match(eid, instance_dict, False)
            #assume single instance bound to this node
            length = 0
            if not isinstance(neid, list):
                length = 1
            if neid == eid:
                #no match: if unassigned var or vargen variable, assume length 0
                length = 0
            if length > maxEntries:
                maxEntries = length
            if isinstance(neid, list):
                #list is assigned to node, now all lengths must be equal
                length = len(neid)
                if length != maxEntries:
                    if maxEntries > 0:
                        raise IncorrectNumberOfBindingsForGroupVariable("Linked entities must have same number of bound instances!")
                    maxEntries = length

        for n in retval:
            numInstances[n] = maxEntries
        combRoot.update(retval)
        linkedGroups.append(retval)
        #the next group starts behind the deepest level of this one
        offset = maxr + 1

    #every variable outside a link group forms a group of its own
    for rec in nodes:
        if rec.identifier not in combRoot:
            combRoot[rec.identifier] = offset
            linkedGroups.append({rec.identifier: offset})
            eid = rec.identifier
            # NOTE(review): the group loop above hands "eid" to match(),
            # this call hands "eid._str" - confirm which form match() expects
            neid = match(eid._str, instance_dict, False)
            if isinstance(neid, list):
                numInstances[eid] = len(neid)
            else:
                numInstances[eid] = 1

    #reorder nodes based on tmpl:linked hierarchy
    nodes_sorted = sorted(nodes, key=lambda rec: combRoot[rec.identifier])

    return {"nodes": nodes_sorted, "numInstances": numInstances, "linkedGroups": linkedGroups}
示例#26
0
def set_rel(new_entity,rel,idents, expAttr, linkedRelAttrs, otherAttrs):
    '''
    helper function to add specific relations according to relation type
    performs "tmpl:linked" aware expansion and passes relation specific as well as generic attributes

    Args:
        new_entity : Bundle or ProvDoc to be created via template expansion
        rel     :    relation from template to be substituted/expanded
        idents  :    optional identifier(s) for relation, can be None.
                     A list must hold one identifier per expanded relation;
                     a single identifier is inspected through its "_str":
                     "vargen:" creates a uuid identifier per relation,
                     "var:" (unbound variable) expands without identifier
        expAttr:     ordered dict of formal attributes to be passed to relation constructor
                     sequence of formal attrs must be provided in order as specified in header of func "make_rel"
        linkedRelAttrs: attributes grouped by link group, important information for expansion.
        otherAttrs:  mapping of optional non-specified attributes for the relation.
                     A list value is indexed by the running number of the
                     expanded relation, any other value is used as is
    Returns:
        None - calls "make_rel" which instantiates relations in the passed "new_entity"
    Raises:
        IncorrectNumberOfBindingsForStatementVariableException: "idents" is a
            list whose length differs from the number of expanded relations
        IncorrectNumberOfBindingsForStatementVariable: a list in "otherAttrs"
            has neither exactly one element nor at least one element per
            expanded relation


    The basic concept of expansion including tmpl:linked is conceived as follows:

    consider all the group level variables (See https://provenance.ecs.soton.ac.uk/prov-template/#dfn-group-variable)
    assigned to formal relation attributes sorted into groups based on whether they are defined as linked or not
    If influencer, influencee or secondary vars are in the same link group, there is no cartesian expansion

    example: wasAssociatedWith(id;a,ag,pl,attrs), formal attrs are a(ctivity), ag(ent), pl(an)

    5 modes:
        1) a linked w. ag linked w. p   -> the variables assigned to these attributes must all have the same number of instances
                                           which are combined according to their position

        2) a linked with ag, pl not linked -> two of the assigned variables are linked, the third is not, we have
           a linked with pl, ag not linked    " numInstances(linked var1) == numInstances(linked var2)"
           ag linked with pl, a not linked                      times
                                                        "numinstances(unlinked var)"
                                                                expansions

        3) a, ag and pl are not linked with each other  -> we have numInstances(a) x numInstances(ag) x numInstances(pl) expansions

    '''

    def _flatten(seq):
        # recursively unnest lists/tuples into one flat list
        flat = []
        for item in seq:
            if isinstance(item, (list, tuple)):
                flat.extend(_flatten(item))
            else:
                flat.append(item)
        return flat

    #create efficient data structures for expansion with tmpl:linked constraints
    attr_names = list(expAttr)
    attrlists = []
    indexlists = []
    #separate attribute values by group, but remember their original order in ilist
    attrVisited = []
    for g in linkedRelAttrs:
        alist = []
        ilist = []
        for pos, a in enumerate(attr_names):
            if a in g:
                attrVisited.append(a)
                alist.append(expAttr[a])
                ilist.append(pos)
        attrlists.append(alist)
        indexlists.append(ilist)

    #Some of the variables were not present in the linked groups.
    if len(attrVisited) != len(expAttr):
        for pos, a in enumerate(attr_names):
            if a not in attrVisited:
                attrlists.append([expAttr[a]])
                indexlists.append([pos])
                attrVisited.append(a)

    # concatenate values in each link group
    outLists = [list(zip(*a)) for a in attrlists]

    #we need this info to maintain the order of formal attributes
    idx = _flatten(indexlists)

    log.debug("OUTLISTS")
    log.debug(outLists)

    #create cartesian product of grouped variables, this way, only those not in the same group get expanded
    relList = list(itertools.product(*outLists))
    log.debug(relList)

    #check identifier
    # if var: namespace: unbound variable, ignore
    # if vargen: namespace : create uuid for each ele
    # if not var and not vargen: if same number of idents as elements: iterate, else: fail
    #
    # This check has to come after relList is built: it compares against
    # the number of expanded relations.
    # NOTE(review): this function raises two differently named exceptions
    # ("...StatementVariableException" here, "...StatementVariable" below);
    # confirm that both names are defined in this module.
    getIdent = False
    makeUUID = False
    if idents:
        if isinstance(idents, list):
            if len(idents) != len(relList):
                raise IncorrectNumberOfBindingsForStatementVariableException("Wrong number of idents for expanded rel " + repr(rel))
            getIdent = True
        elif idents._str[:7] == "vargen:":
            #make uuid for each
            makeUUID = True
        elif idents._str[:4] == "var:":
            #unbound variable: expand without identifier
            idents = None

    #iterate over cartesian product
    for cnt, element in enumerate(relList):
        log.debug("MEH")
        log.debug(repr(element))
        log.debug(repr(otherAttrs))

        # CONSTRUCT OTHER ATTRS FOR THIS REL
        rel_other_attrs = dict()
        for k in otherAttrs:
            log.debug(repr(otherAttrs[k]))
            oa_eles = 1
            if isinstance(otherAttrs[k], list):
                oa_eles = len(otherAttrs[k])
            if oa_eles != 1 and oa_eles < len(relList):
                raise IncorrectNumberOfBindingsForStatementVariable("Attribute " + str(k) + " has incorrect number of bindings.")
            if oa_eles == 1:
                #single value: shared by all expanded relations
                kval = otherAttrs[k]
            else:
                #one value per expanded relation
                kval = otherAttrs[k][cnt]
            rel_other_attrs[k] = kval

        out = _flatten(element)

        #reorder based on original ordering: out[j] belongs to position
        #idx[j]. Sort on the position only, so that the attribute values
        #themselves never have to be comparable.
        outordered = [x for _, x in sorted(zip(idx, out), key=lambda pair: pair[0])]

        #create expanded relation
        if getIdent:
            make_rel(new_entity, rel, idents[cnt], outordered, rel_other_attrs)
        elif makeUUID:
            make_rel(new_entity, rel, prov.QualifiedName(GLOBAL_UUID_DEF_NS, str(uuid.uuid4())), outordered, rel_other_attrs)
        else:
            make_rel(new_entity, rel, idents, outordered, rel_other_attrs)
# Script fragment: collect the bindings of all rows into one shared dict.
# Keys are "var:<varname>"; a key holds a single value until a second,
# different value shows up, from then on a list of distinct values.
# NOTE(review): "data", "template", "bindmap", "outNSpref" and "outNS" are
# not defined in this excerpt - presumably set up earlier in the script.
#bind_dicts=[]
bind_dict = dict()

for index, row in data.iterrows():
    rtemplate = template
    for col in data.columns.values:
        # only columns with an entry in bindmap are turned into bindings
        if col in bindmap:

            # NOTE(review): outstatement is built but not used in the visible
            # part of this script
            outstatement = "var:" + bindmap[col][
                "varname"] + " a prov:Entity ;\n"
            #print repr(row)
            outval = str(row[col].encode('utf8', 'replace'))

            # "iri" values become qualified names with a URL-quoted local part
            if bindmap[col]["val"] == "iri":
                outval = prov.QualifiedName(prov.Namespace(outNSpref, outNS),
                                            urllib.quote(outval))

#print col + " " + bindmap[col]["varname"] + " " + outval
            ID = "var:" + bindmap[col]["varname"]
            if ID not in bind_dict:
                # first value for this variable: store it as is
                bind_dict[ID] = outval
            else:
                # further value: promote the stored scalar to a list first
                if not isinstance(bind_dict[ID], list):
                    tmp = list()
                    tmp.append(bind_dict[ID])
                    bind_dict[ID] = tmp
                # keep every distinct value only once
                if outval not in bind_dict[ID]:
                    bind_dict[ID].append(outval)
            #outval=row[col]
            # literals are wrapped in double quotes (after the unquoted value
            # has been stored in bind_dict)
            if bindmap[col]["val"] == "literal":
                outval = '"' + outval + '"'
示例#28
0
# Script fragment: prepare the bindings dict from the rows of "data".
# NOTE(review): "data", "template", "bindmap" and "outNSpref" are not
# defined in this excerpt - presumably set up earlier in the script.
# namespace URI used for values of kind "iri"
outNS = "http://example.com#"

#bind_dicts=[]
bind_dict = dict()
bindfile_dict = dict()

for index, row in data.iterrows():
    rtemplate = template
    for col in data.columns.values:
        # only columns with an entry in bindmap are turned into bindings
        if col in bindmap:

            outval = row[col]

            # "iri" values become qualified names with a URL-quoted local part
            if bindmap[col]["val"] == "iri":
                outval = prov.QualifiedName(
                    prov.Namespace(outNSpref, outNS),
                    urllib.quote(str(outval.encode('utf8', 'replace'))))

#print col + " " + bindmap[col]["varname"] + " " + outval
            ID = "var:" + bindmap[col]["varname"]
            #ID = prov.QualifiedName(prov.Namespace("var", "http://openprovenance.org/var#"), urllib.quote(bindmap[col]["varname"]))

            #prepare data for bindings dict

            if ID not in bind_dict:
                # first value for this variable: store it as is
                bind_dict[ID] = outval
            else:
                # further value: promote the stored scalar to a list
                # NOTE(review): in the visible part the new value is never
                # appended to that list - the excerpt appears to stop before
                # that step; confirm against the full script
                if not isinstance(bind_dict[ID], list):
                    tmp = list()
                    tmp.append(bind_dict[ID])
                    bind_dict[ID] = tmp
# Script fragment: build one bindings dict and one bindings text per row.
# Every row starts from a copy of "template" and gets one statement
# appended per bound column.
# NOTE(review): "data", "template", "bindmap", "outNSpref" and "outNS" are
# not defined in this excerpt - presumably set up earlier in the script.
# NOTE(review): nothing is appended to bind_dicts in the visible part.
bind_dicts = []

for index, row in data.iterrows():
    rtemplate = template
    # fresh dict per row (unlike a dict shared across all rows)
    bind_dict = {}
    for col in data.columns.values:
        # only columns with an entry in bindmap are turned into bindings
        if col in bindmap:

            # statement header for this variable
            outstatement = "var:" + bindmap[col][
                "varname"] + " a prov:Entity ;\n"
            #print repr(row)
            outval = str(row[col].encode('utf8', 'replace'))

            # "iri" values become qualified names with a URL-quoted local part
            if bindmap[col]["val"] == "iri":
                outval = prov.QualifiedName(prov.Namespace(outNSpref, outNS),
                                            urllib.quote(outval))

#print col + " " + bindmap[col]["varname"] + " " + outval
            bind_dict["var:" + bindmap[col]["varname"]] = outval
            #outval=row[col]
            # literals are wrapped in double quotes for the text output only;
            # bind_dict already holds the unquoted value
            if bindmap[col]["val"] == "literal":
                outval = '"' + outval + '"'

            # variables of type "attr" use the tmpl:2dvalue_0_0 property,
            # all others tmpl:value_0
            if bindmap[col]["type"] == "attr":
                outstatement = outstatement + "\ttmpl:2dvalue_0_0 " + str(
                    outval) + " .\n"
            else:
                outstatement = outstatement + "\ttmpl:value_0 " + str(
                    outval) + " .\n"
            rtemplate = rtemplate + outstatement
# Script fragment: collect the bindings of all rows into "bind_dict".
# NOTE(review): this fragment indents with a mix of tabs and spaces. The
# block structure only holds with tab stops of 8 (Python 2 rules); Python 3
# rejects such a mix with a TabError. Do not re-indent single lines.
# NOTE(review): "bind_dict", "data", "template", "bindmap", "outNSpref" and
# "outNS" are not defined in this excerpt - presumably set up earlier.
bindfile_dict=dict()


for index, row in data.iterrows():
    rtemplate=template
    for col in data.columns.values:
        # only columns with an entry in bindmap are turned into bindings
        if col in bindmap:

            outval=row[col]

            # values declared as "float" are converted to float
	    if bindmap[col]["val"]=="float":
		outval=float(outval)

            # "iri" values: outiri holds the qualified name, outval the
            # textual form "<prefix>:<URL-quoted value>"
            # NOTE(review): outiri is not used in the visible part
	    outiri=None
            if bindmap[col]["val"]=="iri":
		outiri=prov.QualifiedName(prov.Namespace(outNSpref,outNS), urllib.quote(str(outval.encode('utf8', 'replace'))))
                outval=outNSpref+":"+urllib.quote(str(outval.encode('utf8', 'replace')))

	    ID = "var:"+bindmap[col]["varname"]
	    #prepare data for bindings dict
	
            # first value for a variable is stored as is
	    if ID not in bind_dict:
            	bind_dict[ID]=outval
	    else:
                # further value: promote the stored scalar to a list first
		if not isinstance(bind_dict[ID], list):
			tmp=list()
			tmp.append(bind_dict[ID])
			bind_dict[ID]=tmp
                # with "uniqueOnly" set, a value already present is skipped;
                # otherwise every value is appended
		if not bindmap[col]["uniqueOnly"] or  outval not in bind_dict[ID]:
			bind_dict[ID].append(outval)