def create_entity(graph, fs_subject_id, filepath, hostname):
    """Create a PROV entity for a file in a FreeSurfer directory.

    The entity is labelled with the file name and carries the path relative
    to the subject directory, a ``file://`` location, MD5 and SHA-512
    digests, and one ``nidm:tag`` per directory component and per
    dot-separated part of the file name.  Every ``fs_file_map`` entry whose
    key is a substring of the file name contributes a further tag and its
    associated types/attributes.

    Parameters
    ----------
    graph : object exposing ``entity(identifier, attributes)``
        Graph/bundle the entity is added to.
    fs_subject_id : str
        Subject identifier; the text of ``filepath`` after its first
        occurrence is taken as the relative path.
    filepath : str
        Path of the file to describe.
    hostname : str
        Not used by the current implementation; kept so existing callers
        keep working.

    Returns
    -------
    Whatever ``graph.entity`` returns for the new entity.

    Raises
    ------
    IndexError
        If ``fs_subject_id`` does not occur in ``filepath``.
    """
    # identify FreeSurfer terms based on directory and file names
    filename = os.path.split(filepath)[1]

    # Split on the FIRST occurrence only: an unbounded split silently drops
    # the tail of the path when the subject id shows up again later (for
    # example inside the file name itself).
    relpath = filepath.split(fs_subject_id, 1)[1].lstrip(os.path.sep)
    path_parts = relpath.split('/')
    fstypes = path_parts[:-1]
    additional_types = path_parts[-1].split('.')

    file_md5_hash = hash_infile(filepath, crypto=hashlib.md5)
    file_sha512_hash = hash_infile(filepath, crypto=hashlib.sha512)
    if file_md5_hash is None:
        # Best effort: report the problem but still emit the entity.
        print('Empty file: %s' % filepath)

    location = prov.URIRef("file://%s" % filepath)
    obj_attr = [
        (prov.PROV["label"], filename),
        (fs["relative_path"], "%s" % relpath),
        (prov.PROV["location"], location),
        (crypto["md5"], "%s" % file_md5_hash),
        (crypto["sha"], "%s" % file_sha512_hash),
    ]

    # Tags derived from the path itself: directories first, then name parts.
    path_tags = fstypes + additional_types
    for tag in path_tags:
        obj_attr.append((nidm["tag"], tag))

    for key, uris in fs_file_map:
        if key not in filename:
            continue
        tag = key.strip('.')
        # Only compared against the path-derived tags, so two map entries
        # yielding the same tag are both recorded.
        if tag not in path_tags:
            obj_attr.append((nidm["tag"], tag))
        for uri in uris:
            if isinstance(uri, tuple):
                # An explicit (predicate, value) pair.
                obj_attr.append((uri[0], uri[1]))
            else:
                obj_attr.append((prov.PROV["type"], uri))

    entity_id = get_id()
    return graph.entity(entity_id, obj_attr)
""" Retrieve the file into wd """ out_T1_files = [] filemap = {} for idx, info in enumerate(t1_result.bindings): o = urlparse.urlparse(info['?t1path']) if o.scheme.startswith('file'): uri = 'file://' + o.path else: uri = info['?t1path'] filename = os.path.join(cwd, 'file_%d_' % idx + os.path.split(o.path)[-1]) urllib.urlretrieve(uri, filename) if hash_infile(filename, crypto=hashlib.sha512) != str(info['?sha']): raise IOError("Hash of file doesn't match remote hash") out_T1_files.append(filename) filemap[filename] = (info['?sha'], info['?e']) """ Run bet and convert to rdf """ provgraph, rdfgraph = run_bet(out_T1_files, cwd) nipype_files = """ PREFIX nipype: <> select ?e ?value where { ?e a prov:Entity ; nipype:value ?value . FILTER(regex(?value, 'file://'))
subject = 'SAD_024'

# Bundle holding the input provenance: one Person agent and a collection of
# T1 file entities attributed to that agent.
ingraph = pm.ProvBundle(identifier=get_id())
ingraph.add_namespace(foaf)
ingraph.add_namespace(niiri)
ingraph.add_namespace(nif)
ingraph.add_namespace(crypto)

agent = ingraph.agent(get_id(),
                      {pm.PROV["type"]: pm.PROV["Person"],
                       foaf["name"]: subject})
t1_collection = ingraph.collection(get_id())
ingraph.wasAttributedTo(t1_collection, agent)

# The host name does not change between files, so look it up once instead
# of once per loop iteration.
local_fqdn = getfqdn()
for t1path in T1s:
    file_md5_hash = hash_infile(t1path, crypto=hashlib.md5)
    file_sha512_hash = hash_infile(t1path, crypto=hashlib.sha512)
    url = "file://%s%s" % (local_fqdn, t1path)
    url_get = pm.URIRef(url)
    obj_attr = [(pm.PROV["location"], url_get),
                (crypto["md5"], "%s" % file_md5_hash),
                (crypto["sha"], "%s" % file_sha512_hash),
                (pm.PROV["type"], nif["nlx_inv_20090243"])]
    t1entity = ingraph.entity(get_id(), obj_attr)
    ingraph.hadMember(t1_collection, t1entity)

rdfingraph = ingraph.rdf()
# Parenthesised single-argument form: prints the same text on Python 2 and
# is also valid syntax on Python 3.
print(rdfingraph.serialize(format='turtle'))
""" The input graph as generated by the above code """
t1_result = rdfingraph.query(t1_query) """ Retrieve the file into wd """ out_T1_files = [] filemap = {} for idx, info in enumerate(t1_result.bindings): o = urlparse.urlparse(info['?t1path']) if o.scheme.startswith('file'): uri = 'file://' + o.path else: uri = info['?t1path'] filename = os.path.join(cwd, 'file_%d_' % idx + os.path.split(o.path)[-1]) urllib.urlretrieve(uri, filename) if hash_infile(filename, crypto=hashlib.sha512) != str(info['?sha']): raise IOError("Hash of file doesn't match remote hash") out_T1_files.append(filename) filemap[filename] = (info['?sha'], info['?e']) """ Run bet and convert to rdf """ provgraph, rdfgraph = run_bet(out_T1_files, cwd) nipype_files = """ PREFIX nipype: <> select ?e ?value where { ?e a prov:Entity ; nipype:value ?value . FILTER(regex(?value, 'file://')) }
subject = 'SAD_024' ingraph = pm.ProvBundle(identifier=get_id()) ingraph.add_namespace(foaf) ingraph.add_namespace(niiri) ingraph.add_namespace(nif) ingraph.add_namespace(crypto) agent = ingraph.agent(get_id(), {pm.PROV["type"]: pm.PROV["Person"], foaf["name"]: subject} ) t1_collection = ingraph.collection(get_id()) ingraph.wasAttributedTo(t1_collection, agent) for t1path in T1s: file_md5_hash = hash_infile(t1path, crypto=hashlib.md5) file_sha512_hash = hash_infile(t1path, crypto=hashlib.sha512) url = "file://%s%s" % (getfqdn(), t1path) url_get = pm.URIRef(url) obj_attr = [(pm.PROV["location"], url_get), (crypto["md5"], "%s" % file_md5_hash), (crypto["sha"], "%s" % file_sha512_hash), (pm.PROV["type"], nif["nlx_inv_20090243"]) ] t1entity = ingraph.entity(get_id(), obj_attr) ingraph.hadMember(t1_collection, t1entity) rdfingraph = ingraph.rdf() print rdfingraph.serialize(format='turtle') """