def export(self, experimentId, replace_protocols=None, filename=None,
           export_images=True):
    """Write a METS XML description of an experiment and return its path.

    The document gets a descriptive (MODS) section for the experiment and
    for each of its datasets, a technical metadata section for every
    experiment, dataset and verified datafile that has parameter sets, a
    file section listing the verified datafiles, and a logical structure
    map tying them together.

    Args:
        experimentId: primary key passed to ``Experiment.objects.get``.
        replace_protocols: optional mapping of a datafile's protocol to
            the text substituted for it inside the datafile's URL.
            Defaults to no replacements.
        filename: name of the output file inside the export directory.
            ``mets_expid_<id>.xml`` is used when this is empty.
        export_images: stored on ``self.export_images``; this method does
            not read it again (presumably the XML helper methods do --
            TODO confirm).

    Returns:
        The path of the file that was written.

    Any exception raised by the experiment lookup or by the METS
    serialisation propagates to the caller; the output file is closed
    either way.
    """
    if replace_protocols is None:
        replace_protocols = {}

    self.export_images = export_images

    # "A-<n>" identifiers are handed out from this counter: one for the
    # experiment, then one per dataset and one per datafile, in order.
    metadataCounter = 1
    experiment = Experiment.objects.get(id=experimentId)

    # TODO: what info do we put on label?
    # NOTE(review): the PROFILE value embeds its own quotes and the
    # namespace declarations -- presumably the bindings write it verbatim
    # into the root element; confirm before touching this string.
    profile = ('"Scientific Dataset Profile 1.0"'
               ' xmlns="http://www.loc.gov/METS/"'
               ' xmlns:xlink="http://www.w3.org/1999/xlink"'
               ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"'
               ' xsi:schemaLocation="http://www.loc.gov/METS/'
               ' http://www.loc.gov/standards/mets/mets.xsd"')
    _mets = mets(PROFILE=profile, LABEL="", TYPE="study",
                 OBJID="A-{0}".format(metadataCounter))

    # Single amdSec that collects every techMD entry of the document.
    _amdSec = amdSecType()

    # create a div entry for experiment
    experimentDiv = divType(DMDID="E-1",
                            ADMID="A-{0}".format(metadataCounter),
                            TYPE="investigation")

    parameterSets = ExperimentParameterSet.objects.filter(
        experiment=experiment)
    experimentMdWrap = mdWrap(MDTYPE="OTHER",
                              OTHERMDTYPE="TARDISEXPERIMENT")
    if parameterSets:
        _xmlData = self.getTechMDXmlDataForParameterSets(
            experiment, parameterSets, "experiment")
        experimentMdWrap.set_xmlData(_xmlData)

        # create a techMD entry for the experiment
        _techMD = mdSecType(ID="A-{0}".format(metadataCounter),
                            mdWrap=experimentMdWrap)
        _amdSec.add_techMD(_techMD)

    # descriptive (MODS) metadata for the experiment
    _xmlData = self.getDmdSecXmlDataForExperiment(
        experiment, "http://www.loc.gov/mods/v3")
    experimentMdWrap = mdWrap(MDTYPE="MODS", xmlData=_xmlData)
    _dmdSec = mdSecType(ID="E-1", mdWrap=experimentMdWrap)

    _mets.add_dmdSec(_dmdSec)
    _mets.add_amdSec(_amdSec)
    metadataCounter += 1

    _structMap = structMapType(TYPE="logical", div=experimentDiv)

    datasets = Dataset.objects.filter(experiments=experiment)

    _fileGrp = fileGrpType(USE='original')
    _fileSec = fileSec()
    _fileSec.add_fileGrp(_fileGrp)

    fileCounter = 1
    datasetCounter = 1
    for dataset in datasets:
        # create a div entry for dataset
        datasetDiv = divType(DMDID="D-{0}".format(datasetCounter),
                             ADMID="A-{0}".format(metadataCounter),
                             TYPE="dataset")

        parameterSets = DatasetParameterSet.objects.filter(dataset=dataset)
        datasetMdWrap = mdWrap(MDTYPE="OTHER",
                               OTHERMDTYPE="TARDISDATASET")
        if parameterSets:
            _xmlData = self.getTechMDXmlDataForParameterSets(
                experiment, parameterSets, "dataset")
            datasetMdWrap.set_xmlData(_xmlData)

            # create a techMD entry for the dataset
            _techMD = mdSecType(ID="A-{0}".format(metadataCounter),
                                mdWrap=datasetMdWrap)
            _amdSec.add_techMD(_techMD)

        # descriptive (MODS) metadata for the dataset
        _xmlData = self.getDmdSecXmlDataForDataset(
            dataset, "http://www.loc.gov/mods/v3")
        datasetMdWrap = mdWrap(MDTYPE="MODS", xmlData=_xmlData)
        _dmdSec = mdSecType(ID="D-{0}".format(datasetCounter),
                            mdWrap=datasetMdWrap)
        _mets.add_dmdSec(_dmdSec)

        datasetCounter += 1
        metadataCounter += 1

        experimentDiv.add_div(datasetDiv)

        # Only verified datafiles are listed in the export.
        for datafile in dataset.dataset_file_set.filter(verified=True):
            # add entry to fileSec
            _file = fileType(ID="F-{0}".format(fileCounter),
                             MIMETYPE=datafile.mimetype,
                             SIZE=datafile.size,
                             CHECKSUM=datafile.sha512sum,
                             CHECKSUMTYPE="SHA-512",
                             OWNERID=datafile.filename,
                             ADMID="A-{0}".format(metadataCounter))

            # Optionally swap the protocol text inside the URL.
            protocol = datafile.protocol
            if protocol in replace_protocols:
                url = datafile.url.replace(protocol,
                                           replace_protocols[protocol])
            else:
                url = datafile.url
            _file.add_FLocat(FLocat(LOCTYPE="URL", href=url,
                                    type_="simple"))
            _fileGrp.add_file(_file)

            # add entry to structMap
            datasetDiv.add_fptr(fptr(FILEID="F-{0}".format(fileCounter)))

            parameterSets = DatafileParameterSet.objects.filter(
                dataset_file=datafile)
            datafileMdWrap = mdWrap(MDTYPE="OTHER",
                                    OTHERMDTYPE="TARDISDATAFILE")
            if parameterSets:
                _xmlData = self.getTechMDXmlDataForParameterSets(
                    experiment, parameterSets, "datafile")
                datafileMdWrap.set_xmlData(_xmlData)

                # create a techMD entry for the datafile
                _techMD = mdSecType(ID="A-{0}".format(metadataCounter),
                                    mdWrap=datafileMdWrap)
                _amdSec.add_techMD(_techMD)

            fileCounter += 1
            metadataCounter += 1

    _mets.add_structMap(_structMap)
    _mets.set_fileSec(_fileSec)

    # create the mets header; timestamps are local time, second precision
    dateNow = datetime.now().replace(microsecond=0).isoformat()
    _metsHdr = metsHdr(CREATEDATE=dateNow, LASTMODDATE=dateNow)
    institution = agent(TYPE="ORGANIZATION", ROLE="DISSEMINATOR",
                        name=experiment.institution_name)
    creator = agent(TYPE="OTHER", ROLE="CREATOR",
                    name="METS Exporter 0.1")
    _metsHdr.add_agent(institution)
    _metsHdr.add_agent(creator)
    _mets.set_metsHdr(_metsHdr)

    # Use experiment directory, or temporary directory if unavailable
    dirname = experiment.get_or_create_directory()
    if dirname is None:
        from tempfile import mkdtemp
        dirname = mkdtemp()
    logger.debug('Using directory %s for METS export', dirname)

    # Use generated filename if not provided
    if not filename:
        filename = 'mets_expid_%i.xml' % experiment.id
    filepath = join(dirname, filename)

    # The context manager closes the file even if serialisation fails.
    with open(filepath, 'w') as outfile:
        _mets.export(outfile=outfile, level=1)
    return filepath
def export(self, experimentId):
    """Write a METS XML description of an experiment and return its path.

    The document gets a descriptive (MODS) section for the experiment and
    for each of its datasets, a technical metadata section for every
    experiment, dataset and datafile that has parameter sets, a file
    section listing every datafile, and a logical structure map tying
    them together.  The file is written to
    ``<experiment directory>/mets_expid_<id>.xml``.

    Args:
        experimentId: primary key passed to ``Experiment.objects.get``.

    Returns:
        The path of the file that was written.

    Any exception raised by the experiment lookup or by the METS
    serialisation propagates to the caller; the output file is closed
    either way.
    """
    # "A-<n>" identifiers are handed out from this counter: one for the
    # experiment, then one per dataset and one per datafile, in order.
    metadataCounter = 1
    experiment = Experiment.objects.get(id=experimentId)

    # TODO: what info do we put on label?
    # NOTE(review): the PROFILE value embeds its own quotes and the
    # namespace declarations -- presumably the bindings write it verbatim
    # into the root element; confirm before touching this string.
    profile = ('"Scientific Dataset Profile 1.0"'
               ' xmlns="http://www.loc.gov/METS/"'
               ' xmlns:xlink="http://www.w3.org/1999/xlink"'
               ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"'
               ' xsi:schemaLocation="http://www.loc.gov/METS/'
               ' http://www.loc.gov/standards/mets/mets.xsd"')
    _mets = mets(PROFILE=profile, LABEL="", TYPE="study",
                 OBJID="A-{0}".format(metadataCounter))

    # Single amdSec that collects every techMD entry of the document.
    _amdSec = amdSecType()

    # create a div entry for experiment
    experimentDiv = divType(DMDID="E-1",
                            ADMID="A-{0}".format(metadataCounter),
                            TYPE="investigation")

    parameterSets = ExperimentParameterSet.objects.filter(
        experiment=experiment)
    experimentMdWrap = mdWrap(MDTYPE="OTHER",
                              OTHERMDTYPE="TARDISEXPERIMENT")
    if parameterSets.count() > 0:
        _xmlData = self.getTechMDXmlDataForParameterSets(
            parameterSets, "experiment")
        experimentMdWrap.set_xmlData(_xmlData)

        # create a techMD entry for the experiment
        _techMD = mdSecType(ID="A-{0}".format(metadataCounter),
                            mdWrap=experimentMdWrap)
        _amdSec.add_techMD(_techMD)

    # descriptive (MODS) metadata for the experiment
    _xmlData = self.getDmdSecXmlDataForExperiment(
        experiment, "http://www.loc.gov/mods/v3")
    experimentMdWrap = mdWrap(MDTYPE="MODS", xmlData=_xmlData)
    _dmdSec = mdSecType(ID="E-1", mdWrap=experimentMdWrap)

    _mets.add_dmdSec(_dmdSec)
    _mets.add_amdSec(_amdSec)
    metadataCounter += 1

    _structMap = structMapType(TYPE="logical", div=experimentDiv)

    datasets = Dataset.objects.filter(experiment=experiment)

    _fileGrp = fileGrpType(USE='original')
    _fileSec = fileSec()
    _fileSec.add_fileGrp(_fileGrp)

    fileCounter = 1
    datasetCounter = 1
    for dataset in datasets:
        # create a div entry for dataset
        datasetDiv = divType(DMDID="D-{0}".format(datasetCounter),
                             ADMID="A-{0}".format(metadataCounter),
                             TYPE="dataset")

        parameterSets = DatasetParameterSet.objects.filter(dataset=dataset)
        datasetMdWrap = mdWrap(MDTYPE="OTHER",
                               OTHERMDTYPE="TARDISDATASET")
        if parameterSets.count() > 0:
            _xmlData = self.getTechMDXmlDataForParameterSets(
                parameterSets, "dataset")
            datasetMdWrap.set_xmlData(_xmlData)

            # create a techMD entry for the dataset
            _techMD = mdSecType(ID="A-{0}".format(metadataCounter),
                                mdWrap=datasetMdWrap)
            _amdSec.add_techMD(_techMD)

        # descriptive (MODS) metadata for the dataset
        _xmlData = self.getDmdSecXmlDataForDataset(
            dataset, "http://www.loc.gov/mods/v3")
        datasetMdWrap = mdWrap(MDTYPE="MODS", xmlData=_xmlData)
        # Wrap the dataset's own MODS data; passing experimentMdWrap here
        # would repeat the experiment's description under every dataset.
        _dmdSec = mdSecType(ID="D-{0}".format(datasetCounter),
                            mdWrap=datasetMdWrap)
        _mets.add_dmdSec(_dmdSec)

        datasetCounter += 1
        metadataCounter += 1

        experimentDiv.add_div(datasetDiv)

        datafiles = Dataset_File.objects.filter(dataset=dataset)
        for datafile in datafiles:
            # A missing MD5 is left out rather than replaced by a
            # placeholder: any substitute text would be exported as if it
            # were a real digest.
            checksum = datafile.md5sum or None
            checksumType = "MD5" if checksum else None

            # add entry to fileSec
            _file = fileType(ID="F-{0}".format(fileCounter),
                             MIMETYPE=(datafile.mimetype or
                                       "application/octet-stream"),
                             SIZE=datafile.size,
                             CHECKSUM=checksum,
                             CHECKSUMTYPE=checksumType,
                             OWNERID=datafile.filename,
                             ADMID="A-{0}".format(metadataCounter))
            _file.add_FLocat(FLocat(LOCTYPE="URL", href=datafile.url,
                                    type_="simple"))
            _fileGrp.add_file(_file)

            # add entry to structMap
            datasetDiv.add_fptr(fptr(FILEID="F-{0}".format(fileCounter)))

            parameterSets = DatafileParameterSet.objects.filter(
                dataset_file=datafile)
            datafileMdWrap = mdWrap(MDTYPE="OTHER",
                                    OTHERMDTYPE="TARDISDATAFILE")
            if parameterSets.count() > 0:
                _xmlData = self.getTechMDXmlDataForParameterSets(
                    parameterSets, "datafile")
                datafileMdWrap.set_xmlData(_xmlData)

                # create a techMD entry for the datafile
                _techMD = mdSecType(ID="A-{0}".format(metadataCounter),
                                    mdWrap=datafileMdWrap)
                _amdSec.add_techMD(_techMD)

            fileCounter += 1
            metadataCounter += 1

    _mets.add_structMap(_structMap)
    _mets.set_fileSec(_fileSec)

    # create the mets header; timestamps are local time, second precision
    dateNow = datetime.now().replace(microsecond=0).isoformat()
    _metsHdr = metsHdr(CREATEDATE=dateNow, LASTMODDATE=dateNow)
    institution = agent(TYPE="ORGANIZATION", ROLE="DISSEMINATOR",
                        name=experiment.institution_name)
    creator = agent(TYPE="OTHER", ROLE="CREATOR",
                    name="METS Exporter 0.1")
    _metsHdr.add_agent(institution)
    _metsHdr.add_agent(creator)
    _mets.set_metsHdr(_metsHdr)

    filename = join(experiment.get_or_create_directory(),
                    'mets_expid_%s.xml' % str(experiment.id))

    # The context manager closes the file even if serialisation fails.
    with open(filename, 'w') as outfile:
        _mets.export(outfile=outfile, level=1)
    return filename
def export(self, experimentId, replace_protocols=None, filename=None,
           export_images=True, force_http_urls=False):
    """Write a METS XML description of an experiment and return its path.

    The document gets a descriptive (MODS) section for the experiment and
    for each of its datasets, a technical metadata section for every
    experiment, dataset and datafile that has parameter sets, a file
    section listing each datafile that has a verified preferred replica,
    and a logical structure map tying them together.

    Args:
        experimentId: primary key passed to ``Experiment.objects.get``.
        replace_protocols: optional mapping of a replica's protocol to
            the text substituted for it inside the replica's URL.
            Defaults to no replacements.
        filename: name of the output file inside the export directory.
            ``mets_expid_<id>.xml`` is used when this is empty.
        export_images: stored on ``self.export_images``; this method does
            not read it again (presumably the XML helper methods do --
            TODO confirm).
        force_http_urls: when truthy, used as the base URL onto which
            each replica's ``get_download_url()`` is joined; the result
            overrides any other URL chosen for the file.

    Returns:
        The path of the file that was written.

    Any exception raised by the experiment lookup or by the METS
    serialisation propagates to the caller; the output file is closed
    either way.
    """
    if replace_protocols is None:
        replace_protocols = {}
    if force_http_urls:
        # Imported once here rather than once per datafile.
        import urlparse

    self.export_images = export_images

    # "A-<n>" identifiers are handed out from this counter: one for the
    # experiment, then one per dataset and one per exported datafile.
    metadataCounter = 1
    experiment = Experiment.objects.get(id=experimentId)

    # TODO: what info do we put on label?
    # NOTE(review): the PROFILE value embeds its own quotes and the
    # namespace declarations -- presumably the bindings write it verbatim
    # into the root element; confirm before touching this string.
    profile = ('"Scientific Dataset Profile 1.0"'
               ' xmlns="http://www.loc.gov/METS/"'
               ' xmlns:xlink="http://www.w3.org/1999/xlink"'
               ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"'
               ' xsi:schemaLocation="http://www.loc.gov/METS/'
               ' http://www.loc.gov/standards/mets/mets.xsd"')
    _mets = mets(PROFILE=profile, LABEL="", TYPE="study",
                 OBJID="A-{0}".format(metadataCounter))

    # Single amdSec that collects every techMD entry of the document.
    _amdSec = amdSecType()

    # create a div entry for experiment
    experimentDiv = divType(DMDID="E-1",
                            ADMID="A-{0}".format(metadataCounter),
                            TYPE="investigation")

    parameterSets = ExperimentParameterSet.objects.filter(
        experiment=experiment)
    experimentMdWrap = mdWrap(MDTYPE="OTHER",
                              OTHERMDTYPE="TARDISEXPERIMENT")
    if parameterSets:
        _xmlData = self.getTechMDXmlDataForParameterSets(
            experiment, parameterSets, "experiment")
        experimentMdWrap.set_xmlData(_xmlData)

        # create a techMD entry for the experiment
        _techMD = mdSecType(ID="A-{0}".format(metadataCounter),
                            mdWrap=experimentMdWrap)
        _amdSec.add_techMD(_techMD)

    # descriptive (MODS) metadata for the experiment
    _xmlData = self.getDmdSecXmlDataForExperiment(
        experiment, "http://www.loc.gov/mods/v3")
    experimentMdWrap = mdWrap(MDTYPE="MODS", xmlData=_xmlData)
    _dmdSec = mdSecType(ID="E-1", mdWrap=experimentMdWrap)

    _mets.add_dmdSec(_dmdSec)
    _mets.add_amdSec(_amdSec)
    metadataCounter += 1

    _structMap = structMapType(TYPE="logical", div=experimentDiv)

    datasets = Dataset.objects.filter(experiments=experiment)

    _fileGrp = fileGrpType(USE='original')
    _fileSec = fileSec()
    _fileSec.add_fileGrp(_fileGrp)

    fileCounter = 1
    datasetCounter = 1
    for dataset in datasets:
        # create a div entry for dataset
        datasetDiv = divType(DMDID="D-{0}".format(datasetCounter),
                             ADMID="A-{0}".format(metadataCounter),
                             TYPE="dataset")

        parameterSets = DatasetParameterSet.objects.filter(dataset=dataset)
        datasetMdWrap = mdWrap(MDTYPE="OTHER",
                               OTHERMDTYPE="TARDISDATASET")
        if parameterSets:
            _xmlData = self.getTechMDXmlDataForParameterSets(
                experiment, parameterSets, "dataset")
            datasetMdWrap.set_xmlData(_xmlData)

            # create a techMD entry for the dataset
            _techMD = mdSecType(ID="A-{0}".format(metadataCounter),
                                mdWrap=datasetMdWrap)
            _amdSec.add_techMD(_techMD)

        # descriptive (MODS) metadata for the dataset
        _xmlData = self.getDmdSecXmlDataForDataset(
            dataset, "http://www.loc.gov/mods/v3")
        datasetMdWrap = mdWrap(MDTYPE="MODS", xmlData=_xmlData)
        _dmdSec = mdSecType(ID="D-{0}".format(datasetCounter),
                            mdWrap=datasetMdWrap)
        _mets.add_dmdSec(_dmdSec)

        datasetCounter += 1
        metadataCounter += 1

        experimentDiv.add_div(datasetDiv)

        for datafile in dataset.dataset_file_set.filter():
            # Datafiles without a verified replica are left out entirely
            # and consume no file or metadata identifier.
            replica = datafile.get_preferred_replica(verified=True)
            if not replica:
                continue

            # add entry to fileSec
            parameterSets = DatafileParameterSet.objects.filter(
                dataset_file=datafile)
            # Only point at a techMD entry when one will be written below.
            if not parameterSets:
                ADMID_val = None
            else:
                ADMID_val = "A-{0}".format(metadataCounter)
            _file = fileType(ID="F-{0}".format(fileCounter),
                             MIMETYPE=datafile.mimetype,
                             SIZE=datafile.size,
                             CHECKSUM=datafile.sha512sum,
                             CHECKSUMTYPE="SHA-512",
                             OWNERID=datafile.filename,
                             ADMID=ADMID_val)

            # The protocol is the replica's, so the substitution is made
            # in the replica's URL -- the same URL the other branch uses.
            protocol = replica.protocol
            if protocol in replace_protocols:
                url = replica.url.replace(protocol,
                                          replace_protocols[protocol])
            else:
                url = replica.url
            if force_http_urls:
                url = urlparse.urljoin(force_http_urls,
                                       replica.get_download_url())
            _file.add_FLocat(FLocat(LOCTYPE="URL", href=url,
                                    type_="simple"))
            _fileGrp.add_file(_file)

            # add entry to structMap
            datasetDiv.add_fptr(fptr(FILEID="F-{0}".format(fileCounter)))

            datafileMdWrap = mdWrap(MDTYPE="OTHER",
                                    OTHERMDTYPE="TARDISDATAFILE")
            if parameterSets:
                _xmlData = self.getTechMDXmlDataForParameterSets(
                    experiment, parameterSets, "datafile")
                datafileMdWrap.set_xmlData(_xmlData)

                # create a techMD entry for the datafile
                _techMD = mdSecType(ID="A-{0}".format(metadataCounter),
                                    mdWrap=datafileMdWrap)
                _amdSec.add_techMD(_techMD)

            fileCounter += 1
            metadataCounter += 1

    _mets.add_structMap(_structMap)
    _mets.set_fileSec(_fileSec)

    # create the mets header; timestamps are local time, second precision
    dateNow = datetime.now().replace(microsecond=0).isoformat()
    _metsHdr = metsHdr(CREATEDATE=dateNow, LASTMODDATE=dateNow)
    institution = agent(TYPE="ORGANIZATION", ROLE="DISSEMINATOR",
                        name=experiment.institution_name)
    creator = agent(TYPE="OTHER", ROLE="CREATOR",
                    name="METS Exporter 0.1")
    _metsHdr.add_agent(institution)
    _metsHdr.add_agent(creator)
    _mets.set_metsHdr(_metsHdr)

    # Use experiment directory, or temporary directory if unavailable
    dirname = experiment.get_or_create_directory()
    if dirname is None:
        from tempfile import mkdtemp
        dirname = mkdtemp()
    logger.debug('Using directory %s for METS export', dirname)

    # Use generated filename if not provided
    if not filename:
        filename = 'mets_expid_%i.xml' % experiment.id
    filepath = join(dirname, filename)

    # The context manager closes the file even if serialisation fails.
    with open(filepath, 'w') as outfile:
        _mets.export(outfile=outfile, level=1)
    return filepath
def export_to_file(self, experiment, outfile, replace_protocols=None,
                   export_images=True, force_http_urls=False):
    """Serialise a METS XML description of an experiment to ``outfile``.

    The document gets a descriptive (MODS) section for the experiment and
    for each of its datasets, a technical metadata section for every
    experiment, dataset and datafile that has parameter sets, a file
    section listing each datafile that has a verified preferred replica,
    and a logical structure map tying them together.

    Args:
        experiment: the experiment object to export; it is used in the
            model filters and supplies ``institution_name``.
        outfile: destination handed to the METS ``export`` call as its
            ``outfile`` argument.  This method neither opens nor closes
            it.
        replace_protocols: optional mapping of a replica's protocol to
            the text substituted for it inside the replica's URL.
            Defaults to no replacements.
        export_images: stored on ``self.export_images``; this method does
            not read it again (presumably the XML helper methods do --
            TODO confirm).
        force_http_urls: when truthy, used as the base URL onto which
            each replica's ``get_download_url()`` is joined; the result
            overrides any other URL chosen for the file.

    Returns:
        None.
    """
    if replace_protocols is None:
        replace_protocols = {}
    if force_http_urls:
        # Imported once here rather than once per datafile.
        import urlparse

    self.export_images = export_images

    # "A-<n>" identifiers are handed out from this counter: one for the
    # experiment, then one per dataset and one per exported datafile.
    metadataCounter = 1

    # TODO: what info do we put on label?
    # NOTE(review): the PROFILE value embeds its own quotes and the
    # namespace declarations -- presumably the bindings write it verbatim
    # into the root element; confirm before touching this string.
    profile = ('"Scientific Dataset Profile 1.0"'
               ' xmlns="http://www.loc.gov/METS/"'
               ' xmlns:xlink="http://www.w3.org/1999/xlink"'
               ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"'
               ' xsi:schemaLocation="http://www.loc.gov/METS/'
               ' http://www.loc.gov/standards/mets/mets.xsd"')
    _mets = mets(PROFILE=profile, LABEL="", TYPE="study",
                 OBJID="A-{0}".format(metadataCounter))

    # Single amdSec that collects every techMD entry of the document.
    _amdSec = amdSecType()

    # create a div entry for experiment
    experimentDiv = divType(DMDID="E-1",
                            ADMID="A-{0}".format(metadataCounter),
                            TYPE="investigation")

    parameterSets = ExperimentParameterSet.objects.filter(
        experiment=experiment)
    experimentMdWrap = mdWrap(MDTYPE="OTHER",
                              OTHERMDTYPE="TARDISEXPERIMENT")
    if parameterSets:
        _xmlData = self.getTechMDXmlDataForParameterSets(
            experiment, parameterSets, "experiment")
        experimentMdWrap.set_xmlData(_xmlData)

        # create a techMD entry for the experiment
        _techMD = mdSecType(ID="A-{0}".format(metadataCounter),
                            mdWrap=experimentMdWrap)
        _amdSec.add_techMD(_techMD)

    # descriptive (MODS) metadata for the experiment
    _xmlData = self.getDmdSecXmlDataForExperiment(
        experiment, "http://www.loc.gov/mods/v3")
    experimentMdWrap = mdWrap(MDTYPE="MODS", xmlData=_xmlData)
    _dmdSec = mdSecType(ID="E-1", mdWrap=experimentMdWrap)

    _mets.add_dmdSec(_dmdSec)
    _mets.add_amdSec(_amdSec)
    metadataCounter += 1

    _structMap = structMapType(TYPE="logical", div=experimentDiv)

    datasets = Dataset.objects.filter(experiments=experiment)

    _fileGrp = fileGrpType(USE='original')
    _fileSec = fileSec()
    _fileSec.add_fileGrp(_fileGrp)

    fileCounter = 1
    datasetCounter = 1
    for dataset in datasets:
        # create a div entry for dataset
        datasetDiv = divType(DMDID="D-{0}".format(datasetCounter),
                             ADMID="A-{0}".format(metadataCounter),
                             TYPE="dataset")

        parameterSets = DatasetParameterSet.objects.filter(dataset=dataset)
        datasetMdWrap = mdWrap(MDTYPE="OTHER",
                               OTHERMDTYPE="TARDISDATASET")
        if parameterSets:
            _xmlData = self.getTechMDXmlDataForParameterSets(
                experiment, parameterSets, "dataset")
            datasetMdWrap.set_xmlData(_xmlData)

            # create a techMD entry for the dataset
            _techMD = mdSecType(ID="A-{0}".format(metadataCounter),
                                mdWrap=datasetMdWrap)
            _amdSec.add_techMD(_techMD)

        # descriptive (MODS) metadata for the dataset
        _xmlData = self.getDmdSecXmlDataForDataset(
            dataset, "http://www.loc.gov/mods/v3")
        datasetMdWrap = mdWrap(MDTYPE="MODS", xmlData=_xmlData)
        _dmdSec = mdSecType(ID="D-{0}".format(datasetCounter),
                            mdWrap=datasetMdWrap)
        _mets.add_dmdSec(_dmdSec)

        datasetCounter += 1
        metadataCounter += 1

        experimentDiv.add_div(datasetDiv)

        for datafile in dataset.dataset_file_set.filter():
            # Datafiles without a verified replica are left out entirely
            # and consume no file or metadata identifier.
            replica = datafile.get_preferred_replica(verified=True)
            if not replica:
                continue

            # add entry to fileSec
            parameterSets = DatafileParameterSet.objects.filter(
                dataset_file=datafile)
            # Only point at a techMD entry when one will be written below.
            if not parameterSets:
                ADMID_val = None
            else:
                ADMID_val = "A-{0}".format(metadataCounter)
            _file = fileType(ID="F-{0}".format(fileCounter),
                             MIMETYPE=datafile.mimetype,
                             SIZE=datafile.size,
                             CHECKSUM=datafile.sha512sum,
                             CHECKSUMTYPE="SHA-512",
                             OWNERID=datafile.filename,
                             ADMID=ADMID_val)

            # The protocol is the replica's, so the substitution is made
            # in the replica's URL -- the same URL the other branch uses.
            protocol = replica.protocol
            if protocol in replace_protocols:
                url = replica.url.replace(protocol,
                                          replace_protocols[protocol])
            else:
                url = replica.url
            if force_http_urls:
                url = urlparse.urljoin(force_http_urls,
                                       replica.get_download_url())
            _file.add_FLocat(FLocat(LOCTYPE="URL", href=url,
                                    type_="simple"))
            _fileGrp.add_file(_file)

            # add entry to structMap
            datasetDiv.add_fptr(fptr(FILEID="F-{0}".format(fileCounter)))

            datafileMdWrap = mdWrap(MDTYPE="OTHER",
                                    OTHERMDTYPE="TARDISDATAFILE")
            if parameterSets:
                _xmlData = self.getTechMDXmlDataForParameterSets(
                    experiment, parameterSets, "datafile")
                datafileMdWrap.set_xmlData(_xmlData)

                # create a techMD entry for the datafile
                _techMD = mdSecType(ID="A-{0}".format(metadataCounter),
                                    mdWrap=datafileMdWrap)
                _amdSec.add_techMD(_techMD)

            fileCounter += 1
            metadataCounter += 1

    _mets.add_structMap(_structMap)
    _mets.set_fileSec(_fileSec)

    # create the mets header; timestamps are local time, second precision
    dateNow = datetime.now().replace(microsecond=0).isoformat()
    _metsHdr = metsHdr(CREATEDATE=dateNow, LASTMODDATE=dateNow)
    institution = agent(TYPE="ORGANIZATION", ROLE="DISSEMINATOR",
                        name=experiment.institution_name)
    creator = agent(TYPE="OTHER", ROLE="CREATOR",
                    name="METS Exporter 0.1")
    _metsHdr.add_agent(institution)
    _metsHdr.add_agent(creator)
    _mets.set_metsHdr(_metsHdr)

    _mets.export(outfile=outfile, level=1)