def test_hpc_connection(user):
    """
    :param user: user object
    :returns: the user's HPC username if a connection to the cluster
        succeeds (or if the testedConnection flag is already set, in
        which case the connection is not re-tested), otherwise False
    """
    logger.debug("testing if user exists")
    try:
        hpcuser = HPCUser.objects.get(user=user)
    except (HPCUser.DoesNotExist, TypeError):
        return False
    #logger.debug(dir(hpcuser))
    if hpcuser.testedConnection:
        #logger.debug("testConnection = True")
        return hpcuser.hpc_username
    myHPC = HPC(location="msg", username=hpcuser.hpc_username)
    if myHPC.testConnection():
        hpcuser.testedConnection = True
        #logger.debug("tested for real: " + repr(hpcuser.testedConnection))
        hpcuser.save()
        return hpcuser.hpc_username
    else:
        hpcuser.testedConnection = False
        hpcuser.save()
        return False
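# Usage sketch (hedged; assumes an authenticated Django request): the
# return value is either the user's HPC username or False, so callers
# can branch on it directly.
hpc_username = test_hpc_connection(request.user)
if hpc_username:
    pass  # connection verified; jobs can be submitted as hpc_username
else:
    pass  # ask the user to register or re-test their HPC account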
def _saveParameter(self, parameterTypeClass, parameterName,
                   parameterValue, parameterSet):
    '''Save the metadata field in the database.

    Reference:
    http://stackoverflow.com/questions/452969/does-python-have-an-equivalent-to-java-class-forname
    '''
    logger.debug('saving parameter %s: %s' % (parameterName,
                                              parameterValue))
    if parameterName.is_numeric:
        parameter = getattr(models, parameterTypeClass)(
            parameterset=parameterSet,
            name=parameterName,
            string_value=None,
            numerical_value=float(parameterValue))
    else:
        parameter = getattr(models, parameterTypeClass)(
            parameterset=parameterSet,
            name=parameterName,
            string_value=parameterValue,
            numerical_value=None)
    parameter.save()
def download_datafile(request, datafile_id):
    # TODO: handle missing file, general error
    datafile = Dataset_File.objects.get(pk=datafile_id)
    if has_datafile_access(datafile.id, request.user):
        url = datafile.url
        if url.startswith(('http://', 'https://', 'ftp://')):
            return HttpResponseRedirect(datafile.url)
        else:
            file_path = join(settings.FILE_STORE_PATH,
                             str(datafile.dataset.experiment.id),
                             datafile.url.partition('//')[2])
            try:
                logger.debug(file_path)
                wrapper = FileWrapper(file(file_path))
                response = HttpResponse(wrapper,
                                        mimetype='application/octet-stream')
                response['Content-Disposition'] = \
                    'attachment; filename="' + datafile.filename + '"'
                # import os
                # response['Content-Length'] = os.path.getsize(file_path)
                return response
            except IOError:
                return return_response_not_found(request)
    else:
        return return_response_error(request)
def jobstatus(request, experiment_id):
    if not request.user.is_authenticated():
        return "Not logged in"
    try:
        #utils.update_job_status(experiment_id=experiment_id,
        #                        user_id=request.user.id)
        jobs = Job.objects.filter(
            experiment=Experiment.objects.get(pk=experiment_id))
        for job in jobs:
            job.updateStatus()
        datasets = jobs.values_list('dataset').distinct()
        logger.debug(repr(datasets))
        disparray = []
        for dataset in datasets:
            dataset = dataset[0]
            jobids = jobs.filter(dataset=dataset).values_list(
                'jobid').distinct()
            jobidarray = []
            for jobid in jobids:
                finished = True
                retrieved = True
                jobid = jobid[0]
                inttime = uuid.UUID(jobid).time
                submittime = datetime.datetime.fromtimestamp(
                    (inttime - 0x01b21dd213814000L) * 100 / 1e9)
                thesejobs = jobs.filter(jobid=jobid)
                jobdataarray = []
                for job in thesejobs:
                    if job.jobstatus.strip() != "Finished":
                        finished = False
                    if job.jobstatus.strip() != "Retrieved":
                        retrieved = False
                    jobdata = {
                        'status': job.jobstatus,
                        'hpcjobid': job.hpcjobid,
                        'submittime': job.submittime,
                    }
                    jobdataarray.append(jobdata)
                jobiddict = {'jobid': jobid,
                             'joblist': jobdataarray,
                             'finished': finished,
                             'retrieved': retrieved,
                             'submittime': submittime.strftime(
                                 "%d %b %Y, %H:%M:%S")}
                jobidarray.append(jobiddict)
            datasetdict = {'dataset': Dataset.objects.get(
                               pk=dataset).description,
                           'jobidlist': jobidarray}
            disparray.append(datasetdict)
        logger.debug(repr(disparray))
        c = Context({
            #'jobs': jobs,
            'disparray': disparray,
        })
    except Experiment.DoesNotExist:
        return return_response_not_found(request)
    return render_to_response('mrtardis/jobstatus.html', c)
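# A minimal sketch of the timestamp arithmetic used in jobstatus() above:
# uuid1() stores its time field as 100-nanosecond intervals since the
# Gregorian epoch (1582-10-15); 0x01b21dd213814000L is the number of such
# intervals between that epoch and the Unix epoch, so subtracting it and
# scaling by 100/1e9 yields seconds suitable for fromtimestamp().
import datetime
import uuid

u = uuid.uuid1()
unix_seconds = (u.time - 0x01b21dd213814000L) * 100 / 1e9
print datetime.datetime.fromtimestamp(unix_seconds)  # approximately now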
def testConnection(self):
    testhost = self.getOutputError("hostname")[0]
    #print testhost
    #print self.hostname
    logger.debug("testing connection in hpc.py")
    return testhost.strip() == self.hostname.strip()
def dbSave(self, experiment_id, dataset, user):
    jobstatus = "submitted"
    for hpcid in self.idsOnHPC:
        logger.debug("about to save job")
        newJob = Job()
        newJob.experiment = Experiment.objects.get(id=experiment_id)
        newJob.dataset = dataset
        newJob.user = MrTUser.objects.get(user=user)
        newJob.jobid = self.jobid
        newJob.hpcjobid = hpcid
        newJob.jobstatus = jobstatus
        newJob.save()
def getParameterFromTechXML(tech_xml, parameter_name):
    prefix = tech_xml.getroot().prefix
    xmlns = tech_xml.getroot().nsmap[prefix]
    parameter_string = ''
    for parameter in parameter_name.split('/'):
        parameter_string = parameter_string + '/' + prefix + ':' \
            + parameter
    elements = tech_xml.xpath('/' + parameter_string + '/text()',
                              namespaces={prefix: xmlns})
    logger.debug(elements)
    return getSingleResult(elements)
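# Illustrative sketch (hypothetical document and names): for
# parameter_name 'sample/temperature' and a root prefix 't', the loop in
# getParameterFromTechXML() builds '/t:sample/t:temperature'; prepending
# the extra '/' turns it into the descendant search
# '//t:sample/t:temperature/text()'.
from StringIO import StringIO
from lxml import etree

doc = etree.parse(StringIO(
    '<t:data xmlns:t="http://example.org/t">'
    '<t:sample><t:temperature>295</t:temperature></t:sample>'
    '</t:data>'))
prefix = doc.getroot().prefix                 # 't'
xmlns = doc.getroot().nsmap[prefix]           # 'http://example.org/t'
print doc.xpath('//t:sample/t:temperature/text()',
                namespaces={prefix: xmlns})   # ['295']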
def duplicate_file_check_rename(copyto):
    """
    Checks if the destination for the file already exists and returns
    a non-conflicting name

    :param copyto: The destination path to check
    :type copyto: string
    :rtype: The new non-conflicting path (the original path if no
        conflicts)
    """
    i = 1
    base, filename = path.split(copyto)
    name, ext = path.splitext(filename)
    result = copyto
    while path.exists(result):
        logger.debug('%s destination exists' % result)
        result = path.join(base, "{0}_{1}{2}".format(name, i, ext))
        i += 1
    return result
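# Usage sketch (hypothetical paths): if /data/staging/image.tif already
# exists, the call returns /data/staging/image_1.tif; if that exists
# too, the next candidate is /data/staging/image_2.tif, and so on.
safe_path = duplicate_file_check_rename('/data/staging/image.tif')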
def parseMets(filename, createdBy, expId=None):
    """Parse the METS document using the SAX Parser classes provided in
    the metsparser module.

    Arguments:
    filename -- path of the document to parse (METS or notMETS)
    createdBy -- a User instance
    expId -- the experiment ID to use

    Returns:
    The experiment ID

    """
    import time
    startParseTime = time.time()
    logger.debug("parse experiment id: " + str(expId))
    parser = make_parser(["drv_libxml2"])
    parser.setFeature(feature_namespaces, 1)
    dataHolder = MetsDataHolder()

    # on the first pass, we'll parse the document just so we can
    # create the experiment's structure
    parser.setContentHandler(MetsExperimentStructCreator(dataHolder))
    parser.parse(filename)

    # on the second pass, we'll parse the document so that we can tie
    # the metadata info with the experiment/dataset/datafile objects
    parser.setContentHandler(
        MetsMetadataInfoHandler(dataHolder, expId, createdBy))
    parser.parse(filename)

    endParseTime = time.time()
    # time difference in seconds
    timeDiff = endParseTime - startParseTime
    logger.debug("time difference in seconds: %s" % (timeDiff))
    return dataHolder.experimentDatabaseId
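# Usage sketch (hypothetical path; assumes a view with a logged-in
# user): ingest a METS document on behalf of the current user; the
# return value is the experiment's database id.
experiment_id = parseMets('/tmp/mets_upload.xml', request.user)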
def register_experiment_xmldata_file(self, filename, created_by,
                                     expid=None):
    f = open(filename)
    firstline = f.readline()
    f.close()
    if firstline.startswith('<experiment'):
        logger.debug('processing simple xml')
        eid = self.process_simple(filename, created_by, expid)
    else:
        logger.debug('processing METS')
        eid = self.process_METS(filename, created_by, expid)
    return eid
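# Usage sketch (hypothetical instance and path): both parsers are
# normally reached through register_experiment_xmldata_file(), which
# sniffs the first line of the uploaded file to choose between
# simple-XML and METS processing.
eid = registerer.register_experiment_xmldata_file(
    '/tmp/experiment.xml', created_by=request.user)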
def upload(request, *args, **kwargs):
    logger.debug("called upload")
    if request.method == 'POST':
        logger.debug("got POST")
        if request.FILES:
            logger.debug("got FILES")
            upload_received.send(sender='uploadify',
                                 data=request.FILES['Filedata'])
    return HttpResponse('True')
def stage_files(datafiles,
                experiment_id,
                staging=settings.STAGING_PATH,
                store=settings.FILE_STORE_PATH,
                ):
    """
    Move files from the staging area to the dataset.

    :param datafiles: one or more dataset files
    :type datafiles: :class:`tardis.tardis_portal.models.Dataset_File`
    :param experiment_id: the id of the experiment that the datafiles
        belong to
    :type experiment_id: string or int
    """
    experiment_path = path.join(store, str(experiment_id))
    if not path.exists(experiment_path):
        makedirs(experiment_path)
    if not isinstance(datafiles, list):
        datafiles = [datafiles]
    for datafile in datafiles:
        urlpath = datafile.url.partition('//')[2]
        todir = path.join(experiment_path, path.split(urlpath)[0])
        if not path.exists(todir):
            makedirs(todir)
        copyfrom = path.join(staging, urlpath)  # to be url
        copyto = path.join(experiment_path, urlpath)
        if path.exists(copyto):
            logger.error("can't stage %s destination exists" % copyto)
            # TODO raise error
            continue
        logger.debug('staging file: %s to %s' % (copyfrom, copyto))
        datafile.size = path.getsize(copyfrom)
        datafile.save()
        shutil.move(copyfrom, copyto)
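# Usage sketch (hypothetical ids): once Dataset_File rows exist with
# staging-relative URLs, a single call moves the payloads into the
# experiment's file store directory. A plain list is passed because
# stage_files() only wraps non-list arguments.
files = list(Dataset_File.objects.filter(dataset__experiment__pk=42))
stage_files(files, experiment_id=42)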
def get_or_create_user_ldap(email):
    # ignore the 'authcate' model fieldname.. adapted from monash auth
    authcate_user = None
    username = get_ldap_username_for_email(email)
    try:
        u = User.objects.get(username=username)
        logger.debug(u.get_profile())
        # if, somehow, someone else has manually created a user that
        # has this username
        if not u.get_profile().authcate_user:
            # see if this has already happened and a new user was
            # assigned with a different username
            try:
                u_email = User.objects.get(email__exact=email,
                                           username=username)
                authcate_user = u_email
            except User.DoesNotExist, ue:
                # this is a rare case and will have to be handled later
                pass
        else:
            # (snippet truncated in the source)
def _setupJsonData(authForm, authenticationMethod, supportedAuthMethods):
    """Sets up the JSON data dictionary that will be sent back to the
    web client.

    :param authForm: the Authentication Form
    :param authenticationMethod: the user's authentication method
    :param supportedAuthMethods: what's left of the list of
        authentication methods that the user is not using yet

    :returns: The data dictionary
    """
    data = {}
    username = authForm.cleaned_data['username']
    data['username'] = username
    data['authenticationMethod'] = authenticationMethod
    #data['authenticationMethodDesc'] = authenticationMethodDesc

    # flag to tell if there are any more auth methods that we can show
    # the user
    data['supportedAuthMethodsLen'] = len(supportedAuthMethods)
    logger.debug('Sending partial data to auth methods management page')
    return data
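# Usage sketch: the returned dictionary is intended to be serialised
# for the client, e.g. (assuming Django's bundled simplejson and the
# mimetype= keyword of this Django era):
#
#   from django.utils import simplejson
#   data = _setupJsonData(authForm, authMethod, leftOverAuthMethods)
#   return HttpResponse(simplejson.dumps(data),
#                       mimetype='application/json')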
def clean(self):
    logger.debug("starting to clean MRparam form")
    cleaned_data = self.cleaned_data
    mol_weight = cleaned_data.get("mol_weight")
    if not mol_weight:
        sequence = cleaned_data.get("sequence")
        if sequence:
            mol_weight = utils.calcMW(sequence)
            cleaned_data["mol_weight"] = mol_weight
        else:
            raise forms.ValidationError(
                "Please enter either a number for the molecular weight "
                "or an amino acid sequence for your input data.")
    logger.debug(repr(self._errors))
    logger.debug("finished cleaning MRparam form")
    return cleaned_data
def endElementNS(self, name, qname):
    # just get the element name without the namespace
    elName = name[1]

    if elName == "dmdSec":
        self.inDmdSec = False
        # if we are currently processing an experiment structure, let's
        # save the institution value before we finalise the experiment
        if self.processExperimentStruct:
            self.metsObject.institution = self.institution

            # let's save the experiment in the DB
            self.modelExperiment = models.Experiment(
                id=self.tardisExpId,
                url=self.metsObject.url,
                approved=True,
                title=self.metsObject.title,
                institution_name=self.metsObject.institution,
                description=self.metsObject.description,
                created_by=self.createdBy,
            )
            self.modelExperiment.save()

            self.holder.experimentDatabaseId = self.modelExperiment.id

            x = 0
            for author in self.metsObject.authors:
                try:
                    # check if the given author already exists in the DB
                    author = models.Author.objects.get(
                        name=SafeUnicode(author))
                except models.Author.DoesNotExist:
                    # create it otherwise
                    author = models.Author(name=SafeUnicode(author))
                    author.save()

                author_experiment = models.Author_Experiment(
                    experiment=self.modelExperiment,
                    author=author,
                    order=x)
                author_experiment.save()
                x = x + 1

        elif self.processDatasetStruct:
            # let's save the dataset in the DB
            self.modelDataset = models.Dataset(
                experiment=self.modelExperiment,
                description=self.metsObject.title)
            self.modelDataset.save()

            # let's also save the modelDataset in a dictionary so that
            # we can look it up easily later on when we start
            # processing the datafiles.
            self.datasetLookupDict[self.metsObject.id] = self.modelDataset

        self.metsObject = None
        self.processExperimentStruct = False
        self.processDatasetStruct = False

    elif elName == "title" and self.inDmdSec:
        self.grabTitle = False

    elif elName == "url" and self.processExperimentStruct:
        self.grabExperimentUrl = False

    elif elName == "abstract" and self.processExperimentStruct:
        self.grabAbstract = False

    elif elName == "name" and self.processExperimentStruct:
        self.inName = False

    elif elName == "namePart" and self.inName:
        self.grabMightBeAuthor = False

    elif elName == "roleTerm" and self.inName:
        self.grabRoleTerm = False
        self.mightBeAuthor = None

    elif elName == "name" and self.inInstitution:
        self.grabInstitution = False

    elif elName == "agent":
        self.inInstitution = False

    elif elName == "amdSec":
        # we're done processing the metadata entries
        self.inAmdSec = False
        # let's reset the cached experiment model object
        self.modelExperiment = None

    elif elName == "techMD" and self.inAmdSec:
        self.inTechMd = False
        self.metsObject = None
        self.processExperimentMetadata = False
        self.processDatasetMetadata = False
        self.processDatafileMetadata = False

    elif elName == "xmlData" and self.inTechMd:
        self.inXmlData = False

    elif elName != self.xmlDataChildElement and \
            self.customHandler is not None:
        self.customHandler.endElement(elName)

    elif elName == self.xmlDataChildElement and self.inXmlData:
        if self.customHandler is not None:
            self.tempMetadataHolder = self.customHandler.metadataDict

        try:
            schema = models.Schema.objects.get(
                namespace__exact=self.elementNamespace)

            # get the associated parameter names for the given schema
            parameterNames = models.ParameterName.objects.filter(
                schema__namespace__exact=schema.namespace).order_by('id')

            if self.processExperimentMetadata:
                # create a new parameter set for the metadata
                parameterSet = models.ExperimentParameterSet(
                    schema=schema, experiment=self.modelExperiment)
                parameterSet.save()

                # now let's process the experiment parameters
                for parameterName in parameterNames:
                    try:
                        parameterValue = self.tempMetadataHolder[
                            parameterName.name]
                        if parameterValue != '':
                            self._saveParameter('ExperimentParameter',
                                                parameterName,
                                                parameterValue,
                                                parameterSet)
                    except KeyError:
                        # we'll just pass as we don't really need to
                        # deal with the current parameterName which is
                        # not provided in the current section of the
                        # METS document
                        pass

            elif self.processDatasetMetadata:
                # create a new parameter set for the dataset metadata
                parameterSet = models.DatasetParameterSet(
                    schema=schema, dataset=self.modelDataset)
                parameterSet.save()

                # now let's process the dataset parameters
                for parameterName in parameterNames:
                    try:
                        parameterValue = self.tempMetadataHolder[
                            parameterName.name]
                        if parameterValue != '':
                            self._saveParameter('DatasetParameter',
                                                parameterName,
                                                parameterValue,
                                                parameterSet)
                    except KeyError:
                        # we'll just pass as we don't really need to
                        # deal with the current parameterName which is
                        # not provided in the current section of the
                        # METS document
                        logger.debug(str(parameterName) +
                                     ' is not in the tempMetadataHolder')
                        pass

            elif self.processDatafileMetadata:
                # create a new parameter set for the metadata
                parameterSet = models.DatafileParameterSet(
                    schema=schema, dataset_file=self.modelDatafile)
                parameterSet.save()

                # now let's process the datafile parameters
                for parameterName in parameterNames:
                    try:
                        parameterValue = self.tempMetadataHolder[
                            parameterName.name]
                        if parameterValue != '':
                            self._saveParameter('DatafileParameter',
                                                parameterName,
                                                parameterValue,
                                                parameterSet)
                    except KeyError:
                        # we'll just pass as we don't really need to
                        # deal with the current parameterName which is
                        # not provided in the current section of the
                        # METS document
                        pass

        except models.Schema.DoesNotExist:
            logger.warning('unsupported schema being ingested: ' +
                           self.elementNamespace)

        # reset the current xmlData child element so that if a new
        # parameter set is read, we can process it again
        self.xmlDataChildElement = None
        self.customHandler = None

    elif elName == self.parameterName and \
            self.xmlDataChildElement is not None:
        # reset self.parameterName to None so the next parameter can be
        # processed
        self.parameterName = None
def __init__(self, xmlString):
    self.tree = etree.parse(StringIO(xmlString))
    logger.debug('(Initializing %s)' % self.tree)
def MRParams(request, dataset_id):
    """
    Shows the parameter entry form; takes the dataset id as input.
    """
    # return True
    #dataset_id = request.GET["dataset_id"]
    #getMTZfile
    mtz_file = utils.get_mtz_file(dataset_id)
    mtz_params = utils.processMTZ(mtz_file.get_storage_path())
    tochoice = lambda x: (x, x)
    f_choices = map(tochoice, mtz_params["f_value"])
    sigf_choices = map(tochoice, mtz_params["sigf_value"])
    sg_num = mtz_params["spacegroup"]
    pdbfilelist = utils.get_pdb_files(dataset_id)
    rmsd_formfactory = formset_factory(RmsdForm)
    if request.method == 'POST':
        logger.debug("we're POSTing")
        param_form = MRForm(f_choices, sigf_choices, sg_num,
                            request.POST)
        rmsd_formset = rmsd_formfactory(request.POST)
        logger.debug(repr(param_form.is_valid()) +
                     repr(rmsd_formset.is_valid()) +
                     repr(rmsd_formset.errors) +
                     repr(request.POST))
        if param_form.is_valid() and rmsd_formset.is_valid():
            hpcUsername = MrTUser.objects.get(
                user=request.user).hpc_username
            newJob = hpcjob.HPCJob(hpcUsername)
            jobparameters = {
                "f_value": param_form.cleaned_data['f_value'],
                "sigf_value": param_form.cleaned_data['sigf_value'],
                "num_in_asym": param_form.cleaned_data['num_in_asym'],
                "ensemble_number":
                    param_form.cleaned_data['ensemble_number'],
                "packing": param_form.cleaned_data['packing'],
                "space_group": param_form.cleaned_data['space_group'],
            }
            if "sg_all" in param_form.cleaned_data:
                if param_form.cleaned_data["sg_all"]:
                    jobparameters["space_group"].append("ALL")
            jobparameters["rmsd"] = []
            for form in rmsd_formset.forms:
                jobparameters["rmsd"].append(form.cleaned_data['rmsd'])
            jobparameters["mol_weight"] = \
                param_form.cleaned_data['mol_weight']
            filepaths = utils.get_pdb_files(dataset_id,
                                            storagePaths=True) + \
                [mtz_file.get_storage_path()]
            logger.debug("params: " + repr(jobparameters))
            logger.debug("files: " + repr(filepaths))
            newJob.stage(jobparameters, filepaths)
            newJob.submit()
            dataset = Dataset.objects.get(pk=dataset_id)
            newJob.dbSave(dataset.experiment_id, dataset, request.user)
            c = Context({})
            return render_to_response("mrtardis/running_job.html", c)
    else:
        param_form = MRForm(f_choices=f_choices,
                            sigf_choices=sigf_choices,
                            sg_num=sg_num)
        rmsd_formset = rmsd_formfactory()
    c = Context({
        'dataset_id': dataset_id,
        'mtz_params': mtz_params,
        'rmsd_formset': rmsd_formset,
        'paramForm': param_form,
        'fileName': mtz_file.filename,
        'pdbfilelist': pdbfilelist,
        'spacegroupname': utils.sgNumNameTrans(number=sg_num),
    })
    return render_to_response("mrtardis/parameters.html", c)
        flattened.append('--' + self.boundary + '--')
        flattened.append('')
        return '\r\n'.join(flattened)


if __name__ == '__main__':
    # Create the form with simple fields
    form = MultiPartForm()
    form.add_field('firstname', 'Doug')
    form.add_field('lastname', 'Hellmann')

    # Add a fake file
    form.add_file('biography', 'bio.txt',
                  fileHandle=StringIO('Python developer and blogger.'))

    # Build the request
    request = urllib2.Request('http://localhost:8080/')
    request.add_header('User-agent',
                       'PyMOTW (http://www.doughellmann.com/PyMOTW/)')
    body = str(form)
    request.add_header('Content-type', form.get_content_type())
    request.add_header('Content-length', len(body))
    request.add_data(body)

    logger.debug('OUTGOING DATA:')
    logger.debug(request.get_data())

    logger.debug('SERVER RESPONSE:')
    logger.debug(urllib2.urlopen(request).read())
def process_simple(self, filename, created_by, eid):
    with open(filename) as f:
        e = 0
        ds = 0
        df = 0
        current = None
        current_df_id = 0
        mdelist = []
        for line in f:
            line = line.strip()
            # logger.debug("LINE: %s, CURRENT: %s" % (line, current))
            if line.startswith('<experiment>'):
                current = 'experiment'
                e += 1
                ds = 0
                df = 0
                # initialize with empty strings to avoid key errors
                exp = {}
                exp['abstract'] = ''
                exp['organization'] = ''
                exp['title'] = ''
                exp['url'] = ''
                exp['starttime'] = None
                exp['endtime'] = None
                authors = list()
            elif line.startswith('<dataset>'):
                # commit any experiment if current == 'experiment'
                if current == 'experiment':
                    if eid is not None:
                        experiment = Experiment.objects.get(pk=eid)
                    else:
                        experiment = Experiment()
                    experiment.url = exp['url']
                    experiment.title = exp['title']
                    experiment.institution_name = exp['organization']
                    experiment.description = exp['abstract']
                    experiment.created_by = created_by
                    experiment.start_time = exp['starttime']
                    experiment.end_time = exp['endtime']
                    experiment.save()

                    author_experiments = Author_Experiment.objects.all()
                    author_experiments = author_experiments.filter(
                        experiment=experiment).delete()

                    x = 0
                    for authorName in authors:
                        author = Author(name=SafeUnicode(authorName))
                        author.save()
                        author_experiment = Author_Experiment(
                            experiment=experiment,
                            author=author,
                            order=x)
                        author_experiment.save()
                        x = x + 1

                    experiment.dataset_set.all().delete()

                    if 'metadata' in exp:
                        for md in exp['metadata']:
                            xmlns = getXmlnsFromTechXMLRaw(md)
                            logger.debug('schema %s' % xmlns)
                            schema = None
                            try:
                                schema = Schema.objects.get(
                                    namespace__exact=xmlns)
                            except Schema.DoesNotExist, exc:
                                logger.debug('schema not found: '
                                             + str(exc))
                            if schema:
                                parameternames = \
                                    ParameterName.objects.filter(
                                        schema__namespace__exact=schema.namespace)
                                parameternames = \
                                    parameternames.order_by('id')
                                tech_xml = getTechXMLFromRaw(md)
                                parameterset = ExperimentParameterSet(
                                    schema=schema,
                                    experiment=experiment)
                                parameterset.save()
                                for pn in parameternames:
                                    logger.debug('finding parameter '
                                                 + pn.name
                                                 + ' in metadata')
                                    try:
                                        if pn.is_numeric:
                                            value = getParameterFromTechXML(
                                                tech_xml, pn.name)
                                            if value != None:
                                                ep = ExperimentParameter(
                                                    parameterset=parameterset,
                                                    name=pn,
                                                    string_value=None,
                                                    numerical_value=float(value))
                                                ep.save()
                                        else:
                                            ep = ExperimentParameter(
                                                parameterset=parameterset,
                                                name=pn,
                                                string_value=getParameterFromTechXML(
                                                    tech_xml, pn.name),
                                                numerical_value=None)
                                            ep.save()
                                    except Exception, exc:
                                        logger.debug('error saving '
                                                     'experiment parameter: '
                                                     + str(exc))
                current = 'dataset'
                ds = ds + 1
                mdflist = []
                mdslist = []
                df = 0
                dataset = dict()
            elif line.startswith('<file>'):
                if current == 'dataset':
                    d = Dataset(experiment=experiment,
                                description=dataset['description'])
                    d.save()
                    if 'metadata' in dataset:
                        for md in dataset['metadata']:
                            xmlns = getXmlnsFromTechXMLRaw(md)
                            logger.debug('trying to find parameters '
                                         'with an xmlns of ' + xmlns)
                            schema = None
                            try:
                                schema = Schema.objects.get(
                                    namespace__exact=xmlns)
                            except Schema.DoesNotExist, exc:
                                logger.debug('schema not found: '
                                             + str(exc))
                            if schema:
                                parameternames = \
                                    ParameterName.objects.filter(
                                        schema__namespace__exact=schema.namespace)
                                parameternames = \
                                    parameternames.order_by('id')
                                tech_xml = getTechXMLFromRaw(md)
                                parameterset = DatasetParameterSet(
                                    schema=schema, dataset=d)
                                parameterset.save()
                                for pn in parameternames:
                                    logger.debug('finding parameter '
                                                 + pn.name
                                                 + ' in metadata')
                                    try:
                                        if pn.is_numeric:
                                            value = getParameterFromTechXML(
                                                tech_xml, pn.name)
                                            if value != None:
                                                dp = DatasetParameter(
                                                    parameterset=parameterset,
                                                    name=pn,
                                                    string_value=None,
                                                    numerical_value=float(value))
                                                dp.save()
                                        else:
                                            dp = DatasetParameter(
                                                parameterset=parameterset,
                                                name=pn,
                                                string_value=getParameterFromTechXML(
                                                    tech_xml, pn.name),
                                                numerical_value=None)
                                            dp.save()
                                    except Exception, exc:
                                        logger.debug('error saving '
                                                     'experiment parameter: '
                                                     + str(exc))
                else:
                    # current == 'file': commit the previous datafile
                    # and its metadata (this branch continues in the
                    # process_simple fragment after process_METS below)
def process_METS(self, filename, created_by, expid=None):
    logger.debug('START EXP: ' + str(expid))
    url = 'http://www.example.com'
    self.url = 'http://www.example.com'

    f = open(filename, 'r')
    xmlString = f.read()
    f.close()
    ep = ExperimentParser(str(xmlString))
    del xmlString

    e = Experiment(
        id=expid,
        url=url,
        approved=True,
        title=ep.getTitle(),
        institution_name=ep.getAgentName('DISSEMINATOR'),
        description=ep.getAbstract(),
        created_by=created_by,
    )
    e.save()

    url_path = self.url.rpartition('/')[0] + self.url.rpartition('/')[1]

    author_experiments = Author_Experiment.objects.all()
    author_experiments = author_experiments.filter(experiment=e).delete()

    x = 0
    for authorName in ep.getAuthors():
        author_experiment = Author_Experiment(
            experiment=e,
            author=SafeUnicode(authorName),
            order=x)
        author_experiment.save()
        x = x + 1

    # looks like the intention here is to reload all the datasets from
    # scratch
    e.dataset_set.all().delete()

    # for each dataset...
    for dmdid in ep.getDatasetDMDIDs():
        d = Dataset(experiment=e,
                    description=ep.getDatasetTitle(dmdid))
        d.save()

        # for each metadata element of this dataset...
        for admid in ep.getDatasetADMIDs(dmdid):
            techxml = ep.getTechXML(admid)
            prefix = techxml.getroot().prefix
            xmlns = techxml.getroot().nsmap[prefix]
            try:
                schema = Schema.objects.get(namespace__exact=xmlns)
                parameternames = ParameterName.objects.filter(
                    schema__namespace__exact=schema.namespace)
                parameternames = parameternames.order_by('id')
                for pn in parameternames:
                    if pn.is_numeric:
                        value = ep.getParameterFromTechXML(techxml,
                                                           pn.name)
                        if value != None:
                            dp = DatasetParameter(
                                dataset=d,
                                name=pn,
                                string_value=None,
                                numerical_value=float(value))
                            dp.save()
                    else:
                        dp = DatasetParameter(
                            dataset=d,
                            name=pn,
                            string_value=ep.getParameterFromTechXML(
                                techxml, pn.name),
                            numerical_value=None)
                        dp.save()
            except Schema.DoesNotExist:
                logger.debug('Schema ' + xmlns + " doesn't exist!")
                # todo replace with logging

        # for each file in the dataset...
        for fileid in ep.getFileIDs(dmdid):
            # if ep.getFileLocation(fileid).startswith('file://'):
            #     absolute_filename = url_path + \
            #         ep.getFileLocation(fileid).partition('//')[2]
            # else:
            #     absolute_filename = ep.getFileLocation(fileid)....
            if self.null_check(ep.getFileName(fileid)):
                filename = ep.getFileName(fileid)
            else:
                filename = ep.getFileLocation(fileid).rpartition('/')[2]
            # logger.debug(filename)
            url = ep.getFileLocation(fileid)
            protocol = url.partition('://')[0]
            datafile = Dataset_File(dataset=d,
                                    filename=filename,
                                    url=url,
                                    size=ep.getFileSize(fileid),
                                    protocol=protocol)
            datafile.save()

            # for each metadata element of this file...
            for admid in ep.getFileADMIDs(fileid):
                techxml = ep.getTechXML(admid)
                prefix = techxml.getroot().prefix
                xmlns = techxml.getroot().nsmap[prefix]
                try:
                    schema = Schema.objects.get(namespace__exact=xmlns)
                    parameternames = ParameterName.objects.filter(
                        schema__namespace__exact=schema.namespace)
                    parameternames = parameternames.order_by('id')
                    for pn in parameternames:
                        if pn.is_numeric:
                            value = ep.getParameterFromTechXML(
                                techxml, pn.name)
                            if value != None:
                                dp = DatafileParameter(
                                    dataset_file=datafile,
                                    name=pn,
                                    string_value=None,
                                    numerical_value=float(value))
                                dp.save()
                        else:
                            dp = DatafileParameter(
                                dataset_file=datafile,
                                name=pn,
                                string_value=ep.getParameterFromTechXML(
                                    techxml, pn.name),
                                numerical_value=None)
                            dp.save()
                except Schema.DoesNotExist:
                    # no known schema: keep the raw metadata document
                    xml_data = XML_data(
                        datafile=datafile,
                        xmlns=SafeUnicode(xmlns),
                        data=SafeUnicode(etree.tostring(techxml)))
                    xml_data.save()

    logger.debug('DONE EXP: ' + str(e.id))
    return e.id
# (continuation of the datafile-commit branch of process_simple above;
# the opening of the branch is missing from the source, so the lead-in
# below is reconstructed to mirror the dataset branch)
                                        if value != None:
                                            dp = DatafileParameter(
                                                parameterset=parameterset,
                                                name=pn,
                                                string_value=None,
                                                numerical_value=float(value))
                                            dp.save()
                                    else:
                                        dp = DatafileParameter(
                                            parameterset=parameterset,
                                            name=pn,
                                            string_value=getParameterFromTechXML(
                                                tech_xml, pn.name),
                                            numerical_value=None)
                                        dp.save()
                                except Exception, exc:
                                    logger.debug('error saving '
                                                 'experiment parameter: '
                                                 + str(exc))
                            except Schema.DoesNotExist, exc:
                                logger.debug('schema not found: '
                                             + str(exc))
                # commit any dataset if current == 'dataset'
                current = 'file'
                df = df + 1
                mdflist = []
                datafile = dict()
                logger.debug('experiment: ' + str(e) +
                             ' dataset: ' + str(ds) +
                             ' datafile: ' + str(df))
            elif line.startswith('<metadata'):
                md = ''