示例#1
0
def test_hpc_connection(user):
    """
    Check whether the given user can reach the cluster.

    A successful check is remembered on the user's ``HPCUser`` record
    (``testedConnection``); once that flag is set, later calls return
    straight away without contacting the cluster again.

    :param user: user object
    :returns: the user's HPC username when the connection works (or has
        already been verified), ``False`` when no ``HPCUser`` record can
        be looked up or the connection test fails
    """
    logger.debug("testing if user exists")
    try:
        account = HPCUser.objects.get(user=user)
    except (HPCUser.DoesNotExist, TypeError):
        return False

    # a previous successful test is trusted, no new connection attempt
    if account.testedConnection:
        return account.hpc_username

    cluster = HPC(location="msg", username=account.hpc_username)
    reachable = bool(cluster.testConnection())

    # persist the outcome either way
    account.testedConnection = reachable
    account.save()
    if not reachable:
        return False
    return account.hpc_username
示例#2
0
    def _saveParameter(self, parameterTypeClass, parameterName,
                       parameterValue, parameterSet):
        '''Store one metadata value as a parameter row in the database.

        The model class is looked up by name on the ``models`` module, see
        http://stackoverflow.com/questions/452969/does-python-have-an-equivalent-to-java-class-forname

        :param parameterTypeClass: name of the parameter model class
        :param parameterName: parameter name object; its ``is_numeric``
            attribute selects the numerical or the string column
        :param parameterValue: the value to store
        :param parameterSet: the parameter set the new row belongs to

        '''
        logger.debug('saving parameter %s: %s' %
            (parameterName, parameterValue))
        storeAsNumber = parameterName.is_numeric
        parameterModel = getattr(models, parameterTypeClass)

        # exactly one of the two value columns is filled
        if storeAsNumber:
            textValue, numberValue = None, float(parameterValue)
        else:
            textValue, numberValue = parameterValue, None

        parameterModel(
            parameterset=parameterSet,
            name=parameterName,
            string_value=textValue,
            numerical_value=numberValue).save()
示例#3
0
def download_datafile(request, datafile_id):
    """
    Send a single datafile to the client.

    Remote datafiles (``http://``, ``https://`` or ``ftp://`` URLs) are
    answered with a redirect; everything else is read from below
    ``settings.FILE_STORE_PATH`` and streamed as an attachment.

    :param request: the current HTTP request
    :param datafile_id: primary key of the ``Dataset_File`` to download
    :returns: a redirect or attachment response, the "not found" response
        when the record or the file on disk is missing, or the error
        response when the user has no access to the datafile
    """
    try:
        datafile = Dataset_File.objects.get(pk=datafile_id)
    except Dataset_File.DoesNotExist:
        # an unknown id is answered like a missing file instead of
        # escaping as an unhandled exception
        return return_response_not_found(request)

    if not has_datafile_access(datafile.id, request.user):
        return return_response_error(request)

    url = datafile.url
    if url.startswith(("http://", "https://", "ftp://")):
        return HttpResponseRedirect(url)

    # NOTE(review): if the part after "//" is absolute or contains "..",
    # join() can resolve outside the experiment directory -- confirm that
    # stored urls are always relative.
    file_path = join(
        settings.FILE_STORE_PATH,
        str(datafile.dataset.experiment.id),
        url.partition("//")[2],
    )
    logger.debug(file_path)

    try:
        # binary mode: the payload is served as application/octet-stream
        # and must not go through newline translation
        wrapper = FileWrapper(open(file_path, "rb"))
    except IOError:
        return return_response_not_found(request)

    response = HttpResponse(wrapper, mimetype="application/octet-stream")
    response["Content-Disposition"] = \
        'attachment; filename="' + datafile.filename + '"'
    return response
示例#4
0
def jobstatus(request, experiment_id):
    """
    Render the status overview of all jobs of one experiment.

    Every job of the experiment has ``updateStatus()`` called on it, then
    the jobs are grouped by dataset and, inside each dataset, by job id.

    :param request: the current HTTP request
    :param experiment_id: primary key of the experiment
    :returns: the rendered ``mrtardis/jobstatus.html`` page, a plain
        "Not logged in" response for anonymous users, or the "not found"
        response when ``Experiment.DoesNotExist`` is raised
    """
    if not request.user.is_authenticated():
        # a view has to hand back a response object, not a bare string;
        # imported locally so this block does not depend on the file's
        # import section
        from django.http import HttpResponse
        return HttpResponse("Not logged in")
    try:
        jobs = Job.objects.filter(experiment=Experiment.objects.get(
                pk=experiment_id))
        for job in jobs:
            job.updateStatus()
        datasets = jobs.values_list('dataset').distinct()
        logger.debug(repr(datasets))
        disparray = []
        for dataset in datasets:
            # values_list() yields 1-tuples, unwrap the dataset key
            dataset = dataset[0]
            jobids = jobs.filter(dataset=dataset).values_list(
                'jobid').distinct()
            jobidarray = []
            for jobid in jobids:
                finished = True
                retrieved = True
                jobid = jobid[0]
                # UUID.time counts 100ns intervals since 1582-10-15; the
                # constant is the offset of the Unix epoch in the same
                # unit, so the result is seconds since 1970.  (The Python 2
                # "L" suffix was dropped: the value is the same without it.)
                inttime = uuid.UUID(jobid).time
                submittime = datetime.datetime.fromtimestamp(
                    (inttime - 0x01b21dd213814000) * 100 / 1e9)
                thesejobs = jobs.filter(jobid=jobid)
                jobdataarray = []
                for job in thesejobs:
                    # a group only counts as finished/retrieved when every
                    # one of its jobs carries that status
                    if job.jobstatus.strip() != "Finished":
                        finished = False
                    if job.jobstatus.strip() != "Retrieved":
                        retrieved = False
                    jobdata = {
                        'status': job.jobstatus,
                        'hpcjobid': job.hpcjobid,
                        'submittime': job.submittime,
                        }
                    jobdataarray.append(jobdata)
                jobiddict = {'jobid': jobid,
                             'joblist': jobdataarray,
                             'finished': finished,
                             'retrieved': retrieved,
                             'submittime': submittime.strftime(
                        "%d %b %Y, %H:%M:%S")}
                jobidarray.append(jobiddict)
            datasetdict = {'dataset': Dataset.objects.get(
                    pk=dataset).description,
                           'jobidlist': jobidarray}
            disparray.append(datasetdict)
            logger.debug(repr(disparray))
        c = Context({
                'disparray': disparray,
            })
    except Experiment.DoesNotExist:
        return return_response_not_found(request)

    return render_to_response('mrtardis/jobstatus.html', c)
示例#5
0
 def testConnection(self):
     """
     Run ``hostname`` through ``self.getOutputError`` and compare the
     first element of the result with ``self.hostname``, ignoring
     surrounding whitespace.

     :returns: True when both names match, False otherwise
     """
     reported = self.getOutputError("hostname")[0]
     logger.debug("testing connection in hpc.py")
     return reported.strip() == self.hostname.strip()
示例#6
0
 def dbSave(self, experiment_id, dataset, user):
     """
     Create one ``Job`` row for every id in ``self.idsOnHPC``.

     All rows share ``self.jobid`` and start with the status
     "submitted".

     :param experiment_id: id of the experiment the jobs belong to
     :param dataset: dataset stored on each job
     :param user: user whose ``MrTUser`` record is stored on each job
     """
     if not self.idsOnHPC:
         # nothing to save, so no lookups are needed either
         return

     # both lookups are the same for every job: query once, not per id
     experiment = Experiment.objects.get(id=experiment_id)
     owner = MrTUser.objects.get(user=user)

     jobstatus = "submitted"
     for hpcid in self.idsOnHPC:
         logger.debug("about to save job")
         newJob = Job()
         newJob.experiment = experiment
         newJob.dataset = dataset
         newJob.user = owner
         newJob.jobid = self.jobid
         newJob.hpcjobid = hpcid
         newJob.jobstatus = jobstatus
         newJob.save()
def getParameterFromTechXML(tech_xml, parameter_name):
    """
    Look up the text of a parameter inside a parsed metadata document.

    Every ``/``-separated step of ``parameter_name`` is qualified with
    the namespace prefix of the document's root element before the
    XPath query is run.

    :param tech_xml: parsed XML tree offering ``getroot()`` and ``xpath()``
    :param parameter_name: ``/``-separated element path
    :returns: whatever ``getSingleResult`` makes of the matching text nodes
    """
    root = tech_xml.getroot()
    ns_prefix = root.prefix
    ns_uri = root.nsmap[ns_prefix]

    qualified_steps = ['/' + ns_prefix + ':' + step
                       for step in parameter_name.split('/')]
    query = '/' + ''.join(qualified_steps) + '/text()'

    matches = tech_xml.xpath(query, namespaces={ns_prefix: ns_uri})
    logger.debug(matches)
    return getSingleResult(matches)
示例#8
0
def duplicate_file_check_rename(copyto):
    """
    Return a destination path that does not exist yet.

    When ``copyto`` is already taken, ``_1``, ``_2``, ... is inserted
    between the file name and its extension until a free path is found.

    :param copyto: The destination path to check
    :type copyto: string
    :rtype: The new non-conflicting path (the original path if no conflicts)
    """
    directory, basename = path.split(copyto)
    stem, suffix = path.splitext(basename)

    candidate = copyto
    attempt = 0
    while path.exists(candidate):
        logger.debug('%s destination exists' % candidate)
        attempt += 1
        candidate = path.join(
            directory, "{0}_{1}{2}".format(stem, attempt, suffix))
    return candidate
示例#9
0
def parseMets(filename, createdBy, expId=None):
    """Parse a METS document with the SAX handlers of the metsparser module.

    The document is read twice with the same parser: the first pass builds
    the experiment structure, the second pass attaches the metadata to the
    experiment/dataset/datafile objects.

    Arguments:
    filename -- path of the document to parse (METS or notMETS)
    createdBy -- a User instance
    expId -- the experiment ID to use

    Returns:
    The experiment ID

    """

    import time

    started = time.time()

    logger.debug("parse experiment id: " + str(expId))

    saxParser = make_parser(["drv_libxml2"])
    saxParser.setFeature(feature_namespaces, 1)
    holder = MetsDataHolder()

    # pass 1: create the experiment's structure
    structureHandler = MetsExperimentStructCreator(holder)
    saxParser.setContentHandler(structureHandler)
    saxParser.parse(filename)

    # pass 2: tie the metadata to the objects created in pass 1
    metadataHandler = MetsMetadataInfoHandler(holder, expId, createdBy)
    saxParser.setContentHandler(metadataHandler)
    saxParser.parse(filename)

    elapsed = time.time() - started
    logger.debug("time difference in seconds: %s" % elapsed)

    return holder.experimentDatabaseId
    def register_experiment_xmldata_file(
        self,
        filename,
        created_by,
        expid=None,
        ):
        """Ingest an experiment description file and return the experiment id.

        Only the first line of the file is inspected: a line starting with
        ``<experiment`` is handed to ``process_simple``, anything else is
        treated as METS and handed to ``process_METS``.

        :param filename: path of the XML file to ingest
        :param created_by: passed through to the processing method
        :param expid: experiment id passed through to the processing method
        :returns: whatever the chosen processing method returns
        """

        # the context manager closes the file even if readline() raises
        with open(filename) as f:
            firstline = f.readline()

        if firstline.startswith('<experiment'):
            logger.debug('processing simple xml')
            eid = self.process_simple(filename, created_by, expid)
        else:
            logger.debug('processing METS')
            eid = self.process_METS(filename, created_by, expid)

        return eid
示例#11
0
def upload(request, *args, **kwargs):
    """
    Receive an uploaded file and announce it through ``upload_received``.

    Only POST requests carrying files send the signal; the response
    body is 'True' in every case.
    """
    logger.debug("called upload")
    is_post = request.method == 'POST'
    if is_post:
        logger.debug("got POST")
    if is_post and request.FILES:
        logger.debug("got FILES")
        payload = request.FILES['Filedata']
        upload_received.send(sender='uploadify', data=payload)
    return HttpResponse('True')
示例#12
0
def stage_files(datafiles,
                experiment_id,
                staging=settings.STAGING_PATH,
                store=settings.FILE_STORE_PATH,
                ):
    """
    Move files from the staging area into the experiment's directory.

    The part of each datafile url after ``//`` is used as the relative
    path below both ``staging`` and the experiment directory.  A file
    whose destination already exists is logged as an error and skipped.
    The size of each moved file is saved on its datafile record.

    :param datafiles: one or more dataset files
    :type datafiles: :class:`tardis.tardis_portal.models.Dataset_File`
    :param experiment_id: the id of the experiment that the datafiles belong to
    :type experiment_id: string or int
    :param staging: directory the files are moved from
    :param store: directory containing one sub-directory per experiment
    """
    experiment_root = path.join(store, str(experiment_id))
    if not path.exists(experiment_root):
        makedirs(experiment_root)

    # a single datafile is handled like a one-element list
    pending = datafiles if isinstance(datafiles, list) else [datafiles]

    for item in pending:
        relative = item.url.partition('//')[2]

        target_dir = path.join(experiment_root, path.split(relative)[0])
        if not path.exists(target_dir):
            makedirs(target_dir)

        source = path.join(staging, relative)  # to be url
        destination = path.join(experiment_root, relative)

        if not path.exists(destination):
            logger.debug('staging file: %s to %s' % (source, destination))
            item.size = path.getsize(source)
            item.save()
            shutil.move(source, destination)
        else:
            # TODO raise error
            logger.error("can't stage %s destination exists" % destination)
示例#13
0
def get_or_create_user_ldap(email):
    # ignore the 'authcate' model fieldname.. adapted from monash auth

    authcate_user = None
    username = get_ldap_username_for_email(email)
    try:

        u = User.objects.get(username=username)
        logger.debug(u.get_profile())

        # if, somehow someone else has created a user manually that has this
        # username
        if not u.get_profile().authcate_user:

            # see if this has already happened and a new user was assigned with
            # a diff username
            try:
                u_email = User.objects.get(email__exact=email, username=username)
                authcate_user = u_email
            except User.DoesNotExist, ue:

                pass  # this is a rare case and will have to be handled later
        else:
示例#14
0
def _setupJsonData(authForm, authenticationMethod, supportedAuthMethods):
    """Build the JSON data dictionary that is sent back to the web client.

    :param authForm: the Authentication Form; its cleaned ``username``
        value is copied into the result
    :param authenticationMethod: the user's authentication method
    :param supportedAuthMethods: is what's left of the list of authentication
        methods that the user is not using yet

    :returns: The data dictionary

    """
    payload = {
        'username': authForm.cleaned_data['username'],
        'authenticationMethod': authenticationMethod,
        # lets the client know whether there are any more auth methods
        # that can be offered to the user
        'supportedAuthMethodsLen': len(supportedAuthMethods),
    }

    logger.debug('Sending partial data to auth methods management page')
    return payload
示例#15
0
    def clean(self):
        """Make sure a molecular weight is available for the job.

        When no molecular weight was entered it is calculated from the
        amino acid sequence; when neither was entered, validation fails.

        :returns: the cleaned data, including ``mol_weight``
        :raises forms.ValidationError: if neither value was supplied
        """
        logger.debug("starting to clean MRparam form")
        data = self.cleaned_data

        if not data.get("mol_weight"):
            sequence = data.get("sequence")
            if not sequence:
                raise forms.ValidationError("Please enter either a " +
                    "number for the molecular weight or an amino acid " +
                    "sequence for your input data.")
            data["mol_weight"] = utils.calcMW(sequence)

        logger.debug(repr(self._errors))
        logger.debug("ending to clean MRparam form")
        return data
示例#16
0
    def endElementNS(self, name, qname):
        """Handle the closing tag of an element of the METS document.

        The local element name is read from ``name[1]`` and dispatched
        through one ``if``/``elif`` chain, so only the first matching
        branch runs for any element:

        * ``dmdSec`` -- saves the experiment (and its authors) or the
          dataset described by ``self.metsObject`` and then clears the
          structure-processing state.
        * ``title``, ``url``, ``abstract``, ``name``, ``namePart``,
          ``roleTerm``, ``agent``, ``amdSec``, ``techMD``, ``xmlData`` --
          switch off the corresponding ``grab*`` / ``in*`` / ``process*``
          flags.
        * the current ``xmlData`` child element -- saves a parameter set
          plus its parameters for the experiment, dataset or datafile
          whose metadata is being processed.

        :param name: element name; only ``name[1]`` (the name without the
            namespace) is used
        :param qname: not used by this method
        """
        # just get the element name without the namespace
        elName = name[1]

        if elName == "dmdSec":
            self.inDmdSec = False
            # if we are currently processing an experiment structure, let's
            # save the institution value before we finalise the experiment
            if self.processExperimentStruct:
                self.metsObject.institution = self.institution

                # let's save the experiment in the DB
                # NOTE(review): the primary key is set explicitly from
                # self.tardisExpId -- presumably so an existing experiment
                # can be re-ingested under its id; confirm with the caller.
                self.modelExperiment = models.Experiment(
                    id=self.tardisExpId,
                    url=self.metsObject.url,
                    approved=True,
                    title=self.metsObject.title,
                    institution_name=self.metsObject.institution,
                    description=self.metsObject.description,
                    created_by=self.createdBy,
                )

                self.modelExperiment.save()

                # expose the database id to whoever owns the holder
                self.holder.experimentDatabaseId = self.modelExperiment.id

                # x is the position of the author in the author list
                x = 0
                for author in self.metsObject.authors:
                    try:
                        # check if the given author already exists in the DB
                        author = models.Author.objects.get(name=SafeUnicode(author))
                    except models.Author.DoesNotExist:
                        # create it otherwise
                        author = models.Author(name=SafeUnicode(author))
                        author.save()

                    author_experiment = models.Author_Experiment(
                        experiment=self.modelExperiment, author=author, order=x
                    )
                    author_experiment.save()
                    x = x + 1

            elif self.processDatasetStruct:
                # let's save the dataset in the DB
                self.modelDataset = models.Dataset(experiment=self.modelExperiment, description=self.metsObject.title)
                self.modelDataset.save()

                # let's also save the modelDataset in a dictionary so that we
                # can look it up easily later on when we start processing
                # the datafiles.
                self.datasetLookupDict[self.metsObject.id] = self.modelDataset

            # the section is finished: drop the object that was being
            # assembled and leave both structure-processing modes
            self.metsObject = None

            self.processExperimentStruct = False
            self.processDatasetStruct = False

        elif elName == "title" and self.inDmdSec:
            self.grabTitle = False

        elif elName == "url" and self.processExperimentStruct:
            self.grabExperimentUrl = False

        elif elName == "abstract" and self.processExperimentStruct:
            self.grabAbstract = False

        elif elName == "name" and self.processExperimentStruct:
            self.inName = False

        elif elName == "namePart" and self.inName:
            self.grabMightBeAuthor = False

        elif elName == "roleTerm" and self.inName:
            self.grabRoleTerm = False
            self.mightBeAuthor = None

        # only reached for a "name" element when processExperimentStruct is
        # false, because the earlier "name" branch takes precedence
        elif elName == "name" and self.inInstitution:
            self.grabInstitution = False

        elif elName == "agent":
            self.inInstitution = False

        elif elName == "amdSec":
            # we're done processing the metadata entries
            self.inAmdSec = False

            # let's reset the cached experiment model object
            self.modelExperiment = None

        elif elName == "techMD" and self.inAmdSec:
            self.inTechMd = False
            self.metsObject = None
            self.processExperimentMetadata = False
            self.processDatasetMetadata = False
            self.processDatafileMetadata = False

        elif elName == "xmlData" and self.inTechMd:
            self.inXmlData = False

        # while a custom handler is installed, every element other than the
        # xmlData child element itself is forwarded to it
        elif elName != self.xmlDataChildElement and self.customHandler is not None:
            self.customHandler.endElement(elName)

        elif elName == self.xmlDataChildElement and self.inXmlData:

            # a custom handler, if any, supplies the collected metadata
            if self.customHandler is not None:
                self.tempMetadataHolder = self.customHandler.metadataDict

            try:
                schema = models.Schema.objects.get(namespace__exact=self.elementNamespace)

                # get the associated parameter names for the given schema
                parameterNames = models.ParameterName.objects.filter(
                    schema__namespace__exact=schema.namespace
                ).order_by("id")

                if self.processExperimentMetadata:

                    # create a new parameter set for the metadata
                    parameterSet = models.ExperimentParameterSet(schema=schema, experiment=self.modelExperiment)

                    parameterSet.save()

                    # now let's process the experiment parameters;
                    # empty string values are not stored
                    for parameterName in parameterNames:
                        try:
                            parameterValue = self.tempMetadataHolder[parameterName.name]
                            if parameterValue != "":
                                self._saveParameter("ExperimentParameter", parameterName, parameterValue, parameterSet)
                        except KeyError:
                            # we'll just pass as we don't really need to deal
                            # with the current parameterName which is not
                            # provided in the current section of the METS
                            # document
                            pass

                elif self.processDatasetMetadata:

                    # create a new parameter set for the dataset metadata
                    parameterSet = models.DatasetParameterSet(schema=schema, dataset=self.modelDataset)
                    parameterSet.save()

                    # now let's process the dataset parameters
                    for parameterName in parameterNames:
                        try:
                            parameterValue = self.tempMetadataHolder[parameterName.name]
                            if parameterValue != "":
                                self._saveParameter("DatasetParameter", parameterName, parameterValue, parameterSet)
                        except KeyError:
                            # we'll just pass as we don't really need to deal
                            # with the current parameterName which is not
                            # provided in the current section of the METS
                            # document (this is the only one of the three
                            # branches that logs the missing name)
                            logger.debug(str(parameterName) + " is not in the tempMetadataHolder")
                            pass

                elif self.processDatafileMetadata:

                    # create a new parameter set for the metadata
                    # (self.modelDatafile is not assigned in this method)
                    parameterSet = models.DatafileParameterSet(schema=schema, dataset_file=self.modelDatafile)
                    parameterSet.save()

                    # now let's process the datafile parameters
                    for parameterName in parameterNames:
                        try:
                            parameterValue = self.tempMetadataHolder[parameterName.name]
                            if parameterValue != "":
                                self._saveParameter("DatafileParameter", parameterName, parameterValue, parameterSet)
                        except KeyError:
                            # we'll just pass as we don't really need to deal
                            # with the current parameterName which is not
                            # provided in the current section of the METS
                            # document
                            pass

            except models.Schema.DoesNotExist:
                # metadata of an unknown schema is skipped, not an error
                # NOTE(review): the message has no separator before the
                # namespace, so the two run together in the log.
                logger.warning("unsupported schema being ingested" + self.elementNamespace)

            # reset the current xmlData child element so that if a new
            # parameter set is read, we can process it again
            self.xmlDataChildElement = None
            self.customHandler = None

        elif elName == self.parameterName and self.xmlDataChildElement is not None:

            # reset self.parameterName to None so the next parameter can be
            # processed
            self.parameterName = None
 def __init__(self, xmlString):
     """Parse ``xmlString`` and keep the resulting tree in ``self.tree``."""
     source = StringIO(xmlString)
     self.tree = etree.parse(source)
     logger.debug('(Initializing %s)' % self.tree)
示例#18
0
def MRParams(request, dataset_id):
    """
    Show the parameter entry form for a dataset and, when a valid form is
    POSTed, stage and submit the job on the cluster.

    The choices offered by the form come from the MTZ file of the dataset.
    An invalid POST renders the form again with the bound data.

    :param request: the current HTTP request
    :param dataset_id: id of the dataset the job is set up for
    :returns: ``mrtardis/running_job.html`` after a successful submission,
        ``mrtardis/parameters.html`` otherwise
    """
    def as_choice(value):
        # form choices are (value, label) pairs; both are the same here
        return (value, value)

    mtz_file = utils.get_mtz_file(dataset_id)
    mtz_params = utils.processMTZ(mtz_file.get_storage_path())
    f_choices = map(as_choice, mtz_params["f_value"])
    sigf_choices = map(as_choice, mtz_params["sigf_value"])
    sg_num = mtz_params["spacegroup"]
    pdbfilelist = utils.get_pdb_files(dataset_id)
    rmsd_formfactory = formset_factory(RmsdForm)

    if request.method != 'POST':
        # plain page view: unbound forms
        param_form = MRForm(f_choices=f_choices,
                            sigf_choices=sigf_choices,
                            sg_num=sg_num)
        rmsd_formset = rmsd_formfactory()
    else:
        logger.debug("we're POSTing")
        param_form = MRForm(f_choices,
                            sigf_choices,
                            sg_num,
                            request.POST)
        rmsd_formset = rmsd_formfactory(request.POST)
        logger.debug(repr(param_form.is_valid()) +
                     repr(rmsd_formset.is_valid()) +
                     repr(rmsd_formset.errors) +
                     repr(request.POST))
        if param_form.is_valid() and rmsd_formset.is_valid():
            hpcUsername = MrTUser.objects.get(user=request.user).hpc_username
            newJob = hpcjob.HPCJob(hpcUsername)
            cleaned = param_form.cleaned_data
            jobparameters = {
                "f_value": cleaned['f_value'],
                "sigf_value": cleaned['sigf_value'],
                "num_in_asym": cleaned['num_in_asym'],
                "ensemble_number": cleaned['ensemble_number'],
                "packing": cleaned['packing'],
                "space_group": cleaned['space_group'],
                }
            if "sg_all" in cleaned and cleaned["sg_all"] == True:
                jobparameters["space_group"].append("ALL")
            jobparameters["rmsd"] = [
                rmsd_form.cleaned_data['rmsd']
                for rmsd_form in rmsd_formset.forms]
            jobparameters["mol_weight"] = cleaned['mol_weight']

            # the job needs every PDB file plus the MTZ file itself
            pdb_paths = utils.get_pdb_files(dataset_id, storagePaths=True)
            filepaths = pdb_paths + [mtz_file.get_storage_path()]
            logger.debug("params: " + repr(jobparameters))
            logger.debug("files: " + repr(filepaths))

            newJob.stage(jobparameters, filepaths)
            newJob.submit()
            dataset = Dataset.objects.get(pk=dataset_id)
            newJob.dbSave(dataset.experiment_id, dataset,
                          request.user)
            return render_to_response("mrtardis/running_job.html",
                                      Context({}))

    page_context = Context({
            'dataset_id': dataset_id,
            'mtz_params': mtz_params,
            'rmsd_formset': rmsd_formset,
            'paramForm': param_form,
            'fileName': mtz_file.filename,
            'pdbfilelist': pdbfilelist,
            'spacegroupname': utils.sgNumNameTrans(number=sg_num),
            })
    return render_to_response("mrtardis/parameters.html", page_context)
示例#19
0
        flattened.append('--' + self.boundary + '--')
        flattened.append('')
        return '\r\n'.join(flattened)


if __name__ == '__main__':

    # Demo: build a multipart form with two plain fields and one fake
    # file, POST it to a local server and log both sides of the exchange.
    form = MultiPartForm()
    for field_name, field_value in (('firstname', 'Doug'),
                                    ('lastname', 'Hellmann')):
        form.add_field(field_name, field_value)

    fake_file = StringIO('Python developer and blogger.')
    form.add_file('biography', 'bio.txt', fileHandle=fake_file)

    # The encoded form is the request body
    request = urllib2.Request('http://localhost:8080/')
    body = str(form)
    request.add_header('User-agent',
                       'PyMOTW (http://www.doughellmann.com/PyMOTW/)')
    request.add_header('Content-type', form.get_content_type())
    request.add_header('Content-length', len(body))
    request.add_data(body)

    logger.debug('OUTGOING DATA:')
    logger.debug(request.get_data())

    logger.debug('SERVER RESPONSE:')
    reply = urllib2.urlopen(request)
    logger.debug(reply.read())
    def process_simple(self, filename, created_by, eid):

        with open(filename) as f:
            e = 0
            ds = 0
            df = 0
            current = None
            current_df_id = 0
            mdelist = []

            for line in f:
                line = line.strip()

                # logger.debug("LINE: %s, CURRENT: %s"  % (line, current))
                if line.startswith('<experiment>'):
                    current = 'experiment'
                    e += 1
                    ds = 0
                    df = 0
                    # initialize with empty strings to avoid key errors
                    exp = {}
                    exp['abstract'] = ''
                    exp['organization'] = ''
                    exp['title'] = ''
                    exp['url'] = ''
                    exp['starttime'] = None
                    exp['endtime'] = None
                    authors = list()

                elif line.startswith('<dataset>'):

                    # commit any experiment if current = experiment
                    if current == 'experiment':

                        if not eid is None:
                            experiment = Experiment.objects.get(pk=eid)
                        else:
                            experiment = Experiment()

                        experiment.url = exp['url']
                        experiment.title = exp['title']
                        experiment.institution_name = exp['organization']
                        experiment.description = exp['abstract']
                        experiment.created_by = created_by
                        experiment.start_time = exp['starttime']
                        experiment.end_time = exp['endtime']
                        experiment.save()

                        author_experiments = \
                            Author_Experiment.objects.all()
                        author_experiments = \
                            author_experiments.filter(
                            experiment=experiment).delete()

                        x = 0
                        for authorName in authors:
                            author = \
                                Author(name=SafeUnicode(authorName))
                            author.save()
                            author_experiment = \
                                Author_Experiment(experiment=experiment,
                                    author=author, order=x)
                            author_experiment.save()
                            x = x + 1

                        experiment.dataset_set.all().delete()

                        if 'metadata' in exp:
                            for md in exp['metadata']:
                                xmlns = getXmlnsFromTechXMLRaw(md)
                                logger.debug('schema %s' % xmlns)
                                schema = None
                                try:
                                    schema = Schema.objects.get(
                                        namespace__exact=xmlns)
                                except Schema.DoesNotExist, e:
                                    logger.debug('schema not found: ' + e)

                                if schema:
                                    parameternames = \
                                        ParameterName.objects.filter(
                                    schema__namespace__exact=schema.namespace)

                                    parameternames = \
                                        parameternames.order_by('id')

                                    tech_xml = getTechXMLFromRaw(md)

                                    parameterset = \
                                        ExperimentParameterSet(
                                        schema=schema, experiment=experiment)

                                    parameterset.save()

                                    for pn in parameternames:
                                        logger.debug(
                                            "finding parameter " +
                                            pn.name + " in metadata")
                                        try:
                                            if pn.is_numeric:
                                                value = \
                                                    getParameterFromTechXML(
                                                    tech_xml, pn.name)

                                                if value != None:
                                                    ep = \
                                                        ExperimentParameter(
                                                parameterset=parameterset,
                                                        name=pn,
                                                        string_value=None,
                                                numerical_value=float(value))
                                                    ep.save()
                                            else:
                                                ep = \
                                                    ExperimentParameter(
                                                    parameterset=parameterset,
                                                    name=pn,
                                        string_value=getParameterFromTechXML(
                                                    tech_xml, pn.name),
                                                    numerical_value=None)
                                                ep.save()
                                        except e:
                                            logger.debug(
                                                'error saving experiment ' +
                                                'parameter: ' + e)

                    current = 'dataset'
                    ds = ds + 1
                    mdflist = []
                    mdslist = []
                    df = 0
                    dataset = dict()

                elif line.startswith('<file>'):

                    if current == 'dataset':
                        d = Dataset(experiment=experiment,
                                description=dataset['description'])
                        d.save()

                        if 'metadata' in dataset:
                            for md in dataset['metadata']:
                                if 'metadata' in dataset:
                                    xmlns = getXmlnsFromTechXMLRaw(md)

                                    logger.debug(
                                        'trying to find parameters with ' +
                                        'an xmlns of ' + xmlns)

                                    schema = None
                                    try:
                                        schema = \
                                            Schema.objects.get(
                                            namespace__exact=xmlns)
                                    except Schema.DoesNotExist, e:
                                        logger.debug('schema not found: ' + e)

                                    if schema:
                                        parameternames = \
                                            ParameterName.objects.filter(
                                    schema__namespace__exact=schema.namespace)

                                        parameternames = \
                                            parameternames.order_by('id')

                                        tech_xml = \
                                            getTechXMLFromRaw(md)

                                        parameterset = \
                                            DatasetParameterSet(
                                            schema=schema, dataset=d)

                                        parameterset.save()

                                        for pn in parameternames:
                                            logger.debug(
                                                "finding parameter " +
                                                pn.name + " in metadata")
                                            try:
                                                if pn.is_numeric:
                                                    value = \
                                                    getParameterFromTechXML(
                                                        tech_xml, pn.name)

                                                    if value != None:
                                                        dp = \
                                                            DatasetParameter(
                                                    parameterset=parameterset,
                                                            name=pn,
                                                            string_value=None,
                                                numerical_value=float(value))
                                                        dp.save()
                                                else:
                                                    dp = \
                                                        DatasetParameter(
                                                    parameterset=parameterset,
                                                        name=pn,
                                        string_value=getParameterFromTechXML(
                                                        tech_xml, pn.name),
                                                        numerical_value=None)
                                                    dp.save()
                                            except e:
                                                logger.debug(
                                                    'error saving ' +
                                                    'experiment parameter: ' +
                                                    e)
                    else:
    def process_METS(
        self,
        filename,
        created_by,
        expid=None,
        ):
        """Create an experiment and its datasets/files from a METS document.

        The file is read in full and handed to ``ExperimentParser``.  An
        ``Experiment`` is saved with the given id, its ``Author_Experiment``
        rows and datasets are deleted and rebuilt from the parsed document,
        and for every dataset and data file the attached technical metadata
        is stored as parameters when a ``Schema`` with a matching namespace
        exists.

        :param filename: path of the METS XML file to read
        :param created_by: value stored in ``Experiment.created_by``
        :param expid: id given to the ``Experiment`` (``None`` by default)
        :returns: the id of the saved experiment
        """

        logger.debug('START EXP: ' + str(expid))

        experiment_url = 'http://www.example.com'
        self.url = experiment_url

        # try/finally guarantees the handle is closed even if read() raises.
        f = open(filename, 'r')
        try:
            xmlString = f.read()
        finally:
            f.close()

        ep = ExperimentParser(str(xmlString))

        # The raw text is no longer needed once the parser has been built.
        del xmlString

        e = Experiment(
            id=expid,
            url=experiment_url,
            approved=True,
            title=ep.getTitle(),
            institution_name=ep.getAgentName('DISSEMINATOR'),
            description=ep.getAbstract(),
            created_by=created_by,
            )

        e.save()

        # Drop existing author links for this experiment before re-adding
        # them in document order.
        Author_Experiment.objects.filter(experiment=e).delete()

        # NOTE(review): SaveUnicode is not defined in the visible part of
        # this file, while SafeUnicode is used further down -- confirm this
        # is not a typo.
        for order, authorName in enumerate(ep.getAuthors()):
            author_experiment = Author_Experiment(experiment=e,
                    author=SaveUnicode(authorName), order=order)
            author_experiment.save()

        # looks like the intention here is to reload all the datasets from
        # scratch
        e.dataset_set.all().delete()

        # for each dataset...
        for dmdid in ep.getDatasetDMDIDs():
            d = Dataset(experiment=e,
                        description=ep.getDatasetTitle(dmdid))
            d.save()

            # for each metadata element of this dataset...
            for admid in ep.getDatasetADMIDs(dmdid):

                techxml = ep.getTechXML(admid)
                prefix = techxml.getroot().prefix
                xmlns = techxml.getroot().nsmap[prefix]

                try:
                    schema = Schema.objects.get(namespace__exact=xmlns)
                except Schema.DoesNotExist:
                    # Metadata with an unknown namespace is only logged at
                    # dataset level.
                    logger.debug('Schema ' + xmlns + " doesn't exist!")
                else:
                    parameternames = \
                        ParameterName.objects.filter(
                        schema__namespace__exact=schema.namespace)
                    parameternames = parameternames.order_by('id')

                    for pn in parameternames:
                        value = ep.getParameterFromTechXML(techxml,
                                pn.name)

                        if pn.is_numeric:
                            # Numeric parameters are skipped when the
                            # document carries no value for them.
                            if value is not None:
                                dp = DatasetParameter(dataset=d,
                                        name=pn, string_value=None,
                                        numerical_value=float(value))
                                dp.save()
                        else:
                            dp = DatasetParameter(dataset=d, name=pn,
                                    string_value=value,
                                    numerical_value=None)
                            dp.save()

            # for each file in the dataset...
            for fileid in ep.getFileIDs(dmdid):

                # Prefer the explicit file name when self.null_check()
                # accepts it; otherwise fall back to the last path segment
                # of the file location.
                datafile_name = ep.getFileName(fileid)
                location = ep.getFileLocation(fileid)
                if not self.null_check(datafile_name):
                    datafile_name = location.rpartition('/')[2]

                protocol = location.partition('://')[0]
                datafile = Dataset_File(dataset=d,
                                        filename=datafile_name,
                                        url=location,
                                        size=ep.getFileSize(fileid),
                                        protocol=protocol)
                datafile.save()

                # for each metadata element of this file...
                for admid in ep.getFileADMIDs(fileid):

                    techxml = ep.getTechXML(admid)
                    prefix = techxml.getroot().prefix
                    xmlns = techxml.getroot().nsmap[prefix]

                    try:
                        schema = \
                            Schema.objects.get(namespace__exact=xmlns)
                    except Schema.DoesNotExist:
                        # No schema registered for this namespace: keep the
                        # metadata as an XML_data record instead.
                        # NOTE(review): techxml exposes getroot() above;
                        # confirm it also provides getvalue().
                        xml_data = XML_data(datafile=datafile,
                                xmlns=SafeUnicode(xmlns),
                                data=SafeUnicode(techxml.getvalue()))
                        xml_data.save()
                    else:
                        parameternames = \
                            ParameterName.objects.filter(
                            schema__namespace__exact=schema.namespace)
                        parameternames = parameternames.order_by('id')

                        for pn in parameternames:
                            value = ep.getParameterFromTechXML(techxml,
                                    pn.name)

                            if pn.is_numeric:
                                if value is not None:
                                    dp = DatafileParameter(
                                        dataset_file=datafile, name=pn,
                                        string_value=None,
                                        numerical_value=float(value))
                                    dp.save()
                            else:
                                dp = DatafileParameter(
                                    dataset_file=datafile, name=pn,
                                    string_value=value,
                                    numerical_value=None)
                                dp.save()

        logger.debug('DONE EXP: ' + str(e.id))

        return e.id
                                                    name=pn,
                                                    string_value=None,
                                                numerical_value=float(value))

                                                dp.save()
                                        else:
                                            dp = \
                                                DatafileParameter(
                                                parameterset=parameterset,
                                                name=pn,
                                        string_value=getParameterFromTechXML(
                                                tech_xml,
                                                pn.name), numerical_value=None)
                                            dp.save()
                                    except e:
                                        logger.debug('error saving ' +
                                            'experiment parameter: ' + e)
                            except Schema.DoesNotExist, e:
                                logger.debug('schema not found: ' + e)

                    # commit any dataset if current = dataset

                    current = 'file'
                    df = df + 1
                    mdflist = []
                    datafile = dict()
                    logger.debug('experiment: ' + str(e) + ' dataset: ' +
                        str(ds) + ' datafile: ' + str(df))

                elif line.startswith('<metadata'):

                    md = ''