def test_experiment(self):
    """Creating and saving an Experiment populates the expected defaults."""
    experiment = Experiment(title='test exp1',
                            institution_name='monash',
                            created_by=self.user)
    experiment.save()

    # Fields we set explicitly, plus the model's defaults.
    expected_fields = [
        ('title', 'test exp1'),
        ('url', None),
        ('institution_name', 'monash'),
        ('approved', False),
        ('handle', None),
        ('created_by', self.user),
        ('public_access', Experiment.PUBLIC_ACCESS_NONE),
    ]
    for field_name, expected in expected_fields:
        self.assertEqual(getattr(experiment, field_name), expected)

    # The absolute URL should point at the stored experiment's view page.
    target_id = Experiment.objects.first().id
    self.assertEqual(
        experiment.get_absolute_url(),
        '/experiment/view/%d/' % target_id,
        experiment.get_absolute_url() +
        ' != /experiment/view/%d/' % target_id)

    # The experiment directory lives under FILE_STORE_PATH, named by id.
    self.assertEqual(
        experiment.get_or_create_directory(),
        os.path.join(settings.FILE_STORE_PATH, str(experiment.id)))
def register_experiment_ws_xmldata(request):
    '''
    Web-service mechanism for registering an experiment, and triggering
    a corresponding file transfer. Although intended to be called as
    a web service, it actually works fine as a normal form, at
    /experiment/register

    :param request: Django HttpRequest; a POST carries the registration
        form plus the METS document in ``request.FILES['xmldata']``.
    :returns: HttpResponse -- the new experiment id (with a Location
        header) on success, a retry form on GET or validation failure,
        or an error response.
    '''
    # Module-level state shared with the status/reporting helpers
    # (add_status / send_retry_response read these).
    global experiment, idless_statuses, current_action, debug_POST
    experiment = None
    idless_statuses = []
    logger.debug("Starting ingest process")
    # Check that we have received a form, abort otherwise.
    try:
        if request.method != 'POST':
            # Happens when just viewing the form
            form = RegisterExperimentForm()  # An unbound form
            return send_retry_response(request, form, '')
        logger.info("Starting experiment ingest processing")
        from datetime import datetime
        temp_title = "Ingest Received: " + \
            datetime.now().strftime("%A, %d. %B %Y %I:%M%p")
        # A form bound to the POST data
        form = RegisterExperimentForm(request.POST, request.FILES)
        # Check that the form is filled out, abort otherwise.
        if not form.is_valid():
            # FIX: the original concatenation ended in a dangling
            # line-continuation (invalid syntax); rebuilt as one string.
            fail_message = "Form validation failure: <br/>" \
                "Form Errors: " + str(form.errors) + "<br/>"
            try:
                add_status(RegistrationStatus.ERROR, fail_message)
            except Exception as ex:
                logger.error("Really an exception %s" % ex)
            # FIX: always abort on validation failure; previously the
            # retry response was only sent when add_status() succeeded,
            # otherwise processing continued with an invalid form.
            return send_retry_response(request, form, '')
        logger.debug("Form validation: ok")

        xmldata = request.FILES['xmldata']
        xmldata_meta = xmldata.name
        username = form.cleaned_data['username']
        originid = form.cleaned_data['originid']
        from_url = form.cleaned_data['from_url']
        owners = request.POST.getlist('experiment_owner')
        # Debug summary of the submitted fields (the password is
        # deliberately not recorded).  FIX: the original expression was
        # corrupted by redaction ('"username: "******"'); rebuilt as a
        # valid concatenation.
        debug_POST = "username: " + username + "<br/>" \
            "xmldata: " + xmldata_meta + "<br/>" \
            "originid: " + originid + "<br/>" \
            "from_url: " + from_url + "<br/>"

        user = auth_service.authenticate(request=request,
                                         authMethod=localdb_auth_key)
        # Check user is authenticated, and user information is present,
        # abort otherwise.
        if not authentication_ok(user):
            return return_response_error(request)
        logger.debug("User authentication: ok")

        # Basic checks have passed, so create the experiment.
        # FIX: removed a duplicate late 'global experiment' declaration
        # here -- 'experiment' is declared global at the top of the
        # function, and re-declaring it after assignment is a
        # SyntaxError in modern Python.
        experiment = Experiment(title=temp_title,
                                approved=True,
                                created_by=user)
        experiment.save()

        # Now update old registration statuses with the new experiment
        # number.
        for oldstatus in idless_statuses:
            rs = RegistrationStatus.objects.get(pk=oldstatus)
            rs.experiment = experiment
            rs.save()

        # If no owner provided, record a warning.
        check_owner(owners)

        # Write the submitted XML file to disk.
        filename = path.join(experiment.get_or_create_directory(),
                             'mets_upload.xml')
        # FIX: use a context manager so the file handle is closed even
        # if writing a chunk raises.
        with open(filename, 'wb+') as f:
            for chunk in xmldata.chunks():
                f.write(chunk)
        add_status(status=RegistrationStatus.PASS,
                   message="Ingest Successfully Received")

        # Now process METS/XML file.
        current_action = "Ingest Processing"
        try:
            _registerExperimentDocument(filename=filename,
                                        created_by=user,
                                        expid=experiment.id,
                                        owners=owners,
                                        username=username)
        except Exception:
            # FIX: narrowed the bare 'except:' so system-exiting
            # exceptions (KeyboardInterrupt/SystemExit) propagate.
            add_status(status=RegistrationStatus.ERROR,
                       message="METS metadata ingest failed",
                       exception=True)
            return return_response_error(request)
        add_status(status=RegistrationStatus.PASS,
                   message="Ingest Successfully Processed")

        if from_url:
            # Form is ok, METS file ingested ok, and the caller also
            # specified a file to transfer.
            current_action = 'File Transfer Request'
            logger.debug("transferring file")
            file_transfer_url = from_url + '/file_transfer/'
            do_file_transfer(file_transfer_url, originid, request)

        # Success: respond with just the ID of the newly created and
        # processed experiment.
        response = HttpResponse(str(experiment.id), status=200)
        response['Location'] = request.build_absolute_uri(
            '/experiment/view/' + str(experiment.id))
        return response
    except Exception:
        add_status(RegistrationStatus.ERROR,
                   "Unhandled exception in METS ingest.",
                   exception=True)
        # FIX: a Django view must return an HttpResponse; the original
        # fell off the end (returning None) after an unhandled exception.
        return return_response_error(request)
def transfer_experiment(source):
    """ Pull public experiments from source into current mytardis.

    :param source: base URL of the remote (producer) MyTardis instance,
        e.g. ``http://remote.example``.
    :returns: list of newly created local experiment ids, or ``None``
        when the transfer is abandoned early (no records, missing key,
        lock held by another worker, or duplicate found).
    :raises ReposReadError, OAIPMHError, BadAccessError, MetsParseError:
        on the failure paths logged below.
    """
    #TODO: Cleanup error messages
    #TODO: does not transfer liences as not part of METS format.
    #NOTE: As this is a pull we trust the data from the other tardis

    # Check identity of the feed.
    from oaipmh.client import Client
    from oaipmh import error
    from oaipmh.metadata import MetadataRegistry, oai_dc_reader

    from django.core.cache import cache
    from django.utils.hashcompat import md5_constructor as md5

    # The cache key consists of the task name and the MD5 digest
    # of the feed URL.
    cache_key = md5("token").hexdigest()
    lock_id = "%s-lock-%s" % ("consume_experiment", cache_key)
    LOCK_EXPIRE = 60 * 5  # lock auto-expires after five minutes

    # cache.add fails if the key already exists, giving an atomic
    # test-and-set we can use as a cross-worker lock.
    acquire_lock = lambda: cache.add(lock_id, "true", LOCK_EXPIRE)
    # memcache delete is very slow, but we have to use it to take
    # advantage of using add() for atomic locking
    release_lock = lambda: cache.delete(lock_id)

    registry = MetadataRegistry()
    registry.registerReader('oai_dc', oai_dc_reader)
    source_url = "%s/apps/oaipmh/?verb=Identify" % source
    client = Client(source_url, registry)
    try:
        identify = client.identify()
    except AttributeError as e:
        msg = "Error reading repos identity: %s:%s" % (source, e)
        logger.error(msg)
        raise ReposReadError(msg)
    except error.ErrorBase as e:
        msg = "OAIPMH error: %s" % e
        logger.error(msg)
        raise OAIPMHError(msg)
    except URLError as e:
        logger.error(e)
        raise

    # The remote must identify itself under the same scheme://host we
    # were given, otherwise we may be talking to the wrong repository.
    repos = identify.baseURL()
    import urlparse
    repos_url = urlparse.urlparse(repos)
    dest_name = "%s://%s" % (repos_url.scheme, repos_url.netloc)
    if dest_name != source:
        msg = "Source directory reports incorrect name: %s" % dest_name
        logger.error(msg)
        raise BadAccessError(msg)

    # Get list of public experiments at source.
    registry = MetadataRegistry()
    registry.registerReader('oai_dc', oai_dc_reader)
    client = Client(
        source + "/apps/oaipmh/?verb=ListRecords&metadataPrefix=oai_dc",
        registry)
    try:
        exps_metadata = [
            meta for (header, meta, extra) in
            client.listRecords(metadataPrefix='oai_dc')
        ]
    except AttributeError as e:
        msg = "Error reading experiment %s" % e
        logger.error(msg)
        raise OAIPMHError(msg)
    except error.NoRecordsMatchError as e:
        msg = "no public records found on source %s" % e
        logger.warn(msg)
        return

    local_ids = []
    for exp_metadata in exps_metadata:
        exp_id = exp_metadata.getField('identifier')[0]
        user = exp_metadata.getField('creator')[0]
        found_user = _get_or_create_user(source, user)

        # Make sure experiment is publicish.
        try:
            xmldata = getURL("%s/apps/reposproducer/expstate/%s/"
                             % (source, exp_id))
        except HTTPError as e:
            msg = "cannot get public state of experiment %s" % exp_id
            logger.error(msg)
            raise BadAccessError(msg)
        try:
            exp_state = json.loads(xmldata)
        except ValueError as e:
            msg = "cannot parse public state of experiment %s" % exp_id
            logger.error(msg)
            raise BadAccessError(msg)
        if not exp_state in [
                Experiment.PUBLIC_ACCESS_FULL,
                Experiment.PUBLIC_ACCESS_METADATA]:
            # NOTE(review): this literal has no '%s' placeholder, so the
            # '%' with exp_id raises TypeError rather than producing a
            # message -- confirm and fix upstream.
            msg = 'cannot ingest private experiments.' % exp_id
            logger.error(msg)
            raise BadAccessError(msg)

        # Get the usernames of isOwner django_user ACLs for the
        # experiment.
        try:
            xmldata = getURL("%s/apps/reposproducer/acls/%s/"
                             % (source, exp_id))
        except HTTPError as e:
            msg = "Cannot get acl list of experiment %s" % exp_id
            logger.error(msg)
            raise ReposReadError(msg)
        try:
            acls = json.loads(xmldata)
        except ValueError as e:
            msg = "cannot parse acl list of experiment %s" % exp_id
            logger.error(msg)
            raise BadAccessError(msg)
        owners = []
        for acl in acls:
            if acl['pluginId'] == 'django_user' and acl['isOwner']:
                user = _get_or_create_user(source, acl['entityId'])
                owners.append(user.username)
            else:
                # FIXME: skips all other types of acl for now
                pass

        # Get the METS for the experiment.
        metsxml = ""
        try:
            metsxml = getURL("%s/experiment/metsexport/%s/?force_http_urls"
                             % (source, exp_id))
            #metsxml = getURL("%s/experiment/metsexport/%s/"
            #% (source, exp_id))
        except HTTPError as e:
            msg = "cannot get METS for experiment %s" % exp_id
            logger.error(msg)
            raise ReposReadError(msg)

        # Load schema and parametername for experiment keys.
        try:
            key_schema = Schema.objects.get(namespace=settings.KEY_NAMESPACE)
        except Schema.DoesNotExist as e:
            msg = "No ExperimentKeyService Schema found"
            logger.error(msg)
            raise BadAccessError(msg)
        try:
            key_name = ParameterName.objects.get(name=settings.KEY_NAME)
        except ParameterName.DoesNotExist as e:
            msg = "No ExperimentKeyService ParameterName found"
            logger.error(msg)
            raise BadAccessError(msg)
        try:
            xmldata = getURL("%s/apps/reposproducer/key/%s/"
                             % (source, exp_id))
        except HTTPError as e:
            msg = "cannot get key of experiment %s" % exp_id
            logger.error(msg)
            raise BadAccessError(msg)
        if not xmldata:
            logger.warn(
                "Unable to retrieve experiment %s key. Will try again later"
                % exp_id)
            return
        try:
            key_value = json.loads(xmldata)
        except ValueError as e:
            msg = "cannot parse key list of experiment %s" % exp_id
            logger.error(msg)
            raise BadAccessError(msg)
        if not key_value:
            logger.warn(
                "Unable to retrieve experiment %s key value. Will try again later"
                % exp_id)
            return
        logger.debug("retrieved key %s from experiment %s"
                     % (key_value, exp_id))

        # Duplicate detection: scan local experiments for one already
        # carrying the same key value, under the cross-worker lock.
        exps = Experiment.objects.all()
        got_lock = True
        if not acquire_lock():
            logger.warning("another worker has access to consume experiment")
            return
        duplicate_exp = 0
        for exp in exps:
            #logger.warn("exp = %s" % exp.id)
            params = ExperimentParameter.objects.filter(
                name=key_name,
                parameterset__schema=key_schema,
                parameterset__experiment=exp)
            #logger.warn("params.count() = %s" % params.count())
            if params.count() >= 1:
                key = params[0].string_value
                if key == key_value:
                    duplicate_exp = exp.id
                    #logger.warn("found duplicate for %s" % duplicate_exp)
                    break
        if duplicate_exp:
            logger.warn(
                "Found duplicate experiment form %s exp %s to exp %s"
                % (source, exp_id, duplicate_exp))
            if got_lock:
                release_lock()
            return

        # TODO: Need some way of updating an existing experiment.
        # Problem is that the copy will have a different id from the
        # original, so we need a unique identifier to allow matching.

        # We have now pulled everything we need from the producer and
        # are ready to create the experiment.

        # Make placeholder experiment and ready metadata.
        e = Experiment(
            title='Placeholder Title',
            approved=True,
            created_by=found_user,
            public_access=exp_state,
            locked=False  # so experiment can then be altered.
        )
        e.save()
        # store the key
        #eps, was_created = ExperimentParameterSet.objects.\
        #    get_or_create(experiment=e, schema=key_schema)
        #if was_created:
        #    logger.warn("was created")
        #ep, was_created = ExperimentParameter.objects.get_or_create(
        #    parameterset=eps, name=key_name, string_value=key_value)
        #if was_created:
        #    logger.warn("was created again")
        #ep.save()
        if got_lock:
            release_lock()

        # Write the fetched METS document into the new experiment's
        # directory for ingestion.
        local_id = e.id
        filename = path.join(e.get_or_create_directory(),
                             'mets_upload.xml')
        f = open(filename, 'wb+')
        f.write(metsxml)
        f.close()

        # Ingest this experiment META data and isOwner ACLS.
        eid = None
        try:
            eid, sync_path = _registerExperimentDocument(
                filename=filename,
                created_by=found_user,
                expid=local_id,
                owners=owners)
            logger.info('=== processing experiment %s: DONE' % local_id)
        except:
            # FIXME: what errors can mets return?
            msg = '=== processing experiment %s: FAILED!' \
                % local_id
            logger.error(msg)
            raise MetsParseError(msg)
        # FIXME: if METS parse fails then we should go back and delete
        # the placeholder experiment.

        exp = Experiment.objects.get(id=eid)
        # Mark datafiles as remote so that tardis does not copy the data.
        for datafile in exp.get_datafiles():
            datafile.stay_remote = True
            datafile.save()
        #import nose.tools
        #nose.tools.set_trace()
        # FIXME: reverse lookup of URLs seem quite slow.
        # TODO: put this information into specific metadata schema
        # attached to experiment.
        exp.description += get_audit_message(source, exp_id)
        exp.save()
        local_ids.append(local_id)
    return local_ids
def transfer_experiment(source):
    """ Pull public experiments from source into current mytardis.

    NOTE(review): this appears to be a near-identical duplicate of
    another transfer_experiment definition in this file; if both live in
    the same module the later definition shadows the earlier -- confirm
    whether two file versions were concatenated.

    :param source: base URL of the remote (producer) MyTardis instance.
    :returns: list of newly created local experiment ids, or ``None``
        when the transfer is abandoned early (no records, missing key,
        lock held by another worker, or duplicate found).
    :raises ReposReadError, OAIPMHError, BadAccessError, MetsParseError:
        on the failure paths logged below.
    """
    #TODO: Cleanup error messages
    #TODO: does not transfer liences as not part of METS format.
    #NOTE: As this is a pull we trust the data from the other tardis

    # Check identity of the feed.
    from oaipmh.client import Client
    from oaipmh import error
    from oaipmh.metadata import MetadataRegistry, oai_dc_reader

    from django.core.cache import cache
    from django.utils.hashcompat import md5_constructor as md5

    # The cache key consists of the task name and the MD5 digest
    # of the feed URL.
    cache_key = md5("token").hexdigest()
    lock_id = "%s-lock-%s" % ("consume_experiment", cache_key)
    LOCK_EXPIRE = 60 * 5  # lock auto-expires after five minutes

    # cache.add fails if the key already exists, giving an atomic
    # test-and-set we can use as a cross-worker lock.
    acquire_lock = lambda: cache.add(lock_id, "true", LOCK_EXPIRE)
    # memcache delete is very slow, but we have to use it to take
    # advantage of using add() for atomic locking
    release_lock = lambda: cache.delete(lock_id)

    registry = MetadataRegistry()
    registry.registerReader('oai_dc', oai_dc_reader)
    source_url = "%s/apps/oaipmh/?verb=Identify" % source
    client = Client(source_url, registry)
    try:
        identify = client.identify()
    except AttributeError as e:
        msg = "Error reading repos identity: %s:%s" % (source, e)
        logger.error(msg)
        raise ReposReadError(msg)
    except error.ErrorBase as e:
        msg = "OAIPMH error: %s" % e
        logger.error(msg)
        raise OAIPMHError(msg)
    except URLError as e:
        logger.error(e)
        raise

    # The remote must identify itself under the same scheme://host we
    # were given, otherwise we may be talking to the wrong repository.
    repos = identify.baseURL()
    import urlparse
    repos_url = urlparse.urlparse(repos)
    dest_name = "%s://%s" % (repos_url.scheme, repos_url.netloc)
    if dest_name != source:
        msg = "Source directory reports incorrect name: %s" % dest_name
        logger.error(msg)
        raise BadAccessError(msg)

    # Get list of public experiments at source.
    registry = MetadataRegistry()
    registry.registerReader('oai_dc', oai_dc_reader)
    client = Client(source + "/apps/oaipmh/?verb=ListRecords&metadataPrefix=oai_dc",
                    registry)
    try:
        exps_metadata = [meta for (header, meta, extra) in
                         client.listRecords(metadataPrefix='oai_dc')]
    except AttributeError as e:
        msg = "Error reading experiment %s" % e
        logger.error(msg)
        raise OAIPMHError(msg)
    except error.NoRecordsMatchError as e:
        msg = "no public records found on source %s" % e
        logger.warn(msg)
        return

    local_ids = []
    for exp_metadata in exps_metadata:
        exp_id = exp_metadata.getField('identifier')[0]
        user = exp_metadata.getField('creator')[0]
        found_user = _get_or_create_user(source, user)

        # Make sure experiment is publicish.
        try:
            xmldata = getURL("%s/apps/reposproducer/expstate/%s/"
                             % (source, exp_id))
        except HTTPError as e:
            msg = "cannot get public state of experiment %s" % exp_id
            logger.error(msg)
            raise BadAccessError(msg)
        try:
            exp_state = json.loads(xmldata)
        except ValueError as e:
            msg = "cannot parse public state of experiment %s" % exp_id
            logger.error(msg)
            raise BadAccessError(msg)
        if not exp_state in [Experiment.PUBLIC_ACCESS_FULL,
                             Experiment.PUBLIC_ACCESS_METADATA]:
            # NOTE(review): this literal has no '%s' placeholder, so the
            # '%' with exp_id raises TypeError rather than producing a
            # message -- confirm and fix upstream.
            msg = 'cannot ingest private experiments.' % exp_id
            logger.error(msg)
            raise BadAccessError(msg)

        # Get the usernames of isOwner django_user ACLs for the
        # experiment.
        try:
            xmldata = getURL("%s/apps/reposproducer/acls/%s/"
                             % (source, exp_id))
        except HTTPError as e:
            msg = "Cannot get acl list of experiment %s" % exp_id
            logger.error(msg)
            raise ReposReadError(msg)
        try:
            acls = json.loads(xmldata)
        except ValueError as e:
            msg = "cannot parse acl list of experiment %s" % exp_id
            logger.error(msg)
            raise BadAccessError(msg)
        owners = []
        for acl in acls:
            if acl['pluginId'] == 'django_user' and acl['isOwner']:
                user = _get_or_create_user(source, acl['entityId'])
                owners.append(user.username)
            else:
                # FIXME: skips all other types of acl for now
                pass

        # Get the METS for the experiment.
        metsxml = ""
        try:
            metsxml = getURL("%s/experiment/metsexport/%s/?force_http_urls"
                             % (source, exp_id))
            #metsxml = getURL("%s/experiment/metsexport/%s/"
            #% (source, exp_id))
        except HTTPError as e:
            msg = "cannot get METS for experiment %s" % exp_id
            logger.error(msg)
            raise ReposReadError(msg)

        # Load schema and parametername for experiment keys.
        try:
            key_schema = Schema.objects.get(namespace=settings.KEY_NAMESPACE)
        except Schema.DoesNotExist as e:
            msg = "No ExperimentKeyService Schema found"
            logger.error(msg)
            raise BadAccessError(msg)
        try:
            key_name = ParameterName.objects.get(name=settings.KEY_NAME)
        except ParameterName.DoesNotExist as e:
            msg = "No ExperimentKeyService ParameterName found"
            logger.error(msg)
            raise BadAccessError(msg)
        try:
            xmldata = getURL("%s/apps/reposproducer/key/%s/"
                             % (source, exp_id))
        except HTTPError as e:
            msg = "cannot get key of experiment %s" % exp_id
            logger.error(msg)
            raise BadAccessError(msg)
        if not xmldata:
            logger.warn("Unable to retrieve experiment %s key. Will try again later" % exp_id)
            return
        try:
            key_value = json.loads(xmldata)
        except ValueError as e:
            msg = "cannot parse key list of experiment %s" % exp_id
            logger.error(msg)
            raise BadAccessError(msg)
        if not key_value:
            logger.warn("Unable to retrieve experiment %s key value. Will try again later" % exp_id)
            return
        logger.debug("retrieved key %s from experiment %s"
                     % (key_value, exp_id))

        # Duplicate detection: scan local experiments for one already
        # carrying the same key value, under the cross-worker lock.
        exps = Experiment.objects.all()
        got_lock = True
        if not acquire_lock():
            logger.warning("another worker has access to consume experiment")
            return
        duplicate_exp = 0
        for exp in exps:
            #logger.warn("exp = %s" % exp.id)
            params = ExperimentParameter.objects.filter(name=key_name,
                                                        parameterset__schema=key_schema,
                                                        parameterset__experiment=exp)
            #logger.warn("params.count() = %s" % params.count())
            if params.count() >= 1:
                key = params[0].string_value
                if key == key_value:
                    duplicate_exp = exp.id
                    #logger.warn("found duplicate for %s" % duplicate_exp)
                    break
        if duplicate_exp:
            logger.warn("Found duplicate experiment form %s exp %s to exp %s"
                        % (source, exp_id, duplicate_exp))
            if got_lock:
                release_lock()
            return

        # TODO: Need some way of updating an existing experiment.
        # Problem is that the copy will have a different id from the
        # original, so we need a unique identifier to allow matching.

        # We have now pulled everything we need from the producer and
        # are ready to create the experiment.

        # Make placeholder experiment and ready metadata.
        e = Experiment(
            title='Placeholder Title',
            approved=True,
            created_by=found_user,
            public_access=exp_state,
            locked=False  # so experiment can then be altered.
        )
        e.save()
        # store the key
        #eps, was_created = ExperimentParameterSet.objects.\
        #    get_or_create(experiment=e, schema=key_schema)
        #if was_created:
        #    logger.warn("was created")
        #ep, was_created = ExperimentParameter.objects.get_or_create(
        #    parameterset=eps, name=key_name, string_value=key_value)
        #if was_created:
        #    logger.warn("was created again")
        #ep.save()
        if got_lock:
            release_lock()

        # Write the fetched METS document into the new experiment's
        # directory for ingestion.
        local_id = e.id
        filename = path.join(e.get_or_create_directory(),
                             'mets_upload.xml')
        f = open(filename, 'wb+')
        f.write(metsxml)
        f.close()

        # Ingest this experiment META data and isOwner ACLS.
        eid = None
        try:
            eid, sync_path = _registerExperimentDocument(filename=filename,
                                                         created_by=found_user,
                                                         expid=local_id,
                                                         owners=owners)
            logger.info('=== processing experiment %s: DONE' % local_id)
        except:
            # FIXME: what errors can mets return?
            msg = '=== processing experiment %s: FAILED!' \
                % local_id
            logger.error(msg)
            raise MetsParseError(msg)
        # FIXME: if METS parse fails then we should go back and delete
        # the placeholder experiment.

        exp = Experiment.objects.get(id=eid)
        # Mark datafiles as remote so that tardis does not copy the data.
        for datafile in exp.get_datafiles():
            datafile.stay_remote = True
            datafile.save()
        #import nose.tools
        #nose.tools.set_trace()
        # FIXME: reverse lookup of URLs seem quite slow.
        # TODO: put this information into specific metadata schema
        # attached to experiment.
        exp.description += get_audit_message(source, exp_id)
        exp.save()
        local_ids.append(local_id)
    return local_ids
def register_experiment_ws_xmldata(request):
    """Register an experiment from an uploaded METS/XML document.

    On a valid POST: authenticates the submitter, creates a placeholder
    experiment, stores the uploaded METS file in the experiment's
    directory, ingests it, optionally fires the received_remote signal
    for a follow-up file transfer, and responds with the new experiment
    id.  On GET (or a POST that fails validation) the registration form
    is rendered instead.
    """
    status = ''
    if request.method != 'POST':
        form = RegisterExperimentForm()  # An unbound form
    else:
        # A form bound to the POST data.
        form = RegisterExperimentForm(request.POST, request.FILES)
        if form.is_valid():  # All validation rules pass
            xmldata = request.FILES['xmldata']
            cleaned = form.cleaned_data
            username = cleaned['username']
            origin_id = cleaned['originid']
            from_url = cleaned['from_url']

            user = auth_service.authenticate(request=request,
                                             authMethod=localdb_auth_key)
            # Reject unauthenticated or deactivated users.
            if not (user and user.is_active):
                return return_response_error(request)

            placeholder = Experiment(
                title='Placeholder Title',
                approved=True,
                created_by=user,
            )
            placeholder.save()
            eid = placeholder.id

            # Persist the uploaded METS document into the experiment's
            # directory for ingestion.
            filename = path.join(placeholder.get_or_create_directory(),
                                 'mets_upload.xml')
            with open(filename, 'wb+') as dest:
                for chunk in xmldata.chunks():
                    dest.write(chunk)

            logger.info('=== processing experiment: START')
            owners = request.POST.getlist('experiment_owner')
            try:
                _registerExperimentDocument(filename=filename,
                                            created_by=user,
                                            expid=eid,
                                            owners=owners,
                                            username=username)
                logger.info('=== processing experiment %s: DONE' % eid)
            except:
                logger.exception('=== processing experiment %s: FAILED!'
                                 % eid)
                return return_response_error(request)

            if from_url:
                # The submitter asked for a follow-up file transfer.
                logger.debug('=== sending file request')
                logger.info('Sending received_remote signal')
                from tardis.tardis_portal.signals import received_remote
                received_remote.send(sender=Experiment,
                                     instance=placeholder,
                                     uid=origin_id,
                                     from_url=from_url)

            response = HttpResponse(str(eid), status=200)
            response['Location'] = request.build_absolute_uri(
                '/experiment/view/' + str(eid))
            return response

    # GET, or a POST that failed validation: render the registration page
    # (the bound form carries the validation errors).
    c = Context({
        'form': form,
        'status': status,
        'subtitle': 'Register Experiment',
        'searchDatafileSelectionForm':
            getNewSearchDatafileSelectionForm()})
    return HttpResponse(render_response_index(
        request, 'tardis_portal/register_experiment.html', c))