def publish_dataverse(self, sub_id):
    """Publish the dataverse (if still unpublished) and the dataset tied to a submission.

    Looks up the destination repository details for the submission, publishes the
    parent dataverse when necessary, then POSTs a major-release publish request to
    the dataverse native API for the dataset.

    :param sub_id: id of the submission record
    :return: the submission document after being marked as published
    :raises OperationFailedError: if the publish request is not accepted
    """
    # get url/api key for dataverse
    self.host = Submission().get_dataverse_details(sub_id)
    self.headers = {'X-Dataverse-key': self.host['apikey']}
    submission = Submission().get_record(sub_id)
    dv_alias = submission['accessions']['dataverse_alias']
    ds_id = submission['accessions']['dataset_id']
    conn = self._get_connection()
    dv = conn.get_dataverse(dv_alias)
    # a dataset cannot be published while its parent dataverse is unpublished
    if not dv.is_published:
        dv.publish()
    # POST http://$SERVER/api/datasets/$id/actions/:publish?type=$type&key=$apiKey
    url = submission['destination_repo']['url']
    url = url + '/api/datasets/' + str(ds_id) + '/actions/:publish?type=major'
    print(url)
    resp = requests.post(url,
                         data={'type': 'major', 'key': self.host['apikey']},
                         headers=self.headers)
    # BUG FIX: the original test used `or` (`!= 200 or != 201`), which is true for
    # EVERY status code, so success also raised. Also `resp.content` is bytes and
    # cannot be concatenated to a str message; decode it first.
    if resp.status_code not in (200, 201):
        raise OperationFailedError('The Dataset could not be published. ' +
                                   resp.content.decode('utf-8'))
    doc = Submission().mark_as_published(sub_id)
    return doc
def dc_dict_to_dc(self, sub_id):
    """Convert a submission's CG-Core metadata into CKAN dublin-core fields.

    Reads the first datafile in the submission's bundle, extracts the CKAN
    repo fields from it, and stores the result as the submission's meta.
    """
    record = Submission().get_record(ObjectId(sub_id))
    first_file_id = record["bundle"][0]
    dc_items = CgCoreSchemas().extract_repo_fields(str(first_file_id), "ckan")
    Submission().update_meta(sub_id, json.dumps(dc_items))
def test_submit_existing_cgcore_dataverse(self):
    """Submit a COPO CG-Core record to an existing dataset within a dataverse."""
    record = Submission().get_record(self.s_dv)
    response = self.client.post(path='/rest/submit_to_repo/',
                                data={"sub_id": record["_id"]})
    # re-read the submission and check the accession result was recorded
    refreshed = Submission().get_record(self.s_dv)
    self.assertTrue("result" in refreshed["accessions"][0])
    self.assertTrue("id" in refreshed["accessions"][0]["result"])
def get_repo_info(request, sub=None):
    """Ajax endpoint behind the "inspect repo" button on the submission view.

    Returns the destination repo type/url plus the submission's meta. For
    CG-Core submissions the interim metadata is first converted to dublin core
    by the repo-specific converter.
    """
    try:
        sub_id = sub if sub else request.GET['sub_id']
        record = Submission().get_record(ObjectId(sub_id))
        repo = record['destination_repo']
        # cg_core submissions need interim-to-dublin-core conversion first
        if record["is_cg"]:
            repo_type = repo["type"]
            if repo_type == "dataverse":
                ds().dc_dict_to_dc(sub_id)
            elif repo_type == "ckan":
                ckan().dc_dict_to_dc(sub_id)
            elif repo_type == "dspace":
                dspace().dc_dict_to_dc(sub_id)
    except Exception as e:
        print(e)
        return HttpResponse(
            json.dumps({
                "status": 404,
                "message": "error getting dataverse"
            }))
    # re-read: the conversion above may have rewritten the submission's meta
    record = Submission().get_record(ObjectId(sub_id))
    payload = {
        'repo_type': repo['type'],
        'repo_url': repo['url'],
        'meta': record["meta"]
    }
    return HttpResponse(json.dumps(payload))
def update_submission_repo_data(request):
    """Handle ajax updates to a submission's repository data.

    Two tasks are supported via request.POST['task']:
      - 'change_destination': point the submission at a different repo and
        clear/rebuild its metadata.
      - 'change_meta': merge posted metadata (plus, for new items, the
        submission's stored meta fields) and save it on the submission.

    :return: HttpResponse with the updated submission serialised as json
    """
    task = request.POST['task']
    submission_id = request.POST['submission_id']
    if task == 'change_destination':
        custom_repo_id = request.POST['custom_repo_id']
        s = Submission().update_destination_repo(repo_id=custom_repo_id,
                                                 submission_id=submission_id)
        s['record_id'] = str(submission_id)
        clear_submission_metadata(request)
        # refresh repo info (side effect: may convert cg_core meta)
        get_repo_info(request, sub=submission_id)
        return HttpResponse(json_util.dumps(s))
    elif task == 'change_meta':
        meta = json.loads(request.POST['meta'])
        new_or_existing = meta["new_or_existing"]
        if request.POST.get("type") == "dspace":
            if new_or_existing == "new":
                r_type = request.POST["type"]
                # add meta to separate dict field
                meta["new_or_existing"] = new_or_existing
                meta["repo_type"] = r_type
                m = Submission().get_record(ObjectId(submission_id))["meta"]
                meta["fields"] = m
        elif request.POST.get("type") in ("dataverse", "ckan"):
            if new_or_existing == "new":
                m = Submission().get_record(ObjectId(submission_id))["meta"]
                meta["fields"] = m
                meta["repo_type"] = request.POST["type"]
        # now update submission record
        # FIX: idiomatic isinstance() instead of `type(meta) == type(dict())`
        if isinstance(meta, dict):
            meta = json.dumps(meta)
        s = Submission().update_meta(submission_id=submission_id, meta=meta)
        return HttpResponse(json.dumps(s))
def test_dspace_existing_submission(self):
    """Submit the pre-created submission fixture and verify accession results."""
    record = Submission().get_record(self.s_ckan_new)
    response = self.client.post(path='/rest/submit_to_repo/',
                                data={"sub_id": record["_id"]})
    # the accession result should now be present on the stored submission
    record = Submission().get_record(self.s_ckan_new)
    self.assertTrue("result" in record["accessions"][0])
    self.assertTrue("id" in record["accessions"][0]["result"])
def tearDownClass(cls):
    """Remove the user, profile, datafile and submission fixtures created for these tests."""
    User.objects.get(pk=1).delete()
    Profile().get_collection_handle().remove({"copo_id": "000000000"})
    DataFile().get_collection_handle().remove({"_id": cls.d})
    # NOTE(review): the cls.s_dv submission is not removed here — confirm
    # whether it should also be cleaned up.
    for sub_id in (cls.s_ckan_new, cls.s_ckan_existing):
        Submission().get_collection_handle().remove({"_id": sub_id})
def tearDownClass(cls):
    """Delete every fixture (user, profile, datafiles, repository, submissions) created for the dspace tests."""
    user = User.objects.get(username=settings.TEST_USER_NAME)
    user.delete()
    Profile().get_collection_handle().remove({"copo_id": "000000000"})
    DataFile().get_collection_handle().remove({"test_file": True})
    Repository().get_collection_handle().remove({"_id": cls.r["_id"]})
    for sub_id in (cls.s_dv, cls.s_ds_new, cls.s_ds_existing):
        Submission().get_collection_handle().remove({"_id": sub_id})
def submit(self, sub_id, dataFile_ids):
    """Submit datafiles for this submission to the repository.

    Reads the submission's meta to decide whether files are added to an
    existing dataset or a new one is created downstream.
    """
    # NOTE: value unused here, but the call touches the current session
    profile_id = data_utils.get_current_request().session.get('profile_id')
    record = Submission().get_record(ObjectId(sub_id))
    # repository url/api key for this submission
    self.host = Submission().get_dataverse_details(sub_id)
    self.headers = {'X-Dataverse-key': self.host['apikey']}
    # a dataset id in the submission meta means we are adding to an existing
    # dataset; otherwise a new dataset is created
    return self._add_to_dspace(record, record['meta']['new_or_existing'])
def test_dataverse_submission(self):
    """Submit the dataverse fixture and verify doi/edit-uri accessions were stored."""
    record = Submission().get_record(self.s_dv)
    response = self.client.post(path='/rest/submit_to_repo/',
                                data={"sub_id": record["_id"]})
    self.assertEqual(response.status_code, 200, "error submitting to dataverse")
    record = Submission().get_record(self.s_dv)
    self.assertTrue("accessions" in record, "accessions not in submission")
    accessions = record["accessions"]
    self.assertTrue(accessions["dataset_doi"].startswith("doi"),
                    "doi not present in submission")
    self.assertTrue(accessions["dataset_edit_uri"].startswith("http"),
                    "edit uri not present in submission")
def dc_dict_to_dc(self, sub_id):
    """Convert a submission's CG-Core metadata into dataverse dublin-core fields.

    Extracts the dataverse repo fields from the first datafile in the bundle,
    appends a dc.relation entry carrying the submission id (so the dataverse
    record can be traced back to COPO), and stores the result as the
    submission's meta.
    """
    record = Submission().get_record(ObjectId(sub_id))
    first_file_id = record["bundle"][0]
    dc_items = CgCoreSchemas().extract_repo_fields(str(first_file_id), "dataverse")
    # embed the submission id to allow backwards traversal from dataverse
    dc_items.append({
        "dc": "dc.relation",
        "copo_id": "submission_id",
        "vals": "copo:" + str(sub_id)
    })
    Submission().update_meta(sub_id, json.dumps(dc_items))
def setUpClass(cls):
    """Create the user, profile, datafile and submission fixtures used by the cgcore dataverse tests."""
    settings.UNIT_TESTING = True
    fixtures_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "fixtures")
    # create user
    cls.user = User.objects.create_user(username='******',
                                        first_name="jonny",
                                        last_name="appleseed",
                                        email='*****@*****.**',
                                        password='******')
    cls.user.save()
    # create profile
    profile_fields = {"copo_id": "000000000", "description": "Test Description",
                      "user_id": 1, "title": "Test Title"}
    cls.pid = Profile().save_record(dict(), **profile_fields)
    # create datafile from fixture json, pointed at the bundled fish.png
    with open(os.path.join(fixtures_dir, "dummy_datafile_cgcore.json")) as fh:
        datafile_doc = json.loads(fh.read())
    datafile_doc["file_location"] = os.path.join(fixtures_dir, "fish.png")
    datafile_doc["name"] = "fish.png"
    profile = Profile().get_collection_handle().find_one({"copo_id": "000000000"})
    datafile_doc["profile_id"] = str(cls.pid["_id"])
    cls.d = DataFile().get_collection_handle().insert(datafile_doc)
    # create submission referencing the datafile above
    with open(os.path.join(fixtures_dir,
                           "dummy_cgcore_dataverse_submission_existing.json")) as fh:
        submission_doc = json.loads(fh.read())
    submission_doc["bundle_meta"][0]["file_path"] = os.path.join(fixtures_dir, "fish.png")
    submission_doc["bundle_meta"][0]["file_id"] = str(cls.d)
    submission_doc["profile_id"] = str(cls.pid["_id"])
    submission_doc["bundle"].append(str(cls.d))
    cls.s_dv = Submission().get_collection_handle().insert(submission_doc)
def publish_article(self, article_id):
    """POST to figshare's publish endpoint for an article.

    On a 200/201 response the article is marked as published on the
    submission record. The raw response is returned either way.
    """
    publish_url = self.BASE_URL.format(
        endpoint='account/articles/{}/publish'.format(article_id))
    resp = requests.post(publish_url, headers=self.HEADERS)
    if resp.status_code in (200, 201):
        Submission().mark_figshare_article_published(article_id)
    return resp
def __init__(self, submission_id=str()):
    """Initialise submission parameters from an optional submission id.

    When a submission id is supplied, the submission record is loaded and the
    working file path, dataverse host url, api token and request headers are
    derived from it; otherwise all attributes stay empty.
    """
    self.submission_id = submission_id
    self.submission_record = dict()
    self.file_path = str()
    self.host = str()
    self.api_token = str()
    self.headers = dict()
    if self.submission_id:
        # load the submission record and derive the working parameters
        self.submission_record = Submission().get_record(self.submission_id)
        # local working directory for this submission's dataverse files
        # (renamed from `dir`, which shadows the builtin)
        data_dir = os.path.join(os.path.dirname(__file__), "data")
        self.file_path = os.path.join(data_dir, self.submission_id, 'dataverse')
        destination = self.submission_record.get("destination_repo", dict())
        self.host = destination.get("url", str())
        self.api_token = destination.get("apikey", str())
        self.headers = {'X-Dataverse-key': self.api_token}
def do_submission_xml(sub_id):
    """Build the SRA SUBMISSION xml document for a submission.

    Constructs a <SUBMISSION> element with alias/broker/center attributes taken
    from the first datafile in the bundle, a <CONTACTS> section (COPO support
    plus profile people flagged with SRA inform-on-status/error roles) and an
    <ACTIONS> section that ADDs the analysis.xml. Returns the pretty-printed xml
    string via prettify().
    """
    sub = Submission().get_record(sub_id)
    # load every datafile in the bundle; attribute values below come from the first
    dfs = list()
    for d in sub["bundle"]:
        dfs.append(DataFile().get_record(d))
    df = dfs[0]
    submission = Element("SUBMISSION")
    # get names of files in bundle and append here
    # do alias — suffixed "_sub" to distinguish from other aliases for this submission
    alias = make_alias(sub)
    submission.set("alias", alias + "_sub")
    submission.set(
        "broker_name",
        df["description"]["attributes"]["study_type"]["study_broker"])
    submission.set(
        "center_name", df["description"]["attributes"]["study_type"]
        ["study_analysis_center_name"])
    submission_date = datetime.datetime.now().isoformat()
    submission.set("submission_date", submission_date)
    submission.set("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance")
    submission.set(
        "xsi:noNamespaceSchemaLocation",
        "ftp://ftp.sra.ebi.ac.uk/meta/xsd/sra_1_5/SRA.submission.xsd")
    # fixed COPO support contact, always present
    contacts = Element("CONTACTS")
    copo_contact = Element("CONTACT")
    copo_contact.set("inform_on_error", "*****@*****.**")
    copo_contact.set("inform_on_status", "*****@*****.**")
    copo_contact.set("name", "COPO Support")
    contacts.append(copo_contact)
    # one CONTACT per person on the profile; inform_on_* attributes are set only
    # when the person carries the matching SRA role annotation
    people = Person(sub["profile_id"]).get_people_for_profile()
    for p in people:
        c = Element("CONTACT")
        c.set("name", p["firstName"] + " " + p["lastName"])
        if [
                x for x in p["roles"]
                if x["annotationValue"] == "SRA Inform On Status"
        ]:
            c.set("inform_on_status", p["email"])
        if [
                x for x in p["roles"]
                if x["annotationValue"] == "SRA Inform On Error"
        ]:
            c.set("inform_on_error", p["email"])
        contacts.append(c)
    submission.append(contacts)
    # single ADD action registering the analysis xml
    actions = Element("ACTIONS")
    action = Element("ACTION")
    add = Element("ADD")
    add.set("schema", "analysis")
    add.set("source", "analysis.xml")
    action.append(add)
    actions.append(action)
    submission.append(actions)
    return prettify(submission)
def do_get_submission_accessions(self):
    """Populate and return the context with accessions data for the targeted submission."""
    record = Submission().get_record(self.param_dict.get("target_id", str()))
    self.context["submission_accessions"] = \
        htags.generate_submission_accessions_data(record)
    return self.context
def __init__(self, sub_id=None):
    """When a submission id is given, look up the destination repo details and
    prepare the CKAN api-key header; otherwise no attributes are set."""
    if sub_id:
        self.host = Submission().get_dataverse_details(sub_id)
        self.headers = {'X-CKAN-API-Key': self.host['apikey']}
        self.hostname = self.host["url"]
        # NOTE(review): the api path is appended only for ".org" hosts —
        # confirm this condition is intended.
        if self.host["url"].endswith(".org"):
            self.host["url"] += "/api/3/action/"
def _add_to_dataverse(self):
    """Add this submission's datafiles to an existing dataverse dataset.

    Requires the submission meta to carry a dataverse alias (under
    "dataverse_alias" or "alias") and a dataset "doi"; returns a 404-style
    dict when either is missing. On a successful upload the dataverse and
    dataset details are fetched and stored on the submission as accessions,
    and the submission metadata is cleared.

    :return: True on success, otherwise the result of send_files_curl or a
             {"status": 404, ...} dict
    """
    sub = self.submission_record
    # check for dataverse alias — either meta key may hold it
    alias = sub.get("meta", dict()).get(
        "dataverse_alias", str()) or sub.get("meta", dict()).get(
            "alias", str())
    if not alias:
        return {"status": 404, "message": "\n Error getting dataverse"}
    # check for dataset doi
    doi = sub.get("meta", dict()).get("doi", str())
    if not doi:
        return {"status": 404, "message": "\n Error getting dataset"}
    # add file to dataset (upload via curl against the persistent id)
    result = self.send_files_curl(persistent_id=doi)
    if result is True:
        # store accessions and clear submission
        dv_response_data = self.get_dataverse_details(alias)
        ds_response_data = self.get_dataset_details(doi)
        # pull the dataset title out of the citation metadata block
        # (note: default is dict() but the comprehension expects a list of
        # field dicts — presumably "fields" is a list when present)
        dataset_title = [
            x["value"] for x in ds_response_data.get(
                "latestVersion", dict()).get("metadataBlocks", dict()).get(
                    "citation", dict()).get("fields", dict())
            if x.get("typeName", str()) == "title"
        ]
        acc = dict()
        acc['dataset_id'] = ds_response_data.get("id", str())
        acc['dataset_doi'] = doi
        acc['dataverse_alias'] = alias
        acc['dataverse_title'] = dv_response_data.get("name", "N/A")
        # default to "N/A"; overwritten below when a title was found
        acc['dataset_title'] = "N/A"
        if dataset_title:
            if isinstance(dataset_title, list):
                acc['dataset_title'] = dataset_title[0]
            elif isinstance(dataset_title, str):
                acc['dataset_title'] = dataset_title
        sub['accessions'] = acc
        # save_record expects target_id rather than _id
        sub['target_id'] = sub.pop('_id', self.submission_id)
        Submission().save_record(dict(), **sub)
        self.clear_submission_metadata()
    return result
def test_dspace_existing_submission(self):
    """Submit to an existing dspace item and verify the stored accession shape."""
    response = self.client.post(path='/rest/submit_to_repo/',
                                data={"sub_id": self.s_ds_existing})
    self.assertEqual(response.status_code, 200)
    accession = Submission().get_record(self.s_ds_existing)["accessions"][0]
    self.assertTrue(accession["dspace_instance"].startswith("http"))
    self.assertTrue("uuid" in accession)
    self.assertTrue(accession["retrieveLink"].startswith("/rest/bitstreams/"))
def get_existing_metadata(request):
    """Return the stored metadata for a submission as json, or an empty
    object when no submission_id was supplied."""
    sub_id = request.GET.get("submission_id")
    if sub_id is None:
        return HttpResponse(json.dumps({}))
    record = Submission().get_record(ObjectId(sub_id))
    return HttpResponse(json.dumps(record["meta"]))
def get_dataverse_content(request):
    """List the contents (datasets) of a dataverse via its native API.

    Returns the "data" array of the contents response, or a marker object
    when the dataverse holds no datasets.
    """
    dataverse_id = request.GET['id']
    repo = Submission().get_dataverse_details(request.GET['submission_id'])
    contents_url = repo['url'] + '/api/v1/dataverses/' + dataverse_id + '/contents'
    payload = json.loads(requests.get(contents_url).content.decode('utf-8'))
    if not payload['data']:
        return HttpResponse(
            json.dumps({"no_datasets": "No datasets found in this dataverse."}))
    return HttpResponse(json.dumps(payload['data']))
def publish_figshare(request):
    """Publish the figshare article attached to a submission and relay
    figshare's status code and article location to the client."""
    sub_id = request.POST['submission_id']
    record = Submission().get_record(sub_id)
    resp = FigshareSubmit(sub_id).publish_article(record['accession'])
    body = {
        'status_code': resp.status_code,
        'location': json.loads(resp.content.decode('utf8'))['location']
    }
    return HttpResponse(json.dumps(body))
def _update_dspace_submission(self, sub, dspace_url, data_id, item_id):
    """Fetch the bitstream record for an uploaded file and store it as an
    accession on the submission, annotated with the dspace instance, the
    containing item id and an expanded item metadata url."""
    bitstream_url = dspace_url + "/rest/bitstreams/" + str(data_id)
    data = json.loads(requests.get(bitstream_url).content.decode('utf-8'))
    # normalise payloads that expose "id" instead of "uuid"
    if "uuid" not in data:
        data["uuid"] = data.pop("id")
    data['dspace_instance'] = dspace_url
    data["item_id"] = item_id
    data["meta_url"] = dspace_url + "/rest/items/" + str(item_id) + "?expand=all"
    Submission().insert_dspace_accession(sub, data)
def submit(self, sub_id, dataFile_ids): submission_record = Submission().get_record(sub_id) # bundle_meta, if present, should provide a better picture of what datafiles need to be uploaded if "bundle_meta" in submission_record: pending_files = [ x["file_id"] for x in submission_record['bundle_meta'] if not x["upload_status"] ] dataFile_ids = pending_files # physically transfer files path2library = os.path.join(BASE_DIR, REPOSITORIES['ASPERA']['resource_path']) # change these to be collected properly user_name = REPOSITORIES['ASPERA']['user_token'] password = REPOSITORIES['ASPERA']['password'] # create transfer record transfer_token = RemoteDataFile().create_transfer(sub_id)['_id'] self.submission = Submission().get_record(sub_id) self.profile = Profile().get_record(self.submission['profile_id']) remote_path = d_utils.get_ena_remote_path(sub_id) # get each file in the bundle file_path = [] for idx, f_id in enumerate(dataFile_ids): mongo_file = DataFile().get_record(ObjectId(f_id)) self.d_files.append(mongo_file) file_path.append(mongo_file.get("file_location", str())) case = self._do_aspera_transfer(transfer_token=transfer_token, user_name=user_name, password=password, remote_path=remote_path, file_path=file_path, path2library=path2library, sub_id=sub_id) return case
def search_dataverse(request):
    """Proxy a search query to the submission repo's dataverse search API.

    Forwards q/type from the querystring (100 results per page, with entity
    ids) and returns the raw response body, or an empty response on failure.
    """
    box = request.GET['box']
    query = request.GET['q']
    repo = Submission().get_dataverse_details(request.GET['submission_id'])
    search_url = repo['url'] + '/api/v1/search'
    params = {'q': query, 'per_page': 100, 'show_entity_ids': True, 'type': box}
    resp = requests.get(url=search_url, params=params)
    if resp.status_code != 200:
        return HttpResponse(None)
    return HttpResponse(resp.content.decode('utf-8'))
def do_study_xml(sub_id):
    """Build the SRA STUDY xml document for a submission.

    Creates a STUDY_SET containing a single STUDY whose alias is the
    submission id, with a DESCRIPTOR (title, study type, abstract taken from
    the profile and first datafile) and a STUDY_ATTRIBUTES block carrying the
    submission date. Returns the pretty-printed xml string via prettify().
    """
    # get submission object from mongo
    sub = Submission().get_record(sub_id)
    # get datafile objects; descriptor values below come from the first one
    dfs = list()
    for d in sub["bundle"]:
        dfs.append(DataFile().get_record(d))
    df = dfs[0]
    # get profile object (supplies title and abstract)
    p = Profile().get_record(df["profile_id"])
    # Do STUDY_SET
    study_set = Element("STUDY_SET")
    study_set.set("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance")
    study_set.set("xsi:noNamespaceSchemaLocation",
                  "ftp://ftp.sra.ebi.ac.uk/meta/xsd/sra_1_5/SRA.study.xsd")
    # Do STUDY — alias is the submission's mongo id
    study = Element("STUDY")
    study.set("alias", str(sub["_id"]))
    study.set(
        "center_name", df["description"]["attributes"]["study_type"]
        ["study_analysis_center_name"])
    study_set.append(study)
    # Do DESCRIPTOR
    descriptor = Element("DESCRIPTOR")
    # create element, append to parent and add text
    SubElement(descriptor, "STUDY_TITLE").text = p["title"]
    study_type = Element("STUDY_TYPE")
    # map the stored study type to ENA's controlled vocabulary
    es = get_study_type_enumeration(
        df["description"]["attributes"]["study_type"]["study_type"])
    # es = df["description"]["attributes"]["study_type"]["study_type"]
    study_type.set("existing_study_type", es)
    descriptor.append(study_type)
    SubElement(descriptor, "STUDY_ABSTRACT").text = p["description"]
    study.append(descriptor)
    # Do STUDY_ATTRIBUTES
    study_attributes = Element("STUDY_ATTRIBUTES")
    # do attribute for date
    study_attribute = Element("STUDY_ATTRIBUTE")
    SubElement(study_attribute, "TAG").text = "Submission Date"
    SubElement(study_attribute,
               "VALUE").text = datetime.datetime.now().strftime('%Y-%m-%d')
    study_attributes.append(study_attribute)
    # here we can loop to add other STUDY_ATTRIBUTES
    study.append(study_attributes)
    return prettify(study_set)
def resolve_submission_id(request, submission_id):
    """Return a json view of a submission: the attribute blocks of every
    datafile in its bundle, its accessions, and its dublin-core metadata."""
    record = Submission().get_record(submission_id)
    # attribute block of every datafile in the bundle
    attributes = [
        DataFile().get_record(f)["description"]["attributes"]
        for f in record.get("bundle", list())
    ]
    output = {
        "files": attributes,
        "accessions": record["accessions"],
        "metadata": {"dc": record["meta"]["fields"]},
    }
    return HttpResponse(j.dumps(output))
def copo_get_submission_table_data(request):
    """Return the profile's submissions formatted for the submissions table:
    human-readable dates, repo display name, and a Pending/Submitted status."""
    profile_id = request.POST.get('profile_id')
    submission = Submission(profile_id=profile_id).get_all_records(
        sort_by="date_created", sort_direction="-1")
    date_fmt = '%d %b %Y - %I:%M %p'
    for s in submission:
        s['date_created'] = s['date_created'].strftime(date_fmt)
        s['date_modified'] = s['date_modified'].strftime(date_fmt)
        s['display_name'] = REPO_NAME_LOOKUP[s['repository']]
        # "complete" may be stored as the string 'false' or the boolean False
        incomplete = s['complete'] in ('false', False)
        s['status'] = 'Pending' if incomplete else 'Submitted'
    return HttpResponse(j.dumps(submission))
def _update_submission_record(self, sub, dataset, dataverse, dv_storageIdentifier=None):
    """Record the dataverse/dataset accession details on the submission and
    mark the submission complete.

    :return: True
    """
    # collect accession details from the dataset object
    accessions = {
        'storageIdentifier': dv_storageIdentifier,
        'mongo_file_id': dataset.id,
        'dataset_doi': dataset.doi,
        'dataset_edit_media_uri': dataset.edit_media_uri,
        'dataset_edit_uri': dataset.edit_uri,
        'dataset_is_deleted': dataset.is_deleted,
        'dataset_title': dataset.title,
        'dataverse_title': dataset.dataverse.title,
        'dataverse_alias': dataset.dataverse.alias,
        'dataset_id': dataset._id,
    }
    # save accessions to the mongo record; save_record expects target_id
    sub['accessions'] = accessions
    sub['complete'] = True
    sub['target_id'] = str(sub.pop('_id'))
    Submission().save_record(dict(), **sub)
    Submission().mark_submission_complete(sub['target_id'])
    return True
def get_existing_study_options():
    """Build select-list options from completed submissions' project accessions.

    Always starts with the "-- select one --" and "Not in COPO" sentinel
    options; each completed submission with a project accession contributes
    one option valued by its profile_id.

    :return: list of {"value": ..., "label": ...} dicts
    """
    from dal.copo_da import Submission
    subs = Submission().get_complete()
    out = list()
    out.append({"value": "required", "label": "-- select one --"})
    out.append({"value": "none", "label": "Not in COPO"})
    for s in subs:
        # FIX: narrowed the original bare `except:` (which also swallowed
        # SystemExit/KeyboardInterrupt) to the lookup errors that can occur
        # when a submission lacks project accession data.
        try:
            out.append({
                "value": s['profile_id'],
                "label": s['accessions']['project']['accession']
            })
        except (KeyError, TypeError):
            # submission has no project accession — skip it
            continue
    return out