Example #1
def get_upload_information(request):
    submission_id = request.GET.get('submission_id')

    # tonietuk's intercept starts
    if not submission_id:
        data = {'found': False}
        return HttpResponse(json.dumps(data))
    # tonietuk's intercept ends

    # get submission collection and check status
    sub = Submission().get_record(submission_id)
    if sub:
        if not sub['complete'] or sub['complete'] == 'false':
            rem = RemoteDataFile().get_by_sub_id(submission_id)
            if rem:
                speeds = rem['transfer_rate'][-100:]
                complete = rem['pct_completed']
                data = {'speeds': speeds, 'complete': complete, 'finished': False, 'found': True}
                return HttpResponse(json.dumps(data))
        else:
            # elapsed = str(parser.parse(sub['completed_on']) - parser.parse(sub['commenced_on']))
            # data = {'upload_time': str(elapsed), 'completed_on': sub['completed_on'], 'article_id': sub.get('article_id'), 'finished': True, 'found': True}
            data = {'sub_id': str(sub['_id']), 'status': sub['status'], 'accessions': sub['accessions'],
                    'repo': sub['repository'], 'completed_on': sub['completed_on'].strftime("%Y-%m-%d %H:%M:%S"),
                    'article_id': sub.get('article_id'), 'finished': True, 'found': True}
            return HttpResponse(json.dumps(data))

    data = {'found': False}
    return HttpResponse(json.dumps(data))
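A minimal sketch of polling this endpoint from a test, assuming a route like the ones used elsewhere in these examples (the exact URL path is an assumption, not taken from the project's urlconf):

# hypothetical usage sketch; the route path and submission id are placeholders
from django.test import Client
import json

client = Client()
resp = client.get('/rest/get_upload_information/', {'submission_id': '000000000000000000000000'})
payload = json.loads(resp.content.decode('utf-8'))
if payload['found'] and not payload.get('finished', False):
    print(payload['complete'], payload['speeds'][-1:])
else:
    print(payload)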
Example #2
 def test_dspace_existing_submission(self):
     # pass to submit method
     s = Submission().get_record(self.s_ckan_new)
     request = self.client.post(path='/rest/submit_to_repo/', data={"sub_id": s["_id"]})
     s = Submission().get_record(self.s_ckan_new)
     self.assertTrue("result" in s["accessions"][0])
     self.assertTrue("id" in s["accessions"][0]["result"])
Example #3
 def test_submit_existing_cgcore_dataverse(self):
     # method will test the submission of a copo cgcore record to an existing dataset within a dataverse
     s = Submission().get_record(self.s_dv)
     request = self.client.post(path='/rest/submit_to_repo/', data={"sub_id": s["_id"]})
     s = Submission().get_record(self.s_dv)
     self.assertTrue("result" in s["accessions"][0])
     self.assertTrue("id" in s["accessions"][0]["result"])
Example #4
    def dc_dict_to_dc(self, sub_id):
        # get file metadata, call converter to strip out dc fields
        s = Submission().get_record(ObjectId(sub_id))
        f_id = s["bundle"][0]
        items = CgCoreSchemas().extract_repo_fields(str(f_id), "ckan")

        Submission().update_meta(sub_id, json.dumps(items))
Example #5
def get_repo_info(request, sub=None):
    # this ajax method is called when user clicks "inspect repo" button on submission view
    try:
        if not sub:
            sub_id = request.GET['sub_id']
        else:
            sub_id = sub
        s = Submission().get_record(ObjectId(sub_id))
        repo = s['destination_repo']
        # if sub type is cg_core, do conversion from interim to dc
        if s["is_cg"]:
            if repo["type"] == "dataverse":
                ds().dc_dict_to_dc(sub_id)
            elif repo["type"] == "ckan":
                ckan().dc_dict_to_dc(sub_id)
            elif repo["type"] == "dspace":
                dspace().dc_dict_to_dc(sub_id)
    except Exception as e:
        print(e)
        return HttpResponse(
            json.dumps({
                "status": 404,
                "message": "error getting dataverse"
            }))
    s = Submission().get_record(ObjectId(sub_id))
    out = {
        'repo_type': repo['type'],
        'repo_url': repo['url'],
        'meta': s["meta"]
    }

    return HttpResponse(json.dumps(out))
Example #6
def update_submission_repo_data(request):
    task = request.POST['task']
    submission_id = request.POST['submission_id']
    if task == 'change_destination':
        custom_repo_id = request.POST['custom_repo_id']
        submission_id = request.POST['submission_id']
        s = Submission().update_destination_repo(repo_id=custom_repo_id,
                                                 submission_id=submission_id)
        s['record_id'] = str(submission_id)
        clear_submission_metadata(request)
        get_repo_info(request, sub=submission_id)
        return HttpResponse(json_util.dumps(s))
    elif task == 'change_meta':
        meta = json.loads(request.POST['meta'])
        new_or_existing = meta["new_or_existing"]
        if request.POST.get("type") == "dspace":
            if new_or_existing == "new":
                r_type = request.POST["type"]
                # add meta to separate dict field
                meta["new_or_existing"] = new_or_existing
                meta["repo_type"] = r_type
                m = Submission().get_record(ObjectId(submission_id))["meta"]
                meta["fields"] = m
        elif request.POST.get("type") == "dataverse" or request.POST.get(
                "type") == "ckan":
            if new_or_existing == "new":
                m = Submission().get_record(ObjectId(submission_id))["meta"]
                meta["fields"] = m
                meta["repo_type"] = request.POST["type"]

        # now update submission record
        if isinstance(meta, dict):
            meta = json.dumps(meta)
        s = Submission().update_meta(submission_id=submission_id, meta=meta)
        return HttpResponse(json.dumps(s))
Example #7
    def __init__(self, submission_id=str()):
        self.submission_id = submission_id

        self.submission_record = dict()
        self.file_path = str()
        self.host = str()
        self.api_token = str()
        self.headers = dict()

        if self.submission_id:
            # get submission record
            self.submission_record = Submission().get_record(
                self.submission_id)

            # set up submission parameters...

            # submission path
            data_dir = os.path.join(os.path.dirname(__file__), "data")
            self.file_path = os.path.join(data_dir, self.submission_id,
                                          'dataverse')

            # dataverse host
            self.host = self.submission_record.get("destination_repo",
                                                   dict()).get("url", str())

            # api_token
            self.api_token = self.submission_record.get(
                "destination_repo", dict()).get("apikey", str())

            # headers
            self.headers = {'X-Dataverse-key': self.api_token}
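With the attributes above in place, a request against the Dataverse native API might look like the following sketch (endpoint path per the public Dataverse API documentation; host and token are placeholders for destination_repo['url'] and destination_repo['apikey']):

import requests

host = 'https://demo.dataverse.org'            # placeholder host
headers = {'X-Dataverse-key': 'API_TOKEN'}     # placeholder token, as set up above
resp = requests.get(host + '/api/dataverses/root/contents', headers=headers)
print(resp.status_code, resp.json().get('data'))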
Example #8
    def publish_dataverse(self, sub_id):
        # get url for dataverse
        self.host = Submission().get_dataverse_details(sub_id)
        self.headers = {'X-Dataverse-key': self.host['apikey']}
        submission = Submission().get_record(sub_id)
        dvAlias = submission['accessions']['dataverse_alias']
        dsId = submission['accessions']['dataset_id']
        conn = self._get_connection()
        dv = conn.get_dataverse(dvAlias)
        # ds = dv.get_dataset_by_doi(dsDoi)
        if not dv.is_published:
            dv.publish()
        # POST http://$SERVER/api/datasets/$id/actions/:publish?type=$type&key=$apiKey
        url = submission['destination_repo']['url']
        url = url + '/api/datasets/' + str(
            dsId) + '/actions/:publish?type=major'
        print(url)
        resp = requests.post(url,
                             data={
                                 'type': 'major',
                                 'key': self.host['apikey']
                             },
                             headers=self.headers)
        if resp.status_code not in (200, 201):
            raise OperationFailedError('The Dataset could not be published. ' +
                                       resp.content.decode('utf-8'))

        doc = Submission().mark_as_published(sub_id)

        return doc
Example #9
 def tearDownClass(cls):
     u = User.objects.get(pk=1)
     u.delete()
     Profile().get_collection_handle().remove({"copo_id": "000000000"})
     DataFile().get_collection_handle().remove({"_id": cls.d})
     # Submission().get_collection_handle().remove({"_id": cls.s_dv})
     Submission().get_collection_handle().remove({"_id": cls.s_ckan_new})
     Submission().get_collection_handle().remove({"_id": cls.s_ckan_existing})
Example #10
 def tearDownClass(cls):
     u = User.objects.get(username=settings.TEST_USER_NAME)
     u.delete()
     Profile().get_collection_handle().remove({"copo_id": "000000000"})
     DataFile().get_collection_handle().remove({"test_file": True})
     Repository().get_collection_handle().remove({"_id": cls.r["_id"]})
     Submission().get_collection_handle().remove({"_id": cls.s_dv})
     Submission().get_collection_handle().remove({"_id": cls.s_ds_new})
     Submission().get_collection_handle().remove({"_id": cls.s_ds_existing})
Example #11
    def process_request(self, request):
        url = request.get_full_path()
        if url.startswith('/copo'):

            doc = Submission().get_incomplete_submissions_for_user(request.user.id, figshare)
            data_dict = dict()
            token = None

            if doc.count() > 0:

                if 'code' in request.GET and 'state' in request.GET:

                    token_obtained = True

                    for d in doc:
                        if d.get('token_obtained') == 'false':
                            token_obtained = False
                            break

                    if not token_obtained:

                        # get new token from Figshare
                        code = request.GET.get('code')
                        client_id = FIGSHARE_CREDENTIALS['client_id']
                        token_url = FIGSHARE_API_URLS['authorization_token']

                        # now get token
                        data = {
                            'client_id': client_id,
                            'code': code,
                            'client_secret': FIGSHARE_CREDENTIALS['client_secret'],
                            'grant_type': 'authorization_code',
                            'scope': 'all'
                        }
                        try:
                            r = requests.post(token_url, data)
                            data_dict = ast.literal_eval(r.content.decode('utf-8'))
                            token = data_dict['token']
                            t = Figshare().put_token_for_user(user_id=ThreadLocal.get_current_user().id, token=token)
                            if t:
                                # mark figshare submissions for this user as token obtained
                                Submission().mark_all_token_obtained(user_id=request.user.id)

                                # if all is well, the access token will be stored in FigshareSubmissionCollection
                        except Exception as e:
                            print(e)

                    else:
                        # retrieve token
                        token = Figshare().get_token_for_user(user_id=ThreadLocal.get_current_user().id)


                        # request.session['partial_submissions'] = doc
            else:
                request.session['partial_submissions'] = None
示例#12
0
    def submit(self, sub_id, dataFile_ids):
        profile_id = data_utils.get_current_request().session.get('profile_id')
        s = Submission().get_record(ObjectId(sub_id))

        # get url for dataverse
        self.host = Submission().get_dataverse_details(sub_id)
        self.headers = {'X-Dataverse-key': self.host['apikey']}

        # if dataset id in submission meta, we are adding to existing dataset, otherwise
        #  we are creating a new dataset
        new_or_existing = s['meta']['new_or_existing']
        return self._add_to_dspace(s, new_or_existing)
Example #13
 def test_dataverse_submission(self):
     s = Submission().get_record(self.s_dv)
     request = self.client.post(path='/rest/submit_to_repo/',
                                data={"sub_id": s["_id"]})
     self.assertEqual(request.status_code, 200,
                      "error submitting to dataverse")
     s = Submission().get_record(self.s_dv)
     self.assertTrue("accessions" in s, "accessions not in submission")
     self.assertTrue(s["accessions"]["dataset_doi"].startswith("doi"),
                     "doi not present in submission")
     self.assertTrue(s["accessions"]["dataset_edit_uri"].startswith("http"),
                     "edit uri not present in submission")
Example #14
 def dc_dict_to_dc(self, sub_id):
     # get file metadata, call converter to strip out dc fields
     s = Submission().get_record(ObjectId(sub_id))
     f_id = s["bundle"][0]
     items = CgCoreSchemas().extract_repo_fields(str(f_id), "dataverse")
     temp_id = "copo:" + str(sub_id)
     # add the submission_id to the dataverse metadata to allow backwards traversal from dataverse
     items.append({
         "dc": "dc.relation",
         "copo_id": "submission_id",
         "vals": temp_id
     })
     Submission().update_meta(sub_id, json.dumps(items))
Example #15
def resolve_submission_id(request, submission_id):
    sub = Submission().get_record(submission_id)
    # get all file metadata
    output = dict()
    files = list()
    for f in sub.get("bundle", list()):
        file = DataFile().get_record(f)
        files.append(file["description"]["attributes"])
    output["files"] = files
    output["accessions"] = sub["accessions"]
    output["metadata"] = {}
    output["metadata"]["dc"] = sub["meta"]["fields"]
    return HttpResponse(j.dumps(output))
Example #16
    def setUpClass(cls):
        settings.UNIT_TESTING = True
        # create user
        cls.user = User.objects.create_user(username='******', first_name="jonny", last_name="appleseed",
                                            email='*****@*****.**', password='******')
        cls.user.save()

        # create profile
        p_dict = {"copo_id": "000000000", "description": "Test Description", "user_id": 1, "title": "Test Title"}
        cls.pid = Profile().save_record(dict(), **p_dict)

        # create datafile
        p = os.path.join(os.path.dirname(os.path.realpath(__file__)), "fixtures", "dummy_datafile_cgcore.json")
        with open(p) as f:
            p_dict = json.loads(f.read())
        p_dict["file_location"] = os.path.join(os.path.dirname(os.path.realpath(__file__)), "fixtures", "fish.png")
        p_dict["name"] = "fish.png"
        profile = Profile().get_collection_handle().find_one({"copo_id": "000000000"})
        p_dict["profile_id"] = str(cls.pid["_id"])
        cls.d = DataFile().get_collection_handle().insert(p_dict)

        # create submission
        p = os.path.join(os.path.dirname(os.path.realpath(__file__)), "fixtures",
                         "dummy_cgcore_dataverse_submission_existing.json")
        with open(p) as f:
            p_dict = json.loads(f.read())
        p_dict["bundle_meta"][0]["file_path"] = os.path.join(os.path.dirname(os.path.realpath(__file__)), "fixtures",
                                                             "fish.png")
        p_dict["bundle_meta"][0]["file_id"] = str(cls.d)
        p_dict["profile_id"] = str(cls.pid["_id"])
        p_dict["bundle"].append(str(cls.d))
        cls.s_dv = Submission().get_collection_handle().insert(p_dict)
Example #17
 def publish_article(self, article_id):
     endpoint = 'account/articles/{}/publish'.format(article_id)
     post = self.BASE_URL.format(endpoint=endpoint)
     resp = requests.post(post, headers=self.HEADERS)
     if resp.status_code == 200 or resp.status_code == 201:
         Submission().mark_figshare_article_published(article_id)
     return resp
Example #18
def do_submission_xml(sub_id):
    sub = Submission().get_record(sub_id)
    dfs = list()
    for d in sub["bundle"]:
        dfs.append(DataFile().get_record(d))
    df = dfs[0]

    submission = Element("SUBMISSION")
    # get names of files in bundle and append here
    # do alias
    alias = make_alias(sub)
    submission.set("alias", alias + "_sub")
    submission.set(
        "broker_name",
        df["description"]["attributes"]["study_type"]["study_broker"])
    submission.set(
        "center_name", df["description"]["attributes"]["study_type"]
        ["study_analysis_center_name"])
    submission_date = datetime.datetime.now().isoformat()
    submission.set("submission_date", submission_date)
    submission.set("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance")
    submission.set(
        "xsi:noNamespaceSchemaLocation",
        "ftp://ftp.sra.ebi.ac.uk/meta/xsd/sra_1_5/SRA.submission.xsd")

    contacts = Element("CONTACTS")
    copo_contact = Element("CONTACT")
    copo_contact.set("inform_on_error", "*****@*****.**")
    copo_contact.set("inform_on_status", "*****@*****.**")
    copo_contact.set("name", "COPO Support")
    contacts.append(copo_contact)

    people = Person(sub["profile_id"]).get_people_for_profile()
    for p in people:
        c = Element("CONTACT")
        c.set("name", p["firstName"] + " " + p["lastName"])
        if [
                x for x in p["roles"]
                if x["annotationValue"] == "SRA Inform On Status"
        ]:
            c.set("inform_on_status", p["email"])
        if [
                x for x in p["roles"]
                if x["annotationValue"] == "SRA Inform On Error"
        ]:
            c.set("inform_on_error", p["email"])
        contacts.append(c)
    submission.append(contacts)

    actions = Element("ACTIONS")
    action = Element("ACTION")
    add = Element("ADD")
    add.set("schema", "analysis")
    add.set("source", "analysis.xml")
    action.append(add)
    actions.append(action)
    submission.append(actions)

    return prettify(submission)
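The prettify() helper is not shown in these examples; a common implementation, assuming it wraps xml.dom.minidom as many ElementTree-based projects do, is:

from xml.dom import minidom
from xml.etree.ElementTree import tostring

def prettify(elem):
    # return a pretty-printed XML string for the given Element (assumed behaviour)
    rough = tostring(elem, 'utf-8')
    return minidom.parseString(rough).toprettyxml(indent='    ')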
Example #19
    def do_get_submission_accessions(self):
        target_id = self.param_dict.get("target_id", str())
        submission_record = Submission().get_record(target_id)

        self.context["submission_accessions"] = htags.generate_submission_accessions_data(
            submission_record)
        return self.context
Example #20
    def __init__(self, sub_id=None):
        if sub_id:
            self.host = Submission().get_dataverse_details(sub_id)
            self.headers = {'X-CKAN-API-Key': self.host['apikey']}
            self.hostname = self.host["url"]
            if self.host["url"].endswith(".org"):

                self.host["url"] = self.host["url"] + "/api/3/action/"
Example #21
    def _add_to_dataverse(self):
        """
        function adds datafiles to a dataset
        :return:
        """
        sub = self.submission_record

        # check for dataverse alias

        alias = sub.get("meta", dict()).get(
            "dataverse_alias", str()) or sub.get("meta", dict()).get(
                "alias", str())

        if not alias:
            return {"status": 404, "message": "\n Error getting dataverse"}

        # check for dataset doi
        doi = sub.get("meta", dict()).get("doi", str())

        if not doi:
            return {"status": 404, "message": "\n Error getting dataset"}

        # add file to dataset
        result = self.send_files_curl(persistent_id=doi)

        if result is True:
            # store accessions and clear submission
            dv_response_data = self.get_dataverse_details(alias)
            ds_response_data = self.get_dataset_details(doi)

            dataset_title = [
                x["value"] for x in ds_response_data.get(
                    "latestVersion", dict()).get("metadataBlocks", dict()).get(
                        "citation", dict()).get("fields", list())
                if x.get("typeName", str()) == "title"
            ]

            acc = dict()
            acc['dataset_id'] = ds_response_data.get("id", str())
            acc['dataset_doi'] = doi
            acc['dataverse_alias'] = alias
            acc['dataverse_title'] = dv_response_data.get("name", "N/A")
            acc['dataset_title'] = "N/A"

            if dataset_title:
                if isinstance(dataset_title, list):
                    acc['dataset_title'] = dataset_title[0]
                elif isinstance(dataset_title, str):
                    acc['dataset_title'] = dataset_title

            sub['accessions'] = acc
            sub['target_id'] = sub.pop('_id', self.submission_id)
            Submission().save_record(dict(), **sub)

            self.clear_submission_metadata()

        return result
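For orientation, the dataset JSON shape the title comprehension above walks (per the Dataverse native API; abridged, values illustrative):

ds_response_data = {
    'id': 12345,
    'latestVersion': {
        'metadataBlocks': {
            'citation': {
                'fields': [
                    {'typeName': 'title', 'value': 'My dataset title'},
                ]
            }
        }
    }
}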
Example #22
 def test_dspace_existing_submission(self):
     request = self.client.post(path='/rest/submit_to_repo/',
                                data={"sub_id": self.s_ds_existing})
     self.assertEqual(request.status_code, 200)
     s = Submission().get_record(self.s_ds_existing)
     self.assertTrue(
         s["accessions"][0]["dspace_instance"].startswith("http"))
     self.assertTrue("uuid" in s["accessions"][0])
     self.assertTrue(
         s["accessions"][0]["retrieveLink"].startswith("/rest/bitstreams/"))
Example #23
def get_existing_metadata(request):
    # get base metadata for view showing new dspace item
    try:
        sub_id = request.GET["submission_id"]
    except KeyError:
        return HttpResponse(json.dumps({}))
    sub = Submission().get_record(ObjectId(sub_id))

    out = sub["meta"]
    return HttpResponse(json.dumps(out))
Example #24
    def _do_file_transfer(self):
        submission_record = Submission().get_record(self.submission_id)

        # do we have files to be uploaded?
        bundle_df = pd.DataFrame(submission_record.get("bundle_meta", list()))

        if len(bundle_df) == 0:
            # insufficient information to proceed - no bundle meta
            return

        pending_df = bundle_df[bundle_df['upload_status'] == False]

        if len(pending_df) > 0:
            path2library = os.path.join(
                BASE_DIR, REPOSITORIES['ASPERA']['resource_path'])

            user_name = REPOSITORIES['ASPERA']['user_token']
            password = REPOSITORIES['ASPERA']['password']

            # compose remote file directory
            remote_path = d_utils.get_ena_remote_path(self.submission_id)

            self._do_aspera_transfer(user_name=user_name,
                                     password=password,
                                     remote_path=remote_path,
                                     file_path=list(pending_df['file_path']),
                                     path2library=path2library)
        else:
            # no files to be uploaded
            transfer_fields = dict()
            transfer_fields["transfer_status"] = "completed"
            transfer_fields["pct_completed"] = '100'
            transfer_fields["current_time"] = datetime.now().strftime(
                "%d-%m-%Y %H:%M:%S")

            # save collected metadata to the transfer record
            RemoteDataFile().update_transfer(self.transfer_token,
                                             transfer_fields)

            self.context["ena_status"] = "files_transferred"

        return
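The bundle_meta rows the DataFrame filter above expects look roughly like this (field names taken from these examples; values illustrative):

bundle_meta = [
    {'file_id': '5c0a000000000000000000aa',   # illustrative ObjectId string
     'file_path': '/copo/data/<sub_id>/fish.png',
     'upload_status': False},
]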
Example #25
 def _update_dspace_submission(self, sub, dspace_url, data_id, item_id):
     data_url = dspace_url + "/rest/bitstreams/" + str(data_id)
     meta_url = dspace_url + "/rest/items/" + str(item_id) + "?expand=all"
     resp = requests.get(data_url)
     data = json.loads(resp.content.decode('utf-8'))
     if "uuid" not in data:
         data["uuid"] = data.pop("id")
     data['dspace_instance'] = dspace_url
     data["item_id"] = item_id
     data["meta_url"] = meta_url
     Submission().insert_dspace_accession(sub, data)
Example #26
def get_dataverse_content(request):
    id = request.GET['id']
    url = Submission().get_dataverse_details(request.GET['submission_id'])
    dv_url = url['url'] + '/api/v1/dataverses/' + id + '/contents'
    resp_dv = requests.get(dv_url).content.decode('utf-8')
    ids = json.loads(resp_dv)
    if not ids['data']:
        return HttpResponse(
            json.dumps({"no_datasets":
                        "No datasets found in this dataverse."}))
    return HttpResponse(json.dumps(ids['data']))
Example #27
def publish_figshare(request):
    sub_id = request.POST['submission_id']
    s = Submission().get_record(sub_id)
    resp = FigshareSubmit(sub_id).publish_article(s['accession'])
    return HttpResponse(
        json.dumps({
            'status_code': resp.status_code,
            'location': json.loads(resp.content.decode('utf8'))['location']
        }))
Example #28
    def submit(self, sub_id, dataFile_ids):
        submission_record = Submission().get_record(sub_id)

        # bundle_meta, if present, should provide a better picture of what datafiles need to be uploaded
        if "bundle_meta" in submission_record:
            pending_files = [
                x["file_id"] for x in submission_record['bundle_meta']
                if not x["upload_status"]
            ]
            dataFile_ids = pending_files

        # physically transfer files
        path2library = os.path.join(BASE_DIR,
                                    REPOSITORIES['ASPERA']['resource_path'])

        # change these to be collected properly
        user_name = REPOSITORIES['ASPERA']['user_token']
        password = REPOSITORIES['ASPERA']['password']

        # create transfer record
        transfer_token = RemoteDataFile().create_transfer(sub_id)['_id']
        self.submission = Submission().get_record(sub_id)

        self.profile = Profile().get_record(self.submission['profile_id'])
        remote_path = d_utils.get_ena_remote_path(sub_id)

        # get each file in the bundle
        file_path = []
        for f_id in dataFile_ids:
            mongo_file = DataFile().get_record(ObjectId(f_id))
            self.d_files.append(mongo_file)
            file_path.append(mongo_file.get("file_location", str()))

        case = self._do_aspera_transfer(transfer_token=transfer_token,
                                        user_name=user_name,
                                        password=password,
                                        remote_path=remote_path,
                                        file_path=file_path,
                                        path2library=path2library,
                                        sub_id=sub_id)
        return case
Example #29
def search_dataverse(request):
    box = request.GET['box']
    q = request.GET['q']
    url = Submission().get_dataverse_details(request.GET['submission_id'])
    dv_url = url['url'] + '/api/v1/search'
    payload = {'q': q, 'per_page': 100, 'show_entity_ids': True, 'type': box}
    resp = requests.get(url=dv_url, params=payload)
    if resp.status_code != 200:
        return HttpResponse(None)
    resp = resp.content.decode('utf-8')

    return HttpResponse(resp)
Example #30
def do_study_xml(sub_id):
    # get submission object from mongo
    sub = Submission().get_record(sub_id)
    # get datafile objects
    dfs = list()
    for d in sub["bundle"]:
        dfs.append(DataFile().get_record(d))
    df = dfs[0]
    # get profile object
    p = Profile().get_record(df["profile_id"])

    # Do STUDY_SET
    study_set = Element("STUDY_SET")
    study_set.set("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance")
    study_set.set("xsi:noNamespaceSchemaLocation",
                  "ftp://ftp.sra.ebi.ac.uk/meta/xsd/sra_1_5/SRA.study.xsd")

    # Do STUDY
    study = Element("STUDY")
    study.set("alias", str(sub["_id"]))
    study.set(
        "center_name", df["description"]["attributes"]["study_type"]
        ["study_analysis_center_name"])
    study_set.append(study)

    # Do DESCRIPTOR
    descriptor = Element("DESCRIPTOR")
    # create element, append to parent and add text
    SubElement(descriptor, "STUDY_TITLE").text = p["title"]
    study_type = Element("STUDY_TYPE")
    es = get_study_type_enumeration(
        df["description"]["attributes"]["study_type"]["study_type"])
    # es = df["description"]["attributes"]["study_type"]["study_type"]
    study_type.set("existing_study_type", es)
    descriptor.append(study_type)
    SubElement(descriptor, "STUDY_ABSTRACT").text = p["description"]
    study.append(descriptor)

    # Do STUDY_ATTRIBUTES
    study_attributes = Element("STUDY_ATTRIBUTES")
    # do attribute for date
    study_attribute = Element("STUDY_ATTRIBUTE")
    SubElement(study_attribute, "TAG").text = "Submission Date"
    SubElement(study_attribute,
               "VALUE").text = datetime.datetime.now().strftime('%Y-%m-%d')
    study_attributes.append(study_attribute)

    # here we can loop to add other STUDY_ATTRIBUTES

    study.append(study_attributes)

    return prettify(study_set)
Example #31
def copo_get_submission_table_data(request):
    profile_id = request.POST.get('profile_id')
    submission = Submission(profile_id=profile_id).get_all_records(sort_by="date_created", sort_direction="-1")
    for s in submission:
        s['date_created'] = s['date_created'].strftime('%d %b %Y - %I:%M %p')
        s['date_modified'] = s['date_modified'].strftime('%d %b %Y - %I:%M %p')
        s['display_name'] = REPO_NAME_LOOKUP[s['repository']]
        if s['complete'] in ('false', False):
            s['status'] = 'Pending'
        else:
            s['status'] = 'Submitted'

    out = j.dumps(submission)
    return HttpResponse(out)
Example #32
def get_existing_study_options():
    from dal.copo_da import Submission
    subs = Submission().get_complete()
    out = list()
    out.append({"value": "required", "label": "-- select one --"})
    out.append({"value": "none", "label": "Not in COPO"})
    for s in subs:
        try:
            out.append({
                "value": s['profile_id'],
                "label": s['accessions']['project']['accession']
            })
        except (KeyError, TypeError):
            # submission lacks a project accession; skip it
            pass
    return out
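The returned list feeds a select control; its shape is as follows (the accession value is illustrative):

[{'value': 'required', 'label': '-- select one --'},
 {'value': 'none', 'label': 'Not in COPO'},
 {'value': '<profile_id>', 'label': 'PRJEB00000'}]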
Example #33
    def _submit(self, sub_id, dataFile_ids):

        for f_id in dataFile_ids:

            mongo_file = DataFile().get_record(f_id)

            c = ChunkedUpload.objects.get(pk=int(mongo_file["file_id"]))

            file_path = os.path.join(self.MEDIA_ROOT, str(c.file))
            orig_name = c.filename

            sub = mongo_file['description']['attributes']
            data = dict()
            data['defined_type'] = sub.get('type_category', dict()).get('type')
            data['title'] = sub.get('title_author_description', dict()).get('title')
            authors = sub.get('title_author_description', dict()).get('author').split(',')
            lst = list()
            for x in authors:
                lst.append({'name': x})
            data['authors'] = lst
            data['description'] = sub.get('title_author_description', dict()).get('description')
            cat = sub.get('type_category', dict()).get('categories')
            cat = cat.split(',')
            cat = list(map(int, cat))
            data['categories'] = cat
            data['tags'] = sub.get('tags', dict()).get('keywords').split(',')
            data['references'] = sub.get('tags', dict()).get('references').split(',')
            for idx, x in enumerate(data['references']):
                # normalise bare references into full URLs
                if not x.startswith('http'):
                    if not x.startswith('www'):
                        data['references'][idx] = 'http://www.' + x
                    else:
                        data['references'][idx] = 'http://' + x
            data['funding'] = sub.get('tags', dict()).get('funding')
            data['licenses'] = sub.get('tags', dict()).get('licenses')
            data['publish'] = sub.get('figshare_publish', dict()).get('should_publish')


            # Create article
            #data = json.dumps({'title': orig_name, 'defined_type': 'figure'})
            endpoint = 'account/articles'
            resp = requests.post(self.BASE_URL.format(endpoint=endpoint), headers=self.HEADERS, data=json.dumps(data))

            article_id = json.loads(resp.content.decode('utf8'))['location'].rsplit('/', 1)[1]

            # Get file info
            #with open(file_path, 'rb') as fin:
            #    fin.seek(0, 2)  # Go to end of file
            #    size = fin.tell()
            size = c.offset
            info = json.dumps({'name': orig_name, 'size': size })

            # Initiate upload
            endpoint = 'account/articles/{}/files'.format(article_id)
            resp = requests.post(self.BASE_URL.format(endpoint=endpoint), headers=self.HEADERS, data=info)

            file_id = json.loads(resp.content.decode('utf-8'))['location'].rsplit('/', 1)[1]

            # Get upload/parts info
            endpoint = 'account/articles/{}/files/{}'.format(article_id, file_id)
            resp = requests.get(self.BASE_URL.format(endpoint=endpoint), headers=self.HEADERS)

            url = '{upload_url}'.format(**json.loads(resp.content.decode('utf-8')))
            parts = json.loads(requests.get(url).content.decode('utf-8'))['parts']


            # start upload timer
            t = datetime.datetime.now()

            # Upload parts
            with open(file_path, 'rb') as fin:
                for idx, part in enumerate(parts):

                    percent_done = idx / len(parts) * 100
                    size = part['endOffset'] - part['startOffset'] + 1

                    address = '{}/{}'.format(url, part['partNo'])
                    x = datetime.datetime.now()
                    requests.put(address, data=fin.read(size))
                    delta = datetime.datetime.now() - x
                    # calculate current upload rate in MB per second
                    bw = (size / delta.total_seconds()) / 1000 / 1000
                    fields = {'transfer_rate': bw, 'pct_completed': percent_done}
                    RemoteDataFile().update_transfer(self.transfer_token, fields)

            # Mark file upload as completed
            upload_time = datetime.datetime.now() - t
            requests.post(self.BASE_URL.format(endpoint=endpoint), headers=self.HEADERS)

            fields = {'pct_completed': 100, 'transfer_status': 'success', 'completed_on':str(datetime.datetime.now()), 'article_id': article_id}
            RemoteDataFile().update_transfer(self.transfer_token, fields)

            if data['publish'] == 'True':
                # publish api
                endpoint = 'account/articles/{}/publish'.format(article_id)
                resp = requests.post(self.BASE_URL.format(endpoint=endpoint), headers=self.HEADERS)
                location = json.loads(resp.content.decode('utf8'))['location']
                # get accession data
                endpoint = 'articles/{}'.format(article_id)
                resp = requests.get(self.BASE_URL.format(endpoint=endpoint), headers=self.HEADERS)
                # save accessions to mongo profile record
                s = Submission().get_record(sub_id)
                s['accession'] = json.loads(resp.content.decode('utf8'))['figshare_url']
                s['complete'] = True
                s['status'] = 'published'
                s['target_id'] = str(s.pop('_id'))
                Submission().save_record(dict(), **s)
            else:
                # save accessions to mongo profile record
                s = Submission().get_record(sub_id)
                s['accession'] = article_id
                s['complete'] = True
                s['status'] = 'not published'
                s['target_id'] = str(s.pop('_id'))
                Submission().save_record(dict(), **s)


        # mark submission as complete
        Submission().mark_submission_complete(sub_id)
Example #34
    def _do_aspera_transfer(self, transfer_token=None, user_name=None, password=None, remote_path=None, file_path=None,
                            path2library=None, sub_id=None):

        # check submission status
        submission_status = Submission().isComplete(sub_id)

        if not submission_status or submission_status == 'false':

            lg.log('Starting aspera transfer', level=Loglvl.INFO, type=Logtype.FILE)

            kwargs = dict(target_id=sub_id, commenced_on=str(datetime.now()))
            Submission().save_record(dict(), **kwargs)

            # k is a loop counter which keeps track of the number of files transferred
            k = -1
            f_str = ' '.join(file_path)
            cmd = "./ascp -d -QT -l300M -L- {f_str!s} {user_name!s}:{remote_path!s}".format(**locals())
            lg.log(cmd, level=Loglvl.INFO, type=Logtype.FILE)
            os.chdir(path2library)

            try:
                thread = pexpect.spawn(cmd, timeout=None)
                thread.expect(["assword:", pexpect.EOF])
                thread.sendline(password)

                cpl = thread.compile_pattern_list([pexpect.EOF, '(.+)'])

                # track the file currently reported by ascp across iterations
                prev_file = ''
                while True:
                    i = thread.expect_list(cpl, timeout=None)
                    if i == 0:  # EOF! Possible error point if encountered before transfer completion
                        print("Process termination - check exit status!")
                        break
                    elif i == 1:
                        pexp_match = thread.match.group(1)
                        tokens_to_match = ["Mb/s"]
                        units_to_match = ["KB", "MB"]
                        time_units = ['d', 'h', 'm', 's']
                        end_of_transfer = False

                        if all(tm in pexp_match.decode("utf-8") for tm in tokens_to_match):
                            fields = {
                                "transfer_status": "transferring",
                                "current_time": datetime.now().strftime("%d-%m-%Y %H:%M:%S")
                            }

                            tokens = pexp_match.decode("utf-8").split(" ")

                            for token in tokens:
                                if not token == '':
                                    if "file" in token:
                                        fields['file_path'] = token.split('=')[-1]
                                        if prev_file != fields['file_path']:
                                            k = k + 1
                                        prev_file = fields['file_path']
                                    elif '%' in token:
                                        pct = float((token.rstrip("%")))
                                        # pct = (1/len(file_path) * pct) + (k * 1/len(file_path) * 100)
                                        fields['pct_completed'] = pct
                                        # flag end of transfer
                                        print(str(transfer_token) + ":  " + str(pct) + '% transferred')
                                        if pct == 100:
                                            end_of_transfer = True
                                    elif any(um in token for um in units_to_match):
                                        fields['amt_transferred'] = token
                                    elif "Mb/s" in token or "Mbps" in token:
                                        t = token[:-4]
                                        if '=' in t:
                                            fields['transfer_rate'] = t[t.find('=') + 1:]
                                        else:
                                            fields['transfer_rate'] = t
                                    elif "status" in token:
                                        fields['transfer_status'] = token.split('=')[-1]
                                    elif "rate" in token:
                                        fields['transfer_rate'] = token.split('=')[-1]
                                    elif "elapsed" in token:
                                        fields['elapsed_time'] = token.split('=')[-1]
                                    elif "loss" in token:
                                        fields['bytes_lost'] = token.split('=')[-1]
                                    elif "size" in token:
                                        fields['file_size_bytes'] = token.split('=')[-1]

                                    elif "ETA" in token:
                                        eta = tokens[-2]
                                        estimated_completion = ""
                                        eta_split = eta.split(":")
                                        t_u = time_units[-len(eta_split):]
                                        for indx, eta_token in enumerate(eta.split(":")):
                                            if eta_token == "00":
                                                continue
                                            estimated_completion += eta_token + t_u[indx] + " "
                                        fields['estimated_completion'] = estimated_completion
                            RemoteDataFile().update_transfer(transfer_token, fields)

                kwargs = dict(target_id=sub_id, completed_on=datetime.now())
                Submission().save_record(dict(), **kwargs)
                # close thread
                thread.close()
                lg.log('Aspera Transfer completed', level=Loglvl.INFO, type=Logtype.FILE)

            except OSError:
                return redirect('web.apps.web_copo.views.goto_error', request=HttpRequest(),
                                message='There appears to be an issue with EBI.')

        # setup paths for conversion directories
        conv_dir = os.path.join(self._dir, sub_id)
        if not os.path.exists(os.path.join(conv_dir, 'json')):
            os.makedirs(os.path.join(conv_dir, 'json'))
        json_file_path = os.path.join(conv_dir, 'json', 'isa_json.json')
        xml_dir = conv_dir
        xml_path = os.path.join(xml_dir, 'run_set.xml')

        #  Convert COPO JSON to ISA JSON
        lg.log('Obtaining ISA-JSON', level=Loglvl.INFO, type=Logtype.FILE)
        conv = cnv.Investigation(submission_token=sub_id)
        meta = conv.get_schema()
        with open(json_file_path, 'w') as json_file:
            # dump metadata to output file
            json_file.write(dumps(meta))

        # Validate ISA_JSON
        lg.log('Validating ISA-JSON', level=Loglvl.INFO, type=Logtype.FILE)
        with open(json_file_path) as json_file:
            v = isajson.validate(json_file)
            lg.log(v, level=Loglvl.INFO, type=Logtype.FILE)

        # convert to SRA with isatools converter
        lg.log('Converting to SRA', level=Loglvl.INFO, type=Logtype.FILE)
        sra_settings = d_utils.json_to_pytype(SRA_SETTINGS).get("properties", dict())
        datafilehashes = conv.get_datafilehashes()
        json2sra.convert2(json_fp=open(json_file_path), path=conv_dir, sra_settings=sra_settings,
                          datafilehashes=datafilehashes, validate_first=False)

        # finally submit to SRA
        lg.log('Submitting XMLS to ENA via CURL', level=Loglvl.INFO, type=Logtype.FILE)
        submission_file = os.path.join(xml_dir, 'submission.xml')
        project_file = os.path.join(xml_dir, 'project_set.xml')
        sample_file = os.path.join(xml_dir, 'sample_set.xml')
        experiment_file = os.path.join(xml_dir, 'experiment_set.xml')
        run_file = os.path.join(xml_dir, 'run_set.xml')

        curl_cmd = 'curl -k -F "SUBMISSION=@' + submission_file + '" \
         -F "PROJECT=@' + os.path.join(remote_path, project_file) + '" \
         -F "SAMPLE=@' + os.path.join(remote_path, sample_file) + '" \
         -F "EXPERIMENT=@' + os.path.join(remote_path, experiment_file) + '" \
         -F "RUN=@' + os.path.join(remote_path, run_file) + '" \
         "https://www-test.ebi.ac.uk/ena/submit/drop-box/submit/?auth=ENA%20Webin-39233%20Apple123"'

        output = subprocess.check_output(curl_cmd, shell=True)
        lg.log(output, level=Loglvl.INFO, type=Logtype.FILE)
        lg.log("Extracting fields from receipt", level=Loglvl.INFO, type=Logtype.FILE)

        xml = ET.fromstring(output)

        accessions = dict()

        # get project accessions
        project = xml.find('./PROJECT')
        project_accession = project.get('accession', default='undefined')
        project_alias = project.get('alias', default='undefined')
        accessions['project'] = {'accession': project_accession, 'alias': project_alias}

        # get experiment accessions
        experiment = xml.find('./EXPERIMENT')
        experiment_accession = experiment.get('accession', default='undefined')
        experiment_alias = experiment.get('alias', default='undefined')
        accessions['experiment'] = {'accession': experiment_accession, 'alias': experiment_alias}

        # get submission accessions
        submission = xml.find('./SUBMISSION')
        submission_accession = submission.get('accession', default='undefined')
        submission_alias = submission.get('alias', default='undefined')
        accessions['submission'] = {'accession': submission_accession, 'alias': submission_alias}

        # get run accessions
        run = xml.find('./RUN')
        run_accession = run.get('accession', default='undefined')
        run_alias = run.get('alias', default='undefined')
        accessions['run'] = {'accession': run_accession, 'alias': run_alias}

        # get sample accessions
        samples = xml.findall('./SAMPLE')
        sample_accessions = list()
        for sample in samples:
            sample_accession = sample.get('accession', default='undefined')
            sample_alias = sample.get('alias', default='undefined')
            s = {'sample_accession': sample_accession, 'sample_alias': sample_alias}
            for bio_s in sample:
                s['biosample_accession'] = bio_s.get('accession', default='undefined')
            sample_accessions.append(s)
        accessions['sample'] = sample_accessions

        # save accessions to mongo profile record
        s = Submission().get_record(sub_id)
        s['accessions'] = accessions
        s['complete'] = True
        s['target_id'] = str(s.pop('_id'))
        Submission().save_record(dict(), **s)
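For orientation, an abridged receipt that the accession parsing above expects (structure inferred from the xml.find() calls in this example; accession values illustrative):

import xml.etree.ElementTree as ET

receipt = '''<RECEIPT>
  <PROJECT accession="PRJEB00000" alias="a"/>
  <EXPERIMENT accession="ERX000000" alias="a_exp"/>
  <SUBMISSION accession="ERA000000" alias="a_sub"/>
  <RUN accession="ERR000000" alias="a_run"/>
  <SAMPLE accession="ERS000000" alias="a_sam">
    <EXT_ID accession="SAMEA000000"/>
  </SAMPLE>
</RECEIPT>'''
xml = ET.fromstring(receipt)
assert xml.find('./PROJECT').get('accession') == 'PRJEB00000'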