Example #1
0
    def get(self, study_id):
        log_request(request)
        # param validation
        if study_id is None:
            abort(404)

        # User authentication
        user_token = None
        if "user_token" in request.headers:
            user_token = request.headers["user_token"]
        else:
            # user token is required
            abort(401)

        # check for access rights
        is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, study_status = \
            wsc.get_permissions(study_id, user_token)
        if not read_access:
            abort(403)

        isa_study, isa_inv, std_path = iac.get_isa_study(
            study_id,
            user_token,
            skip_load_tables=True,
            study_location=study_location)

        samples = read_characteristics_from_sample_sheet(
            study_location, isa_study)
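        # Serialise the organism characteristics read from the sample sheet as row tuples under the 'organisms' key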
        return totuples(samples, 'organisms')
Example #2
0
    def post(self):

        log_request(request)
        parser = reqparse.RequestParser()

        parser.add_argument('term', help="Ontology term")
        term = None
        if request.args:
            args = parser.parse_args(req=request)
            term = args['term']

        # User authentication
        user_token = None
        if "user_token" in request.headers:
            user_token = request.headers["user_token"]

        if user_token is None:
            abort(403)

        # Need to check that the user is actually an active user, ie the user_token exists
        is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
            study_status = wsc.get_permissions('MTBLS1', user_token)
        if not is_curator:
            abort(403)

        logger = logging.getLogger('wslog')
        try:
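            # Load the MetaboLights Zooma mapping spreadsheet into a DataFrame and blank out NaN cells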
            file_name = app.config.get('MTBLS_ZOOMA_FILE')
            table_df = pd.read_csv(file_name, sep="\t", encoding='utf-8')
            table_df = table_df.replace(np.nan, '', regex=True)

            # If a term was supplied, re-annotate only the matching rows; otherwise refresh the whole mapping file
            if term:
                try:
                    temp = table_df[table_df['PROPERTY_VALUE'].str.contains(term, na=False, case=False)]
                    row_indices = temp.index.values.tolist()
                    for i in row_indices:
                        query = table_df.iloc[i]['PROPERTY_VALUE']
                        attribute_name = 'factor'
                        res = ','.join(searchStudies(query, user_token, feature=attribute_name))
                        # Use .at so the update is written back to table_df itself
                        # (chained .iloc[i][...] assignment modifies a copy and is lost)
                        table_df.at[i, 'STUDY'] = res
                        table_df.to_csv(file_name, sep='\t', index=False, encoding='utf-8')
                except Exception as e:
                    logger.error('Failed to find term %s in the spreadsheet: %s', term, str(e))
            else:
                for i in range(len(table_df)):
                    query = table_df.iloc[i]['PROPERTY_VALUE']
                    attribute_name = 'factor'
                    res = ','.join(searchStudies(query, user_token, feature=attribute_name))
                    table_df.at[i, 'STUDY'] = res
                    table_df.to_csv(file_name, sep='\t', index=False, encoding='utf-8')
        except Exception as e:
            logger.error('Failed to load metabolights-zooma.tsv: %s', str(e))
Example #3
0
    def post(self):

        log_request(request)
        parser = reqparse.RequestParser()

        parser.add_argument('term', help="Ontology term")
        term = None
        if request.args:
            args = parser.parse_args(req=request)
            term = args['term']

        # User authentication
        user_token = None
        if "user_token" in request.headers:
            user_token = request.headers["user_token"]

        if user_token is None:
            abort(403)

        # Need to check that the user is actually an active user, ie the user_token exists
        is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
            study_status = wsc.get_permissions('MTBLS1', user_token)
        if not is_curator:
            abort(403)

        logger = logging.getLogger('wslog')
        try:
            file_name = app.config.get('MTBLS_ZOOMA_FILE')
            table_df = pd.read_csv(file_name, sep="\t", encoding='utf-8')
            table_df = table_df.replace(np.nan, '', regex=True)

            if term:
                try:
                    temp = table_df[table_df['PROPERTY_VALUE'].str.contains(
                        term, na=False, case=False)]
                    row_indices = temp.index.values.tolist()
                    for i in row_indices:
                        query = table_df.iloc[i]['PROPERTY_VALUE']
                        attribute_name = 'factor'
                        res = ','.join(
                            searchStudies(query,
                                          user_token,
                                          feature=attribute_name))
                        # .at writes back to table_df itself (chained .iloc assignment modifies a copy)
                        table_df.at[i, 'STUDY'] = res
                        table_df.to_csv(file_name,
                                        sep='\t',
                                        index=False,
                                        encoding='utf-8')
                except Exception as e:
                    logger.error('Failed to find term %s in the spreadsheet: %s',
                                 term, str(e))
Example #4
0
    def put(self):
        log_request(request)
        parser = reqparse.RequestParser()

        parser.add_argument('studyID', help='Metabolights studyID')
        studyID = None
        if request.args:
            args = parser.parse_args(req=request)
            studyID = args['studyID']
            if studyID:
                studyID = studyID.strip().upper()
            else:
                abort(400)

        parser.add_argument('organism', help="study organism")

        org = 'hsa'  # default to human ('hsa' is the KEGG organism code for Homo sapiens)
        if request.args:
            args = parser.parse_args(req=request)
            organism = args['organism']
            if organism:
                try:
                    org = get_kegg_organism_abbr(organism)
                except Exception as e:
                    logger.info(
                        "Can't find organism {organism} in KEGG".format(
                            organism=organism))
                    return "Can't find organism {organism} in KEGG".format(
                        organism=organism)
            else:
                abort(400)
        # module = "module load r-3.6.3-gcc-9.3.0-yb5n44y; module load pandoc-2.7.3-gcc-9.3.0-gctut72;"
        script = app.config.get('FELLA_PATHWAY_SCRPT')
        para = '-s {studyID} -o {organism}'.format(studyID=studyID,
                                                   organism=org)

        command = script + ' ' + para

        logger.info("Starting cluster job for FELLA pathway: " + command)
        status, message, job_out, job_err = lsf_job(
            app.config.get('LSF_COMMAND_BSUB'),
            job_param=command,
            send_email=True)

        if status:
            return {"success": message, "message": job_out, "errors": job_err}
        else:
            return {"error": message, "message": job_out, "errors": job_err}
    def get(self, study_id):

        log_request(request)
        # param validation
        if study_id is None:
            abort(404)
        study_id = study_id.upper()

        # User authentication
        user_token = None
        if 'user_token' in request.headers:
            user_token = request.headers['user_token']

        if user_token is None:
            abort(401)

        # query validation
        parser = reqparse.RequestParser()
        parser.add_argument('filename1', help='TSV filename one')
        parser.add_argument('filename2', help='TSV filename two')
        filename1 = None
        filename2 = None
        if request.args:
            args = parser.parse_args(req=request)
            filename1 = args['filename1'].lower() if args['filename1'] else None
            filename2 = args['filename2'].lower() if args['filename2'] else None
        if not filename1 or not filename2:
            logger.warning("Missing TSV filenames.")
            abort(404, "Missing TSV filenames.")

        # check for access rights
        is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
            study_status = wsc.get_permissions(study_id, user_token)
        if not read_access:
            abort(
                401,
                "Study does not exist or you do not have access to this study."
            )

        # Read both TSV files from the study folder and compute a row-by-row diff
        location = study_location
        df1 = read_tsv(os.path.join(location, filename1))
        df2 = read_tsv(os.path.join(location, filename2))
        diff_df = diff_pd(df1, df2)
        return jsonify({"entries": diff_df})
    def get(self):
        """
        Return a single user by username. Checks the validity of the param, retrieves the API token from the header and
        checks its validity and what permissions are available to the bearer of the token.
        """

        log_request(request)

        # User authentication
        user_token = None

        if "user_token" in request.headers:
            user_token = request.headers["user_token"]
        else:
            # user token is required
            abort(401)

        is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
            study_status = wsc.get_permissions('MTBLS1', user_token)

        if not read_access:
            abort(403)

        # pull username from query params.
        username = None

        user_parser = RequestParsers.username_parser()
        if request.args:
            args = user_parser.parse_args(req=request)
            username = args['username']

        # username has not been properly provided, abort with code 400 (bad request).
        if username is None:
            abort(400)

        # query the database for the user, and return the result of the query.
        return jsonify(get_user(username))
    def post(self):
        log_request(request)

        # User authentication
        user_token = None
        if "user_token" in request.headers:
            user_token = request.headers["user_token"]
        else:
            # user token is required
            abort(401)

        # check for access rights
        is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, study_status = \
            wsc.get_permissions('MTBLS1', user_token)
        if not read_access:
            abort(403)

        first_name = None
        last_name = None
        email = None
        affiliation = None
        affiliation_url = None
        address = None
        orcid = None
        metaspace_api_key = None

        # body content validation
        try:
            data_dict = json.loads(request.data.decode('utf-8'))
            data = data_dict['user']
            try:
                first_name = data['firstName']
                last_name = data['lastName']
                email = data['email']
                affiliation = data['affiliation']
                affiliation_url = data['affiliation_url']
                address = data['address']
                orcid = data['orcid']
                metaspace_api_key = data['metaspace_api_key']
            except Exception as e:
                abort(412, str(e))
        except (ValidationError, Exception):
            abort(400, 'Incorrect JSON provided')

        # Generate a fresh password and API token for the new account
        password, password_encoded, api_token = get_new_password_and_api_token()

        val_email(email)
        status, message = create_user(first_name, last_name, email,
                                      affiliation, affiliation_url, address,
                                      orcid, api_token, password_encoded,
                                      metaspace_api_key)

        if status:
            return {
                "user_name": email,
                "api_token": str(api_token),
                "password": str(password)
            }
        else:
            return {"Error": message}
    def put(self):
        log_request(request)
        parser = reqparse.RequestParser()

        # User authentication
        user_token = None
        if "user_token" in request.headers:
            user_token = request.headers["user_token"]
        else:
            abort(401)

        is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, study_status = \
            wsc.get_permissions('MTBLS1', user_token)

        user_name = get_username_by_token(user_token)

        if not write_access:
            abort(403)

        # loading data
        data_dict = None
        try:
            data_dict = json.loads(request.data.decode('utf-8'))
        except Exception as e:
            logger.info(e)
            abort(400)
        if not data_dict:
            abort(403)

        try:
            wks = getWorksheet(app.config.get('MTBLS_CURATION_LOG'), 'Studies',
                               app.config.get('GOOGLE_SHEET_TOKEN'))
        except Exception as e:
            logger.info('Failed to load worksheet: %s', e)
            print('Failed to load worksheet:', e)
            abort(400)
            return []

        output = {'success': [], 'un_success': []}

        editable_columns = [
            'Study Type', 'Species', 'Place Holder', 'Assigned to'
        ]
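        # Apply each posted {studyID: {column: value}} change, but only for the whitelisted columns above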
        for studyID, fields in data_dict.items():
            try:
                r = wks.find(studyID).row
                # r, _ = getCellCoordinate(app.config.get('MTBLS_CURATION_LOG'), 'Studies',
                #                          app.config.get('GOOGLE_SHEET_TOKEN'), studyID)
            except Exception:
                logger.info('Cannot find {studyID} in the curation log'.format(
                    studyID=studyID))
                print('Cannot find {studyID} in the curation log'.format(
                    studyID=studyID))
                continue

            for field, value in fields.items():
                if field in editable_columns:
                    c = wks.find(field).col
                    # _, c = getCellCoordinate(app.config.get('MTBLS_CURATION_LOG'), 'Studies',
                    #                          app.config.get('GOOGLE_SHEET_TOKEN'), field)
                    if update_cell(wks, r, c, value):
                        output['success'].append(
                            "{user_name} updated {studyID} - {field} to {value}"
                            .format(user_name=user_name,
                                    studyID=studyID,
                                    field=field,
                                    value=value))
                        logger.info(
                            "{user_name} updated {studyID} - {field} to {value}"
                            .format(user_name=user_name,
                                    studyID=studyID,
                                    field=field,
                                    value=value))
                        print(
                            "{user_name} updated {studyID} - {field} to {value}"
                            .format(user_name=user_name,
                                    studyID=studyID,
                                    field=field,
                                    value=value))
                else:
                    logger.info(
                        'Permission denied: cannot modify {studyID} {field}'.format(
                            studyID=studyID, field=field))
                    print('Permission denied: cannot modify {studyID} {field}'.format(
                        studyID=studyID, field=field))
                    output['un_success'].append(
                        'Permission denied: cannot modify {studyID} {field}'.format(
                            studyID=studyID, field=field))
                    continue

        return jsonify(output)
    def get(self):
        log_request(request)
        parser = reqparse.RequestParser()

        # studyID
        parser.add_argument('studyID', help='studyID')
        studyID = None
        if request.args:
            args = parser.parse_args(req=request)
            studyID = args['studyID']
            if studyID:
                if ',' in studyID:
                    studyID = studyID.split(',')
                else:
                    studyID = [studyID]
                studyID = [x.upper() for x in studyID]

        # column
        parser.add_argument('field', help='column name(s)')
        field = None
        if request.args:
            args = parser.parse_args(req=request)
            field = args['field']
            if field:
                if ',' in field:
                    field = field.split(',')
                else:
                    field = [field]

        # page
        parser.add_argument('page', help='page number')
        page = None
        if request.args:
            args = parser.parse_args(req=request)
            page = args['page']
            if page is not None:
                page = int(page)

        # User authentication
        user_token = None
        if "user_token" in request.headers:
            user_token = request.headers["user_token"]
        else:
            abort(401)

        # check for access rights
        is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, study_status = \
            wsc.get_permissions('MTBLS1', user_token)
        if not write_access:
            abort(403)

        # Load google sheet
        try:
            google_df = getGoogleSheet(app.config.get('MTBLS_CURATION_LOG'),
                                       'Studies',
                                       app.config.get('GOOGLE_SHEET_TOKEN'))
            google_df = google_df.set_index('MTBLS ID')
        except Exception as e:
            logger.info('Failed to load Google sheet: %s', e)
            abort(404)
            return []

        # With no explicit study list (or a very long one), fall back to a 100-study page of the sheet index
        if studyID is None or (len(studyID) > 100 and page is not None):
            studyID = list(
                google_df.index.values)[100 * (page - 1):(100 * (page - 1) +
                                                          100)]

        # entire sheet
        if studyID is None and field is None:
            result = google_df.to_json(orient="index")

        # entire column
        elif studyID is None and len(field) > 0:
            result = google_df[field].to_json(orient="columns")

        # entire row
        elif len(studyID) > 0 and field is None:
            result = google_df.loc[studyID, :].to_json(orient="index")

        # combination
        else:
            result = google_df.loc[studyID, field].to_json(orient="index")

        return json.loads(result)
Example #10
0
    def post(self):
        log_request(request)
        parser = reqparse.RequestParser()

        # query field
        parser.add_argument('query', help='Report query')
        query = None
        if request.args:
            args = parser.parse_args(req=request)
            query = args['query']
            if query:
                query = query.strip()

        # study ID
        parser.add_argument('studyid', help='Study ID')
        studyid = None
        if request.args:
            args = parser.parse_args(req=request)
            studyid = args['studyid']
            if studyid:
                studyid = studyid.strip().upper()

        # User authentication
        user_token = None
        if "user_token" in request.headers:
            user_token = request.headers["user_token"]
        else:
            abort(401)

        # check for access rights
        is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, study_status = \
            wsc.get_permissions('MTBLS1', user_token)
        if not write_access:
            abort(403)

        reporting_path = app.config.get('MTBLS_FTP_ROOT') + app.config.get('REPORTING_PATH') + 'global/'
        file_name = ''
        res = ''

        if query == 'daily_stats':
            try:
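                # Run the daily study-report SQL and pivot each returned row into a {date: counters} entry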
                with open('./instance/study_report.sql', 'r') as sql_file:
                    sql = sql_file.read()
                postgresql_pool, conn, cursor = get_connection()
                cursor.execute(sql)
                dates = cursor.fetchall()
                data = {}
                for dt in dates:
                    dict_temp = {dt[0].strftime('%Y-%m-%d'):
                                     {'studies_created': dt[1],
                                      'public': dt[2],
                                      'review': dt[3],
                                      'curation': dt[4],
                                      'user': dt[5]
                                      }
                                 }
                    data = {**data, **dict_temp}
                res = {"created_at": "2020-07-07", "updated_at": datetime.today().strftime('%Y-%m-%d'), 'data': data}
                file_name = 'daily_report.json'
            except Exception as e:
                logger.info(e)
                print(e)

        if query == 'user_stats':
            # try:
            file_name = 'study_report.json'
            study_data = readDatafromFile(reporting_path + file_name)
            with open('./instance/user_report.sql', 'r') as sql_file:
                sql = sql_file.read()
            postgresql_pool, conn, cursor = get_connection()
            cursor.execute(sql)
            result = cursor.fetchall()
            data = {}
            user_count = 0
            active_user = 0
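            # Build one record per user, attaching the details of that user's studies from the study report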
            for dt in result:
                study_list = dt[6].split(",")
                studies = {}
                for x in study_list:
                    try:
                        temp = study_data['data'][x.strip()]
                        studies[x.strip()] = temp
                    except:
                        continue
                dict_temp = {str(dt[0]):
                                 {"name": dt[13],
                                  "user_email": str(dt[1]),
                                  "country_code": dt[2],
                                  "joindate": dt[12],
                                  "total": str(dt[5]),
                                  "submitted": str(dt[7]),
                                  "review": str(dt[9]),
                                  "curation": str(dt[8]),
                                  "public": str(dt[10]),
                                  "dormant": str(dt[11]),
                                  "affiliation": dt[3],
                                  "user_status": str(dt[4]),
                                  "studies": studies,
                                  }
                             }
                data = {**data, **dict_temp}
                user_count += 1
                if dt[4] == 2:
                    active_user += 1
                # data['user_count'] = str(user_count)
                # data['active_user'] = str(active_user)
            res = {"created_at": "2020-07-07", "updated_at": datetime.today().strftime('%Y-%m-%d'),
                   "user_count": str(user_count), "active_user": str(active_user),
                   "data": data}

            file_name = 'user_report.json'

        if query == 'study_stats':
            postgresql_pool, conn, cursor = get_connection()
            cursor.execute(
                "select acc from studies")
            studies = cursor.fetchall()
            data = {}
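            # For every study accession, collect its file listing, submitter details and latest update time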
            for st in studies:
                print(st[0])
                study_files, latest_update_time = get_all_files(
                    app.config.get('STUDY_PATH') + str(st[0]))

                study_info = get_study(st[0])
                name = study_info.pop('submitter').split(',')
                country = study_info.pop('country').split(',')

                name_d = [{'name': x} for x in name]
                country_d = [{'country': x} for x in country]
                submitter = []
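                # Pair each submitter name with the matching country into one dict per submitter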
                for x in zip(name_d, country_d):
                    person = {}
                    for y in x:
                        person.update(y)
                    submitter.append(person)

                study_info['submitter'] = submitter
                study_info['latest_update_time'] = latest_update_time
                study_info['study_files'] = study_files

                dict_temp = {str(st[0]): study_info}
                data = {**data, **dict_temp}
            file_name = 'study_report.json'

            res = {'data': data}
            res["updated_at"] = datetime.today().strftime('%Y-%m-%d')

        if query == 'global':
            file_name = 'global.json'
            j_data = readDatafromFile(reporting_path + file_name)

            # load global.json and update
            if studyid:
                studyid = studyid.upper()
                # load global.json and clean the date set
                j_data = clean_json(j_data, studyid)

                # techniques
                res1 = get_techniques(studyID=studyid)
                for tech, value in res1['techniques'].items():
                    if tech in j_data['data']['techniques']:
                        print(tech)
                        j_data['data']['techniques'][tech] += value  # res['techniques'][tech]
                    else:
                        j_data['data']['techniques'].update({tech: value})

                # study_type
                res2 = get_studytype(studyID=studyid)
                j_data['data']['study_type']['targeted'] += res2['study_type']['targeted']
                j_data['data']['study_type']['untargeted'] += res2['study_type']['untargeted']
                j_data['data']['study_type']['targeted_untargeted'] += res2['study_type']['targeted_untargeted']

                # instruments & organisms
                ins, org = get_instruments_organism(studyID=studyid)
                for i, value in ins['instruments'].items():
                    if i not in j_data['data']['instruments']:
                        j_data['data']['instruments'].update({i: value})
                    else:
                        for studies, v in ins['instruments'][i].items():
                            j_data['data']['instruments'][i].update({studies: v})

                # organisms
                for o, org_part in org['organisms'].items():
                    if o not in j_data['data']['organisms']:
                        j_data['data']['organisms'].update({o: org_part})
                    else:
                        for org_p, studies in org_part.items():
                            if org_p not in j_data['data']['organisms'][o]:
                                j_data['data']['organisms'][o].update({org_p: studies})
                            else:
                                j_data['data']['organisms'][o][org_p] += studies

            # generate new global file
            else:
                # techniques
                techs = get_techniques()
                j_data['data']['techniques'] = techs['techniques']

                # study_type
                types = get_studytype()
                j_data['data']['study_type'] = types['study_type']

                # instruments & organisms
                i, s = get_instruments_organism()
                j_data['data']['instruments'] = i['instruments']
                j_data['data']['organisms'] = s['organisms']

                j_data["updated_at"] = datetime.today().strftime('%Y-%m-%d')

            res = j_data

        if query == 'file_extension':
            file_name = 'file_extension.json'

            postgresql_pool, conn, cursor = get_connection()
            cursor.execute(
                "select acc from studies where status = 3;")
            studies = cursor.fetchall()
            file_ext = []

            for studyID in studies:
                print(studyID[0])
                logger.info("Extracting study extension details: " + studyID[0])
                wd = os.path.join(app.config.get('STUDY_PATH'), studyID[0])

                try:
                    file_ext.append(get_file_extensions(studyID[0], wd))
                except Exception:
                    print("Error extracting study extension details: " + studyID[0])

            res = {"created_at": "2020-03-22", "updated_at": datetime.today().strftime('%Y-%m-%d'), 'data': file_ext}

        # j_res = json.dumps(res,indent=4)
        writeDataToFile(reporting_path + file_name, res, True)

        return jsonify({"POST " + file_name: True})
Example #11
0
    def get(self):
        # Defaults used when the corresponding query parameters are absent
        start_date = datetime.strptime('20110809', '%Y%m%d')
        end_date = datetime.today()
        query_field = None
        log_request(request)
        parser = reqparse.RequestParser()

        parser.add_argument('query', help='Report query')
        query = None
        if request.args:
            args = parser.parse_args(req=request)
            query = args['query']
            if query:
                query = query.strip()

        parser.add_argument('start', help='start date')
        if request.args:
            args = parser.parse_args(req=request)
            start = args['start']
            if start:
                start_date = datetime.strptime(start, '%Y%m%d')
            else:
                start_date = datetime.strptime('20110809', '%Y%m%d')

        parser.add_argument('end', help='end date')
        if request.args:
            args = parser.parse_args(req=request)
            end = args['end']
            if end:
                end_date = datetime.strptime(end, '%Y%m%d')
            else:
                end_date = datetime.today()

        parser.add_argument('studyStatus', help='studyStatus')
        studyStatus = None
        if request.args:
            args = parser.parse_args(req=request)
            studyStatus = args['studyStatus']
            if studyStatus:
                studyStatus = tuple([x.strip() for x in studyStatus.split(',')])

        parser.add_argument('queryFields', help='queryFields')
        query_field = None
        if request.args:
            args = parser.parse_args(req=request)
            queryFields = args['queryFields']
            if queryFields:
                query_field = tuple([x.strip().lower() for x in queryFields.split(',')])

        # User authentication
        user_token = None
        if "user_token" in request.headers:
            user_token = request.headers["user_token"]
        else:
            abort(401)

        # check for access rights
        is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, study_status = \
            wsc.get_permissions('MTBLS1', user_token)
        if not write_access:
            if query in ['study_status', "global"]:
                studyStatus = ['public']
            else:
                abort(403)

        reporting_path = app.config.get('MTBLS_FTP_ROOT') + app.config.get('REPORTING_PATH') + 'global/'

        if query == 'daily_stats':
            file_name = 'daily_report.json'
            j_file = readDatafromFile(reporting_path + file_name)

            data_res = {}
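            # Keep only the daily entries inside [start_date, end_date], optionally projected onto the requested fields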
            for date, report in j_file['data'].items():
                d = datetime.strptime(date, '%Y-%m-%d')
                if start_date <= d <= end_date:
                    if query_field is not None:
                        slim_report = {k: report[k] for k in query_field}
                        data_res.update({date: slim_report})
                    else:
                        data_res.update({date: report})
                else:
                    continue
            j_file['data'] = data_res
            return jsonify(j_file)

        elif query == 'user_stats':
            file_name = 'user_report.json'
            j_file = readDatafromFile(reporting_path + file_name)
            return jsonify(j_file)

        elif query == 'global':
            file_name = 'global.json'
            j_file = readDatafromFile(reporting_path + file_name)
            return jsonify(j_file)

        elif query == 'file_extension':
            file_name = 'file_extension.json'
            j_file = readDatafromFile(reporting_path + file_name)
            return jsonify(j_file)

        elif query == 'study_status':
            file_name = 'study_report.json'
            j_file = readDatafromFile(reporting_path + file_name)
            data_res = {}

            for studyID, study_info in j_file['data'].items():
                d = datetime.strptime(study_info['submissiondate'], '%Y-%m-%d')
                status = study_info['status']

                if studyStatus is None:
                    if start_date <= d <= end_date:
                        data_res.update({studyID: study_info})
                    else:
                        continue
                else:
                    if start_date <= d <= end_date and status.lower() in studyStatus:
                        data_res.update({studyID: study_info})
                    else:
                        continue

            j_file['data'] = data_res
            return jsonify(j_file)
        else:
            file_name = ''
            abort(404)
Example #12
0
    def post(self, study_id, file_name):
        log_request(request)
        try:
            data_dict = json.loads(request.data.decode('utf-8'))
            data = data_dict['data']
            new_row = data['rows']
        except KeyError:
            new_row = None
            data = None

        if new_row is None:
            abort(
                417,
                "Please provide valid data for updated new row(s). The JSON string has to have a 'rows' element"
            )

        try:
            for element in new_row:
                element.pop('index', None)  # Remove "index:n" element, this is the original row number
        except Exception:
            logger.info('No index (row num) supplied, ignoring')

        # param validation
        if study_id is None or file_name is None:
            abort(
                404,
                'Please provide valid parameters for study identifier and TSV file name'
            )

        fname, ext = os.path.splitext(file_name)
        ext = ext.lower()
        if ext not in ('.tsv', '.csv', '.txt'):
            abort(400,
                  "The file " + file_name + " is not a valid TSV or CSV file")

        study_id = study_id.upper()

        # User authentication
        user_token = None
        if "user_token" in request.headers:
            user_token = request.headers["user_token"]

        # check for access rights
        is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
            study_status = wsc.get_permissions(study_id, user_token)
        if not write_access:
            abort(403)

        if file_name == 'metabolights_zooma.tsv':  # This will edit the MetaboLights Zooma mapping file
            if not is_curator:
                abort(403)
            file_name = app.config.get('MTBLS_ZOOMA_FILE')
        else:
            file_name = os.path.join(study_location, file_name)

        try:
            file_df = read_tsv(file_name)
        except FileNotFoundError:
            abort(400, "The file name was not found")

        # Validate column names in new rows
        valid_column_name, message = validate_row(file_df, new_row, "post")
        if not valid_column_name:
            abort(417, message)

        if data:
            try:
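                # The client may supply an insertion index; -1 means prepend, a missing index means append at the end.
                # Subtracting 0.5 lets the new row sort just before the existing row at that position.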
                start_index = data['index']
                if start_index == -1:
                    start_index = 0
                start_index = start_index - 0.5

            except KeyError:
                start_index = len(file_df.index)

            # Map the complete row first, update with new_row
            complete_row = {}
            for col in file_df.columns:
                complete_row[col] = ""

            if not new_row:
                logger.warning(
                    "No new row information provided. Adding empty row " +
                    file_name + ", row " + str(complete_row))
            else:
                for row in new_row:
                    complete_row.update(row)
                    row = complete_row
                    line = pd.DataFrame(row, index=[start_index])
                    file_df = file_df.append(line, ignore_index=False)
                    file_df = file_df.sort_index().reset_index(drop=True)
                    start_index += 1

            file_df = file_df.replace(np.nan, '', regex=True)
            message = write_tsv(file_df, file_name)

        # Get an indexed header row
        df_header = get_table_header(file_df)

        # Get the updated data table
        try:
            df_data_dict = totuples(read_tsv(file_name), 'rows')
        except FileNotFoundError:
            abort(400, "The file " + file_name + " was not found")

        return {'header': df_header, 'data': df_data_dict, 'message': message}
Example #13
0
    def post(self, study_id):
        log_request(request)
        # param validation
        if study_id is None:
            abort(404)
        # query validation
        parser = reqparse.RequestParser()
        parser.add_argument('existing_char_name', help="Characteristics name")
        parser.add_argument('existing_char_value',
                            help="Characteristics value")
        args = parser.parse_args()
        existing_characteristics_name = args['existing_char_name']
        existing_characteristics_value = args['existing_char_value']
        if existing_characteristics_name is None or existing_characteristics_value is None:
            abort(404)
        # User authentication
        user_token = None
        if "user_token" in request.headers:
            user_token = request.headers["user_token"]
        else:
            # user token is required
            abort(401)

        # check for keeping copies
        save_audit_copy = False
        save_msg_str = "NOT be"
        if "save_audit_copy" in request.headers and \
                request.headers["save_audit_copy"].lower() == 'true':
            save_audit_copy = True
            save_msg_str = "be"

        # check for access rights
        is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, study_status = \
            wsc.get_permissions(study_id, user_token)
        if not write_access:
            abort(403)

        isa_study, isa_inv, std_path = iac.get_isa_study(
            study_id,
            user_token,
            skip_load_tables=True,
            study_location=study_location)

        # body content validation
        updated_characteristics = None
        new_column_name = None
        onto = None
        new_value = None
        new_url = None
        try:
            data_dict = json.loads(request.data.decode('utf-8'))
            data = data_dict['characteristics']
            # if partial=True missing fields will be ignored
            try:
                # result = SampleSchema().load(data, many=True, partial=False)
                # We do not have to map the characteristics JSON to a schema, as we use it to update
                # the dataframe directly. The ontology is used more than once, so map that.

                new_column_name = data[0]['characteristicsName']
                char_type = data[0]['characteristicsType']
                new_value = char_type['annotationValue']
                new_url = char_type['termAccession']
                term_source = char_type['termSource']
                onto = OntologySource(name=term_source['name'],
                                      version=term_source['version'],
                                      file=term_source['file'],
                                      description=term_source['description'])

                # Check that the ontology is referenced in the investigation
                add_ontology_to_investigation(isa_inv, onto.name, onto.version,
                                              onto.file, onto.description)

            except Exception as e:
                abort(412)

        except (ValidationError, Exception):
            abort(400)

        # update Study Characteristics details
        logger.info('Updating Study Characteristics details for %s', study_id)

        if existing_characteristics_name != new_column_name:  # update the column header value for characteristics
            update_ontolgies_in_isa_tab_sheets('characteristics',
                                               existing_characteristics_name,
                                               new_column_name, study_location,
                                               isa_study)
        # Now update the cell values themselves
        update_characteristics_in_sample_sheet(onto.name, new_url,
                                               new_column_name,
                                               existing_characteristics_value,
                                               new_value, study_location,
                                               isa_study)

        logger.info("A copy of the previous files will %s saved", save_msg_str)
        iac.write_isa_study(isa_inv,
                            user_token,
                            std_path,
                            save_investigation_copy=save_audit_copy)
        logger.info('Updated %s', existing_characteristics_value)

        return {"Success": " Sample sheet updated"}
Example #14
0
    def post(self):
        log_request(request)
        parser = reqparse.RequestParser()

        parser.add_argument('source', help='source to update')
        source = None
        if request.args:
            args = parser.parse_args(req=request)
            source = args['source']
            if source:
                source = source.strip()

        # User authentication
        user_token = None
        if "user_token" in request.headers:
            user_token = request.headers["user_token"]
        else:
            abort(401)

        # check for access rights
        is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, study_status = \
            wsc.get_permissions('MTBLS1', user_token)
        if not is_curator:
            abort(403)

        if source == 'curation log-Database Query':
            try:
                logger.info('Updating curation log-Database Query')
                curation_log_database_query()
                return jsonify({'curation log update': True})
            except Exception as e:
                logger.info(e)
                print(e)
        elif source == 'curation log-Database update':
            try:
                logger.info('Updating curation log-Database update')
                curation_log_database_update()
                return jsonify({'Database update': True})
            except Exception as e:
                logger.info(e)
                print(e)
        elif source == 'MTBLS statistics':
            try:
                logger.info('Updating MTBLS statistics')
                MTBLS_statistics_update()
                return jsonify({'success': True})
            except Exception as e:
                logger.info(e)
                print(e)

        elif source == 'empty studies':
            try:
                logger.info('Get list of empty studies')
                blank_inv, no_inv = get_empty_studies()

                return jsonify({
                    'Investigation files check': {
                        'Empty investigation': {
                            'counts': len(blank_inv),
                            'list': blank_inv
                        },
                        'Missing investigation': {
                            'counts': len(no_inv),
                            'list': no_inv
                        }
                    }
                })
            except Exception as e:
                logger.info(e)
                print(e)
        elif source == 'MARIANA study_classify':
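            # Merge the untargeted NMR, untargeted LC-MS and combined NMR+LC-MS study classifications into one report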
            data = {
                'data': {
                    **untarget_NMR(),
                    **untarget_LCMS(),
                    **NMR_and_LCMS()
                }
            }
            time_stamp = {
                "created_at": "2020-07-20",
                "updated_at": datetime.today().strftime('%Y-%m-%d')
            }
            res = {**time_stamp, **data}
            file_name = 'study_classify.json'
            file_path = app.config.get('MTBLS_FTP_ROOT') + app.config.get(
                'MARIANA_PATH')
            writeDataToFile(file_path + file_name, res, True)
            return jsonify(res)
        elif source == 'ftp file permission':
            submit, curation, review = file_permission()
            if len(submit) + len(curation) + len(review) == 0:
                return jsonify({'result': 'Nothing to change'})
            else:
                res = {
                    "Change ftp folder access permission": {
                        'Submission studies (770)': submit,
                        'In curation studies (750)': curation,
                        'In review studies (550)': review
                    }
                }
                return jsonify(res)
        elif source == 'test cronjob':
            pass
        else:
            abort(400)
Example #15
0
    def get(self):
        log_request(request)
        parser = reqparse.RequestParser()

        parser.add_argument('studyID', help='Metabolights studyID')
        studyID = None
        if request.args:
            args = parser.parse_args(req=request)
            studyID = args['studyID']
            if studyID:
                studyID = studyID.strip().upper()

        parser.add_argument('kegg_only', help="only return kegg IDs")
        kegg_only = False
        if request.args:
            args = parser.parse_args(req=request)
            kegg = args['kegg_only']
            if not kegg:
                kegg_only = False
            elif kegg.lower() in ['true', '1']:
                kegg_only = True
            elif kegg.lower() in ['false', '0']:
                kegg_only = False
            else:
                abort(400)

        # chebiID = []
        # keggID = []
        result = {}
        # if len(request.data.decode('utf-8')) > 0:
        #     try:
        #         data_dict = json.loads(request.data.decode('utf-8'))
        #         chebiID = data_dict['CHEBIID']
        #         keggID = data_dict['KEGGID']
        #     except Exception as e:
        #         logger.info(e)
        #         print(e)
        #         abort(400)

        if studyID:
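            # Work out the study's distinct organisms: multi-organism studies are parsed from MAF files, single-organism ones via a direct database query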
            uni_organism = uniqueOrganism(studyID)
            if len(uni_organism) > 1:
                res = {org: [] for org in uni_organism}

                # get list of ISA files
                try:
                    assay_file, investigation_file, sample_file, maf_file = getFileList(
                        studyID)
                except Exception:
                    assay_file, investigation_file, sample_file, maf_file = '', '', '', ''
                    print('Failed to load study', studyID)

                # sample
                sample = get_sample_file(studyID=studyID,
                                         sample_file_name=sample_file)
                sample = sample[['Sample Name', 'Characteristics[Organism]']]
                organisms = list(sample['Characteristics[Organism]'].unique())

                # maf
                from collections import defaultdict
                result = defaultdict(list, {key: [] for key in organisms})
                for maf_name in maf_file:
                    res = maf_reader(studyID, maf_name, sample_df=sample)
                    for i, j in res.items():
                        result[i].extend(j)

                result = dict(result)
            elif len(uni_organism) == 1:
                query = '''SELECT DISTINCT DATABASE_IDENTIFIER FROM MAF_INFO WHERE ACC = '{studyID}' AND (DATABASE_IDENTIFIER <> '') IS NOT FALSE'''.format(
                    studyID=studyID)

                postgresql_pool, conn, cursor = get_connection()
                cursor.execute(query)
                # d= cursor.fetchall()
                ID = [r[0] for r in cursor.fetchall()]
                result = {uni_organism[0]: ID}
            else:
                abort(400)

            for org, ids in result.items():
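                # Map each organism's ChEBI and HMDB identifiers to KEGG compound IDs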
                pair1 = match_chebi_kegg(
                    [x for x in ids if 'chebi' in x.lower()], [])
                pair2 = match_hmdb_kegg(
                    [x for x in ids if 'hmdb' in x.lower()], [])

                result[org] = {**pair1, **pair2}

        # elif len(chebiID) > 0 or len(keggID) > 0:
        #     result['input_ids'] = match_chebi_kegg(chebiID, keggID)

        if kegg_only:
            try:
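                # Strip the 'cpd:' prefix from the KEGG compound IDs and translate organism names to KEGG organism codes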
                res = {
                    k: [x.replace('cpd:', '', 1).upper() for x in list(v.values())]
                    for k, v in result.items() if len(v) > 0
                }
                result = {}
                for k in res.keys():
                    new_key = get_kegg_organism_abbr(k)
                    result[new_key] = res[k]
                return jsonify(result)
            except:
                return []
        else:
            return jsonify(result)
Example #16
0
    def post(self, study_id, file_name):
        log_request(request)
        try:
            data_dict = json.loads(request.data.decode('utf-8'))
            new_row = data_dict['data']
        except KeyError:
            new_row = None

        if new_row is None:
            abort(417, "Please provide valid data for updated new row(s). The JSON string has to have a 'data' element")

        try:
            for element in new_row:
                element.pop('index', None)  # Remove "index:n" element, this is the original row number
        except Exception:
            logger.info('No index (row num) supplied, ignoring')

        # param validation
        if study_id is None or file_name is None:
            abort(404, 'Please provide valid parameters for study identifier and TSV file name')
        study_id = study_id.upper()

        # User authentication
        user_token = None
        if "user_token" in request.headers:
            user_token = request.headers["user_token"]

        # check for access rights
        is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
            study_status = wsc.get_permissions(study_id, user_token)
        if not write_access:
            abort(403)

        if file_name == 'metabolights_zooma.tsv':  # This will edit the MetaboLights Zooma mapping file
            if not is_curator:
                abort(403)
            file_name = app.config.get('MTBLS_ZOOMA_FILE')
        else:
            file_name = os.path.join(study_location, file_name)

        try:
            file_df = read_tsv(file_name)
        except FileNotFoundError:
            abort(400, "The file name was not found")

        # Validate column names in new rows
        valid_column_name, message = validate_row(file_df, new_row, "post")
        if not valid_column_name:
            abort(417, message)

        if new_row[0]:
            file_df = file_df.append(new_row, ignore_index=True)  # Add new row to the spreadsheet (TSV file)
        else:
            file_df = file_df.append(pd.Series(), ignore_index=True)

        message = write_tsv(file_df, file_name)

        # Get an indexed header row
        df_header = get_table_header(file_df)

        # Get the updated data table
        try:
            df_data_dict = totuples(read_tsv(file_name), 'rows')
        except FileNotFoundError:
            abort(400, "The file " + file_name + " was not found")

        return {'header': df_header, 'data': df_data_dict, 'message': message}