def LinearRegression_score(body):
    """Call ``score`` on the stored LinearRegression object and wrap the result.

    ``body['paras']`` holds the keyword arguments for ``score``.  If its
    ``'X'`` or ``'y'`` entry is a string, that string is handed to
    ``file.read_csv`` and the entry is replaced, in place, by whatever
    ``read_csv`` returns.

    Returns ``jsonify({'return': str(result)})`` on success.  Any exception
    raised while loading the object, scoring, or saving it is reported as
    ``jsonify({'Error': str(exception)})`` instead of propagating.  Errors
    raised while reading the CSV files are not caught here.
    """
    params = body['paras']
    for key in ('X', 'y'):
        # Only string values are treated as file names; anything else is
        # passed to score() untouched.
        if key in params.keys() and isinstance(params[key], str):
            params[key] = file.read_csv(params[key])

    try:
        model = load_obj(os.path.join('.', 'LinearRegression_constructor.npy'))
        score = model.score(**params)
        save_obj(model, 'LinearRegression')
    except Exception as exc:
        return jsonify({'Error': str(exc)})
    else:
        return jsonify({'return': str(score)})
        return np.sum(r / np.log2(np.arange(2, r.size + 2)))
    return 0.


# calculate nDCG
def ndcg_at_k(r, k):
    """Return ``dcg_at_k(r, k)`` normalised by the best achievable value.

    The normaliser is ``dcg_at_k`` evaluated on ``r`` sorted in descending
    order.  When that normaliser is falsy (e.g. zero) the result is ``0.``
    so no division takes place.
    """
    ideal = dcg_at_k(sorted(r, reverse=True), k)
    return dcg_at_k(r, k) / ideal if ideal else 0.


# load ranked data
# Creatinine
data = ufile.read_csv(
    "/Users/luyu/Documents/Master Thesis/rating/lab_10q_sp/Creatinine_top10mix_sp/Creatinine_top10mix_sp_Q1.csv"
)[1:]
#data = ufile.read_csv("/Users/luyu/Documents/Master Thesis/rating/lab_10q_sp/Creatinine_top10mix_sp/Creatinine_top10mix_sp_Q2.csv")[1:]
#data = ufile.read_csv("/Users/luyu/Documents/Master Thesis/rating/lab_10q_sp/Creatinine_top10mix_sp/Creatinine_top10mix_sp_Q3.csv")[1:]
#data = ufile.read_csv("/Users/luyu/Documents/Master Thesis/rating/lab_10q_sp/Creatinine_top10mix_sp/Creatinine_top10mix_sp_Q4.csv")[1:]
#data = ufile.read_csv("/Users/luyu/Documents/Master Thesis/rating/lab_10q_sp/Creatinine_top10mix_sp/Creatinine_top10mix_sp_Q5.csv")[1:]
#data = ufile.read_csv("/Users/luyu/Documents/Master Thesis/rating/lab_10q_sp/Creatinine_top10mix_sp/Creatinine_top10mix_sp_Q6.csv")[1:]
#data = ufile.read_csv("/Users/luyu/Documents/Master Thesis/rating/lab_10q_sp/Creatinine_top10mix_sp/Creatinine_top10mix_sp_Q7.csv")[1:]
#data = ufile.read_csv("/Users/luyu/Documents/Master Thesis/rating/lab_10q_sp/Creatinine_top10mix_sp/Creatinine_top10mix_sp_Q8.csv")[1:]
#data = ufile.read_csv("/Users/luyu/Documents/Master Thesis/rating/lab_10q_sp/Creatinine_top10mix_sp/Creatinine_top10mix_sp_Q9.csv")[1:]
#data = ufile.read_csv("/Users/luyu/Documents/Master Thesis/rating/lab_10q_sp/Creatinine_top10mix_sp/Creatinine_top10mix_sp_Q10.csv")[1:]
# HbA1c

# Glucose

# no lab
示例#3
0
#data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/rating/mix_csv/NoLab_top10mix_Q9.csv')
#data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/rating/mix_csv/NoLab_top10mix_Q10.csv')

#data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/rating/mix_csv/Glucose_top10mix_Q1.csv')
#data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/rating/mix_csv/Glucose_top10mix_Q2.csv')
#data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/rating/mix_csv/Glucose_top10mix_Q3.csv')
#data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/rating/mix_csv/Glucose_top10mix_Q4.csv')
#data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/rating/mix_csv/Glucose_top10mix_Q5.csv')
#data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/rating/mix_csv/Glucose_top10mix_Q6.csv')
#data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/rating/mix_csv/Glucose_top10mix_Q7.csv')
#data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/rating/mix_csv/Glucose_top10mix_Q8.csv')
#data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/rating/mix_csv/Glucose_top10mix_Q9.csv')
#data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/rating/mix_csv/Glucose_top10mix_Q10.csv')

data = ufile.read_csv(
    '/Users/luyu/Documents/Master Thesis/rating/mix_csv/Creatinine_top10mix_Q1.csv'
)
#data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/rating/mix_csv/Creatinine_top10mix_Q2.csv')
#data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/rating/mix_csv/Creatinine_top10mix_Q3.csv')
#data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/rating/mix_csv/Creatinine_top10mix_Q4.csv')
#data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/rating/mix_csv/Creatinine_top10mix_Q5.csv')
#data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/rating/mix_csv/Creatinine_top10mix_Q6.csv')
#data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/rating/mix_csv/Creatinine_top10mix_Q7.csv')
#data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/rating/mix_csv/Creatinine_top10mix_Q8.csv')
#data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/rating/mix_csv/Creatinine_top10mix_Q9.csv')
#data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/rating/mix_csv/Creatinine_top10mix_Q10.csv')

# load post-processed exp
#data = ufile.read_csv("/Users/luyu/Documents/Master Thesis/rating/mix_top10/Glucose_top10mix/Glucose_exp/post_exp/Glucose_top10mix_post_Q1.csv")
#data = ufile.read_csv("/Users/luyu/Documents/Master Thesis/rating/mix_top10/Glucose_top10mix/Glucose_exp/post_exp/Glucose_top10mix_post_Q2.csv")
#data = ufile.read_csv("/Users/luyu/Documents/Master Thesis/rating/mix_top10/Glucose_top10mix/Glucose_exp/post_exp/Glucose_top10mix_post_Q3.csv")
示例#4
0
# Module-level logger obtained from slogger under the name 'etacts-ext'.
log = slogger('etacts-ext')

# Cache object configured inline: 'filesystem' cache type storing its
# entries under app/resources/cache.
# NOTE(review): 86400 is presumably a timeout in seconds (one day) and 500 a
# cap on the number of stored entries -- confirm against the Cache class used.
cache = Cache(
    config={
        'CACHE_TYPE': 'filesystem',
        'CACHE_DEFAULT_TIMEOUT': 86400,
        'CACHE_THRESHOLD': 500,
        'CACHE_DIR': 'app/resources/cache'
    })
'''
load the tag-to-semantic_category relationships
'''

# tag - semantic type
# Read the controlled vocabulary CSV at import time; a None result is treated
# as a fatal load failure.
cvocab = ufile.read_csv('app/resources/cvocab.csv')
if cvocab is None:
    log.error('impossible to load the controlled vocabulary - interrupting')
    # NOTE(review): sys.exit() without an argument exits with status 0, so
    # this failure looks like success to the calling process -- confirm that
    # this is intended.
    sys.exit()
type2tag = {}
tag2type = {}
for tag in cvocab:
    ptag = tag[0].strip()
    tkn = tag[1].replace('[', '').replace(']', '').split('\',')
    if len(tkn) > 3:
        continue
    ltype = set()
    for t in tkn:
        # type-to-tag
        typ = t.replace('\'', '').strip()
        ltag = type2tag.setdefault(typ, set())
示例#5
0
from flask.ext.cache import Cache
from log import slogger
import file as ufile
import sys

# Module-level logger obtained from slogger under the name 'etacts-ext'.
log = slogger ('etacts-ext')

# Flask-Cache object configured inline: 'filesystem' cache type storing its
# entries under app/resources/cache.
# NOTE(review): 86400 is presumably a timeout in seconds (one day) and 500 a
# cap on the number of stored entries -- confirm against the Flask-Cache docs.
cache = Cache(config={'CACHE_TYPE': 'filesystem', 'CACHE_DEFAULT_TIMEOUT': 86400, 'CACHE_THRESHOLD': 500, 'CACHE_DIR': 'app/resources/cache'})

'''
load the tag-to-semantic_category relationships
'''

# tag - semantic type
# Read the controlled vocabulary CSV at import time; a None result is treated
# as a fatal load failure.
cvocab = ufile.read_csv ('app/resources/cvocab.csv')
if cvocab is None:
    log.error ('impossible to load the controlled vocabulary - interrupting')
    # NOTE(review): sys.exit() without an argument exits with status 0, so
    # this failure looks like success to the calling process -- confirm that
    # this is intended.
    sys.exit()
type2tag = {}
tag2type = {}
for tag in cvocab:
    ptag = tag[0].strip()
    tkn = tag[1].replace('[','').replace(']','').split('\',')
    if len(tkn) > 3:
        continue
    ltype = set()
    for t in tkn:
        # type-to-tag
        typ = t.replace('\'','').strip()
        ltag = type2tag.setdefault(typ, set())
        ltag.add(ptag)
#data = ufile.read_csv("/Users/luyu/Documents/Master Thesis/rating/lab_10q_sp/Glucose_top10mix_sp/Glucose_top10mix_sp_Q8.csv")[1:]
#data = ufile.read_csv("/Users/luyu/Documents/Master Thesis/rating/lab_10q_sp/Glucose_top10mix_sp/Glucose_top10mix_sp_Q9.csv")[1:]
#data = ufile.read_csv("/Users/luyu/Documents/Master Thesis/rating/lab_10q_sp/Glucose_top10mix_sp/Glucose_top10mix_sp_Q10.csv")[1:]

# load data: CE Glucose
#data = ufile.read_csv("/Users/luyu/Documents/Master Thesis/rating/lab_10q_sp/Glucose_top10mix_sp/Correctly Extracted/Glucose_top10mix_cesp_Q1.csv")[1:]
#data = ufile.read_csv("/Users/luyu/Documents/Master Thesis/rating/lab_10q_sp/Glucose_top10mix_sp/Correctly Extracted/Glucose_top10mix_cesp_Q2.csv")[1:]
#data = ufile.read_csv("/Users/luyu/Documents/Master Thesis/rating/lab_10q_sp/Glucose_top10mix_sp/Correctly Extracted/Glucose_top10mix_cesp_Q3.csv")[1:]
#data = ufile.read_csv("/Users/luyu/Documents/Master Thesis/rating/lab_10q_sp/Glucose_top10mix_sp/Correctly Extracted/Glucose_top10mix_cesp_Q4.csv")[1:]
#data = ufile.read_csv("/Users/luyu/Documents/Master Thesis/rating/lab_10q_sp/Glucose_top10mix_sp/Correctly Extracted/Glucose_top10mix_cesp_Q5.csv")[1:]
#data = ufile.read_csv("/Users/luyu/Documents/Master Thesis/rating/lab_10q_sp/Glucose_top10mix_sp/Correctly Extracted/Glucose_top10mix_cesp_Q6.csv")[1:]
#data = ufile.read_csv("/Users/luyu/Documents/Master Thesis/rating/lab_10q_sp/Glucose_top10mix_sp/Correctly Extracted/Glucose_top10mix_cesp_Q7.csv")[1:]
#data = ufile.read_csv("/Users/luyu/Documents/Master Thesis/rating/lab_10q_sp/Glucose_top10mix_sp/Correctly Extracted/Glucose_top10mix_cesp_Q8.csv")[1:]
#data = ufile.read_csv("/Users/luyu/Documents/Master Thesis/rating/lab_10q_sp/Glucose_top10mix_sp/Correctly Extracted/Glucose_top10mix_cesp_Q9.csv")[1:]
data = ufile.read_csv(
    "/Users/luyu/Documents/Master Thesis/rating/lab_10q_sp/Glucose_top10mix_sp/Correctly Extracted/Glucose_top10mix_cesp_Q10.csv"
)[1:]

# extract id and question
# Empty cells are detected with `!=` rather than `is not`: identity of two
# equal strings is an implementation detail, and comparing against a literal
# with `is` emits a SyntaxWarning on Python 3.8+.
# NOTE: `id` shadows the builtin of the same name; the name is kept because
# other parts of the script may refer to it.
id = [d[0] for d in data if d[0] != '']
candidates = [d[1] for d in data if d[1] != '']
# extract lab-value statement for 3 lab tests
# Empty cells are replaced by the literal string '[]'; once each list is
# built, its second element (index 1) is removed.
glu_exp = [d[2] if d[2] != '' else '[]' for d in data]
del glu_exp[1]
a1c_exp = [d[3] if d[3] != '' else '[]' for d in data]
del a1c_exp[1]
cre_exp = [d[4] if d[4] != '' else '[]' for d in data]
del cre_exp[1]
# extract vector-space similarities
# Rows with an empty similarity cell are skipped; the rest are parsed as float.
tfidf = [float(d[5]) for d in data if d[5] != '']
import re, nltk, string
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import file as ufile
import pandas as pd

# Load the criteria CSV from a hard-coded absolute path; every row is used
# (no header row is skipped here).
data = ufile.read_csv(
    "/Users/luyu/Downloads/ValX_demo/diabetes_criteria_test_20words_3lab20words.csv"
)

# extract lab-value statement for 3 lab tests
# One list per CSV column, indexed by position 0-6.
# NOTE(review): the column meanings are only implied by the variable names
# (glu/a1c/cre presumably glucose, HbA1c, creatinine) -- confirm against the
# file layout.
q_id = [d[0] for d in data]
q_inc = [d[1] for d in data]
q_ques = [d[2] for d in data]
q_fom = [d[3] for d in data]
glu_exp = [d[4] for d in data]
a1c_exp = [d[5] for d in data]
cre_exp = [d[6] for d in data]

# List of unit strings built by splitting a '|'-separated literal; note that
# it contains the two-character entry '%,' as well as time units.
stop = "gm|%,|hr|hrs|min|mins|minute|minutes|hour|hours|okay hour|day|days|week|weeks|month|months|yr|yrs|year|years".split(
    "|")
#for d in glu_exp:print(d)

# Accumulators, initialised empty, for the post-processed expressions.
glu_exp_n = []
cre_exp_n = []

for c in glu_exp:
    ch = ''.join([cc for cc in c])
    ch = ch.replace('[', '')
    ch = ch.replace(']', '')
    ch = ch.replace('"', '')
示例#8
0
import re, nltk, string
from nltk.corpus import stopwords
import file as ufile
import pandas as pd

# define a group of impossible units
# The list is built by splitting a '|'-separated literal.
stop = "gm|hr|hrs|min|mins|minute|minutes|hour|hours|okay hour|day|days|week|weeks|month|months|yr|yrs|year|years".split("|")

# Input rows are read from a hard-coded absolute path.
data = ufile.read_csv("/Users/luyu/Documents/Master Thesis/rating/mix_top10/Glucose_top10mix/Glucose_exp/exp/Glucose_top10mix_exp_Q3.csv")
#ini = ufile.read_csv("/Users/luyu/Documents/Master Thesis/rating/mix_top10/Glucose_top10mix/Glucose_exp/Glucose_top10mix_csv/Glucose_top10mix_Q1.csv")

# Snapshot of column 2 taken before the loop below rewrites d[2]; it keeps
# the original, unmodified strings.
glu_exp = [d[2] for d in data]

out = []
# remove expressions containing impossible units
for d in data:
    #print(d[0], d[2], d[3], d[4])
    for s in stop:
        # Substring test: a short unit also matches inside a longer one
        # (e.g. 'hr' inside 'hrs'), and units are tried in list order against
        # the current, possibly already rewritten, value of d[2].
        if s in d[2]:
            # Keep the text that follows the first occurrence of the unit (up
            # to its next occurrence, if any), drop that text's last two
            # characters and re-wrap it in square brackets.
            d[2] = '[' + d[2].split(s)[1][:-2] +']'
            # Remove the literal residue "s,'," left by the split.
            d[2] = d[2].replace("s,',", "")
    # Only the first five columns are carried over to the output row.
    out.append([d[0], d[1], d[2], d[3], d[4]])
    #print(d[0], d[2])

# Write the rewritten rows to a hard-coded output path.
ufile.write_csv("/Users/luyu/Documents/Master Thesis/rating/mix_top10/Glucose_top10mix/Glucose_exp/post_exp/test.csv", out)
            elif float(seg[2]) > 107.0:
                lab_range.append(3)
    return min_max_normalization(lab_range)


# load data: Creatinine
#candidates_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q1.csv')
#candidates_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q2.csv')
#candidates_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q3.csv')
#candidates_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q4.csv')
#candidates_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q5.csv')
#candidates_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q6.csv')
#candidates_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q7.csv')
#candidates_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q8.csv')
#candidates_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q9.csv')
candidates_d = ufile.read_csv(
    '/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q10.csv')

# load data: HbA1c
#candidates_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/hba1c/hba1c_Q1.csv')
#candidates_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/hba1c/hba1c_Q2.csv')
#candidates_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/hba1c/hba1c_Q3.csv')
#candidates_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/hba1c/hba1c_Q4.csv')
#candidates_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/hba1c/hba1c_Q5.csv')
#candidates_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/hba1c/hba1c_Q6.csv')
#candidates_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/hba1c/hba1c_Q7.csv')
#candidates_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/hba1c/hba1c_Q8.csv')
#candidates_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/hba1c/hba1c_Q9.csv')
#candidates_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/hba1c/hba1c_Q10.csv')

# load data: Glucose
#candidates_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/glucose/glucose_Q1.csv')
import file as ufile
from random import sample
import pandas as pd
import re

# sample questions with 5-20 word length
# Rows are read from a hard-coded absolute path.
data = ufile.read_csv(
    '/Users/luyu/Downloads/ValX_demo/diabetes_criteria_test.csv')
d_20w = []

for d in data:
    # Newlines are removed without inserting a space, so words separated only
    # by a line break are joined before counting. The row is modified in place.
    d[1] = d[1].replace('\n', '')
    # Keep rows whose column 1 has between 5 and 20 whitespace-separated
    # words, both bounds inclusive.
    if 5 <= len(d[1].split()) <= 20:
        d_20w.append(d)

# write data
#ufile.write_csv('/Users/luyu/Documents/Master Thesis/diabetes_criteria_test_5to20words.csv', d_20w)

# count valx-parsed questions with expressions
# A second file is read here; it is not derived from d_20w in this script.
box = ufile.read_csv(
    '/Users/luyu/Documents/Master Thesis/diabetes_criteria_test_5to20words_parsed.csv'
)

# One list per CSV column, by position (0, 2, 4, 5, 6).
# NOTE(review): the column meanings are only implied by the variable names --
# confirm against the parsed file's layout.
q_id = [d[0] for d in box]
txt = [d[2] for d in box]
glu = [d[4] for d in box]
a1c = [d[5] for d in box]
cre = [d[6] for d in box]

# Accumulators, initialised empty.
with_exp = []
no_exp = []
示例#11
0
#data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/glucose/glucose_Q10.csv')

# load data: HbA1c
#data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/hba1c/hba1c_Q1.csv')
#data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/hba1c/hba1c_Q2.csv')
#data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/hba1c/hba1c_Q3.csv')
#data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/hba1c/hba1c_Q4.csv')
#data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/hba1c/hba1c_Q5.csv')
#data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/hba1c/hba1c_Q6.csv')
#data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/hba1c/hba1c_Q7.csv')
#data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/hba1c/hba1c_Q8.csv')
#data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/hba1c/hba1c_Q9.csv')
#data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/hba1c/hba1c_Q10.csv')

# load data: Creatinine
data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q1.csv')
#data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q2.csv')
#data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q3.csv')
#data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q4.csv')
#data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q5.csv')
#data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q6.csv')
#data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q7.csv')
#data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q8.csv')
#data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q9.csv')
#data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q10.csv')

# load data: no lab
#data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/noLab/noLab_Q1.csv')
#data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/noLab/noLab_Q2.csv')
#data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/noLab/noLab_Q3.csv')
#data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/noLab/noLab_Q4.csv')
示例#12
0
        cat_pos_tag.append(
            len([ff[1] for ff in nltk.pos_tag(f.split()) if 'VB' in ff[1]]))
    return min_max_normalization(cat_pos_tag)


# get number of adjectives
def num_of_adj(cat):
    """Count adjective-tagged tokens per string and normalise the counts.

    Each element of ``cat`` is split on whitespace and tagged with
    ``nltk.pos_tag``; a token is counted when its tag contains ``'JJ'``.
    The list of per-string counts is passed to ``min_max_normalization``
    and that function's result is returned.
    """
    adj_counts = [
        sum('JJ' in tagged[1] for tagged in nltk.pos_tag(text.split()))
        for text in cat
    ]
    return min_max_normalization(adj_counts)


# load data
creatinine_d = ufile.read_csv(
    '/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q1.csv')
#creatinine_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q2.csv')
#creatinine_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q3.csv')
#creatinine_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q4.csv')
#creatinine_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q5.csv')
#creatinine_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q6.csv')
#creatinine_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q7.csv')
#creatinine_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q8.csv')
#creatinine_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q9.csv')
#creatinine_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q10.csv')

#hba1c_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/hba1c/hba1c_Q1.csv')
#hba1c_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/hba1c/hba1c_Q2.csv')
#hba1c_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/hba1c/hba1c_Q3.csv')
#hba1c_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/hba1c/hba1c_Q4.csv')
#hba1c_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/hba1c/hba1c_Q5.csv')
            elif 0.84 <= float(seg[2]) <= 1.21:
                lab_range.append(2)
            elif 1.21 < float(seg[2]) < 20.0:
                lab_range.append(3)
            elif 20.0 < float(seg[2]) < 74.3:
                lab_range.append(1)
            elif 74.3 <= float(seg[2]) <= 107.0:
                lab_range.append(2)
            elif float(seg[2]) > 107.0:
                lab_range.append(3)
            else:
                lab_range.append(0)
    return lab_range

# load data
# The question pool is read from a hard-coded absolute path; the first row is
# kept (the deletion below is commented out).
pool = ufile.read_csv('/Users/luyu/Documents/Master Thesis/Question_Pool.csv')
#del pool[0]

# extract expression for 3 lab
# One list per CSV column, by position 0-4.
# NOTE: `id` shadows the builtin of the same name for the rest of the module.
# NOTE(review): glu/a1c/cre presumably stand for glucose, HbA1c and
# creatinine -- confirm against the file layout.
id = [d[0] for d in pool]
txt = [d[1] for d in pool]
glu_exp = [d[2] for d in pool]
a1c_exp = [d[3] for d in pool]
cre_exp = [d[4] for d in pool]

# post-processing glucose results
glu_exp_n = []
for c in glu_exp:
    ch = ''.join([cc for cc in c])
    ch = ch.replace('[', '')
    ch = ch.replace(']', '')