Example #1
0
def get_order_type(dataset_id, ordertype):
    """Return rows of *dataset_id* whose ORDER_CATALOG_TYPE equals *ordertype*.

    :param dataset_id: id of the dataset to read via ``dt.get``
    :param ordertype: value compared against each row's ORDER_CATALOG_TYPE
    :return: list of matching row dicts, sorted by ORDER_DTM newest first
    """
    data = dt.get(dataset_id).aslist()
    filtered_data = [row for row in data
                     if row['ORDER_CATALOG_TYPE'] == ordertype]
    # NOTE(review): other examples in this file use to_date_time();
    # confirm to_data_time is a real helper and not a typo.
    filtered_data.sort(key=lambda row: to_data_time(row['ORDER_DTM']),
                       reverse=True)
    return filtered_data
Example #2
0
def calc_diff(id1, id2, direction, ops):
    """Calculate a detailed difference between two tables.

    :param id1: dataset id of the first table
    :param id2: dataset id of the second table
    :param direction: diff direction, passed through to DiffFinder
    :param ops: operations for which the diff is generated
    :return: the generated diff; a Diff instance gets the union attached
    """
    ds1 = dataset.get(id1)
    ds2 = dataset.get(id2)

    def stratify_matrix(m):
        """Reorder m's data by its precomputed row/column stratifications."""
        # NOTE: ids must match with the hard-coded ones in
        # taco/src/data_set_selector.ts -> prepareTCGAData()
        row_strat = dataset.get(m.id + '4CnmfClustering')
        col_strat = dataset.get(m.id + 'TreeClusterer1')

        data = m.asnumpy()
        rows = m.rows() if row_strat is None else row_strat.rows()
        cols = m.cols() if col_strat is None else col_strat.rows()

        if row_strat is not None:
            # map each stratified row id to its index in m, then reorder
            rowids = list(m.rowids())
            row_indices = [rowids.index(o) for o in row_strat.rowids()]
            data = data[row_indices, ...]

        if col_strat is not None:
            # same for columns; the column stratifier keeps order in rowids()
            colids = list(m.colids())
            col_indices = [colids.index(o) for o in col_strat.rowids()]
            data = data[..., col_indices]

        return Table(rows, cols, data)

    # create the table objects
    table1 = stratify_matrix(ds1)
    table2 = stratify_matrix(ds2)
    diff_finder = DiffFinder(table1, table2, ds1.rowtype, ds2.coltype,
                             direction)
    t2 = timeit.default_timer()
    d = diff_finder.generate_diff(ops)
    t3 = timeit.default_timer()
    # Bug fix: lazy logging args need a placeholder; the original passed
    # t3 - t2 with no %s, which raises a formatting error inside logging.
    _log.debug("TIMER: time to generate diff %s", t3 - t2)
    if isinstance(d, Diff):
        d.add_union(diff_finder.union)
    return d
Example #3
0
def get_info(ids, dataset_id):
    """Group the rows of *dataset_id* by patient id.

    :param ids: patient ids (ints) to collect rows for
    :param dataset_id: dataset to read via ``dt.get``
    :return: dict mapping each id in *ids* to its rows (possibly empty),
        in dataset order
    """
    rows = dt.get(dataset_id).aslist()
    # every requested id gets an entry, even when no rows match
    grouped = {pid: [] for pid in ids}
    for entry in rows:
        pid = int(entry['PAT_ID'])
        if pid in grouped:
            grouped[pid].append(entry)
    return grouped
Example #4
0
    def stratify_matrix(m):
        """Return a Table of m's data reordered by its precomputed
        row/column stratification datasets, when those exist.
        """
        # NOTE: ids must match with the hard-coded ones in taco/src/data_set_selector.ts -> prepareTCGAData()
        # Stratifications are looked up by naming convention; dataset.get
        # presumably returns None when the entry is absent — TODO confirm.
        row_strat = dataset.get(m.id + '4CnmfClustering')
        col_strat = dataset.get(m.id + 'TreeClusterer1')

        data = m.asnumpy()
        # prefer the stratified label order when a stratification exists
        rows = m.rows() if row_strat is None else row_strat.rows()
        cols = m.cols() if col_strat is None else col_strat.rows()

        if row_strat is not None:
            # map each stratified row id to its index in m, then reorder rows
            rowids = list(m.rowids())
            row_indices = [rowids.index(o) for o in row_strat.rowids()]
            data = data[row_indices, ...]

        if col_strat is not None:
            # same for columns; the column stratifier stores order in rowids()
            colids = list(m.colids())
            col_indices = [colids.index(o) for o in col_strat.rowids()]
            data = data[..., col_indices]

        return Table(rows, cols, data)
Example #5
0
def get_first_info(PAT_ID):
    """Return the earliest 'Orders' row for the given patient.

    :param PAT_ID: patient id, compared verbatim against each row's PAT_ID
    :return: the row dict with the smallest ORDER_DTM, or ``[]`` when the
        patient has no rows (falsy sentinel kept for backward compatibility)
    """
    data = dt.get('Orders').aslist()
    first_ent = []
    # NOTE(review): pd.datetime was removed in pandas >= 2.0; consider
    # datetime.datetime.today() instead — verify the project's pandas version.
    first_time = pd.datetime.today()
    for row in data:
        if row['PAT_ID'] == PAT_ID:
            # convert once per matching row instead of twice
            order_time = to_data_time(row['ORDER_DTM'])
            if order_time < first_time:
                first_ent = row
                first_time = order_time

    return first_ent
Example #6
0
def get_first_info(PAT_ID):
    """Return the earliest 'PRO' assessment row for the given patient.

    :param PAT_ID: patient id, compared verbatim against each row's PAT_ID
    :return: the row dict with the smallest ASSESSMENT_START_DTM, or ``[]``
        when the patient has no rows (falsy sentinel kept for compatibility)
    """
    data = dt.get('PRO').aslist()
    first_ent = []
    # NOTE(review): pd.datetime was removed in pandas >= 2.0; consider
    # datetime.datetime.today() instead — verify the project's pandas version.
    first_time = pd.datetime.today()
    for row in data:
        if row['PAT_ID'] == PAT_ID:
            # convert once per matching row instead of twice
            start_time = to_date_time(row['ASSESSMENT_START_DTM'])
            if start_time < first_time:
                first_ent = row
                first_time = start_time

    return first_ent
Example #7
0
def get_latest_info():
    """Return, per patient in 'Demo', the latest admission row and weights.

    :return: JSON with 'rows' (most recent row per patient) and 'WEIGHT_KG'
        (per patient, all non-empty weights, newest admission first)
    """
    data = dt.get('Demo').aslist()

    # Group rows by patient id in a single pass. Bug fix: the original
    # compared the raw row['PAT_ID'] (string) against an int id taken from
    # int(d['PAT_ID']), so no row ever matched and temp_pat[0] raised
    # IndexError. This also removes the O(patients * rows) rescan.
    by_patient = {}
    for row in data:
        by_patient.setdefault(int(row['PAT_ID']), []).append(row)

    pats = []
    pat_weights = []
    for rows in by_patient.values():
        rows.sort(key=lambda x: to_date_time(x['ADM_DATE']), reverse=True)
        pats.append(rows[0])
        pat_weights.append([r['WEIGHT_KG'] for r in rows if r['WEIGHT_KG']])

    return jsonify({'rows': pats, 'WEIGHT_KG': pat_weights})
Example #8
0
def get_similarity_score(PAT_ID, dataset_id):
    """Score every other patient's similarity to PAT_ID over Demo fields.

    Each field h in weights_Demo contributes weights_Demo[h] when the two
    patients' values match, and half that weight when either value is
    missing; zero-weighted fields are skipped.

    :param PAT_ID: target patient id; must be an int, since pats is keyed
        by int(row['PAT_ID'])
    :param dataset_id: dataset to read via dt.get
    :return: dict mapping patient id -> similarity score
    """
    data = dt.get(dataset_id).aslist()
    pat_ids = set([int(d['PAT_ID']) for d in data])

    # find the first entry for each patient
    pats = {}
    #for id in pat_ids:
    #    pats[id] = get_first_info(id)

    # Later rows overwrite earlier ones, so each patient keeps the LAST
    # row in dataset order (despite the comment above saying "first").
    for row in data:
        pats[int(row['PAT_ID'])] = row

    scores = {}
    # raises KeyError when PAT_ID is not an int id present in the data
    target_pat = pats[PAT_ID]

    for id in pat_ids:
        similarity_score = 0
        if id == PAT_ID:  ## NOTE!
            continue
        curr_pat = pats[id]
        for h in list(weights_Demo.keys()):
            if weights_Demo[h] == 0:
                # zero-weighted fields contribute nothing
                continue
            elif not curr_pat[h] or not target_pat[h]:
                # missing value on either side: award half the weight
                similarity_score += 0.5 * weights_Demo[h]

            elif h in categorical_data:
                similarity_score += weights_Demo[h] if target_pat[
                    h] == curr_pat[h] else 0
            elif h in numerical_data:
                # NOTE(review): identical to the categorical branch; the
                # trailing comment suggests a range check was intended here.
                similarity_score += weights_Demo[h] if target_pat[
                    h] == curr_pat[h] else 0  ## within range
            elif h in years:  ## age! ## within range
                # NOTE(review): exact-year equality, not a range, despite
                # the "within range" comment — confirm intent.
                similarity_score += weights_Demo[h] if float(
                    extract_year(target_pat[h])) == float(
                        extract_year(curr_pat[h])) else 0
            else:
                # fields in no known category fall back to exact match
                similarity_score += weights_Demo[h] if target_pat[
                    h] == curr_pat[h] else 0

        scores[id] = similarity_score

    return scores
Example #9
0
def get_info_by_functions(id):
    """Debug/demo endpoint: exercise the accessor functions of a dataset
    entry and return all of their results in one JSON payload.
    """
    my_data = dt.get(id)
    #range = rng.RangeElem(2, 5, 1)

    return jsonify({
        'id': id,
        'my_data': my_data,  # NOTE(review): the entry object itself may not be JSON-serializable — verify
        'rows': my_data.rows(),
        'data': my_data.asjson(),
        'rowids': my_data.rowids(),
        'aslist': my_data.aslist(),
        'aspandas': my_data.aspandas(),
        'columns': my_data.columns,
        'CSVEntry.to_description': my_data.to_description(),
        'CSVEntry.idtypes': my_data.idtypes()  #,
        #'asList( range(2,5,1) )': my_data.aslist(range),
        #'rows( range(2,5,1) )': my_data.rows(range),
        #'aspandas( range(2,5,1) )': my_data.aspandas(range)
    })
Example #10
0
def get_all_info_for_pat(dataset_id, PAT_ID):
    """Return all rows of *dataset_id* belonging to *PAT_ID*, sorted by the
    dataset's time column when the dataset is a known one.

    :param dataset_id: dataset to read via ``dt.get``
    :param PAT_ID: patient id (string or int; compared as int)
    :return: list of row dicts, time-sorted for known datasets
    """
    my_data = dt.get(dataset_id)

    # Perf fix: the original called my_data.rows(), my_data.aspandas() and
    # my_data.aslist() inside the loop on every iteration (accidental
    # O(n^2) and repeated dataframe construction). Fetch each once.
    row_keys = my_data.rows()
    frame = my_data.aspandas()
    records = my_data.aslist()
    target = int(PAT_ID)

    info_rows = []
    for i, row_key in enumerate(row_keys):
        if int(frame['PAT_ID'][row_key]) == target:
            info_rows.append(records[i])

    # time column per dataset; unknown datasets are returned unsorted
    sort_columns = {
        'Demo': 'ADM_DATE',
        'PRO': 'ASSESSMENT_START_DTM',
        'PT': 'ADM_DATE',
        'VAS': 'RECORDED_TIME',
    }
    time_col = sort_columns.get(dataset_id)
    if time_col is not None:
        info_rows.sort(key=lambda r: to_date_time(r[time_col]))

    return info_rows
Example #11
0
def get_stat(dataset):
    """Return summary histograms (gender, BMI, age) for a Demo dataset.

    :param dataset: key into dataset_hash['Demo'] naming the Demo variant
    :return: JSON with 'length' (row count), 'GENDER' ([F, M] counts),
        'BMI' (7 buckets) and 'AGE' (11 buckets)
    """
    my_data = dt.get(dataset_hash['Demo'][dataset])
    data = my_data.aslist()
    length = 0
    gender = [0, 0]  # [female, male]; other/missing genders are not counted
    # BMI buckets: [missing, <=18, (18,21], (21,24], (24,27], (27,30], >30]
    bmi = [0, 0, 0, 0, 0, 0, 0]
    # age buckets: [<=10, 11-20, ..., 91-100, >100]
    age = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
    # NOTE(review): pd.datetime was removed in pandas >= 2.0; consider
    # datetime.datetime.today() — verify the project's pandas version.
    today = pd.datetime.today()

    for row in data:
        length += 1
        if row['PAT_GENDER'] == 'F':
            gender[0] += 1
        elif row['PAT_GENDER'] == 'M':
            gender[1] += 1
        if not row['BMI']:
            bmi[0] += 1
        elif row['BMI'] <= 18:
            bmi[1] += 1
        elif row['BMI'] > 30:
            bmi[6] += 1
        else:
            # 3-unit-wide buckets: ceil((BMI - 15) / 3) maps (18,30] to 2..5
            # assumes row['BMI'] is numeric, not a string — TODO confirm
            bmi[int(math.ceil((row['BMI'] - 15) / 3))] += 1

        # approximate age in whole years (365.25 accounts for leap years)
        pat_age = int(
            (today - to_date_time(row['PAT_BIRTHDATE'])).days / 365.25)
        if pat_age <= 10:
            age[0] += 1
        elif pat_age > 100:
            age[10] += 1
        else:
            # decade buckets: floor((age - 1) / 10) maps 11..100 to 1..9
            age[int(math.floor((pat_age - 1) / 10))] += 1

    return jsonify({
        'length': length,
        'GENDER': gender,
        'BMI': bmi,
        'AGE': age
    })
Example #12
0
def get_data(id):
    """Return the dataset entry for *id*, wrapped in a JSON response."""
    payload = {'data': dt.get(id)}
    return jsonify(payload)
Example #13
0
def get_similarity_score(PAT_ID, dataset_id):
    """Score every other patient's similarity to PAT_ID from PRO scores.

    The dataset is split into three PROMIS forms; for each form the
    earliest assessment per patient is kept. Two patients contribute
    weights_PRO['SCORE'] per form when their scores are within 10 points,
    and half that weight when either side has no assessment for the form.

    :param PAT_ID: target patient id (string or int; compared as int)
    :param dataset_id: dataset to read via dt.get
    :return: dict mapping patient id -> similarity score
    """
    data = dt.get(dataset_id).aslist()

    pat_ids = set([int(d['PAT_ID']) for d in data])
    groups = {
        'PROMIS Bank v1.2 - Physical Function': {},
        'Oswestry Index (ODI)': {},
        'PROMIS Bank v1.0 - Depression': {}
    }

    # Keep the earliest assessment per patient per form.
    for row in data:
        promis_type = row['FORM']
        if promis_type not in groups:
            # robustness: skip forms we do not score (original raised KeyError)
            continue
        pid = int(row['PAT_ID'])
        current = groups[promis_type].get(pid)
        # Bug fix: the original indexed the group with the raw string
        # row['PAT_ID'] while keys are ints, raising KeyError whenever a
        # patient appeared twice in the same form. Use the int key throughout.
        if current is None or to_date_time(
                current['ASSESSMENT_START_DTM']) > to_date_time(
                    row['ASSESSMENT_START_DTM']):
            groups[promis_type][pid] = row

    target = int(PAT_ID)
    # .get so a target missing from a form yields None; the falsy checks in
    # _form_score then use the half-weight branch instead of crashing.
    physical = groups['PROMIS Bank v1.2 - Physical Function'].get(target)
    oswerstry = groups['Oswestry Index (ODI)'].get(target)
    depression = groups['PROMIS Bank v1.0 - Depression'].get(target)

    weight = weights_PRO['SCORE']

    def _form_score(target_row, form, other_id):
        """One form's contribution: full weight within 10 points, half
        weight when either patient lacks an assessment for the form."""
        group = groups[form]
        if target_row and other_id in group:
            delta = abs(
                int(target_row['SCORE']) - int(group[other_id]['SCORE']))
            return weight if delta <= 10 else 0
        return weight * 0.5

    scores = {}
    for pid in pat_ids:
        if pid == target:
            continue
        scores[pid] = (
            _form_score(physical, 'PROMIS Bank v1.2 - Physical Function', pid)
            + _form_score(oswerstry, 'Oswestry Index (ODI)', pid)
            + _form_score(depression, 'PROMIS Bank v1.0 - Depression', pid))

    return scores
Example #14
0
def get_col_by_name(id, col_name):
    """Return the *col_name* column of dataset *id* as a JSON response."""
    frame = dt.get(id).aspandas()
    column = frame[col_name]
    return jsonify({'cols': column})
Example #15
0
def get_col_titles(id):
    """Return the column descriptors of dataset *id* as a JSON response."""
    entry = dt.get(id)
    return jsonify({'cols': entry.columns})
Example #16
0
def get_filteres_orders(dataset_id, order):
    """Return rows whose upper-cased PRIMARY_MNEMONIC equals *order*,
    sorted by ORDER_DTM newest first.
    """
    rows = dt.get(dataset_id).aslist()
    matching = [row for row in rows
                if row['PRIMARY_MNEMONIC'].upper() == order]
    matching.sort(key=lambda row: to_data_time(row['ORDER_DTM']),
                  reverse=True)
    return matching
Example #17
0
def get_all_rows(dataset_id):
    """Return at most the first 50 rows of *dataset_id* as JSON."""
    entries = dt.get(dataset_id).aslist()
    preview = entries[:50]
    return jsonify({dataset_id: preview})
Example #18
0
def get_row_by_index(id, index):
    """Return the row at position *index* (string or int) of dataset *id*."""
    entries = dt.get(id).aslist()
    selected = entries[int(index)]
    return jsonify({'row': selected})