Example #1
def get_study_values(request, study_id, table_name, column_name):
    # FETCH: build and run a dynamically created query

    (value_cursor, con) = get_cursor()

    personObj = BasePerson.factory_on_id(int(study_id))
    # NOTE: capped at the first 10 person ids (looks like a leftover debug limit)
    person_ids = personObj.get_study_person_ids(con)[:10]
    # mapping keys: from_table, from_column, and optionally
    # from_where_clause, from_where_column, has_date
    mapping = {'from_table': table_name, 'from_column': column_name}
    rows = select_values_from_dict(mapping, personObj, person_ids,
                                   value_cursor)
    value_cursor.close()
    con.close()

    # SERIALIZE each row and return the list as a JSON array
    json_list = [StudyValueSerializer(row).data for row in rows]
    return JsonResponse(json_list, safe=False, status=200)
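A view like this is normally exposed through a Django URLconf. A minimal sketch of the wiring, assuming the views live in a module named views (the route path here is illustrative, not the project's actual urls.py):

from django.urls import path

from . import views

urlpatterns = [
    # illustrative route; the converters match the view's arguments
    path('study/<int:study_id>/value/<str:table_name>/<str:column_name>/',
         views.get_study_values),
]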
Example #2
def get_ohdsi_value_by_vocabulary_concept(request, study_id, vocabulary_id,
                                          concept_code, table_name):

    # TODO: revisit when we go beyond just picking the first value per person
    (cursor, con) = get_cursor()
    personObj = BasePerson.factory_on_id(int(study_id))
    # date_column_name = personObj.get_date_column_for_table(mapping['from_table'])
    id_column_name = personObj.get_id_field_name()
    person_ids = personObj.get_study_person_ids(con)
    json_list = list()
    for person_id in person_ids:
        value_row = {}
        study_person_id = personObj.convert_person_id_to_study(person_id)
        value_row[id_column_name] = study_person_id
        tuples = fetch(con, table_name, person_id, vocabulary_id, concept_code)
        if tuples:
            (value_as_number, value_as_string, value_as_concept_id,
             date) = tuples[0]
            value_row['value_as_number'] = value_as_number
            value_row['value_as_string'] = value_as_string
            value_row['value_as_concept_id'] = value_as_concept_id
            value_row['date'] = date
            json_list.append(value_row)

        # else: no value in table_name for this person/concept combination
    cursor.close()
    con.close()
    return JsonResponse(json_list, safe=False, status=200)
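The fetch() helper is not shown in this example. A minimal sketch of what it might do against the OHDSI observation table, resolving (vocabulary_id, concept_code) through the standard concept table; the SQL below is an assumption, and the real helper presumably dispatches on table_name to pick the table and its date column:

def fetch(con, table_name, person_id, vocabulary_id, concept_code):
    # Assumed query, illustrated for table_name == 'observation': return
    # (value_as_number, value_as_string, value_as_concept_id, date) tuples
    # for one person and one source concept.
    sql = """
        SELECT o.value_as_number, o.value_as_string, o.value_as_concept_id,
               o.observation_date
          FROM observation o
          JOIN concept c ON c.concept_id = o.observation_concept_id
         WHERE o.person_id = %s
           AND c.vocabulary_id = %s
           AND c.concept_code = %s
    """
    cur = con.cursor()
    cur.execute(sql, (person_id, vocabulary_id, concept_code))
    rows = cur.fetchall()
    cur.close()
    return rows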
Example #3
def get_study_value_by_table_column(request, study_id, table_name,
                                    column_name):
    (cursor, con) = get_cursor()
    personObj = BasePerson.factory_on_id(int(study_id))
    person_ids = personObj.get_study_person_ids(con)
    mapping_row = {'from_table': table_name, 'from_column': column_name}
    values = select_values_from_dict(mapping_row, personObj, person_ids,
                                     cursor)
    json_list = list()
    summary = _summarize_study_values(values, column_name)
    json_list.append(summary)
    cursor.close()
    con.close()
    return JsonResponse(json_list, safe=False, status=200)
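The _summarize_study_values() helper is also not shown. A sketch of the kind of summary it might produce, assuming each row is a dict keyed by column name (the real helper may compute different statistics):

def _summarize_study_values(values, column_name):
    # count plus min/max/mean over the numeric values of column_name
    nums = [row[column_name] for row in values
            if isinstance(row.get(column_name), (int, float))]
    summary = {'column': column_name, 'n': len(nums)}
    if nums:
        summary['min'] = min(nums)
        summary['max'] = max(nums)
        summary['mean'] = sum(nums) / len(nums)
    return summary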
Example #4
def main(db_name, user_name, study_name, extraction_id):
    # connect outside the try block so the finally clause below can
    # safely close the connection
    conn = psycopg2.connect(database=db_name, user=user_name)
    try:
        (study_id, observation_range_start, observation_range_end, _,
         _) = get_study_details(conn, study_name)
        extraction = Extract(conn)

        person_obj = BasePerson.factory_on_id(study_id)
        # materialize once: len(list(gen)) would exhaust a generator before use
        person_ids = list(person_obj.get_study_person_ids(conn))

        logger.info("extracting %d persons...", len(person_ids))
        (melted_rows, column_names) = extraction.rule_driven_melted_extraction(
            person_ids, extraction_id)
        wide_rows = extraction.rule_driven_wide_extraction(
            person_ids, extraction_id)
        logger.info("...extracted %d persons.", len(person_ids))

        # VERIFY / STATS (TODO: resurrect?)
        # This used to verify the extraction matrix via
        # _verify_extraction_matrix() and log per-column stats from
        # _get_extraction_matrix_stats() (min, max, avg, n, sum, n_rules),
        # flagging any column whose min was still sys.maxsize as suspect.
        # PRINT: write header and data rows; the context manager closes the file
        csv_path = OUTPUT_BASE + '/' + study_name.lower() + '.csv'
        logger.info("starting to write file %s", csv_path)
        with open(csv_path, 'w+') as csv_file:
            extraction.print_extraction_header(melted_rows, wide_rows,
                                               column_names, csv_file)
            logger.info("...header in %s", csv_path)
            na_columns = extraction.print_extraction_data(
                melted_rows, wide_rows, csv_file, study_name)

        # NA SUMMARY (TODO: don't lose this functionality)
        logger.info("summary:num_columns:%s", len(na_columns))
        for (term, count) in na_columns.items():
            logger.info("summary %s, %s", term, count)
        logger.info("EXTRACT complete")

    except Exception as e:
        logger.error("extract main(): %s", e)
        traceback.print_tb(e.__traceback__)
        raise
    else:
        conn.commit()
    finally:
        conn.close()
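For completeness, an illustrative command-line entry point for main(); the argument order mirrors the signature, and the project's actual CLI (if any) is not shown here:

if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='Run a study extraction.')
    parser.add_argument('db_name')
    parser.add_argument('user_name')
    parser.add_argument('study_name')
    parser.add_argument('extraction_id', type=int)
    args = parser.parse_args()
    main(args.db_name, args.user_name, args.study_name, args.extraction_id)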