def get_study_values(request, study_id, table_name, column_name):
    """Return study values for one table/column as a JSON list.

    Builds a minimal mapping ({'from_table', 'from_column'}) and delegates
    the query to select_values_from_dict(), then serializes each row with
    StudyValueSerializer.

    :param request: Django request object (unused beyond routing).
    :param study_id: study identifier, convertible to int.
    :param table_name: source table to select from.
    :param column_name: source column to select.
    :returns: JsonResponse (application/json) with a list of serialized rows.
    """
    # FETCH the hard way from a dynamically created query
    (value_cursor, con) = get_cursor()
    try:
        person_obj = BasePerson.factory_on_id(int(study_id))
        # NOTE(review): [:10] caps the result to the first 10 person ids —
        # looks like a leftover debugging limit; confirm before removing.
        person_ids = person_obj.get_study_person_ids(con)[:10]
        mapping = {'from_table': table_name, 'from_column': column_name}
        rows = select_values_from_dict(mapping, person_obj, person_ids, value_cursor)
    finally:
        # Always release DB resources, even when the query raises.
        value_cursor.close()
        con.close()
    # SERIALIZE
    json_list = [StudyValueSerializer(row).data for row in rows]
    return JsonResponse(json_list, safe=False, status=200)
def get_ohdsi_value_by_vocabulary_concept(request, study_id, vocabulary_id, concept_code, table_name):
    """Return, per person, the first OHDSI value matching a vocabulary concept.

    For each person in the study, fetches matching (value_as_number,
    value_as_string, value_as_concept_id, date) tuples from table_name and
    keeps only the first one; persons with no match are omitted.

    TODO: marked for when we go beyond just picking the first value for
    each person.

    :param request: Django request object (unused beyond routing).
    :param study_id: study identifier, convertible to int.
    :param vocabulary_id: OHDSI vocabulary id to match.
    :param concept_code: concept code within the vocabulary.
    :param table_name: OHDSI table to fetch from.
    :returns: JsonResponse (application/json) with one dict per matched person.
    """
    (cursor, con) = get_cursor()
    try:
        person_obj = BasePerson.factory_on_id(int(study_id))
        id_column_name = person_obj.get_id_field_name()
        person_ids = person_obj.get_study_person_ids(con)
        json_list = []
        for person_id in person_ids:
            tuples = fetch(con, table_name, person_id, vocabulary_id, concept_code)
            if not tuples:
                # No value for this (study, vocabulary, concept, table, person).
                continue
            (value_as_number, value_as_string, value_as_concept_id, date) = tuples[0]
            json_list.append({
                id_column_name: person_obj.convert_person_id_to_study(person_id),
                'value_as_number': value_as_number,
                'value_as_string': value_as_string,
                'value_as_concept_id': value_as_concept_id,
                'date': date,
            })
    finally:
        # Always release DB resources, even when fetch() raises.
        cursor.close()
        con.close()
    return JsonResponse(json_list, safe=False, status=200)
def get_study_value_by_table_column(request, study_id, table_name, column_name):
    """Summarize one study table column and return the summary as JSON.

    Selects the column's values for every person in the study, reduces them
    with _summarize_study_values(), and wraps the single summary in a list.

    :param request: Django request object (unused beyond routing).
    :param study_id: study identifier, convertible to int.
    :param table_name: source table to select from.
    :param column_name: source column to select and summarize.
    :returns: JsonResponse (application/json) containing [summary].
    """
    cursor, con = get_cursor()
    person = BasePerson.factory_on_id(int(study_id))
    ids = person.get_study_person_ids(con)
    mapping = {'from_table': table_name, 'from_column': column_name}
    values = select_values_from_dict(mapping, person, ids, cursor)
    summary = _summarize_study_values(values, column_name)
    cursor.close()
    con.close()
    return JsonResponse([summary], safe=False, status=200)
def main(db_name, user_name, study_name, extraction_id):
    """Run a rule-driven extraction for a study and write it to a CSV file.

    Connects to the database, runs the melted and wide extractions for every
    person in the study, writes header + data to OUTPUT_BASE/<study>.csv,
    and logs a summary of NA columns. Commits on success; on failure logs,
    prints the traceback, and re-raises. The connection is always closed.

    TODO: the VERIFY and STATS passes (_verify_extraction_matrix,
    _get_extraction_matrix_stats, min==maxsize sanity checks) were removed
    as dead commented-out code — resurrect from VCS history if needed.

    :param db_name: PostgreSQL database name.
    :param user_name: PostgreSQL user name.
    :param study_name: study to extract; also names the output CSV.
    :param extraction_id: id of the extraction rule set.
    :raises Exception: re-raises whatever the extraction pipeline raises.
    """
    conn = psycopg2.connect(database=db_name, user=user_name)
    try:
        (study_id, observation_range_start, observation_range_end, _, _) = \
            get_study_details(conn, study_name)
        extraction = Extract(conn)
        person_obj = BasePerson.factory_on_id(study_id)
        # Materialize once: person_ids is consumed by len() here and by the
        # two extraction calls below, which would be wrong for an iterator.
        person_ids = list(person_obj.get_study_person_ids(conn))
        logger.info("extracting %d persons...", len(person_ids))
        (melted_rows, column_names) = extraction.rule_driven_melted_extraction(
            person_ids, extraction_id)
        wide_rows = extraction.rule_driven_wide_extraction(
            person_ids, extraction_id)
        logger.info("...extracted %d persons.", len(person_ids))

        # PRINT — write header then data; 'with' guarantees the file closes
        # (the original leaked the handle; ##os.close(csv_file) was commented
        # out and would have been wrong for a file object anyway).
        csv_path = OUTPUT_BASE + '/' + study_name.lower() + '.csv'
        with open(csv_path, 'w+') as csv_file:
            logger.info("starting to write file %s", csv_path)
            extraction.print_extraction_header(melted_rows, wide_rows, column_names, csv_file)
            logger.info("...header in %s", csv_path)
            na_columns = extraction.print_extraction_data(melted_rows, wide_rows, csv_file, study_name)

        # NA SUMMARY (TODO, don't lose (forget about) this functionality)
        logger.info("summary:num_columns:%s", len(na_columns))
        for (term, count) in na_columns.items():
            logger.info("summary %s, %s", term, count)
        logger.warning("EXTRACT complete")
        # Commit only on success; the original placed commit/close after a
        # re-raising except block, so failures leaked the connection.
        conn.commit()
    except Exception as e:
        logger.error("extract main():%s", e)
        traceback.print_tb(e.__traceback__)
        raise
    finally:
        conn.close()