def add_unique_id_to_net_sup_edges(path_net_sup_edges):
    """Input: File path of the network supplement edges csv.
    Output: Rewrites that file with an extra unique ID column; each row's unique ID is
    looked up in subjects_ids.csv through the row's sender project ID."""
    known_pids = w.get_data_from_one_col_as_list(
        c.SUBJECTS_IDS_PATH, c.LABEL_PID)
    known_unique_ids = w.get_data_from_one_col_as_list(
        c.SUBJECTS_IDS_PATH, c.LABEL_UNIQUE_ID)

    table = w.get_csv_as_list(path_net_sup_edges)
    edge_rows = table[1:]
    header = table[0]
    header.append(c.LABEL_UNIQUE_ID)

    # the widened table is built in a temp file first
    w.create_csv_add_column_labels(c.TEMP_FILE_PATH, header)

    sender_col = w.get_index_of_file_col(path_net_sup_edges, c.LABEL_SENDER_PID)
    for edge in edge_rows:
        # position of this edge's sender project ID within subjects_ids.csv
        position = known_pids.index(edge[sender_col])
        # the unique ID stored at that position becomes the row's last cell
        edge.append(known_unique_ids[position])
        w.append_row_to_csv(c.TEMP_FILE_PATH, edge)

    # the temp file takes the place of the original edges file
    w.rename_csv(c.TEMP_FILE_PATH, path_net_sup_edges)
def create_p1_p2_overlaps():
    """Input: None.
    Output: Creates the p1/p2 overlap file with one row (project id, rds id, unique id) for
    every unique_id/rds_id pair from the old nodes file whose unique id is found in
    p2_network_interviews and whose rds id is found in p1_screenings."""
    label_pid, label_rds, label_unique = c.LABEL_PID, c.LABEL_RDS_ID, c.LABEL_UNIQUE_ID
    screenings_file = c.P1_SCREENINGS_PATH
    network_file = c.P2_NET_SUPS_EXTRACT_PATH
    nodes_file = c.OLD_NODES_PATH
    overlap_file = c.P1_P2_OVERLAPS_PATH

    w.create_csv_add_column_labels(overlap_file, [label_pid, label_rds, label_unique])

    # pools of ids still unmatched: rds ids from p1_screenings, unique ids from p2_network_interviews
    p1_rds_pool = w.get_data_from_one_col_as_list(screenings_file, label_rds)
    p2_unique_pool = w.get_data_from_one_col_as_list(network_file, label_unique)

    # lookups keyed by unique id, both extracted from the old nodes file
    rds_by_unique = w.get_no_null_entries_dict_from_csv(nodes_file, label_unique, label_rds)
    pid_by_unique = w.get_no_null_entries_dict_from_csv(nodes_file, label_unique, label_pid)

    for unique_id, rds_id in rds_by_unique.items():
        try:
            # the unique id must be present in p2_network_interviews ...
            p2_unique_pool.remove(unique_id)
            # ... and the rds id in p1_screenings; each successful match uses up one entry
            p1_rds_pool.remove(rds_id)
        except ValueError:
            # either id is missing: this pair is not an overlap, move on to the next pair
            continue
        # both ids matched, so write the pair together with its project id
        w.append_row_to_csv(overlap_file, [pid_by_unique[unique_id], rds_id, unique_id])
def create_rds_edges_csv(path_rds_edges, old_edge_data, sender_index, receiver_index, rds_index):
    """Input: rds_edges.csv file path; data rows from the old edge file; column index of the
    sender, of the receiver, and of the RDS edge flag in those rows.
    Output: Creates the rds_edges.csv file and fills it with the edge id of every row whose
    RDS edge flag reads Yes."""
    w.create_csv_add_column_labels(path_rds_edges, [c.LABEL_EDGE_ID])
    for old_edge in old_edge_data:
        if row_is_not_rds := old_edge[rds_index] != 'Yes':
            continue  # not an rds edge, nothing to write
        # an edge id is the sender entry followed directly by the receiver entry
        w.append_row_to_csv(path_rds_edges, [old_edge[sender_index] + old_edge[receiver_index]])
def create_network_edges_csv(path_net_edges, old_edge_data, sender_index, receiver_index, net_index):
    """Input: net_edges.csv file path; data rows from the old edge file; column index of the
    sender, of the receiver, and of the network supplement flag in those rows.
    Output: Creates the network_edges.csv file and fills it with the edge id of every row whose
    network supplement flag reads Yes."""
    w.create_csv_add_column_labels(path_net_edges, [c.LABEL_EDGE_ID])
    for old_edge in old_edge_data:
        if old_edge[net_index] != 'Yes':
            continue  # not a network supplement edge, nothing to write
        # an edge id is the sender entry followed directly by the receiver entry
        sender_receiver = old_edge[sender_index] + old_edge[receiver_index]
        w.append_row_to_csv(path_net_edges, [sender_receiver])
def create_edges_csv(path_edges, old_edge_data, sender_index, receiver_index):
    """Input: edges.csv file path; data rows from the old edge file; column index of the sender
    and of the receiver in those rows.
    Output: Creates edges.csv file and fills it with one row (edge id, sender, receiver) per
    old edge row."""
    column_labels = [c.LABEL_EDGE_ID, c.LABEL_SENDER_PID, c.LABEL_RECEIVER_PID]
    w.create_csv_add_column_labels(path_edges, column_labels)
    for old_edge in old_edge_data:
        sender, receiver = old_edge[sender_index], old_edge[receiver_index]
        # the edge id is the sender entry followed directly by the receiver entry
        w.append_row_to_csv(path_edges, [sender + receiver, sender, receiver])
def make_net_sup_extract_file(path_old_net, path_net_sup_extract):
    """Input: File path of deprecated network supplement and new net sup extract.
    Output: Adds a merged unique ID column to the deprecated net sup file, then creates the
    net sup extract file with one row (project ID, P2SF, P2NS1, unique ID) for every
    deprecated net sup row whose unique ID appears in subjects_ids.csv. Unique IDs that are
    not found there are reported on stdout and skipped."""
    # make unique id column in deprecated net sup file
    w.add_merged_col_to_csv(path_old_net, c.LABEL_UNIQUE_ID,
                            c.NETWORK_HCV_HIV_COLS_TO_MERGE)

    unique_ids_from_subs_ids = w.get_data_from_one_col_as_list(
        c.SUBJECTS_IDS_PATH, c.LABEL_UNIQUE_ID)
    unique_ids_from_depr_net_sup = w.get_data_from_one_col_as_list(
        path_old_net, c.LABEL_UNIQUE_ID)

    subs_ids_data = w.get_csv_as_list(c.SUBJECTS_IDS_PATH)[1:]
    depr_net_sup_data = w.get_csv_as_list(path_old_net)[1:]

    w.create_csv_add_column_labels(
        path_net_sup_extract,
        [c.LABEL_PID, c.LABEL_P2SF, c.LABEL_P2NS1, c.LABEL_UNIQUE_ID])

    pid_index_subs_ids = w.get_index_of_file_col(c.SUBJECTS_IDS_PATH, c.LABEL_PID)
    p2sf_index_depr_net_sup = w.get_index_of_file_col(path_old_net, c.LABEL_P2SF)
    p2ns1_index_depr_net_sup = w.get_index_of_file_col(path_old_net, c.LABEL_P2NS1)

    # Map each unique id to the row index of its FIRST occurrence in subjects_ids.csv.
    # This matches what list.index() would return, without rescanning the list per row.
    subs_ids_row_by_unique_id = {}
    for row_index, known_unique_id in enumerate(unique_ids_from_subs_ids):
        subs_ids_row_by_unique_id.setdefault(known_unique_id, row_index)

    for i, unique_id in enumerate(unique_ids_from_depr_net_sup):
        subs_ids_row_index = subs_ids_row_by_unique_id.get(unique_id)
        if subs_ids_row_index is None:
            # Only a failed lookup is reported as "not found"; errors raised while
            # reading or writing rows are no longer swallowed under this message.
            print(unique_id + " not found in subjects_ids.csv")
            continue

        # get that row from subjects ids and get its project id column entry
        pid = subs_ids_data[subs_ids_row_index][pid_index_subs_ids]
        # get current row from depr_net_sup to fetch its P2SF and P2NS1 column entries
        p2sf = depr_net_sup_data[i][p2sf_index_depr_net_sup]
        p2ns1 = depr_net_sup_data[i][p2ns1_index_depr_net_sup]
        # append the new row to the net_sup_extract file
        w.append_row_to_csv(path_net_sup_extract, [pid, p2sf, p2ns1, unique_id])
def create_sub_ids_csv():
    """Input: None.
    Output: Creates subjects_ids.csv file and fills it with the project_id, rds_id, and
    unique_id of every data row in the old nodes file; cells found in c.NO_ENTRIES are
    written as empty strings."""
    nodes_file, sub_ids_file = c.OLD_NODES_PATH, c.SUBJECTS_IDS_PATH
    old_rows = w.get_csv_as_list(nodes_file)[1:]  # everything after the first row

    # positions of the three id columns in the old nodes file, in output order
    wanted_cols = (
        w.get_index_of_file_col(nodes_file, c.LABEL_PID),
        w.get_index_of_file_col(nodes_file, c.LABEL_RDS_ID),
        w.get_index_of_file_col(nodes_file, c.LABEL_UNIQUE_ID),
    )

    w.create_csv_add_column_labels(
        sub_ids_file, [c.LABEL_PID, c.LABEL_RDS_ID, c.LABEL_UNIQUE_ID])

    for old_row in old_rows:
        # blank out every no-entry cell before picking the id columns
        cleaned = ['' if cell in c.NO_ENTRIES else cell for cell in old_row]
        # selecting by column index keeps each id under its matching label
        w.append_row_to_csv(sub_ids_file, [cleaned[col] for col in wanted_cols])