Python append_row_to_csv示例，ETL.wrangling_functions.append_row_to_csv Python示例

示例#1

0

显示文件

文件： prepare_all_p2_csvs.py 项目： dataOtter/PRDE

def add_unique_id_to_net_sup_edges(path_net_sup_edges):
    """Input: File path of network supplement edges.
    Output: Adds a unique ID column to the network supplement edges file.

    Each data row's sender project ID is looked up in the subjects IDs file
    (c.SUBJECTS_IDS_PATH) and the matching unique ID is appended to the row.
    Rows are written to a temp file (c.TEMP_FILE_PATH), which is then handed
    to w.rename_csv to replace the original file.

    Raises: ValueError if a row's sender project ID does not occur in the
    project ID column of the subjects IDs file."""
    pids_from_subs_ids = w.get_data_from_one_col_as_list(
        c.SUBJECTS_IDS_PATH, c.LABEL_PID)
    unique_ids_from_subs_ids = w.get_data_from_one_col_as_list(
        c.SUBJECTS_IDS_PATH, c.LABEL_UNIQUE_ID)

    # Build the project ID -> unique ID lookup once instead of scanning the
    # project ID list for every edge row. setdefault keeps the first
    # occurrence of a repeated project ID, which is what list.index returned.
    unique_id_by_pid = {}
    for pid, unique_id in zip(pids_from_subs_ids, unique_ids_from_subs_ids):
        unique_id_by_pid.setdefault(pid, unique_id)

    net_sup_edges = w.get_csv_as_list(path_net_sup_edges)
    net_sup_edges_cols = net_sup_edges[0]  # header row
    net_sup_edges_data = net_sup_edges[1:]
    net_sup_edges_cols.append(c.LABEL_UNIQUE_ID)
    # make temp file to add unique ID column
    w.create_csv_add_column_labels(c.TEMP_FILE_PATH, net_sup_edges_cols)

    pid_index_net_sup_edges = w.get_index_of_file_col(path_net_sup_edges,
                                                      c.LABEL_SENDER_PID)

    for row in net_sup_edges_data:
        sender_pid = row[pid_index_net_sup_edges]
        if sender_pid not in unique_id_by_pid:
            # same exception type the previous list.index lookup raised
            raise ValueError(
                "{!r} not found in subjects_ids.csv".format(sender_pid))
        # append the matching unique ID to the row and the row to the temp file
        row.append(unique_id_by_pid[sender_pid])
        w.append_row_to_csv(c.TEMP_FILE_PATH, row)

    w.rename_csv(
        c.TEMP_FILE_PATH,
        path_net_sup_edges)  # replace the previous file with the temp file

示例#2

0

显示文件

文件： prepare_p1_p2_overlaps.py 项目： dataOtter/PRDE

def create_p1_p2_overlaps():
    """Input: None.
    Output: Creates the overlap file (c.P1_P2_OVERLAPS_PATH) with one row of
    project id, rds id, unique id for every unique_id/rds_id pair from the old
    nodes file whose unique id is listed in the p2 network supplement extract
    and whose rds id is listed in the p1 screenings file."""
    screenings_path = c.P1_SCREENINGS_PATH
    network_path = c.P2_NET_SUPS_EXTRACT_PATH
    nodes_path = c.OLD_NODES_PATH
    overlap_path = c.P1_P2_OVERLAPS_PATH

    pid_label = c.LABEL_PID
    rds_label = c.LABEL_RDS_ID
    unique_label = c.LABEL_UNIQUE_ID
    w.create_csv_add_column_labels(overlap_path,
                                   [pid_label, rds_label, unique_label])

    # ids still available for matching; an entry is consumed once it is matched
    remaining_p1_rds_ids = w.get_data_from_one_col_as_list(
        screenings_path, rds_label)
    remaining_p2_unique_ids = w.get_data_from_one_col_as_list(
        network_path, unique_label)

    # lookups keyed by unique id, both taken from the old nodes file
    rds_by_unique = w.get_no_null_entries_dict_from_csv(
        nodes_path, unique_label, rds_label)
    pid_by_unique = w.get_no_null_entries_dict_from_csv(
        nodes_path, unique_label, pid_label)

    for unique_id, rds_id in rds_by_unique.items():
        # the unique id has to be present on the p2 side ...
        if unique_id not in remaining_p2_unique_ids:
            continue
        # (it is consumed here even if the p1 check below fails)
        remaining_p2_unique_ids.remove(unique_id)

        # ... and the rds id has to be present on the p1 side
        if rds_id not in remaining_p1_rds_ids:
            continue
        remaining_p1_rds_ids.remove(rds_id)

        # both sides matched: record this participant's ids in the overlap file
        overlap_row = [pid_by_unique[unique_id], rds_id, unique_id]
        w.append_row_to_csv(overlap_path, overlap_row)

示例#3

0

显示文件

def create_rds_edges_csv(path_rds_edges, old_edge_data, sender_index, receiver_index, rds_index):
    """Input: rds_edges.csv file path; rows from the old edge file;
    positions of the sender, receiver, and RDS-edge columns within each row.
    Output: Creates the rds_edges.csv file with a single edge ID column and adds
    one edge ID (sender value + receiver value) per row flagged 'Yes' as RDS edge."""
    w.create_csv_add_column_labels(path_rds_edges, [c.LABEL_EDGE_ID])
    # lazily pick out only the rows whose RDS-edge column reads Yes
    rds_rows = (edge for edge in old_edge_data if edge[rds_index] == 'Yes')
    for edge in rds_rows:
        sender, receiver = edge[sender_index], edge[receiver_index]
        w.append_row_to_csv(path_rds_edges, [sender + receiver])

示例#4

0

显示文件

def create_network_edges_csv(path_net_edges, old_edge_data, sender_index, receiver_index, net_index):
    """Input: net_edges.csv file path; rows from the old edge file;
    positions of the sender, receiver, and network-supplement columns within each row.
    Output: Creates the network_edges.csv file with a single edge ID column and adds
    one edge ID (sender value + receiver value) per row flagged 'Yes' as network supplement."""
    w.create_csv_add_column_labels(path_net_edges, [c.LABEL_EDGE_ID])
    for record in old_edge_data:
        is_net_sup_edge = record[net_index] == 'Yes'
        if is_net_sup_edge:
            sender = record[sender_index]
            receiver = record[receiver_index]
            # the edge ID is the sender value followed by the receiver value
            w.append_row_to_csv(path_net_edges, [sender + receiver])

示例#5

0

显示文件

def create_edges_csv(path_edges, old_edge_data, sender_index, receiver_index):
    """Input: edges.csv file path; rows from the old edge file (any iterable of rows);
    positions of the sender and receiver columns within each row.
    Output: Creates edges.csv file with edge ID, sender and receiver columns and
    populates it with one row per old edge row; the edge ID is the sender value
    followed by the receiver value."""
    w.create_csv_add_column_labels(
        path_edges,
        [c.LABEL_EDGE_ID, c.LABEL_SENDER_PID, c.LABEL_RECEIVER_PID])
    # iterate the rows directly, like the sibling rds/network edge functions do
    for row in old_edge_data:
        sender, receiver = row[sender_index], row[receiver_index]
        w.append_row_to_csv(path_edges, [sender + receiver, sender, receiver])

示例#6

0

显示文件

文件： prepare_all_p2_csvs.py 项目： dataOtter/PRDE

def make_net_sup_extract_file(path_old_net, path_net_sup_extract):
    """Input: File path of deprecated network supplement and new net sup extract.
    Output: Adds a merged unique ID column to the deprecated net sup file, then
    creates the net sup extract file with project ID, P2SF, P2NS1 and unique ID
    columns. One row is written for every deprecated net sup row whose unique ID
    occurs in the subjects IDs file (c.SUBJECTS_IDS_PATH); for any other row a
    "not found" message is printed and the row is skipped."""
    # make unique id column in deprecated net sup file
    w.add_merged_col_to_csv(path_old_net, c.LABEL_UNIQUE_ID,
                            c.NETWORK_HCV_HIV_COLS_TO_MERGE)

    unique_ids_from_subs_ids = w.get_data_from_one_col_as_list(
        c.SUBJECTS_IDS_PATH, c.LABEL_UNIQUE_ID)
    unique_ids_from_depr_net_sup = w.get_data_from_one_col_as_list(
        path_old_net, c.LABEL_UNIQUE_ID)

    subs_ids_data = w.get_csv_as_list(c.SUBJECTS_IDS_PATH)[1:]
    depr_net_sup_data = w.get_csv_as_list(path_old_net)[1:]

    w.create_csv_add_column_labels(
        path_net_sup_extract,
        [c.LABEL_PID, c.LABEL_P2SF, c.LABEL_P2NS1, c.LABEL_UNIQUE_ID])

    pid_index_subs_ids = w.get_index_of_file_col(c.SUBJECTS_IDS_PATH,
                                                 c.LABEL_PID)
    p2sf_index_depr_net_sup = w.get_index_of_file_col(path_old_net,
                                                      c.LABEL_P2SF)
    p2ns1_index_depr_net_sup = w.get_index_of_file_col(path_old_net,
                                                       c.LABEL_P2NS1)

    # Map each unique ID to its row position in the subjects IDs data once,
    # instead of scanning the list for every net sup row. setdefault keeps the
    # first occurrence of a repeated ID, which is what list.index returned.
    subs_ids_row_by_unique_id = {}
    for row_index, known_id in enumerate(unique_ids_from_subs_ids):
        subs_ids_row_by_unique_id.setdefault(known_id, row_index)

    for i, unique_id in enumerate(unique_ids_from_depr_net_sup):
        subs_ids_row_index = subs_ids_row_by_unique_id.get(unique_id)
        if subs_ids_row_index is None:
            # Only a failed lookup is reported here; errors raised while
            # reading or writing rows are no longer mislabelled as "not found".
            print(unique_id + " not found in subjects_ids.csv")
            continue

        # get that row from subjects ids and read its project ID column entry
        pid = subs_ids_data[subs_ids_row_index][pid_index_subs_ids]
        # get current row from depr_net_sup to fetch its P2SF and P2NS1 entries
        depr_row = depr_net_sup_data[i]
        p2sf = depr_row[p2sf_index_depr_net_sup]
        p2ns1 = depr_row[p2ns1_index_depr_net_sup]
        # append the new row to the net_sup_extract file
        w.append_row_to_csv(path_net_sup_extract,
                            [pid, p2sf, p2ns1, unique_id])

示例#7

0

显示文件

文件： prepare_subjects_ids.py 项目： dataOtter/PRDE

def create_sub_ids_csv():
    """Input: None.
    Output: Creates subjects_ids.csv (c.SUBJECTS_IDS_PATH) with project ID, rds ID
    and unique ID columns and fills it from the old nodes file (c.OLD_NODES_PATH),
    writing '' for any value listed in c.NO_ENTRIES."""
    nodes_path = c.OLD_NODES_PATH
    sub_ids_path = c.SUBJECTS_IDS_PATH

    node_rows = w.get_csv_as_list(nodes_path)[1:]  # everything after the header row
    # column positions in the nodes file, in the order they are written out
    source_columns = (
        w.get_index_of_file_col(nodes_path, c.LABEL_PID),
        w.get_index_of_file_col(nodes_path, c.LABEL_RDS_ID),
        w.get_index_of_file_col(nodes_path, c.LABEL_UNIQUE_ID),
    )

    w.create_csv_add_column_labels(
        sub_ids_path, [c.LABEL_PID, c.LABEL_RDS_ID, c.LABEL_UNIQUE_ID])

    for node_row in node_rows:
        # blank out every "no entry" marker in the row before copying from it
        for position, cell in enumerate(node_row):
            if cell in c.NO_ENTRIES:
                node_row[position] = ''
        # project_id, rds_id, unique_id in the same order as the header above
        w.append_row_to_csv(sub_ids_path,
                            [node_row[column] for column in source_columns])