def merge_top_contributors(nestdf, groupbycol, nestcol, newcolname, mergedf,
                           key):
    """
    Nest a dataframe with uf.nest_for_json and inner-join it onto mergedf.

    nestdf: Dataframe handed to uf.nest_for_json
    groupbycol: Grouping column(s) passed to uf.nest_for_json
    nestcol: Column passed to uf.nest_for_json as the column to nest
    newcolname: Name passed to uf.nest_for_json for the nested column
    mergedf: Dataframe on the left-hand side of the merge
    key: Column(s) used as the join key

    Returns the inner join of mergedf and the nested dataframe, so only
    rows whose key appears in both frames are kept.
    """
    nested = uf.nest_for_json(nestdf, groupbycol=groupbycol, nestcol=nestcol,
                              newcolname=newcolname)
    return pd.merge(left=mergedf, right=nested, how='inner', on=key)
示例#2
0
def construct_network_graph_dict(df, groupbycols, nestcol, nestedkeyname):
    """
    df: Dataframe that holds the data
    groupbycols: List of the single-value (non-array) columns of a JSON
        Object (must be a list, it is concatenated with [nestcol])
    nestcol: The data that will be put into an array
    nestedkeyname: Name of the new column that holds the nested array

    Returns an array of JSON objects.
    Pythonically, it's a list of dictionaries (that contain lists in
    themselves), one dictionary per row of the nested dataframe.
    """
    # Only the grouping columns and the column to nest are handed to the
    # nesting helper.
    pruned_df = prune_dataframe(df, groupbycols + [nestcol])
    nested_df = uf.nest_for_json(pruned_df, groupbycols, nestcol,
                                 nestedkeyname)

    # Spell out 'records': the abbreviated orient 'r' is deprecated in
    # pandas 1.x and rejected with a ValueError by pandas 2.x.
    return nested_df.to_dict('records')
def calculate_group_similarities(df, groupbycol, nestcol, newcolname):
    """
    Build a pairwise similarity matrix between the groups of a dataframe.

    df: Dataframe that holds the data
    groupbycol: Column whose values identify each group
    nestcol: Column passed to uf.nest_for_json as the column to nest
    newcolname: Name of the nested column in the intermediate dataframe

    Returns a square dataframe with one row and one column per row of the
    nested dataframe. Cell (i, j) holds uf.list_similarites applied to the
    nested values of row i and row j. The columns are labelled with the
    groupbycol values, and an extra column named groupbycol holds the
    groupbycol value of each row.
    """
    newdf = uf.nest_for_json(df, groupbycol=groupbycol,
                             nestcol=nestcol,
                             newcolname=newcolname)

    # Read the nested values once, by position. Indexing the Series with
    # [i] inside the loops is a label lookup, which only works when newdf
    # has a default 0..n-1 index and repeats the lookup n*n times.
    nested_values = newdf[newcolname].tolist()
    length = len(nested_values)

    similarity_matrix = np.zeros((length, length))
    # Every ordered pair is evaluated; no symmetry of list_similarites is
    # assumed.
    for i, left in enumerate(nested_values):
        for j, right in enumerate(nested_values):
            similarity_matrix[i, j] = uf.list_similarites(left, right)

    similarity_df = pd.DataFrame(similarity_matrix)
    similarity_df.columns = newdf[groupbycol]
    # Assign by position (.values): assigning the Series itself aligns on
    # the index and would yield NaN if newdf's index is not 0..n-1.
    similarity_df[groupbycol] = newdf[groupbycol].values

    return similarity_df
示例#4
0
def add_groups_item_to_dict(init_json, df, nestcol, newcolname):
    """
    init_json: Initial Json Array of Objects (a list of dictionaries, each
        with a 'guid' key)
    df: Dataframe to add group items
    nestcol: Name of column that will be nested (thrown into a list)
    newcolname: Name of new column

    Matches on guid

    Adds a specific column (nestcol) with name (newcolname) from
    Pandas dataframe (df) to the array (init_json). The items of init_json
    are modified in place; items whose guid has no match in df are left
    unchanged.
    """
    nested_df = uf.nest_for_json(prune_dataframe(df, ['guid', nestcol]),
                                 'guid', nestcol, newcolname)

    # Spell out 'records': the abbreviated orient 'r' is deprecated in
    # pandas 1.x and rejected with a ValueError by pandas 2.x.
    records = nested_df.to_dict('records')

    # Index the nested values by guid once so each init_item needs a single
    # lookup instead of a scan over every record. As with the scan, the
    # last record wins if a guid appears more than once.
    nested_by_guid = {record['guid']: record[newcolname]
                      for record in records}

    for init_item in init_json:
        guid = init_item['guid']
        # Matches on guid between the two items
        if guid in nested_by_guid:
            init_item[newcolname] = nested_by_guid[guid]