def merge_top_contributors(nestdf, groupbycol, nestcol, newcolname, mergedf, key):
    """
    Nest a column of `nestdf` and join the result onto `mergedf`.

    nestdf: Dataframe whose `nestcol` values get nested
    groupbycol: Column(s) handed to uf.nest_for_json as the grouping key
    nestcol: Column handed to uf.nest_for_json as the data to nest
    newcolname: Name given to the nested column
    mergedf: Dataframe the nested result is merged into
    key: Column(s) to merge on

    Returns the inner join of `mergedf` and the nested dataframe, so rows
    whose `key` is missing on either side are dropped.
    """
    nested = uf.nest_for_json(nestdf, groupbycol, nestcol, newcolname)
    return pd.merge(left=mergedf, right=nested, how='inner', on=key)
def construct_network_graph_dict(df, groupbycols, nestcol, nestedkeyname):
    """
    Convert a dataframe into a list of JSON-style objects with one nested key.

    df: Dataframe that holds the data
    groupbycols: List of columns holding the single-value (non-array)
        values of each JSON object
    nestcol: Column whose data will be put into an array
    nestedkeyname: Name of the new key that holds the array

    Returns an array of JSON objects. Pythonically, it's a list of
    dictionaries (that contain lists in themselves).
    """
    # Only the grouping columns and the nested column are needed downstream.
    pruned = prune_dataframe(df, groupbycols + [nestcol])
    nested = uf.nest_for_json(pruned, groupbycols, nestcol, nestedkeyname)
    # Spell out 'records': the abbreviated orient 'r' was deprecated and is
    # rejected with a ValueError by pandas >= 2.0.
    return nested.to_dict('records')
def calculate_group_similarities(df, groupbycol, nestcol, newcolname):
    """
    Build a pairwise similarity table between groups.

    df: Dataframe that holds the data
    groupbycol: Column identifying each group
    nestcol: Column whose values are nested per group
    newcolname: Name of the column holding the nested values

    Cell (i, j) holds uf.list_similarites applied to the nested values of
    group i and group j.

    Returns a dataframe with one column per group (labelled with the group's
    `groupbycol` value) plus a `groupbycol` column naming the group of each
    row.
    """
    nested = uf.nest_for_json(df, groupbycol=groupbycol, nestcol=nestcol, newcolname=newcolname)
    size = len(nested)
    scores = np.zeros((size, size))

    # Label-based lookups with 0..size-1; presumably nest_for_json returns a
    # default integer index -- confirm against uf.
    members = nested[newcolname]
    for row in range(size):
        row_members = members[row]
        for col in range(size):
            scores[row, col] = uf.list_similarites(row_members, members[col])

    result = pd.DataFrame(scores)
    result.columns = nested[groupbycol]
    result[groupbycol] = nested[groupbycol]
    return result
def add_groups_item_to_dict(init_json, df, nestcol, newcolname):
    """
    Attach a nested column from a dataframe to matching JSON objects.

    init_json: Initial JSON array of objects (list of dicts); modified in
        place
    df: Dataframe to take the group items from
    nestcol: Name of the column that will be nested (thrown into a list)
    newcolname: Name of the new key added to each matching object

    Matches on guid: every object in `init_json` whose 'guid' equals a guid
    in `df` gets `newcolname` set to that guid's nested values. Objects
    without a matching guid are left untouched. Returns None.
    """
    nested_records = uf.nest_for_json(
        prune_dataframe(df, ['guid', nestcol]), 'guid', nestcol, newcolname
    ).to_dict('records')  # 'r' shorthand is rejected by pandas >= 2.0
    if not nested_records:
        # Nothing to attach; leave init_json exactly as it was.
        return

    # Index by guid once instead of rescanning every record for every item.
    # Later records overwrite earlier ones, as the last match did before.
    # NOTE(review): assumes guid values are hashable (e.g. strings) -- confirm.
    nested_by_guid = {
        record['guid']: record[newcolname] for record in nested_records
    }
    for init_item in init_json:
        guid = init_item['guid']
        if guid in nested_by_guid:
            init_item[newcolname] = nested_by_guid[guid]