def oracle_review(sim):
    queue_params = {
        'pos_diameter': None,
        'neg_diameter': None,
    }
    infr = sim.infr
    prev = infr.verbose
    infr.verbose = 0
    # rng = np.random.RandomState(0)
    primary_truth = sim.primary_truth
    review_edges = infr.generate_reviews(**queue_params)
    max_reviews = 1000
    for count, (aid1, aid2) in enumerate(ut.ProgIter(review_edges)):
        state = primary_truth.loc[(aid1, aid2)].idxmax()
        tags = []
        infr.add_feedback(aid1, aid2, state, tags, apply=True,
                          rectify=False, user_id='oracle',
                          confidence='absolutely_sure')
        if count > max_reviews:
            break
    infr.verbose = prev

    sim.results['max_reviews'] = max_reviews

    n_clusters, n_inconsistent = infr.relabel_using_reviews(rectify=False)
    assert n_inconsistent == 0, 'should not create any inconsistencies'
    sim.results['n_user_clusters'] = n_clusters
    # infr.apply_review_inference()

    curr_decisions = infr.edge_attr_df('decision')
    curr_truth = primary_truth.loc[curr_decisions.index].idxmax(axis=1)
    n_user_mistakes = curr_decisions != curr_truth
    sim.results['n_user_mistakes'] = sum(n_user_mistakes)

    gt_clusters = ut.group_pairs(infr.gen_node_attrs('orig_name_label'))
    curr_clusters = ut.group_pairs(infr.gen_node_attrs('name_label'))

    compare_results = compare_groups(list(gt_clusters.values()),
                                     list(curr_clusters.values()))
    sim.results.update(ut.map_vals(len, compare_results))

    common_per_num = ut.group_items(compare_results['common'],
                                    map(len, compare_results['common']))
    sumafter = 3
    greater = [i for i in common_per_num.keys() if i > sumafter]
    common_per_num['>%s' % sumafter] = ut.flatten(
        ut.take(common_per_num, greater))
    ut.delete_keys(common_per_num, greater)
    for k, v in common_per_num.items():
        sim.results['common@' + str(k)] = len(v)

    sim.results['n_names_common'] = len(compare_results['common'])
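def _demo_size_histogram_tail():
    """
    Sketch of the histogram bucketing used at the end of oracle_review:
    recovered clusters are grouped by size, and every size above `sumafter`
    collapses into a single '>3' bucket. This is a pure-Python, hypothetical
    stand-in for the ut.group_items / ut.take / ut.delete_keys calls, not the
    utool API itself.
    """
    clusters = [{1}, {2, 3}, {4, 5}, {6, 7, 8}, {9, 10, 11, 12, 13}]
    per_num = {}
    for cluster in clusters:
        # group clusters by their size
        per_num.setdefault(len(cluster), []).append(cluster)
    sumafter = 3
    greater = [k for k in list(per_num.keys()) if k > sumafter]
    # merge all oversized buckets into one tail bucket
    per_num['>%s' % sumafter] = [c for k in greater for c in per_num.pop(k)]
    return {k: len(v) for k, v in per_num.items()}
    # -> {1: 1, 2: 2, 3: 1, '>3': 1}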
def apply_dummy_viewpoints(infr):
    # transition_rate = 0.5
    transition_rate = 0
    valid_views = ['L', 'F', 'R', 'B']
    rng = np.random.RandomState(42)

    class MarkovView(object):
        def __init__(self):
            self.dir_ = +1
            self.state = 0

        def __call__(self):
            return self.next_state()

        def next_state(self):
            # Bounce the walk direction off either end of valid_views
            if self.dir_ == -1 and self.state <= 0:
                self.dir_ = +1
            if self.dir_ == +1 and self.state >= len(valid_views) - 1:
                self.dir_ = -1
            if rng.rand() < transition_rate:
                self.state += self.dir_
            return valid_views[self.state]

    mkv = MarkovView()
    nid_to_aids = ut.group_pairs([
        (n, d['name_label'])
        for n, d in infr.graph.nodes(data=True)
    ])
    grouped_nodes = list(nid_to_aids.values())
    node_to_view = {node: mkv() for nodes in grouped_nodes for node in nodes}
    infr.set_node_attrs('viewpoint', node_to_view)
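def _demo_markov_view():
    """
    Sketch of the bouncing-viewpoint walk inside apply_dummy_viewpoints,
    isolated so it can be run without an `infr` object. A hypothetical demo
    helper assuming only numpy; note the function above currently uses
    transition_rate = 0, so it would emit a constant 'L'.
    """
    import numpy as np
    valid_views = ['L', 'F', 'R', 'B']
    rng = np.random.RandomState(42)
    transition_rate = 0.5
    state, dir_ = 0, +1
    seq = []
    for _ in range(10):
        # reverse direction at either boundary of the view list
        if dir_ == -1 and state <= 0:
            dir_ = +1
        if dir_ == +1 and state >= len(valid_views) - 1:
            dir_ = -1
        # step with probability transition_rate, otherwise stay put
        if rng.rand() < transition_rate:
            state += dir_
        seq.append(valid_views[state])
    return seq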
def check_baseline_results(sim):
    import networkx as nx
    infr = sim.infr
    n_names_possible = 0
    real_groups = ut.group_pairs(infr.gen_node_attrs('orig_name_label'))
    possible_clusters = []
    for nid, nodes in real_groups.items():
        if len(nodes) == 1:
            possible_clusters.append(nodes)
            n_names_possible += 1
            continue
        cc_cand_edges = list(ut.nx_edges_between(infr.graph, nodes))
        cc = ut.nx_from_node_edge(nodes, cc_cand_edges)
        mst = nx.minimum_spanning_tree(cc)
        ccs = list(nx.connected_components(mst))
        possible_clusters.extend(ccs)
        n_names_possible += len(ccs)

    sumafter = 3

    best_possible_compare_results = compare_groups(
        list(real_groups.values()), list(possible_clusters))
    possible_per_num = ut.map_vals(
        len,
        ut.group_items(best_possible_compare_results['common'],
                       map(len, best_possible_compare_results['common'])))
    greater = [i for i in possible_per_num.keys() if i > sumafter]
    possible_per_num['>%s' % sumafter] = sum(
        ut.take(possible_per_num, greater))
    ut.delete_keys(possible_per_num, greater)
    for k, v in possible_per_num.items():
        sim.results['possible@' + str(k)] = v
    sim.results['possible'] = len(best_possible_compare_results['common'])

    # Measure the number of real names in the test (per number of annots)
    real_per_num = ut.dict_hist(map(len, real_groups.values()))
    greater = [i for i in real_per_num.keys() if i > sumafter]
    real_per_num['>%s' % sumafter] = sum(ut.take(real_per_num, greater))
    ut.delete_keys(real_per_num, greater)
    for k, v in real_per_num.items():
        sim.results['real@' + str(k)] = v

    sim.results['n_names_possible'] = n_names_possible
    sim.results['n_names_real'] = len(real_groups)
    sim.results['real'] = len(real_groups)
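def _demo_possible_clusters():
    """
    Sketch of the baseline computation in check_baseline_results: restrict
    the candidate graph to one real name's nodes, take a minimum spanning
    tree (a spanning forest when disconnected), and read its connected
    components off as the best clusters any reviewer could recover from the
    available candidate edges. A hypothetical demo using plain networkx in
    place of the ut.nx_edges_between / ut.nx_from_node_edge helpers.
    """
    import networkx as nx
    graph = nx.Graph([(1, 2), (2, 3), (4, 5)])  # candidate edges
    nodes = [1, 2, 3, 4, 5, 6]  # annots sharing one real name
    cc = nx.Graph()
    cc.add_nodes_from(nodes)
    # keep only candidate edges whose endpoints are both in this name
    cc.add_edges_from(e for e in graph.edges() if set(e) <= set(nodes))
    forest = nx.minimum_spanning_tree(cc)
    return sorted(nx.connected_components(forest), key=min)
    # -> [{1, 2, 3}, {4, 5}, {6}]: at best, three separate names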
def update_visual_attrs(infr, graph=None,
                        show_reviewed_edges=True,
                        show_unreviewed_edges=False,
                        show_inferred_diff=True,
                        show_inferred_same=True,
                        show_recent_review=False,
                        highlight_reviews=True,
                        show_inconsistency=True,
                        wavy=False,
                        simple_labels=False,
                        show_labels=True,
                        reposition=True,
                        use_image=False,
                        edge_overrides=None,
                        node_overrides=None,
                        colorby='name_label',
                        **kwargs
                        # hide_unreviewed_inferred=True
                        ):
    import wbia.plottool as pt

    infr.print('update_visual_attrs', 3)
    if graph is None:
        graph = infr.graph
    # if hide_cuts is not None:
    #     # show_unreviewed_cuts = not hide_cuts
    #     show_reviewed_cuts = not hide_cuts

    if not getattr(infr, '_viz_init_nodes', False):
        infr._viz_init_nodes = True
        nx.set_node_attributes(graph, name='shape', values='circle')
        # infr.set_node_attrs('shape', 'circle')

    if getattr(infr, '_viz_image_config_dirty', True):
        infr.update_node_image_attribute(graph=graph, use_image=use_image)

    def get_any(dict_, keys, default=None):
        for key in keys:
            if key in dict_:
                return dict_[key]
        return default

    show_cand = get_any(kwargs, ['show_candidate_edges', 'show_candidates',
                                 'show_cand'])
    if show_cand is not None:
        show_cand = True
        show_reviewed_edges = True
        show_unreviewed_edges = True
        show_inferred_diff = True
        show_inferred_same = True

    if kwargs.get('show_all'):
        show_cand = True

    # alpha_low = .5
    alpha_med = 0.9
    alpha_high = 1.0

    dark_background = graph.graph.get('dark_background', None)

    # Ensure we are starting from a clean slate
    # if reposition:
    ut.nx_delete_edge_attr(graph, infr.visual_edge_attrs_appearance)

    # Set annotation node labels
    node_to_nid = None
    if not show_labels:
        nx.set_node_attributes(graph, name='label',
                               values=ut.dzip(graph.nodes(), ['']))
    else:
        if simple_labels:
            nx.set_node_attributes(
                graph, name='label', values={n: str(n) for n in graph.nodes()})
        else:
            if node_to_nid is None:
                node_to_nid = nx.get_node_attributes(graph, 'name_label')
            node_to_view = nx.get_node_attributes(graph, 'viewpoint')
            if node_to_view:
                annotnode_to_label = {
                    aid: 'aid=%r%s\nnid=%r' % (aid, node_to_view[aid],
                                               node_to_nid[aid])
                    for aid in graph.nodes()
                }
            else:
                annotnode_to_label = {
                    aid: 'aid=%r\nnid=%r' % (aid, node_to_nid[aid])
                    for aid in graph.nodes()
                }
            nx.set_node_attributes(graph, name='label',
                                   values=annotnode_to_label)

    # NODE_COLOR: based on name_label
    ut.color_nodes(graph, labelattr=colorby,
                   outof=kwargs.get('outof', None), sat_adjust=-0.4)

    # EDGES:
    # Grab different types of edges
    edges, edge_colors = infr.get_colored_edge_weights(graph,
                                                       highlight_reviews)

    # reviewed_states = nx.get_edge_attributes(graph, 'evidence_decision')
    reviewed_states = {e: infr.edge_decision(e) for e in infr.graph.edges()}
    edge_to_inferred_state = nx.get_edge_attributes(graph, 'inferred_state')
    # dummy_edges = [edge for edge, flag in
    #                nx.get_edge_attributes(graph, '_dummy_edge').items()
    #                if flag]
    edge_to_reviewid = nx.get_edge_attributes(graph, 'review_id')
    recheck_edges = [
        edge for edge, split in
        nx.get_edge_attributes(graph, 'maybe_error').items() if split
    ]
    decision_to_edge = ut.group_pairs(reviewed_states.items())
    neg_edges = decision_to_edge[NEGTV]
    pos_edges = decision_to_edge[POSTV]
    incomp_edges = decision_to_edge[INCMP]
    unreviewed_edges = decision_to_edge[UNREV]

    inferred_same = [edge for edge, state in edge_to_inferred_state.items()
                     if state == 'same']
    inferred_diff = [edge for edge, state in edge_to_inferred_state.items()
                     if state == 'diff']
    inconsistent_external = [
        edge for edge, state in edge_to_inferred_state.items()
        if state == 'inconsistent_external'
    ]
    inferred_notcomp = [edge for edge, state in edge_to_inferred_state.items()
                        if state == 'notcomp']

    reviewed_edges = incomp_edges + pos_edges + neg_edges
    compared_edges = pos_edges + neg_edges
    uncompared_edges = ut.setdiff(edges, compared_edges)
    nontrivial_inferred_same = ut.setdiff(
        inferred_same, pos_edges + neg_edges + incomp_edges)
    nontrivial_inferred_diff = ut.setdiff(
        inferred_diff, pos_edges + neg_edges + incomp_edges)
    nontrivial_inferred_edges = (nontrivial_inferred_same +
                                 nontrivial_inferred_diff)

    # EDGE_COLOR: based on edge_weight
    nx.set_edge_attributes(graph, name='color',
                           values=ut.dzip(edges, edge_colors))

    # LINE_WIDTH: based on review_state
    # unreviewed_width = 2.0
    # reviewed_width = 5.0
    unreviewed_width = 1.0
    reviewed_width = 2.0
    if highlight_reviews:
        nx.set_edge_attributes(
            graph, name='linewidth',
            values=ut.dzip(reviewed_edges, [reviewed_width]))
        nx.set_edge_attributes(
            graph, name='linewidth',
            values=ut.dzip(unreviewed_edges, [unreviewed_width]))
    else:
        nx.set_edge_attributes(graph, name='linewidth',
                               values=ut.dzip(edges, [unreviewed_width]))

    # EDGE_STROKE: based on decision and maybe_error
    # fg = pt.WHITE if dark_background else pt.BLACK
    # nx.set_edge_attributes(graph, name='stroke', values=ut.dzip(
    #     reviewed_edges, [{'linewidth': 3, 'foreground': fg}]))
    if show_inconsistency:
        nx.set_edge_attributes(graph, name='stroke', values=ut.dzip(
            recheck_edges,
            [{'linewidth': 5, 'foreground': infr._error_color}]))

    # Set linestyles to emphasize PCCs
    # Dash lines between PCCs inferred to be different
    nx.set_edge_attributes(graph, name='linestyle',
                           values=ut.dzip(inferred_diff, ['dashed']))
    # Treat incomparable/inconsistent-external inference as different
    nx.set_edge_attributes(graph, name='linestyle',
                           values=ut.dzip(inferred_notcomp, ['dashed']))
    nx.set_edge_attributes(graph, name='linestyle',
                           values=ut.dzip(inconsistent_external, ['dashed']))
    # Dot lines that we are unsure of
    nx.set_edge_attributes(graph, name='linestyle',
                           values=ut.dzip(unreviewed_edges, ['dotted']))

    # Cut edges are implicit and dashed
    # nx.set_edge_attributes(graph, name='implicit',
    #                        values=ut.dzip(cut_edges, [True]))
    # nx.set_edge_attributes(graph, name='linestyle',
    #                        values=ut.dzip(cut_edges, ['dashed']))
    # nx.set_edge_attributes(graph, name='alpha',
    #                        values=ut.dzip(cut_edges, [alpha_med]))

    nx.set_edge_attributes(graph, name='implicit',
                           values=ut.dzip(uncompared_edges, [True]))

    # Only matching edges should impose constraints on the graph layout
    nx.set_edge_attributes(graph, name='implicit',
                           values=ut.dzip(neg_edges, [True]))
    nx.set_edge_attributes(graph, name='alpha',
                           values=ut.dzip(neg_edges, [alpha_med]))
    nx.set_edge_attributes(graph, name='implicit',
                           values=ut.dzip(incomp_edges, [True]))
    nx.set_edge_attributes(graph, name='alpha',
                           values=ut.dzip(incomp_edges, [alpha_med]))

    # Ensure reviewed edges are visible
    nx.set_edge_attributes(graph, name='implicit',
                           values=ut.dzip(reviewed_edges, [False]))
    nx.set_edge_attributes(graph, name='alpha',
                           values=ut.dzip(reviewed_edges, [alpha_high]))

    if True:
        # Inferred same edges can be allowed to constrain in order
        # to make things look nice sometimes
        nx.set_edge_attributes(graph, name='implicit',
                               values=ut.dzip(inferred_same, [False]))
        nx.set_edge_attributes(graph, name='alpha',
                               values=ut.dzip(inferred_same, [alpha_high]))

    if not kwargs.get('show_same', True):
        nx.set_edge_attributes(graph, name='alpha',
                               values=ut.dzip(inferred_same, [0]))

    if not kwargs.get('show_diff', True):
        nx.set_edge_attributes(graph, name='alpha',
                               values=ut.dzip(inferred_diff, [0]))

    if not kwargs.get('show_positive_edges', True):
        nx.set_edge_attributes(graph, name='alpha',
                               values=ut.dzip(pos_edges, [0]))

    if not kwargs.get('show_negative_edges', True):
        nx.set_edge_attributes(graph, name='alpha',
                               values=ut.dzip(neg_edges, [0]))

    if not kwargs.get('show_incomparable_edges', True):
        nx.set_edge_attributes(graph, name='alpha',
                               values=ut.dzip(incomp_edges, [0]))

    if not kwargs.get('show_between', True):
        if node_to_nid is None:
            node_to_nid = nx.get_node_attributes(graph, 'name_label')
        between_edges = [(u, v) for u, v in edges
                         if node_to_nid[u] != node_to_nid[v]]
        nx.set_edge_attributes(graph, name='alpha',
                               values=ut.dzip(between_edges, [0]))

    # SKETCH: based on inferred_edges
    # Make inferred edges wavy
    if wavy:
        # dict(scale=3.0, length=18.0, randomness=None)
        nx.set_edge_attributes(graph, name='sketch', values=ut.dzip(
            nontrivial_inferred_edges,
            [dict(scale=10.0, length=64.0, randomness=None)]))

    # Make dummy edges more transparent
    # nx.set_edge_attributes(graph, name='alpha',
    #                        values=ut.dzip(dummy_edges, [alpha_low]))
    selected_edges = kwargs.pop('selected_edges', None)

    # SHADOW: based on most recent
    # Increase visibility of nodes with the most recently changed timestamp
    if show_recent_review and edge_to_reviewid and selected_edges is None:
        review_ids = list(edge_to_reviewid.values())
        recent_idxs = ut.argmax(review_ids, multi=True)
        recent_edges = ut.take(list(edge_to_reviewid.keys()), recent_idxs)
        selected_edges = recent_edges

    if selected_edges is not None:
        # TODO: add photoshop-like parameters like spread and size.
        # offset is the same as angle and distance.
        nx.set_edge_attributes(graph, name='shadow', values=ut.dzip(
            selected_edges,
            [{
                'rho': 0.3,
                'alpha': 0.6,
                'shadow_color': 'w' if dark_background else 'k',
                'offset': (0, 0),
                'scale': 3.0,
            }]))

    # Z_ORDER: make sure nodes are on top
    nodes = list(graph.nodes())
    nx.set_node_attributes(graph, name='zorder', values=ut.dzip(nodes, [10]))
    nx.set_edge_attributes(graph, name='zorder', values=ut.dzip(edges, [0]))
    nx.set_edge_attributes(graph, name='picker', values=ut.dzip(edges, [10]))

    # VISIBILITY: Set visibility of edges based on arguments
    if not show_reviewed_edges:
        infr.print('Making reviewed edges invisible', 10)
        nx.set_edge_attributes(graph, name='style',
                               values=ut.dzip(reviewed_edges, ['invis']))
    if not show_unreviewed_edges:
        infr.print('Making un-reviewed edges invisible', 10)
        nx.set_edge_attributes(graph, name='style',
                               values=ut.dzip(unreviewed_edges, ['invis']))
    if not show_inferred_same:
        infr.print('Making nontrivial_same edges invisible', 10)
        nx.set_edge_attributes(
            graph, name='style',
            values=ut.dzip(nontrivial_inferred_same, ['invis']))
    if not show_inferred_diff:
        infr.print('Making nontrivial_diff edges invisible', 10)
        nx.set_edge_attributes(
            graph, name='style',
            values=ut.dzip(nontrivial_inferred_diff, ['invis']))

    if selected_edges is not None:
        # Always show the most recent review (remove setting of invis)
        # infr.print('recent_edges = %r' % (recent_edges,))
        nx.set_edge_attributes(graph, name='style',
                               values=ut.dzip(selected_edges, ['']))

    if reposition:
        # LAYOUT: update the positioning layout
        def get_layoutkw(key, default):
            return kwargs.get(key, graph.graph.get(key, default))

        layoutkw = dict(
            prog='neato',
            splines=get_layoutkw('splines', 'line'),
            fontsize=get_layoutkw('fontsize', None),
            fontname=get_layoutkw('fontname', None),
            sep=10 / 72,
            esep=1 / 72,
            nodesep=0.1,
        )
        layoutkw.update(kwargs)
        # logger.info(ut.repr3(graph.edges))
        pt.nx_agraph_layout(graph, inplace=True, **layoutkw)

    if edge_overrides:
        for key, edge_to_attr in edge_overrides.items():
            nx.set_edge_attributes(graph, name=key, values=edge_to_attr)
    if node_overrides:
        for key, node_to_attr in node_overrides.items():
            nx.set_node_attributes(graph, name=key, values=node_to_attr)
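def _demo_layered_edge_styling():
    """
    Minimal sketch of the pattern update_visual_attrs relies on: repeated
    nx.set_edge_attributes calls layer visual attributes, with later calls
    overriding earlier ones on overlapping edge sets (reviewed edges regain
    full alpha after the generic pass dims everything). A hypothetical demo
    with toy edge sets standing in for the POSTV/NEGTV/INCMP/UNREV
    decisions.
    """
    import networkx as nx
    graph = nx.Graph([(1, 2), (2, 3), (3, 4)])
    unreviewed = [(1, 2), (2, 3)]
    reviewed = [(3, 4)]
    # Base layer: everything faint, unreviewed edges dotted
    nx.set_edge_attributes(graph, name='alpha',
                           values={e: 0.5 for e in graph.edges()})
    nx.set_edge_attributes(graph, name='linestyle',
                           values={e: 'dotted' for e in unreviewed})
    # Override layer: reviewed edges fully visible and solid
    nx.set_edge_attributes(graph, name='alpha',
                           values={e: 1.0 for e in reviewed})
    nx.set_edge_attributes(graph, name='linestyle',
                           values={e: 'solid' for e in reviewed})
    return nx.get_edge_attributes(graph, 'alpha')
    # -> {(1, 2): 0.5, (2, 3): 0.5, (3, 4): 1.0}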
def clean_tags():
    zotero = get_libzotero()
    # dict of all zotero items
    # items = zotero.index
    # get sql cursor
    cur = zotero.cur

    if False:
        sorted(ut.util_sqlite.get_tablenames(cur))
        ut.print_database_structure(cur)
        # Debug info about the tag tables in sql:
        # the `tags` table stores all tags, and the `itemTags` table stores
        # the association between items and tags
        ut.get_table_columninfo_list(cur, 'fields')
        # ut.get_table_columninfo_list(cur, 'relations')
        ut.get_table_columninfo_list(cur, 'fieldsCombined')
        ut.get_table_columninfo_list(cur, 'itemData')
        ut.get_table_columninfo_list(cur, 'itemDataValues')
        ut.get_table_columninfo_list(cur, 'tags')
        ut.get_table_columninfo_list(cur, 'itemTags')

    import pandas as pd
    pd.options.display.max_colwidth = 40
    pd.options.display.max_rows = 20

    def pandas_sql(table, columns):
        return pd.DataFrame(ut.get_table_rows(cur, table, columns),
                            columns=columns)

    item_df = pandas_sql(
        'items', ('itemID', 'itemTypeID', 'libraryID', 'key')
    ).set_index('itemID', drop=False)
    tags_df = pandas_sql(
        'tags', ('tagID', 'name', 'type', 'libraryID', 'key')
    ).set_index('tagID', drop=False)

    itemData_df = pandas_sql('itemData', ('itemID', 'fieldID', 'valueID'))
    itemTag_df = pandas_sql('itemTags', ('itemID', 'tagID'))

    itemDataValues_df = pandas_sql(
        'itemDataValues', ('valueID', 'value')).set_index('valueID')
    field_df = pandas_sql(
        'fields', ('fieldID', 'fieldName', 'fieldFormatID')
    ).set_index('fieldID')

    itemData_df['value'] = itemDataValues_df['value'].loc[
        itemData_df['valueID'].values].values
    itemData_df['fieldName'] = field_df['fieldName'].loc[
        itemData_df['fieldID'].values].values

    titles = itemData_df[itemData_df['fieldName'] == 'title']
    # Every item should have exactly one title
    assert len(ut.unique(ut.map_vals(
        len, titles.groupby('itemID').indices).values())) == 1

    # itemTag_df.groupby('itemID').count()

    # Find how often each tag is used
    tagid_to_count = itemTag_df.groupby('tagID').count()
    tagid_to_count = tagid_to_count.rename(columns={'itemID': 'nItems'})
    tagid_to_count['name'] = tags_df.loc[tagid_to_count.index]['name']
    tagid_to_count = tagid_to_count.sort_values('nItems')

    bad_tags = tagid_to_count[tagid_to_count['nItems'] == 1]

    tagid_to_count['tag_ncharsize'] = tagid_to_count['name'].apply(len)
    tagid_to_count = tagid_to_count.sort_values('tag_ncharsize')
    bad_tags = tagid_to_count[
        tagid_to_count['tag_ncharsize'] > 25]['name'].values.tolist()

    def clean_tags2():
        api_key = 'fBDBqRPwW9O3mYyNLiksBKZy'
        base_url = 'https://api.zotero.org'
        library_id = '1279414'
        library_type = 'user'
        from pyzotero import zotero
        zot = zotero.Zotero(library_id, library_type, api_key)
        for chunk in ut.ProgChunks(bad_tags, 50):
            zot.delete_tags(*chunk)

    if False:
        import requests
        from urllib.parse import quote
        api_key = 'fBDBqRPwW9O3mYyNLiksBKZy'
        base_url = 'https://api.zotero.org'
        user_id = '1279414'
        userOrGroupPrefix = '/users/' + user_id
        params = {'v': 3, 'key': api_key}

        items_resp = requests.get(base_url + userOrGroupPrefix + '/items',
                                  params=params)
        print(items_resp.content)
        print(items_resp)

        # Page through all tags in the library
        json_tags = []
        get_url = base_url + userOrGroupPrefix + '/tags'
        while True:
            print('get_url = %r' % (get_url,))
            tag_resp = requests.get(get_url, params=params)
            if tag_resp.status_code != 200:
                break
            json_tags.extend(tag_resp.json())
            if 'next' in tag_resp.links:
                get_url = tag_resp.links['next']['url']
            else:
                break

        version_to_tags = ut.ddict(list)
        bad_tags = []
        for tag in ut.ProgIter(json_tags, label='parsing tags'):
            # x = requests.get(tag['links']['self']['href'], params=params)
            if tag['meta']['numItems'] == 1:
                try:
                    bad_tags.append(quote(tag['tag']))
                except Exception:
                    print('cant encode tag=%r' % (tag,))

        for chunk in ut.ProgIter(ut.ichunks(bad_tags, 50),
                                 length=int(len(bad_tags) / 50)):
            search_url = (base_url + userOrGroupPrefix + '/items?tag=' +
                          ' || '.join(chunk))
            r = requests.get(search_url, params=params)
            matching_items = r.json()
            # assert len(matching_items) == 1
            for item in matching_items:
                version = item['version']
                # FIXME: `tag` here leaks from the previous loop
                version_to_tags[version].append(tag['tag'])

        # DELETE MULTIPLE TAGS
        # NOTE: assumes bad_tags is still the frame with a 'name' column
        for chunk in ut.ichunks(bad_tags['name'], 50):
            encoded_chunk = []
            for t in chunk:
                try:
                    encoded_chunk.append(quote(t))
                except Exception:
                    print(t)
            suffix = ' || '.join(encoded_chunk)
            delete_url = base_url + userOrGroupPrefix + '/tags?' + suffix
            print('delete_url = %r' % (delete_url,))
            resp = requests.delete(delete_url, params=params)

    bad_tags = tagid_to_count[tagid_to_count['nItems'] == 1]
    bad_tags['tagID'] = bad_tags.index
    # Unfinished: remove the singleton tags directly in SQL, e.g.:
    #   DELETE FROM itemTags WHERE tagID IN
    #       (SELECT tagID FROM tags WHERE type = 1);
    # for name in bad_tags['name'].values.tolist(): ...

    item_df['title'] = titles.set_index('itemID')['value']
    for idx, item in zotero.index.items():
        sql_title = item_df.loc[item.id]['title']
        if item.title != sql_title:
            if pd.isnull(sql_title) and item.title is not None:
                print(item.__dict__)
                print(item_df.loc[item.id])
                print('item.title = %r' % (item.title,))
                print('sql_title = %r' % (sql_title,))
                assert False

    duplicate_tags = [
        (name, idxs)
        for name, idxs in tags_df.groupby('name', sort=True).indices.items()
        if len(idxs) > 2
    ]
    tagname_to_tagid = tags_df.groupby('name', sort=True).first()
    new_to_oldtags = {}
    # Determine which tagID to use for each duplicated name
    for tagname, idxs in duplicate_tags:
        tags_subdf = tags_df.iloc[idxs]
        mapping = itemTag_df[itemTag_df['tagID'].isin(tags_subdf['tagID'])]
        tag_hist = mapping.groupby('tagID').count()
        best_tagid = tag_hist['itemID'].idxmax()
        # Remap every other tagID with this name onto the most-used one
        new_to_oldtags[best_tagid] = set(tag_hist.index) - {best_tagid}
        tagname_to_tagid.loc[tagname] = tags_df.loc[best_tagid]
    # for col in tagname_to_tagid.columns:
    #     tagname_to_tagid.loc[tagname][col] = tags_df.loc[best_tagid][col]
    # tags_df.loc[best_tagid]

    if False:
        # Update tagIDs
        import sqlite3
        for newid, oldids in new_to_oldtags.items():
            for oldid in oldids:
                # cur.execute('SELECT itemID, tagID FROM itemTags '
                #             'WHERE tagID=?', (oldid,))
                try:
                    cmd = 'UPDATE itemTags SET tagID=? WHERE tagID=?'
                    args = (newid, oldid)
                    print('(%s) args = %r' % (cmd, args))
                    cur.execute(cmd, args)
                    print(cur.fetchall())
                except sqlite3.IntegrityError:
                    print('error')

    # tags_df.groupby('name', sort=True)
    # itemTag_df.groupby('itemID')
    # duptags = tags_df.iloc[tags_df.groupby('name', sort=True).indices['animals']]
    # duptags['tagID']
    # flags = itemTag_df['tagID'].isin(duptags['tagID'])
    # dup_rel = itemTag_df[flags]
    # item_df['title'].loc[dup_rel['itemID']].values
    # tags_df.iloc[tags_df.groupby('name', sort=True).indices['animals']]
    # tags_df[tags_df['type'] == 1]
    # tags_df[tags_df['type'] == 0]
    # tags_df['libraryID'].unique()
    # tags_df['type'].unique()

    # Scratch query (original was garbled):
    #   SELECT * FROM itemTags WHERE tagID IN
    #       (SELECT tagID FROM tags WHERE name = 'animals')

    item_tag_pairs = ut.get_table_rows(cur, 'itemTags', ('itemID', 'tagID'))
    # Group tags by item
    itemid_to_tagids = ut.group_pairs(item_tag_pairs)
    # Group items by tags
    tagid_to_itemids = ut.group_pairs(map(tuple, map(reversed,
                                                     item_tag_pairs)))
    # Mapping from tagid to name
    tagid_to_name = dict(ut.get_table_rows(cur, 'tags', ('tagID', 'name')))
    tagid_freq = list(ut.sort_dict(ut.map_vals(len, tagid_to_itemids),
                                   'vals').items())
    ut.sort_dict(ut.map_vals(sum, ut.group_pairs(
        [(freq, tagid_to_name.get(tagid, tagid))
         for tagid, freq in tagid_freq])), 'vals')
    tagname_freq = ut.map_keys(lambda k: tagid_to_name.get(k, k), tagid_freq)
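def _demo_merge_duplicate_tags():
    """
    Sketch of the duplicate-tag resolution in clean_tags using toy frames:
    for each tag name that appears under several tagIDs, keep the tagID
    attached to the most items and mark the rest for remapping. A
    hypothetical, self-contained mirror of the groupby/idxmax logic above;
    column names follow the Zotero schema, and the toy data is invented.
    """
    import pandas as pd
    tags_df = pd.DataFrame({'tagID': [1, 2, 3],
                            'name': ['cat', 'cat', 'dog']})
    itemTag_df = pd.DataFrame({'itemID': [10, 11, 12, 13],
                               'tagID': [1, 1, 2, 3]})
    new_to_old = {}
    for name, subdf in tags_df.groupby('name'):
        if len(subdf) < 2:
            continue  # name is not duplicated
        # count how many items each candidate tagID is attached to
        counts = itemTag_df[itemTag_df['tagID'].isin(subdf['tagID'])]
        hist = counts.groupby('tagID').count()['itemID']
        best = hist.idxmax()
        new_to_old[best] = set(subdf['tagID']) - {best}
    return new_to_old
    # -> {1: {2}}: remap itemTags rows from tagID 2 onto tagID 1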