class QUAL(object):
    EXCELLENT = 5
    GOOD = 4
    OK = 3
    POOR = 2
    JUNK = 1
    UNKNOWN = None
    INT_TO_CODE = ut.odict([
        (EXCELLENT, 'excellent'),
        (GOOD, 'good'),
        (OK, 'ok'),
        (POOR, 'poor'),
        (JUNK, 'junk'),
        (UNKNOWN, 'unspecified'),
    ])
    INT_TO_NICE = ut.odict([
        (EXCELLENT, 'Excellent'),
        (GOOD, 'Good'),
        (OK, 'OK'),
        (POOR, 'Poor'),
        (JUNK, 'Junk'),
        (UNKNOWN, 'Unspecified'),
    ])
    CODE_TO_NICE = ut.map_keys(INT_TO_CODE, INT_TO_NICE)
    CODE_TO_INT = ut.invert_dict(INT_TO_CODE)
    NICE_TO_CODE = ut.invert_dict(CODE_TO_NICE)
    NICE_TO_INT = ut.invert_dict(INT_TO_NICE)
class CONFIDENCE(object):
    UNKNOWN = None
    GUESSING = 1
    NOT_SURE = 2
    PRETTY_SURE = 3
    ABSOLUTELY_SURE = 4
    INT_TO_CODE = ut.odict([
        (ABSOLUTELY_SURE, 'absolutely_sure'),
        (PRETTY_SURE, 'pretty_sure'),
        (NOT_SURE, 'not_sure'),
        (GUESSING, 'guessing'),
        (UNKNOWN, 'unspecified'),
    ])
    INT_TO_NICE = ut.odict([
        (ABSOLUTELY_SURE, 'Doubtless'),
        (PRETTY_SURE, 'Sure'),
        (NOT_SURE, 'Unsure'),
        (GUESSING, 'Guessing'),
        (UNKNOWN, 'Unspecified'),
    ])
    CODE_TO_NICE = ut.map_keys(INT_TO_CODE, INT_TO_NICE)
    CODE_TO_INT = ut.invert_dict(INT_TO_CODE)
    NICE_TO_CODE = ut.invert_dict(CODE_TO_NICE)
    NICE_TO_INT = ut.invert_dict(INT_TO_NICE)
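# A minimal usage sketch (added for illustration; not from the original
# module). Each of these enum-like classes exposes the same dict-based
# conversion tables, so translating between ints, codes, and nice names is a
# plain lookup:
#
#     >>> QUAL.CODE_TO_INT['good']
#     4
#     >>> QUAL.INT_TO_NICE[QUAL.OK]
#     'OK'
#     >>> CONFIDENCE.NICE_TO_INT['Sure'] == CONFIDENCE.PRETTY_SURE
#     True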
class META_DECISION(object):  # NOQA
    """
    Enumerated types of review codes and texts

    Notes:
        unreviewed: we don't have a meta decision
        same: we know this is the same animal through non-visual means
        diff: we know this is a different animal through non-visual means

    Example:
        >>> # ENABLE_DOCTEST
        >>> from wbia.constants import *  # NOQA
        >>> assert hasattr(META_DECISION, 'CODE')
        >>> assert hasattr(META_DECISION, 'NICE')
        >>> code1 = META_DECISION.INT_TO_CODE[META_DECISION.NULL]
        >>> code2 = META_DECISION.CODE.NULL
        >>> assert code1 == code2
        >>> nice1 = META_DECISION.INT_TO_NICE[META_DECISION.NULL]
        >>> nice2 = META_DECISION.NICE.NULL
        >>> assert nice1 == nice2
    """
    NULL = None
    DIFF = 0
    SAME = 1
    INT_TO_CODE = ut.odict([(NULL, 'null'), (DIFF, 'diff'), (SAME, 'same')])
    INT_TO_NICE = ut.odict([(NULL, 'NULL'), (DIFF, 'Different'), (SAME, 'Same')])
    CODE_TO_NICE = ut.map_keys(INT_TO_CODE, INT_TO_NICE)
    CODE_TO_INT = ut.invert_dict(INT_TO_CODE)
    NICE_TO_CODE = ut.invert_dict(CODE_TO_NICE)
    NICE_TO_INT = ut.invert_dict(INT_TO_NICE)
class EVIDENCE_DECISION(object):  # NOQA
    """
    TODO: change to EVIDENCE_DECISION / VISUAL_DECISION

    Enumerated types of review codes and texts

    Notes:
        unreviewed: not compared yet
        nomatch: visually comparable and the animals are different
        match: visually comparable and the animals are the same
        notcomp: not comparable, i.e. it is actually impossible to determine
        unknown: reviewed, but the reviewer could not figure it out
    """
    UNREVIEWED = None
    NEGATIVE = 0
    POSITIVE = 1
    INCOMPARABLE = 2
    UNKNOWN = 3
    INT_TO_CODE = ut.odict([
        (POSITIVE, 'match'),
        (NEGATIVE, 'nomatch'),
        (INCOMPARABLE, 'notcomp'),
        (UNKNOWN, 'unknown'),
        (UNREVIEWED, 'unreviewed'),
    ])
    INT_TO_NICE = ut.odict([
        (POSITIVE, 'Positive'),
        (NEGATIVE, 'Negative'),
        (INCOMPARABLE, 'Incomparable'),
        (UNKNOWN, 'Unknown'),
        (UNREVIEWED, 'Unreviewed'),
    ])
    CODE_TO_NICE = ut.map_keys(INT_TO_CODE, INT_TO_NICE)
    CODE_TO_INT = ut.invert_dict(INT_TO_CODE)
    NICE_TO_CODE = ut.invert_dict(CODE_TO_NICE)
    NICE_TO_INT = ut.invert_dict(INT_TO_NICE)

    MATCH_CODE = CODE_TO_INT
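# Illustrative sketch (not from the original module): ``MATCH_CODE`` is a
# backwards-compatible alias bound to ``CODE_TO_INT``, so legacy call sites
# can resolve the review texts described in the Notes above directly:
#
#     >>> EVIDENCE_DECISION.MATCH_CODE['match']
#     1
#     >>> EVIDENCE_DECISION.CODE_TO_INT['notcomp']
#     2
#     >>> EVIDENCE_DECISION.INT_TO_NICE[EVIDENCE_DECISION.UNREVIEWED]
#     'Unreviewed'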
class VIEW(object):
    """
    Categorical viewpoint using the faces of a Rhombicuboctahedron

    References:
        https://en.wikipedia.org/wiki/Rhombicuboctahedron
    """
    UNKNOWN = None
    R = 1
    FR = 2
    F = 3
    FL = 4
    L = 5
    BL = 6
    B = 7
    BR = 8

    U = 9
    UF = 10
    UB = 11
    UL = 12
    UR = 13
    UFL = 14
    UFR = 15
    UBL = 16
    UBR = 17

    D = 18
    DF = 19
    DB = 20
    DL = 21
    DR = 22
    DFL = 23
    DFR = 24
    DBL = 25
    DBR = 26

    INT_TO_CODE = ut.odict([
        (UNKNOWN, 'unknown'),
        (R, 'right'),
        (FR, 'frontright'),
        (F, 'front'),
        (FL, 'frontleft'),
        (L, 'left'),
        (BL, 'backleft'),
        (B, 'back'),
        (BR, 'backright'),
        (U, 'up'),
        (UF, 'upfront'),
        (UB, 'upback'),
        (UL, 'upleft'),
        (UR, 'upright'),
        (UFL, 'upfrontleft'),
        (UFR, 'upfrontright'),
        (UBL, 'upbackleft'),
        (UBR, 'upbackright'),
        (D, 'down'),
        (DF, 'downfront'),
        (DB, 'downback'),
        (DL, 'downleft'),
        (DR, 'downright'),
        (DFL, 'downfrontleft'),
        (DFR, 'downfrontright'),
        (DBL, 'downbackleft'),
        (DBR, 'downbackright'),
    ])
    INT_TO_NICE = ut.odict([
        (UNKNOWN, 'Unknown'),
        (R, 'Right'),
        (FR, 'Front-Right'),
        (F, 'Front'),
        (FL, 'Front-Left'),
        (L, 'Left'),
        (BL, 'Back-Left'),
        (B, 'Back'),
        (BR, 'Back-Right'),
        (U, 'Up'),
        (UF, 'Up-Front'),
        (UB, 'Up-Back'),
        (UL, 'Up-Left'),
        (UR, 'Up-Right'),
        (UFL, 'Up-Front-Left'),
        (UFR, 'Up-Front-Right'),
        (UBL, 'Up-Back-Left'),
        (UBR, 'Up-Back-Right'),
        (D, 'Down'),
        (DF, 'Down-Front'),
        (DB, 'Down-Back'),
        (DL, 'Down-Left'),
        (DR, 'Down-Right'),
        (DFL, 'Down-Front-Left'),
        (DFR, 'Down-Front-Right'),
        (DBL, 'Down-Back-Left'),
        (DBR, 'Down-Back-Right'),
    ])
    CODE_TO_NICE = ut.map_keys(INT_TO_CODE, INT_TO_NICE)
    CODE_TO_INT = ut.invert_dict(INT_TO_CODE)
    NICE_TO_CODE = ut.invert_dict(CODE_TO_NICE)
    NICE_TO_INT = ut.invert_dict(INT_TO_NICE)

    DIST = {
        # DIST 0 PAIRS
        (B, B): 0, (BL, BL): 0, (BR, BR): 0, (D, D): 0, (DB, DB): 0,
        (DBL, DBL): 0, (DBR, DBR): 0, (DF, DF): 0, (DFL, DFL): 0,
        (DFR, DFR): 0, (DL, DL): 0, (DR, DR): 0, (F, F): 0, (FL, FL): 0,
        (FR, FR): 0, (L, L): 0, (R, R): 0, (U, U): 0, (UB, UB): 0,
        (UBL, UBL): 0, (UBR, UBR): 0, (UF, UF): 0, (UFL, UFL): 0,
        (UFR, UFR): 0, (UL, UL): 0, (UR, UR): 0,
        # DIST 1 PAIRS
        (B, BL): 1, (B, BR): 1, (B, DB): 1, (B, DBL): 1, (B, DBR): 1,
        (B, UB): 1, (B, UBL): 1, (B, UBR): 1, (BL, DBL): 1, (BL, L): 1,
        (BL, UBL): 1, (BR, DBR): 1, (BR, R): 1, (BR, UBR): 1, (D, DB): 1,
        (D, DBL): 1, (D, DBR): 1, (D, DF): 1, (D, DFL): 1, (D, DFR): 1,
        (D, DL): 1, (D, DR): 1, (DB, DBL): 1, (DB, DBR): 1, (DBL, DL): 1,
        (DBL, L): 1, (DBR, DR): 1, (DBR, R): 1, (DF, DFL): 1, (DF, DFR): 1,
        (DF, F): 1, (DFL, DL): 1, (DFL, F): 1, (DFL, FL): 1, (DFL, L): 1,
        (DFR, DR): 1, (DFR, F): 1, (DFR, FR): 1, (DFR, R): 1, (DL, L): 1,
        (DR, R): 1, (F, FL): 1, (F, FR): 1, (F, UF): 1, (F, UFL): 1,
        (F, UFR): 1, (FL, L): 1, (FL, UFL): 1, (FR, R): 1, (FR, UFR): 1,
        (L, UBL): 1, (L, UFL): 1, (L, UL): 1, (R, UBR): 1, (R, UFR): 1,
        (R, UR): 1, (U, UB): 1, (U, UBL): 1, (U, UBR): 1, (U, UF): 1,
        (U, UFL): 1, (U, UFR): 1, (U, UL): 1, (U, UR): 1, (UB, UBL): 1,
        (UB, UBR): 1, (UBL, UL): 1, (UBR, UR): 1, (UF, UFL): 1, (UF, UFR): 1,
        (UFL, UL): 1, (UFR, UR): 1,
        # DIST 2 PAIRS
        (B, D): 2, (B, DL): 2, (B, DR): 2, (B, L): 2, (B, R): 2,
        (B, U): 2, (B, UL): 2, (B, UR): 2, (BL, BR): 2, (BL, D): 2,
        (BL, DB): 2, (BL, DBR): 2, (BL, DFL): 2, (BL, DL): 2, (BL, FL): 2,
        (BL, U): 2, (BL, UB): 2, (BL, UBR): 2, (BL, UFL): 2, (BL, UL): 2,
        (BR, D): 2, (BR, DB): 2, (BR, DBL): 2, (BR, DFR): 2, (BR, DR): 2,
        (BR, FR): 2, (BR, U): 2, (BR, UB): 2, (BR, UBL): 2, (BR, UFR): 2,
        (BR, UR): 2, (D, F): 2, (D, FL): 2, (D, FR): 2, (D, L): 2,
        (D, R): 2, (DB, DF): 2, (DB, DFL): 2, (DB, DFR): 2, (DB, DL): 2,
        (DB, DR): 2, (DB, L): 2, (DB, R): 2, (DB, UB): 2, (DB, UBL): 2,
        (DB, UBR): 2, (DBL, DBR): 2, (DBL, DF): 2, (DBL, DFL): 2,
        (DBL, DFR): 2, (DBL, DR): 2, (DBL, FL): 2, (DBL, UB): 2,
        (DBL, UBL): 2, (DBL, UBR): 2, (DBL, UFL): 2, (DBL, UL): 2,
        (DBR, DF): 2, (DBR, DFL): 2, (DBR, DFR): 2, (DBR, DL): 2,
        (DBR, FR): 2, (DBR, UB): 2, (DBR, UBL): 2, (DBR, UBR): 2,
        (DBR, UFR): 2, (DBR, UR): 2, (DF, DL): 2, (DF, DR): 2, (DF, FL): 2,
        (DF, FR): 2, (DF, L): 2, (DF, R): 2, (DF, UF): 2, (DF, UFL): 2,
        (DF, UFR): 2, (DFL, DFR): 2, (DFL, DR): 2, (DFL, FR): 2,
        (DFL, UBL): 2, (DFL, UF): 2, (DFL, UFL): 2, (DFL, UFR): 2,
        (DFL, UL): 2, (DFR, DL): 2, (DFR, FL): 2, (DFR, UBR): 2,
        (DFR, UF): 2, (DFR, UFL): 2, (DFR, UFR): 2, (DFR, UR): 2,
        (DL, DR): 2, (DL, F): 2, (DL, FL): 2, (DL, UBL): 2, (DL, UFL): 2,
        (DL, UL): 2, (DR, F): 2, (DR, FR): 2, (DR, UBR): 2, (DR, UFR): 2,
        (DR, UR): 2, (F, L): 2, (F, R): 2, (F, U): 2, (F, UL): 2,
        (F, UR): 2, (FL, FR): 2, (FL, U): 2, (FL, UBL): 2, (FL, UF): 2,
        (FL, UFR): 2, (FL, UL): 2, (FR, U): 2, (FR, UBR): 2, (FR, UF): 2,
        (FR, UFL): 2, (FR, UR): 2, (L, U): 2, (L, UB): 2, (L, UF): 2,
        (R, U): 2, (R, UB): 2, (R, UF): 2, (UB, UF): 2, (UB, UFL): 2,
        (UB, UFR): 2, (UB, UL): 2, (UB, UR): 2, (UBL, UBR): 2, (UBL, UF): 2,
        (UBL, UFL): 2, (UBL, UFR): 2, (UBL, UR): 2, (UBR, UF): 2,
        (UBR, UFL): 2, (UBR, UFR): 2, (UBR, UL): 2, (UF, UL): 2,
        (UF, UR): 2, (UFL, UFR): 2, (UFL, UR): 2, (UFR, UL): 2, (UL, UR): 2,
        # DIST 3 PAIRS
        (B, DF): 3, (B, DFL): 3, (B, DFR): 3, (B, FL): 3, (B, FR): 3,
        (B, UF): 3, (B, UFL): 3, (B, UFR): 3, (BL, DF): 3, (BL, DFR): 3,
        (BL, DR): 3, (BL, F): 3, (BL, R): 3, (BL, UF): 3, (BL, UFR): 3,
        (BL, UR): 3, (BR, DF): 3, (BR, DFL): 3, (BR, DL): 3, (BR, F): 3,
        (BR, L): 3, (BR, UF): 3, (BR, UFL): 3, (BR, UL): 3, (D, UB): 3,
        (D, UBL): 3, (D, UBR): 3, (D, UF): 3, (D, UFL): 3, (D, UFR): 3,
        (D, UL): 3, (D, UR): 3, (DB, F): 3, (DB, FL): 3, (DB, FR): 3,
        (DB, U): 3, (DB, UFL): 3, (DB, UFR): 3, (DB, UL): 3, (DB, UR): 3,
        (DBL, F): 3, (DBL, FR): 3, (DBL, R): 3, (DBL, U): 3, (DBL, UF): 3,
        (DBL, UR): 3, (DBR, F): 3, (DBR, FL): 3, (DBR, L): 3, (DBR, U): 3,
        (DBR, UF): 3, (DBR, UL): 3, (DF, U): 3, (DF, UBL): 3, (DF, UBR): 3,
        (DF, UL): 3, (DF, UR): 3, (DFL, R): 3, (DFL, U): 3, (DFL, UB): 3,
        (DFL, UR): 3, (DFR, L): 3, (DFR, U): 3, (DFR, UB): 3, (DFR, UL): 3,
        (DL, FR): 3, (DL, R): 3, (DL, U): 3, (DL, UB): 3, (DL, UBR): 3,
        (DL, UF): 3, (DL, UFR): 3, (DR, FL): 3, (DR, L): 3, (DR, U): 3,
        (DR, UB): 3, (DR, UBL): 3, (DR, UF): 3, (DR, UFL): 3, (F, UB): 3,
        (F, UBL): 3, (F, UBR): 3, (FL, R): 3, (FL, UB): 3, (FL, UBR): 3,
        (FL, UR): 3, (FR, L): 3, (FR, UB): 3, (FR, UBL): 3, (FR, UL): 3,
        (L, UBR): 3, (L, UFR): 3, (L, UR): 3, (R, UBL): 3, (R, UFL): 3,
        (R, UL): 3,
        # DIST 4 PAIRS
        (B, F): 4, (BL, FR): 4, (BR, FL): 4, (D, U): 4, (DB, UF): 4,
        (DBL, UFR): 4, (DBR, UFL): 4, (DF, UB): 4, (DFL, UBR): 4,
        (DFR, UBL): 4, (DL, UR): 4, (DR, UL): 4, (L, R): 4,
        # UNDEFINED DIST PAIRS
        (B, UNKNOWN): None, (BL, UNKNOWN): None, (BR, UNKNOWN): None,
        (D, UNKNOWN): None, (DB, UNKNOWN): None, (DBL, UNKNOWN): None,
        (DBR, UNKNOWN): None, (DF, UNKNOWN): None, (DFL, UNKNOWN): None,
        (DFR, UNKNOWN): None, (DL, UNKNOWN): None, (DR, UNKNOWN): None,
        (F, UNKNOWN): None, (FL, UNKNOWN): None, (FR, UNKNOWN): None,
        (L, UNKNOWN): None, (R, UNKNOWN): None, (U, UNKNOWN): None,
        (UB, UNKNOWN): None, (UBL, UNKNOWN): None, (UBR, UNKNOWN): None,
        (UF, UNKNOWN): None, (UFL, UNKNOWN): None, (UFR, UNKNOWN): None,
        (UL, UNKNOWN): None, (UR, UNKNOWN): None,
        (UNKNOWN, B): None, (UNKNOWN, BL): None, (UNKNOWN, BR): None,
        (UNKNOWN, D): None, (UNKNOWN, DB): None, (UNKNOWN, DBL): None,
        (UNKNOWN, DBR): None, (UNKNOWN, DF): None, (UNKNOWN, DFL): None,
        (UNKNOWN, DFR): None, (UNKNOWN, DL): None, (UNKNOWN, DR): None,
        (UNKNOWN, F): None, (UNKNOWN, FL): None, (UNKNOWN, FR): None,
        (UNKNOWN, L): None, (UNKNOWN, R): None, (UNKNOWN, U): None,
        (UNKNOWN, UB): None, (UNKNOWN, UBL): None, (UNKNOWN, UBR): None,
        (UNKNOWN, UF): None, (UNKNOWN, UFL): None, (UNKNOWN, UFR): None,
        (UNKNOWN, UL): None, (UNKNOWN, UR): None,
        (UNKNOWN, UNKNOWN): None,
    }
    # make distance symmetric
    for (f1, f2), d in list(DIST.items()):
        DIST[(f2, f1)] = d
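# Illustrative sketch (not from the original module): ``VIEW.DIST`` counts how
# many face-steps separate two viewpoints on the rhombicuboctahedron; the loop
# above makes lookups order-independent, and any pair involving UNKNOWN maps
# to None:
#
#     >>> VIEW.DIST[(VIEW.R, VIEW.FR)]   # adjacent faces
#     1
#     >>> VIEW.DIST[(VIEW.L, VIEW.R)]    # opposite faces
#     4
#     >>> VIEW.DIST[(VIEW.FR, VIEW.R)] == VIEW.DIST[(VIEW.R, VIEW.FR)]
#     True
#     >>> VIEW.DIST[(VIEW.R, VIEW.UNKNOWN)] is None
#     True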
def clean_tags():
    zotero = get_libzotero()
    # dict of all zotero items
    # items = zotero.index
    # get sql cursor
    cur = zotero.cur

    if False:
        sorted(ut.util_sqlite.get_tablenames(cur))
        ut.print_database_structure(cur)
        # Debug info about the tag tables in sql
        # The `tags` table stores all tags
        # The `itemTags` table stores the association between items and tags
        ut.get_table_columninfo_list(cur, 'fields')
        # ut.get_table_columninfo_list(cur, 'relations')
        ut.get_table_columninfo_list(cur, 'fieldsCombined')
        ut.get_table_columninfo_list(cur, 'itemData')
        ut.get_table_columninfo_list(cur, 'itemDataValues')
        ut.get_table_columninfo_list(cur, 'tags')
        ut.get_table_columninfo_list(cur, 'itemTags')

    import pandas as pd
    pd.options.display.max_colwidth = 40
    pd.options.display.max_rows = 20

    def pandas_sql(table, columns):
        return pd.DataFrame(ut.get_table_rows(cur, table, columns),
                            columns=columns)

    item_df = pandas_sql('items', ('itemID', 'itemTypeID', 'libraryID', 'key'))
    item_df = item_df.set_index('itemID', drop=False)
    tags_df = pandas_sql('tags', ('tagID', 'name', 'type', 'libraryID', 'key'))
    tags_df = tags_df.set_index('tagID', drop=False)

    itemData_df = pandas_sql('itemData', ('itemID', 'fieldID', 'valueID'))
    itemTag_df = pandas_sql('itemTags', ('itemID', 'tagID'))

    itemDataValues_df = pandas_sql('itemDataValues', ('valueID', 'value'))
    itemDataValues_df = itemDataValues_df.set_index('valueID')
    field_df = pandas_sql('fields', ('fieldID', 'fieldName', 'fieldFormatID'))
    field_df = field_df.set_index('fieldID')

    itemData_df['value'] = itemDataValues_df['value'].loc[
        itemData_df['valueID'].values].values
    itemData_df['fieldName'] = field_df['fieldName'].loc[
        itemData_df['fieldID'].values].values

    titles = itemData_df[itemData_df['fieldName'] == 'title']
    # every item should have exactly one title row
    assert len(ut.unique(ut.map_vals(
        len, titles.groupby('itemID').indices).values())) == 1

    # itemTag_df.groupby('itemID').count()

    # Find how often each tag is used
    tagid_to_count = itemTag_df.groupby('tagID').count()
    tagid_to_count = tagid_to_count.rename(columns={'itemID': 'nItems'})
    tagid_to_count['name'] = tags_df.loc[tagid_to_count.index]['name']
    tagid_to_count = tagid_to_count.sort_values('nItems')
    bad_tags = tagid_to_count[tagid_to_count['nItems'] == 1]

    tagid_to_count['tag_ncharsize'] = tagid_to_count['name'].apply(len)
    tagid_to_count = tagid_to_count.sort_values('tag_ncharsize')
    bad_tags = tagid_to_count[
        tagid_to_count['tag_ncharsize'] > 25]['name'].values.tolist()

    def clean_tags2():
        api_key = 'fBDBqRPwW9O3mYyNLiksBKZy'
        base_url = 'https://api.zotero.org'
        library_id = '1279414'
        library_type = 'user'
        from pyzotero import zotero
        zot = zotero.Zotero(library_id, library_type, api_key)
        for chunk in ut.ProgChunks(bad_tags, 50):
            zot.delete_tags(*chunk)

    if False:
        import requests
        # the original used urllib2.quote (Python 2); urllib.parse.quote is
        # the equivalent
        from urllib.parse import quote
        api_key = 'fBDBqRPwW9O3mYyNLiksBKZy'
        base_url = 'https://api.zotero.org'
        user_id = '1279414'
        userOrGroupPrefix = '/users/' + user_id
        params = {'v': 3, 'key': api_key}
        items_resp = requests.get(base_url + userOrGroupPrefix + '/items',
                                  params=params)
        print(items_resp.content)
        print(items_resp)

        # Page through all tags in the library
        json_tags = []
        get_url = base_url + userOrGroupPrefix + '/tags'
        while True:
            print('get_url = %r' % (get_url,))
            tag_resp = requests.get(get_url, params=params)
            if tag_resp.status_code != 200:
                break
            json_tags.extend(tag_resp.json())
            if 'next' in tag_resp.links:
                get_url = tag_resp.links['next']['url']
            else:
                break

        version_to_tags = ut.ddict(list)
        bad_tags = []
        for tag in ut.ProgIter(json_tags, label='parsing tags'):
            # x = requests.get(tag['links']['self']['href'], params=params)
            if tag['meta']['numItems'] == 1:
                try:
                    bad_tags.append(quote(tag['tag']))
                except Exception:
                    print('cannot encode tag=%r' % (tag,))

        for chunk in ut.ProgIter(ut.ichunks(bad_tags, 50),
                                 length=len(bad_tags) // 50):
            search_url = (base_url + userOrGroupPrefix + '/items?tag=' +
                          ' || '.join(chunk))
            r = requests.get(search_url, params=params)
            matching_items = r.json()
            # assert len(matching_items) == 1
            for item in matching_items:
                # record which tags were seen at which library version
                # (the original leaked `tag` from the loop above here)
                version_to_tags[item['version']].extend(chunk)

        # DELETE MULTIPLE TAGS
        # (`bad_tags` is a list of quoted tag names at this point)
        for chunk in ut.ichunks(bad_tags, 50):
            encoded_chunk = []
            for t in chunk:
                try:
                    encoded_chunk.append(quote(t))
                except Exception:
                    print(t)
            suffix = ' || '.join(encoded_chunk)
            delete_url = base_url + userOrGroupPrefix + '/tags?' + suffix
            print('delete_url = %r' % (delete_url,))
            resp = requests.delete(delete_url, params=params)
            print(resp)

    bad_tags = tagid_to_count[tagid_to_count['nItems'] == 1]
    bad_tags['tagID'] = bad_tags.index
    # One-off cleanup was done directly in SQL; the original had this raw SQL
    # inline (which is not valid Python), kept here as a note:
    # DELETE FROM itemTags WHERE tagID IN
    #     (SELECT tagID FROM tags WHERE type=1);
    # for name in k['name'].values.tolist():  # leftover scratch; `k` undefined

    item_df['title'] = titles.set_index('itemID')['value']
    for idx, item in zotero.index.items():
        sql_title = item_df.loc[item.id]['title']
        if item.title != sql_title:
            if pd.isnull(sql_title) and item.title is not None:
                print(item.__dict__)
                print(item_df.loc[item.id])
                print('item.title = %r' % (item.title,))
                print('sql_title = %r' % (sql_title,))
                assert False

    duplicate_tags = [
        (name, idxs)
        for name, idxs in tags_df.groupby('name', sort=True).indices.items()
        if len(idxs) > 2
    ]
    tagname_to_tagid = tags_df.groupby('name', sort=True).first()
    new_to_oldtags = {}
    # Determine which tagID to use for each name
    for tagname, idxs in duplicate_tags:
        tags_subdf = tags_df.iloc[idxs]
        mapping = itemTag_df[itemTag_df['tagID'].isin(tags_subdf['tagID'])]
        tag_hist = mapping.groupby('tagID').count()
        best_tagid = tag_hist['itemID'].idxmax()
        # remember the duplicate tagIDs that should fold into the best one
        # (the original collected item counts here, which looks like a bug)
        new_to_oldtags[best_tagid] = set(tag_hist.index) - {best_tagid}
        tagname_to_tagid.loc[tagname] = tags_df.loc[best_tagid]
    # for col in tagname_to_tagid.columns:
    #     tagname_to_tagid.loc[tagname][col] = tags_df.loc[best_tagid][col]
    # tags_df.loc[best_tagid]

    if False:
        # Update tagIds
        import sqlite3
        for newid, oldids in new_to_oldtags.items():
            for oldid in oldids:
                # cur.execute('SELECT itemID, tagID FROM itemTags WHERE tagID=?', (oldid,))
                try:
                    cmd = 'UPDATE itemTags SET tagID=? WHERE tagID=?'
                    args = (newid, oldid)
                    print('(%s) args = %r' % (cmd, args,))
                    cur.execute(cmd, args)
                    print(cur.fetchall())
                except sqlite3.IntegrityError:
                    print('error')

    # tags_df.groupby('name', sort=True)
    # itemTag_df.groupby('itemID')
    # duptags = tags_df.iloc[tags_df.groupby('name', sort=True).indices['animals']]
    # duptags['tagID']
    # flags = itemTag_df['tagID'].isin(duptags['tagID'])
    # dup_rel = itemTag_df[flags]
    # item_df['title'].loc[dup_rel['itemID']].values
    # tags_df.iloc[tags_df.groupby('name', sort=True).indices['animals']]
    # tags_df[tags_df['type'] == 1]
    # tags_df[tags_df['type'] == 0]
    # tags_df['libraryID'].unique()
    # tags_df['type'].unique()

    # Scratch SQL note (was a bare string in the original):
    # SELECT FROM itemTags WHERE name in (animals)

    item_tag_pairs = ut.get_table_rows(cur, 'itemTags', ('itemID', 'tagID'))
    # Group tags by item
    itemid_to_tagids = ut.group_pairs(item_tag_pairs)
    # Group items by tags
    tagid_to_itemids = ut.group_pairs(map(tuple, map(reversed, item_tag_pairs)))
    # mapping from tagid to name
    tagid_to_name = dict(ut.get_table_rows(cur, 'tags', ('tagID', 'name')))
    tagid_freq = list(ut.sort_dict(
        ut.map_vals(len, tagid_to_itemids), 'vals').items())
    ut.sort_dict(ut.map_vals(sum, ut.group_pairs(
        [(freq, tagid_to_name.get(tagid, tagid))
         for tagid, freq in tagid_freq])), 'vals')
    tagname_freq = ut.map_keys(lambda k: tagid_to_name.get(k, k), tagid_freq)
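# A hedged pure-python sketch of the grouping step used above (assumption:
# ``ut.group_pairs`` collects the second element of each (key, value) pair
# into a list per key; this stand-in is for illustration only, not utool's
# actual implementation):
def _group_pairs_sketch(pairs):
    from collections import defaultdict
    groups = defaultdict(list)
    for key, value in pairs:
        groups[key].append(value)
    return dict(groups)
# e.g. _group_pairs_sketch([(1, 'a'), (1, 'b'), (2, 'a')])
# returns {1: ['a', 'b'], 2: ['a']}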
def demo_refresh():
    r"""
    CommandLine:
        python -m ibeis.algo.graph.refresh demo_refresh \
            --num_pccs=40 --size=2 --show

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.graph.refresh import *  # NOQA
        >>> demo_refresh()
        >>> ut.show_if_requested()
    """
    from ibeis.algo.graph import demo
    demokw = ut.argparse_dict({'num_pccs': 50, 'size': 4})
    refreshkw = ut.argparse_funckw(RefreshCriteria)
    # make an inference object
    infr = demo.demodata_infr(size_std=0, **demokw)
    edges = list(infr.dummy_verif.find_candidate_edges(K=100))
    scores = np.array(infr.dummy_verif.predict_edges(edges))
    sortx = scores.argsort()[::-1]
    edges = ut.take(edges, sortx)
    scores = scores[sortx]
    ys = infr.match_state_df(edges)[POSTV].values
    y_remainsum = ys[::-1].cumsum()[::-1]
    # Do oracle reviews and wait to converge
    refresh = RefreshCriteria(**refreshkw)
    xdata = []
    pprob_any = []
    rfrac_any = []
    for count, (edge, y) in enumerate(zip(edges, ys)):
        refresh.add(y, user_id='user:oracle')
        rfrac_any.append(y_remainsum[count] / y_remainsum[0])
        pprob_any.append(refresh.prob_any_remain())
        xdata.append(count + 1)
        if refresh.check():
            break
    ydatas = ut.odict([
        ('Est. probability any remain', pprob_any),
        ('Fraction remaining', rfrac_any),
    ])

    ut.quit_if_noshow()
    import plottool_ibeis as pt
    pt.qtensure()
    from ibeis.scripts.thesis import TMP_RC
    import matplotlib as mpl
    mpl.rcParams.update(TMP_RC)
    pt.multi_plot(
        xdata, ydatas, xlabel='# manual reviews', rcParams=TMP_RC, marker='',
        ylim=(0, 1), use_legend=False,
    )
    demokw = ut.map_keys({'num_pccs': '#PCC', 'size': 'PCC size'}, demokw)
    thresh = refreshkw.pop('thresh')
    refreshkw['span'] = refreshkw.pop('window')
    pt.relative_text((.02, .58 + .0), ut.get_cfg_lbl(demokw, sep=' ')[1:],
                     valign='bottom')
    pt.relative_text((.02, .68 + .0), ut.get_cfg_lbl(refreshkw, sep=' ')[1:],
                     valign='bottom')
    legend = pt.gca().legend()
    legend.get_frame().set_alpha(1.0)
    pt.plt.plot([xdata[0], xdata[-1]], [thresh, thresh], 'g--', label='thresh')
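# A minimal standalone sketch of the stopping idea demoed above (assumption:
# this is NOT the actual RefreshCriteria implementation, just one plausible
# variant): maintain an exponentially weighted moving average of how often
# recent reviews were positive and stop once the estimate falls below a
# threshold.
def _sketch_refresh_stop(ys, window=20, thresh=0.052):
    """Return how many oracle reviews happen before the criterion fires."""
    alpha = 2.0 / (window + 1)  # standard EWMA smoothing factor
    ewma = 1.0                  # optimistic prior: assume positives remain
    count = 0
    for count, y in enumerate(ys, start=1):
        ewma = alpha * float(y) + (1 - alpha) * ewma
        if ewma < thresh:  # estimated chance of another positive is negligible
            break
    return count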