def gen_retrieve_db_listing(db_name=None):
    """Retrieve listing for all or given database.

    db_name -- If absent, get listing of all dbs, if present, get listing
               of collections in named db

    NB connection must have been setup and checked!

    Returns a list of tuples sorted case-insensitively by name, or None on error.
    """
    table_name = 'inv_dbs'
    coll_name = 'inv_tables'
    try:
        table = db[table_name]
        if db_name is None:
            #All databases, with a count of collections in each
            records = list(table.search({}, {'_id': 1, 'name' : 1, 'nice_name':1}))
            records = [(rec['name'], rec['nice_name'], idc.count_colls(rec['_id'])) for rec in records]
        else:
            #Collections of the named database
            _id = idc.get_db_id(db_name)['id']
            table = db[coll_name]
            query = {'db_id':_id}
            records = list(table.search(query, {'_id': 1, 'name' : 1, 'nice_name':1, 'status':1}))
            records = [(rec['name'], rec['nice_name'], 0, ih.code_to_status(rec['status']), False) for rec in records]
    except Exception:
        #Narrowed from a bare except: don't swallow KeyboardInterrupt/SystemExit
        records = None
    if records is not None:
        return sorted(records, key=lambda s: s[0].lower())
    else:
        return records
def register_scrape(db, table, uid):
    """Create a suitable inventory entry for the scrape

    db -- database name
    table -- table name; falsy means register every table in db
    uid -- identifier for this scrape run

    Returns dict with 'err' and 'inprog' flags.
    """
    inv.log_dest.warning(db+' '+table+' '+str(uid))
    try:
        db_id = idc.get_db_id(db)
        inv.log_dest.warning(str(db_id))
        inprog = False
        had_err = False
        if not table:
            #No table given: register a scrape for every table in db
            all_tables = idc.get_all_tables(db_id)
            for table in all_tables:
                table_id = table['_id']
                tmp = check_and_insert_scrape_record(db_id, table_id, uid)
                #BUG FIX: accumulate with 'or' -- 'and' against an initial
                #False meant these flags could never become True
                inprog = tmp['inprog'] or inprog
                had_err = tmp['err'] or had_err
        else:
            table_id = idc.get_table_id(table)
            inv.log_dest.warning(str(table_id))
            tmp = check_and_insert_scrape_record(db_id, table_id, uid)
            inprog = tmp['inprog'] or inprog
            had_err = tmp['err'] or had_err
    except Exception as e:
        #Either failed to connect etc, or are already scraping
        inv.log_dest.warning('Error resistering scrape '+str(e))
        return {'err':True, 'inprog':False}
    return {'err':had_err, 'inprog':inprog}
def upload_scraped_inventory(db, structure_dat, uid):
    """Upload a json structure document and store any orphans

    db -- LMFDB connection to inventory database
    structure_dat -- JSON document containing all db/collections to upload
    uid -- UID string for uploading process
    """
    inv.log_dest.info("_____________________________________________________________________________________________")
    total = len(structure_dat.keys())
    for done, db_name in enumerate(structure_dat, start=1):
        inv.log_dest.info("Uploading " + db_name + " (" + str(done) + " of " + str(total) + ')')
        invc.set_db(db, db_name, db_name)
        for coll_name in structure_dat[db_name]:
            inv.log_dest.info(" Uploading collection " + coll_name)
            orphans = upload_collection_structure(db, db_name, coll_name, structure_dat, fresh=False)
            if len(orphans) != 0:
                #Keys with no matching structure entry get stored separately
                db_rec = invc.get_db_id(db, db_name)
                coll_rec = invc.get_coll_id(db, db_rec['id'], coll_name)
                ild.store_orphans(db, db_rec['id'], coll_rec['id'], uid, orphans)
def register_scrape(db, coll, uid):
    """Create a suitable inventory entry for the scrape

    db -- database name
    coll -- collection name; falsy means register every collection in db
    uid -- identifier for this scrape run

    Returns dict with 'err' and 'inprog' flags.
    """
    try:
        db_id = idc.get_db_id(db)
        db_id = db_id['id']
        inprog = False
        had_err = False
        if not coll:
            #No collection given: register a scrape for every collection in db
            all_colls = idc.get_all_colls(db_id)
            for coll in all_colls:
                coll_id = coll['_id']
                tmp = check_and_insert_scrape_record(db_id, coll_id, uid)
                #BUG FIX: accumulate with 'or' -- 'and' against an initial
                #False meant these flags could never become True
                inprog = tmp['inprog'] or inprog
                had_err = tmp['err'] or had_err
        else:
            coll_id = idc.get_coll_id(db_id, coll)
            coll_id = coll_id['id']
            tmp = check_and_insert_scrape_record(db_id, coll_id, uid)
            inprog = tmp['inprog'] or inprog
            had_err = tmp['err'] or had_err
    except Exception:
        #Narrowed from bare except
        #Either failed to connect etc, or are already scraping
        return {'err': True, 'inprog': False}
    return {'err': had_err, 'inprog': inprog}
def retrieve_db_listing(db_name=None):
    """Retrieve listing for all or given database.

    db_name -- If absent, get listing of all dbs, if present, get listing
               of tables in named db

    Returns a list of tuples sorted case-insensitively by name.
    Raises on failure (after logging).
    """
    try:
        if db_name is None:
            records = list(db.inv_dbs.search({}, ['name', 'nice_name']))
            #Count tables per database by the 'dbname_' prefix convention
            counts = defaultdict(int)
            for tablename in db.tablenames:
                dbname = tablename.split('_')[0]
                counts[dbname] += 1
            records = [(rec['name'], rec['nice_name'], counts[rec['name']]) for rec in records]
        else:
            db_id = idc.get_db_id(db_name)
            records = list(db.inv_tables.search({'db_id': db_id}, ['_id', 'name', 'nice_name', 'status']))
            records = [(rec['name'], rec['nice_name'], comma(db[rec['name']].count()), ih.code_to_status(rec['status']), check_locked(rec['_id'])) for rec in records]
        return sorted(records, key=lambda s: s[0].lower())
    except Exception as e:
        inv.log_dest.error("Something went wrong retrieving db info "+str(e))
        raise
        #Removed unreachable 'return None' that followed the re-raise
def upload_scraped_inventory(structure_dat, uid):
    """Upload a json structure document and store any orphans

    structure_dat -- JSON document containing all db/collections to upload
    uid -- UID string for uploading process

    Returns the number of databases processed.
    """
    db_names = list(structure_dat)
    for db_name in db_names:
        invc.set_db(db_name, db_name)
        for coll_name in structure_dat[db_name]:
            orphans = upload_collection_structure(db_name, coll_name, structure_dat, fresh=False)
            if len(orphans) != 0:
                #Keys with no matching structure entry get stored separately
                db_rec = invc.get_db_id(db_name)
                coll_rec = invc.get_coll_id(db_rec['id'], coll_name)
                ild.store_orphans(db_rec['id'], coll_rec['id'], uid, orphans)
    return len(db_names)
def check_scrapes_on(spec=None):
    """If table given, check for scrapes in progress or queued on it.
    If only db, check all tables in it. If spec is None, check everything"""
    spec_ids = {}
    if spec is not None:
        db_name = spec.get('db')
        table_name = spec.get('table')
        if db_name:
            spec_ids['db'] = idc.get_db_id(db_name)
        if table_name:
            spec_ids['table'] = idc.get_table_id(table_name)
    return check_if_scraping(spec_ids) or check_if_scraping_queued(spec_ids)
def is_valid_db_collection(db_name, collection_name):
    """Check if db and collection name (if not None) exist

    Returns True only when the db record (and collection record, if a
    collection name was given) reports 'exist'.
    """
    try:
        db_id = idc.get_db_id(db_name)
        if not db_id['exist']:
            return False
        if collection_name:
            coll_id = idc.get_coll_id(db_id['id'], collection_name)
            if not coll_id['exist']:
                return False
    except Exception:
        #Narrowed from bare except; any lookup failure means "not valid"
        return False
    return True
def update_scrape_progress(db_name, coll, uid, complete=None, running=None):
    """Update progress of scrape from db/coll names and uid

    complete/running -- optional flags forwarded to the helper; None means
    leave that field unchanged.

    Returns False on failure, None on success (legacy convention).
    """
    try:
        db_id = idc.get_db_id(db_name)
        coll_id = idc.get_coll_id(db_id['id'], coll)
        update_scrape_progress_helper(db_id['id'], coll_id['id'], uid, complete=complete, running=running)
    except Exception:
        #Narrowed from bare except
        return False
def is_valid_db_table(db_name, table_name):
    """Check if db and table_name name (if not None) exist"""
    try:
        if idc.get_db_id(db_name) is None:
            return False
        if table_name and idc.get_table_id(table_name) is None:
            return False
        return True
    except Exception as e:
        inv.log_dest.error('Failed checking existence of '+db_name+' '+str(table_name)+' '+str(e))
        return False
def check_locks(resp):
    """Check if request pertains to locked coll or editing is locked globally

    resp -- dict with 'db' and 'collection' keys.
    Raises EditLockError if editing is blocked; propagates lookup errors.
    """
    if get_lockout_state():
        raise EditLockError('Global Edit Lock')
    #Removed a try/except that only did 'raise e' -- it added nothing and
    #rewrote the traceback; exceptions now propagate with full context
    db_name = resp['db']
    coll_name = resp['collection']
    db_id = idc.get_db_id(db_name)
    coll_id = idc.get_coll_id(db_id['id'], coll_name)
    if check_locked(coll_id['id']):
        raise EditLockError('Collection locked')
def update_scrape_progress(db_name, table_name, uid, complete=None, running=None):
    """Update progress of scrape from db/table names and uid """
    try:
        selector = {'db_id': idc.get_db_id(db_name),
                    'table_id': idc.get_table_id(table_name),
                    'uid': uid}
        changes = {}
        for key, val in (('complete', complete), ('running', running)):
            if val is not None:
                changes[key] = val
        #Only touch the record when there is something to set
        if changes:
            db.inv_ops.update(selector, changes)
    except Exception as e:
        inv.log_dest.error("Error updating progress "+ str(e))
        return False
def check_scrapes_on(spec=None):
    """If collection given, check for scrapes in progress or queued on it.
    If only db, check all collections in it. If spec is None, check everything

    Returns False on any error.
    """
    try:
        spec_ids = {}
        if spec is not None:
            db_id = idc.get_db_id(spec['db'])
            spec_ids = {'db': db_id['id']}
            if spec['coll']:
                coll_id = idc.get_coll_id(db_id['id'], spec['coll'])
                spec_ids['coll'] = coll_id['id']
        result = check_if_scraping(spec_ids) or check_if_scraping_queued(spec_ids)
        return result
    except Exception:
        #Narrowed from bare except
        return False
def get_nicename(db_name, collection_name):
    """Return the nice_name string for given db/coll pair

    collection_name falsy -> return the database's nice_name instead.
    Returns None when the lookup fails for any reason.
    """
    try:
        if collection_name:
            db_id = idc.get_db_id(db_name)
            coll_rec = idc.get_coll(db_id['id'], collection_name)
            nice_name = coll_rec['data']['nice_name']
        else:
            db_rec = idc.get_db(db_name)
            nice_name = db_rec['data']['nice_name']
        return nice_name
    except Exception:
        #Narrowed from bare except; removed commented-out debug print
        #Can't return nice name so return None
        return None
def mark_all_gone():
    """Set status of all removed collections to gone"""
    gone_code = ih.status_to_code('gone')
    dbs = iv.gen_retrieve_db_listing()
    all_colls = get_db_lists()
    for db_rec in dbs:
        db_name = db_rec[0]
        db_id = idc.get_db_id(db_name)
        for coll in iv.gen_retrieve_db_listing(db_name):
            #Flag collections that vanished and aren't already marked gone
            vanished = coll[0] not in all_colls[db_name]
            if vanished and coll[3] != 'gone':
                coll_id = idc.get_coll_id(db_id['id'], coll[0])
                idc.update_coll(coll_id['id'], status=gone_code)
def update_scrape_progress(db, coll, uid, complete=None, running=None):
    """Update progress of scrape from db/coll names and uid """
    try:
        got_client = inv.setup_internal_client(editor=True)
        assert(got_client == True)
        inv_db = inv.int_client[inv.get_inv_db_name()]
    except Exception as e:
        inv.log_dest.error("Error getting Db connection "+ str(e))
        return False
    try:
        #Resolve names to ids, then hand off to the helper
        db_rec = idc.get_db_id(inv_db, db)
        coll_rec = idc.get_coll_id(inv_db, db_rec['id'], coll)
        update_scrape_progress_helper(inv_db, db_rec['id'], coll_rec['id'], uid,
                                      complete=complete, running=running)
    except Exception as e:
        inv.log_dest.error("Error updating progress "+ str(e))
        return False
def mark_all_gone(main_db):
    """Set status of all removed collections to gone"""
    inv_db = main_db[inv.get_inv_db_name()]
    gone_code = ih.status_to_code('gone')
    dbs = iv.gen_retrieve_db_listing(inv_db)
    all_colls = get_db_lists()
    for db in dbs:
        db_id = idc.get_db_id(inv_db, db[0])
        for coll in iv.gen_retrieve_db_listing(inv_db, db[0]):
            #Flag collections that vanished and aren't already marked gone
            vanished = coll[0] not in all_colls[db[0]]
            if vanished and coll[3] != 'gone':
                coll_id = idc.get_coll_id(inv_db, db_id['id'], coll[0])
                idc.update_coll(inv_db, coll_id['id'], status=gone_code)
                inv.log_dest.info(str(db) +'.'+str(coll) +' is now gone')
def delete_by_collection(db_name, coll_name):
    """Remove collection entry and all its fields

    Returns an error dict if the db/collection ids cannot be resolved;
    otherwise deletes field data and the collection record (best effort).
    """
    try:
        _db_id = invc.get_db_id(db_name)
        _c_id = invc.get_coll_id(_db_id['id'], coll_name)
    except Exception:
        #Narrowed from bare except
        return {'err': True, 'id': 0, 'exist': False}
    #Remove fields entries matching _c_id
    delete_collection_data(_c_id['id'], tbl='auto')
    delete_collection_data(_c_id['id'], tbl='human')
    delete_collection_data(_c_id['id'], tbl='records')
    try:
        lmfdb_db[inv.ALL_STRUC.coll_ids[inv.STR_NAME]].delete({'_id': _c_id['id']})
    except Exception:
        #Narrowed from bare except; deliberate best-effort delete
        pass
def is_valid_db_collection(db_name, collection_name):
    """Check if db and collection name (if not None) exist

    Raises ih.ConnectOrAuthFail if no client connection can be made.
    """
    try:
        inv.setup_internal_client()
        db = inv.int_client[inv.ALL_STRUC.name]
    except Exception:
        raise ih.ConnectOrAuthFail("")
        #Removed unreachable 'return False' that followed the raise
    try:
        db_id = idc.get_db_id(db, db_name)
        if not db_id['exist']:
            return False
        if collection_name:
            coll_id = idc.get_coll_id(db, db_id['id'], collection_name)
            if not coll_id['exist']:
                return False
    except Exception as e:
        inv.log_dest.error('Failed checking existence of '+db_name+' '+collection_name+' '+str(e))
        return False
    return True
def update_scrape_progress(db_name, table_name, uid, complete=None, running=None):
    """Update progress of scrape from db/table names and uid """
    try:
        rec_find = {'uid': uid,
                    'db_id': idc.get_db_id(db_name),
                    'table_id': idc.get_table_id(table_name)}
        #Build the update document from whichever flags were supplied
        rec_set = {key: val
                   for key, val in (('complete', complete), ('running', running))
                   if val is not None}
        if rec_set:
            db.inv_ops.update(rec_find, rec_set)
    except Exception as e:
        inv.log_dest.error("Error updating progress " + str(e))
        return False
def check_locks(resp):
    """Check if request pertains to locked coll or editing is locked globally

    resp -- dict with 'db' and 'collection' keys.
    Raises ih.ConnectOrAuthFail on connection failure and EditLockError when
    editing is blocked.
    """
    inv.setup_internal_client()
    try:
        db = inv.int_client[inv.ALL_STRUC.name]
    except Exception:
        raise ih.ConnectOrAuthFail("")
    if get_lockout_state():
        raise EditLockError('Global Edit Lock')
    try:
        db_name = resp['db']
        coll_name = resp['collection']
        db_id = idc.get_db_id(db, db_name)
        coll_id = idc.get_coll_id(db, db_id['id'], coll_name)
        if check_locked(db, coll_id['id']):
            raise EditLockError('Collection locked')
    except Exception as e:
        inv.log_dest.error("Error in locking " + str(e))
        #Bare raise preserves the original traceback (was 'raise e')
        raise
def is_valid_db_collection(db_name, collection_name):
    """Check if db and collection name (if not None) exist

    Raises ih.ConnectOrAuthFail if no client connection can be made.
    """
    try:
        inv.setup_internal_client()
        db = inv.int_client[inv.ALL_STRUC.name]
    except Exception:
        raise ih.ConnectOrAuthFail("")
        #Removed unreachable 'return False' that followed the raise
    try:
        db_id = idc.get_db_id(db, db_name)
        if not db_id['exist']:
            return False
        if collection_name:
            coll_id = idc.get_coll_id(db, db_id['id'], collection_name)
            if not coll_id['exist']:
                return False
    except Exception as e:
        inv.log_dest.error('Failed checking existence of ' + db_name + ' ' + collection_name + ' ' + str(e))
        return False
    return True
def check_scrapes_on(spec):
    """If collection given, check for scrapes in progress or queued on it.
    If only db, check all collections in it

    Returns False on any error.
    """
    try:
        got_client = inv.setup_internal_client(editor=True)
        assert (got_client == True)
        inv_db = inv.int_client[inv.get_inv_db_name()]
    except Exception as e:
        inv.log_dest.error("Error getting Db connection " + str(e))
        return False
    try:
        db_id = idc.get_db_id(inv_db, spec['db'])
        spec_ids = {'db': db_id['id']}
        if spec['coll']:
            spec_ids['coll'] = idc.get_coll_id(inv_db, db_id['id'], spec['coll'])['id']
        return check_if_scraping(inv_db, spec_ids) or check_if_scraping_queued(inv_db, spec_ids)
    except Exception:
        #Dropped unused 'as e' binding; failure means "no scrapes detectable"
        return False
def check_locks(resp):
    """Check if request pertains to locked coll or editing is locked globally

    resp -- dict with 'db' and 'collection' keys.
    Raises ih.ConnectOrAuthFail on connection failure and EditLockError when
    editing is blocked.
    """
    inv.setup_internal_client()
    try:
        db = inv.int_client[inv.ALL_STRUC.name]
    except Exception:
        raise ih.ConnectOrAuthFail("")
    if get_lockout_state():
        raise EditLockError('Global Edit Lock')
    try:
        db_name = resp['db']
        coll_name = resp['collection']
        db_id = idc.get_db_id(db, db_name)
        coll_id = idc.get_coll_id(db, db_id['id'], coll_name)
        if check_locked(db, coll_id['id']):
            raise EditLockError('Collection locked')
    except Exception as e:
        inv.log_dest.error("Error in locking "+str(e))
        #Bare raise preserves the original traceback (was 'raise e')
        raise
def register_scrape(db, coll, uid):
    """Create a suitable inventory entry for the scrape

    db -- database name
    coll -- collection name; falsy means register every collection in db
    uid -- identifier for this scrape run

    Returns dict with 'err' and 'inprog' flags.
    """
    inv.log_dest.warning(db + ' ' + coll + ' ' + str(uid))
    try:
        got_client = inv.setup_internal_client(editor=True)
        assert (got_client == True)
        inv_db = inv.int_client[inv.get_inv_db_name()]
    except Exception as e:
        inv.log_dest.error("Error getting Db connection " + str(e))
        return {'err': True, 'inprog': False}
    try:
        db_id = idc.get_db_id(inv_db, db)
        inv.log_dest.warning(str(db_id))
        db_id = db_id['id']
        inprog = False
        had_err = False
        if not coll:
            #No collection given: register a scrape for every collection in db
            all_colls = idc.get_all_colls(inv_db, db_id)
            for coll in all_colls:
                coll_id = coll['_id']
                tmp = check_and_insert_scrape_record(inv_db, db_id, coll_id, uid)
                #BUG FIX: accumulate with 'or' -- 'and' against an initial
                #False meant these flags could never become True
                inprog = tmp['inprog'] or inprog
                had_err = tmp['err'] or had_err
        else:
            coll_id = idc.get_coll_id(inv_db, db_id, coll)
            inv.log_dest.warning(str(coll_id))
            coll_id = coll_id['id']
            tmp = check_and_insert_scrape_record(inv_db, db_id, coll_id, uid)
            inprog = tmp['inprog'] or inprog
            had_err = tmp['err'] or had_err
    except Exception as e:
        #Either failed to connect etc, or are already scraping
        inv.log_dest.warning('Error resistering scrape ' + str(e))
        return {'err': True, 'inprog': False}
    return {'err': had_err, 'inprog': inprog}
def upload_scraped_inventory(structure_dat, uid):
    """Upload a json structure document and store any orphans

    structure_dat -- JSON document containing all db/tables to upload
    uid -- UID string for uploading process
    """
    inv.log_dest.info("_____________________________________________________________________________________________")
    n_dbs = len(structure_dat.keys())
    for done, db_name in enumerate(structure_dat, start=1):
        inv.log_dest.info("Uploading "+db_name+" ("+str(done)+" of "+str(n_dbs)+')')
        invc.set_db(db_name, db_name)
        for table_name in structure_dat[db_name]:
            inv.log_dest.info(" Uploading table "+table_name)
            orphans = upload_table_structure(db_name, table_name, structure_dat, fresh=False)
            if len(orphans) != 0:
                #Keys with no matching structure entry get stored separately
                ild.store_orphans(invc.get_db_id(db_name), invc.get_table_id(table_name), uid, orphans)
def null_all_scrapes(db, coll):
    """Update all scrapes on db.coll to be 'complete' """
    try:
        got_client = inv.setup_internal_client(editor=True)
        assert(got_client == True)
        inv_db = inv.int_client[inv.get_inv_db_name()]
    except Exception as e:
        inv.log_dest.error("Error getting Db connection "+ str(e))
        return False
    try:
        db_rec = idc.get_db_id(inv_db, db)
        coll_rec = idc.get_coll_id(inv_db, db_rec['id'], coll)
        selector = {'db': db_rec['id'], 'coll': coll_rec['id']}
        #Force every matching scrape record to the finished state
        inv_db['ops'].update_many(selector, {"$set": {'complete': True, 'running': False}})
    except Exception as e:
        inv.log_dest.error("Error updating progress "+ str(e))
        return False
def delete_by_collection(inv_db, db_name, coll_name):
    """Remove collection entry and all its fields

    inv_db -- inventory database connection (validated against structure)
    Raises TypeError if inv_db does not match the inventory structure.
    """
    if not inv.validate_mongodb(inv_db):
        raise TypeError("db does not match Inventory structure")
        #Removed unreachable 'return' that followed the raise
    try:
        _db_id = invc.get_db_id(inv_db, db_name)
        _c_id = invc.get_coll_id(inv_db, _db_id['id'], coll_name)
    except Exception as e:
        inv.log_dest.error("Error getting collection " + str(e))
        return {'err':True, 'id':0, 'exist':False}
    #Remove fields entries matching _c_id
    delete_collection_data(inv_db, _c_id['id'], tbl='auto')
    delete_collection_data(inv_db, _c_id['id'], tbl='human')
    delete_collection_data(inv_db, _c_id['id'], tbl='records')
    try:
        inv_db[inv.ALL_STRUC.coll_ids[inv.STR_NAME]].remove({'_id':_c_id['id']})
    except Exception as e:
        inv.log_dest.error("Error removing collection " + str(e))
def update_scrape_progress(db, coll, uid, complete=None, running=None):
    """Update progress of scrape from db/coll names and uid """
    try:
        got_client = inv.setup_internal_client(editor=True)
        assert (got_client == True)
        inv_db = inv.int_client[inv.get_inv_db_name()]
    except Exception as e:
        inv.log_dest.error("Error getting Db connection " + str(e))
        return False
    try:
        #Look up ids from the names, then delegate the actual write
        ids = idc.get_db_id(inv_db, db)
        coll_ids = idc.get_coll_id(inv_db, ids['id'], coll)
        update_scrape_progress_helper(inv_db, ids['id'], coll_ids['id'], uid,
                                      complete=complete, running=running)
    except Exception as e:
        inv.log_dest.error("Error updating progress " + str(e))
        return False
def get_nicename(db_name, collection_name):
    """Return the nice_name string for given db/coll pair

    collection_name falsy -> return the database's nice_name instead.
    Raises ih.ConnectOrAuthFail on connection failure; returns None when
    the lookup itself fails.
    """
    try:
        inv.setup_internal_client()
        db = inv.int_client[inv.ALL_STRUC.name]
    except Exception:
        raise ih.ConnectOrAuthFail("")
        #Removed unreachable 'return None' that followed the raise
    try:
        if collection_name:
            db_id = idc.get_db_id(db, db_name)
            coll_rec = idc.get_coll(db, db_id['id'], collection_name)
            nice_name = coll_rec['data']['nice_name']
        else:
            db_rec = idc.get_db(db, db_name)
            nice_name = db_rec['data']['nice_name']
        return nice_name
    except Exception as e:
        inv.log_dest.error('Failed to get nice name for '+db_name+' '+collection_name+' '+str(e))
        #Can't return nice name so return None
        return None
def get_nicename(db_name, collection_name):
    """Return the nice_name string for given db/coll pair

    collection_name falsy -> return the database's nice_name instead.
    Raises ih.ConnectOrAuthFail on connection failure; returns None when
    the lookup itself fails.
    """
    try:
        inv.setup_internal_client()
        db = inv.int_client[inv.ALL_STRUC.name]
    except Exception:
        raise ih.ConnectOrAuthFail("")
        #Removed unreachable 'return None' that followed the raise
    try:
        if collection_name:
            db_id = idc.get_db_id(db, db_name)
            coll_rec = idc.get_coll(db, db_id['id'], collection_name)
            nice_name = coll_rec['data']['nice_name']
        else:
            db_rec = idc.get_db(db, db_name)
            #Removed stray 'print db_rec' debug statement (Python-2-only
            #syntax: a SyntaxError under Python 3)
            nice_name = db_rec['data']['nice_name']
        return nice_name
    except Exception as e:
        inv.log_dest.error('Failed to get nice name for ' + db_name + ' ' + collection_name + ' ' + str(e))
        #Can't return nice name so return None
        return None
def delete_by_collection(inv_db, db_name, coll_name):
    """Remove collection entry and all its fields

    inv_db -- inventory database connection (validated against structure)
    Raises TypeError if inv_db does not match the inventory structure.
    """
    if not inv.validate_mongodb(inv_db):
        raise TypeError("db does not match Inventory structure")
        #Removed unreachable 'return' that followed the raise
    try:
        _db_id = invc.get_db_id(inv_db, db_name)
        _c_id = invc.get_coll_id(inv_db, _db_id['id'], coll_name)
    except Exception as e:
        inv.log_dest.error("Error getting collection " + str(e))
        return {'err': True, 'id': 0, 'exist': False}
    #Remove fields entries matching _c_id
    delete_collection_data(inv_db, _c_id['id'], tbl='auto')
    delete_collection_data(inv_db, _c_id['id'], tbl='human')
    delete_collection_data(inv_db, _c_id['id'], tbl='records')
    try:
        inv_db[inv.ALL_STRUC.coll_ids[inv.STR_NAME]].remove({'_id': _c_id['id']})
    except Exception as e:
        inv.log_dest.error("Error removing collection " + str(e))
def upload_scraped_inventory(db, structure_dat, uid):
    """Upload a json structure document and store any orphans

    db -- LMFDB connection to inventory database
    structure_dat -- JSON document containing all db/collections to upload
    uid -- UID string for uploading process
    """
    inv.log_dest.info("_____________________________________________________________________________________________")
    n_dbs = len(structure_dat.keys())
    progress = 0
    for db_name in structure_dat:
        progress += 1
        inv.log_dest.info("Uploading " + db_name+" ("+str(progress)+" of "+str(n_dbs)+')')
        invc.set_db(db, db_name, db_name)
        for coll_name in structure_dat[db_name]:
            inv.log_dest.info(" Uploading collection "+coll_name)
            orphans = upload_collection_structure(db, db_name, coll_name, structure_dat, fresh=False)
            if len(orphans) == 0:
                continue
            #Keys with no matching structure entry get stored separately
            db_rec = invc.get_db_id(db, db_name)
            coll_rec = invc.get_coll_id(db, db_rec['id'], coll_name)
            ild.store_orphans(db, db_rec['id'], coll_rec['id'], uid, orphans)
def update_fields(diff, storeRollback=True):
    """Update a record from a diff object.

    diff -- should be a fully qualified difference, containing db, table names and then a list
    of changes, each being a dict containing the item, the field and the new content. Item corresponds to
    an entry in an object, field to the piece of information this specifies (for example, type, description, example)
    e.g. {"db":"hgcwa","table":"hgcwa_passports","diffs":[{"item":"total_label","field":"type","content":"string"}]}
    If this is a record entry, then the 'item' field will be a record hash.

    storeRollback -- determine whether to store the undiff and diff to allow rollback of the change
    """
    try:
        if diff['table'] is not None:
            inv.log_dest.info("Updating descriptions for " + diff["table"])
        else:
            inv.log_dest.info("Updating descriptions for " + diff["db"])
        db_id = idc.get_db_id(diff["db"])
        rollback = None
        try:
            #Each change is applied independently; a rollback record is
            #captured (when requested) just before each write
            for change in diff["diffs"]:
                if ih.is_special_field(change["item"]):
                    if storeRollback:
                        rollback = capture_rollback(db_id, diff["db"], diff["table"], change)
                    change["item"] = change["item"][2:-2] #Trim special fields. TODO this should be done better somehow
                    updated = idc.update_table_data(db_id, diff["table"], change["item"], change["field"], change["content"])
                elif ih.is_toplevel_field(change["item"]):
                    #Here we have item == "toplevel", field the relevant field, and change the new value
                    if storeRollback:
                        rollback = capture_rollback(db_id, diff["db"], diff["table"], change)
                    #Only nice_name is currently an option
                    if(change["field"] not in ['nice_name', 'status']):
                        updated = {'err':True}
                    else:
                        if(diff["table"]):
                            #Table-level nice_name or status update
                            if(change['field']) == 'nice_name':
                                new_nice = change['content']
                                new_stat = None
                            else:
                                new_nice = None
                                new_stat = ih.status_to_code(change['content'])
                            table_id = idc.get_table_id(diff['table'])
                            updated = idc.update_table(table_id, nice_name=new_nice, status=new_stat)
                        else:
                            #Is database nice_name
                            updated = idc.update_db(db_id, nice_name=change["content"])
                else:
                    #Ordinary per-field update on the table
                    table_id = idc.get_table_id(diff["table"])
                    if storeRollback:
                        rollback = capture_rollback(db_id, diff["db"], diff["table"], change, table_id = table_id)
                    updated = idc.update_field(table_id, change["item"], change["field"], change["content"], type="human")
                if updated['err']:
                    raise KeyError("Cannot update, item not present")
                else:
                    if storeRollback:
                        store_rollback(rollback)
        except Exception as e:
            #Any failure mid-stream aborts the whole diff application
            inv.log_dest.error("Error applying diff "+ str(change)+' '+str(e))
            raise UpdateFailed(str(e))
    except Exception as e:
        inv.log_dest.error("Error updating fields "+ str(e))
def update_fields(diff, storeRollback=True):
    """Update a record from a diff object.

    diff -- should be a fully qualified difference, containing db, collection names and then a list
    of changes, each being a dict containing the item, the field and the new content. Item corresponds to
    an entry in an object, field to the piece of information this specifies (for example, type, description, example)
    e.g. {"db":"curve_automorphisms","collection":"passports","diffs":[{"item":"total_label","field":"type","content":"string"}]}
    If this is a record entry, then the 'item' field will be a record hash.

    storeRollback -- determine whether to store the undiff and diff to allow rollback of the change
    """
    try:
        got_client = inv.setup_internal_client(editor=True)
        assert(got_client == True)
        db = inv.int_client[inv.get_inv_db_name()]
    except Exception as e:
        inv.log_dest.error("Error getting Db connection "+ str(e))
        return
    try:
        if diff['collection'] is not None:
            inv.log_dest.info("Updating descriptions for " + diff["db"]+'.'+diff["collection"])
        else:
            inv.log_dest.info("Updating descriptions for " + diff["db"])
        _id = idc.get_db_id(db, diff["db"])
        rollback = None
        try:
            #Each change is applied independently; a rollback record is
            #captured (when requested) just before each write
            for change in diff["diffs"]:
                if ih.is_special_field(change["item"]):
                    if storeRollback:
                        rollback = capture_rollback(db, _id['id'], diff["db"], diff["collection"], change)
                    change["item"] = change["item"][2:-2] #Trim special fields. TODO this should be done better somehow
                    updated = idc.update_coll_data(db, _id['id'], diff["collection"], change["item"], change["field"], change["content"])
                elif ih.is_toplevel_field(change["item"]):
                    #Here we have item == "toplevel", field the relevant field, and change the new value
                    if storeRollback:
                        rollback = capture_rollback(db, _id['id'], diff["db"], diff["collection"], change)
                    #Only nice_name is currently an option
                    if(change["field"] not in ['nice_name', 'status']):
                        updated = {'err':True}
                    else:
                        if(diff["collection"]):
                            #Collection-level nice_name or status update
                            if(change['field']) == 'nice_name':
                                new_nice = change['content']
                                new_stat = None
                            else:
                                new_nice = None
                                new_stat = ih.status_to_code(change['content'])
                            c_id = idc.get_coll_id(db, _id['id'], diff['collection'])
                            updated = idc.update_coll(db, c_id['id'], nice_name=new_nice, status=new_stat)
                        else:
                            #Is database nice_name
                            updated = idc.update_db(db, _id['id'], nice_name=change["content"])
                else:
                    _c_id = idc.get_coll_id(db, _id['id'], diff["collection"])
                    if storeRollback:
                        rollback = capture_rollback(db, _id['id'], diff["db"], diff["collection"], change, coll_id = _c_id['id'])
                    succeeded = False
                    #if it looks like a record, try treating as one
                    #If this fails try it as a field
                    if ih.is_probable_record_hash(change['item']):
                        updated = idc.update_record_description(db, _c_id['id'], {'hash':change["item"], change["field"]:change["content"]})
                        if updated['err'] == False:
                            succeeded = True;
                    if not succeeded:
                        updated = idc.update_field(db, _c_id['id'], change["item"], change["field"], change["content"], type="human")
                if updated['err']:
                    raise KeyError("Cannot update, item not present")
                else:
                    if storeRollback:
                        store_rollback(db, rollback)
        except Exception as e:
            #Any failure mid-stream aborts the whole diff application
            inv.log_dest.error("Error applying diff "+ str(change)+' '+str(e))
            raise UpdateFailed(str(e))
    except Exception as e:
        inv.log_dest.error("Error updating fields "+ str(e))
def update_fields(diff, storeRollback=True):
    """Update a record from a diff object.

    diff -- should be a fully qualified difference, containing db, table names and then a list
    of changes, each being a dict containing the item, the field and the new content. Item corresponds to
    an entry in an object, field to the piece of information this specifies (for example, type, description, example)
    e.g. {"db":"hgcwa","table":"hgcwa_passports","diffs":[{"item":"total_label","field":"type","content":"string"}]}
    If this is a record entry, then the 'item' field will be a record hash.

    storeRollback -- determine whether to store the undiff and diff to allow rollback of the change
    """
    try:
        if diff['table'] is not None:
            inv.log_dest.info("Updating descriptions for " + diff["table"])
        else:
            inv.log_dest.info("Updating descriptions for " + diff["db"])
        db_id = idc.get_db_id(diff["db"])
        rollback = None
        try:
            #Each change is applied independently; a rollback record is
            #captured (when requested) just before each write
            for change in diff["diffs"]:
                if ih.is_special_field(change["item"]):
                    if storeRollback:
                        rollback = capture_rollback(db_id, diff["db"], diff["table"], change)
                    change["item"] = change["item"][2:-2] #Trim special fields. TODO this should be done better somehow
                    updated = idc.update_table_data(db_id, diff["table"], change["item"], change["field"], change["content"])
                elif ih.is_toplevel_field(change["item"]):
                    #Here we have item == "toplevel", field the relevant field, and change the new value
                    if storeRollback:
                        rollback = capture_rollback(db_id, diff["db"], diff["table"], change)
                    #Only nice_name is currently an option
                    if (change["field"] not in ['nice_name', 'status']):
                        updated = {'err': True}
                    else:
                        if (diff["table"]):
                            #Table-level nice_name or status update
                            if (change['field']) == 'nice_name':
                                new_nice = change['content']
                                new_stat = None
                            else:
                                new_nice = None
                                new_stat = ih.status_to_code(change['content'])
                            table_id = idc.get_table_id(diff['table'])
                            updated = idc.update_table(table_id, nice_name=new_nice, status=new_stat)
                        else:
                            #Is database nice_name
                            updated = idc.update_db(db_id, nice_name=change["content"])
                else:
                    #Ordinary per-field update on the table
                    table_id = idc.get_table_id(diff["table"])
                    if storeRollback:
                        rollback = capture_rollback(db_id, diff["db"], diff["table"], change, table_id=table_id)
                    updated = idc.update_field(table_id, change["item"], change["field"], change["content"], type="human")
                if updated['err']:
                    raise KeyError("Cannot update, item not present")
                else:
                    if storeRollback:
                        store_rollback(rollback)
        except Exception as e:
            #Any failure mid-stream aborts the whole diff application
            inv.log_dest.error("Error applying diff " + str(change) + ' ' + str(e))
            raise UpdateFailed(str(e))
    except Exception as e:
        inv.log_dest.error("Error updating fields " + str(e))
def update_fields(diff, storeRollback=True):
    """Update a record from a diff object (collection-era variant).

    diff -- should be a fully qualified difference, containing db, collection
            names and then a list of changes, each being a dict containing the
            item, the field and the new content. Item corresponds to an entry
            in an object, field to the piece of information this specifies
            (for example, type, description, example)
    e.g. {"db":"curve_automorphisms","collection":"passports","diffs":[{"item":"total_label","field":"type","content":"string"}]}
    If this is a record entry, then the 'item' field will be a record hash.

    storeRollback -- determine whether to store the undiff and diff to allow
            rollback of the change (currently force-disabled below)
    """
    try:
        _id = idc.get_db_id(diff["db"])
        rollback = None
        # NOTE(review): rollback storage is force-disabled here, overriding the
        # caller's storeRollback argument -- confirm intent before removing.
        storeRollback = False
        try:
            for change in diff["diffs"]:
                if ih.is_special_field(change["item"]):
                    if storeRollback:
                        # Capture rollback BEFORE mutating change["item"] below
                        rollback = capture_rollback(_id['id'], diff["db"], diff["collection"], change)
                    change["item"] = change["item"][2:-2]  # Trim special fields. TODO this should be done better somehow
                    updated = idc.update_coll_data(_id['id'], diff["collection"], change["item"], change["field"], change["content"])
                elif ih.is_toplevel_field(change["item"]):
                    # Here we have item == "toplevel", field the relevant field, and change the new value
                    if storeRollback:
                        rollback = capture_rollback(_id['id'], diff["db"], diff["collection"], change)
                    # Only nice_name and status are currently editable options
                    # (debug print() calls removed from this branch)
                    if change["field"] not in ['nice_name', 'status']:
                        updated = {'err': True}
                    else:
                        if diff["collection"]:
                            if change['field'] == 'nice_name':
                                new_nice = change['content']
                                new_stat = None
                            else:
                                new_nice = None
                                new_stat = ih.status_to_code(change['content'])
                            c_id = idc.get_coll_id(_id['id'], diff['collection'])
                            updated = idc.update_coll(c_id['id'], nice_name=new_nice, status=new_stat)
                        else:
                            # Is database nice_name
                            updated = idc.update_db(_id['id'], nice_name=change["content"])
                else:
                    _c_id = idc.get_coll_id(_id['id'], diff["collection"])
                    if storeRollback:
                        rollback = capture_rollback(_id['id'], diff["db"], diff["collection"], change, coll_id=_c_id['id'])
                    # If it looks like a record hash, try treating it as a record
                    # first; if that fails, fall back to treating it as a field.
                    succeeded = False
                    if ih.is_probable_record_hash(change['item']):
                        updated = idc.update_record_description(_c_id['id'], {'hash': change["item"], change["field"]: change["content"]})
                        if not updated['err']:
                            succeeded = True
                    if not succeeded:
                        updated = idc.update_field(_c_id['id'], change["item"], change["field"], change["content"], type="human")

                if updated['err']:
                    raise KeyError("Cannot update, item not present")
                else:
                    # Only persist the rollback once the update succeeded
                    if storeRollback:
                        store_rollback(rollback)
        except Exception as e:
            inv.log_dest.error("Error applying diff " + str(change) + ' ' + str(e))
            raise UpdateFailed(str(e))
    except Exception as e:
        # Keep the swallow (callers treat this as best-effort) but log instead
        # of the previous silent `pass` so failures remain visible.
        inv.log_dest.error("Error updating fields " + str(e))
def upload_table_structure(db_name, table_name, structure_dat, fresh=False):
    """Upload the structure description for a single table.

    Any entered descriptions for keys which still exist are preserved.
    Removed or renamed keys will be returned for handling.
    Table entry is created if it doesn't exist, in which case Notes and Info
    are filled with dummies.

    db_name -- Name of database (must exist)
    table_name -- Name of table to upload
    structure_dat -- lmfdb db structure as json object, nested as
            structure_dat[db_name][table_name]
    fresh -- If True, skip trimming of now-redundant human-table keys

    Returns a list of orphaned (removed/renamed) keys, or [] on failure.
    """
    # Dummy per-table info, containing the basic fields we want included
    dummy_info = {}
    for field in inv.info_editable_fields:
        dummy_info[field] = None

    try:
        # BUGFIX: structure_dat is nested by db then table (see the
        # scrape_date lookup below, and the collection-era twin of this
        # function); the old flat structure_dat[table_name] lookup raised
        # KeyError and aborted the whole refresh.
        table_entry = structure_dat[db_name][table_name]
        db_entry = invc.get_db_id(db_name)
        if db_entry is None:
            # All dbs should have been added from the struc: if not is error
            inv.log_dest.error("ERROR: No inventory DB entry " + db_name)
            inv.log_dest.error("Cannot add descriptions")
            return []

        table_id = invc.get_table_id(table_name)
        if table_id is None:
            # Table doesn't exist, create it
            table_id = invc.set_table(db_entry, table_name, table_name, None, dummy_info, 0)
        else:
            # Delete existing auto-table entries (no table => no entries)
            delete_table_data(table_id, tbl='auto')
        try:
            scrape_date = datetime.datetime.strptime(table_entry['scrape_date'], '%Y-%m-%d %H:%M:%S.%f')
        except Exception as e:
            # Missing/unparseable date: fall back to the min-date sentinel
            inv.log_dest.info("Scrape date parsing failed " + str(e))
            scrape_date = datetime.datetime.min
        invc.set_table_scrape_date(table_id, scrape_date)
    except Exception as e:
        inv.log_dest.error("Failed to refresh table (db, table or scrape data) " + str(e))

    try:
        for field in table_entry['fields']:
            inv.log_dest.info("    Processing " + field)
            invc.set_field(table_id, field, table_entry['fields'][field])
        for record in table_entry['records']:
            inv.log_dest.info("    Processing record " + str(record))
            invc.set_record(table_id, table_entry['records'][record])
        # Cleanup any records which no longer exist
        # NOTE(review): `db` is a module-level name, not a parameter of this
        # function, unlike the other invc calls here -- confirm the
        # cleanup_records signature really takes it.
        invc.cleanup_records(db, table_id, table_entry['records'])
        inv.log_dest.info("    Processing indices")
        #FIXME
        upload_indices(db, table_id, table_entry['indices'])
    except Exception as e:
        inv.log_dest.error("Failed to refresh table entries " + str(e))

    orphaned_keys = []
    if not fresh:
        try:
            # Trim any human table keys which are now redundant
            orphaned_keys = invc.trim_human_table(db_entry, table_id)
        except Exception as e:
            inv.log_dest.error("Failed trimming table " + str(e))

    # Ensure everything mandatory is present in human table
    try:
        invc.complete_human_table(db_entry, table_id)
    except Exception as e:
        inv.log_dest.error("Failed padding table " + str(e))

    return orphaned_keys
def upload_collection_structure(db_name, coll_name, structure_dat, fresh=False):
    """Upload the structure description for a single collection.

    Any entered descriptions for keys which still exist are preserved.
    Removed or renamed keys will be returned for handling.
    Collection entry is created if it doesn't exist, in which case Notes and
    Info are filled with dummies.

    db_name -- Name of database this collection is in (MUST exist)
    coll_name -- Name of collection to upload
    structure_dat -- lmfdb db structure as json object, nested as
            structure_dat[db_name][coll_name]
    fresh -- If True, skip trimming of now-redundant human-table keys

    Returns a list of orphaned (removed/renamed) keys, or [] on failure.

    NOTE(review): the caller upload_scraped_inventory passes a db connection
    as its first argument; this signature has no db parameter -- confirm
    which variant is current.
    """
    # Dummy per-collection info, containing basic fields we want included
    dummy_info = {}
    for field in inv.info_editable_fields:
        dummy_info[field] = None

    try:
        coll_entry = structure_dat[db_name][coll_name]
        db_entry = invc.get_db_id(db_name)
        if not db_entry['exist']:
            # All dbs should have been added from the struc: if not is error.
            # BUGFIX: return [] (not bare `return`/None) -- callers take
            # len() of the result and crashed on None.
            return []

        # Inventory data migration includes db name in collection name for
        # some reason. Work around until we can fix the data.
        full_coll_name = db_name + '_' + coll_name

        _c_id = invc.get_coll_id(db_entry['id'], full_coll_name)
        if not _c_id['exist']:
            # Collection doesn't exist, create it
            _c_id = invc.set_coll(db_entry['id'], full_coll_name, full_coll_name, {'description': None}, dummy_info, 0)
        else:
            # Delete existing auto-table entries (no collection => no entries)
            delete_collection_data(_c_id['id'], tbl='auto')
        try:
            scrape_date = datetime.datetime.strptime(structure_dat[db_name][coll_name]['scrape_date'], '%Y-%m-%d %H:%M:%S.%f')
        except Exception as e:
            # Missing/unparseable date: fall back to the min-date sentinel
            inv.log_dest.info("Scrape date parsing failed " + str(e))
            scrape_date = datetime.datetime.min
        invc.set_coll_scrape_date(_c_id['id'], scrape_date)
    except Exception as e:
        # Was a bare `except: pass`; keep the best-effort behaviour but
        # record the failure like the table-era twin does.
        inv.log_dest.error("Failed to refresh collection (db, coll or scrape data) " + str(e))

    try:
        for field in coll_entry['fields']:
            invc.set_field(_c_id['id'], field, coll_entry['fields'][field])
            # Add any keys needed to human_table
            invc.create_field(_c_id['id'], field, 'human')
    except Exception as e:
        inv.log_dest.error("Failed to refresh collection entries " + str(e))

    orphaned_keys = []
    if not fresh:
        try:
            # Trim any human table keys which are now redundant
            orphaned_keys = invc.trim_human_table(db_entry['id'], _c_id['id'])
        except Exception as e:
            inv.log_dest.error("Failed trimming collection " + str(e))

    return orphaned_keys