def mongo_del(app, args):
    """
    Delete a mongodb record based on id

    The record is removed from the core collection when ``args.core`` is
    set, otherwise from the transient collection.  The id is cut to its
    first 24 characters, the same way ``mongo_get`` looks records up, so
    an id that can be fetched with ``mongo_get`` can also be deleted here.
    """
    if args.core:
        collection = get_mongo_core_db(app)
    else:
        collection = get_mongo_transient_db(app)

    # keep in sync with mongo_get, which queries on mongo_id[:24]
    mongo_id = args.mongo_id[:24]
    collection.remove({'_id': mongo_id})
def mongo_remove_key(app, args):
    """
    Remove a key from all records in the core or transient collection

    ``args.database`` selects the collection ('core' or 'transient') and
    ``args.key`` names the field to unset.  Every record that has the
    field is updated.

    Raises ValueError when ``args.database`` is neither 'core' nor
    'transient'.
    """
    lg.warning("removing %s from the %s db", args.key, args.database)

    if args.database == 'core':
        COLLECTION = get_mongo_core_db(app)
    elif args.database == 'transient':
        COLLECTION = get_mongo_transient_db(app)
    else:
        # without this guard COLLECTION would be unbound further down
        raise ValueError(
            "unknown database %r (expected 'core' or 'transient')"
            % (args.database,))

    print(COLLECTION)

    # only touch records that actually carry the key
    query = {args.key: {'$exists': True}}
    update = {"$unset": {args.key: ""}}
    print(query)
    print(update)
    COLLECTION.update(query, update, multi=True)
def mongo_index(app, args):
    """
    Ensure indexes on the relevant fields

    The fields to index are read from the application configuration
    ('plugin.mongo.indici.core' and 'plugin.mongo.indici.transient').
    Every configured value must be 1; only the field name is handed to
    ``ensure_index``.
    """
    transient_coll = get_mongo_transient_db(app)
    core_coll = get_mongo_core_db(app)

    core_fields = app.conf['plugin.mongo.indici.core']
    transient_fields = app.conf['plugin.mongo.indici.transient']

    # transient collection first, then core
    targets = (
        (transient_coll, transient_fields),
        (core_coll, core_fields),
    )
    for coll, fields in targets:
        for field, value in fields.items():
            assert value == 1
            coll.ensure_index(field)
def mongo_get(app, args):
    """
    Print a mongodb record, looked up by id, as YAML

    The core collection is queried when ``args.core`` is set, the
    transient collection otherwise.  Only the first 24 characters of
    ``args.mongo_id`` are used for the lookup.  Nothing is printed when
    no record is found.
    """
    open_collection = get_mongo_core_db if args.core \
        else get_mongo_transient_db
    record = open_collection(app).find_one({'_id': args.mongo_id[:24]})
    if record:
        print(yaml.safe_dump(record, default_flow_style=False))
def mongo_index(app, args):
    """
    Ensure indexes on the relevant fields

    Covers four collections: transient, core, transact and
    sha1sum2transact.  The fields to index come from the
    'plugin.mongo.indici.*' configuration entries; every configured
    value must be 1 and only the field name is passed to
    ``ensure_index``.  Each (collection, field, value) triple is printed
    before the index is ensured.
    """
    transient_coll = get_mongo_transient_db(app)
    core_coll = get_mongo_core_db(app)
    transact_coll, sha2tra_coll = get_mongo_transact_db(app)

    core_fields = app.conf['plugin.mongo.indici.core']
    transient_fields = app.conf['plugin.mongo.indici.transient']
    transact_fields = app.conf['plugin.mongo.indici.transact']
    sha2tra_fields = app.conf['plugin.mongo.indici.sha1sum2transact']

    work = [
        (transient_coll, transient_fields),
        (core_coll, core_fields),
        (transact_coll, transact_fields),
        (sha2tra_coll, sha2tra_fields),
    ]
    for coll, fields in work:
        for field, value in fields.items():
            print(coll, field, value)
            assert value == 1
            coll.ensure_index(field)
def mongo_index(app, args):
    """
    Ensure indexes on the relevant fields

    For each of the transient, core, transact and sha1sum2transact
    collections the matching 'plugin.mongo.indici.*' configuration entry
    lists the fields to index.  Each configured value is asserted to be
    1; the field name alone is given to ``ensure_index``.
    """
    def _ensure(coll, spec):
        # print, validate and index every configured field of one collection
        for name, flag in spec.items():
            print(coll, name, flag)
            assert flag == 1
            coll.ensure_index(name)

    db_transient = get_mongo_transient_db(app)
    db_core = get_mongo_core_db(app)
    db_transact, db_sha2tra = get_mongo_transact_db(app)

    spec_core = app.conf['plugin.mongo.indici.core']
    spec_transient = app.conf['plugin.mongo.indici.transient']
    spec_transact = app.conf['plugin.mongo.indici.transact']
    spec_sha2tra = app.conf['plugin.mongo.indici.sha1sum2transact']

    _ensure(db_transient, spec_transient)
    _ensure(db_core, spec_core)
    _ensure(db_transact, spec_transact)
    _ensure(db_sha2tra, spec_sha2tra)
def _get_mongo_keys(app, collection, force=False):
    """
    Count how often each top-level key occurs in a collection

    Runs a map/reduce job that emits 1 for every key of every record and
    sums the emitted values per key.  ``collection`` equal to
    'transient' selects the transient collection, any other value the
    core collection.  ``force`` is accepted but not used here.

    Returns a dict mapping key name to its integer count.
    """
    from bson.code import Code

    map_js = Code(""" function() { for (var key in this) { emit(key, 1); } } """)
    reduce_js = Code("function(key, vals) { return Array.sum(vals); }")

    if collection == 'transient':
        message("Get keys from the transient db")
        source = get_mongo_transient_db(app)
    else:
        message("Get keys from the core db")
        source = get_mongo_core_db(app)

    # the job writes its output to a named collection, read back below
    result = source.map_reduce(map_js, reduce_js, "my_collection" + "_keys")
    return {row['_id']: int(row['value']) for row in result.find()}
def forget(app, args):
    """
    Remove the records of the given files from the mongo database

    For every file yielded by ``get_all_mad_files`` the transient record
    is removed; when ``args.remove_from_core`` is set the core record is
    removed as well.  Ids are collected and removed in batches so that
    no single query carries an unbounded ``$in`` list.
    """
    MONGO = get_mongo_transient_db(app)
    MONGO_CORE = get_mongo_core_db(app)

    to_remove = []
    to_remove_core = []

    def go(coll, lst):
        # remove one batch of ids; nothing to do for an empty batch
        if lst:
            coll.remove({'_id': {'$in': lst}})

    for madfile in get_all_mad_files(app, args):
        to_remove.append(madfile['_id_transient'])
        if args.remove_from_core:
            # '_id_core' is the key madfile_init stores the core id
            # under -- TODO confirm this is the intended key
            to_remove_core.append(madfile['_id_core'])

        if len(to_remove) > 100:
            go(MONGO, to_remove)
            to_remove = []

        if len(to_remove_core) > 100:
            go(MONGO_CORE, to_remove_core)
            to_remove_core = []

    # flush whatever is left over after the last full batch
    go(MONGO, to_remove)
    go(MONGO_CORE, to_remove_core)
def madfile_init(app, madfile):
    """
    Initialize this madfile - mainly - check if the mongo transient
    database knows about this file, and has the SHA1SUM. The SHA1SUM
    is then used to get the data from the core database

    Three cases are handled:

    * the transient record has no sha1sum, or no usable timestamp for
      it: the sha1sum is calculated;
    * the file's mtime is unknown or newer than the stored sha1sum
      timestamp: the sha1sum is recalculated and, when it differs from
      the stored one, the old core record is copied to the new core id
      and the madfile is saved;
    * otherwise the stored sha1sum is used as is.

    When the sha1sum cannot be calculated nothing is stored on the
    madfile.  This function does not write to the transient collection
    itself.
    """
    global COUNTER
    COUNTER['init'] += 1

    trans_db = get_mongo_transient_db(app)
    core_db = get_mongo_core_db(app)
    trans_id = get_mongo_transient_id(madfile)
    rec = trans_db.find_one({'_id': trans_id})
    mtime = madfile.get('mtime')

    sha1sum = None
    sha1sum_time = None

    if isinstance(rec, dict):
        sha1sum = rec.get('sha1sum')
        sha1sum_time = rec.get('sha1sum_time')

    def _prep_madfile(_madfile, sha1, sha1_time):
        # the core id is the first 24 characters of the sha1sum
        _madfile.all['_id_core'] = sha1[:24]
        _madfile.all['sha1sum'] = sha1
        _madfile.mad['sha1sum'] = sha1
        _madfile.all['sha1sum_time'] = sha1_time

    def _create_new_sha1(_madfile):
        # returns the new sha1sum, or False when it cannot be calculated
        lg.debug("recreate shasum for %s", _madfile['inputfile'])
        COUNTER['calc'] += 1
        sha1 = mad2.hash.get_sha1(_madfile['fullpath'])
        sha1_time = datetime.datetime.utcnow()

        if sha1 is None:
            # still not?? maybe the file does not exist? Link is broken??
            # Will not save this
            return False

        lg.debug("shasum for %s (%s) is %s",
                 _madfile['inputfile'], trans_id, sha1)
        _prep_madfile(_madfile, sha1, sha1_time)
        return sha1

    if sha1sum is None or not isinstance(sha1sum_time, datetime.datetime):
        # no shasum - recreate
        _create_new_sha1(madfile)

    elif mtime is None or mtime > sha1sum_time:
        # changed sha1sum?  (sha1sum_time is a datetime on this branch)
        old_sha1sum = sha1sum
        new_sha1sum = _create_new_sha1(madfile)

        if not new_sha1sum:
            # could not hash the file - there is no new core id to move
            # the record to, so leave the databases untouched
            lg.warning("could not calculate sha1sum for %s",
                       madfile['inputfile'])
            return

        if old_sha1sum == new_sha1sum:
            COUNTER['unchanged'] += 1
        else:
            # record has changed - copy the core data from the old to
            # the new record.
            old_core_id = old_sha1sum[:24]
            new_core_id = new_sha1sum[:24]

            lg.info("file changed: %s", madfile['inputfile'][-30:])
            lg.debug("coreid %s -> %s", old_core_id, new_core_id)

            # prepare record
            old_core_record = core_db.find_one({'_id': old_core_id})
            if not old_core_record:
                old_core_record = {}
            if 'old_sha1sums' not in old_core_record:
                old_core_record['old_sha1sums'] = []
            old_core_record['old_sha1sums'].append(old_sha1sum)
            old_core_record['sha1sum'] = new_sha1sum
            # the copy is stored under the new id, so drop the old one
            if '_id' in old_core_record:
                del old_core_record['_id']

            # store in core database
            core_db.update({'_id': new_core_id},
                           {"$set": old_core_record},
                           upsert=True)

            madfile.mad.update(old_core_record)
            save_to_mongo(app, madfile)
            COUNTER['changed'] += 1
    else:
        _prep_madfile(madfile, sha1sum, sha1sum_time)
def madfile_init(app, madfile):
    """
    Initialize this madfile - mainly - check if the mongo transient
    database knows about this file, and has the SHA1SUM. The SHA1SUM
    is then used to get the data from the core database

    Three cases are handled:

    * the transient record has no sha1sum, or no usable timestamp for
      it: the sha1sum is (re)created;
    * the file's mtime is unknown or newer than the stored sha1sum
      timestamp: the sha1sum is recreated and, when it differs from the
      stored one, the old core record is copied to the new core id and
      the madfile is saved;
    * otherwise the stored sha1sum is used as is.

    A (re)created sha1sum is first looked for in the SHA1SUM file and
    only calculated when that yields nothing recent enough; it is then
    written to the transient collection.  When no sha1sum can be
    obtained nothing is stored.
    """
    global COUNTER
    COUNTER['init'] += 1

    trans_db = get_mongo_transient_db(app)
    core_db = get_mongo_core_db(app)
    trans_id = get_mongo_transient_id(madfile)
    rec = trans_db.find_one({'_id': trans_id})
    nowtime = datetime.utcnow()
    mtime = madfile.get('mtime')

    sha1sum = None
    sha1sum_time = None

    if isinstance(rec, dict):
        sha1sum = rec.get('sha1sum')
        sha1sum_time = rec.get('sha1sum_time')

    def _prep_madfile(_madfile, sha1, sha1_time):
        # the core id is the first 24 characters of the sha1sum
        _madfile.all['_id_core'] = sha1[:24]
        _madfile.all['sha1sum'] = sha1
        _madfile.mad['sha1sum'] = sha1
        _madfile.all['sha1sum_time'] = sha1_time

    def _create_new_sha1(_madfile):
        # returns the new sha1sum, or False when none can be obtained

        # TODO: temporary hack - see if we can get the data from the
        # SHA1SUM files.
        sha1, sha1_time = mad2.hash.check_sha1sum_file(_madfile['fullpath'])

        if sha1 is not None and arrow.get(mtime).to('local') <= sha1_time:
            COUNTER['shafile'] += 1
            lg.info("recoved sha1 from the SHA1SUM file")
        else:
            # also not in the sha1sum file - recalculate
            lg.debug("recreate shasum for %s", _madfile['inputfile'])
            COUNTER['calc'] += 1
            sha1 = mad2.hash.get_sha1(_madfile['fullpath'])
            sha1_time = datetime.utcnow()

        if sha1 is None:
            # still not?? maybe the file does not exist? Link is broken??
            # Will not save this
            return False

        lg.info("shasum for %s (%s) is %s",
                _madfile['inputfile'], trans_id, sha1)

        # note: the transient record is stamped with nowtime, while the
        # madfile itself gets sha1_time
        trans_db.update({'_id': trans_id},
                        {"$set": {'sha1sum': sha1,
                                  'sha1sum_time': nowtime}},
                        upsert=True)

        _prep_madfile(_madfile, sha1, sha1_time)
        return sha1

    if sha1sum is None or not isinstance(sha1sum_time, datetime):
        # no shasum - recreate
        _create_new_sha1(madfile)

    elif mtime is None or mtime > sha1sum_time:
        # changed sha1sum?  (sha1sum_time is a datetime on this branch)
        old_sha1sum = sha1sum
        new_sha1sum = _create_new_sha1(madfile)

        if not new_sha1sum:
            # no sha1sum could be obtained - there is no new core id to
            # move the record to, so leave the core database untouched
            lg.warning("could not calculate sha1sum for %s",
                       madfile['inputfile'])
            return

        if old_sha1sum == new_sha1sum:
            COUNTER['unchanged'] += 1
        else:
            # record has changed - copy the core data from the old to
            # the new record.
            old_core_id = old_sha1sum[:24]
            new_core_id = new_sha1sum[:24]

            lg.info("file changed: %s", madfile['inputfile'][-30:])
            lg.debug("coreid %s -> %s", old_core_id, new_core_id)

            # prepare record
            old_core_record = core_db.find_one({'_id': old_core_id})
            if not old_core_record:
                old_core_record = {}
            if 'old_sha1sums' not in old_core_record:
                old_core_record['old_sha1sums'] = []
            old_core_record['old_sha1sums'].append(old_sha1sum)
            old_core_record['sha1sum'] = new_sha1sum
            # the copy is stored under the new id, so drop the old one
            if '_id' in old_core_record:
                del old_core_record['_id']

            # store in core database
            core_db.update({'_id': new_core_id},
                           {"$set": old_core_record},
                           upsert=True)

            madfile.mad.update(old_core_record)
            save_to_mongo(app, madfile)
            COUNTER['changed'] += 1
    else:
        _prep_madfile(madfile, sha1sum, sha1sum_time)