def placeholders_for_collections():
    """Create on-disk placeholder files for every file referenced by the
    known container collections.

    Walks each (collection, prefix) pair, collects every file entry found by
    ``get_files_by_prefix``, then writes a placeholder at the hash-derived
    path under the persistent data directory, logging progress roughly every
    10% of the total.
    """
    log.info('Create placeholders for collections')  # fixed typo: 'colelctions'
    COLLECTIONS_PREFIXES = [('projects', 'files'),
                            ('acquisitions', 'files'),
                            ('analyses', 'files'),
                            ('sessions', 'files'),
                            ('sessions', 'subject.files'),
                            ('collections', 'files')]
    _files = []
    for collection, prefix in COLLECTIONS_PREFIXES:
        cursor = config.db.get_collection(collection).find({})
        for document in cursor:
            for f in get_files_by_prefix(document, prefix):
                f_dict = {
                    'collection_id': document.get('_id'),
                    'collection': collection,
                    'fileinfo': f,
                    'prefix': prefix
                }
                _files.append(f_dict)

    base = config.get_item('persistent', 'data_path')
    total = len(_files)
    for i, f in enumerate(_files):
        f_path = os.path.join(base, util.path_from_hash(f['fileinfo']['hash']))
        create_placeholder_file(f_path, f['fileinfo']['size'])

        # Show progress roughly every 10%. Use floor division so the step
        # stays an int under Python 3 (`/` would produce a float modulus);
        # on Python 2 int `/` already floors, so behavior is unchanged.
        if i % (total // 10 + 1) == 0:
            log.info('Processed %s files of total %s files ...' % (i, total))
def users(args):
    """Bootstrap users, groups, and drones from the JSON dump at ``args.json``.

    Every record is upserted with ``$setOnInsert`` so documents that already
    exist are left untouched. For users, a gravatar URL is probed with an
    HTTP HEAD (``d=404`` so gravatar 404s instead of serving a default image)
    and recorded only when the avatar actually exists.
    """
    now = datetime.datetime.utcnow()
    with open(args.json) as json_dump:
        input_data = json.load(json_dump)

    log.info('bootstrapping users...')
    with requests.Session() as rs:
        rs.params = {'d': '404'}
        for u in input_data.get('users', []):
            log.info(' ' + u['_id'])
            u['created'] = now
            u['modified'] = now
            u.setdefault('email', u['_id'])
            u.setdefault('preferences', {})
            # encode() is required on Python 3 (md5 takes bytes) and is a
            # no-op change for ASCII emails on Python 2.
            gravatar = ('https://gravatar.com/avatar/' +
                        hashlib.md5(u['email'].encode('utf-8')).hexdigest() +
                        '?s=512')
            if rs.head(gravatar):  # Response is truthy for status < 400
                u.setdefault('avatar', gravatar)
            u.setdefault('avatars', {})
            u['avatars'].setdefault('gravatar', gravatar)
            config.db.users.update_one({'_id': u['_id']}, {'$setOnInsert': u}, upsert=True)

    log.info('bootstrapping groups...')
    site_id = config.get_item('site', 'id')
    for g in input_data.get('groups', []):
        log.info(' ' + g['_id'])
        g['created'] = now
        g['modified'] = now
        for r in g['roles']:
            r.setdefault('site', site_id)
        config.db.groups.update_one({'_id': g['_id']}, {'$setOnInsert': g}, upsert=True)

    log.info('bootstrapping drones...')
    for d in input_data.get('drones', []):
        log.info(' ' + d['_id'])
        d['created'] = now
        d['modified'] = now
        config.db.drones.update_one({'_id': d['_id']}, {'$setOnInsert': d}, upsert=True)

    log.info('bootstrapping complete')
def data(args):
    """Sort zip files found under ``args.path`` into content-addressed storage.

    Each non-hidden, non-symlink ``.zip`` is SHA-384 hashed, moved (or copied
    when ``args.copy`` is set) to the hash-derived path under the persistent
    data directory, registered on its container hierarchy, and finally job
    rules are triggered for the new file.

    NOTE(review): a second ``data`` definition appears later in this module
    and shadows this one at import time — confirm which is intended.
    """
    log.info('inspecting %s' % args.path)
    files = []
    for dirpath, dirnames, filenames in os.walk(args.path):
        for filepath in [os.path.join(dirpath, fn) for fn in filenames if not fn.startswith('.')]:
            if not os.path.islink(filepath) and filepath.endswith('.zip'):
                files.append(filepath)
        # need to use slice assignment to influence walk behavior
        dirnames[:] = [dn for dn in dirnames if not dn.startswith('.')]
    file_cnt = len(files)
    log.info('found %d files to sort (ignoring symlinks and dotfiles)' % file_cnt)
    for i, filepath in enumerate(files):
        log.info('Loading %s [%s] (%d/%d)' % (os.path.basename(filepath), util.hrsize(os.path.getsize(filepath)), i + 1, file_cnt))
        hash_ = hashlib.sha384()
        size = os.path.getsize(filepath)
        try:
            # `with` closes the zip handle even when the comment is bad JSON
            # (the original leaked the file descriptor).
            with zipfile.ZipFile(filepath) as zf:
                metadata = json.loads(zf.comment)
        except ValueError as e:  # bad/missing JSON in the zip comment: skip
            log.warning(str(e))
            continue
        container = reaperutil.create_container_hierarchy(metadata)
        with open(filepath, 'rb') as fd:
            # Binary reads return bytes, so the sentinel must be b''; the
            # original '' never matches on Python 3 and loops forever.
            for chunk in iter(lambda: fd.read(2**20), b''):
                hash_.update(chunk)
        computed_hash = 'v0-sha384-' + hash_.hexdigest()
        destpath = os.path.join(config.get_item('persistent', 'data_path'), util.path_from_hash(computed_hash))
        dir_destpath = os.path.dirname(destpath)
        filename = os.path.basename(filepath)
        if not os.path.exists(dir_destpath):
            os.makedirs(dir_destpath)
        if args.copy:
            shutil.copyfile(filepath, destpath)
        else:
            shutil.move(filepath, destpath)
        created = modified = datetime.datetime.utcnow()
        fileinfo = {
            'name': filename,
            'size': size,
            'hash': computed_hash,
            'type': 'dicom',  # we are only bootstrapping dicoms at the moment
            'created': created,
            'modified': modified
        }
        container.add_file(fileinfo)
        rules.create_jobs(config.db, container.acquisition, 'acquisition', fileinfo)
def data(args):
    """Sort zip files found under ``args.path`` into content-addressed storage.

    Each non-hidden, non-symlink ``.zip`` is SHA-384 hashed, moved (or copied
    when ``args.copy`` is set) to the hash-derived path under the persistent
    data directory, registered on its container hierarchy, and finally job
    rules are triggered for the new file.

    NOTE(review): this duplicates an earlier ``data`` definition in this
    module and shadows it at import time — confirm which is intended.
    """
    log.info('inspecting %s' % args.path)
    files = []
    for dirpath, dirnames, filenames in os.walk(args.path):
        for filepath in [os.path.join(dirpath, fn) for fn in filenames if not fn.startswith('.')]:
            if not os.path.islink(filepath) and filepath.endswith('.zip'):
                files.append(filepath)
        # need to use slice assignment to influence walk behavior
        dirnames[:] = [dn for dn in dirnames if not dn.startswith('.')]
    file_cnt = len(files)
    log.info('found %d files to sort (ignoring symlinks and dotfiles)' % file_cnt)
    for i, filepath in enumerate(files):
        log.info('Loading %s [%s] (%d/%d)' % (os.path.basename(filepath), util.hrsize(os.path.getsize(filepath)), i + 1, file_cnt))
        hash_ = hashlib.sha384()
        size = os.path.getsize(filepath)
        try:
            # `with` closes the zip handle even when the comment is bad JSON
            # (the original leaked the file descriptor).
            with zipfile.ZipFile(filepath) as zf:
                metadata = json.loads(zf.comment)
        except ValueError as e:  # bad/missing JSON in the zip comment: skip
            log.warning(str(e))
            continue
        container = reaperutil.create_container_hierarchy(metadata)
        with open(filepath, 'rb') as fd:
            # Binary reads return bytes, so the sentinel must be b''; the
            # original '' never matches on Python 3 and loops forever.
            for chunk in iter(lambda: fd.read(2**20), b''):
                hash_.update(chunk)
        computed_hash = 'v0-sha384-' + hash_.hexdigest()
        destpath = os.path.join(config.get_item('persistent', 'data_path'), util.path_from_hash(computed_hash))
        dir_destpath = os.path.dirname(destpath)
        filename = os.path.basename(filepath)
        if not os.path.exists(dir_destpath):
            os.makedirs(dir_destpath)
        if args.copy:
            shutil.copyfile(filepath, destpath)
        else:
            shutil.move(filepath, destpath)
        created = modified = datetime.datetime.utcnow()
        fileinfo = {
            'name': filename,
            'size': size,
            'hash': computed_hash,
            'type': 'dicom',  # we are only bootstrapping dicoms at the moment
            'created': created,
            'modified': modified
        }
        container.add_file(fileinfo)
        rules.create_jobs(config.db, container.acquisition, 'acquisition', fileinfo)
def placeholders_for_gears():
    """Create on-disk placeholder files for locally-built gears.

    Scans the ``gears`` collection for documents whose exchange git-commit is
    ``'local'`` and writes a placeholder at the path derived from the gear's
    ``rootfs-hash``, logging progress roughly every 10% of the total.
    """
    log.info('Create placeholders for gears')
    cursor = config.db.get_collection('gears').find({})
    _files = []
    for document in cursor:
        if document['exchange']['git-commit'] == 'local':
            f_dict = {
                'gear_id': document['_id'],
                'gear_name': document['gear']['name'],
                'exchange': document['exchange']
            }
            _files.append(f_dict)

    base = config.get_item('persistent', 'data_path')
    total = len(_files)
    for i, f in enumerate(_files):
        # Hash colons are not filesystem-safe, so they become dashes.
        f_hash = 'v0-' + f['exchange']['rootfs-hash'].replace(':', '-')
        f_path = os.path.join(base, util.path_from_hash(f_hash))
        create_placeholder_file(f_path, f['gear_name'])

        # Show progress roughly every 10%. Use floor division so the step
        # stays an int under Python 3 (`/` would produce a float modulus);
        # on Python 2 int `/` already floors, so behavior is unchanged.
        if i % (total // 10 + 1) == 0:
            log.info('Processed %s gear files of total %s files ...' % (i, total))
def users(args):
    """Bootstrap users, groups, and drones from the JSON dump at ``args.json``.

    Every record is upserted with ``$setOnInsert`` so documents that already
    exist are left untouched. For users, a gravatar URL is probed with an
    HTTP HEAD (``d=404`` so gravatar 404s instead of serving a default image)
    and recorded only when the avatar actually exists.

    NOTE(review): this duplicates an earlier ``users`` definition in this
    module and shadows it at import time — confirm which is intended.
    """
    now = datetime.datetime.utcnow()
    with open(args.json) as json_dump:
        input_data = json.load(json_dump)

    log.info('bootstrapping users...')
    with requests.Session() as rs:
        rs.params = {'d': '404'}
        for u in input_data.get('users', []):
            log.info(' ' + u['_id'])
            u['created'] = now
            u['modified'] = now
            u.setdefault('email', u['_id'])
            u.setdefault('preferences', {})
            # encode() is required on Python 3 (md5 takes bytes) and is a
            # no-op change for ASCII emails on Python 2.
            gravatar = ('https://gravatar.com/avatar/' +
                        hashlib.md5(u['email'].encode('utf-8')).hexdigest() +
                        '?s=512')
            if rs.head(gravatar):  # Response is truthy for status < 400
                u.setdefault('avatar', gravatar)
            u.setdefault('avatars', {})
            u['avatars'].setdefault('gravatar', gravatar)
            config.db.users.update_one({'_id': u['_id']}, {'$setOnInsert': u}, upsert=True)

    log.info('bootstrapping groups...')
    site_id = config.get_item('site', 'id')
    for g in input_data.get('groups', []):
        log.info(' ' + g['_id'])
        g['created'] = now
        g['modified'] = now
        for r in g['roles']:
            r.setdefault('site', site_id)
        config.db.groups.update_one({'_id': g['_id']}, {'$setOnInsert': g}, upsert=True)

    log.info('bootstrapping drones...')
    for d in input_data.get('drones', []):
        log.info(' ' + d['_id'])
        d['created'] = now
        d['modified'] = now
        config.db.drones.update_one({'_id': d['_id']}, {'$setOnInsert': d}, upsert=True)

    log.info('bootstrapping complete')