Example #1
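# Context assumed by this excerpt (not shown here): stdlib os, the
# project-local config, util, and log modules, and the helpers
# get_files_by_prefix() and create_placeholder_file().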
def placeholders_for_collections():
    log.info('Create placeholders for collections')
    COLLECTIONS_PREFIXES = [('projects', 'files'),
                            ('acquisitions', 'files'),
                            ('analyses', 'files'),
                            ('sessions', 'files'),
                            ('sessions', 'subject.files'),
                            ('collections', 'files')]

    _files = []

    for collection, prefix in COLLECTIONS_PREFIXES:
        cursor = config.db.get_collection(collection).find({})
        for document in cursor:
            for f in get_files_by_prefix(document, prefix):
                f_dict = {
                    'collection_id': document.get('_id'),
                    'collection': collection,
                    'fileinfo': f,
                    'prefix': prefix
                }
                _files.append(f_dict)

    base = config.get_item('persistent', 'data_path')
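    # Content-addressed layout: each placeholder lives under data_path at a
    # path derived from the file's hash (via util.path_from_hash).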
    for i, f in enumerate(_files):
        f_path = os.path.join(base, util.path_from_hash(f['fileinfo']['hash']))
        create_placeholder_file(f_path, f['fileinfo']['size'])

        # Show progress
        if i % (len(_files) // 10 + 1) == 0:
            log.info('Processed %s of %s files ...' % (i, len(_files)))
Example #2
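# Context assumed by this excerpt: stdlib datetime, json, hashlib, the
# requests library, and the project-local config and log modules.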
def users(args):
    now = datetime.datetime.utcnow()
    with open(args.json) as json_dump:
        input_data = json.load(json_dump)
    log.info('bootstrapping users...')
    with requests.Session() as rs:
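        # d=404 makes Gravatar return HTTP 404 (instead of a default image)
        # when no avatar exists for the hash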
        rs.params = {'d': '404'}
        for u in input_data.get('users', []):
            log.info('    ' + u['_id'])
            u['created'] = now
            u['modified'] = now
            u.setdefault('email', u['_id'])
            u.setdefault('preferences', {})
            gravatar = 'https://gravatar.com/avatar/' + hashlib.md5(u['email'].encode('utf-8')).hexdigest() + '?s=512'
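            # a truthy Response (status code < 400) means the avatar exists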
            if rs.head(gravatar):
                u.setdefault('avatar', gravatar)
            u.setdefault('avatars', {})
            u['avatars'].setdefault('gravatar', gravatar)
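            # upsert + $setOnInsert: insert the user if absent, leave
            # existing documents untouched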
            config.db.users.update_one({'_id': u['_id']}, {'$setOnInsert': u}, upsert=True)
    log.info('bootstrapping groups...')
    site_id = config.get_item('site', 'id')
    for g in input_data.get('groups', []):
        log.info('    ' + g['_id'])
        g['created'] = now
        g['modified'] = now
        for r in g['roles']:
            r.setdefault('site', site_id)
        config.db.groups.update_one({'_id': g['_id']}, {'$setOnInsert': g}, upsert=True)
    log.info('bootstrapping drones...')
    for d in input_data.get('drones', []):
        log.info('    ' + d['_id'])
        d['created'] = now
        d['modified'] = now
        config.db.drones.update_one({'_id': d['_id']}, {'$setOnInsert': d}, upsert=True)
    log.info('bootstrapping complete')
Example #3
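# Context assumed by this excerpt: stdlib os, hashlib, json, zipfile,
# shutil, and datetime, plus the project-local config, util, log, rules,
# and reaperutil modules.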
def data(args):
    log.info('inspecting %s' % args.path)
    files = []
    for dirpath, dirnames, filenames in os.walk(args.path):
        for filepath in [
                os.path.join(dirpath, fn) for fn in filenames
                if not fn.startswith('.')
        ]:
            if not os.path.islink(filepath) and filepath.endswith('.zip'):
                files.append(filepath)
        dirnames[:] = [
            dn for dn in dirnames if not dn.startswith('.')
        ]  # need to use slice assignment to influence walk behavior
    file_cnt = len(files)
    log.info('found %d files to sort (ignoring symlinks and dotfiles)' %
             file_cnt)
    for i, filepath in enumerate(files):
        log.info('Loading     %s [%s] (%d/%d)' %
                 (os.path.basename(filepath),
                  util.hrsize(os.path.getsize(filepath)), i + 1, file_cnt))
        hash_ = hashlib.sha384()
        size = os.path.getsize(filepath)
        try:
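            # the upload metadata is stored as JSON in the zip archive's comment field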
            metadata = json.loads(zipfile.ZipFile(filepath).comment)
        except (ValueError, zipfile.BadZipfile) as e:
            log.warning(str(e))
            continue
        container = reaperutil.create_container_hierarchy(metadata)
        with open(filepath, 'rb') as fd:
            for chunk in iter(lambda: fd.read(2**20), b''):
                hash_.update(chunk)
        computed_hash = 'v0-sha384-' + hash_.hexdigest()
        destpath = os.path.join(config.get_item('persistent', 'data_path'),
                                util.path_from_hash(computed_hash))
        dir_destpath = os.path.dirname(destpath)
        filename = os.path.basename(filepath)
        if not os.path.exists(dir_destpath):
            os.makedirs(dir_destpath)
        if args.copy:
            shutil.copyfile(filepath, destpath)
        else:
            shutil.move(filepath, destpath)
        created = modified = datetime.datetime.utcnow()
        fileinfo = {
            'name': filename,
            'size': size,
            'hash': computed_hash,
            'type': 'dicom',  # we are only bootstrapping dicoms at the moment
            'created': created,
            'modified': modified
        }
        container.add_file(fileinfo)
        rules.create_jobs(config.db, container.acquisition, 'acquisition',
                          fileinfo)
Example #4
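# Context assumed by this excerpt: stdlib os, the project-local config,
# util, and log modules, and the create_placeholder_file() helper.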
def placeholders_for_gears():
    log.info('Create placeholders for gears')
    cursor = config.db.get_collection('gears').find({})
    _files = []
    for document in cursor:
        if document['exchange']['git-commit'] == 'local':
            f_dict = {
                'gear_id': document['_id'],
                'gear_name': document['gear']['name'],
                'exchange': document['exchange']
            }
            _files.append(f_dict)

    base = config.get_item('persistent', 'data_path')
    for i, f in enumerate(_files):
        f_hash = 'v0-' + f['exchange']['rootfs-hash'].replace(':', '-')
        f_path = os.path.join(base, util.path_from_hash(f_hash))
        create_placeholder_file(f_path, f['gear_name'])

        # Show progress
        if i % (len(_files) // 10 + 1) == 0:
            log.info('Processed %s of %s gear files ...' % (i, len(_files)))