def create_bucket(storage):
    """Create s3 storage bucket."""
    storages = [storage] if storage else config.STORAGE_BACKENDS
    for key in storages:
        try:
            storage_instance = get_storage(key)
        except RuntimeError:
            if storage:
                click.echo(f'Storage {key} does not exist')
                sys.exit(1)
            continue
        if isinstance(storage_instance, ReadOnlyStorageMixin):
            if storage:
                click.echo(f'Storage {key} is read-only')
                sys.exit(1)
            continue
        if isinstance(storage_instance, S3StorageBase):
            bucket_name = storage_instance._get_current_bucket_name()
            if storage_instance._bucket_exists(bucket_name):
                click.echo(f'Storage {key}: bucket {bucket_name} already exists')
                continue
            storage_instance._create_bucket(bucket_name)
            click.echo(f'Storage {key}: bucket {bucket_name} created')
        elif storage:
            click.echo(f'Storage {key} is not an s3 storage')
            sys.exit(1)
def _process(self):
    data = {}
    for key in config.STORAGE_BACKENDS:
        storage = get_storage(key)
        if not isinstance(storage, S3StorageBase):
            continue
        readonly = isinstance(storage, ReadOnlyStorageMixin)
        data[key] = {'readonly': readonly, 'meta': storage.meta}
        if isinstance(storage, S3Storage):
            data[key].update(self._get_static_info(storage))
        elif isinstance(storage, DynamicS3Storage):
            data[key].update(self._get_dynamic_info(key, storage))
    return jsonify(data)
def _process_file(self, data):
    """Copy a file from storage into the export archive."""
    if data.get('storage_file_id') is None:
        return
    assert '__file__' not in data  # only one file per row allowed
    storage_backend = data.pop('storage_backend')
    storage_file_id = data.pop('storage_file_id')
    filename = data.pop('filename')
    content_type = data.pop('content_type')
    size = data.pop('size')
    md5 = data.pop('md5')
    uuid = self._get_uuid()
    with get_storage(storage_backend).open(storage_file_id) as f:
        self._add_file(uuid, size, f)
    data['__file__'] = ('file', {'uuid': uuid, 'filename': filename, 'content_type': content_type,
                                 'size': size, 'md5': md5})
def _process_file(self, id_, data):
    storage_backend = config.ATTACHMENT_STORAGE
    storage = get_storage(storage_backend)
    extracted = self.archive.extractfile(data['uuid'])
    path = self._get_file_storage_path(id_, data['filename'])
    storage_file_id, md5 = storage.save(path, data['content_type'], data['filename'], extracted)
    assert data['size'] == storage.getsize(storage_file_id)
    if data['md5']:
        assert data['md5'] == md5
    return {
        'storage_backend': storage_backend,
        'storage_file_id': storage_file_id,
        'content_type': data['content_type'],
        'filename': data['filename'],
        'size': data['size'],
        'md5': md5
    }
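# To make the hand-off between the two _process_file methods above explicit: the exporter
# stores a ('file', {...}) marker under data['__file__'], and the importer reads the same
# keys back out of the archive metadata before saving the file into ATTACHMENT_STORAGE.
# The values below are made-up examples, not taken from the code above.
example_file_entry = ('file', {
    'uuid': '3f2b8c1e-0000-0000-0000-000000000000',   # archive member name passed to _add_file
    'filename': 'slides.pdf',
    'content_type': 'application/pdf',
    'size': 123456,
    'md5': '0123456789abcdef0123456789abcdef',
})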
def create_bucket():
    for key in config.STORAGE_BACKENDS:
        storage = get_storage(key)
        if not isinstance(storage, DynamicS3Storage) or isinstance(storage, ReadOnlyStorageMixin):
            continue
        today = date.today()
        placeholders = set(re.findall('<.*?>', storage.bucket_name_template))
        if not placeholders:
            continue
        elif placeholders == {'<year>', '<week>'}:
            bucket_date = today + relativedelta(weeks=1)
            bucket = storage._get_bucket_name(bucket_date)
            storage._create_bucket(bucket)
        elif placeholders == {'<year>', '<month>'} or placeholders == {'<year>'}:
            if '<month>' in placeholders or today.month == 12:
                bucket_date = today + relativedelta(months=1)
                bucket = storage._get_bucket_name(bucket_date)
                storage._create_bucket(bucket)
        else:
            raise RuntimeError('Invalid placeholder combination in bucket name template: {}'
                               .format(storage.bucket_name_template))
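# For context, the <year>/<month>/<week> placeholders in bucket_name_template are date-based;
# the copy command further below expands the same placeholders explicitly with strftime.
# A minimal sketch of that expansion (the helper name expand_bucket_template is made up for
# illustration and is not part of the storage API):
from datetime import date

def expand_bucket_template(template, dt):
    return (template
            .replace('<year>', dt.strftime('%Y'))
            .replace('<month>', dt.strftime('%m'))
            .replace('<week>', dt.strftime('%W')))

# expand_bucket_template('indico-<year>-<month>', date(2019, 1, 15)) -> 'indico-2019-01'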
def downgrade():
    if context.is_offline_mode():
        raise Exception('This downgrade is only possible in online mode')
    op.add_column('static_sites', sa.Column('path', sa.String(), nullable=True), schema='events')
    conn = op.get_bind()
    res = conn.execute("""
        SELECT id, storage_backend, storage_file_id
        FROM events.static_sites
        WHERE storage_backend IS NOT NULL
    """)
    for row in res:
        with get_storage(row.storage_backend).get_local_path(row.storage_file_id) as path:
            conn.execute('UPDATE events.static_sites SET path=%s WHERE id=%s', (path, row.id))
    op.drop_column('static_sites', 'storage_file_id', schema='events')
    op.drop_column('static_sites', 'storage_backend', schema='events')
    op.drop_column('static_sites', 'size', schema='events')
    op.drop_column('static_sites', 'filename', schema='events')
    op.drop_column('static_sites', 'content_type', schema='events')
def storage(self):
    """The Storage object used to store the file."""
    if self.storage_backend is None:
        raise RuntimeError('No storage backend set')
    return get_storage(self.storage_backend)
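# Hypothetical usage sketch: the property above is meant to live on an object that also
# carries storage_backend/storage_file_id attributes (as in the snippets above). The class
# below is made up for illustration; get_storage is the same helper used throughout this page.
class ExampleStoredFile(object):
    def __init__(self, storage_backend, storage_file_id):
        self.storage_backend = storage_backend
        self.storage_file_id = storage_file_id

    @property
    def storage(self):
        """The Storage object used to store the file."""
        if self.storage_backend is None:
            raise RuntimeError('No storage backend set')
        return get_storage(self.storage_backend)

    def open(self):
        # read the file back through whichever backend stored it
        return self.storage.open(self.storage_file_id)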
def copy(source_backend_names, bucket_names, static_bucket_name, s3_endpoint, s3_profile,
         s3_bucket_policy_file, rclone, output):
    """Copy files to S3.

    This command copies files to S3 and records the necessary database
    changes in a JSONL file.

    Multiple bucket names can be specified; in that case the bucket name can
    change based on the year a file was created in.  The last bucket name
    will be the default, while any other bucket name must include a
    conditional indicating when to use it:

    \b
    -B '<2001:indico-pre-2001'
    -B '<2009:indico-<year>'
    -B 'indico-<year>-<month>'

    The static bucket name cannot contain any placeholders.

    The indico storage backend will get the same name as the bucket by
    default, but this can be overridden, e.g. `-B 'indico-<year>/s3-<year>'`
    would name the bucket 'indico-2018' but use a backend named 's3-2018'.
    It is your responsibility to ensure that placeholders match between the
    two names.

    S3 credentials should be specified in the usual places, i.e.
    `~/.aws/credentials` for regular S3 access and
    `~/.config/rclone/rclone.conf` when using rclone.
    """
    bucket_names = [tuple(x.split('/', 1)) if '/' in x else (x, x.split(':', 1)[-1])
                    for x in bucket_names]
    if ':' in bucket_names[-1][0]:
        raise click.UsageError('Last bucket name cannot contain criteria')
    if not all(':' in x[0] for x in bucket_names[:-1]):
        raise click.UsageError('All but the last bucket name need to contain criteria')
    matches = [(re.match(r'^(<|>|==|<=|>=)\s*(\d{4}):(.+)$', name), backend)
               for name, backend in bucket_names[:-1]]
    if not all(x[0] for x in matches):
        # report the first bucket spec whose criteria could not be parsed
        bad_index = [m[0] for m in matches].index(None)
        raise click.UsageError("Could not parse '{}'".format(bucket_names[:-1][bad_index][0]))
    criteria = [(match.groups(), backend) for match, backend in matches]
    # Build and compile a function to get the bucket/backend name to avoid
    # processing the criteria for every single file (can be millions for
    # large instances)
    code = ['def get_bucket_name(dt):']
    if criteria:
        for i, ((op, value, bucket), backend) in enumerate(criteria):
            code.append('    {}if dt.year {} {}:'.format('el' if i else '', op, value))
            code.append('        bucket, backend = {!r}'.format((bucket, backend)))
        code.append('    else:')
        code.append('        bucket, backend = {!r}'.format(bucket_names[-1]))
    else:
        code.append('    bucket, backend = {!r}'.format(bucket_names[-1]))
    code.append('    bucket = bucket.replace("<year>", dt.strftime("%Y"))')
    code.append('    bucket = bucket.replace("<month>", dt.strftime("%m"))')
    code.append('    bucket = bucket.replace("<week>", dt.strftime("%W"))')
    code.append('    backend = backend.replace("<year>", dt.strftime("%Y"))')
    code.append('    backend = backend.replace("<month>", dt.strftime("%m"))')
    code.append('    backend = backend.replace("<week>", dt.strftime("%W"))')
    code.append('    return bucket, backend')
    d = {}
    exec '\n'.join(code) in d
    if not source_backend_names:
        source_backend_names = [x for x in config.STORAGE_BACKENDS
                                if not isinstance(get_storage(x), S3StorageBase)]
    if rclone:
        invalid = [x for x in source_backend_names
                   if not isinstance(get_storage(x), FileSystemStorage)]
        if invalid:
            click.secho('Found unsupported storage backends: {}'.format(', '.join(sorted(invalid))),
                        fg='yellow')
            click.secho('The backends might not work together with `--rclone`', fg='yellow')
            click.confirm('Continue anyway?', abort=True)
    s3_bucket_policy = s3_bucket_policy_file.read() if s3_bucket_policy_file else None
    imp = S3Importer(d['get_bucket_name'], static_bucket_name, output, source_backend_names,
                     rclone, s3_endpoint, s3_profile, s3_bucket_policy)
    with monkeypatch_registration_file_time():
        imp.run()
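# For the example criteria from the docstring
#   -B '<2001:indico-pre-2001' -B '<2009:indico-<year>' -B 'indico-<year>-<month>'
# (with backend names defaulting to the bucket names), the dynamically built function is
# roughly equivalent to the sketch below; it is written out here for illustration only,
# not copied from generated output.
def get_bucket_name(dt):
    if dt.year < 2001:
        bucket, backend = ('indico-pre-2001', 'indico-pre-2001')
    elif dt.year < 2009:
        bucket, backend = ('indico-<year>', 'indico-<year>')
    else:
        bucket, backend = ('indico-<year>-<month>', 'indico-<year>-<month>')
    bucket = bucket.replace('<year>', dt.strftime('%Y'))
    bucket = bucket.replace('<month>', dt.strftime('%m'))
    bucket = bucket.replace('<week>', dt.strftime('%W'))
    backend = backend.replace('<year>', dt.strftime('%Y'))
    backend = backend.replace('<month>', dt.strftime('%m'))
    backend = backend.replace('<week>', dt.strftime('%W'))
    return bucket, backend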