def main(argv, session):
    """Delete files from an item, as selected by the command-line arguments.

    Files are chosen with ``--all``, ``--glob``, ``--format`` or explicit
    ``<file>`` names (a single ``-`` reads the names from stdin, one per
    line).  Names ending in ``_meta.xml``, ``_files.xml`` or ``_meta.sqlite``
    are always skipped.

    :param argv: Argument list parsed by ``docopt`` against the module
                 docstring.
    :param session: Object providing ``get_item(identifier)``.

    Exits with status 1 when the arguments fail validation, when no files
    are selected, or when at least one error was recorded while deleting.
    """
    args = docopt(__doc__, argv=argv)

    # Validate args.
    schema = Schema({
        six.text_type: Use(bool),
        '<file>': And(list, Use(
            lambda x: convert_str_list_to_unicode(x) if six.PY2 else x)),
        '--format': list,
        '--header': Or(None, And(Use(get_args_dict), dict),
                       error='--header must be formatted as --header="key:value"'),
        '--glob': list,
        'delete': bool,
        '--retries': Use(lambda i: int(i[0])),
        '<identifier>': str,
    })
    try:
        args = schema.validate(args)
    except SchemaError as exc:
        print('{0}\n{1}'.format(str(exc), printable_usage(__doc__)),
              file=sys.stderr)
        sys.exit(1)

    verbose = not args['--quiet']
    item = session.get_item(args['<identifier>'])
    if not item.exists:
        # The placeholder used to be printed literally; fill in the identifier.
        print('{0}: skipping, item does\'t exist.'.format(item.identifier))

    # Files that cannot be deleted via S3.
    no_delete = ('_meta.xml', '_files.xml', '_meta.sqlite')

    # Add keep-old-version by default.
    args['--header'].setdefault('x-archive-keep-old-version', '1')

    if verbose:
        sys.stdout.write('Deleting files from {0}\n'.format(item.identifier))

    # Every branch materializes its result so the emptiness check below
    # behaves the same regardless of how the files were selected.
    if args['--all']:
        files = list(item.get_files())
        # Was misspelled '--cacade', so the flag read by f.delete() below
        # was never switched on.
        args['--cascade'] = True
    elif args['--glob']:
        files = list(item.get_files(glob_pattern=args['--glob']))
    elif args['--format']:
        files = list(item.get_files(formats=args['--format']))
    else:
        if args['<file>'] == ['-']:
            fnames = [line.strip() for line in sys.stdin]
            if six.PY2:
                fnames = convert_str_list_to_unicode(fnames)
        else:
            fnames = [name.strip() for name in args['<file>']]
        files = list(item.get_files(fnames))

    if not files:
        sys.stderr.write(' warning: no files found, nothing deleted.\n')
        sys.exit(1)

    errors = False
    for f in files:
        if not f:
            # NOTE(review): a falsy entry is reported but not skipped, as in
            # the original flow -- confirm whether it should `continue`.
            if verbose:
                sys.stderr.write(' error: "{0}" does not exist\n'.format(f.name))
            errors = True
        if f.name.endswith(no_delete):
            continue
        if args['--dry-run']:
            # Only Python 2 needs the explicit encode; on Python 3 it would
            # print the bytes repr (b'...') instead of the file name.
            shown_name = f.name.encode('utf-8') if six.PY2 else f.name
            sys.stdout.write(' will delete: {0}/{1}\n'.format(item.identifier,
                                                              shown_name))
            continue
        try:
            resp = f.delete(verbose=verbose,
                            cascade_delete=args['--cascade'],
                            # The validated option key is '--header'.
                            headers=args['--header'],
                            retries=args['--retries'])
        except requests.exceptions.RetryError:
            print(' error: max retries exceeded for {0}'.format(f.name),
                  file=sys.stderr)
            errors = True
            continue

        if resp.status_code != 204:
            errors = True
            msg = get_s3_xml_text(resp.content)
            print(' error: {0} ({1})'.format(msg, resp.status_code),
                  file=sys.stderr)

    if errors:
        sys.exit(1)
def main(argv, session):
    """Delete files from an item, as selected by the command-line arguments.

    Files are chosen with ``--all``, ``--glob``, ``--format`` or explicit
    ``<file>`` names (a single ``-`` reads the names from stdin, one per
    line).  Names ending in ``_meta.xml``, ``_files.xml`` or ``_meta.sqlite``
    are always skipped.

    :param argv: Argument list parsed by ``docopt`` against the module
                 docstring.
    :param session: Object providing ``get_item(identifier)``.

    Exits with status 1 when the arguments fail validation, when no files
    are selected, or when at least one error was recorded while deleting.
    """
    args = docopt(__doc__, argv=argv)

    # Validate args.
    schema = Schema({
        six.text_type: Use(bool),
        '<file>': And(list, Use(
            lambda x: convert_str_list_to_unicode(x) if six.PY2 else x)),
        '--format': list,
        '--header': Or(None, And(Use(get_args_dict), dict),
                       error='--header must be formatted as --header="key:value"'),
        '--glob': list,
        'delete': bool,
        '--retries': Use(lambda i: int(i[0])),
        '<identifier>': str,
    })
    try:
        args = schema.validate(args)
    except SchemaError as exc:
        print('{0}\n{1}'.format(str(exc), printable_usage(__doc__)),
              file=sys.stderr)
        sys.exit(1)

    verbose = not args['--quiet']
    item = session.get_item(args['<identifier>'])
    if not item.exists:
        # The placeholder used to be printed literally; fill in the identifier.
        print('{0}: skipping, item does\'t exist.'.format(item.identifier))

    # Files that cannot be deleted via S3.
    no_delete = ('_meta.xml', '_files.xml', '_meta.sqlite')

    # Add keep-old-version by default.
    args['--header'].setdefault('x-archive-keep-old-version', '1')

    if verbose:
        sys.stdout.write('Deleting files from {0}\n'.format(item.identifier))

    # Every branch materializes its result so the emptiness check below
    # behaves the same regardless of how the files were selected.
    if args['--all']:
        files = list(item.get_files())
        # Was misspelled '--cacade', so the flag read by f.delete() below
        # was never switched on.
        args['--cascade'] = True
    elif args['--glob']:
        files = list(item.get_files(glob_pattern=args['--glob']))
    elif args['--format']:
        files = list(item.get_files(formats=args['--format']))
    else:
        if args['<file>'] == ['-']:
            fnames = [line.strip() for line in sys.stdin]
            if six.PY2:
                fnames = convert_str_list_to_unicode(fnames)
        else:
            fnames = [name.strip() for name in args['<file>']]
        files = list(item.get_files(fnames))

    if not files:
        sys.stderr.write(' warning: no files found, nothing deleted.\n')
        sys.exit(1)

    errors = False
    for f in files:
        if not f:
            # NOTE(review): a falsy entry is reported but not skipped, as in
            # the original flow -- confirm whether it should `continue`.
            if verbose:
                sys.stderr.write(' error: "{0}" does not exist\n'.format(f.name))
            errors = True
        if f.name.endswith(no_delete):
            continue
        if args['--dry-run']:
            # Only Python 2 needs the explicit encode; on Python 3 it would
            # print the bytes repr (b'...') instead of the file name.
            shown_name = f.name.encode('utf-8') if six.PY2 else f.name
            sys.stdout.write(' will delete: {0}/{1}\n'.format(item.identifier,
                                                              shown_name))
            continue
        try:
            resp = f.delete(verbose=verbose,
                            cascade_delete=args['--cascade'],
                            headers=args['--header'],
                            retries=args['--retries'])
        except requests.exceptions.RetryError:
            print(' error: max retries exceeded for {0}'.format(f.name),
                  file=sys.stderr)
            errors = True
            continue

        if resp.status_code != 204:
            errors = True
            msg = get_s3_xml_text(resp.content)
            print(' error: {0} ({1})'.format(msg, resp.status_code),
                  file=sys.stderr)

    if errors:
        sys.exit(1)
def main(argv, session):
    """Upload files to an item, or bulk-upload the rows of a spreadsheet.

    Without ``--spreadsheet`` the ``<file>`` arguments (or stdin when ``-``
    is given together with ``--remote-name``) are uploaded to
    ``<identifier>``.  With ``--spreadsheet`` every CSV row is uploaded to
    the item named in its ``item``/``identifier`` column; a row with an
    empty identifier reuses the identifier of the previous row.

    :param argv: Argument list parsed by ``docopt`` against the module
                 docstring.
    :param session: Object providing ``get_item(identifier)`` and
                    ``s3_is_overloaded()``; its ``protocol`` and ``host``
                    attributes are used to build the details URL for
                    ``--open-after-upload``.

    Exits with status 1 on invalid arguments, when the target collection
    does not exist, when ``--status-check`` reports an overload, or when
    any upload response is not OK.  Exits with status 0 after a successful
    ``--status-check``.
    """
    if six.PY2:
        args = docopt(__doc__.encode('utf-8'), argv=argv)
    else:
        args = docopt(__doc__, argv=argv)
    errors = False

    # Validate args.  The '<file>' checks read the raw docopt ``args``.
    schema = Schema({
        str: Use(bool),
        '<identifier>': Or(
            None,
            And(str, validate_s3_identifier, error=(
                '<identifier> should be between 3 and 80 characters in length, and '
                'can only contain alphanumeric characters, periods ".", '
                'underscores "_", or dashes "-". However, <identifier> cannot begin '
                'with periods, underscores, or dashes.'))),
        '<file>': And(
            Use(lambda names: convert_str_list_to_unicode(names) if six.PY2 else names),
            And(lambda names: all(os.path.exists(x) for x in names if x != '-'),
                error='<file> should be a readable file or directory.'),
            And(lambda names: names != ['-'] or bool(args['--remote-name']),
                error='--remote-name must be provided when uploading from stdin.')),
        '--remote-name': Or(
            None,
            Use(lambda x: x.decode(sys.getfilesystemencoding()) if six.PY2 else x)),
        '--spreadsheet': Or(None, os.path.isfile,
                            error='--spreadsheet should be a readable file.'),
        '--file-metadata': Or(None, os.path.isfile,
                              error='--file-metadata should be a readable file.'),
        '--metadata': Or(None, And(Use(get_args_dict), dict),
                         error='--metadata must be formatted as --metadata="key:value"'),
        '--header': Or(None, And(Use(get_args_dict), dict),
                       error='--header must be formatted as --header="key:value"'),
        '--retries': Use(lambda x: int(x[0]) if x else 0),
        '--sleep': Use(lambda x: int(x[0]), error='--sleep value must be an integer.'),
        '--size-hint': Or(Use(lambda x: str(x[0]) if x else None), int, None,
                          error='--size-hint value must be an integer.'),
        '--status-check': bool,
    })
    try:
        args = schema.validate(args)
    except SchemaError as exc:
        print('{0}\n{1}'.format(str(exc), printable_usage(__doc__)),
              file=sys.stderr)
        sys.exit(1)

    # Make sure the collection being uploaded to exists.
    collection_id = args['--metadata'].get('collection')
    if (collection_id and not args['--no-collection-check']
            and not args['--status-check']):
        if isinstance(collection_id, list):
            collection_id = collection_id[0]
        collection = session.get_item(collection_id)
        if not collection.exists:
            print('You must upload to a collection that exists. '
                  '"{0}" does not exist.\n{1}'.format(
                      collection_id, printable_usage(__doc__)),
                  file=sys.stderr)
            sys.exit(1)

    # Status check.
    if args['--status-check']:
        if session.s3_is_overloaded():
            print('warning: {0} is over limit, and not accepting requests. '
                  'Expect 503 SlowDown errors.'.format(args['<identifier>']),
                  file=sys.stderr)
            sys.exit(1)
        else:
            print('success: {0} is accepting requests.'.format(
                args['<identifier>']))
            sys.exit()
    elif args['<identifier>']:
        item = session.get_item(args['<identifier>'])

    # Upload keyword arguments.
    if args['--size-hint']:
        args['--header']['x-archive-size-hint'] = args['--size-hint']
    # Upload with backups turned on by default.
    if not args['--header'].get('x-archive-keep-old-version'):
        args['--header']['x-archive-keep-old-version'] = '1'

    queue_derive = args['--no-derive'] is False
    verbose = args['--quiet'] is False

    if args['--file-metadata']:
        # Try the file as one JSON document first, then as one JSON value
        # per line.  Both reads close their handle.
        try:
            with open(args['--file-metadata']) as fp:
                args['<file>'] = json.load(fp)
        except json.decoder.JSONDecodeError:
            with open(args['--file-metadata']) as fp:
                args['<file>'] = [json.loads(line.strip()) for line in fp]

    upload_kwargs = dict(
        metadata=args['--metadata'],
        headers=args['--header'],
        debug=args['--debug'],
        queue_derive=queue_derive,
        verbose=verbose,
        verify=args['--verify'],
        checksum=args['--checksum'],
        retries=args['--retries'],
        retries_sleep=args['--sleep'],
        delete=args['--delete'],
        validate_identifier=True,
    )

    # Upload files.
    if not args['--spreadsheet']:
        if args['-']:
            local_file = TemporaryFile()
            # Presumably tempfile.TemporaryFile, which defaults to binary
            # mode: feed it bytes (Python 3 text stdin exposes them via
            # ``.buffer``), otherwise write() raises TypeError.
            stdin_data = getattr(sys.stdin, 'buffer', sys.stdin).read()
            if not isinstance(stdin_data, bytes):
                stdin_data = stdin_data.encode('utf-8')
            local_file.write(stdin_data)
            local_file.seek(0)
        else:
            local_file = args['<file>']

        if isinstance(local_file, (list, tuple, set)) and args['--remote-name']:
            local_file = local_file[0]
        if args['--remote-name']:
            files = {args['--remote-name']: local_file}
        else:
            files = local_file

        for _r in _upload_files(item, files, upload_kwargs):
            if args['--debug']:
                break
            if (not _r.status_code) or (not _r.ok):
                errors = True
            elif args['--open-after-upload']:
                webbrowser.open_new_tab('{}//{}/details/{}'.format(
                    session.protocol, session.host, item.identifier))

    # Bulk upload using spreadsheet.
    else:
        # Use the same session for each upload request.
        # Plain 'r' instead of 'rU': the 'U' flag is rejected by Python 3.11+,
        # and newline='' already leaves line-ending handling to the csv module.
        with io.open(args['--spreadsheet'], 'r', newline='',
                     encoding='utf-8') as csvfp:
            spreadsheet = csv.DictReader(csvfp)
            prev_identifier = None
            for row in spreadsheet:
                upload_kwargs_copy = deepcopy(upload_kwargs)
                local_file = row['file']
                identifier = row.get('item', row.get('identifier'))
                if not identifier:
                    # A blank identifier falls back to the previous row's;
                    # it is only fatal when there is nothing to fall back to.
                    if not prev_identifier:
                        print('error: no identifier column on spreadsheet!',
                              file=sys.stderr)
                        sys.exit(1)
                    identifier = prev_identifier
                del row['file']
                # Drop both identifier columns so neither ends up in the
                # metadata built from the remaining columns.
                row.pop('identifier', None)
                row.pop('item', None)
                item = session.get_item(identifier)
                # TODO: Clean up how indexed metadata items are coerced
                # into metadata.
                md_args = ['{0}:{1}'.format(k.lower(), v)
                           for (k, v) in row.items() if v]
                metadata = get_args_dict(md_args)
                upload_kwargs_copy['metadata'].update(metadata)
                responses = _upload_files(item, local_file, upload_kwargs_copy,
                                          prev_identifier, session)
                for _r in responses:
                    if args['--debug']:
                        break
                    if (not _r) or (not _r.ok):
                        errors = True
                    elif args['--open-after-upload']:
                        webbrowser.open_new_tab('{}//{}/details/{}'.format(
                            session.protocol, session.host, identifier))
                prev_identifier = identifier

    if errors:
        sys.exit(1)
def main(argv, session):
    """Upload files to an item, or bulk-upload the rows of a spreadsheet.

    Without ``--spreadsheet`` the ``<file>`` arguments (or stdin when ``-``
    is given together with ``--remote-name``) are uploaded to
    ``<identifier>``.  With ``--spreadsheet`` every CSV row is uploaded to
    the item named in its ``identifier`` column; a row with an empty
    identifier reuses the identifier of the previous row.

    :param argv: Argument list parsed by ``docopt`` against the module
                 docstring.
    :param session: Object providing ``get_item(identifier)`` and
                    ``s3_is_overloaded()``.

    Exits with status 1 on invalid arguments, when ``--status-check``
    reports an overload, or when any upload response is not OK.  Exits
    with status 0 after a successful ``--status-check``.
    """
    # Local import keeps this block self-contained.
    from copy import deepcopy

    if six.PY2:
        args = docopt(__doc__.encode('utf-8'), argv=argv)
    else:
        args = docopt(__doc__, argv=argv)
    errors = False

    # Validate args.  The '<file>' checks read the raw docopt ``args``.
    schema = Schema({
        str: Use(bool),
        '<identifier>': Or(
            None,
            And(str, validate_ia_identifier, error=(
                '<identifier> should be between 3 and 80 characters in length, and '
                'can only contain alphanumeric characters, periods ".", '
                'underscores "_", or dashes "-". However, <identifier> cannot begin '
                'with periods, underscores, or dashes.'))),
        '<file>': And(
            Use(lambda names: convert_str_list_to_unicode(names) if six.PY2 else names),
            And(lambda names: all(os.path.exists(x) for x in names if x != '-'),
                error='<file> should be a readable file or directory.'),
            And(lambda names: names != ['-'] or bool(args['--remote-name']),
                error='--remote-name must be provided when uploading from stdin.')),
        '--remote-name': Or(
            None,
            Use(lambda x: x.decode(sys.getfilesystemencoding()) if six.PY2 else x)),
        '--spreadsheet': Or(None, os.path.isfile,
                            error='--spreadsheet should be a readable file.'),
        '--metadata': Or(None, And(Use(get_args_dict), dict),
                         error='--metadata must be formatted as --metadata="key:value"'),
        '--header': Or(None, And(Use(get_args_dict), dict),
                       error='--header must be formatted as --header="key:value"'),
        '--retries': Use(lambda x: int(x[0]) if x else 0),
        '--sleep': Use(lambda x: int(x[0]), error='--sleep value must be an integer.'),
        '--size-hint': Or(Use(lambda x: int(x[0]) if x else None), int, None,
                          error='--size-hint value must be an integer.'),
        '--status-check': bool,
    })
    try:
        args = schema.validate(args)
    except SchemaError as exc:
        print('{0}\n{1}'.format(str(exc), printable_usage(__doc__)),
              file=sys.stderr)
        sys.exit(1)

    # Status check.
    if args['--status-check']:
        if session.s3_is_overloaded():
            print('warning: {0} is over limit, and not accepting requests. '
                  'Expect 503 SlowDown errors.'.format(args['<identifier>']),
                  file=sys.stderr)
            sys.exit(1)
        else:
            print('success: {0} is accepting requests.'.format(
                args['<identifier>']))
            sys.exit()
    elif args['<identifier>']:
        item = session.get_item(args['<identifier>'])

    # Upload keyword arguments.
    if args['--size-hint']:
        args['--header']['x-archive-size-hint'] = args['--size-hint']

    queue_derive = args['--no-derive'] is False
    verbose = args['--quiet'] is False

    upload_kwargs = dict(
        metadata=args['--metadata'],
        headers=args['--header'],
        debug=args['--debug'],
        queue_derive=queue_derive,
        verbose=verbose,
        verify=args['--verify'],
        checksum=args['--checksum'],
        retries=args['--retries'],
        retries_sleep=args['--sleep'],
        delete=args['--delete'],
    )

    # Upload files.
    if not args['--spreadsheet']:
        if args['-']:
            local_file = TemporaryFile()
            # Presumably tempfile.TemporaryFile, which defaults to binary
            # mode: feed it bytes (Python 3 text stdin exposes them via
            # ``.buffer``), otherwise write() raises TypeError.
            stdin_data = getattr(sys.stdin, 'buffer', sys.stdin).read()
            if not isinstance(stdin_data, bytes):
                stdin_data = stdin_data.encode('utf-8')
            local_file.write(stdin_data)
            local_file.seek(0)
        else:
            local_file = args['<file>']

        if isinstance(local_file, (list, tuple, set)) and args['--remote-name']:
            local_file = local_file[0]
        if args['--remote-name']:
            files = {args['--remote-name']: local_file}
        else:
            files = local_file

        for _r in _upload_files(item, files, upload_kwargs):
            if args['--debug']:
                break
            if (not _r) or (not _r.ok):
                errors = True

    # Bulk upload using spreadsheet.
    else:
        # Use the same session for each upload request: the ``session``
        # passed in by the caller is reused instead of being replaced by a
        # freshly constructed one.
        # 'U' is only meaningful on Python 2 and is rejected by Python 3.11+.
        csv_mode = 'rU' if six.PY2 else 'r'
        with open(args['--spreadsheet'], csv_mode) as csvfp:
            spreadsheet = csv.DictReader(csvfp)
            prev_identifier = None
            for row in spreadsheet:
                # Work on a per-row copy so one row's metadata does not leak
                # into the uploads of the following rows.
                row_kwargs = deepcopy(upload_kwargs)
                local_file = row['file']
                identifier = row['identifier']
                del row['file']
                del row['identifier']
                if (not identifier) and (prev_identifier):
                    identifier = prev_identifier
                item = session.get_item(identifier)
                # TODO: Clean up how indexed metadata items are coerced
                # into metadata.
                md_args = ['{0}:{1}'.format(k.lower(), v)
                           for (k, v) in row.items() if v]
                metadata = get_args_dict(md_args)
                row_kwargs['metadata'].update(metadata)
                responses = _upload_files(item, local_file, row_kwargs,
                                          prev_identifier, session)
                for _r in responses:
                    if args['--debug']:
                        break
                    if (not _r) or (not _r.ok):
                        errors = True
                prev_identifier = identifier

    if errors:
        sys.exit(1)
def main(argv, session):
    """Upload files to an item, or bulk-upload the rows of a spreadsheet.

    Without ``--spreadsheet`` the ``<file>`` arguments (or stdin when ``-``
    is given together with ``--remote-name``) are uploaded to
    ``<identifier>``.  With ``--spreadsheet`` every CSV row is uploaded to
    the item named in its ``identifier`` column; a row with an empty
    identifier reuses the identifier of the previous row.

    :param argv: Argument list parsed by ``docopt`` against the module
                 docstring.
    :param session: Object providing ``get_item(identifier)`` and
                    ``s3_is_overloaded()``.

    Exits with status 1 on invalid arguments, when ``--status-check``
    reports an overload, or when any upload response is not OK.  Exits
    with status 0 after a successful ``--status-check``.
    """
    # Local import keeps this block self-contained.
    from copy import deepcopy

    if six.PY2:
        args = docopt(__doc__.encode('utf-8'), argv=argv)
    else:
        args = docopt(__doc__, argv=argv)
    errors = False

    # Validate args.  The '<file>' checks read the raw docopt ``args``.
    schema = Schema({
        str: Use(bool),
        '<identifier>': Or(
            None,
            And(str, validate_ia_identifier, error=(
                '<identifier> should be between 3 and 80 characters in length, and '
                'can only contain alphanumeric characters, underscores ( _ ), or '
                'dashes ( - )'))),
        '<file>': And(
            Use(lambda names: convert_str_list_to_unicode(names) if six.PY2 else names),
            And(lambda names: all(os.path.exists(x) for x in names if x != '-'),
                error='<file> should be a readable file or directory.'),
            And(lambda names: names != ['-'] or bool(args['--remote-name']),
                error='--remote-name must be provided when uploading from stdin.')),
        '--remote-name': Or(
            None,
            Use(lambda x: x.decode(sys.getfilesystemencoding()) if six.PY2 else x)),
        '--spreadsheet': Or(None, os.path.isfile,
                            error='--spreadsheet should be a readable file.'),
        '--metadata': Or(None, And(Use(get_args_dict), dict),
                         error='--metadata must be formatted as --metadata="key:value"'),
        '--header': Or(None, And(Use(get_args_dict), dict),
                       error='--header must be formatted as --header="key:value"'),
        '--retries': Use(lambda x: int(x[0]) if x else 0),
        '--sleep': Use(lambda x: int(x[0]), error='--sleep value must be an integer.'),
        '--size-hint': Or(Use(lambda x: int(x[0]) if x else None), int, None,
                          error='--size-hint value must be an integer.'),
        '--status-check': bool,
    })
    try:
        args = schema.validate(args)
    except SchemaError as exc:
        print('{0}\n{1}'.format(str(exc), printable_usage(__doc__)),
              file=sys.stderr)
        sys.exit(1)

    # Status check.
    if args['--status-check']:
        if session.s3_is_overloaded():
            print('warning: {0} is over limit, and not accepting requests. '
                  'Expect 503 SlowDown errors.'.format(args['<identifier>']),
                  file=sys.stderr)
            sys.exit(1)
        else:
            print('success: {0} is accepting requests.'.format(args['<identifier>']))
            sys.exit()
    elif args['<identifier>']:
        item = session.get_item(args['<identifier>'])

    # Upload keyword arguments.
    if args['--size-hint']:
        args['--header']['x-archive-size-hint'] = args['--size-hint']

    queue_derive = args['--no-derive'] is False
    verbose = args['--quiet'] is False

    upload_kwargs = dict(
        metadata=args['--metadata'],
        headers=args['--header'],
        debug=args['--debug'],
        queue_derive=queue_derive,
        verbose=verbose,
        verify=args['--verify'],
        checksum=args['--checksum'],
        retries=args['--retries'],
        retries_sleep=args['--sleep'],
        delete=args['--delete'],
    )

    # Upload files.
    if not args['--spreadsheet']:
        if args['-']:
            local_file = TemporaryFile()
            # Presumably tempfile.TemporaryFile, which defaults to binary
            # mode: feed it bytes (Python 3 text stdin exposes them via
            # ``.buffer``), otherwise write() raises TypeError.
            stdin_data = getattr(sys.stdin, 'buffer', sys.stdin).read()
            if not isinstance(stdin_data, bytes):
                stdin_data = stdin_data.encode('utf-8')
            local_file.write(stdin_data)
            local_file.seek(0)
        else:
            local_file = args['<file>']

        if isinstance(local_file, (list, tuple, set)) and args['--remote-name']:
            local_file = local_file[0]
        if args['--remote-name']:
            files = {args['--remote-name']: local_file}
        else:
            files = local_file

        for _r in _upload_files(item, files, upload_kwargs):
            if args['--debug']:
                break
            if (not _r) or (not _r.ok):
                errors = True

    # Bulk upload using spreadsheet.
    else:
        # Use the same session for each upload request: the ``session``
        # passed in by the caller is reused instead of being replaced by a
        # freshly constructed one.
        # 'U' is only meaningful on Python 2 and is rejected by Python 3.11+.
        csv_mode = 'rU' if six.PY2 else 'r'
        with open(args['--spreadsheet'], csv_mode) as csvfp:
            spreadsheet = csv.DictReader(csvfp)
            prev_identifier = None
            for row in spreadsheet:
                # Work on a per-row copy so one row's metadata does not leak
                # into the uploads of the following rows.
                row_kwargs = deepcopy(upload_kwargs)
                local_file = row['file']
                identifier = row['identifier']
                del row['file']
                del row['identifier']
                if (not identifier) and (prev_identifier):
                    identifier = prev_identifier
                item = session.get_item(identifier)
                # TODO: Clean up how indexed metadata items are coerced
                # into metadata.
                md_args = ['{0}:{1}'.format(k.lower(), v)
                           for (k, v) in row.items() if v]
                metadata = get_args_dict(md_args)
                row_kwargs['metadata'].update(metadata)
                responses = _upload_files(item, local_file, row_kwargs,
                                          prev_identifier, session)
                for _r in responses:
                    if args['--debug']:
                        break
                    if (not _r) or (not _r.ok):
                        errors = True
                prev_identifier = identifier

    if errors:
        sys.exit(1)