# Exemplo n.º 1 (scraped example separator; vote count: 0)
def main(argv, session):
    """Delete files from an archive.org item via the S3-like API.

    :param argv: CLI arguments, parsed with docopt against ``__doc__``.
    :param session: ArchiveSession used to look up the item and issue
        delete requests.

    Exits 1 on validation failure, when no files match, or when any
    delete request fails.
    """
    args = docopt(__doc__, argv=argv)

    # Validation error messages.
    invalid_id_msg = ('<identifier> should be between 3 and 80 characters in length, and '
                      'can only contain alphanumeric characters, underscores ( _ ), or '
                      'dashes ( - )')

    # Validate args.
    s = Schema({
        six.text_type: Use(bool),
        '<file>': And(list, Use(
            lambda x: convert_str_list_to_unicode(x) if six.PY2 else x)),
        '--format': list,
        '--header': Or(None, And(Use(get_args_dict), dict),
                       error='--header must be formatted as --header="key:value"'),
        '--glob': list,
        'delete': bool,
        '--retries': Use(lambda i: int(i[0])),
        '<identifier>': str,
    })
    try:
        args = s.validate(args)
    except SchemaError as exc:
        print('{0}\n{1}'.format(str(exc), printable_usage(__doc__)), file=sys.stderr)
        sys.exit(1)

    verbose = not args['--quiet']
    item = session.get_item(args['<identifier>'])
    if not item.exists:
        # BUGFIX: the identifier was never substituted into the message
        # (and "does't" was a typo).
        print('{0}: skipping, item doesn\'t exist.'.format(item.identifier))

    # Files that cannot be deleted via S3.
    no_delete = ['_meta.xml', '_files.xml', '_meta.sqlite']

    # Add keep-old-version by default so deletions remain recoverable.
    if 'x-archive-keep-old-version' not in args['--header']:
        args['--header']['x-archive-keep-old-version'] = '1'

    if verbose:
        sys.stdout.write('Deleting files from {0}\n'.format(item.identifier))

    if args['--all']:
        files = list(item.get_files())
        # BUGFIX: key was misspelled '--cacade', so --all never actually
        # enabled cascade deletion.
        args['--cascade'] = True
    elif args['--glob']:
        files = item.get_files(glob_pattern=args['--glob'])
    elif args['--format']:
        files = item.get_files(formats=args['--format'])
    else:
        # Read filenames from stdin when '<file>' is '-', otherwise from args.
        if args['<file>'] == ['-']:
            if six.PY2:
                fnames = convert_str_list_to_unicode([f.strip() for f in sys.stdin])
            else:
                fnames = [f.strip() for f in sys.stdin]
        else:
            fnames = [f.strip() for f in args['<file>']]

        files = list(item.get_files(fnames))

    if not files:
        sys.stderr.write(' warning: no files found, nothing deleted.\n')
        sys.exit(1)

    errors = False

    for f in files:
        if not f:
            if verbose:
                sys.stderr.write(' error: "{0}" does not exist\n'.format(f.name))
            errors = True
            # BUGFIX: skip to the next file; falling through would access
            # attributes on a missing file object.
            continue
        if any(f.name.endswith(s) for s in no_delete):
            continue
        if args['--dry-run']:
            sys.stdout.write(' will delete: {0}/{1}\n'.format(item.identifier,
                                                              f.name.encode('utf-8')))
            continue
        try:
            resp = f.delete(verbose=verbose,
                            cascade_delete=args['--cascade'],
                            # BUGFIX: key was '--headers', which raised
                            # KeyError on every non-dry-run delete.
                            headers=args['--header'],
                            retries=args['--retries'])
        except requests.exceptions.RetryError:
            print(' error: max retries exceeded for {0}'.format(f.name), file=sys.stderr)
            errors = True
            continue

        # The S3 API answers 204 No Content on a successful delete.
        if resp.status_code != 204:
            errors = True
            msg = get_s3_xml_text(resp.content)
            print(' error: {0} ({1})'.format(msg, resp.status_code), file=sys.stderr)
            continue

    if errors is True:
        sys.exit(1)
# Exemplo n.º 2 (scraped example separator; vote count: 0)
def main(argv, session):
    """Delete files from an archive.org item via the S3-like API.

    :param argv: CLI arguments, parsed with docopt against ``__doc__``.
    :param session: ArchiveSession used to look up the item and issue
        delete requests.

    Exits 1 on validation failure, when no files match, or when any
    delete request fails.
    """
    args = docopt(__doc__, argv=argv)

    # Validation error messages.
    invalid_id_msg = ('<identifier> should be between 3 and 80 characters in length, and '
                      'can only contain alphanumeric characters, underscores ( _ ), or '
                      'dashes ( - )')

    # Validate args.
    s = Schema({
        six.text_type: Use(bool),
        '<file>': And(list, Use(
            lambda x: convert_str_list_to_unicode(x) if six.PY2 else x)),
        '--format': list,
        '--header': Or(None, And(Use(get_args_dict), dict),
                       error='--header must be formatted as --header="key:value"'),
        '--glob': list,
        'delete': bool,
        '--retries': Use(lambda i: int(i[0])),
        '<identifier>': str,
    })
    try:
        args = s.validate(args)
    except SchemaError as exc:
        print('{0}\n{1}'.format(str(exc), printable_usage(__doc__)), file=sys.stderr)
        sys.exit(1)

    verbose = not args['--quiet']
    item = session.get_item(args['<identifier>'])
    if not item.exists:
        # BUGFIX: the identifier was never substituted into the message
        # (and "does't" was a typo).
        print('{0}: skipping, item doesn\'t exist.'.format(item.identifier))

    # Files that cannot be deleted via S3.
    no_delete = ['_meta.xml', '_files.xml', '_meta.sqlite']

    # Add keep-old-version by default so deletions remain recoverable.
    if 'x-archive-keep-old-version' not in args['--header']:
        args['--header']['x-archive-keep-old-version'] = '1'

    if verbose:
        sys.stdout.write('Deleting files from {0}\n'.format(item.identifier))

    if args['--all']:
        files = list(item.get_files())
        # BUGFIX: key was misspelled '--cacade', so --all never actually
        # enabled cascade deletion.
        args['--cascade'] = True
    elif args['--glob']:
        files = item.get_files(glob_pattern=args['--glob'])
    elif args['--format']:
        files = item.get_files(formats=args['--format'])
    else:
        # Read filenames from stdin when '<file>' is '-', otherwise from args.
        if args['<file>'] == ['-']:
            if six.PY2:
                fnames = convert_str_list_to_unicode([f.strip() for f in sys.stdin])
            else:
                fnames = [f.strip() for f in sys.stdin]
        else:
            fnames = [f.strip() for f in args['<file>']]

        files = list(item.get_files(fnames))

    if not files:
        sys.stderr.write(' warning: no files found, nothing deleted.\n')
        sys.exit(1)

    errors = False

    for f in files:
        if not f:
            if verbose:
                sys.stderr.write(' error: "{0}" does not exist\n'.format(f.name))
            errors = True
            # BUGFIX: skip to the next file; falling through would access
            # attributes on a missing file object.
            continue
        if any(f.name.endswith(s) for s in no_delete):
            continue
        if args['--dry-run']:
            sys.stdout.write(' will delete: {0}/{1}\n'.format(item.identifier,
                                                              f.name.encode('utf-8')))
            continue
        try:
            resp = f.delete(verbose=verbose,
                            cascade_delete=args['--cascade'],
                            headers=args['--header'],
                            retries=args['--retries'])
        except requests.exceptions.RetryError:
            print(' error: max retries exceeded for {0}'.format(f.name), file=sys.stderr)
            errors = True
            continue

        # The S3 API answers 204 No Content on a successful delete.
        if resp.status_code != 204:
            errors = True
            msg = get_s3_xml_text(resp.content)
            print(' error: {0} ({1})'.format(msg, resp.status_code), file=sys.stderr)
            continue

    if errors is True:
        sys.exit(1)
# Exemplo n.º 3 (scraped example separator; vote count: 0)
def main(argv, session):
    """Upload files to archive.org items, singly or in bulk via spreadsheet.

    :param argv: CLI arguments, parsed with docopt against ``__doc__``.
    :param session: ArchiveSession providing item lookup, S3 overload
        checks, and connection settings for uploads.

    Exits 1 on validation failure, a missing target collection, an
    overloaded S3 endpoint, or any failed upload; exits 0 after a
    successful --status-check.
    """
    if six.PY2:
        args = docopt(__doc__.encode('utf-8'), argv=argv)
    else:
        args = docopt(__doc__, argv=argv)
    # Set to True on any failed upload; checked at the end for exit status.
    ERRORS = False

    # Validate args.
    s = Schema({
        str:
        Use(bool),
        '<identifier>':
        Or(
            None,
            And(str,
                validate_s3_identifier,
                error=
                ('<identifier> should be between 3 and 80 characters in length, and '
                 'can only contain alphanumeric characters, periods ".", '
                 'underscores "_", or dashes "-". However, <identifier> cannot begin '
                 'with periods, underscores, or dashes.'))),
        '<file>':
        And(
            Use(lambda l: l
                if not six.PY2 else convert_str_list_to_unicode(l)),
            And(lambda f: all(os.path.exists(x) for x in f if x != '-'),
                error='<file> should be a readable file or directory.'),
            And(lambda f: False
                if f == ['-'] and not args['--remote-name'] else True,
                error=
                '--remote-name must be provided when uploading from stdin.')),
        '--remote-name':
        Or(
            None,
            Use(lambda x: x.decode(sys.getfilesystemencoding())
                if six.PY2 else x)),
        '--spreadsheet':
        Or(None,
           os.path.isfile,
           error='--spreadsheet should be a readable file.'),
        '--file-metadata':
        Or(None,
           os.path.isfile,
           error='--file-metadata should be a readable file.'),
        '--metadata':
        Or(None,
           And(Use(get_args_dict), dict),
           error='--metadata must be formatted as --metadata="key:value"'),
        '--header':
        Or(None,
           And(Use(get_args_dict), dict),
           error='--header must be formatted as --header="key:value"'),
        '--retries':
        Use(lambda x: int(x[0]) if x else 0),
        '--sleep':
        Use(lambda l: int(l[0]), error='--sleep value must be an integer.'),
        '--size-hint':
        Or(Use(lambda l: str(l[0]) if l else None),
           int,
           None,
           error='--size-hint value must be an integer.'),
        '--status-check':
        bool,
    })
    try:
        args = s.validate(args)
    except SchemaError as exc:
        print('{0}\n{1}'.format(str(exc), printable_usage(__doc__)),
              file=sys.stderr)
        sys.exit(1)

    # Make sure the collection being uploaded to exists.
    collection_id = args['--metadata'].get('collection')
    if collection_id and not args['--no-collection-check'] and not args[
            '--status-check']:
        if isinstance(collection_id, list):
            collection_id = collection_id[0]
        collection = session.get_item(collection_id)
        if not collection.exists:
            print('You must upload to a collection that exists. '
                  '"{0}" does not exist.\n{1}'.format(
                      collection_id, printable_usage(__doc__)),
                  file=sys.stderr)
            sys.exit(1)

    # Status check: report whether S3 is accepting requests, then exit.
    if args['--status-check']:
        if session.s3_is_overloaded():
            print('warning: {0} is over limit, and not accepting requests. '
                  'Expect 503 SlowDown errors.'.format(args['<identifier>']),
                  file=sys.stderr)
            sys.exit(1)
        else:
            print('success: {0} is accepting requests.'.format(
                args['<identifier>']))
            sys.exit()

    elif args['<identifier>']:
        item = session.get_item(args['<identifier>'])

    # Upload keyword arguments.
    if args['--size-hint']:
        args['--header']['x-archive-size-hint'] = args['--size-hint']
    # Upload with backups turned on by default.
    if not args['--header'].get('x-archive-keep-old-version'):
        args['--header']['x-archive-keep-old-version'] = '1'

    queue_derive = True if args['--no-derive'] is False else False
    verbose = True if args['--quiet'] is False else False

    # --file-metadata may be a single JSON document or JSON Lines
    # (one JSON object per line); fall back to line-by-line parsing
    # when whole-file parsing fails.
    if args['--file-metadata']:
        try:
            args['<file>'] = json.load(open(args['--file-metadata']))
        except json.decoder.JSONDecodeError:
            args['<file>'] = list()
            for line in open(args['--file-metadata']):
                j = json.loads(line.strip())
                args['<file>'].append(j)
    upload_kwargs = dict(metadata=args['--metadata'],
                         headers=args['--header'],
                         debug=args['--debug'],
                         queue_derive=queue_derive,
                         verbose=verbose,
                         verify=args['--verify'],
                         checksum=args['--checksum'],
                         retries=args['--retries'],
                         retries_sleep=args['--sleep'],
                         delete=args['--delete'],
                         validate_identifier=True)

    # Upload files.
    if not args['--spreadsheet']:
        # '-' means read the payload from stdin into a temp file.
        if args['-']:
            local_file = TemporaryFile()
            local_file.write(sys.stdin.read())
            local_file.seek(0)
        else:
            local_file = args['<file>']

        # --remote-name applies to a single file only; take the first.
        if isinstance(local_file,
                      (list, tuple, set)) and args['--remote-name']:
            local_file = local_file[0]
        if args['--remote-name']:
            files = {args['--remote-name']: local_file}
        else:
            files = local_file

        for _r in _upload_files(item, files, upload_kwargs):
            if args['--debug']:
                break
            # NOTE(review): assumes _r has status_code/ok attributes
            # (requests.Response-like) — confirm against _upload_files.
            if (not _r.status_code) or (not _r.ok):
                ERRORS = True
            else:
                if args['--open-after-upload']:
                    webbrowser.open_new_tab('{}//{}/details/{}'.format(
                        session.protocol, session.host, item.identifier))

    # Bulk upload using spreadsheet.
    else:
        # Use the same session for each upload request.
        with io.open(args['--spreadsheet'], 'rU', newline='',
                     encoding='utf-8') as csvfp:
            spreadsheet = csv.DictReader(csvfp)
            prev_identifier = None
            for row in spreadsheet:
                # Deep-copy per row so one row's metadata cannot leak
                # into the next row's upload.
                upload_kwargs_copy = deepcopy(upload_kwargs)
                local_file = row['file']
                identifier = row.get('item', row.get('identifier'))
                if not identifier:
                    print('error: no identifier column on spreadsheet!')
                    sys.exit(1)
                del row['file']
                if 'identifier' in row:
                    del row['identifier']
                elif 'item' in row:
                    del row['item']
                # NOTE(review): unreachable — the sys.exit(1) above already
                # fired when identifier was falsy; kept for fidelity.
                if (not identifier) and (prev_identifier):
                    identifier = prev_identifier
                item = session.get_item(identifier)
                # TODO: Clean up how indexed metadata items are coerced
                # into metadata.
                md_args = [
                    '{0}:{1}'.format(k.lower(), v) for (k, v) in row.items()
                    if v
                ]
                metadata = get_args_dict(md_args)
                upload_kwargs_copy['metadata'].update(metadata)
                r = _upload_files(item, local_file, upload_kwargs_copy,
                                  prev_identifier, session)
                for _r in r:
                    if args['--debug']:
                        break
                    if (not _r) or (not _r.ok):
                        ERRORS = True
                    else:
                        if args['--open-after-upload']:
                            webbrowser.open_new_tab('{}//{}/details/{}'.format(
                                session.protocol, session.host, identifier))
                prev_identifier = identifier

    if ERRORS:
        sys.exit(1)
# Exemplo n.º 4 (scraped example separator; vote count: 0)
def main(argv, session):
    """Upload files to archive.org items, singly or in bulk via spreadsheet.

    :param argv: CLI arguments, parsed with docopt against ``__doc__``.
    :param session: ArchiveSession used for item lookup and upload requests.

    Exits 1 on validation failure, an overloaded S3 endpoint, or any
    failed upload; exits 0 after a successful --status-check.
    """
    if six.PY2:
        args = docopt(__doc__.encode('utf-8'), argv=argv)
    else:
        args = docopt(__doc__, argv=argv)
    # Set to True on any failed upload; checked at the end for exit status.
    ERRORS = False

    # Validate args.
    s = Schema({
        str:
        Use(bool),
        '<identifier>':
        Or(
            None,
            And(str,
                validate_ia_identifier,
                error=
                ('<identifier> should be between 3 and 80 characters in length, and '
                 'can only contain alphanumeric characters, periods ".", '
                 'underscores "_", or dashes "-". However, <identifier> cannot begin '
                 'with periods, underscores, or dashes.'))),
        '<file>':
        And(
            Use(lambda l: l
                if not six.PY2 else convert_str_list_to_unicode(l)),
            And(lambda f: all(os.path.exists(x) for x in f if x != '-'),
                error='<file> should be a readable file or directory.'),
            And(lambda f: False
                if f == ['-'] and not args['--remote-name'] else True,
                error=
                '--remote-name must be provided when uploading from stdin.')),
        '--remote-name':
        Or(
            None,
            Use(lambda x: x.decode(sys.getfilesystemencoding())
                if six.PY2 else x)),
        '--spreadsheet':
        Or(None,
           os.path.isfile,
           error='--spreadsheet should be a readable file.'),
        '--metadata':
        Or(None,
           And(Use(get_args_dict), dict),
           error='--metadata must be formatted as --metadata="key:value"'),
        '--header':
        Or(None,
           And(Use(get_args_dict), dict),
           error='--header must be formatted as --header="key:value"'),
        '--retries':
        Use(lambda x: int(x[0]) if x else 0),
        '--sleep':
        Use(lambda l: int(l[0]), error='--sleep value must be an integer.'),
        '--size-hint':
        Or(Use(lambda l: int(l[0]) if l else None),
           int,
           None,
           error='--size-hint value must be an integer.'),
        '--status-check':
        bool,
    })
    try:
        args = s.validate(args)
    except SchemaError as exc:
        print('{0}\n{1}'.format(str(exc), printable_usage(__doc__)),
              file=sys.stderr)
        sys.exit(1)

    # Status check: report whether S3 is accepting requests, then exit.
    if args['--status-check']:
        if session.s3_is_overloaded():
            print('warning: {0} is over limit, and not accepting requests. '
                  'Expect 503 SlowDown errors.'.format(args['<identifier>']),
                  file=sys.stderr)
            sys.exit(1)
        else:
            print('success: {0} is accepting requests.'.format(
                args['<identifier>']))
            sys.exit()

    elif args['<identifier>']:
        item = session.get_item(args['<identifier>'])

    # Upload keyword arguments.
    if args['--size-hint']:
        args['--header']['x-archive-size-hint'] = args['--size-hint']

    queue_derive = not args['--no-derive']
    verbose = not args['--quiet']

    upload_kwargs = dict(
        metadata=args['--metadata'],
        headers=args['--header'],
        debug=args['--debug'],
        queue_derive=queue_derive,
        verbose=verbose,
        verify=args['--verify'],
        checksum=args['--checksum'],
        retries=args['--retries'],
        retries_sleep=args['--sleep'],
        delete=args['--delete'],
    )

    # Upload files.
    if not args['--spreadsheet']:
        # '-' means read the payload from stdin into a temp file.
        if args['-']:
            local_file = TemporaryFile()
            local_file.write(sys.stdin.read())
            local_file.seek(0)
        else:
            local_file = args['<file>']

        # --remote-name applies to a single file only; take the first.
        if isinstance(local_file,
                      (list, tuple, set)) and args['--remote-name']:
            local_file = local_file[0]
        if args['--remote-name']:
            files = {args['--remote-name']: local_file}
        else:
            files = local_file

        for _r in _upload_files(item, files, upload_kwargs):
            if args['--debug']:
                break
            if (not _r) or (not _r.ok):
                ERRORS = True

    # Bulk upload using spreadsheet.
    else:
        # Use the same session for each upload request.
        # BUGFIX: previously a fresh ArchiveSession() was constructed here,
        # shadowing the caller's configured session (credentials, settings).
        spreadsheet = csv.DictReader(open(args['--spreadsheet'], 'rU'))
        prev_identifier = None
        for row in spreadsheet:
            # BUGFIX: deep-copy kwargs per row so one row's metadata
            # cannot leak into every subsequent row's upload.
            upload_kwargs_copy = deepcopy(upload_kwargs)
            local_file = row['file']
            identifier = row['identifier']
            del row['file']
            del row['identifier']
            # An empty identifier cell reuses the previous row's item.
            if (not identifier) and (prev_identifier):
                identifier = prev_identifier
            item = session.get_item(identifier)
            # TODO: Clean up how indexed metadata items are coerced
            # into metadata.
            md_args = [
                '{0}:{1}'.format(k.lower(), v) for (k, v) in row.items() if v
            ]
            metadata = get_args_dict(md_args)
            upload_kwargs_copy['metadata'].update(metadata)
            r = _upload_files(item, local_file, upload_kwargs_copy,
                              prev_identifier, session)
            for _r in r:
                if args['--debug']:
                    break
                if (not _r) or (not _r.ok):
                    ERRORS = True
            prev_identifier = identifier

    if ERRORS:
        sys.exit(1)
# Exemplo n.º 5 (scraped example separator; vote count: 0)
def main(argv, session):
    """Upload files to archive.org items, singly or in bulk via spreadsheet.

    :param argv: CLI arguments, parsed with docopt against ``__doc__``.
    :param session: ArchiveSession used for item lookup and upload requests.

    Exits 1 on validation failure, an overloaded S3 endpoint, or any
    failed upload; exits 0 after a successful --status-check.
    """
    if six.PY2:
        args = docopt(__doc__.encode('utf-8'), argv=argv)
    else:
        args = docopt(__doc__, argv=argv)
    # Set to True on any failed upload; checked at the end for exit status.
    ERRORS = False

    # Validate args.
    s = Schema({
        str: Use(bool),
        '<identifier>': Or(None, And(str, validate_ia_identifier,
            error=('<identifier> should be between 3 and 80 characters in length, and '
                   'can only contain alphanumeric characters, underscores ( _ ), or '
                   'dashes ( - )'))),
        '<file>': And(
            Use(lambda l: l if not six.PY2 else convert_str_list_to_unicode(l)),
            And(lambda f: all(os.path.exists(x) for x in f if x != '-'),
                error='<file> should be a readable file or directory.'),
            And(lambda f: False if f == ['-'] and not args['--remote-name'] else True,
                error='--remote-name must be provided when uploading from stdin.')),
        '--remote-name': Or(None,
            Use(lambda x: x.decode(sys.getfilesystemencoding()) if six.PY2 else x)),
        '--spreadsheet': Or(None, os.path.isfile,
            error='--spreadsheet should be a readable file.'),
        '--metadata': Or(None, And(Use(get_args_dict), dict),
            error='--metadata must be formatted as --metadata="key:value"'),
        '--header': Or(None, And(Use(get_args_dict), dict),
            error='--header must be formatted as --header="key:value"'),
        '--retries': Use(lambda x: int(x[0]) if x else 0),
        '--sleep': Use(lambda l: int(l[0]), error='--sleep value must be an integer.'),
        '--size-hint': Or(Use(lambda l: int(l[0]) if l else None), int, None,
            error='--size-hint value must be an integer.'),
        '--status-check': bool,
    })
    try:
        args = s.validate(args)
    except SchemaError as exc:
        print('{0}\n{1}'.format(str(exc), printable_usage(__doc__)), file=sys.stderr)
        sys.exit(1)

    # Status check: report whether S3 is accepting requests, then exit.
    if args['--status-check']:
        if session.s3_is_overloaded():
            print('warning: {0} is over limit, and not accepting requests. '
                  'Expect 503 SlowDown errors.'.format(args['<identifier>']),
                  file=sys.stderr)
            sys.exit(1)
        else:
            print('success: {0} is accepting requests.'.format(args['<identifier>']))
            sys.exit()

    elif args['<identifier>']:
        item = session.get_item(args['<identifier>'])

    # Upload keyword arguments.
    if args['--size-hint']:
        args['--header']['x-archive-size-hint'] = args['--size-hint']

    queue_derive = not args['--no-derive']
    verbose = not args['--quiet']

    upload_kwargs = dict(
        metadata=args['--metadata'],
        headers=args['--header'],
        debug=args['--debug'],
        queue_derive=queue_derive,
        verbose=verbose,
        verify=args['--verify'],
        checksum=args['--checksum'],
        retries=args['--retries'],
        retries_sleep=args['--sleep'],
        delete=args['--delete'],
    )

    # Upload files.
    if not args['--spreadsheet']:
        # '-' means read the payload from stdin into a temp file.
        if args['-']:
            local_file = TemporaryFile()
            local_file.write(sys.stdin.read())
            local_file.seek(0)
        else:
            local_file = args['<file>']

        # --remote-name applies to a single file only; take the first.
        if isinstance(local_file, (list, tuple, set)) and args['--remote-name']:
            local_file = local_file[0]
        if args['--remote-name']:
            files = {args['--remote-name']: local_file}
        else:
            files = local_file

        for _r in _upload_files(item, files, upload_kwargs):
            if args['--debug']:
                break
            if (not _r) or (not _r.ok):
                ERRORS = True

    # Bulk upload using spreadsheet.
    else:
        # Use the same session for each upload request.
        # BUGFIX: previously a fresh ArchiveSession() was constructed here,
        # shadowing the caller's configured session (credentials, settings).
        spreadsheet = csv.DictReader(open(args['--spreadsheet'], 'rU'))
        prev_identifier = None
        for row in spreadsheet:
            # BUGFIX: deep-copy kwargs per row so one row's metadata
            # cannot leak into every subsequent row's upload.
            upload_kwargs_copy = deepcopy(upload_kwargs)
            local_file = row['file']
            identifier = row['identifier']
            del row['file']
            del row['identifier']
            # An empty identifier cell reuses the previous row's item.
            if (not identifier) and (prev_identifier):
                identifier = prev_identifier
            item = session.get_item(identifier)
            # TODO: Clean up how indexed metadata items are coerced
            # into metadata.
            md_args = ['{0}:{1}'.format(k.lower(), v) for (k, v) in row.items() if v]
            metadata = get_args_dict(md_args)
            upload_kwargs_copy['metadata'].update(metadata)
            r = _upload_files(item, local_file, upload_kwargs_copy, prev_identifier,
                              session)
            for _r in r:
                if args['--debug']:
                    break
                if (not _r) or (not _r.ok):
                    ERRORS = True
            prev_identifier = identifier

    if ERRORS:
        sys.exit(1)