Example #1
File: reindex.py Project: NoopDog/azul
def main(argv: List[str]):
    args = parser.parse_args(argv)

    if args.verbose:
        config.debug = 1

    configure_script_logging(logger)

    azul_client = AzulClient(prefix=args.prefix, num_workers=args.num_workers)

    azul_client.reset_indexer(args.catalogs,
                              purge_queues=args.purge,
                              delete_indices=args.delete,
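                              # `and` binds tighter than `or`: indices are
                              # recreated when --create is given, or when
                              # --index is combined with --delete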
                              create_indices=args.create
                              or args.index and args.delete)

    if args.index:
        logger.info('Queuing notifications for reindexing ...')
        for catalog in args.catalogs:
            if args.partition_prefix_length:
                azul_client.remote_reindex(catalog,
                                           args.partition_prefix_length)
            else:
                azul_client.reindex(catalog)
        if args.wait:
            # Match max_timeout to reindex job timeout in `.gitlab-ci.yml`
            azul_client.wait_for_indexer(
                min_timeout=20 * 60 if config.dss_query_prefix else None,
                max_timeout=13 * 60 * 60)
Example #2
def main(argv):
    configure_script_logging(logger)
    import argparse
    parser = argparse.ArgumentParser(
        description='Subscribe indexer lambda to bundle events from DSS')
    parser.add_argument('--unsubscribe',
                        '-U',
                        dest='subscribe',
                        action='store_false',
                        default=True)
    parser.add_argument(
        '--personal',
        '-p',
        dest='shared',
        action='store_false',
        default=True,
        help=
        "Do not use the shared credentials of the Google service account that represents the "
        "current deployment, but instead use personal credentials for authenticating to the DSS. "
        "When specifying this option you will need to a) run `hca dss login` prior to running "
        "this script or b) set GOOGLE_APPLICATION_CREDENTIALS to point to another service "
        "account's credentials. Note that this implies that the resulting DSS subscription will "
        "be owned by a) you or b) the other service account and that only a) you or b) someone "
        "in possession of those credentials can modify the subscription in the future. This is "
        "typically not what you'd want.")
    options = parser.parse_args(argv)
    dss_client = azul.dss.client()
    for catalog in config.catalogs:
        plugin = RepositoryPlugin.load(catalog)
        if isinstance(plugin, dss.Plugin):
            if options.shared:
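                # Temporarily assume the deployment's shared service account
                # credentials while managing the subscription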
                with aws.service_account_credentials(
                        config.ServiceAccount.indexer):
                    subscription.manage_subscriptions(
                        plugin, dss_client, subscribe=options.subscribe)
            else:
                subscription.manage_subscriptions(plugin,
                                                  dss_client,
                                                  subscribe=options.subscribe)
Example #3
# This script simulates a user triggering Azul endpoints via the Data Browser
# GUI.
#
# Usage:
#
#  - Set $azul_locust_catalog to the desired catalog, or leave unset to test the
#    default catalog.
#
#  - Run `locust -f scripts/locust/service.py`
#
#  - In browser go to localhost:8089
#
# For more info see https://docs.locust.io/en/stable/
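#
# For example, both steps can be combined on one line (the catalog name `dcp2`
# is only an illustration):
#
#  - `azul_locust_catalog=dcp2 locust -f scripts/locust/service.py`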

log = logging.getLogger(__name__)
configure_script_logging(log)


class LocustConfig(Config):
    @cached_property
    def catalog(self) -> str:
        # Locust does not support passing command-line arguments to the script
        catalog = os.environ.get('azul_locust_catalog', self.default_catalog)
        require(catalog in self.catalogs)
        return catalog


config = LocustConfig()


@contextmanager
Example #4
def main():
    configure_script_logging(log)
    register_with_sam()
    verify_sources()
    verify_source_access()
Example #5
def main(argv: List[str]):
    args = parser.parse_args(argv)

    if args.verbose:
        config.debug = 1

    configure_script_logging(logger)

    azul = AzulClient(num_workers=args.num_workers)

    source_globs = set(args.sources)
    if not args.local or args.deindex:
        sources_by_catalog = defaultdict(set)
        globs_matched = set()
        for catalog in args.catalogs:
            sources = azul.catalog_sources(catalog)
            for source_glob in source_globs:
                matches = fnmatch.filter(sources, source_glob)
                if matches:
                    globs_matched.add(source_glob)
                logger.debug('Source glob %r matched sources %r in catalog %r',
                             source_glob, matches, catalog)
                sources_by_catalog[catalog].update(matches)
        unmatched = source_globs - globs_matched
        if unmatched:
            logger.warning('Source(s) not found in any catalog: %r', unmatched)
        require(any(sources_by_catalog.values()),
                'No valid sources specified for any catalog')
    else:
        if source_globs == {'*'}:
            sources_by_catalog = {
                catalog: azul.catalog_sources(catalog)
                for catalog in args.catalogs
            }
        else:
            parser.error('Cannot specify sources when performing a local reindex')
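            # parser.error() raises SystemExit, so this point is unreachable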
            assert False

    if args.deindex:
        require(not any((args.index, args.delete, args.create)),
                '--deindex is incompatible with --index, --create, and --delete.')
        require('*' not in source_globs, '--deindex is incompatible with source `*`. '
                                         'Use --delete instead.')

        for catalog, sources in sources_by_catalog.items():
            if sources:
                azul.deindex(catalog, sources)

    azul.reset_indexer(args.catalogs,
                       purge_queues=args.purge,
                       delete_indices=args.delete,
                       create_indices=args.create or args.index and args.delete)

    if args.index:
        logger.info('Queuing notifications for reindexing ...')
        reservation = None
        num_notifications = 0
        for catalog, sources in sources_by_catalog.items():
            if sources:
                if (
                    args.manage_slots
                    and reservation is None
                    and isinstance(azul.repository_plugin(catalog), tdr.Plugin)
                ):
                    reservation = BigQueryReservation()
                    reservation.activate()
                if not args.local:
                    azul.remote_reindex(catalog, sources)
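                    # A remote reindex doesn't report how many notifications
                    # were queued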
                    num_notifications = None
                else:
                    num_notifications += azul.reindex(catalog, args.prefix)
            else:
                logger.info('Skipping catalog %r (no matching sources)', catalog)
        if args.wait:
            if num_notifications == 0:
                logger.warning('No notifications for prefix %r and catalogs %r were sent',
                               args.prefix, args.catalogs)
            else:
                azul.wait_for_indexer()
Example #6
def main(argv):
    configure_script_logging(logger)
    p = argparse.ArgumentParser(
        description='Manage the SQS queues in the current deployment')
    sps = p.add_subparsers(help='sub-command help', dest='command')

    sps.add_parser('list', help='List SQS queues in current deployment')

    sp = sps.add_parser('dump',
                        help='Dump contents of queue into designated file')
    sp.add_argument('queue',
                    metavar='QUEUE_NAME',
                    help='Name of the queue to obtain messages from')
    sp.add_argument('path',
                    metavar='FILE_PATH',
                    help='Path of file to write messages to')
    sp.add_argument(
        '--delete',
        '-D',
        action='store_true',
        help=
        'Remove messages from the queue after writing them to the specified file. By default the '
        'messages will be returned to the queue')
    sp.add_argument('--no-json-body',
                    '-J',
                    dest='json_body',
                    action='store_false',
                    help='Do not deserialize JSON in queue message body.')

    sp = sps.add_parser('feed', help='Feed messages from file back into queue')
    sp.add_argument('path',
                    metavar='FILE_PATH',
                    help='Path of file to read messages from')
    sp.add_argument('queue',
                    metavar='QUEUE_NAME',
                    help='Name of the queue to feed messages into')
    sp.add_argument(
        '--force',
        '-F',
        action='store_true',
        help='Force feeding messages to a queue they did not originate from.')
    sp.add_argument(
        '--delete',
        '-D',
        action='store_true',
        help=
        'Remove messages from the file after submitting them to the specified queue. By default '
        'the messages will remain in the file')

    sp = sps.add_parser('purge', help='Purge all messages in a queue')
    sp.add_argument('queue',
                    metavar='QUEUE_NAME',
                    help='Name of the queue to purge.')

    sps.add_parser(
        'purge_all',
        help=
        'Purge all messages in all queues in the current deployment. Use with caution. The '
        'messages will be lost forever.')

    sp = sps.add_parser(
        'dump_all',
        help=
        'Dump all messages in all queues in the current deployment. Each queue will be '
        'dumped into a separate JSON file. The name of the JSON file is the name of '
        'the queue followed by ".json"')
    sp.add_argument(
        '--delete',
        '-D',
        action='store_true',
        help=
        'Remove messages from each queue after writing them to its file. By default the '
        'messages will be returned to the queue')
    sp.add_argument('--no-json-body',
                    '-J',
                    dest='json_body',
                    action='store_false',
                    help='Do not deserialize JSON in queue message body.')

    args = p.parse_args(argv)

    if args.command in ('list', 'purge', 'purge_all'):
        queues = Queues()
        if args.command == 'list':
            queues.list()
        elif args.command == 'purge':
            queues.purge(args.queue)
        elif args.command == 'purge_all':
            queues.purge_all()
        else:
            assert False, args.command
    elif args.command in ('dump', 'dump_all'):
        queues = Queues(delete=args.delete, json_body=args.json_body)
        if args.command == 'dump':
            queues.dump(args.queue, args.path)
        elif args.command == 'dump_all':
            queues.dump_all()
        else:
            assert False, args.command
    elif args.command == 'feed':
        queues = Queues(delete=args.delete)
        queues.feed(args.path, args.queue, force=args.force)
    else:
        p.print_usage()
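
Each of the main(argv) functions above expects the program name to have been
stripped from the argument list. The excerpts don't show how they are wired up;
a minimal, assumed entry-point sketch (not code from the project) would be:

import sys

if __name__ == '__main__':
    main(sys.argv[1:])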