示例#1
0
    def run(self, args=None):
        """Parse the command line, set up the shared state and run the utility.

        Args:
          args: Optional list of command line arguments forwarded to
            ``parser.parse_args``.

        After parsing, the task identifier, image, storage client, model
        storages, GPU selection, configuration, model name and push flag are
        stored on the instance. ``exec_function`` is then called with the
        parsed arguments and, when ``--statistics_url`` is given, its result is
        posted to that URL together with the start and end timestamps.
        """
        parser = argparse.ArgumentParser()

        # (flags, keyword arguments) pairs, registered in exactly this order.
        option_table = (
            (('-s', '--storage_config'), dict(
                default=None,
                help=('Configuration of available storages as a file or a JSON string. '
                      'Setting "-" will read from the standard input.'))),
            (('-t', '--task_id'), dict(
                default=None,
                help="Identifier of this run.")),
            (('-i', '--image'), dict(
                default="?",
                help="Full URL (registry/image:tag) of the image used for this run.")),
            (('-b', '--beat_url'), dict(
                default=None,
                help=("Endpoint that listens to beat requests "
                      "(push notifications of activity)."))),
            (('-bi', '--beat_interval'), dict(
                default=30,
                type=int,
                help="Interval of beat requests in seconds.")),
            (('--statistics_url',), dict(
                default=None,
                help=('Endpoint that listens to statistics summaries generated '
                      'at the end of the execution'))),
            (('-ms', '--model_storage'), dict(
                default=os.environ["MODELS_DIR"],
                help='Model storage in the form <storage_id>:[<path>].')),
            (('-msr', '--model_storage_read'), dict(
                default=None,
                help=('Model storage to read from, in the form <storage_id>:[<path>] '
                      '(defaults to model_storage).'))),
            (('-msw', '--model_storage_write'), dict(
                default=None,
                help=('Model storage to write to, in the form <storage_id>:[<path>] '
                      '(defaults to model_storage).'))),
            (('-c', '--config'), dict(
                default=None,
                help=('Configuration as a file or a JSON string. '
                      'Setting "-" will read from the standard input.'))),
            (('--config_update_mode',), dict(
                choices=['default', 'merge', 'replace'],
                default='default',
                help=('How to update the parent task configuration with the given '
                      'configuration. '
                      '"default": automatic mode based on the configuration, '
                      '"merge": recursively update configuration fields, '
                      '"replace": replace the top-most fields.'))),
            (('-m', '--model'), dict(
                default=None,
                help='Model to load.')),
            (('-g', '--gpuid'), dict(
                default="0",
                help="Comma-separated list of 0-indexed GPU identifiers.")),
            (('--no_push',), dict(
                default=False,
                action='store_true',
                help='Do not push model.')),
        )
        for flags, settings in option_table:
            parser.add_argument(*flags, **settings)

        # The parser is handed to declare_arguments before the command line
        # is parsed.
        self.declare_arguments(parser)
        args = parser.parse_args(args=args)

        # Generate an identifier when the caller did not provide one.
        if args.task_id is None:
            args.task_id = str(uuid.uuid4())
        self._task_id, self._image = args.task_id, args.image

        hostname = os.uname()[1]
        start_beat_service(
            hostname, args.beat_url, args.task_id, interval=args.beat_interval)

        storage_config = load_config(args.storage_config) if args.storage_config else None
        self._storage = StorageClient(config=storage_config)

        # The dedicated read/write storages default to the generic one.
        for storage_attr in ('model_storage_read', 'model_storage_write'):
            if getattr(args, storage_attr) is None:
                setattr(args, storage_attr, args.model_storage)
        self._model_storage_read = args.model_storage_read
        self._model_storage_write = args.model_storage_write

        # For backward compatibility, a single identifier is exposed as an int
        # rather than as a one-element list.
        gpu_ids = [int(token) for token in args.gpuid.split(',')]
        args.gpuid = gpu_ids[0] if len(gpu_ids) == 1 else gpu_ids
        self._gpuid = args.gpuid

        self._config = None if args.config is None else load_config(args.config)
        self._model = args.model
        self._no_push = args.no_push

        logger.info('Starting executing utility %s=%s', self.name, args.image)
        start_time = time.time()
        stats = self.exec_function(args)
        end_time = time.time()
        elapsed = end_time - start_time
        logger.info('Finished executing utility in %s seconds', str(elapsed))

        if args.statistics_url is None:
            return

        summary = {
            'task_id': self._task_id,
            'start_time': start_time,
            'end_time': end_time,
            'statistics': stats or {}
        }
        requests.post(args.statistics_url, json=summary)
示例#2
0
    def run(self):
        """Main entrypoint: parse ``sys.argv`` and dispatch the requested run type.

        The global options are followed by one mandatory run type: ``train``,
        ``trans``, ``serve`` or ``preprocess``. When a parent model is set
        (``--model`` or the ``model`` key of the configuration) and the
        instance is not stateless, the model is fetched into the local models
        directory and its ``config.json`` is merged with the given
        configuration before the matching wrapper is called.

        Raises:
          SystemExit: raised by ``argparse`` when the command line is invalid,
            e.g. no run type is given, neither ``--config`` nor ``--model`` is
            set, or ``--model_storage`` is missing where it is required.
          ValueError: if ``trans`` or ``serve`` is requested without a model.
        """
        parser = argparse.ArgumentParser()
        parser.add_argument(
            '-c',
            '--config',
            default=None,
            help=('Configuration as a file or a JSON string. '
                  'Setting "-" will read from the standard input.'))
        parser.add_argument(
            '-s',
            '--storage_config',
            default=None,
            help=
            ('Configuration of available storages as a file or a JSON string. '
             'Setting "-" will read from the standard input.'))
        parser.add_argument(
            '-ms',
            '--model_storage',
            help='Model storage in the form <storage_id>:[<path>].')
        parser.add_argument('-m',
                            '--model',
                            default=None,
                            help='Model to load.')
        parser.add_argument(
            '-g',
            '--gpuid',
            default="0",
            help=
            "Comma-separated list of 1-indexed GPU identifiers (0 for CPU).")
        parser.add_argument('-t',
                            '--task_id',
                            default=None,
                            help="Identifier of this run.")
        parser.add_argument(
            '-i',
            '--image',
            default="?",
            help="Full URL (registry/image:tag) of the image used for this run."
        )
        parser.add_argument('-b',
                            '--beat_url',
                            default=None,
                            help=("Endpoint that listens to beat requests "
                                  "(push notifications of activity)."))
        parser.add_argument('-bi',
                            '--beat_interval',
                            default=30,
                            type=int,
                            help="Interval of beat requests in seconds.")

        subparsers = parser.add_subparsers(help='Run type', dest='cmd')
        # Sub-commands are optional by default on Python 3. Without this, a
        # command line lacking a run type would start the beat service, maybe
        # download the model, and then fall through every branch below
        # without doing anything.
        subparsers.required = True

        # 'train' takes no option of its own.
        subparsers.add_parser('train', help='Run a training.')

        parser_trans = subparsers.add_parser('trans',
                                             help='Run a translation.')
        parser_trans.add_argument('-i',
                                  '--input',
                                  required=True,
                                  help='Input file.')
        parser_trans.add_argument('-o',
                                  '--output',
                                  required=True,
                                  help='Output file.')

        parser_serve = subparsers.add_parser('serve', help='Serve a model.')
        parser_serve.add_argument('-hs',
                                  '--host',
                                  default="0.0.0.0",
                                  help='Serving hostname.')
        parser_serve.add_argument('-p',
                                  '--port',
                                  type=int,
                                  default=4000,
                                  help='Serving port.')

        # 'preprocess' takes no option of its own.
        subparsers.add_parser('preprocess',
                              help='Sample and preprocess corpus.')

        args = parser.parse_args()
        if args.config is None and args.model is None:
            parser.error(
                'at least one of --config or --model options must be set')
        # The model storage is only optional for stateless instances and for
        # the 'preprocess' run type.
        if not self._stateless and args.cmd != 'preprocess' and not args.model_storage:
            parser.error('argument -ms/--model_storage is required')
        if args.task_id is None:
            args.task_id = str(uuid.uuid4())

        # For backward compatibility, a single GPU identifier is passed as an
        # int rather than as a one-element list.
        args.gpuid = args.gpuid.split(',')
        args.gpuid = [int(g) for g in args.gpuid]
        if len(args.gpuid) == 1:
            args.gpuid = args.gpuid[0]

        start_beat_service(os.uname()[1],
                           args.beat_url,
                           args.task_id,
                           interval=args.beat_interval)

        config = load_config(args.config) if args.config is not None else {}
        # The command line model takes precedence over the configured one.
        parent_model = args.model or config.get('model')

        storage = StorageClient(tmp_dir=self._tmp_dir,
                                config=load_config(args.storage_config)
                                if args.storage_config else None)

        if parent_model is not None and not self._stateless:
            # Download model locally and merge the configuration.
            remote_model_path = storage.join(args.model_storage, parent_model)
            model_path = os.path.join(self._models_dir, parent_model)
            fetch_model(storage, remote_model_path, model_path)
            with open(os.path.join(model_path, 'config.json'),
                      'r') as config_file:
                model_config = json.load(config_file)
            config = merge_config(model_config, config)
        else:
            model_path = None

        if args.cmd == 'train':
            self.train_wrapper(args.task_id,
                               config,
                               storage,
                               args.model_storage,
                               args.image,
                               parent_model=parent_model,
                               model_path=model_path,
                               gpuid=args.gpuid)
        elif args.cmd == 'trans':
            if parent_model is None:
                raise ValueError('translation requires a model')
            self.trans_wrapper(config,
                               model_path,
                               storage,
                               args.input,
                               args.output,
                               gpuid=args.gpuid)
        elif args.cmd == 'serve':
            if parent_model is None:
                raise ValueError('serving requires a model')
            self.serve_wrapper(config,
                               model_path,
                               args.host,
                               args.port,
                               gpuid=args.gpuid)
        elif args.cmd == 'preprocess':
            self.preprocess(config, storage)
示例#3
0
    def run(self):
        """Main entrypoint: parse ``sys.argv`` and dispatch the requested run type.

        The global options are followed by one mandatory run type: ``train``
        or ``trans``. When a parent model is set (``--model`` or the ``model``
        key of the configuration) and the instance is not stateless, the model
        is fetched into the local models directory and its ``config.json`` is
        merged with the given configuration before the matching wrapper is
        called.

        Raises:
          SystemExit: raised by ``argparse`` when the command line is invalid,
            e.g. no run type is given or neither ``--config`` nor ``--model``
            is set.
          ValueError: if ``trans`` is requested without a model.
        """
        parser = argparse.ArgumentParser()
        parser.add_argument(
            '-c',
            '--config',
            default=None,
            help=('Configuration as a file or a JSON string. '
                  'Setting "-" will read from the standard input.'))
        parser.add_argument(
            '-s',
            '--storage_config',
            default=None,
            help=
            ('Configuration of available storages as a file or a JSON string. '
             'Setting "-" will read from the standard input.'))
        # The model storage is mandatory unless the instance is stateless.
        parser.add_argument(
            '-ms',
            '--model_storage',
            required=not self._stateless,
            help='Model storage in the form <storage_id>:[<path>].')
        parser.add_argument('-m',
                            '--model',
                            default=None,
                            help='Model to load.')
        parser.add_argument('-g',
                            '--gpuid',
                            default=0,
                            type=int,
                            help="1-indexed GPU identifier (0 for CPU).")
        parser.add_argument('-t',
                            '--task_id',
                            default=None,
                            help="Identifier of this run.")
        parser.add_argument(
            '-i',
            '--image',
            default="?",
            help="Full URL (registry/image:tag) of the image used for this run."
        )
        parser.add_argument('-b',
                            '--beat_url',
                            default=None,
                            help=("Endpoint that listens to beat requests "
                                  "(push notifications of activity)."))
        parser.add_argument('-bi',
                            '--beat_interval',
                            default=30,
                            type=int,
                            help="Interval of beat requests in seconds.")

        subparsers = parser.add_subparsers(help='Run type', dest='cmd')
        # Sub-commands are optional by default on Python 3. Without this, a
        # command line lacking a run type would either do nothing or fail
        # with an unrelated "translation requires a model" error.
        subparsers.required = True

        # 'train' takes no option of its own.
        subparsers.add_parser('train', help='Run a training.')

        parser_trans = subparsers.add_parser('trans',
                                             help='Run a translation.')
        parser_trans.add_argument('-i',
                                  '--input',
                                  required=True,
                                  help='Input file.')
        parser_trans.add_argument('-o',
                                  '--output',
                                  required=True,
                                  help='Output file.')

        args = parser.parse_args()
        if args.config is None and args.model is None:
            parser.error(
                'at least one of --config or --model options must be set')
        if args.task_id is None:
            args.task_id = str(uuid.uuid4())

        start_beat_service(os.uname()[1],
                           args.beat_url,
                           args.task_id,
                           interval=args.beat_interval)

        config = load_config(args.config) if args.config is not None else {}
        # The command line model takes precedence over the configured one.
        parent_model = args.model or config.get('model')

        storage = StorageClient(config=load_config(args.storage_config)
                                if args.storage_config else None)

        if parent_model is not None and not self._stateless:
            # Download model locally and merge the configuration.
            remote_model_path = storage.join(args.model_storage, parent_model)
            model_path = os.path.join(self._models_dir, parent_model)
            fetch_model(storage, remote_model_path, model_path)
            with open(os.path.join(model_path, 'config.json'),
                      'r') as config_file:
                model_config = json.load(config_file)
            config = merge_config(model_config, config)
        else:
            model_path = None

        if args.cmd == 'train':
            self.train_wrapper(args.task_id,
                               config,
                               storage,
                               args.model_storage,
                               args.image,
                               model_path=model_path,
                               gpuid=args.gpuid)
        elif args.cmd == 'trans':
            # The model requirement only applies to translation, so it is
            # checked inside this branch rather than ahead of it.
            if parent_model is None:
                raise ValueError('translation requires a model')
            self.trans_wrapper(config,
                               model_path,
                               storage,
                               args.input,
                               args.output,
                               gpuid=args.gpuid)
    def run(self, args=None):
        """Main entrypoint: parse the command line and dispatch the run type.

        The global options are followed by one mandatory run type: ``train``,
        ``trans``, ``release``, ``serve``, ``preprocess`` or ``buildvocab``.
        When a parent model is set (``--model`` or the ``model`` key of the
        configuration) and the instance is not stateless, the model is fetched
        from the read storage into the local models directory and its
        ``config.json`` is merged with the given configuration before the
        matching wrapper is called.

        Args:
          args: Optional list of command line arguments forwarded to
            ``parser.parse_args``.

        Raises:
          SystemExit: raised by ``argparse`` when the command line is invalid,
            e.g. no run type is given, neither ``--config`` nor ``--model`` is
            set, or a required model storage is missing.
          ValueError: if the parent model type is not accepted by the
            requested run type.
        """
        parser = argparse.ArgumentParser()
        parser.add_argument(
            '-c',
            '--config',
            default=None,
            help=('Configuration as a file or a JSON string. '
                  'Setting "-" will read from the standard input.'))
        parser.add_argument(
            '-s',
            '--storage_config',
            default=None,
            help=
            ('Configuration of available storages as a file or a JSON string. '
             'Setting "-" will read from the standard input.'))
        parser.add_argument(
            '-ms',
            '--model_storage',
            default=None,
            help='Model storage in the form <storage_id>:[<path>].')
        parser.add_argument(
            '-msr',
            '--model_storage_read',
            default=None,
            help=(
                'Model storage to read from, in the form <storage_id>:[<path>] '
                '(defaults to model_storage).'))
        parser.add_argument(
            '-msw',
            '--model_storage_write',
            default=None,
            help=(
                'Model storage to write to, in the form <storage_id>:[<path>] '
                '(defaults to model_storage).'))
        parser.add_argument('-m',
                            '--model',
                            default=None,
                            help='Model to load.')
        parser.add_argument(
            '-g',
            '--gpuid',
            default="0",
            help=
            "Comma-separated list of 1-indexed GPU identifiers (0 for CPU).")
        parser.add_argument('-t',
                            '--task_id',
                            default=None,
                            help="Identifier of this run.")
        parser.add_argument(
            '-i',
            '--image',
            default="?",
            help="Full URL (registry/image:tag) of the image used for this run."
        )
        parser.add_argument('-b',
                            '--beat_url',
                            default=None,
                            help=("Endpoint that listens to beat requests "
                                  "(push notifications of activity)."))
        parser.add_argument('-bi',
                            '--beat_interval',
                            default=30,
                            type=int,
                            help="Interval of beat requests in seconds.")
        parser.add_argument('--no_push',
                            default=False,
                            action='store_true',
                            help='Do not push model.')

        subparsers = parser.add_subparsers(help='Run type', dest='cmd')
        # Sub-commands are optional by default on Python 3. Without this, a
        # command line lacking a run type would start the beat service, maybe
        # download the model, and then fall through every branch below
        # without doing anything.
        subparsers.required = True

        # 'train' takes no option of its own.
        subparsers.add_parser('train', help='Run a training.')

        parser_trans = subparsers.add_parser('trans',
                                             help='Run a translation.')
        parser_trans.add_argument('-i',
                                  '--input',
                                  required=True,
                                  nargs='+',
                                  help='Input file.')
        parser_trans.add_argument('-o',
                                  '--output',
                                  required=True,
                                  nargs='+',
                                  help='Output file.')
        parser_trans.add_argument('--as_release',
                                  default=False,
                                  action='store_true',
                                  help='Translate from a released model.')

        parser_release = subparsers.add_parser(
            'release', help='Release a model for serving.')
        parser_release.add_argument(
            '-d',
            '--destination',
            default=None,
            help='Released model storage (defaults to the model storage).')

        parser_serve = subparsers.add_parser('serve', help='Serve a model.')
        parser_serve.add_argument('-hs',
                                  '--host',
                                  default="0.0.0.0",
                                  help='Serving hostname.')
        parser_serve.add_argument('-p',
                                  '--port',
                                  type=int,
                                  default=4000,
                                  help='Serving port.')

        parser_preprocess = subparsers.add_parser(
            'preprocess', help='Sample and preprocess corpus.')
        parser_preprocess.add_argument('--build_model',
                                       default=False,
                                       action='store_true',
                                       help='Preprocess data into a model.')

        # 'buildvocab' takes no option of its own.
        subparsers.add_parser('buildvocab', help='Build vocabularies.')

        args = parser.parse_args(args=args)
        if args.config is None and args.model is None:
            parser.error(
                'at least one of --config or --model options must be set')
        # The dedicated read/write storages default to the generic one.
        if args.model_storage_read is None:
            args.model_storage_read = args.model_storage
        if args.model_storage_write is None:
            args.model_storage_write = args.model_storage
        # Both the read and the write storages must be known, except for
        # stateless instances and for 'preprocess' without --build_model.
        # Note that --build_model only exists on the 'preprocess' namespace;
        # the "or" short-circuits for every other run type.
        if (not self._stateless
                and (args.cmd != 'preprocess' or args.build_model)
                and (args.model_storage_read is None
                     or args.model_storage_write is None)):
            parser.error('Missing model storage argument')
        if args.task_id is None:
            args.task_id = str(uuid.uuid4())

        # For backward compatibility, a single GPU identifier is passed as an
        # int rather than as a one-element list.
        args.gpuid = args.gpuid.split(',')
        args.gpuid = [int(g) for g in args.gpuid]
        if len(args.gpuid) == 1:
            args.gpuid = args.gpuid[0]

        start_beat_service(os.uname()[1],
                           args.beat_url,
                           args.task_id,
                           interval=args.beat_interval)

        config = load_config(args.config) if args.config is not None else {}
        # The command line model takes precedence over the configured one.
        parent_model = args.model or config.get('model')

        storage = StorageClient(tmp_dir=self._tmp_dir,
                                config=load_config(args.storage_config)
                                if args.storage_config else None)

        if parent_model is not None and not self._stateless:
            # Download model locally and merge the configuration.
            remote_model_path = storage.join(args.model_storage_read,
                                             parent_model)
            model_path = os.path.join(self._models_dir, parent_model)
            fetch_model(storage, remote_model_path, model_path)
            with open(os.path.join(model_path, 'config.json'),
                      'r') as config_file:
                model_config = json.load(config_file)
            # Model configurations without an explicit type are classified
            # from the model name suffix.
            if 'modelType' not in model_config:
                if parent_model.endswith('_release'):
                    model_config['modelType'] = 'release'
                else:
                    model_config['modelType'] = 'checkpoint'
            # Merge into a copy so that model_config itself is passed on
            # unmodified.
            config = merge_config(copy.deepcopy(model_config), config)
        else:
            model_path = None
            model_config = None

        # NOTE(review): the 'train' and 'preprocess --build_model' checks read
        # config['modelType'] even for stateless instances, where the model
        # configuration is not merged in - confirm that key is always present.
        if args.cmd == 'train':
            if (parent_model is not None and config['modelType']
                    not in ('checkpoint', 'base', 'preprocess')):
                raise ValueError(
                    'cannot train from a model that is not a training checkpoint, '
                    'a base model, or a preprocess model')
            self.train_wrapper(args.task_id,
                               config,
                               storage,
                               args.model_storage_write,
                               args.image,
                               parent_model=parent_model,
                               model_path=model_path,
                               model_config=model_config,
                               gpuid=args.gpuid,
                               push_model=not args.no_push)
        elif args.cmd == 'buildvocab':
            self.build_vocab(args.task_id,
                             config,
                             storage,
                             args.model_storage_write,
                             args.image,
                             push_model=not args.no_push)
        elif args.cmd == 'trans':
            if (not self._stateless and
                (parent_model is None or config['modelType'] != 'checkpoint')):
                raise ValueError('translation requires a training checkpoint')
            self.trans_wrapper(config,
                               model_path,
                               storage,
                               args.input,
                               args.output,
                               as_release=args.as_release,
                               gpuid=args.gpuid)
        elif args.cmd == 'release':
            if (not self._stateless and
                (parent_model is None or config['modelType'] != 'checkpoint')):
                raise ValueError('releasing requires a training checkpoint')
            # The released model goes to the write storage by default.
            if args.destination is None:
                args.destination = args.model_storage_write
            self.release_wrapper(config,
                                 model_path,
                                 storage,
                                 args.image,
                                 args.destination,
                                 gpuid=args.gpuid,
                                 push_model=not args.no_push)
        elif args.cmd == 'serve':
            if (not self._stateless and
                (parent_model is None or config['modelType'] != 'release')):
                raise ValueError('serving requires a released model')
            self.serve_wrapper(config,
                               model_path,
                               args.host,
                               args.port,
                               gpuid=args.gpuid)
        elif args.cmd == 'preprocess':
            if not args.build_model:
                self.preprocess(config, storage)
            else:
                if (parent_model is not None
                        and config['modelType'] not in ('checkpoint', 'base')):
                    raise ValueError(
                        'cannot preprocess from a model that is not a training '
                        'checkpoint or a base model')
                self.preprocess_into_model(args.task_id,
                                           config,
                                           storage,
                                           args.model_storage_write,
                                           args.image,
                                           parent_model=parent_model,
                                           model_path=model_path,
                                           push_model=not args.no_push)
示例#5
0
    def run(self, args=None):
        """Parse the command line, prepare the instance state and run the utility.

        Args:
          args: Optional list of command line arguments forwarded to
            ``parser.parse_args``.

        The parsed task identifier, image, storage client, model storages, GPU
        selection, configuration, model name and push flag are stored on the
        instance before ``exec_function`` is called with the parsed arguments.
        When ``--statistics_url`` is given, the value returned by
        ``exec_function`` is posted to that URL together with the start and
        end timestamps.
        """
        parser = argparse.ArgumentParser()
        register = parser.add_argument

        # Task and monitoring options.
        register("-s", "--storage_config", default=None,
                 help=("Configuration of available storages as a file or a JSON string. "
                       'Setting "-" will read from the standard input.'))
        register("-t", "--task_id", default=None,
                 help="Identifier of this run.")
        register("-i", "--image", default="?",
                 help="Full URL (registry/image:tag) of the image used for this run.")
        register("-b", "--beat_url", default=None,
                 help=("Endpoint that listens to beat requests "
                       "(push notifications of activity)."))
        register("-bi", "--beat_interval", default=30, type=int,
                 help="Interval of beat requests in seconds.")
        register("--statistics_url", default=None,
                 help=("Endpoint that listens to statistics summaries generated "
                       "at the end of the execution"))

        # Model and configuration options.
        register("-ms", "--model_storage", default=os.environ["MODELS_DIR"],
                 help="Model storage in the form <storage_id>:[<path>].")
        register("-msr", "--model_storage_read", default=None,
                 help=("Model storage to read from, in the form <storage_id>:[<path>] "
                       "(defaults to model_storage)."))
        register("-msw", "--model_storage_write", default=None,
                 help=("Model storage to write to, in the form <storage_id>:[<path>] "
                       "(defaults to model_storage)."))
        register("-c", "--config", default=None,
                 help=("Configuration as a file or a JSON string. "
                       'Setting "-" will read from the standard input.'))
        register("--config_update_mode",
                 choices=["default", "merge", "replace"],
                 default="default",
                 help=("How to update the parent task configuration with the given "
                       "configuration. "
                       '"default": automatic mode based on the configuration, '
                       '"merge": recursively update configuration fields, '
                       '"replace": replace the top-most fields.'))
        register("-m", "--model", default=None, help="Model to load.")
        register("-g", "--gpuid", default="0",
                 help="Comma-separated list of 0-indexed GPU identifiers.")
        register("--no_push", default=False, action="store_true",
                 help="Do not push model.")

        # The parser is handed to declare_arguments before the command line
        # is parsed.
        self.declare_arguments(parser)
        args = parser.parse_args(args=args)

        # A random identifier is generated when none is given.
        if args.task_id is None:
            args.task_id = str(uuid.uuid4())
        self._task_id = args.task_id
        self._image = args.image

        node_name = os.uname()[1]
        start_beat_service(node_name,
                           args.beat_url,
                           args.task_id,
                           interval=args.beat_interval)

        storages = load_config(args.storage_config) if args.storage_config else None
        self._storage = StorageClient(config=storages)

        # Fall back to the generic model storage for reading and writing.
        generic_storage = args.model_storage
        if args.model_storage_read is None:
            args.model_storage_read = generic_storage
        if args.model_storage_write is None:
            args.model_storage_write = generic_storage
        self._model_storage_read = args.model_storage_read
        self._model_storage_write = args.model_storage_write

        # For backward compatibility, a lone identifier is kept as an int
        # instead of a one-element list.
        identifiers = list(map(int, args.gpuid.split(",")))
        if len(identifiers) == 1:
            args.gpuid = identifiers[0]
        else:
            args.gpuid = identifiers
        self._gpuid = args.gpuid

        if args.config is not None:
            self._config = load_config(args.config)
        else:
            self._config = None
        self._model = args.model
        self._no_push = args.no_push

        logger.info("Starting executing utility %s=%s", self.name, args.image)
        start_time = time.time()
        stats = self.exec_function(args)
        end_time = time.time()
        duration = end_time - start_time
        logger.info("Finished executing utility in %.1f seconds", duration)

        if args.statistics_url is None:
            return

        payload = {
            "task_id": self._task_id,
            "start_time": start_time,
            "end_time": end_time,
            "statistics": stats or {},
        }
        requests.post(args.statistics_url, json=payload)