Exemplo n.º 1
0
    def run(self, args=None):
        """Main entrypoint: parse the command line and run the selected command.

        The available commands are ``train``, ``trans``, ``release``,
        ``serve``, ``preprocess`` and ``buildvocab``.

        Args:
          args: Optional list of command line arguments that is forwarded to
            ``ArgumentParser.parse_args``. Defaults to ``None``.

        Raises:
          ValueError: If the type of the loaded model does not match what the
            selected command requires.
        """
        parser = argparse.ArgumentParser()
        parser.add_argument(
            '-c',
            '--config',
            default=None,
            help=('Configuration as a file or a JSON string. '
                  'Setting "-" will read from the standard input.'))
        parser.add_argument(
            '-s',
            '--storage_config',
            default=None,
            help=
            ('Configuration of available storages as a file or a JSON string. '
             'Setting "-" will read from the standard input.'))
        parser.add_argument(
            '-ms',
            '--model_storage',
            default=None,
            help='Model storage in the form <storage_id>:[<path>].')
        parser.add_argument(
            '-msr',
            '--model_storage_read',
            default=None,
            help=(
                'Model storage to read from, in the form <storage_id>:[<path>] '
                '(defaults to model_storage).'))
        parser.add_argument(
            '-msw',
            '--model_storage_write',
            default=None,
            help=(
                'Model storage to write to, in the form <storage_id>:[<path>] '
                '(defaults to model_storage).'))
        parser.add_argument('-m',
                            '--model',
                            default=None,
                            help='Model to load.')
        parser.add_argument(
            '-g',
            '--gpuid',
            default="0",
            help=
            "Comma-separated list of 1-indexed GPU identifiers (0 for CPU).")
        parser.add_argument('-t',
                            '--task_id',
                            default=None,
                            help="Identifier of this run.")
        parser.add_argument(
            '-i',
            '--image',
            default="?",
            help="Full URL (registry/image:tag) of the image used for this run."
        )
        parser.add_argument('-b',
                            '--beat_url',
                            default=None,
                            help=("Endpoint that listens to beat requests "
                                  "(push notifications of activity)."))
        parser.add_argument('-bi',
                            '--beat_interval',
                            default=30,
                            type=int,
                            help="Interval of beat requests in seconds.")
        parser.add_argument('--no_push',
                            default=False,
                            action='store_true',
                            help='Do not push model.')

        # One sub-parser per command; the chosen name ends up in args.cmd.
        subparsers = parser.add_subparsers(help='Run type', dest='cmd')
        subparsers.add_parser('train', help='Run a training.')

        parser_trans = subparsers.add_parser('trans',
                                             help='Run a translation.')
        parser_trans.add_argument('-i',
                                  '--input',
                                  required=True,
                                  nargs='+',
                                  help='Input file.')
        parser_trans.add_argument('-o',
                                  '--output',
                                  required=True,
                                  nargs='+',
                                  help='Output file.')
        parser_trans.add_argument('--as_release',
                                  default=False,
                                  action='store_true',
                                  help='Translate from a released model.')

        parser_release = subparsers.add_parser(
            'release', help='Release a model for serving.')
        parser_release.add_argument(
            '-d',
            '--destination',
            default=None,
            help='Released model storage (defaults to the model storage).')

        parser_serve = subparsers.add_parser('serve', help='Serve a model.')
        parser_serve.add_argument('-hs',
                                  '--host',
                                  default="0.0.0.0",
                                  help='Serving hostname.')
        parser_serve.add_argument('-p',
                                  '--port',
                                  type=int,
                                  default=4000,
                                  help='Serving port.')

        parser_preprocess = subparsers.add_parser(
            'preprocess', help='Sample and preprocess corpus.')
        parser_preprocess.add_argument('--build_model',
                                       default=False,
                                       action='store_true',
                                       help='Preprocess data into a model.')
        subparsers.add_parser('buildvocab', help='Build vocabularies.')

        args = parser.parse_args(args=args)
        if args.config is None and args.model is None:
            parser.error(
                'at least one of --config or --model options must be set')

        # The read and write storages both fall back to --model_storage.
        if args.model_storage_read is None:
            args.model_storage_read = args.model_storage
        if args.model_storage_write is None:
            args.model_storage_write = args.model_storage

        # A stateful run that handles models needs both storages: the read
        # storage is used below to fetch the parent model and the write storage
        # is handed to the command wrappers. Only a plain "preprocess" (without
        # --build_model) is exempt.
        needs_model_storage = (not self._stateless and
                               (args.cmd != 'preprocess' or args.build_model))
        if needs_model_storage and (args.model_storage_read is None
                                    or args.model_storage_write is None):
            parser.error('Missing model storage argument')
        if args.task_id is None:
            args.task_id = str(uuid.uuid4())

        # for backward compatibility - convert singleton in int
        gpu_ids = [int(g) for g in args.gpuid.split(',')]
        args.gpuid = gpu_ids[0] if len(gpu_ids) == 1 else gpu_ids

        start_beat_service(os.uname()[1],
                           args.beat_url,
                           args.task_id,
                           interval=args.beat_interval)

        config = load_config(args.config) if args.config is not None else {}
        # --model takes precedence over the "model" key of the configuration.
        parent_model = args.model or config.get('model')

        storage = StorageClient(tmp_dir=self._tmp_dir,
                                config=load_config(args.storage_config)
                                if args.storage_config else None)

        if parent_model is not None and not self._stateless:
            # Download model locally and merge the configuration.
            remote_model_path = storage.join(args.model_storage_read,
                                             parent_model)
            model_path = os.path.join(self._models_dir, parent_model)
            fetch_model(storage, remote_model_path, model_path)
            with open(os.path.join(model_path, 'config.json'),
                      'r') as config_file:
                model_config = json.load(config_file)
            # Model configurations without "modelType" get one inferred from
            # the model name: a "_release" suffix means a released model,
            # anything else is treated as a training checkpoint.
            if 'modelType' not in model_config:
                if parent_model.endswith('_release'):
                    model_config['modelType'] = 'release'
                else:
                    model_config['modelType'] = 'checkpoint'
            # Merge on a deep copy so model_config stays as loaded (presumably
            # merge_config updates its first argument in place - TODO confirm).
            config = merge_config(copy.deepcopy(model_config), config)
        else:
            model_path = None
            model_config = None

        if args.cmd == 'train':
            if (parent_model is not None and config['modelType']
                    not in ('checkpoint', 'base', 'preprocess')):
                raise ValueError(
                    'cannot train from a model that is not a training checkpoint, '
                    'a base model, or a preprocess model')
            self.train_wrapper(args.task_id,
                               config,
                               storage,
                               args.model_storage_write,
                               args.image,
                               parent_model=parent_model,
                               model_path=model_path,
                               model_config=model_config,
                               gpuid=args.gpuid,
                               push_model=not args.no_push)
        elif args.cmd == 'buildvocab':
            self.build_vocab(args.task_id,
                             config,
                             storage,
                             args.model_storage_write,
                             args.image,
                             push_model=not args.no_push)
        elif args.cmd == 'trans':
            if (not self._stateless and
                (parent_model is None or config['modelType'] != 'checkpoint')):
                raise ValueError('translation requires a training checkpoint')
            self.trans_wrapper(config,
                               model_path,
                               storage,
                               args.input,
                               args.output,
                               as_release=args.as_release,
                               gpuid=args.gpuid)
        elif args.cmd == 'release':
            if (not self._stateless and
                (parent_model is None or config['modelType'] != 'checkpoint')):
                raise ValueError('releasing requires a training checkpoint')
            # The released model goes to the write storage unless a dedicated
            # destination was requested.
            if args.destination is None:
                args.destination = args.model_storage_write
            self.release_wrapper(config,
                                 model_path,
                                 storage,
                                 args.image,
                                 args.destination,
                                 gpuid=args.gpuid,
                                 push_model=not args.no_push)
        elif args.cmd == 'serve':
            if (not self._stateless and
                (parent_model is None or config['modelType'] != 'release')):
                raise ValueError('serving requires a released model')
            self.serve_wrapper(config,
                               model_path,
                               args.host,
                               args.port,
                               gpuid=args.gpuid)
        elif args.cmd == 'preprocess':
            if not args.build_model:
                self.preprocess(config, storage)
            else:
                if (parent_model is not None
                        and config['modelType'] not in ('checkpoint', 'base')):
                    raise ValueError(
                        'cannot preprocess from a model that is not a training '
                        'checkpoint or a base model')
                self.preprocess_into_model(args.task_id,
                                           config,
                                           storage,
                                           args.model_storage_write,
                                           args.image,
                                           parent_model=parent_model,
                                           model_path=model_path,
                                           push_model=not args.no_push)
Exemplo n.º 2
0
    def run(self):
        """Entry point: read the command line and execute the chosen command.

        The supported commands are ``train``, ``trans``, ``serve`` and
        ``preprocess``. Unless the instance is stateless, a parent model given
        with ``--model`` (or the ``model`` key of the configuration) is fetched
        locally and its ``config.json`` is merged with the run configuration.

        Raises:
          ValueError: If ``trans`` or ``serve`` is requested without a model.
        """
        cli = argparse.ArgumentParser()
        cli.add_argument(
            '-c', '--config',
            default=None,
            help='Configuration as a file or a JSON string. '
                 'Setting "-" will read from the standard input.')
        cli.add_argument(
            '-s', '--storage_config',
            default=None,
            help='Configuration of available storages as a file or a JSON string. '
                 'Setting "-" will read from the standard input.')
        cli.add_argument(
            '-ms', '--model_storage',
            help='Model storage in the form <storage_id>:[<path>].')
        cli.add_argument('-m', '--model', default=None, help='Model to load.')
        cli.add_argument(
            '-g', '--gpuid',
            default="0",
            help="Comma-separated list of 1-indexed GPU identifiers (0 for CPU).")
        cli.add_argument(
            '-t', '--task_id',
            default=None,
            help="Identifier of this run.")
        cli.add_argument(
            '-i', '--image',
            default="?",
            help="Full URL (registry/image:tag) of the image used for this run.")
        cli.add_argument(
            '-b', '--beat_url',
            default=None,
            help="Endpoint that listens to beat requests "
                 "(push notifications of activity).")
        cli.add_argument(
            '-bi', '--beat_interval',
            default=30,
            type=int,
            help="Interval of beat requests in seconds.")

        # Sub-commands; the selected one is stored in opts.cmd.
        commands = cli.add_subparsers(help='Run type', dest='cmd')
        commands.add_parser('train', help='Run a training.')

        trans_cli = commands.add_parser('trans', help='Run a translation.')
        trans_cli.add_argument('-i', '--input', required=True, help='Input file.')
        trans_cli.add_argument('-o', '--output', required=True, help='Output file.')

        serve_cli = commands.add_parser('serve', help='Serve a model.')
        serve_cli.add_argument(
            '-hs', '--host', default="0.0.0.0", help='Serving hostname.')
        serve_cli.add_argument(
            '-p', '--port', type=int, default=4000, help='Serving port.')

        commands.add_parser('preprocess', help='Sample and preprocess corpus.')

        opts = cli.parse_args()
        if opts.config is None and opts.model is None:
            cli.error('at least one of --config or --model options must be set')
        # Only "preprocess" and stateless instances can do without a storage.
        storage_required = not self._stateless and opts.cmd != 'preprocess'
        if storage_required and not opts.model_storage:
            cli.error('argument -ms/--model_storage is required')

        task_id = opts.task_id
        if task_id is None:
            task_id = str(uuid.uuid4())

        # A single GPU identifier is passed on as a plain int (backward
        # compatibility), several identifiers as a list of ints.
        gpu_ids = [int(token) for token in opts.gpuid.split(',')]
        gpuid = gpu_ids[0] if len(gpu_ids) == 1 else gpu_ids

        hostname = os.uname()[1]
        start_beat_service(hostname,
                           opts.beat_url,
                           task_id,
                           interval=opts.beat_interval)

        if opts.config is not None:
            config = load_config(opts.config)
        else:
            config = {}
        # --model wins over the "model" key of the configuration.
        parent_model = opts.model or config.get('model')

        tmp_dir = self._tmp_dir
        storage_settings = (load_config(opts.storage_config)
                            if opts.storage_config else None)
        storage = StorageClient(tmp_dir=tmp_dir, config=storage_settings)

        model_path = None
        if parent_model is not None and not self._stateless:
            # Fetch the parent model and layer the run configuration on top
            # of the configuration stored with it.
            remote_model_path = storage.join(opts.model_storage, parent_model)
            model_path = os.path.join(self._models_dir, parent_model)
            fetch_model(storage, remote_model_path, model_path)
            model_config_path = os.path.join(model_path, 'config.json')
            with open(model_config_path, 'r') as stream:
                model_config = json.load(stream)
            config = merge_config(model_config, config)

        command = opts.cmd
        if command == 'train':
            self.train_wrapper(task_id,
                               config,
                               storage,
                               opts.model_storage,
                               opts.image,
                               parent_model=parent_model,
                               model_path=model_path,
                               gpuid=gpuid)
            return
        if command == 'trans':
            if parent_model is None:
                raise ValueError('translation requires a model')
            self.trans_wrapper(config,
                               model_path,
                               storage,
                               opts.input,
                               opts.output,
                               gpuid=gpuid)
            return
        if command == 'serve':
            if parent_model is None:
                raise ValueError('serving requires a model')
            self.serve_wrapper(config,
                               model_path,
                               opts.host,
                               opts.port,
                               gpuid=gpuid)
            return
        if command == 'preprocess':
            self.preprocess(config, storage)
Exemplo n.º 3
0
    def run(self):
        """Entry point: read the command line and run ``train`` or ``trans``.

        Unless the instance is stateless, ``--model_storage`` is mandatory and
        a parent model given with ``--model`` (or the ``model`` key of the
        configuration) is fetched locally, its ``config.json`` being merged
        with the run configuration.

        Raises:
          ValueError: If the command is not ``train`` and no model was given.
        """
        cli = argparse.ArgumentParser()
        cli.add_argument(
            '-c', '--config',
            default=None,
            help='Configuration as a file or a JSON string. '
                 'Setting "-" will read from the standard input.')
        cli.add_argument(
            '-s', '--storage_config',
            default=None,
            help='Configuration of available storages as a file or a JSON string. '
                 'Setting "-" will read from the standard input.')
        cli.add_argument(
            '-ms', '--model_storage',
            required=not self._stateless,
            help='Model storage in the form <storage_id>:[<path>].')
        cli.add_argument('-m', '--model', default=None, help='Model to load.')
        cli.add_argument(
            '-g', '--gpuid',
            default=0,
            type=int,
            help="1-indexed GPU identifier (0 for CPU).")
        cli.add_argument(
            '-t', '--task_id',
            default=None,
            help="Identifier of this run.")
        cli.add_argument(
            '-i', '--image',
            default="?",
            help="Full URL (registry/image:tag) of the image used for this run.")
        cli.add_argument(
            '-b', '--beat_url',
            default=None,
            help="Endpoint that listens to beat requests "
                 "(push notifications of activity).")
        cli.add_argument(
            '-bi', '--beat_interval',
            default=30,
            type=int,
            help="Interval of beat requests in seconds.")

        # Sub-commands; the selected one is stored in opts.cmd.
        commands = cli.add_subparsers(help='Run type', dest='cmd')
        commands.add_parser('train', help='Run a training.')

        trans_cli = commands.add_parser('trans', help='Run a translation.')
        trans_cli.add_argument('-i', '--input', required=True, help='Input file.')
        trans_cli.add_argument('-o', '--output', required=True, help='Output file.')

        opts = cli.parse_args()
        if opts.config is None and opts.model is None:
            cli.error('at least one of --config or --model options must be set')

        task_id = opts.task_id
        if task_id is None:
            task_id = str(uuid.uuid4())

        hostname = os.uname()[1]
        start_beat_service(hostname,
                           opts.beat_url,
                           task_id,
                           interval=opts.beat_interval)

        if opts.config is not None:
            config = load_config(opts.config)
        else:
            config = {}
        # --model wins over the "model" key of the configuration.
        parent_model = opts.model or config.get('model')

        storage_settings = (load_config(opts.storage_config)
                            if opts.storage_config else None)
        storage = StorageClient(config=storage_settings)

        model_path = None
        if parent_model is not None and not self._stateless:
            # Fetch the parent model and layer the run configuration on top
            # of the configuration stored with it.
            remote_model_path = storage.join(opts.model_storage, parent_model)
            model_path = os.path.join(self._models_dir, parent_model)
            fetch_model(storage, remote_model_path, model_path)
            model_config_path = os.path.join(model_path, 'config.json')
            with open(model_config_path, 'r') as stream:
                model_config = json.load(stream)
            config = merge_config(model_config, config)

        if opts.cmd == 'train':
            self.train_wrapper(task_id,
                               config,
                               storage,
                               opts.model_storage,
                               opts.image,
                               model_path=model_path,
                               gpuid=opts.gpuid)
            return

        # Everything but training needs a model; this check runs before the
        # command is looked at, so it also applies when no command was given.
        if parent_model is None:
            raise ValueError('translation requires a model')
        if opts.cmd == 'trans':
            self.trans_wrapper(config,
                               model_path,
                               storage,
                               opts.input,
                               opts.output,
                               gpuid=opts.gpuid)