def main(cmd_args):
    """Run the main training function.

    Parses command-line arguments, dynamically resolves the model class so it
    can register its own CLI options, configures logging / GPU count / random
    seeds, and dispatches to the backend trainer.

    Args:
        cmd_args (list): Command-line argument strings (sys.argv[1:] style).

    Raises:
        NotImplementedError: If the chainer backend is combined with a
            non-float32 --train-dtype.
        ValueError: If a mixed-precision dtype is requested on CPU, or the
            backend is not pytorch.
    """
    parser = get_parser()
    args, _ = parser.parse_known_args(cmd_args)
    if args.backend == "chainer" and args.train_dtype != "float32":
        raise NotImplementedError(
            f"chainer backend does not support --train-dtype {args.train_dtype}."
            # FIX: the flag is --train-dtype, not --dtype.
            "Use --train-dtype float32.")
    if args.ngpu == 0 and args.train_dtype in ("O0", "O1", "O2", "O3", "float16"):
        raise ValueError(
            f"--train-dtype {args.train_dtype} does not support the CPU backend."
        )

    from espnet.utils.dynamic_import import dynamic_import
    if args.model_module is None:
        model_module = "espnet.nets." + args.backend + "_backend.e2e_mt:E2E"
    else:
        model_module = args.model_module
    model_class = dynamic_import(model_module)
    # Let the selected model register its own CLI options before the final parse.
    model_class.add_arguments(parser)

    args = parser.parse_args(cmd_args)
    args.model_module = model_module
    # Keep args.backend consistent with whichever backend the module targets.
    if 'chainer_backend' in args.model_module:
        args.backend = 'chainer'
    if 'pytorch_backend' in args.model_module:
        args.backend = 'pytorch'

    # logging info
    if args.verbose > 0:
        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s')
    else:
        logging.basicConfig(
            level=logging.WARN,
            format='%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s')
        logging.warning('Skip DEBUG/INFO messages')

    # If --ngpu is not given,
    #   1. if CUDA_VISIBLE_DEVICES is set, use all visible devices
    #   2. if nvidia-smi exists, use all devices
    #   3. else ngpu=0
    if args.ngpu is None:
        cvd = os.environ.get("CUDA_VISIBLE_DEVICES")
        if cvd is not None:
            ngpu = len(cvd.split(','))
        else:
            logging.warning("CUDA_VISIBLE_DEVICES is not set.")
            try:
                p = subprocess.run(['nvidia-smi', '-L'],
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE)
            except (subprocess.CalledProcessError, FileNotFoundError):
                ngpu = 0
            else:
                # FIX: "nvidia-smi -L" prints the device list to stdout, not
                # stderr; counting stderr lines always produced ngpu == 0.
                ngpu = len(p.stdout.decode().split('\n')) - 1
    else:
        if is_torch_1_2_plus and args.ngpu != 1:
            logging.debug(
                "There are some bugs with multi-GPU processing in PyTorch 1.2+" +
                " (see https://github.com/pytorch/pytorch/issues/21108)")
        ngpu = args.ngpu
    # NOTE(review): the detected ngpu is only logged here and never written
    # back to args.ngpu — confirm downstream train() re-derives the GPU count.
    logging.info(f"ngpu: {ngpu}")

    # display PYTHONPATH
    logging.info('python path = ' + os.environ.get('PYTHONPATH', '(None)'))

    # set random seed
    logging.info('random seed = %d' % args.seed)
    random.seed(args.seed)
    np.random.seed(args.seed)

    # load dictionary for debug log
    if args.dict is not None:
        with open(args.dict, 'rb') as f:
            dictionary = f.readlines()
        char_list = [
            entry.decode('utf-8').split(' ')[0] for entry in dictionary
        ]
        char_list.insert(0, '<blank>')
        char_list.append('<eos>')
        args.char_list = char_list
    else:
        args.char_list = None

    # train
    logging.info('backend = ' + args.backend)
    if args.backend == "pytorch":
        from espnet.mt.pytorch_backend.mt import train
        train(args)
    else:
        raise ValueError("Only pytorch is supported.")
def main(cmd_args):
    """Entry point for MT training.

    Builds the argument parser (letting an explicitly requested model module
    register extra options), configures logging, validates the GPU setup
    (including CLSP cluster GPU allocation), seeds RNGs, loads the target
    dictionary for debug logging, and hands off to the pytorch trainer.

    NOTE(review): this redefines ``main`` — an earlier ``main`` exists above
    in this file and is shadowed by this one at import time; confirm which
    definition is intended to survive.

    Args:
        cmd_args (list): Command-line argument strings (sys.argv[1:] style).
    """
    parser = get_parser()
    args, _ = parser.parse_known_args(cmd_args)

    from espnet.utils.dynamic_import import dynamic_import
    if args.model_module is not None:
        dynamic_import(args.model_module).add_arguments(parser)
    args = parser.parse_args(cmd_args)
    if args.model_module is None:
        args.model_module = "espnet.nets." + args.backend + "_backend.e2e_mt:E2E"
    # Backend follows whichever backend package the model module lives in.
    if 'chainer_backend' in args.model_module:
        args.backend = 'chainer'
    if 'pytorch_backend' in args.model_module:
        args.backend = 'pytorch'

    # logging info
    fmt = '%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s'
    if args.verbose > 0:
        logging.basicConfig(level=logging.INFO, format=fmt)
    else:
        logging.basicConfig(level=logging.WARN, format=fmt)
        logging.warning('Skip DEBUG/INFO messages')

    # check CUDA_VISIBLE_DEVICES
    if args.ngpu > 0:
        # On the CLSP grid, reserve GPUs via free-gpu and export them.
        # check_output returns str on python 2 and bytes on python 3.
        running_py2 = platform.python_version_tuple()[0] == '2'
        host = subprocess.check_output(["hostname", "-f"])
        if not running_py2:
            host = host.decode()
        if "clsp.jhu.edu" in host:
            reserved = subprocess.check_output(
                ["/usr/local/bin/free-gpu", "-n", str(args.ngpu)])
            if not running_py2:
                reserved = reserved.decode()
            reserved = reserved.strip()
            logging.info('CLSP: use gpu' + reserved)
            os.environ['CUDA_VISIBLE_DEVICES'] = reserved

        visible = os.environ.get("CUDA_VISIBLE_DEVICES")
        if visible is None:
            logging.warning("CUDA_VISIBLE_DEVICES is not set.")
        elif args.ngpu != len(visible.split(",")):
            logging.error("#gpus is not matched with CUDA_VISIBLE_DEVICES.")
            sys.exit(1)

    # display PYTHONPATH
    logging.info('python path = ' + os.environ.get('PYTHONPATH', '(None)'))

    # set random seed
    logging.info('random seed = %d' % args.seed)
    random.seed(args.seed)
    np.random.seed(args.seed)

    # load dictionary for debug log
    if args.dict_tgt is None:
        args.char_list = None
    else:
        with open(args.dict_tgt, 'rb') as fin:
            raw_entries = fin.readlines()
        tokens = [line.decode('utf-8').split(' ')[0] for line in raw_entries]
        tokens.insert(0, '<blank>')
        tokens.append('<eos>')
        args.char_list = tokens

    # train
    logging.info('backend = ' + args.backend)
    if args.backend == "chainer":
        # TODO(hirofumi): support chainer backend
        raise NotImplementedError("chainer is not supported for MT now.")
    elif args.backend == "pytorch":
        from espnet.mt.pytorch_backend.mt import train
        train(args)
    else:
        raise ValueError("Only chainer and pytorch are supported.")