) parser.add_argument( "indexfile", nargs="?", default=TRAINING_INDEX, help="path to index of resulting parquet files", ) parser.add_argument( "outdir", nargs="?", default=TRAINING_DIR, help="directory of parquet files", ) parser.add_argument( "--max-token-count", type=int, default=5, help="maximum number of contiguous tokens to match against each label", ) parser.add_argument("--log-level", dest="log_level", default="INFO") args = parser.parse_args() logger.setLevel(args.log_level.upper()) logger.info(f"Reading {Path(args.manifest).resolve()}") manifest = pd.read_csv(args.manifest) indir, index, outdir = Path(args.indir), Path(args.indexfile), Path(args.outdir) index.parent.mkdir(parents=True, exist_ok=True) outdir.mkdir(parents=True, exist_ok=True) extend_and_write_docs(indir, manifest, index, outdir, args.max_token_count)
# Entry point: load the default configuration through wandb, let any config
# key be overridden from the command line, then hand the result to main().

# First read in the initial configuration.
os.environ["WANDB_CONFIG_PATHS"] = "config-defaults.yaml"
run = wandb.init(
    project=WANDB_PROJECT,
    job_type="train",
    allow_val_change=True,
)
config = run.config


def _str_to_bool(text):
    """Convert a command-line string such as ``true``/``false`` to a bool.

    Raises:
        argparse.ArgumentTypeError: if *text* is not a recognised spelling.
    """
    lowered = text.strip().lower()
    if lowered in {"true", "t", "yes", "y", "1", "on"}:
        return True
    if lowered in {"false", "f", "no", "n", "0", "off"}:
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {text!r}")


def _cli_converter(default):
    """Pick the argparse ``type=`` callable matching a config default value."""
    # bool("False") is True, so ``type=bool`` can never switch a flag off.
    if isinstance(default, bool):
        return _str_to_bool
    # NoneType cannot be called with a string; fall back to plain text.
    if default is None:
        return str
    return type(default)


# Then override it with any parameters passed along the command line.
parser = argparse.ArgumentParser()
# Anything in the config is fair game to be overridden by a command line flag.
for key, value in config.items():
    cli_flag = f"--{key}".replace("_", "-")
    parser.add_argument(
        cli_flag,
        dest=key,
        type=_cli_converter(value),
        default=value,
    )
args = parser.parse_args()
config.update(args, allow_val_change=True)

if not config.use_wandb:
    # NOTE(review): these variables are set after wandb.init() has already
    # run above; confirm they still have the intended effect on this run.
    os.environ["WANDB_SILENT"] = "true"
    os.environ["WANDB_MODE"] = "dryrun"
    # Turn every later wandb.log(...) call into a no-op.
    wandb.log = lambda *_args, **_kwargs: None

# logging only recognises upper-case level names, so normalise strings and
# accept "--log-level debug" as well as "--log-level DEBUG".
log_level = config.log_level
if isinstance(log_level, str):
    log_level = log_level.upper()
logger.setLevel(log_level)

main(config)