def prepare_text_with_yaspi(
    yaspi_defaults: Dict[str, Union[str, int]],
    common_kwargs: Dict,
    datasets: List[str],
    embedding_names: List[str],
):
    cmd_args = sys.argv
    remove = ["--yaspify", "--datasets", "--embedding_name"]
    cmd_args = filter_cmd_args(cmd_args, remove=remove)
    base_cmd = f"python {' '.join(cmd_args)} --slurm"
    # avoid filename limit
    embedding_acronyms = []
    for embedding_name in embedding_names:
        acronym = "".join([x[0].upper() for x in embedding_name.split("-")])
        embedding_acronyms.append(acronym)
    job_name = f"prepare-text-{'-'.join(datasets)}-{'-'.join(embedding_acronyms)}"
    pairs = list(itertools.product(embedding_names, datasets))
    job_queue = []
    for embedding_name, dataset in pairs:
        job_queue.append(f'"--embedding_name {embedding_name} --datasets {dataset}"')
    job_queue = " ".join(job_queue)
    job = Yaspi(
        cmd=base_cmd,
        job_queue=job_queue,
        job_name=job_name,
        job_array_size=len(pairs),
        **yaspi_defaults,
    )
    job.submit(watch=True, conserve_resources=5)
    extract_embeddings(**common_kwargs)
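# `filter_cmd_args` is imported from elsewhere in the codebase and is not shown
# in this excerpt. As a rough illustration of the behaviour the function above
# relies on (dropping the named flags together with any value tokens that follow
# them), a minimal sketch might look like the hypothetical helper below. The
# name `_filter_cmd_args_sketch` and its handling of multi-valued flags are
# assumptions, not the project's implementation.
def _filter_cmd_args_sketch(cmd_args: List[str], remove: List[str]) -> List[str]:
    filtered, idx = [], 0
    while idx < len(cmd_args):
        arg = cmd_args[idx]
        idx += 1
        if arg in remove:
            # also drop any value tokens that follow the removed flag
            while idx < len(cmd_args) and not cmd_args[idx].startswith("--"):
                idx += 1
        else:
            filtered.append(arg)
    return filtered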
def train_baselines_with_yaspi(
    yaspi_defaults_path: Path,
    common_kwargs: Dict,
    timestamp: str,
    datasets: List[str],
):
    with open(yaspi_defaults_path, "r") as f:
        yaspi_defaults = json.load(f)
    cmd_args = sys.argv
    remove = ["--yaspify", "--datasets"]
    cmd_args = filter_cmd_args(cmd_args, remove=remove)
    cmd_args.extend(["--timestamp", timestamp])
    base_cmd = f"python {' '.join(cmd_args)}"
    job_name = f"baselines-{timestamp}"
    job_queue = [f'"--datasets {dataset}"' for dataset in datasets]
    job_queue = " ".join(job_queue)
    job = Yaspi(
        cmd=base_cmd,
        job_queue=job_queue,
        job_name=job_name,
        job_array_size=len(datasets),
        **yaspi_defaults,
    )
    job.submit(watch=True, conserve_resources=5)
    train_baselines(**common_kwargs, aggregate=True)
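# For reference, the file at `yaspi_defaults_path` is simply a JSON object whose
# key-value pairs are forwarded to the Yaspi constructor via **yaspi_defaults.
# A minimal sketch is written out below; only keys that appear elsewhere in this
# codebase ("partition", "constraint_str") are used, and the values are
# illustrative placeholders rather than the project's real settings.
_example_yaspi_defaults = {
    "partition": "compute",   # SLURM partition used for the job array
    "constraint_str": None,   # serialised as null; maps to an optional --constraint directive
}
with open("yaspi_cpu_defaults_example.json", "w") as f:
    json.dump(_example_yaspi_defaults, f, indent=2)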
def test_yaspi_object_line_deletion():
    with open("yaspi_test/misc/dummy_yaspi_config.json", "r") as f:
        yaspi_defaults = json.load(f)
    for key, val in yaspi_defaults.items():
        if key in PATH_ARGS:
            yaspi_defaults[key] = Path(val)
    cmd = "python yaspi_test/misc/hello_world.py"
    job_name = "test_yaspi"
    job_queue = ""

    # Check that yaspi only includes sbatch directives for values that are not None
    # when OR_DELETE_LINE is specified in the sbatch template. This test uses the
    # "constraint_str" flag as an example of a directive that should be None by default.

    # First, check that supplying a yaspi key-value pair ensures it is present
    yaspi_defaults["constraint_str"] = "p40"
    sbatch_directive = "#SBATCH --constraint"
    yaspi = Yaspi(
        cmd=cmd,
        job_name=job_name,
        job_queue=job_queue,
        job_array_size=1,
        **yaspi_defaults,
    )
    # Read the template that was written to disk
    with open("data/slurm-gen-scripts/cpu-proc/template.sh", "r") as f:
        template_contents = f.read()
    assert sbatch_directive in template_contents, (
        f"Expected to find {sbatch_directive} in template contents")

    # Check that supplying a None-valued yaspi key-value pair ensures it is not present
    yaspi_defaults["constraint_str"] = None
    yaspi = Yaspi(
        cmd=cmd,
        job_name=job_name,
        job_queue=job_queue,
        job_array_size=1,
        **yaspi_defaults,
    )
    # Read the template that was written to disk
    with open("data/slurm-gen-scripts/cpu-proc/template.sh", "r") as f:
        template_contents = f.read()
    assert sbatch_directive not in template_contents, (
        f"Expected not to find {sbatch_directive} in template contents")

    if socket.gethostname() in HOSTS_WITH_SLURM:
        yaspi.submit()
def test_yaspi_object_creation():
    with open("yaspi_test/misc/dummy_yaspi_config.json", "r") as f:
        yaspi_defaults = json.load(f)
    for key, val in yaspi_defaults.items():
        if key in PATH_ARGS:
            yaspi_defaults[key] = Path(val)
    cmd = "python yaspi_test/misc/hello_world.py"
    job_name = "test_yaspi"
    job_queue = ""
    yaspi = Yaspi(
        cmd=cmd,
        job_name=job_name,
        job_queue=job_queue,
        job_array_size=1,
        **yaspi_defaults,
    )
    print(f"Test yaspi object: {yaspi}")
    if socket.gethostname() in HOSTS_WITH_SLURM:
        yaspi.submit()
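# The two tests above rely on module-level names that are not part of this
# excerpt. A plausible preamble is sketched below; the entries of PATH_ARGS and
# HOSTS_WITH_SLURM are placeholders (assumptions), not the project's actual
# values.
import json
import socket
from pathlib import Path

from yaspi.yaspi import Yaspi

# config keys whose string values should be converted to pathlib.Path objects
PATH_ARGS = {"template_dir", "log_dir"}  # placeholder key names
# hostnames on which it is safe to actually call sbatch during the tests
HOSTS_WITH_SLURM = {"slurm-login-01"}  # placeholder hostname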
episode2subset = get_episode2subset_map(args.subset2episode)

if args.yaspify:
    with open(args.yaspi_defaults_path, "r") as f:
        yaspi_defaults = json.load(f)
    cmd_args = sys.argv
    cmd_args.remove("--yaspify")
    base_cmd = f"python {' '.join(cmd_args)}"
    job_name = f"create-{args.num_partitions}-info-windows"
    yaspi_defaults["constraint_str"] = args.constraint_str
    yaspi_defaults["partition"] = "gpu" if args.run_on_gnodes else "compute"
    job = Yaspi(
        cmd=base_cmd,
        job_queue=None,
        gpus_per_task=0,
        job_name=job_name,
        job_array_size=args.num_partitions,
        **yaspi_defaults,
    )
    job.submit(watch=True, conserve_resources=5)
else:
    if args.slurm:
        if socket.gethostname().endswith("cluster"):
            os.system(str(Path.home() / "configure_tmp_data.sh"))
    main(
        limit=args.limit,
        refresh=args.refresh,
        data_dir=args.data_dir,
        processes=args.processes,
        prob_thres=args.prob_thres,
        trim_format=args.trim_format,
def main():
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs', type=int, default=14, metavar='N',
                        help='number of epochs to train (default: 14)')
    parser.add_argument('--lr', type=float, default=1.0, metavar='LR',
                        help='learning rate (default: 1.0)')
    parser.add_argument('--gamma', type=float, default=0.7, metavar='M',
                        help='Learning rate step gamma (default: 0.7)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--dry-run', action='store_true', default=False,
                        help='quickly check a single pass')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--log-interval', type=int, default=10, metavar='N',
                        help='how many batches to wait before logging training status')
    parser.add_argument('--save-model', action='store_true', default=False,
                        help='For Saving the current Model')
    # Additional flags used by yaspi
    parser.add_argument("--yaspify", action="store_true")
    parser.add_argument("--yaspi_settings", default="yaspi_settings.json",
                        help="file of SLURM specific options (e.g. number of GPUs)")
    parser.add_argument("--hyperparams", default="mnist_hyperparams.json")
    args = parser.parse_args()

    if args.yaspify:
        # ---------------------------------------------------------------------
        # This section contains the logic for launching multiple runs
        # ---------------------------------------------------------------------
        # The command that will be launched on each worker will be identical to
        # the python command used to launch this script (including all flags),
        # except:
        # 1. The --yaspify flag will be removed
        # 2. Flags from hyperparams will be inserted
        # ---------------------------------------------------------------------
        # load the hyperparameters
        with open(args.hyperparams, "r") as f:
            hyperparams = json.load(f)
        exp_flags = []
        for exp in hyperparams:
            exp_flags.append(" ".join([f"--{key} {val}" for key, val in exp.items()]))
        # Select a name for your jobs (this is what will be visible via the
        # `sinfo` SLURM command)
        num_jobs = len(exp_flags)
        job_name = f"train-mnist-{num_jobs}-jobs"
        # Provide the arguments to each SLURM worker as space-separated quoted strings
        job_queue = " ".join([f'"{flags}"' for flags in exp_flags])
        # remove the yaspify flag
        cmd_args = sys.argv
        cmd_args.remove("--yaspify")
        # construct the final command that will run each worker, together with job_queue
        base_cmd = f"python {' '.join(cmd_args)}"
        # load SLURM specific settings
        with open(args.yaspi_settings, "r") as f:
            yaspi_defaults = json.load(f)
        # Launch the jobs over SLURM
        job = Yaspi(
            cmd=base_cmd,
            job_queue=job_queue,
            job_name=job_name,
            job_array_size=num_jobs,
            **yaspi_defaults,
        )
        # The `watch` argument will keep the python process alive until the
        # submitted SLURM jobs complete
        job.submit(watch=True, conserve_resources=5)
    else:
        # ---------------------------------------------------------------------
        # This section contains the original, unmodified code
        # ---------------------------------------------------------------------
        use_cuda = not args.no_cuda and torch.cuda.is_available()
        torch.manual_seed(args.seed)
        device = torch.device("cuda" if use_cuda else "cpu")
        train_kwargs = {'batch_size': args.batch_size}
        test_kwargs = {'batch_size': args.test_batch_size}
        if use_cuda:
            cuda_kwargs = {
                'num_workers': 1,
                'pin_memory': True,
                'shuffle': True,
            }
            train_kwargs.update(cuda_kwargs)
            test_kwargs.update(cuda_kwargs)
        transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,)),
        ])
        dataset1 = datasets.MNIST('../data', train=True, download=True,
                                  transform=transform)
        dataset2 = datasets.MNIST('../data', train=False, transform=transform)
        train_loader = torch.utils.data.DataLoader(dataset1, **train_kwargs)
        test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs)
        model = Net().to(device)
        optimizer = optim.Adadelta(model.parameters(), lr=args.lr)
        scheduler = StepLR(optimizer, step_size=1, gamma=args.gamma)
        for epoch in range(1, args.epochs + 1):
            train(args, model, device, train_loader, optimizer, epoch)
            test(model, device, test_loader)
            scheduler.step()
        if args.save_model:
            torch.save(model.state_dict(), "mnist_cnn.pt")
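# A sketch of how the --hyperparams file above is expanded into the SLURM
# job_queue. The file contents here are illustrative placeholders; the only
# assumption carried over from main() is that the JSON holds a list of
# {flag-name: value} dictionaries, one per run.
_example_hyperparams = [
    {"lr": 1.0, "gamma": 0.7},
    {"lr": 0.5, "gamma": 0.7},
    {"lr": 0.1, "gamma": 0.9},
]
with open("mnist_hyperparams_example.json", "w") as f:
    json.dump(_example_hyperparams, f, indent=2)

# Mirrors the expansion performed in main(): each dictionary becomes one quoted
# flag string, and the quoted strings are space-separated to form job_queue.
_exp_flags = [" ".join(f"--{key} {val}" for key, val in exp.items())
              for exp in _example_hyperparams]
_job_queue = " ".join(f'"{flags}"' for flags in _exp_flags)
# _job_queue == '"--lr 1.0 --gamma 0.7" "--lr 0.5 --gamma 0.7" "--lr 0.1 --gamma 0.9"'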
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--dataset",
        default="bsl_signbank",
        choices=[
            "bsl_signbank",
            "bsl_signdict",
            "bbcsl",
            "msasl",
            "wlasl",
            "bbcsl_annotated",
            "bbcsl_raw",
            "BSLCP_raw",
        ],
    )
    parser.add_argument("--limit", type=int, default=0)
    parser.add_argument("--processes", type=int, default=1)
    parser.add_argument("--resize_res", type=int, default=256)
    parser.add_argument("--worker_id", default=0, type=int)
    parser.add_argument("--progress_markers", type=int, default=100)
    parser.add_argument("--yaspify", action="store_true")
    parser.add_argument("--use_gnodes", action="store_true")
    parser.add_argument("--slurm", action="store_true")
    parser.add_argument("--refresh", action="store_true")
    parser.add_argument(
        "--mouthing_window_secs",
        default=0,
        type=int,
        help="if given, preprocess videos from different windows.",
    )
    parser.add_argument("--num_partitions", default=1, type=int)
    parser.add_argument("--relevant_ids_path", help="if given, filter to these ids")
    parser.add_argument("--yaspi_defaults_path", default="misc/yaspi_cpu_defaults.json")
    parser.add_argument("--vis", action="store_true")
    args = parser.parse_args()

    fname_suffix = ""
    if args.mouthing_window_secs:
        assert args.dataset == "bbcsl", "Mouthing windows are only supported for bbcsl"
        fname_suffix = f"-{args.mouthing_window_secs}sec-window-signhd"

    exclude_pattern = None
    if args.dataset in {"bsl_signbank", "bsl_signdict", "msasl", "wlasl"}:
        tag, suffix = "videos_360h_25fps", ".mp4"
    elif args.dataset in {"bbcsl", "bbcsl_annotated"}:
        fname_suffix += "-videos-fixed"
        tag, suffix = "annotated-videos-fixed", ".mp4"
    elif args.dataset == "bbcsl_raw":
        tag, suffix = "videos-mp4", "signhd-dense-fast-audio.mp4"
    elif args.dataset == "BSLCP_raw":
        tag, suffix, exclude_pattern = "videos", ".mov", "+"
    else:
        raise ValueError(f"Unknown dataset: {args.dataset}")

    dest_fname = f"videos-resized-25fps-{args.resize_res}x{args.resize_res}"
    dataset_name2dir = {"BSLCP_raw": "BSLCP"}
    dataset_dir = dataset_name2dir.get(args.dataset, args.dataset)
    src_video_dir = Path("data") / dataset_dir / f"{tag}{fname_suffix}"
    dest_video_dir = src_video_dir.parent / f"{dest_fname}{fname_suffix}"

    if getpass.getuser() == "albanie" and socket.gethostname().endswith("cluster"):
        os.system(str(Path.home() / "configure_tmp_data.sh"))

    if args.yaspify:
        with open(args.yaspi_defaults_path, "r") as f:
            yaspi_defaults = json.load(f)
        cmd_args = sys.argv
        cmd_args.remove("--yaspify")
        base_cmd = f"python {' '.join(cmd_args)}"
        job_name = f"preproc-videos-{args.num_partitions}-partitions"
        if args.use_gnodes:
            yaspi_defaults["partition"] = "gpu"
        job = Yaspi(
            cmd=base_cmd,
            job_queue=None,
            gpus_per_task=0,
            job_name=job_name,
            job_array_size=args.num_partitions,
            **yaspi_defaults,
        )
        job.submit(watch=True, conserve_resources=5)
    else:
        resize_videos(
            vis=args.vis,
            suffix=suffix,
            limit=args.limit,
            refresh=args.refresh,
            worker_id=args.worker_id,
            processes=args.processes,
            resize_res=args.resize_res,
            num_partitions=args.num_partitions,
            src_video_dir=src_video_dir,
            dest_video_dir=dest_video_dir,
            relevant_ids_path=args.relevant_ids_path,
            progress_markers=args.progress_markers,
            exclude_pattern=exclude_pattern,
        )
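# Example invocations (the script filename is an assumption; every flag shown
# appears in the argument parser above):
#
#   # run a single local worker
#   python preprocess_videos.py --dataset bsl_signbank --processes 8
#
#   # fan the work out over a SLURM job array of 16 workers via yaspi
#   python preprocess_videos.py --dataset bbcsl --num_partitions 16 --yaspify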