Example No. 1
from omegaconf import OmegaConf
import os

cfg = OmegaConf.load(
    os.path.join(os.getenv("PROJECT_DIR"), "config/config.yaml"))
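As a point of reference, OmegaConf.load returns a DictConfig that supports both attribute- and key-style access. A minimal, self-contained sketch (the keys below are invented; OmegaConf.create on an inline YAML string stands in for loading a real config/config.yaml):

from omegaconf import OmegaConf

# Stand-in for a small config/config.yaml (hypothetical keys)
cfg = OmegaConf.create("""
data:
  train_dir: /data/train
model:
  name: baseline
""")
print(cfg.data.train_dir)    # attribute-style access -> /data/train
print(cfg["model"]["name"])  # key-style access -> baseline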
Example No. 2
                        '--config',
                        type=str,
                        required=True,
                        help="path of configuration yaml file")
    parser.add_argument('--num_workers',
                        type=int,
                        default=32,
                        help="number of workers")
    parser.add_argument('-o',
                        '--output_filename',
                        type=str,
                        default='f0s.txt',
                        help="name of the output file")
    args = parser.parse_args()

    hp = OmegaConf.load(args.config)
    with open(os.path.join(hp.data.train_dir, hp.data.train_meta),
              'r',
              encoding='utf-8') as g:
        data = g.readlines()
    wavdir = [x.split('|')[0].strip() for x in data]
    speaker = [x.split('|')[2].strip() for x in data]
    speaker_dict = set()

    speaker_dict = hp.data.speakers

    n = len(speaker_dict)
    print(speaker_dict)
    speaker_to_idx = {spk: idx for idx, spk in enumerate(speaker_dict)}

    squares = [0. for i in range(n)]
Example No. 3
        elif opt.base:
            cfg_fname = os.path.split(opt.base[0])[-1]
            cfg_name = os.path.splitext(cfg_fname)[0]
            name = "_" + cfg_name
        else:
            name = ""
        nowname = now + name
        logdir = os.path.join("logs", nowname)

    ckptdir = os.path.join(logdir, "checkpoints")
    cfgdir = os.path.join(logdir, "configs")
    seed_everything(opt.seed)

    try:
        # init and save configs
        configs = [OmegaConf.load(cfg) for cfg in opt.base]
        cli = OmegaConf.from_dotlist(unknown)
        config = OmegaConf.merge(*configs, cli)
        lightning_config = config.pop("lightning", OmegaConf.create())
        # merge trainer cli with config
        trainer_config = lightning_config.get("trainer", OmegaConf.create())
        # default to ddp
        trainer_config["distributed_backend"] = "ddp"
        for k in nondefault_trainer_args(opt):
            trainer_config[k] = getattr(opt, k)
        trainer_opt = argparse.Namespace(**trainer_config)
        lightning_config.trainer = trainer_config

        # model
        model = instantiate_from_config(config.model)
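The merge above follows OmegaConf's "later arguments win" rule: base YAML files first, then dotlist overrides built from the unparsed CLI arguments. A small, self-contained sketch of the same mechanics (keys are hypothetical):

from omegaconf import OmegaConf

base = OmegaConf.create({"model": {"lr": 1e-3}, "lightning": {"trainer": {"max_epochs": 10}}})
cli = OmegaConf.from_dotlist(["model.lr=0.01"])  # e.g. built from leftover argv
config = OmegaConf.merge(base, cli)              # later sources override earlier ones
print(config.model.lr)                           # 0.01
lightning_config = config.pop("lightning", OmegaConf.create())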
Example No. 4
def show_yaml(base: Path):

    yaml_path = base / 'hydra/config.yaml'
    cfg = OmegaConf.load(str(yaml_path))
    st.markdown(f'```{cfg.pretty()}```')
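Note that cfg.pretty() was deprecated in OmegaConf 2.0 and removed in 2.1; on current versions the equivalent dump is OmegaConf.to_yaml(cfg). A minimal sketch:

from omegaconf import OmegaConf

cfg = OmegaConf.create({"lr": 0.001, "model": {"name": "unet"}})
print(OmegaConf.to_yaml(cfg))  # YAML dump, replacing the removed cfg.pretty()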
Example No. 5
def main(cfg: DictConfig) -> None:
    cwd = Path(hydra.utils.get_original_cwd())

    myutil.print_config(cfg)

    # Setting history directory
    # All outputs will be written into (p / "history" / train_id).
    train_id = tid.generate_train_id(cfg)
    history_dir = cwd / "history" / train_id
    if not history_dir.exists():
        history_dir.mkdir(parents=True, exist_ok=True)

    cfg_path = history_dir / "config.yaml"
    if cfg_path.exists():
        existing_cfg = OmegaConf.load(str(history_dir / "config.yaml"))
        if not myutil.is_same_config(cfg, existing_cfg):
            raise ValueError("Train ID {} already exists, but config is different".format(train_id))

    # Saving cfg
    OmegaConf.save(cfg, str(history_dir / "config.yaml"))

    # Setting seed 
    if cfg.seed is not None:
        myutil.set_random_seed(cfg.seed)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Training
    trainloaders, valloaders = get_data_loaders(cfg)

    # Confirming dataset
    dataiter = iter(trainloaders)
    inputs, labels = next(dataiter)
    hdrpy.io.write(
        history_dir / 'input_sample.jpg',
        inputs[0].clone().detach().numpy().transpose((1, 2, 0)))
    hdrpy.io.write(
        history_dir / 'label_sample.jpg',
        labels[0].clone().detach().numpy().transpose((1, 2, 0)))
    
    net = myutil.get_model(cfg)
    # Checking initial DNN
    # outputs = net(inputs.to(device)).to('cpu').clone().detach()
    # hdrpy.io.write(
    #     history_dir / 'initial_output_sample.jpg',
    #     outputs[0].clone().detach().numpy().transpose((1, 2, 0)))

    criterion = nn.L1Loss()
    optimizer = get_optimizer(net.parameters(), cfg)
    scheduler = get_scheduler(optimizer, cfg)
    extensions = [ModelSaver(directory=history_dir,
                             name=lambda x: cfg.model.name+"_best.pth",
                             trigger=MinValueTrigger(mode="validation", key="loss")),
                  HistorySaver(directory=history_dir,
                               name=lambda x: cfg.model.name+"_history.pth",
                               trigger=IntervalTrigger(period=1))]

    trainer = RegressorTrainer(net, optimizer, criterion, trainloaders,
                               scheduler=scheduler, extensions=extensions,
                               init_epoch=0,
                               device=device)
    trainer.train(cfg.epoch, valloaders)

    # Checking trained DNN
    # outputs = net(inputs.to(device)).to('cpu').clone().detach()
    # hdrpy.io.write(
    #     history_dir / 'output_sample.jpg',
    #     outputs[0].clone().detach().numpy().transpose((1, 2, 0)))
    # print(outputs[0])

    save_model(net, str(history_dir / "{}.pth".format(cfg.model.name)))
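The OmegaConf.save / OmegaConf.load pair above is a plain round trip: the reloaded config compares equal to the one that was written, which is presumably what a helper like myutil.is_same_config builds on. A small sketch (the path is hypothetical):

from omegaconf import OmegaConf

cfg = OmegaConf.create({"seed": 0, "epoch": 10})
OmegaConf.save(cfg, "/tmp/config.yaml")       # also accepts pathlib.Path
reloaded = OmegaConf.load("/tmp/config.yaml")
assert cfg == reloaded                        # content round-trips unchanged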
Example No. 6
def main():
    version = pkg_resources.require("snapdump")[0].version
    parser = argparse.ArgumentParser(
        description=
        "snapdump : backup and restore zfs snapshots to/from a foreign file system"
    )
    parser.add_argument("-v",
                        "--version",
                        action="version",
                        version=f"snapdump {version}")
    parser.add_argument("--cron",
                        "-q",
                        help="Do not log anything except errors",
                        action="store_true")
    parser.add_argument("--conf",
                        "-c",
                        help="Config file name",
                        type=str,
                        default="config.yml")
    subparsers = parser.add_subparsers(help="sub-command help", dest="command")
    backup_parser = subparsers.add_parser("backup", help="Backup")
    backup_parser.add_argument("--dataset",
                               "-d",
                               help="Optional dataset to operate on",
                               type=str)
    backup_parser.add_argument(
        "--no-verify",
        "-n",
        help="Do not verify created stream",
        type=bool,
        nargs="?",
        const=True,
        default=False,
    )
    restore_parser = subparsers.add_parser("restore", help="Restore")
    restore_parser.add_argument(
        "--snapshot",
        "-s",
        help=
        "Snapshot to restore (for example storage/datasets01@2018_12_06__21_47_58)",
        type=str,
        required=True,
    )
    restore_parser.add_argument(
        "--dest-dataset",
        "-d",
        help=
        "Name of destination dataset, by default uses DATASET_restore as the name (ex storage/home_restore)",
        type=str,
        required=False,
    )
    list_parser = subparsers.add_parser(
        "list", help="List available snapshots to restore")
    list_parser.add_argument("--dataset",
                             "-d",
                             help="Dataset to list snapshots for, default all",
                             type=str)
    cleanup_parser = subparsers.add_parser(
        "cleanup", help="Cleanup old snapshots and dump directories")
    cleanup_parser.add_argument("--dataset",
                                "-d",
                                help="Dataset to cleanup, default all",
                                type=str)

    verify_parser = subparsers.add_parser(
        "verify", help="Verify the integrity of a snapshot chain")
    verify_parser.add_argument(
        "--snapshot",
        "-s",
        help=
        "Snapshot to restore (for example storage/datasets01@2018_12_06__21_47_58)",
        type=str,
        required=True,
    )
    args = parser.parse_args()
    conf = OmegaConf.load(args.conf)
    global CRON
    CRON = args.cron
    if args.command == "backup":
        backup(conf, args)
    elif args.command == "restore":
        restore(conf, args)
    elif args.command == "verify":
        verify(conf, args)
    elif args.command == "list":
        list_snapshots(conf, args)
    elif args.command == "cleanup":
        cleanup_snapshots(conf, args)
    else:
        parser.print_help()
Example No. 7
# asserting correct input for gpus
gpu_list = args.gpus.split(",")
assert len(gpu_list) == sum(list(map(lambda x: x.isnumeric(), gpu_list)))

# creating a numeric gpu list
gpu_list = list(map(int, gpu_list))

# TODO :- make inference.py file/functions so that can work with it in validation_end() func, and calc AP (remaining)

if __name__ == '__main__':
    # Set seed
    set_seed()

    # parse config
    config = OmegaConf.load('config.yaml')
    print(config.pretty())

    print('Loading Model....')
    model = SSD300_COCO(cfg=config)

    logger = TensorBoardLogger(save_dir="runs")
    # Create model save checkpoint
    save_checkpoint = pl.callbacks.ModelCheckpoint(monitor='val_loss',
                                                   save_top_k=5,
                                                   save_weights_only=True)
    lr_logger = pl.callbacks.LearningRateLogger()

    # Finding gpu configuration

    if (len(gpu_list) > 1):
Example No. 8
def setup(run_name, training_root, validation_root, base_directory,
          cfg_filename, device, num_workers, resume, machine_rank):
    experiment_dir = base_directory / run_name

    if communication.get_local_rank() == 0:
        # Want to prevent multiple workers from trying to write a directory
        # This is required in the logging below
        experiment_dir.mkdir(parents=True, exist_ok=True)
    communication.synchronize()  # Ensure folders are in place.

    # Load configs from YAML file to check which model needs to be loaded.
    cfg_from_file = OmegaConf.load(cfg_filename)
    model_name = cfg_from_file.model_name + 'Config'
    try:
        model_cfg = str_to_class(f'direct.nn.{cfg_from_file.model_name.lower()}.config', model_name)
    except (AttributeError, ModuleNotFoundError) as e:
        logger.error(f'Model configuration does not exist for {cfg_from_file.model_name} (err = {e}).')
        sys.exit(-1)

    # Load the default configs to ensure type safety
    base_cfg = OmegaConf.structured(DefaultConfig)
    base_cfg = OmegaConf.merge(base_cfg, {'model': model_cfg, 'training': TrainingConfig()})
    cfg = OmegaConf.merge(base_cfg, cfg_from_file)

    # Setup logging
    log_file = experiment_dir / f'log_{machine_rank}_{communication.get_local_rank()}.txt'
    direct.utils.logging.setup(
        use_stdout=communication.get_local_rank() == 0 or cfg.debug,
        filename=log_file,
        log_level=('INFO' if not cfg.debug else 'DEBUG')
    )
    logger.info(f'Machine rank: {machine_rank}.')
    logger.info(f'Local rank: {communication.get_local_rank()}.')
    logger.info(f'Logging: {log_file}.')
    logger.info(f'Saving to: {experiment_dir}.')
    logger.info(f'Run name: {run_name}.')
    logger.info(f'Config file: {cfg_filename}.')
    logger.info(f'Python version: {sys.version}.')
    logger.info(f'PyTorch version: {torch.__version__}.')  # noqa
    logger.info(f'CUDA {torch.version.cuda} - cuDNN {torch.backends.cudnn.version()}.')
    logger.info(f'Configuration: {pformat(dict(cfg))}.')

    # Create the model
    logger.info('Building model.')
    model = MRIReconstruction(2, **cfg.model).to(device)
    n_params = sum(p.numel() for p in model.parameters())
    logger.info(f'Number of parameters: {n_params} ({n_params / 10.0**3:.2f}k).')
    logger.debug(model)

    # Create training and validation data
    train_mask_func, val_mask_func = build_masking_functions(**cfg.masking)
    train_transforms, val_transforms = build_mri_transforms(
        train_mask_func, val_mask_func=val_mask_func, crop=cfg.dataset.transforms.crop)

    training_data, validation_data = build_datasets(
        cfg.dataset.name, training_root, train_sensitivity_maps=None, train_transforms=train_transforms,
        validation_root=validation_root, val_sensitivity_maps=None, val_transforms=val_transforms)

    # Create the optimizers
    logger.info('Building optimizers.')
    optimizer: torch.optim.Optimizer = str_to_class('torch.optim', cfg.training.optimizer)(  # noqa
        model.parameters(), lr=cfg.training.lr, weight_decay=cfg.training.weight_decay
    )  # noqa

    # Build the LR scheduler, we use a fixed LR schedule step size, no adaptive training schedule.
    solver_steps = list(range(cfg.training.lr_step_size, cfg.training.num_iterations, cfg.training.lr_step_size))
    lr_scheduler = WarmupMultiStepLR(
        optimizer, solver_steps, cfg.training.lr_gamma, warmup_factor=1 / 3.,
        warmup_iters=cfg.training.lr_warmup_iter, warmup_method='linear')

    # Just to make sure.
    torch.cuda.empty_cache()

    # Setup training engine.
    engine = RIMEngine(cfg, model, device=device)

    engine.train(
        optimizer, lr_scheduler, training_data, experiment_dir,
        validation_data=validation_data, resume=resume, num_workers=num_workers)
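The OmegaConf.structured / OmegaConf.merge step is what gives the type safety mentioned in the comment: the dataclass acts as a schema, and merging a value of the wrong type raises a ValidationError. A self-contained sketch with an invented DefaultConfig:

from dataclasses import dataclass
from omegaconf import OmegaConf

@dataclass
class DefaultConfig:
    debug: bool = False
    num_iterations: int = 1000

base_cfg = OmegaConf.structured(DefaultConfig)
cfg = OmegaConf.merge(base_cfg, OmegaConf.create({"num_iterations": 500}))
print(cfg.num_iterations)  # 500
# Merging e.g. {"num_iterations": "many"} would raise omegaconf.errors.ValidationError.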
Example No. 9
 def read_model_config(self, model_id):
     path = os.path.join(self.model_folder, model_id + ".yaml")
     return OmegaConf.load(path)
Example No. 10
 def add_tokenizer(self, langpair: str) -> None:
     langpair = normalize_langpair(langpair)
     tokenizer_config = (
         self.config_dir / "tokenizer" / f"sentencepiece_bpe_wmt14_{langpair}.yaml"
     )
     self.configs.update({"tokenizer": OmegaConf.load(tokenizer_config)})
Example No. 11
 def add_model(self, is_base: bool = True) -> None:
     model_type = "base" if is_base else "big"
     model_config = self.config_dir / "model" / f"transformer-{model_type}.yaml"
     self.configs.update({"model": OmegaConf.load(model_config)})
Example No. 12
 def add_data(self, langpair: str) -> None:
     langpair = normalize_langpair(langpair)
     data_config = self.config_dir / "data" / f"wmt14.{langpair}.yaml"
     self.configs.update({"data": OmegaConf.load(data_config)})
Example No. 13
def train(cfg,
          dataset,
          dataset_args,
          module,
          ckpt_name='{epoch:02}-{val_loss:.3f}',
          kwargs={}):

    gpu = cfg.experiment.gpu
    torch.cuda.set_device(gpu)
    seed = np.random.randint(
        65535) if cfg.experiment.seed is None else cfg.experiment.seed
    seed_everything(seed)

    logger = WandbLogger(name=cfg.experiment.runName,
                         project=cfg.experiment.project,
                         offline=cfg.experiment.offline)
    logger.experiment.config.update(dict(cfg))
    logger.experiment.config.update({"dir": logger.experiment.dir})

    ckpt_dir = Path(logger.experiment.dir) / 'checkpoints'
    ckpt_dir.mkdir(exist_ok=True, parents=True)
    checkpoint = ModelCheckpoint(
        filepath=ckpt_dir / ckpt_name,
        save_top_k=cfg.checkpoint.save_top_k,
        save_weights_only=cfg.checkpoint.save_weights_only,
        verbose=True,
        monitor=cfg.checkpoint.monitor,
        mode=cfg.checkpoint.mode)

    trainer = Trainer(
        logger=logger,
        max_epochs=cfg.train.epoch,
        accumulate_grad_batches=cfg.train.n_accumulations,
        limit_val_batches=1.0,
        val_check_interval=cfg.train.val_check_interval,
        early_stop_callback=cfg.train.early_stopping,
        gpus=[gpu],
        checkpoint_callback=checkpoint,
        precision=16 if cfg.train.amp else 32,
        amp_level=cfg.train.amp_level,
    )

    net = getattr(models, cfg.model.name)(**cfg.model.args)
    net.to(gpu)

    if cfg.model.load_checkpoint is not None:
        ckpt = torch.load(cfg.model.load_checkpoint,
                          map_location=f'cuda:{gpu}')['state_dict']
        ckpt = {k[k.find('.') + 1:]: v for k, v in ckpt.items()}
        net.load_state_dict(ckpt, strict=False)
        print(f'\nload checkpoint: {cfg.model.load_checkpoint}\n')

    loss_args = dict(cfg.loss.args) if cfg.loss.args else {}
    loss = getattr(losses, cfg.loss.name)(**loss_args).cuda(gpu)

    if cfg.optimizer.scheduler.name == 'CosineAnnealingLR':
        cfg.optimizer.scheduler.args.T_max = cfg.train.epoch

    model = module(dataset,
                   dataset_args,
                   cfg.train.batch_size,
                   net,
                   loss,
                   n_workers=cfg.experiment.n_workers,
                   optimizer=cfg.optimizer.name,
                   optimizer_args=cfg.optimizer.args,
                   scheduler=cfg.optimizer.scheduler.name,
                   scheduler_args=cfg.optimizer.scheduler.args,
                   freeze_start=cfg.model.freeze_start.target_epoch,
                   unfreeze_params=cfg.model.freeze_start.unfreeze_params,
                   **kwargs)

    with open(ckpt_dir.parent / 'train_config.yaml', 'w',
              encoding='utf-8') as f:
        yaml.dump(omegaconf_to_yaml(cfg), f)

    with open(ckpt_dir.parent / 'augmentation.txt', 'w',
              encoding='utf-8') as f:
        transform_train = dataset_args['train']['transform']
        transform_val = dataset_args['val']['transform']
        f.write("---train augmentation---\n")
        if transform_train is not None:
            f.write(str(transform_train.transform) + "\n\n")
        f.write("---val augmentation---\n")
        if transform_val is not None:
            f.write(str(transform_val.transform) + "\n")

    trainer.fit(model)

    for n, model_path in enumerate(
            Path(logger.experiment.dir).glob('**/*.ckpt')):
        name = f"model_{n}"
        logger.experiment.config.update({name: model_path})

    if cfg.experiment.network_type == 'rnn':
        cnn_path = OmegaConf.load(Path(cfg.dataset.img_dir) /
                                  'config.yaml').ckpt_path
        logger.experiment.config.update({'cnn_path': cnn_path})
Example No. 14
from fastapi import FastAPI
from pydantic import BaseModel
from omegaconf import OmegaConf

from src.datasource import DataSource, WebSourceType
from src.datapipeline import DataPipeline, HTMLTextFormat
from src.database import Database, SqliteDB
from src.model import Model

app = FastAPI()

conf = OmegaConf.load("config/config.yaml")
database_file = conf["database_file"]
datasource_type = conf["datasource_type"]
text_format = conf["text_format"]
text_format_params = conf[text_format + "_params"]
database_type = conf["database_type"]

# Add new conditions if there are new types
if datasource_type == "web_source":
    datasource = DataSource(WebSourceType())

if text_format == "html":
    datapipeline = DataPipeline(HTMLTextFormat(**text_format_params))

if database_type == "sqlite":
    database = Database(SqliteDB(database_file))

model = Model()

Example No. 15
def objective(trial):
    # Load Configuration
    YAML_CONFIG = OmegaConf.load("lstm.yaml")
    CLI_CONFIG = OmegaConf.from_cli()
    CONFIG = OmegaConf.merge(YAML_CONFIG, CLI_CONFIG)

    # Reproducibility
    random.seed(CONFIG.SEED)
    np.random.seed(CONFIG.SEED)
    torch.manual_seed(CONFIG.SEED)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # Setup CPU or GPU
    if CONFIG.USE_GPU and not torch.cuda.is_available():
        raise ValueError("GPU not detected but CONFIG.USE_GPU is set to True.")
    device = torch.device("cuda" if CONFIG.USE_GPU else "cpu")

    # Setup dataset and dataloader
    # NOTE(seungjaeryanlee): Load saved dataset for speed
    dataset = get_dataset()
    #dataset = load_dataset()
    train_size = int(0.6 * len(dataset))
    valid_size = int(0.2 * len(dataset))
    test_size = len(dataset) - train_size - valid_size
    train_dataset, valid_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, valid_size, test_size])
    kwargs = {'num_workers': 1, 'pin_memory': True} if CONFIG.USE_GPU else {}
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=CONFIG.BATCH_SIZE, shuffle=True, **kwargs)
    valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=CONFIG.BATCH_SIZE, shuffle=False, **kwargs)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=CONFIG.BATCH_SIZE, shuffle=False, **kwargs)

    # Setup neural network and optimizer
    net = Net(trial).double().to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(net.parameters(), lr=0.001)
    # Log number of parameters
    CONFIG.NUM_PARAMETERS = count_parameters(net)

    # Setup wandb
    # wandb.init(project="MagNet", config=CONFIG)
    # wandb.watch(net)

    # Training
    for epoch_i in range(1, CONFIG.NUM_EPOCH+1):
        # Train for one epoch
        epoch_train_loss = 0
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = net(inputs.to(device))
            loss = criterion(outputs, labels.to(device))
            loss.backward()
            optimizer.step()

            epoch_train_loss += loss.item()

        # Compute Validation Loss
        with torch.no_grad():
            epoch_valid_loss = 0
            for inputs, labels in valid_loader:
                outputs = net(inputs.to(device))
                loss = criterion(outputs, labels.to(device))

                epoch_valid_loss += loss.item()

        print(f"Epoch {epoch_i:2d} "
            f"Train {epoch_train_loss / len(train_dataset):.5f} "
            f"Valid {epoch_valid_loss / len(valid_dataset):.5f}")
        # wandb.log({
        #     "train/loss": epoch_train_loss / len(train_dataset),
        #     "valid/loss": epoch_valid_loss / len(valid_dataset),
        # })

    # Evaluation
    net.eval()
    y_meas = []
    y_pred = []
    with torch.no_grad():
        for inputs, labels in test_loader:
            y_pred.append(net(inputs.to(device)))
            y_meas.append(labels.to(device))

    y_meas = torch.cat(y_meas, dim=0)
    y_pred = torch.cat(y_pred, dim=0)
    # print(f"Test Loss: {F.mse_loss(y_meas, y_pred).item() / len(test_dataset):.8f}")
    # wandb.log({"test/loss": F.mse_loss(y_meas, y_pred).item() / len(test_dataset)})

    # Prediction vs Target Plot
    fig, ax = plt.subplots(1, 1)
    fig.set_size_inches(8, 8)
    ax.scatter(y_meas.cpu().numpy(), y_pred.cpu().numpy(), label="Prediction")
    ax.plot(y_meas.cpu().numpy(), y_meas.cpu().numpy(), 'k--', label="Target")
    ax.grid(True)
    ax.legend()
    # wandb.log({"prediction_vs_target": wandb.Image(fig)})
    
    # Relative Error
    Error_re1 = abs(y_pred.cpu().numpy()-y_meas.cpu().numpy())/abs(y_meas.cpu().numpy())*100
    Error_re1[np.where(Error_re1>500)] = 500
    # Error_re_max1 = np.max(Error_re1)
    Error_re_avg1 = np.mean(Error_re1)
    # print(f"Relative Error: {Error_re_avg1:.8f}")
    # wandb.log({"Relative Error": Error_re_avg1})
    
    # np.savetxt("pred.csv", y_pred.cpu().numpy())
    # np.savetxt("meas.csv", y_meas.cpu().numpy())
    return Error_re_avg1
Example No. 16
from omegaconf import OmegaConf

from src.utils.utils import read_data, get_all_db_files, get_complete_table

import re
import unidecode

from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn import metrics
# -

paths = OmegaConf.load("config/paths.yaml")

# # Settings

DATASET = "dataset_v09"
TEST_SET_NUM = "2"

SEED = int(TEST_SET_NUM)

# # Functions


def clean_message(message):
    stop_words = set(stopwords.words("spanish"))
    message = message.lower()
    message = unidecode.unidecode(message)
Example No. 17
    # Load model
    code2seq = Code2Seq(config.model, config.optimizer, data_module.vocabulary,
                        config.train.teacher_forcing)

    train(code2seq, data_module, config)


def test_code2seq(config: DictConfig):
    filter_warnings()

    # Load data module
    data_module = PathContextDataModule(config.data_folder, config.data)

    # Load model
    code2seq = Code2Seq.load_from_checkpoint(config.checkpoint,
                                             map_location=torch.device("cpu"))

    test(code2seq, data_module, config.seed)


if __name__ == "__main__":
    __arg_parser = configure_arg_parser()
    __args = __arg_parser.parse_args()

    __config = cast(DictConfig, OmegaConf.load(__args.config))
    if __args.mode == "train":
        train_code2seq(__config)
    else:
        test_code2seq(__config)
Example No. 18
    def __init__(self,
                 original: bool = True,
                 backbone: str = "pointnet2",
                 input_nc: int = None,
                 num_classes: int = None,
                 mean_size_arr=[],
                 compute_loss=False,
                 *args,
                 **kwargs):
        """Initialize this model class.
        Parameters:
            opt -- training/test options
        A few things can be done here.
        - (required) call the initialization function of BaseModel
        - define loss function, visualization images, model names, and optimizers
        """
        assert input_nc is not None, "VoteNet requires input_nc to be defined"
        assert num_classes is not None, "VoteNet requires num_classes to be defined"
        try:
            self._backbone = MAPPING_BACKBONES_TO_API_NAMES[backbone.lower()]
        except KeyError:
            raise Exception("Backbone should be within {}".format(
                MAPPING_BACKBONES_TO_API_NAMES.keys()))

        if original:
            option = OmegaConf.load(
                os.path.join(PATH_TO_CONFIG, "votenet.yaml"))
        else:
            option = OmegaConf.load(
                os.path.join(PATH_TO_CONFIG, "votenet_backbones.yaml"))

        ModelFactory.resolve_model(option, input_nc, kwargs)

        self._original = original
        self._kwargs = kwargs
        self._compute_loss = compute_loss

        super(VoteNet, self).__init__(option)

        # 1 - CREATE BACKBONE MODEL
        if original:
            backbone_option = option.backbone
            backbone_cls = getattr(models, backbone_option.model_type)
            self.backbone_model = backbone_cls(architecture="unet",
                                               input_nc=input_nc,
                                               config=backbone_option)
        else:
            backbone_cls = getattr(models, self._backbone)
            voting_option = option.voting
            self.backbone_model = backbone_cls(
                architecture="unet",
                input_nc=input_nc,
                num_layers=4,
                output_nc=self._get_attr(voting_option, "feat_dim"),
                **kwargs,
            )
            self._kpconv_backbone = self._backbone == "KPConv"
            self.sampler = RandomSamplerToDense(
                num_to_sample=voting_option.num_points_to_sample)

        self.conv_type = self.backbone_model.conv_type
        self.is_dense_format = self.conv_type == "DENSE"

        # 2 - CREATE VOTING MODEL
        voting_option = option.voting
        voting_cls = getattr(votenet_module, voting_option.module_name)
        self.voting_module = voting_cls(
            vote_factor=self._get_attr(voting_option, "vote_factor"),
            seed_feature_dim=self._get_attr(voting_option, "feat_dim"),
        )

        # 3 - CREATE PROPOSAL MODULE
        proposal_option = option.proposal
        proposal_cls = getattr(votenet_module, proposal_option.module_name)
        self.proposal_cls_module = proposal_cls(
            num_class=num_classes,
            vote_aggregation_config=proposal_option.vote_aggregation,
            num_heading_bin=proposal_option.num_heading_bin,
            mean_size_arr=mean_size_arr,
            num_proposal=proposal_option.num_proposal,
            sampling=proposal_option.sampling,
        )

        # Loss params
        self.loss_params = option.loss_params
        self.loss_params.num_heading_bin = proposal_option.num_heading_bin
        if isinstance(mean_size_arr, np.ndarray):
            self.loss_params.mean_size_arr = mean_size_arr.tolist()
        else:
            self.loss_params.mean_size_arr = mean_size_arr

        self.losses_has_been_added = False
        self.loss_names = []
Example No. 19
def load_splitter_config() -> SplitterParams:
    omega_splitter_config = OmegaConf.load(SPLITTER_CONFIG_PATH)
    data_config = read_splitter_params(omega_splitter_config)
    return data_config
Example No. 20
def test_load_from_invalid() -> None:
    with pytest.raises(TypeError):
        OmegaConf.load(3.1415)  # type: ignore
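OmegaConf.load accepts a filesystem path (str or pathlib.Path) or an open file object; anything else, such as the float above, raises a TypeError, which is exactly what this test asserts. A quick sketch (the path is hypothetical):

import pathlib
from omegaconf import OmegaConf

OmegaConf.save(OmegaConf.create({"a": 1}), "/tmp/example.yaml")
cfg = OmegaConf.load(pathlib.Path("/tmp/example.yaml"))  # a str path or open file object also works
assert cfg.a == 1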
Example No. 21
class TableModelConfig:
    train_data_folds: List[str] = MISSING
    val_data_folds: List[str] = MISSING
    train_feats: List[str] = MISSING
    target: str = MISSING
    model_name: str = MISSING
    classifier_model: str = MISSING
    model_type: Optional[str] = "CatBoostRegressor"
    construct_params: Dict[Any, Any] = field(default_factory=dict)
    train_params: Dict[Any, Any] = field(default_factory=dict)


#%%
train_config_path = util.get_my_data_dir(
) / os.environ["TRAIN_CONFIG_FILENAME"]
conf = OmegaConf.create(TableModelConfig(**OmegaConf.load(train_config_path)))

#%%
train_df, val_df, = io.load_dataset(conf.train_data_folds, conf.val_data_folds)

from catboost import CatBoostClassifier
classifier = CatBoostClassifier()
classifier.load_model(conf.classifier_model)
t = [l for l in conf.train_feats if l != "rank_class"]
train_df["rank_class"] = classifier.predict(train_df[t])
val_df["rank_class"] = classifier.predict(val_df[t])

# %%
import pandas as pd
train_data_concat = pd.concat([train_df, val_df], axis=0, ignore_index=True)
#%%
Example No. 22
 def from_file(cls, filepath: str):
     config = OmegaConf.load(filepath)
     return ASRTarredDatasetMetadata.from_config(config=config)
Example No. 23
def setup_common_environment(
    run_name,
    base_directory,
    cfg_filename,
    device,
    machine_rank,
    mixed_precision,
    debug=False,
):
    experiment_dir = base_directory / run_name
    if communication.get_local_rank() == 0:
        # Want to prevent multiple workers from trying to write a directory
        # This is required in the logging below
        experiment_dir.mkdir(parents=True, exist_ok=True)
    communication.synchronize()  # Ensure folders are in place.

    # Load configs from YAML file to check which model needs to be loaded.
    cfg_from_file = OmegaConf.load(cfg_filename)

    # Load the default configs to ensure type safety
    cfg = OmegaConf.structured(DefaultConfig)

    models, models_config = load_models_into_environment_config(cfg_from_file)
    cfg.model = models_config.model
    del models_config["model"]
    cfg.additional_models = models_config

    # Setup everything for training
    cfg.training = TrainingConfig
    cfg.validation = ValidationConfig
    cfg.inference = InferenceConfig

    cfg_from_file_new = cfg_from_file.copy()
    for key in cfg_from_file:
        # TODO: This does not really do a full validation.
        # BODY: This will be handled once Hydra is implemented.
        if key in ["models", "additional_models"]:  # Still handled separately
            continue

        elif key in ["training", "validation", "inference"]:
            if not cfg_from_file[key]:
                logger.info(f"key {key} missing in config.")
                continue

            if key in ["training", "validation"]:
                dataset_cfg_from_file = extract_names(cfg_from_file[key].datasets)
                for idx, (dataset_name, dataset_config) in enumerate(
                    dataset_cfg_from_file
                ):
                    cfg_from_file_new[key].datasets[idx] = dataset_config
                    cfg[key].datasets.append(load_dataset_config(dataset_name))
            else:
                dataset_name, dataset_config = extract_names(cfg_from_file[key].dataset)
                cfg_from_file_new[key].dataset = dataset_config
                cfg[key].dataset = load_dataset_config(dataset_name)

        cfg[key] = OmegaConf.merge(cfg[key], cfg_from_file_new[key])
    # sys.exit()
    # Make configuration read only.
    # TODO(jt): Does not work when indexing config lists.
    # OmegaConf.set_readonly(cfg, True)
    setup_logging(machine_rank, experiment_dir, run_name, cfg_filename, cfg, debug)
    forward_operator, backward_operator = build_operators(cfg.physics)

    model, additional_models = initialize_models_from_config(
        cfg, models, forward_operator, backward_operator, device
    )

    engine = setup_engine(
        cfg,
        device,
        model,
        additional_models,
        forward_operator=forward_operator,
        backward_operator=backward_operator,
        mixed_precision=mixed_precision,
    )

    environment = namedtuple(
        "environment",
        ["cfg", "experiment_dir", "engine"],
    )
    return environment(cfg, experiment_dir, engine)
Example No. 24
def main():
    parser = ArgumentParser()
    parser.add_argument(
        "--pretrained_model",
        type=str,
        default="titanet_large",
        required=False,
        help="Pass your trained .nemo model",
    )
    parser.add_argument(
        "--finetune_config_file",
        type=str,
        required=True,
        help=
        "path to speakernet config yaml file to load train, validation dataset and also for trainer parameters",
    )

    parser.add_argument(
        "--freeze_encoder",
        type=bool,
        required=False,
        default=True,
        help=
        "True if speakernet encoder paramteres needs to be frozen while finetuning",
    )

    args = parser.parse_args()

    if args.pretrained_model.endswith('.nemo'):
        logging.info(f"Using local speaker model from {args.pretrained_model}")
        speaker_model = EncDecSpeakerLabelModel.restore_from(
            restore_path=args.pretrained_model)
    elif args.pretrained_model.endswith('.ckpt'):
        logging.info(
            f"Using local speaker model from checkpoint {args.pretrained_model}"
        )
        speaker_model = EncDecSpeakerLabelModel.load_from_checkpoint(
            checkpoint_path=args.pretrained_model)
    else:
        logging.info("Using pretrained speaker recognition model from NGC")
        speaker_model = EncDecSpeakerLabelModel.from_pretrained(
            model_name=args.pretrained_model)

    finetune_config = OmegaConf.load(args.finetune_config_file)

    if 'test_ds' in finetune_config.model and finetune_config.model.test_ds is not None:
        finetune_config.model.test_ds = None
        logging.warning("Removing test ds")

    speaker_model.setup_finetune_model(finetune_config.model)
    finetune_trainer = pl.Trainer(**finetune_config.trainer)
    speaker_model.set_trainer(finetune_trainer)

    _ = exp_manager(finetune_trainer, finetune_config.get('exp_manager', None))
    speaker_model.setup_optimization(finetune_config.optim)

    if args.freeze_encoder:
        for param in speaker_model.encoder.parameters():
            param.requires_grad = False

    finetune_trainer.fit(speaker_model)
Example No. 25
def main():
    args = parse_args()
    config = OmegaConf.load(args.config)
    config.merge_with_dotlist(args.options)
    atexit.register(remove_abnormal_exp,
                    log_path=config.log_path,
                    config_path=config.config_path)
    seed_everything(config.seed)

    exp_num = find_exp_num(log_path=config.log_path)
    exp_num = str(exp_num).zfill(3)
    config.weight_path = os.path.join(config.weight_path, f'exp_{exp_num}')
    os.makedirs(config.weight_path, exist_ok=True)
    OmegaConf.save(config,
                   os.path.join(config.config_path, f'exp_{exp_num}.yaml'))
    logger, csv_logger = get_logger(config, exp_num)
    timer = mlc.time.Timer()
    logger.info(mlc.time.now())
    logger.info(f'config: {config}')

    train_df = pd.read_csv(os.path.join(config.root, 'train.csv'))
    X = train_df['id']
    X = np.array([os.path.join(config.root, 'train', f'{i}.png') for i in X])
    y = np.load(os.path.join(config.root, 'labels.npy'))

    transform = eval(config.transform.name)(config.transform.size)
    logger.info(f'augmentation: {transform}')
    strong_transform = eval(config.strong_transform.name)
    logger.info(f'strong augmentation: {config.strong_transform.name}')

    for fold in range(config.train.n_splits):
        train_idx = np.load(
            os.path.join(config.root, 'data', f'train_idx_fold{fold}.npy'))
        val_idx = np.load(
            os.path.join(config.root, 'data', f'val_idx_fold{fold}.npy'))

        X_train, X_val = X[train_idx], X[val_idx]
        y_train, y_val = y[train_idx], y[val_idx]
        train_data = MetDataset('train', X_train, y_train,
                                transform['albu_train'])
        val_data = MetDataset('val', X_val, y_val, transform['albu_val'])
        train_loader = DataLoader(train_data, **config.train_loader)
        val_loader = DataLoader(val_data, **config.val_loader)

        model = eval(config.model)(True)
        if 'fc.weight' in model.state_dict().keys():
            model.fc = nn.Linear(model.fc.in_features, config.train.num_labels)
        elif 'classifier.weight' in model.state_dict().keys():
            model.classifier = nn.Linear(model.classifier.in_features,
                                         config.train.num_labels)
        elif 'head.fc.weight' in model.state_dict().keys():
            model.head.fc = nn.Linear(model.head.fc.in_features,
                                      config.train.num_labels)
        model = model.cuda()
        optimizer = eval(config.optimizer.name)(model.parameters(),
                                                lr=config.optimizer.lr)
        scheduler = eval(config.scheduler.name)(
            optimizer,
            config.train.epoch // config.scheduler.cycle,
            eta_min=config.scheduler.eta_min)
        criterion = eval(config.loss)()
        scaler = GradScaler()

        best_acc = 0
        best_loss = 1e10
        mb = master_bar(range(config.train.epoch))
        for epoch in mb:
            timer.add('train')
            train_loss, train_acc = train(config, model,
                                          transform['torch_train'],
                                          strong_transform, train_loader,
                                          optimizer, criterion, mb, epoch,
                                          scaler)
            train_time = timer.fsince('train')

            timer.add('val')
            val_loss, val_acc = validate(config, model, transform['torch_val'],
                                         val_loader, criterion, mb, epoch)
            val_time = timer.fsince('val')

            output1 = 'epoch: {} train_time: {} validate_time: {}'.format(
                epoch, train_time, val_time)
            output2 = 'train_loss: {:.3f} train_acc: {:.3f} val_loss: {:.3f} val_acc: {:.3f}'.format(
                train_loss, train_acc, val_loss, val_acc)
            logger.info(output1)
            logger.info(output2)
            mb.write(output1)
            mb.write(output2)
            csv_logger.write([epoch, train_loss, train_acc, val_loss, val_acc])

            scheduler.step()

            if val_loss < best_loss:
                best_loss = val_loss
                save_name = os.path.join(config.weight_path,
                                         f'best_loss_fold{fold}.pth')
                save_model(save_name, epoch, val_loss, val_acc, model,
                           optimizer)
            if val_acc > best_acc:
                best_acc = val_acc
                save_name = os.path.join(config.weight_path,
                                         f'best_acc_fold{fold}.pth')
                save_model(save_name, epoch, val_loss, val_acc, model,
                           optimizer)

            save_name = os.path.join(config.weight_path,
                                     f'last_epoch_fold{fold}.pth')
            save_model(save_name, epoch, val_loss, val_acc, model, optimizer)
Example No. 26
    def load_from_checkpoint(cls, dir: str):
        """Loads a saved model from the directory

        Args:
            dir (str): The directory where the model was saved, along with the checkpoints

        Returns:
            TabularModel: The saved TabularModel
        """
        config = OmegaConf.load(os.path.join(dir, "config.yml"))
        datamodule = joblib.load(os.path.join(dir, "datamodule.sav"))
        if (
            hasattr(config, "log_target")
            and (config.log_target is not None)
            and os.path.exists(os.path.join(dir, "exp_logger.sav"))
        ):
            logger = joblib.load(os.path.join(dir, "exp_logger.sav"))
        else:
            logger = None
        if os.path.exists(os.path.join(dir, "callbacks.sav")):
            callbacks = joblib.load(os.path.join(dir, "callbacks.sav"))
        else:
            callbacks = []
        if os.path.exists(os.path.join(dir, "custom_model_callable.sav")):
            model_callable = joblib.load(os.path.join(dir, "custom_model_callable.sav"))
            custom_model = True
        else:
            model_callable = getattr(
                getattr(models, config._module_src), config._model_name
            )
            custom_model = False
        custom_params = joblib.load(os.path.join(dir, "custom_params.sav"))
        model_args = {}
        if custom_params.get("custom_loss") is not None:
            model_args['loss'] = "MSELoss"
        if custom_params.get("custom_metrics") is not None:
            model_args['metrics'] = ["mean_squared_error"]
            model_args['metric_params'] = [{}]
        if custom_params.get("custom_optimizer") is not None:
            model_args['optimizer'] = "Adam"
        if custom_params.get("custom_optimizer_params") is not None:
            model_args['optimizer_params'] = {}
        
        # Initializing with default metrics, losses, and optimizers. Will revert once initialized
        model = model_callable.load_from_checkpoint(
            checkpoint_path=os.path.join(dir, "model.ckpt"),
            **model_args
        )
        # else:
        #     # Initializing with default values
        #     model = model_callable.load_from_checkpoint(
        #         checkpoint_path=os.path.join(dir, "model.ckpt"),
        #     )
        # Updating config with custom parameters for experiment tracking
        if custom_params.get("custom_loss") is not None:
            model.custom_loss = custom_params["custom_loss"]
        if custom_params.get("custom_metrics") is not None:
            model.custom_metrics = custom_params["custom_metrics"]
        if custom_params.get("custom_optimizer") is not None:
            model.custom_optimizer = custom_params["custom_optimizer"]
        if custom_params.get("custom_optimizer_params") is not None:
            model.custom_optimizer_params = custom_params["custom_optimizer_params"]
        model._setup_loss()
        model._setup_metrics()
        tabular_model = cls(config=config, model_callable=model_callable)
        tabular_model.model = model
        tabular_model.custom_model = custom_model
        tabular_model.datamodule = datamodule
        tabular_model.callbacks = callbacks
        tabular_model._prepare_trainer()
        tabular_model.trainer.model = model
        tabular_model.logger = logger
        return tabular_model
Example No. 27
 logging.basicConfig(level=logging.INFO, 
                     format="%(asctime)s [%(levelname)s] %(message)s",
                     handlers=[
                         logging.FileHandler("debug.log"),
                         logging.StreamHandler()
                     ])
 
 cfg = OmegaConf.from_cli()
 if cfg.project.path is None and cfg.project.config_file is None:
     raise ValueError('must input either a path or a config file')
 elif cfg.project.path is not None:
     cfg.project.config_file = os.path.join(cfg.project.path, 'project_config.yaml')
 elif cfg.project.config_file is not None:
     cfg.project.path = os.path.dirname(cfg.project.config_file)    
 else:
     raise ValueError('must input either a path or a config file, not {}'.format(cfg))
     
 assert os.path.isfile(cfg.project.config_file) and os.path.isdir(cfg.project.path)
 
 user_cfg = OmegaConf.load(cfg.project.config_file)
 cfg = OmegaConf.merge(cfg, user_cfg)
 cfg = projects.convert_config_paths_to_absolute(cfg)
 # print(cfg)
 
 logging.info(OmegaConf.to_yaml(cfg))
 
 print_models(cfg.project.model_path)
 
 print_dataset_info(cfg.project.data_path)
 
 try_load_all_frames(cfg.project.data_path)
Example No. 28
    model_type: str = MISSING
    model_name: str = MISSING
    train_params: Dict[Any, Any] = field(default_factory=dict)


#%%
try:
    yaml_path = sys.argv[1]
except IndexError:
    print(
        "usage: python train_decision_tree.py <path to configguraiton yaml>",
        file=sys.stderr,
    )
    sys.exit(-1)

conf = OmegaConf.create(TableModelConfig(**OmegaConf.load(yaml_path)))

#%%
def make_sklearn_api_model(train_df, conf):
    from importlib import import_module

    module = import_module(conf.model_module)
    model = getattr(module, conf.model_type)(**conf.train_params)
    model.fit(train_df[conf.train_feats], train_df[conf.target])
    return model


def mse(model, x, y) -> float:
    diff = y - model.predict(x)
    return float(diff @ diff) / len(y)
Example No. 29
def my_app(config: DictConfig) -> None:
    global logger
    logger = getLogger(config.verbose)
    logger.info(OmegaConf.to_yaml(config))

    if not torch.cuda.is_available():
        device = torch.device("cpu")
    else:
        device = torch.device(config.device)

    # timelag
    timelag_config = OmegaConf.load(to_absolute_path(
        config.timelag.model_yaml))
    timelag_model = hydra.utils.instantiate(timelag_config.netG).to(device)
    checkpoint = torch.load(
        to_absolute_path(config.timelag.checkpoint),
        map_location=lambda storage, loc: storage,
    )
    timelag_model.load_state_dict(checkpoint["state_dict"])
    timelag_in_scaler = joblib.load(
        to_absolute_path(config.timelag.in_scaler_path))
    timelag_out_scaler = joblib.load(
        to_absolute_path(config.timelag.out_scaler_path))
    timelag_model.eval()

    # duration
    duration_config = OmegaConf.load(
        to_absolute_path(config.duration.model_yaml))
    duration_model = hydra.utils.instantiate(duration_config.netG).to(device)
    checkpoint = torch.load(
        to_absolute_path(config.duration.checkpoint),
        map_location=lambda storage, loc: storage,
    )
    duration_model.load_state_dict(checkpoint["state_dict"])
    duration_in_scaler = joblib.load(
        to_absolute_path(config.duration.in_scaler_path))
    duration_out_scaler = joblib.load(
        to_absolute_path(config.duration.out_scaler_path))
    duration_model.eval()

    # acoustic model
    acoustic_config = OmegaConf.load(
        to_absolute_path(config.acoustic.model_yaml))
    acoustic_model = hydra.utils.instantiate(acoustic_config.netG).to(device)
    checkpoint = torch.load(
        to_absolute_path(config.acoustic.checkpoint),
        map_location=lambda storage, loc: storage,
    )
    acoustic_model.load_state_dict(checkpoint["state_dict"])
    acoustic_in_scaler = joblib.load(
        to_absolute_path(config.acoustic.in_scaler_path))
    acoustic_out_scaler = joblib.load(
        to_absolute_path(config.acoustic.out_scaler_path))
    acoustic_model.eval()

    # Run synthesis for each utt.
    question_path = to_absolute_path(config.question_path)

    if config.utt_list is not None:
        in_dir = to_absolute_path(config.in_dir)
        out_dir = to_absolute_path(config.out_dir)
        os.makedirs(out_dir, exist_ok=True)
        with open(to_absolute_path(config.utt_list)) as f:
            lines = list(filter(lambda s: len(s.strip()) > 0, f.readlines()))
            logger.info("Processes %s utterances...", len(lines))
            for idx in tqdm(range(len(lines))):
                utt_id = lines[idx].strip()
                label_path = join(in_dir, f"{utt_id}.lab")
                if not exists(label_path):
                    raise RuntimeError(
                        f"Label file does not exist: {label_path}")

                wav = synthesis(
                    config,
                    device,
                    label_path,
                    question_path,
                    timelag_model,
                    timelag_config,
                    timelag_in_scaler,
                    timelag_out_scaler,
                    duration_model,
                    duration_config,
                    duration_in_scaler,
                    duration_out_scaler,
                    acoustic_model,
                    acoustic_config,
                    acoustic_in_scaler,
                    acoustic_out_scaler,
                )
                wav = np.clip(wav, -32768, 32767)
                if config.gain_normalize:
                    wav = wav / np.max(np.abs(wav)) * 32767

                out_wav_path = join(out_dir, f"{utt_id}.wav")
                wavfile.write(out_wav_path,
                              rate=config.sample_rate,
                              data=wav.astype(np.int16))
    else:
        assert config.label_path is not None
        logger.info("Process the label file: %s", config.label_path)
        label_path = to_absolute_path(config.label_path)
        out_wav_path = to_absolute_path(config.out_wav_path)

        wav = synthesis(
            config,
            device,
            label_path,
            question_path,
            timelag_model,
            timelag_config,
            timelag_in_scaler,
            timelag_out_scaler,
            duration_model,
            duration_config,
            duration_in_scaler,
            duration_out_scaler,
            acoustic_model,
            acoustic_config,
            acoustic_in_scaler,
            acoustic_out_scaler,
        )
        wav = wav / np.max(np.abs(wav)) * (2**15 - 1)
        wavfile.write(out_wav_path,
                      rate=config.sample_rate,
                      data=wav.astype(np.int16))
Example No. 30
 def load_yaml(config):
     return dict(OmegaConf.load(config))
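dict(OmegaConf.load(...)) gives only a shallow conversion: nested values remain DictConfig nodes and interpolations are left untouched. For a plain nested Python container, OmegaConf.to_container with resolve=True is the usual choice; a small sketch:

from omegaconf import OmegaConf

cfg = OmegaConf.create({"paths": {"root": "/data", "train": "${paths.root}/train"}})
shallow = dict(cfg)                                # top-level keys only; values are still DictConfig
plain = OmegaConf.to_container(cfg, resolve=True)  # nested plain dicts, interpolations resolved
print(plain["paths"]["train"])                     # /data/train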