def __init__(self, args):
    """Set up the ToyNet solver from an argument object.

    Copies hyper-parameters from ``args``, builds the network, its EMA
    wrapper, the Adam optimizer and an exponential LR scheduler, prepares
    the checkpoint directory, optionally restores a checkpoint, optionally
    opens a TensorBoard ``SummaryWriter`` and finally builds the data
    loaders with ``return_data``.

    Args:
        args: Object exposing ``cuda``, ``epoch``, ``batch_size``, ``lr``,
            ``K``, ``beta``, ``num_avg``, ``ckpt_dir``, ``env_name``,
            ``load_ckpt`` and ``tensorboard`` (plus ``summary_dir`` when
            ``tensorboard`` is truthy). It is also passed unchanged to
            ``return_data``.
    """
    self.args = args

    # CUDA is used only when it is both requested and available.
    self.cuda = (args.cuda and torch.cuda.is_available())
    self.epoch = args.epoch
    self.batch_size = args.batch_size
    self.lr = args.lr
    self.eps = 1e-9
    self.K = args.K
    self.beta = args.beta
    self.num_avg = args.num_avg
    self.global_iter = 0
    self.global_epoch = 0

    # Network & Optimizer
    self.toynet = cuda(ToyNet(self.K), self.cuda)
    self.toynet.weight_init()
    # The EMA wrapper receives a second ToyNet instance together with the
    # state dict of the freshly initialised main network.
    self.toynet_ema = Weight_EMA_Update(
        cuda(ToyNet(self.K), self.cuda),
        self.toynet.state_dict(),
        decay=0.999,
    )

    self.optim = optim.Adam(self.toynet.parameters(),
                            lr=self.lr,
                            betas=(0.5, 0.999))
    self.scheduler = lr_scheduler.ExponentialLR(self.optim, gamma=0.97)

    # History
    # Defaults are installed BEFORE load_checkpoint() runs so that a
    # history restored by the loader is not reset to zeros afterwards.
    # NOTE(review): load_checkpoint is not visible here; if it never
    # touches self.history this ordering is simply a no-op.
    self.history = {
        'avg_acc': 0.,
        'info_loss': 0.,
        'class_loss': 0.,
        'total_loss': 0.,
        'epoch': 0,
        'iter': 0,
    }

    # exist_ok=True already tolerates a directory that is present.
    self.ckpt_dir = Path(args.ckpt_dir).joinpath(args.env_name)
    self.ckpt_dir.mkdir(parents=True, exist_ok=True)

    # An empty string means "do not restore anything".
    self.load_ckpt = args.load_ckpt
    if self.load_ckpt != '':
        self.load_checkpoint(self.load_ckpt)

    # Tensorboard
    self.tensorboard = args.tensorboard
    if self.tensorboard:
        self.env_name = args.env_name
        self.summary_dir = Path(args.summary_dir).joinpath(args.env_name)
        self.summary_dir.mkdir(parents=True, exist_ok=True)
        self.tf = SummaryWriter(log_dir=self.summary_dir)
        # Record the full argument set once, tagged with the current epoch.
        self.tf.add_text(tag='argument',
                         text_string=str(args),
                         global_step=self.global_epoch)

    # Dataset
    self.data_loader = return_data(args)
def __init__(self, args):
    """Initialise the solver used for adversarial-perturbation experiments.

    Copies basic settings from ``args``, builds the data loaders, creates
    the checkpoint / output directories, sets up visualisation and the
    model (via ``visualization_init`` and ``model_init``), optionally
    restores a checkpoint and, for the ``'FGSM'`` and ``'ILLC'`` attack
    modes, creates the ``Attack`` helper.

    Args:
        args: Object exposing ``cuda``, ``epoch``, ``batch_size``, ``lr``,
            ``y_dim``, ``target``, ``dataset``, ``silent``, ``env_name``,
            ``visdom``, ``ckpt_dir``, ``output_dir``, ``load_ckpt``,
            ``load_ckpt_flag`` and ``attack_mode``. It is also forwarded to
            ``return_data``, ``visualization_init`` and ``model_init``.
    """
    self.args = args

    # Basic
    self.cuda = (args.cuda and torch.cuda.is_available())
    # Original author notes: y_dim is 10 for MNIST and CIFAR10; target is
    # used to perturb towards one specific class.
    for field in ('epoch', 'batch_size', 'lr', 'y_dim', 'target', 'dataset'):
        setattr(self, field, getattr(args, field))
    self.data_loader = return_data(args)
    self.global_epoch, self.global_iter = 0, 0
    self.print_ = not args.silent
    self.env_name = args.env_name  # experiment name
    self.visdom = args.visdom  # author note: installed but not used

    # Directories: one to read checkpoints from, one to save them to.
    self.ckpt_dir = Path(args.ckpt_dir)
    self.save_ckpt_dir = Path('./checkpoints/' + args.env_name)
    print(self.save_ckpt_dir)
    for directory in (self.ckpt_dir, self.save_ckpt_dir):
        if not directory.exists():
            directory.mkdir(parents=True, exist_ok=True)

    self.output_dir = Path(args.output_dir) / args.env_name
    if not self.output_dir.exists():
        self.output_dir.mkdir(parents=True, exist_ok=True)

    # Visualization Tools
    self.visualization_init(args)

    # Histories
    self.history = {'acc': 0., 'epoch': 0, 'iter': 0}

    # Models & Optimizers
    self.model_init(args)
    self.load_ckpt = args.load_ckpt
    # The explicit comparison with True is kept on purpose: only a flag
    # that compares equal to True enables loading.
    if args.load_ckpt_flag == True:
        if self.load_ckpt != '':
            self.load_checkpoint(self.load_ckpt)

    # Adversarial Perturbation Generator
    loss_fn = F.cross_entropy
    self.attack_mode = args.attack_mode
    # Both supported modes build the same Attack object; for any other
    # mode no ``attack`` attribute is created.
    if self.attack_mode in ('FGSM', 'ILLC'):
        self.attack = Attack(self.net, criterion=loss_fn)
def get_dataset_config(self, config):
    """Build the data loaders and cache training-set metadata on ``self``.

    Args:
        config: Configuration object passed to ``return_data``; it must
            also provide a dict-style ``get`` for the optional
            ``'ignore_index'`` (default ``-100``) and ``'max_normalize'``
            (default ``False``) entries.
    """
    self.data_loader = return_data(config)
    self.ignore_index = config.get('ignore_index', -100)

    # Everything below is read from the training split.
    train_set = self.data_loader['train'].dataset
    preprocessor = train_set.preprocessor

    self.n_visual_class = preprocessor.num_visual_words
    self.n_phone_class = preprocessor.num_tokens
    self.visual_words = preprocessor.visual_words
    self.phone_set = preprocessor.tokens
    self.max_feat_len = train_set.max_feat_len
    self.max_word_len = train_set.max_word_len
    self.max_normalize = config.get('max_normalize', False)

    # Short summary of the dataset configuration on stdout.
    print(f'Number of visual label classes = {self.n_visual_class}')
    print(f'Number of phone classes = {self.n_phone_class}')
    print(f'Max normalized: {self.max_normalize}')
def get_dataset_config(self, config):
    """Build the data loaders and cache training-set metadata on ``self``.

    Args:
        config: Configuration object passed to ``return_data``; it must
            also provide a dict-style ``get`` for the optional
            ``'ignore_index'`` (default ``-100``) and ``'n_clusters'``
            (default: the number of phone classes) entries.
    """
    self.data_loader = return_data(config)
    self.ignore_index = config.get('ignore_index', -100)

    # Everything below is read from the training split.
    train_set = self.data_loader['train'].dataset
    preprocessor = train_set.preprocessor

    self.n_visual_class = preprocessor.num_visual_words
    self.n_phone_class = preprocessor.num_tokens
    self.visual_words = preprocessor.visual_words
    self.phone_set = preprocessor.tokens
    self.max_feat_len = train_set.max_feat_len
    self.max_word_len = train_set.max_word_len
    self.max_segment_num = train_set.max_segment_num

    # Falls back to one cluster per phone class when not configured.
    self.n_clusters = config.get("n_clusters", self.n_phone_class)

    # Short summary of the dataset configuration on stdout.
    print(f'Number of visual label classes = {self.n_visual_class}')
    print(f'Number of phone classes = {self.n_phone_class}')
    print(f'Number of clusters = {self.n_clusters}')
def __init__(self, args):
    """Initialise the solver used for adversarial-perturbation experiments.

    Copies basic settings from ``args``, builds the data loaders, creates
    the checkpoint and output directories (both namespaced by
    ``args.env_name``), sets up visualisation and the model (via
    ``visualization_init`` and ``model_init``), optionally restores a
    checkpoint and creates the ``Attack`` helper around ``self.net``.

    Args:
        args: Object exposing ``cuda``, ``epoch``, ``batch_size``, ``eps``,
            ``lr``, ``y_dim``, ``target``, ``dataset``, ``silent``,
            ``env_name``, ``tensorboard``, ``visdom``, ``ckpt_dir``,
            ``output_dir`` and ``load_ckpt``. It is also forwarded to
            ``return_data``, ``visualization_init`` and ``model_init``.
    """
    self.args = args

    # Basic
    self.cuda = (args.cuda and torch.cuda.is_available())
    for field in ('epoch', 'batch_size', 'eps', 'lr',
                  'y_dim', 'target', 'dataset'):
        setattr(self, field, getattr(args, field))
    self.data_loader = return_data(args)
    self.global_epoch, self.global_iter = 0, 0
    self.print_ = not args.silent
    self.env_name = args.env_name
    self.tensorboard = args.tensorboard
    self.visdom = args.visdom

    # Per-experiment checkpoint directory.
    self.ckpt_dir = Path(args.ckpt_dir) / args.env_name
    if not self.ckpt_dir.exists():
        self.ckpt_dir.mkdir(parents=True, exist_ok=True)

    # Per-experiment output directory.
    self.output_dir = Path(args.output_dir) / args.env_name
    if not self.output_dir.exists():
        self.output_dir.mkdir(parents=True, exist_ok=True)

    # Visualization Tools
    self.visualization_init(args)

    # Histories
    self.history = {'acc': 0., 'epoch': 0, 'iter': 0}

    # Models & Optimizers
    self.model_init(args)
    self.load_ckpt = args.load_ckpt
    # An empty string means "do not restore anything".
    if self.load_ckpt != '':
        self.load_checkpoint(self.load_ckpt)

    # Adversarial Perturbation Generator
    loss_fn = F.cross_entropy
    self.attack = Attack(self.net, criterion=loss_fn)
def __init__(self, args):
    """Set up the ToyNet solver from an argument object.

    Copies hyper-parameters from ``args``, selects the torch device,
    builds the network, its EMA wrapper, the Adam optimizer and an
    exponential LR scheduler, prepares the checkpoint directory,
    optionally restores a checkpoint and finally builds the data loaders
    with ``return_data``.

    Args:
        args: Object exposing ``device``, ``epoch``, ``batch_size``,
            ``lr``, ``K``, ``beta``, ``num_avg``, ``ckpt_dir`` and
            ``load_ckpt``. It is also passed unchanged to ``return_data``.
    """
    self.args = args

    # Fall back to the CPU whenever CUDA is not available, regardless of
    # the device that was requested.
    self.device = torch.device(
        args.device if torch.cuda.is_available() else 'cpu')
    self.epoch = args.epoch
    self.batch_size = args.batch_size
    self.lr = args.lr
    self.eps = 1e-9
    self.K = args.K
    self.beta = args.beta
    self.num_avg = args.num_avg
    self.global_iter = 0
    self.global_epoch = 0

    # Network & Optimizer
    self.toynet = ToyNet(self.K).to(self.device)
    self.toynet.weight_init()
    # The EMA wrapper receives a second ToyNet instance together with the
    # state dict of the freshly initialised main network.
    self.toynet_ema = Weight_EMA_Update(
        ToyNet(self.K).to(self.device),
        self.toynet.state_dict(),
        decay=0.999,
    )

    self.optim = optim.Adam(self.toynet.parameters(),
                            lr=self.lr,
                            betas=(0.5, 0.999))
    self.scheduler = lr_scheduler.ExponentialLR(self.optim, gamma=0.97)

    # History
    # Defaults are installed BEFORE load_checkpoint() runs so that a
    # history restored by the loader is not reset to zeros afterwards.
    # NOTE(review): load_checkpoint is not visible here; if it never
    # touches self.history this ordering is simply a no-op.
    self.history = {
        'avg_acc': 0.,
        'info_loss': 0.,
        'class_loss': 0.,
        'total_loss': 0.,
        'epoch': 0,
        'iter': 0,
    }

    # exist_ok=True already tolerates a directory that is present.
    self.ckpt_dir = Path(args.ckpt_dir)
    self.ckpt_dir.mkdir(parents=True, exist_ok=True)

    # An empty string means "do not restore anything".
    self.load_ckpt = args.load_ckpt
    if self.load_ckpt != '':
        self.load_checkpoint(self.load_ckpt)

    # Dataset
    self.data_loader = return_data(args)
def __init__(self, config):
    """Set up the solver: settings, data, image/audio/position models, optimizer.

    Args:
        config: Configuration object supporting both attribute access
            (``epoch``, ``batch_size``, ``beta``, ``lr``, ``K``,
            ``audio_feature``, ``image_feature``, ``debug``, ``dataset``,
            ``model_type``, ``ckpt_dir``, ``load_ckpt``) and dict-style
            ``get`` for the optional ``'anneal_rate'``, ``'num_sample'``,
            ``'use_segment'`` and ``'downsample_method'`` entries. It is
            also passed unchanged to ``return_data``.

    Raises:
        ValueError: If ``config.audio_feature`` is neither ``'mfcc'`` nor
            ``'cpc'``, or ``config.model_type`` is neither ``'blstm'`` nor
            ``'mlp'``.
    """
    self.config = config

    self.cuda = torch.cuda.is_available()
    self.epoch = config.epoch
    self.batch_size = config.batch_size
    self.beta = config.beta
    self.lr = config.lr
    self.anneal_rate = config.get('anneal_rate', 3e-6)
    self.num_sample = config.get('num_sample', 1)
    self.use_segment = config.get('use_segment', False)
    self.ds_method = config.get('downsample_method', 'average')
    self.eps = 1e-9

    # Input dimensionality depends on the audio feature type. An unknown
    # type is rejected here instead of surfacing later as a missing
    # ``input_size`` attribute.
    if config.audio_feature == 'mfcc':
        self.input_size = 80
    elif config.audio_feature == 'cpc':
        self.input_size = 256
    else:
        raise ValueError(
            f'Audio feature type {config.audio_feature} not supported'
        )

    if self.use_segment and self.ds_method == 'resample':
        # input size is the concatenation of 4 frames for resample
        self.input_size = 4 * self.input_size

    self.K = config.K
    self.global_iter = 0
    self.global_epoch = 0
    self.audio_feature = config.audio_feature
    self.image_feature = config.image_feature
    self.debug = config.debug
    self.dataset = config.dataset

    # Dataset
    self.data_loader = return_data(config)
    train_preprocessor = self.data_loader['train'].dataset.preprocessor
    self.n_class = train_preprocessor.num_tokens
    self.class_names = train_preprocessor.tokens
    print(f'visual label class: {self.n_class}')

    # Image model
    # NOTE: restoring 'best_image_model.pth' from config.image_model_dir
    # via load_state_dict is currently disabled.
    self.image_net = Resnet34(pretrained=True, n_class=self.n_class)
    self.image_net = cuda(self.image_net, self.cuda)

    # Audio model
    if config.model_type == 'blstm':
        self.audio_net = cuda(
            GumbelBLSTM(self.K,
                        input_size=self.input_size,
                        n_layers=1,
                        n_class=self.n_class,
                        ds_ratio=1,
                        bidirectional=True),
            self.cuda)
        # K is doubled for the bidirectional model; the doubled value is
        # what the position predictor below receives as input_size.
        self.K = 2 * self.K
    elif config.model_type == 'mlp':
        self.audio_net = cuda(
            GumbelMLP(self.K,
                      input_size=self.input_size,
                      n_class=self.n_class),
            self.cuda)
    else:
        raise ValueError(f'Model type {config.model_type} not defined')

    self.position_model = cuda(
        PositionPredictor(input_size=self.K,
                          vocab_size=self.n_class,
                          embedding_size=50),
        self.cuda)

    # Only the audio network's parameters are handed to the optimizer.
    trainables = list(self.audio_net.parameters())
    self.optim = optim.Adam(trainables, lr=self.lr, betas=(0.5, 0.999))
    self.scheduler = lr_scheduler.ExponentialLR(self.optim, gamma=0.97)

    # History
    # Defaults are installed BEFORE load_checkpoint() runs so that a
    # history restored by the loader is not reset afterwards.
    # NOTE(review): load_checkpoint is not visible here; if it never
    # touches self.history this ordering is simply a no-op.
    self.history = {
        'f1': 0.,
        'token_f1': 0.,
        'abx': 0.5,
        'total_loss': 0.,
        'avg_loss': 0.,
        'epoch': 0,
        'iter': 0,
    }

    # exist_ok=True already tolerates a directory that is present.
    self.ckpt_dir = Path(config.ckpt_dir)
    self.ckpt_dir.mkdir(parents=True, exist_ok=True)

    self.load_ckpt = config.load_ckpt
    if self.load_ckpt:
        self.load_checkpoint()
def __init__(self, config):
    """Set up the solver: settings, loss, data, audio/phone/align models, optimizer.

    Args:
        config: Configuration object supporting both attribute access
            (``epoch``, ``batch_size``, ``lr``, ``K``, ``audio_feature``,
            ``image_feature``, ``debug``, ``dataset``, ``ckpt_dir``,
            ``load_ckpt``, ``mode`` and, for wav2vec2 features,
            ``wav2vec_path``) and dict-style ``get`` for the optional
            ``'num_layers'``, ``'max_normalize'``, ``'loss_type'``,
            ``'beta_f_measure'``, ``'weight_word_loss'``,
            ``'weight_phone_loss'``, ``'ignore_index'`` and ``'optim'``
            entries. It is also passed unchanged to ``return_data`` and
            ``HMMPronunciator``.

    Raises:
        ValueError: If ``config.audio_feature`` is not one of ``'mfcc'``,
            ``'wav2vec2'`` or ``'cpc'``, or the loss type is not one of
            ``'macro_token_floss'`` or ``'binary_cross_entropy'``.
    """
    self.config = config

    self.cuda = torch.cuda.is_available()
    self.beta = 1.  # XXX: hard-coded, not read from config
    self.epoch = config.epoch
    self.batch_size = config.batch_size
    self.lr = config.lr
    self.n_layers = config.get('num_layers', 3)
    self.eps = 1e-9

    # Per-feature-type settings: optional feature extractor, input size
    # and hop length.
    if config.audio_feature == 'mfcc':
        self.audio_feature_net = None
        self.input_size = 80
        self.hop_len_ms = 10
    elif config.audio_feature == 'wav2vec2':
        # First model of the ensemble loaded from config.wav2vec_path.
        self.audio_feature_net = cuda(
            fairseq.checkpoint_utils.load_model_ensemble_and_task(
                [config.wav2vec_path])[0][0],
            self.cuda)
        # The feature extractor is frozen.
        for p in self.audio_feature_net.parameters():
            p.requires_grad = False
        self.input_size = 512
        self.hop_len_ms = 20
    elif config.audio_feature == 'cpc':
        self.audio_feature_net = None
        self.input_size = 256
        self.hop_len_ms = 10
    else:
        raise ValueError(
            f"Feature type {config.audio_feature} not supported")

    self.K = config.K
    self.global_iter = 0
    self.global_epoch = 0
    self.audio_feature = config.audio_feature
    self.image_feature = config.image_feature
    self.debug = config.debug
    self.dataset = config.dataset
    self.max_normalize = config.get('max_normalize', False)
    self.loss_type = config.get('loss_type', 'macro_token_floss')
    self.beta_f_measure = config.get('beta_f_measure', 0.3)
    self.weight_word_loss = config.get('weight_word_loss', 1.0)
    self.weight_phone_loss = config.get('weight_phone_loss', 0.0)

    # exist_ok=True already tolerates a directory that is present.
    self.ckpt_dir = Path(config.ckpt_dir)
    self.ckpt_dir.mkdir(parents=True, exist_ok=True)

    if self.loss_type == 'macro_token_floss':
        self.criterion = MacroTokenFLoss(beta=self.beta_f_measure)
    elif self.loss_type == 'binary_cross_entropy':
        self.criterion = nn.BCELoss()
    else:
        raise ValueError(f'Invalid loss type {self.loss_type}')

    # Dataset
    self.data_loader = return_data(config)
    self.ignore_index = config.get('ignore_index', -100)
    train_set = self.data_loader['train'].dataset
    preprocessor = train_set.preprocessor
    self.n_visual_class = preprocessor.num_visual_words
    self.n_phone_class = preprocessor.num_tokens
    self.visual_words = preprocessor.visual_words
    self.phone_set = preprocessor.tokens
    self.max_feat_len = train_set.max_feat_len
    self.max_word_len = train_set.max_word_len
    print(f'Number of visual label classes = {self.n_visual_class}')
    print(f'Number of phone classes = {self.n_phone_class}')
    print(f'Max normalized: {self.max_normalize}')

    # Models
    self.audio_net = cuda(
        BLSTM(self.K,
              n_layers=self.n_layers,
              n_class=self.n_phone_class,
              input_size=self.input_size,
              ds_ratio=1,
              bidirectional=True),
        self.cuda)

    self.phone_net = cuda(
        HMMPronunciator(self.visual_words,
                        self.phone_set,
                        config=config,
                        ignore_index=self.ignore_index),
        self.cuda)
    self.phone_net.train_model()
    self.align_net = cuda(LinearPositionAligner(scale=0.),
                          self.cuda)  # XXX: hard-coded scale

    # Only the audio network's parameters are handed to the optimizer.
    trainables = list(self.audio_net.parameters())
    optim_type = config.get('optim', 'adam')
    if optim_type == 'sgd':
        self.optim = optim.SGD(trainables, lr=self.lr)
    else:
        self.optim = optim.Adam(trainables, lr=self.lr, betas=(0.5, 0.999))
    self.scheduler = lr_scheduler.ExponentialLR(self.optim, gamma=0.97)

    # History
    # Defaults are installed BEFORE load_checkpoint() runs so that a
    # history restored by the loader is not reset afterwards.
    # NOTE(review): load_checkpoint is not visible here; if it never
    # touches self.history this ordering is simply a no-op.
    self.history = {
        'token_f1': 0.,
        'visual_token_f1': 0.,
        'loss': 0.,
        'epoch': 0,
        'iter': 0,
    }

    # A checkpoint is loaded on request and always in test/cluster mode.
    self.load_ckpt = config.load_ckpt
    if self.load_ckpt or config.mode in ['test', 'cluster']:
        self.load_checkpoint()
def __init__(self, config):
    """Set up the solver: settings, data, the GaussianBLSTM audio net, optimizer.

    Args:
        config: Configuration object supporting both attribute access
            (``epoch``, ``batch_size``, ``beta``, ``lr``, ``K``,
            ``audio_feature``, ``image_feature``, ``debug``, ``dataset``,
            ``ckpt_dir``, ``load_ckpt``, ``mode`` and, for wav2vec2
            features, ``wav2vec_path``) and dict-style ``get`` for the
            optional ``'num_layers'``, ``'weight_word_loss'``,
            ``'weight_phone_loss'``, ``'weight_evidence'``,
            ``'anneal_rate'``, ``'num_sample'``, ``'ignore_index'`` and
            ``'optim'`` entries. It is also passed unchanged to
            ``return_data``.

    Raises:
        ValueError: If ``config.audio_feature`` is neither ``'mfcc'`` nor
            ``'wav2vec2'``.
    """
    self.config = config
    self.cuda = torch.cuda.is_available()

    # Required settings
    self.epoch = config.epoch
    self.batch_size = config.batch_size
    self.beta = config.beta
    self.lr = config.lr

    # Optional settings with their defaults
    self.n_layers = config.get('num_layers', 1)
    self.weight_word_loss = config.get('weight_word_loss', 1.)
    self.weight_phone_loss = config.get('weight_phone_loss', 0.)
    self.weight_evidence = config.get('weight_evidence', 1.)
    self.anneal_rate = config.get('anneal_rate', 3e-6)
    self.num_sample = config.get('num_sample', 1)
    self.eps = 1e-9

    # Per-feature-type settings: optional feature extractor, input size
    # and hop length.
    if config.audio_feature == 'mfcc':
        self.audio_feature_net = None
        self.input_size, self.hop_len_ms = 80, 10
    elif config.audio_feature == 'wav2vec2':
        ensemble = fairseq.checkpoint_utils.load_model_ensemble_and_task(
            [config.wav2vec_path])
        # First model of the loaded ensemble, kept frozen.
        self.audio_feature_net = cuda(ensemble[0][0], self.cuda)
        for param in self.audio_feature_net.parameters():
            param.requires_grad = False
        self.input_size, self.hop_len_ms = 512, 20
    else:
        raise ValueError(
            f"Feature type {config.audio_feature} not supported")

    self.K = config.K
    self.global_iter = 0
    self.global_epoch = 0
    self.audio_feature = config.audio_feature
    self.image_feature = config.image_feature
    self.debug = config.debug
    self.dataset = config.dataset

    # Dataset
    self.data_loader = return_data(config)
    self.ignore_index = config.get('ignore_index', -100)
    preprocessor = self.data_loader['train'].dataset.preprocessor
    self.n_visual_class = preprocessor.num_visual_words
    self.n_phone_class = preprocessor.num_tokens
    self.visual_words = preprocessor.visual_words
    print(f'Number of visual label classes = {self.n_visual_class}')

    # Output width: visual classes plus twice the input size, extended by
    # the phone classes only when the phone loss is enabled.
    output_size = self.n_visual_class + 2 * self.input_size
    if self.weight_phone_loss > 0:
        output_size = output_size + self.n_phone_class

    audio_model = GaussianBLSTM(self.K,
                                input_size=self.input_size,
                                n_layers=self.n_layers,
                                n_class=output_size,
                                ds_ratio=1,
                                bidirectional=True)
    self.audio_net = cuda(audio_model, self.cuda)

    # Optimizer: SGD on request, Adam otherwise.
    trainables = list(self.audio_net.parameters())
    if config.get('optim', 'adam') == 'sgd':
        self.optim = optim.SGD(trainables, lr=self.lr)
    else:
        self.optim = optim.Adam(trainables, lr=self.lr, betas=(0.5, 0.999))
    self.scheduler = lr_scheduler.ExponentialLR(self.optim, gamma=0.97)

    # History
    self.history = {
        'acc': 0.,
        'token_f1': 0.,
        'loss': 0.,
        'epoch': 0,
        'iter': 0,
        'temp': 1.,
    }

    # Checkpoints
    self.ckpt_dir = Path(config.ckpt_dir)
    if not self.ckpt_dir.exists():
        self.ckpt_dir.mkdir(parents=True, exist_ok=True)

    # A checkpoint is loaded on request and always in test/cluster mode.
    self.load_ckpt = config.load_ckpt
    if self.load_ckpt or config.mode in ['test', 'cluster']:
        self.load_checkpoint()