def setup(self, training):
    """Build the TF graph for this model.

    Creates the input/label/learning-rate placeholders and the ResNet
    forward pass, then either the training ops (loss, momentum optimizer,
    train_op, saver) or the inference ops (preds, loader) depending on
    `training`.
    """
    print('---Setup input interfaces...')
    self.inputs = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
    self.labels = tf.placeholder(tf.int32)
    # Note: this placeholder allows us to set the learning rate for each epoch
    self.learning_rate = tf.placeholder(tf.float32)

    print('---Setup the network...')
    network = ResNet(self.conf.resnet_version, self.conf.resnet_size,
                     self.conf.num_classes, self.conf.first_num_filters)

    if not training:
        # Inference-only graph: forward pass + a Saver to restore weights.
        print('---Setup testing components...')
        logits = network(self.inputs, False)
        self.preds = tf.argmax(logits, axis=-1)
        print('---Setup the Saver for loading models...')
        self.loader = tf.train.Saver(var_list=tf.global_variables())
        return

    print('---Setup training components...')
    # Forward pass in training mode; argmax predictions used for validation.
    logits = network(self.inputs, True)
    self.preds = tf.argmax(logits, axis=-1)

    # Weight decay: L2 penalty on kernel variables only
    # (biases / batch-norm parameters are excluded).
    kernels = [v for v in tf.trainable_variables() if 'kernel' in v.name]
    l2_loss = self.conf.weight_decay * tf.add_n(
        [tf.nn.l2_loss(k) for k in kernels])

    ### YOUR CODE HERE
    # Mean softmax cross entropy over the batch, plus the decay term.
    per_example_ce = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=self.labels, logits=logits)
    mean_ce = tf.reduce_mean(per_example_ce)
    self.losses = mean_ce + l2_loss
    ### END CODE HERE

    # momentum optimizer with momentum=0.9
    optimizer = tf.train.MomentumOptimizer(
        learning_rate=self.learning_rate, momentum=0.9)

    ### YOUR CODE HERE
    # Run the batch-norm moving-average updates (UPDATE_OPS) as part of
    # every optimization step.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        self.train_op = optimizer.minimize(self.losses)
    ### END CODE HERE

    print('---Setup the Saver for saving models...')
    self.saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=0)
def setup(self, training):
    """Build the TF graph for this model.

    Creates placeholders and the ResNet forward pass, then either the
    training ops (loss + summaries, momentum optimizer, train_op, saver)
    or the inference ops (preds, loader) depending on `training`.
    """
    print('---Setup input interfaces...')
    self.inputs = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
    self.labels = tf.placeholder(tf.int32)
    # Note: this placeholder allows us to set the learning rate for each epoch
    self.learning_rate = tf.placeholder(tf.float32)

    print('---Setup the network...')
    network = ResNet(self.conf.resnet_version, self.conf.resnet_size,
                     self.conf.num_classes, self.conf.first_num_filters)

    if training:
        print('---Setup training components...')
        # compute logits
        logits = network(self.inputs, True)
        # predictions for validation
        self.preds = tf.argmax(logits, axis=-1)
        # weight decay: L2 penalty on kernel variables only
        l2_loss = self.conf.weight_decay * tf.add_n(
            [tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'kernel' in v.name])
        ### YOUR CODE HERE
        # cross entropy (mean over the batch)
        cross_entropy_loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=self.labels))
        # final loss function
        self.losses = l2_loss + cross_entropy_loss
        tf.summary.scalar('loss', self.losses)
        tf.summary.scalar('l2_loss', tf.convert_to_tensor(l2_loss))
        tf.summary.scalar('cross_entropy_loss', cross_entropy_loss)
        # BUG FIX: was 'learning rate' — spaces are illegal in TF op/summary
        # names and get silently sanitized.
        tf.summary.scalar('learning_rate', self.learning_rate)
        ### END CODE HERE

        # momentum optimizer with momentum=0.9
        optimizer = tf.train.MomentumOptimizer(
            learning_rate=self.learning_rate, momentum=0.9)

        ### YOUR CODE HERE
        # train_op
        grad_var_pairs = optimizer.compute_gradients(
            loss=self.losses, var_list=tf.trainable_variables())
        # BUG FIX: apply_gradients must run under a control dependency on
        # UPDATE_OPS, otherwise the batch-norm moving mean/variance are never
        # updated (the other setup() variant in this file does this correctly).
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            self.train_op = optimizer.apply_gradients(grads_and_vars=grad_var_pairs)
        self.summary_op = tf.summary.merge_all()
        ### END CODE HERE

        print('---Setup the Saver for saving models...')
        self.saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=0)
    else:
        print('---Setup testing components...')
        # compute predictions
        logits = network(self.inputs, False)
        self.preds = tf.argmax(logits, axis=-1)
        print('---Setup the Saver for loading models...')
        self.loader = tf.train.Saver(var_list=tf.global_variables())
def __init__(self, configs):
    """Store the config dict and build the ResNet it describes.

    configs must provide 'block' and 'depth' for the ResNet constructor.
    (Removed commented-out dead code referencing older MyNetwork variants.)
    """
    self.configs = configs
    self.network = ResNet(configs['block'], configs['depth'])
    print(self.network)
class MyModel(object):
    """Training/evaluation wrapper around a ResNet classifier.

    Owns the network, the SGD training loop (piecewise LR schedule plus
    gradient accumulation to simulate growing batch sizes), checkpointing,
    and evaluation. All compute paths move data to CUDA.
    """

    def __init__(self, configs):
        # configs: dict with at least 'block' and 'depth' for the ResNet,
        # plus the hyper-parameters read by train().
        self.configs = configs
        self.network = ResNet(configs['block'], configs['depth'])
        print(self.network)

    def model_setup(self):
        """Initialize weights: He (fan-out) for convs, (1, 0) for batch norm."""
        for m in self.network.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def train(self, x_train, y_train, configs, x_valid=None, y_valid=None):
        """Train the network; return a dict of per-epoch statistics.

        configs keys used: 'batch_size', 'max_epoch', 'learning_rate',
        'weight_decay', 'lr_schedule' (dict: epoch threshold -> lr).
        Larger effective batch sizes later in training are simulated with
        gradient accumulation (batches_per_update), not by rebuilding the
        DataLoader. If validation data is given, the best-val-loss model
        is checkpointed via save().
        """
        # initialize model parameters
        self.model_setup()
        batch_size = configs['batch_size']
        max_epoch = configs['max_epoch']
        dataset = MyDataset(x_train, y_train, training=True)
        train_stats = {'epoch': [], 'bs': [], 'lr': [], 'loss': [],
                       'val_loss': [], 'val_score': []}

        def get_lr(optimizer):
            # All param groups share one LR; read it from the first group.
            for param_group in optimizer.param_groups:
                return param_group['lr']

        def set_lr(optimizer, lr):
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        def schedule_lr(epoch, config):
            # Piecewise-constant schedule: the first threshold the epoch is
            # below wins; past the last threshold the final LR is kept.
            # Keys are sorted so the result does not depend on dict order
            # (the original relied on insertion order).
            keys = sorted(config.keys())
            for key in keys:
                if epoch < key:
                    return config[key]
            return config[keys[-1]]

        torch.backends.cudnn.benchmark = True
        # From epoch 0/100/140/160 on, multiply the effective batch size by
        # the matching factor in `iters` (all 1 here, i.e. disabled).
        batchsize_schedule = [0, 100, 140, 160]
        iters = [1, 1, 1, 1]
        batches_per_update = 1  # gradient-accumulation steps per optimizer step
        self.network.train()
        # initialize optimizer and loss function
        optimizer = torch.optim.SGD(self.network.parameters(),
                                    lr=configs["learning_rate"],
                                    weight_decay=configs['weight_decay'])
        criterion = nn.CrossEntropyLoss()
        loader = DataLoader(dataset, batch_size, shuffle=False, num_workers=0)
        best_epoch = 0
        # BUG FIX: was 10 — a model whose first val loss exceeded 10 would
        # never be checkpointed.
        best_val = float('inf')
        self.network = self.network.cuda()
        for _ in range(max_epoch):
            self.network.train()
            set_lr(optimizer, schedule_lr(_, configs['lr_schedule']))
            # Grow the effective batch size once its scheduled epoch arrives.
            for b, bs in zip(batchsize_schedule, iters):
                if _ >= b:
                    new_bs = configs["batch_size"] * bs
                    if new_bs > batch_size:
                        batches_per_update = bs
                        batch_size = new_bs
                        print("batchsize {}".format(new_bs))
            train_stats['epoch'].append(_)
            train_stats['lr'].append(get_lr(optimizer))
            train_stats['bs'].append(batch_size)
            total_loss = 0
            its = 0
            ops = 0  # optimizer steps taken this epoch
            optimizer.zero_grad()
            for x_batch, y_batch in tqdm.tqdm(loader, desc="Epoch {}".format(_)):
                its += 1
                x_batch = x_batch.float().cuda()
                y_batch = y_batch.long().cuda()
                y_pred = self.network(x_batch)
                loss = criterion(y_pred, y_batch)
                # Scale so accumulated gradients average over the effective batch.
                loss = loss/batches_per_update
                total_loss += float(loss)
                loss.backward()
                # Step on each accumulation boundary and flush on the final
                # batch. BUG FIX: was `its == x_batch.shape[0]`, which compared
                # the iteration count to the *batch size*, so a trailing
                # partial accumulation could be silently dropped.
                if its % batches_per_update == 0 or its == len(loader):
                    optimizer.step()
                    optimizer.zero_grad()
                    ops += 1
            total_loss /= its
            print(total_loss)
            train_stats['loss'].append(total_loss)
            val_loss = 0
            # do validation
            if x_valid is not None and y_valid is not None:
                score, val_loss = self.evaluate(x_valid, y_valid)
                print("score = {:.3f}% ({:.4f}) in validation set.\n".format(
                    score*100, val_loss))
                if val_loss < best_val:
                    best_val = val_loss
                    best_epoch = _
                    print("Best loss yet!")
                    self.save(acc=score, epoch=_)
                else:
                    print("best was {} epochs ago".format(_-best_epoch))
                train_stats['val_loss'].append(val_loss)
                train_stats['val_score'].append(score)
        return train_stats

    def evaluate(self, x, y):
        """Return (accuracy, mean per-batch CE loss) of the network on (x, y)."""
        self.network.eval()
        crit = nn.CrossEntropyLoss()
        dataset = MyDataset(x, y, training=False)
        loader = DataLoader(dataset, batch_size=min(1000, x.shape[0]))
        score = 0
        total_loss = 0
        num_batches = 0
        with torch.no_grad():
            for x_sample, y_sample in loader:
                num_batches += 1
                x_sample = x_sample.float().cuda()
                y_sample = y_sample.long().cuda()
                y_prob = self.network(x_sample)
                loss = crit(y_prob, y_sample)
                preds = torch.argmax(y_prob, dim=1)
                score += torch.eq(preds, y_sample).sum().item()
                total_loss += float(loss)
        score /= x.shape[0]
        # BUG FIX: was `total_loss /= batch` (the last 0-based enumerate
        # index, i.e. count-1) — overstated the mean loss and raised
        # ZeroDivisionError when there was exactly one batch.
        total_loss /= num_batches
        return score, total_loss

    def predict_prob(self, x):
        """Return softmax class probabilities for tensor x as a numpy array."""
        with torch.no_grad():
            probs = self.network(x.cuda())
            probs = F.softmax(probs, dim=1).cpu().numpy()
        return probs

    def save(self, acc=0, epoch=0):
        """Checkpoint weights + configs (and acc/epoch metadata) to save_dir/name.ckpt."""
        print("Saving...")
        chkpt = {
            'weights': self.network.state_dict(),
            'configs': self.configs,
            'acc': acc,
            'epoch': epoch,
        }
        path = os.path.abspath(self.configs['save_dir'])
        # makedirs(exist_ok=True) also creates missing parents and avoids the
        # exists()/mkdir() race of the original.
        os.makedirs(path, exist_ok=True)
        torch.save(chkpt, os.path.join(path, self.configs['name'] + '.ckpt'))

    def load(self):
        """Rebuild the network from the checkpoint written by save(), on GPU."""
        fn = os.path.join(self.configs['save_dir'], self.configs['name'] + '.ckpt')
        chkpt = torch.load(fn)
        print("Loading from file: ")
        configs = chkpt['configs']
        print(configs)
        self.network = ResNet(configs['block'], configs['depth'])
        self.network.load_state_dict(chkpt['weights'])
        self.network.cuda()
        return