model.build((None, HEIGHT, WIDTH, 3))
# load_checkpoint("./drive/MyDrive/models/ImageNet-86/ckpt", model=backbone)

criterion = DetectionLoss(box_loss_fn=iou_loss(mode='ciou', offset=True),
                          cls_loss_fn=focal_loss(alpha=0.25, gamma=2.0),
                          centerness=True)

base_lr = 0.0025
epochs = 60
# `mul` and `steps_per_epoch` are defined earlier in the script (e.g. the
# batch-size multiplier used for distributed training).
lr_schedule = CosineLR(base_lr * mul, steps_per_epoch, epochs, min_lr=0,
                       warmup_min_lr=base_lr, warmup_epoch=5)
optimizer = SGD(lr_schedule, momentum=0.9, nesterov=True, weight_decay=1e-4)

train_metrics = {
    'loss': Mean(),
}
eval_metrics = {
    'loss': MeanMetricWrapper(criterion),
}

def output_transform(output):
    bbox_preds, cls_scores, centerness = get(
        ['bbox_pred', 'cls_score', 'centerness'], output)
    # The original snippet is truncated mid-call; passing `centerness`
    # through is an assumption based on the keys extracted above.
    return postprocess(bbox_preds, cls_scores, bbox_coder, centerness)
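# For reference, a minimal sketch of the schedule CosineLR is assumed to
# implement above: linear warmup from `warmup_min_lr` to the peak LR over
# `warmup_epoch` epochs, then cosine decay down to `min_lr`. Names are
# illustrative, not the library's internals.
import math

def cosine_lr(step, base_lr, steps_per_epoch, epochs,
              min_lr=0.0, warmup_min_lr=0.0, warmup_epoch=0):
    warmup_steps = warmup_epoch * steps_per_epoch
    total_steps = epochs * steps_per_epoch
    if step < warmup_steps:
        # Linear warmup.
        return warmup_min_lr + (base_lr - warmup_min_lr) * step / warmup_steps
    # Cosine decay over the remaining steps.
    t = (step - warmup_steps) / (total_steps - warmup_steps)
    return min_lr + (base_lr - min_lr) * 0.5 * (1 + math.cos(math.pi * t))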
# (The head of this snippet is truncated in the original; the dataset call
#  is reconstructed from the identical CIFAR-100 pattern later in this
#  section.)
ds_train, ds_test, steps_per_epoch, test_steps = make_cifar100_dataset(
    batch_size, eval_batch_size, transform, zip_transform)

setup_runtime(fp16=True)
ds_train, ds_test = distribute_datasets(ds_train, ds_test)

model = ResNet(depth=16, k=8, num_classes=100)
model.build((None, 32, 32, 3))
model.summary()

criterion = CrossEntropy(label_smoothing=params['label_smoothing'])

base_lr = params['base_lr']
epochs = 50
lr_schedule = CosineLR(base_lr, steps_per_epoch, epochs=epochs, min_lr=0)
optimizer = SGD(lr_schedule, momentum=0.9,
                weight_decay=params['weight_decay'], nesterov=True)

train_metrics = {
    'loss': Mean(),
    'acc': CategoricalAccuracy(),
}
eval_metrics = {
    'loss': CategoricalCrossentropy(from_logits=True),
    'acc': CategoricalAccuracy(),
}

learner = SuperLearner(
    model, criterion, optimizer,
    train_metrics=train_metrics,
    eval_metrics=eval_metrics,
    # Truncated in the original; the remaining keywords follow the pattern
    # of the other SuperLearner examples and are an assumption.
    work_dir="./CIFAR100",
    multiple_steps=True)
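# The training call is missing from the truncated snippet; a hedged
# completion following the fit pattern used by the other examples here:
learner.fit(ds_train, epochs, ds_test, val_freq=1,
            steps_per_epoch=steps_per_epoch, val_steps=test_steps)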
def objective(trial: optuna.Trial):
    base_lr = trial.suggest_float("base_lr", 0.001, 0.05, step=0.001)
    weight_decay = trial.suggest_loguniform("weight_decay", 1e-5, 1e-3)
    ema = trial.suggest_categorical("ema", ["true", "false"])
    ema_decay = trial.suggest_loguniform("ema_decay", 0.99, 0.9999) if ema == 'true' else None

    @curry
    def transform(image, label, training):
        image = pad(image, 2)
        image, label = to_tensor(image, label)
        image = normalize(image, [0.1307], [0.3081])
        label = tf.one_hot(label, 10)
        return image, label

    batch_size = 128
    eval_batch_size = 256
    ds_train, ds_test, steps_per_epoch, test_steps = make_mnist_dataset(
        batch_size, eval_batch_size, transform, sub_ratio=0.01)

    model = LeNet5()
    model.build((None, 32, 32, 1))

    criterion = CrossEntropy()

    epochs = 20
    lr_schedule = CosineLR(base_lr, steps_per_epoch, epochs=epochs, min_lr=0)
    optimizer = SGD(lr_schedule, momentum=0.9, nesterov=True,
                    weight_decay=weight_decay)

    train_metrics = {
        'loss': Mean(),
        'acc': CategoricalAccuracy(),
    }
    eval_metrics = {
        'loss': CategoricalCrossentropy(from_logits=True),
        'acc': CategoricalAccuracy(),
    }

    learner = SuperLearner(
        model, criterion, optimizer,
        train_metrics=train_metrics,
        eval_metrics=eval_metrics,
        work_dir="./MNIST",
        multiple_steps=True)

    callbacks = [OptunaReportIntermediateResult('acc', trial)]
    # if ema == 'true':
    #     callbacks.append(EMA(ema_decay))

    learner.fit(ds_train, epochs, ds_test, val_freq=2,
                steps_per_epoch=steps_per_epoch, val_steps=test_steps,
                callbacks=callbacks)

    return learner.metric_history.get_metric('acc', "eval")[-1]
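# A minimal driver for the objective above, using Optuna's public API;
# the trial count is illustrative.
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=20)
print(study.best_params, study.best_value)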
# (The opening of this call is truncated in the original; `set_defaults`
#  and the 'bn' key are assumptions based on the surrounding config style.)
set_defaults({
    'bn': {
        'affine': False,
        'track_running_stats': False,
    },
})
set_primitives('tiny')

model = Network(4, 5)
model.build((None, 32, 32, 3))

criterion = CrossEntropy()

base_lr = 0.025
epochs = 240
lr_schedule = CosineLR(base_lr, steps_per_epoch, epochs=epochs, min_lr=1e-3)
optimizer_model = SGD(lr_schedule, momentum=0.9, weight_decay=3e-4)
optimizer_arch = AdamW(learning_rate=3e-4, beta_1=0.5, weight_decay=1e-3)

train_metrics = {
    'loss': Mean(),
    'acc': CategoricalAccuracy(),
}
eval_metrics = {
    'loss': CategoricalCrossentropy(from_logits=True),
    'acc': CategoricalAccuracy(),
}

learner = DARTSLearner(
    model, criterion, optimizer_arch, optimizer_model,
    # Truncated in the original; the metric keywords follow the pattern of
    # the SuperLearner examples and are an assumption.
    train_metrics=train_metrics,
    eval_metrics=eval_metrics)
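# For reference, a first-order sketch of the bilevel update a DARTS learner
# runs each step: architecture parameters are updated on a validation batch,
# then model weights on a training batch. Accessor names here are
# illustrative assumptions, not the library's API.
import tensorflow as tf

def darts_step(model, criterion, train_batch, val_batch,
               optimizer_arch, optimizer_model):
    # Architecture step on validation data.
    x_val, y_val = val_batch
    with tf.GradientTape() as tape:
        arch_loss = criterion(y_val, model(x_val, training=True))
    arch_vars = model.arch_parameters()  # assumed accessor for the alphas
    optimizer_arch.apply_gradients(
        zip(tape.gradient(arch_loss, arch_vars), arch_vars))

    # Weight step on training data.
    x_train, y_train = train_batch
    with tf.GradientTape() as tape:
        loss = criterion(y_train, model(x_train, training=True))
    model_vars = model.model_parameters()  # assumed accessor for the weights
    optimizer_model.apply_gradients(
        zip(tape.gradient(loss, model_vars), model_vars))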
        # (tail of ConvNet.call; the earlier layers are truncated in the
        #  original)
        x = self.normal2(x, hardwts, index)
        x = self.reduce2(x)
        x = self.normal3(x, hardwts, index)
        x = self.avg_pool(x)
        x = self.fc(x)
        return x

model = ConvNet()
model.build((None, 32, 32, 1))

epochs = 200
lr_schedule = CosineLR(0.05, steps_per_epoch, epochs, min_lr=0)
optimizer_model = SGD(lr_schedule, momentum=0.9, weight_decay=1e-4,
                      nesterov=True, exclude_from_weight_decay=['alpha_normal'])
optimizer_arch = Adam(1e-3)

train_loss = tf.keras.metrics.Mean()
train_acc = tf.keras.metrics.SparseCategoricalAccuracy()

@tf.function(jit_compile=True)
def train_step(batch):
    x, y = batch
    with tf.GradientTape() as tape:
        p = model(x, training=True)
        per_example_loss = tf.keras.losses.sparse_categorical_crossentropy(
            y, p, from_logits=True)
        loss = tf.reduce_mean(per_example_loss)
    # Truncated in the original; a hedged continuation that routes the
    # architecture parameters (the alphas) to optimizer_arch and everything
    # else to optimizer_model.
    variables = model.trainable_variables
    grads = tape.gradient(loss, variables)
    arch = [(g, v) for g, v in zip(grads, variables) if 'alpha' in v.name]
    weights = [(g, v) for g, v in zip(grads, variables) if 'alpha' not in v.name]
    optimizer_arch.apply_gradients(arch)
    optimizer_model.apply_gradients(weights)
    train_loss.update_state(per_example_loss)
    train_acc.update_state(y, p)
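# A minimal driver loop for the step function above; `ds_train` and
# `steps_per_epoch` are assumed to be defined as in the other examples.
for epoch in range(epochs):
    for batch in ds_train.take(steps_per_epoch):
        train_step(batch)
    print(f"epoch {epoch}: loss={float(train_loss.result()):.4f} "
          f"acc={float(train_acc.result()):.4f}")
    train_loss.reset_states()
    train_acc.reset_states()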
def objective(trial: optuna.Trial):
    cutout_prob = trial.suggest_float("cutout_prob", 0, 1.0, step=0.1)
    mixup_alpha = trial.suggest_float("mixup_alpha", 0, 0.5, step=0.1)
    label_smoothing = trial.suggest_uniform("label_smoothing", 0, 0.2)
    base_lr = trial.suggest_float("base_lr", 0.01, 0.2, step=0.01)
    weight_decay = trial.suggest_loguniform("weight_decay", 1e-5, 1e-3)
    ema = trial.suggest_categorical("ema", ["true", "false"])
    ema_decay = trial.suggest_loguniform("ema_decay", 0.995, 0.9999) if ema == 'true' else None

    @curry
    def transform(image, label, training):
        if training:
            image = random_crop(image, (32, 32), (4, 4))
            image = tf.image.random_flip_left_right(image)
            image = autoaugment(image, "CIFAR10")

        image, label = to_tensor(image, label)
        image = normalize(image, [0.491, 0.482, 0.447], [0.247, 0.243, 0.262])

        if training:
            image = random_apply(cutout(length=16), cutout_prob, image)

        label = tf.one_hot(label, 100)
        return image, label

    def zip_transform(data1, data2):
        return mixup(data1, data2, alpha=mixup_alpha)

    batch_size = 128
    eval_batch_size = 2048
    ds_train, ds_test, steps_per_epoch, test_steps = make_cifar100_dataset(
        batch_size, eval_batch_size, transform, zip_transform)

    setup_runtime(fp16=True)
    ds_train, ds_test = distribute_datasets(ds_train, ds_test)

    model = ResNet(depth=16, k=8, num_classes=100)
    model.build((None, 32, 32, 3))
    model.summary()

    criterion = CrossEntropy(label_smoothing=label_smoothing)

    epochs = 50
    lr_schedule = CosineLR(base_lr, steps_per_epoch, epochs=epochs, min_lr=0)
    optimizer = SGD(lr_schedule, momentum=0.9, weight_decay=weight_decay,
                    nesterov=True)

    train_metrics = {
        'loss': Mean(),
        'acc': CategoricalAccuracy(),
    }
    eval_metrics = {
        'loss': CategoricalCrossentropy(from_logits=True),
        'acc': CategoricalAccuracy(),
    }

    learner = SuperLearner(
        model, criterion, optimizer,
        train_metrics=train_metrics,
        eval_metrics=eval_metrics,
        work_dir="./CIFAR100-NNI",
        multiple_steps=True)

    callbacks = [OptunaReportIntermediateResult('acc', trial)]
    if ema == 'true':
        callbacks.append(EMA(ema_decay))

    learner.fit(ds_train, epochs, ds_test, val_freq=1,
                steps_per_epoch=steps_per_epoch, val_steps=test_steps,
                callbacks=callbacks)

    return learner.metric_history.get_metric('acc', "eval")[-1]
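# For reference, mixup (Zhang et al., 2018) blends two batches with a
# Beta(alpha, alpha)-distributed weight. A minimal sketch of what the
# `mixup` zip_transform above is assumed to compute:
import numpy as np

def mixup_sketch(data1, data2, alpha):
    (x1, y1), (x2, y2) = data1, data2
    lam = np.random.beta(alpha, alpha)
    # Convex combination of both images and their one-hot labels.
    return lam * x1 + (1 - lam) * x2, lam * y1 + (1 - lam) * y2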
    # (head of this weight-comparison helper truncated in the original;
    #  `m` is the TensorFlow layer and `mt` its PyTorch counterpart)
    elif "Depth" in str(type(m)):
        w = np.transpose(m.depthwise_kernel.numpy(), [2, 3, 0, 1])
    else:
        w = np.transpose(m.kernel.numpy(), [3, 2, 0, 1])
    wt = mt.weight.detach().numpy()
    dw = w - wt
    print(dw.mean(), dw.std())

weight_decay = 0
num_layers = 8
channels = 16

net1 = NASNet(channels, num_layers, False, 0, 10, FTSO)
net1.build((None, 32, 32, 3))
net2 = DARTS(channels, num_layers, False, 0, 10, FTSO)

optimizer1 = SGD(0.025, momentum=0.9, weight_decay=weight_decay)
optimizer2 = torch.optim.SGD(
    net2.parameters(), 0.025, momentum=0.9, weight_decay=weight_decay)

criterion1 = CrossEntropy()
criterion2 = nn.CrossEntropyLoss()

# net1.summary()
# summary(net2, (3, 32, 32))

copy_conv(net1.stem.layers[0], net2.stem[0])
copy_bn(net1.stem.layers[1], net2.stem[1])
for i in range(num_layers):
    cell1 = net1.cells[i]
    cell2 = net2.cells[i]
    copy_preprocess(cell1.preprocess0, cell2.preprocess0)
    # (loop body truncated in the original; preprocess1 and the cell ops
    #  presumably follow the same pattern)
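# For reference, a minimal sketch of what a copy_bn helper would do,
# mapping Keras BatchNormalization attributes onto their PyTorch
# equivalents; the TF-side attribute names follow tf.keras conventions and
# are an assumption about the library's layers.
import torch

def copy_bn_sketch(m, mt):
    mt.weight.data = torch.from_numpy(m.gamma.numpy())
    mt.bias.data = torch.from_numpy(m.beta.numpy())
    mt.running_mean.data = torch.from_numpy(m.moving_mean.numpy())
    mt.running_var.data = torch.from_numpy(m.moving_variance.numpy())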
batch_size = 128
eval_batch_size = 256
ds_train, ds_test, steps_per_epoch, test_steps = make_mnist_dataset(
    batch_size, eval_batch_size, transform, sub_ratio=0.1)

model = LeNet5()
model.build((None, 32, 32, 1))

criterion = CrossEntropy()

epochs = 20
base_lr = params["learning_rate"]
lr_schedule = CosineLR(base_lr, steps_per_epoch, epochs=epochs, min_lr=0)
optimizer = SGD(lr_schedule, momentum=0.9, nesterov=True,
                weight_decay=params["weight_decay"])

train_metrics = {
    'loss': Mean(),
    'acc': CategoricalAccuracy(),
}
eval_metrics = {
    'loss': CategoricalCrossentropy(from_logits=True),
    'acc': CategoricalAccuracy(),
}

learner = SuperLearner(
    model, criterion, optimizer,
    train_metrics=train_metrics,
    # Truncated in the original; the remaining keywords follow the pattern
    # of the other SuperLearner examples and are an assumption.
    eval_metrics=eval_metrics,
    work_dir="./MNIST",
    multiple_steps=True)
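# The snippet ends before training; a hedged completion. If `params` comes
# from an NNI trial (nni.get_next_parameter()), the final accuracy would be
# reported back with nni.report_final_result.
learner.fit(ds_train, epochs, ds_test, val_freq=2,
            steps_per_epoch=steps_per_epoch, val_steps=test_steps)
acc = learner.metric_history.get_metric('acc', "eval")[-1]
# nni.report_final_result(acc)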