def mlp_mixer_h14(num_classes: int, image_size: int = 224, channels: int = 3):
    """Build an ``MLPMixer`` with the fixed "h14" hyperparameters.

    The preset uses ``patch_size=14``, ``num_layers=32``, ``hidden_dim=1280``,
    ``tokens_hidden_dim=640`` and ``channels_hidden_dim=5120``.

    Args:
        num_classes: Forwarded as the first positional argument of ``MLPMixer``.
        image_size: Forwarded as the second positional argument (default 224).
        channels: Forwarded as the third positional argument (default 3).

    Returns:
        Whatever ``MLPMixer(...)`` returns for this configuration.
    """
    return MLPMixer(
        num_classes,
        image_size,
        channels,
        patch_size=14,
        num_layers=32,
        hidden_dim=1280,
        tokens_hidden_dim=640,
        channels_hidden_dim=5120,
    )
def mlp_mixer_l32(num_classes: int, image_size: int = 224, channels: int = 3):
    """Build an ``MLPMixer`` with the fixed "l32" hyperparameters.

    The preset uses ``patch_size=32``, ``num_layers=24``, ``hidden_dim=1024``,
    ``tokens_hidden_dim=512`` and ``channels_hidden_dim=4096``.

    Args:
        num_classes: Forwarded as the first positional argument of ``MLPMixer``.
        image_size: Forwarded as the second positional argument (default 224).
        channels: Forwarded as the third positional argument (default 3).

    Returns:
        Whatever ``MLPMixer(...)`` returns for this configuration.
    """
    return MLPMixer(
        num_classes,
        image_size,
        channels,
        patch_size=32,
        num_layers=24,
        hidden_dim=1024,
        tokens_hidden_dim=512,
        channels_hidden_dim=4096,
    )
def mlp_mixer_b32(num_classes: int, image_size: int = 224, channels: int = 3):
    """Build an ``MLPMixer`` with the fixed "b32" hyperparameters.

    The preset uses ``patch_size=32``, ``num_layers=12``, ``hidden_dim=768``,
    ``tokens_hidden_dim=384`` and ``channels_hidden_dim=3072``.

    Args:
        num_classes: Forwarded as the first positional argument of ``MLPMixer``.
        image_size: Forwarded as the second positional argument (default 224).
        channels: Forwarded as the third positional argument (default 3).

    Returns:
        Whatever ``MLPMixer(...)`` returns for this configuration.
    """
    return MLPMixer(
        num_classes,
        image_size,
        channels,
        patch_size=32,
        num_layers=12,
        hidden_dim=768,
        tokens_hidden_dim=384,
        channels_hidden_dim=3072,
    )
def mlp_mixer_s16(num_classes: int, image_size: int = 224, channels: int = 3):
    """Build an ``MLPMixer`` with the fixed "s16" hyperparameters.

    The preset uses ``patch_size=16``, ``num_layers=8``, ``hidden_dim=512``,
    ``tokens_hidden_dim=256`` and ``channels_hidden_dim=2048``.

    Args:
        num_classes: Forwarded as the first positional argument of ``MLPMixer``.
        image_size: Forwarded as the second positional argument (default 224).
        channels: Forwarded as the third positional argument (default 3).

    Returns:
        Whatever ``MLPMixer(...)`` returns for this configuration.
    """
    return MLPMixer(
        num_classes,
        image_size,
        channels,
        patch_size=16,
        num_layers=8,
        hidden_dim=512,
        tokens_hidden_dim=256,
        channels_hidden_dim=2048,
    )
import tensorflow as tf

from mlp_mixer import MLP, MixerLayer, MLPMixer

# Smoke-test script: instantiate a small mixer, run one random tensor through
# it, then print the prediction and its shape.
model = MLPMixer(
    n_classes=1000,
    image_size=256,
    patch_size=16,
    depth=6,
    n_channels=3,
    hdim=512,
)

# Uniform random values in [0, 1) standing in for an image.
# NOTE(review): the tensor is [3, 256, 256] with no explicit batch axis, while
# the original author annotated the output as (1, 1000) -- confirm the input
# layout MLPMixer actually expects.
img = tf.random.uniform(shape=[3, 256, 256])

pred = model(img)  # (1, 1000) per the original annotation

print(pred)
print(pred.shape)
# Loader over test_data; only batch_size is overridden, every other
# DataLoader option keeps its default.
test_iterator = torch.utils.data.DataLoader(test_data, batch_size=BATCH_SIZE)

# args.inspired selects which architecture is built and which model_name
# (presumably the checkpoint filename -- confirm where it is used) goes with it.
if args.inspired:
    model_name = 'mlp-mixer-inspired-cifar10.pt'
    model = MLPMixer_Inspired(
        patch_size=args.patch_size,
        output_dim=10,
        c=3,
        h=32,
        w=32,
        depth=args.depth,
    )
else:
    model_name = 'mlp-mixer-cifar10.pt'
    model = MLPMixer(
        image_size=32,
        patch_size=args.patch_size,
        dim=512,
        depth=args.depth,
        num_classes=10,
    )

# Use CUDA when it is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# CrossEntropyLoss applies log-softmax itself, so it is fed raw logits.
# Both the loss and the model are moved onto the selected device.
criterion = nn.CrossEntropyLoss().to(device)
model = model.to(device)

# Optimizer construction is delegated to create_optim, driven by args.
optimizer = create_optim(model.parameters(), args)