def __init__(self, in_channels, out_channels, dim_model, k=16):
    """Point Transformer classifier: input MLP + transformer, a pyramid of
    TransitionDown/Transformer stages, and a class-score head.

    `dim_model` lists the feature width of every stage; `k` is the k-NN
    neighborhood size used by the transition-down blocks.
    """
    super().__init__()
    self.k = k

    # A dummy feature is created if there is none given.
    in_channels = max(in_channels, 1)

    # First block: lift raw features, then one transformer pass.
    self.mlp_input = MLP([in_channels, dim_model[0]], plain_last=False)
    self.transformer_input = TransformerBlock(in_channels=dim_model[0],
                                              out_channels=dim_model[0])

    # Backbone: one TransitionDown followed by a TransformerBlock per
    # consecutive pair of stage widths.
    self.transformers_down = torch.nn.ModuleList()
    self.transition_down = torch.nn.ModuleList()
    for width_in, width_out in zip(dim_model[:-1], dim_model[1:]):
        self.transition_down.append(
            TransitionDown(in_channels=width_in, out_channels=width_out,
                           k=self.k))
        self.transformers_down.append(
            TransformerBlock(in_channels=width_out, out_channels=width_out))

    # Class score computation.
    self.mlp_output = MLP([dim_model[-1], 64, out_channels], norm=None)
def test_mlp(batch_norm, act_first, plain_last):
    """A channel-list MLP must match the scalar-argument construction."""
    inputs = torch.randn(4, 16)

    torch.manual_seed(12345)
    model = MLP(
        [16, 32, 32, 64],
        batch_norm=batch_norm,
        act_first=act_first,
        plain_last=plain_last,
    )
    assert str(model) == 'MLP(16, 32, 32, 64)'

    expected = model(inputs)
    assert expected.size() == (4, 64)

    if is_full_test():
        scripted = torch.jit.script(model)
        assert torch.allclose(scripted(inputs), expected)

    # Re-seed so the second model receives identical initial weights.
    torch.manual_seed(12345)
    model = MLP(
        16,
        hidden_channels=32,
        out_channels=64,
        num_layers=3,
        batch_norm=batch_norm,
        act_first=act_first,
        plain_last=plain_last,
    )
    assert torch.allclose(model(inputs), expected)
def __init__(self, out_channels, k=20, aggr='max'):
    """DGCNN-style network: two dynamic edge convolutions whose outputs are
    fused by a linear layer, then classified by an MLP head."""
    super().__init__()
    point_dim = 3  # raw xyz input; edge features concatenate (x_i, x_j)
    self.conv1 = DynamicEdgeConv(MLP([2 * point_dim, 64, 64, 64]), k, aggr)
    self.conv2 = DynamicEdgeConv(MLP([2 * 64, 128]), k, aggr)
    # Fuses the concatenated conv outputs (128 + 64 channels).
    self.lin1 = Linear(128 + 64, 1024)
    self.mlp = MLP([1024, 512, 256, out_channels], dropout=0.5, norm=None)
def __init__(self):
    """PointNet++ classifier: three set-abstraction stages and an MLP head
    producing 10 class scores."""
    super().__init__()
    # Input channels account for both `pos` and node features.
    pos_dim = 3
    self.sa1_module = SAModule(0.5, 0.2, MLP([pos_dim, 64, 64, 128]))
    self.sa2_module = SAModule(0.25, 0.4, MLP([128 + pos_dim, 128, 128, 256]))
    self.sa3_module = GlobalSAModule(MLP([256 + pos_dim, 256, 512, 1024]))
    self.mlp = MLP([1024, 512, 256, 10], dropout=0.5, batch_norm=False)
def __init__(self, out_channels, k=30, aggr='max'):
    """Three stacked DynamicEdgeConv layers followed by a classifier MLP
    that consumes the concatenation of all three conv outputs."""
    super().__init__()
    # Each edge conv sees concatenated (x_i, x_j) pairs, hence the `2 *`.
    self.conv1 = DynamicEdgeConv(MLP([2 * 6, 64, 64]), k, aggr)
    self.conv2 = DynamicEdgeConv(MLP([2 * 64, 64, 64]), k, aggr)
    self.conv3 = DynamicEdgeConv(MLP([2 * 64, 64, 64]), k, aggr)
    # Head input width: three 64-channel conv outputs concatenated.
    self.mlp = MLP([3 * 64, 1024, 256, 128, out_channels], dropout=0.5,
                   batch_norm=False)
def test_mlp(batch_norm, relu_first):
    """A channel-list MLP reports its architecture and maps (4, 16) -> (4, 64).

    Fix: call `str(mlp)` instead of invoking `mlp.__repr__()` directly —
    the dunder should not be called by hand (PEP 8 / standard idiom).
    """
    x = torch.randn(4, 16)
    mlp = MLP([16, 32, 32, 64], batch_norm=batch_norm, relu_first=relu_first)
    assert str(mlp) == 'MLP(16, 32, 32, 64)'

    out = mlp(x)
    assert out.size() == (4, 64)

    # TorchScript compilation must reproduce the eager output exactly.
    jit = torch.jit.script(mlp)
    assert torch.allclose(jit(x), out)
def __init__(self, in_channels, hidden_channels, out_channels, num_layers):
    """Stack of `num_layers` GIN convolutions followed by a two-layer
    MLP classifier head."""
    super().__init__()
    self.convs = torch.nn.ModuleList()
    width = in_channels  # first layer consumes the raw input width
    for _ in range(num_layers):
        layer_mlp = MLP([width, hidden_channels, hidden_channels])
        self.convs.append(GINConv(nn=layer_mlp, train_eps=False))
        width = hidden_channels
    # Prediction head: dropout for regularization, no normalization.
    self.mlp = MLP([hidden_channels, hidden_channels, out_channels],
                   norm=None, dropout=0.5)
def __init__(self, in_channels, out_channels):
    """PointTransformerConv wrapped with input/output linear projections."""
    super().__init__()
    self.lin_in = Lin(in_channels, in_channels)
    self.lin_out = Lin(out_channels, out_channels)
    # Positional and attention MLPs keep their trailing non-linearity
    # (plain_last=False) and use no normalization.
    self.pos_nn = MLP([3, 64, out_channels], norm=None, plain_last=False)
    self.attn_nn = MLP([out_channels, 64, out_channels], norm=None,
                       plain_last=False)
    self.transformer = PointTransformerConv(in_channels, out_channels,
                                            pos_nn=self.pos_nn,
                                            attn_nn=self.attn_nn)
def test_attentional_aggregation():
    """Gated attentional aggregation yields one vector per segment, and the
    `index`- and `ptr`-based call paths must agree.

    Fix: the final `torch.allclose(...)` result was discarded — the
    equivalence between the `index` and `ptr` code paths was never actually
    asserted, so a regression there would have passed silently.
    """
    channels = 16
    x = torch.randn(6, channels)
    index = torch.tensor([0, 0, 1, 1, 1, 2])
    ptr = torch.tensor([0, 2, 5, 6])

    gate_nn = MLP([channels, 1], act='relu')
    nn = MLP([channels, channels], act='relu')
    aggr = AttentionalAggregation(gate_nn, nn)
    assert str(aggr) == (f'AttentionalAggregation(gate_nn=MLP({channels}, 1), '
                         f'nn=MLP({channels}, {channels}))')

    out = aggr(x, index)
    assert out.size() == (3, channels)
    assert torch.allclose(aggr(x, ptr=ptr, dim_size=3), out)
def __init__(self, hidden_channels, num_layers, GNN=GCNConv, k=0.6):
    """DGCNN with SortPooling: a GNN stack whose 1-channel tail drives
    sorting, then two 1-D convolutions and an MLP scorer.

    A fractional `k` is interpreted as a percentile of the training-graph
    sizes (clamped below at 10 nodes).
    """
    super().__init__()
    if k < 1:  # Transform percentile to number.
        sizes = sorted(data.num_nodes for data in train_dataset)
        k = max(10, sizes[int(math.ceil(k * len(sizes))) - 1])
    self.k = int(k)

    # `num_layers` hidden GNN layers plus a final 1-channel layer used
    # for the sort key.
    self.convs = ModuleList()
    self.convs.append(GNN(train_dataset.num_features, hidden_channels))
    for _ in range(num_layers - 1):
        self.convs.append(GNN(hidden_channels, hidden_channels))
    self.convs.append(GNN(hidden_channels, 1))

    conv1d_channels = [16, 32]
    total_latent_dim = hidden_channels * num_layers + 1
    conv1d_kws = [total_latent_dim, 5]
    # Stride equals kernel width so conv1 reads one node embedding per step.
    self.conv1 = Conv1d(1, conv1d_channels[0], conv1d_kws[0], conv1d_kws[0])
    self.maxpool1d = MaxPool1d(2, 2)
    self.conv2 = Conv1d(conv1d_channels[0], conv1d_channels[1],
                        conv1d_kws[1], 1)
    # Flattened width after pooling and the second convolution.
    dense_dim = int((self.k - 2) / 2 + 1)
    dense_dim = (dense_dim - conv1d_kws[1] + 1) * conv1d_channels[1]
    self.mlp = MLP([dense_dim, 128, 1], dropout=0.5, norm=None)
def test_fine_grained_mlp(plain_last):
    """Per-layer `dropout` and `bias` lists are accepted by MLP.

    Fix: the parametrized `plain_last` flag was never forwarded to the MLP
    constructor, so the plain-last variants of this test exercised nothing;
    pass it through so both configurations are actually covered.
    """
    mlp = MLP(
        [16, 32, 32, 64],
        dropout=[0.1, 0.2, 0.3],
        bias=[False, True, False],
        plain_last=plain_last,
    )
    assert mlp(torch.randn(4, 16)).size() == (4, 64)
def __init__(self, in_channels: int, out_channels: int,
             hidden_channels: int = 64, num_layers: int = 3,
             dropout: float = 0.5):
    """GIN backbone with jumping-knowledge concatenation, an MLP
    classifier, and one accuracy metric per data split."""
    super().__init__()
    self.gnn = GIN(in_channels, hidden_channels, num_layers,
                   dropout=dropout, jk='cat')
    self.classifier = MLP([hidden_channels, hidden_channels, out_channels],
                          batch_norm=True, dropout=dropout)

    # Separate metric objects so running statistics never mix across splits.
    self.train_acc = Accuracy()
    self.val_acc = Accuracy()
    self.test_acc = Accuracy()
def __init__(self, in_channels: int, out_channels: int,
             hidden_channels: int = 64, num_layers: int = 3,
             dropout: float = 0.5):
    """GIN backbone with jumping-knowledge concatenation followed by a
    batch-normalized MLP classifier."""
    super().__init__()
    self.gnn = GIN(in_channels, hidden_channels, num_layers,
                   dropout=dropout, jk='cat')
    self.classifier = MLP([hidden_channels, hidden_channels, out_channels],
                          norm="batch_norm", dropout=dropout)
def __init__(self, num_classes):
    """PointNet++ segmentation network: set-abstraction encoder, mirrored
    feature-propagation decoder, and per-point prediction heads."""
    super().__init__()
    # Input channels account for both `pos` and node features.
    pos_dim = 3
    self.sa1_module = SAModule(0.2, 0.2, MLP([pos_dim + 3, 64, 64, 128]))
    self.sa2_module = SAModule(0.25, 0.4, MLP([128 + pos_dim, 128, 128, 256]))
    self.sa3_module = GlobalSAModule(MLP([256 + pos_dim, 256, 512, 1024]))

    # Decoder stages mirror the encoder widths.
    self.fp3_module = FPModule(1, MLP([1024 + 256, 256, 256]))
    self.fp2_module = FPModule(3, MLP([256 + 128, 256, 128]))
    self.fp1_module = FPModule(3, MLP([128 + pos_dim, 128, 128, 128]))

    self.mlp = MLP([128, 128, 128, num_classes], dropout=0.5, norm=None)
    self.lin1 = torch.nn.Linear(128, 128)
    self.lin2 = torch.nn.Linear(128, 128)
    self.lin3 = torch.nn.Linear(128, num_classes)
from torch_geometric.datasets import Planetoid

# GNN-to-MLP distillation setup: `--lamb` balances the contribution of the
# hard-label loss against the teacher's soft outputs.
parser = argparse.ArgumentParser()
parser.add_argument('--lamb', type=float, default=0.0,
                    help='Balances loss from hard labels and teacher outputs')
args = parser.parse_args()

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load Cora with row-normalized features.
path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', 'Planetoid')
dataset = Planetoid(path, name='Cora', transform=T.NormalizeFeatures())
data = dataset[0].to(device)

# Teacher: a two-layer GCN. Student: a graph-free MLP on raw node features.
gnn = GCN(dataset.num_node_features, hidden_channels=16,
          out_channels=dataset.num_classes, num_layers=2).to(device)
mlp = MLP([dataset.num_node_features, 64, dataset.num_classes], dropout=0.5,
          batch_norm=False).to(device)

gnn_optimizer = torch.optim.Adam(gnn.parameters(), lr=0.01, weight_decay=5e-4)
mlp_optimizer = torch.optim.Adam(mlp.parameters(), lr=0.01, weight_decay=5e-4)


def train_teacher():
    """Run one full-batch optimization step of the GNN teacher.

    Returns the training cross-entropy loss as a plain float.
    """
    gnn.train()
    gnn_optimizer.zero_grad()
    out = gnn(data.x, data.edge_index)
    loss = F.cross_entropy(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    gnn_optimizer.step()
    return float(loss)
from ogb.nodeproppred import Evaluator, PygNodePropPredDataset

import torch_geometric.transforms as T
from torch_geometric.nn import MLP, CorrectAndSmooth

# ogbn-products, stored as a sparse adjacency for later label propagation.
root = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', 'OGB')
dataset = PygNodePropPredDataset('ogbn-products', root,
                                 transform=T.ToSparseTensor())
evaluator = Evaluator(name='ogbn-products')
split_idx = dataset.get_idx_split()
data = dataset[0]

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Base predictor: a plain MLP on node features only (no graph structure);
# Correct & Smooth is expected to refine its outputs downstream.
model = MLP([dataset.num_features, 200, 200, dataset.num_classes], dropout=0.5,
            batch_norm=True, relu_first=True).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.CrossEntropyLoss()

x, y = data.x.to(device), data.y.to(device)
train_idx = split_idx['train'].to(device)
val_idx = split_idx['valid'].to(device)
test_idx = split_idx['test'].to(device)
x_train, y_train = x[train_idx], y[train_idx]


def train():
    # NOTE(review): this definition appears truncated in the visible chunk —
    # the loss computation / backward step presumably follows outside view.
    model.train()
    optimizer.zero_grad()
    out = model(x_train)
def __init__(self, in_channels, out_channels):
    """Two small MLPs; `plain_last=False` keeps the trailing activation
    (and normalization) on each one's final layer."""
    super().__init__()
    self.mlp_sub = MLP([in_channels, out_channels], plain_last=False)
    self.mlp = MLP([out_channels, out_channels], plain_last=False)
def __init__(self, in_channels, out_channels, ratio=0.25, k=16):
    """Transition-down configuration: sampling ratio, k-NN neighborhood
    size, and the per-point feature MLP."""
    super().__init__()
    self.k = k            # neighbors aggregated per sampled point
    self.ratio = ratio    # fraction of points kept by the sampler
    self.mlp = MLP([in_channels, out_channels], plain_last=False)