Example #1
    def __init__(self, ckpt_path, newModel=False):
        self.dualGraph = readGraph()
        file_name = "dualGraphNodes.pkl"
        open_file = open(file_name, "wb")
        pickle.dump(list(self.dualGraph.nodes), open_file)
        open_file.close()
        #print(list(self.dualGraph.nodes))
        self.data = from_networkx(self.dualGraph)
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        open_file = open("edge_index.pkl", "wb")
        pickle.dump(self.data.edge_index, open_file)
        open_file.close()
        open_file = open("edge_index.pkl", "rb")
        edge_index = pickle.load(open_file)
        open_file.close()
        self.model = Node2Vec(edge_index,
                              embedding_dim=32,
                              walk_length=20,
                              context_size=10,
                              walks_per_node=10,
                              num_negative_samples=1,
                              p=1,
                              q=1,
                              sparse=True).to(self.device)

        self.loader = self.model.loader(batch_size=128,
                                        shuffle=True,
                                        num_workers=0)
        self.optimizer = torch.optim.SparseAdam(list(self.model.parameters()),
                                                lr=0.01)
        if newModel:
            self.train(epochs=20)
            self.saveTo(ckpt_path)
        else:
            self.loadFrom(ckpt_path)
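The constructor above calls `saveTo` and `loadFrom`, which are not part of the snippet. A minimal sketch of what they might look like, assuming they simply persist and restore the Node2Vec state dict (the behavior is an assumption, not the original implementation):

    def saveTo(self, ckpt_path):
        # Assumed: persist only the Node2Vec parameters.
        torch.save(self.model.state_dict(), ckpt_path)

    def loadFrom(self, ckpt_path):
        # Assumed: restore the parameters onto the device chosen at construction.
        self.model.load_state_dict(torch.load(ckpt_path, map_location=self.device))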
Example #2
    def train(self, data):
        if not data.has_features:
            embedding_dim = 128

            embedder = Node2Vec(
                data.x.size()[0],   # Num nodes
                embedding_dim,      # Embedding dimension
                5,                  # Walk length
                3,                  # Context size
            )

            # First train the embedder
            embedder = n2v_trainer(
                data, embedder, self.device, lr=0.1, epochs=400
            )

            # Then use the n2v embeddings as features
            data.x = embedder.embedding.weight

        model = BenGCN(
            features_num=data.x.size()[1],
            num_class=int(max(data.y)) + 1,
            num_layers=2
        )

        return generic_training_loop(
            data,
            model,
            self.device,
            lr=0.01
        )
Example #3
def node2vec(edge_index):
    embedding_dim = 128
    walk_length = 80
    context_size = 20
    walks_per_node = 10
    batch_size = 256
    lr = 0.01
    epochs = 5
    log_steps = 1

    device = f'cuda:{0}' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device)

    model = Node2Vec(edge_index, embedding_dim, walk_length,
                     context_size, walks_per_node, sparse=True).to(device)

    optimizer = torch.optim.SparseAdam(model.parameters(), lr=lr)
    loader = model.loader(batch_size=batch_size, shuffle=True, num_workers=4)

    model.train()
    for epoch in range(1, epochs + 1):
        for i, (pos_rw, neg_rw) in enumerate(loader):
            optimizer.zero_grad()
            loss = model.loss(pos_rw.to(device), neg_rw.to(device))
            loss.backward()
            optimizer.step()

            if (i + 1) % log_steps == 0:
                print(f'Epoch: {epoch:02d}, Step: {i+1:03d}/{len(loader)}, '
                      f'Loss: {loss:.4f}')
    
    print(f'node2vec total params are {sum(p.numel() for p in model.parameters())}')
    return model.embedding.weight.data.cpu().numpy()
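For context, a minimal usage sketch for the `node2vec` helper above (the variable names are assumptions; `data.edge_index` stands for any `[2, num_edges]` LongTensor, e.g. from a PyG dataset):

    # Hypothetical usage: train embeddings and reuse them as node features.
    emb = node2vec(data.edge_index)     # numpy array of shape [num_nodes, 128]
    data.x = torch.from_numpy(emb)      # plug the embeddings back in as input features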
Example #4
    def generate_node2vec_feature(self, data, epochs=20, num_features=64):
        edge_index = data['edge_file'][['src_idx', 'dst_idx']].to_numpy()
        edge_index = sorted(edge_index, key=lambda d: d[0])
        edge_index = torch.tensor(edge_index, dtype=torch.long).transpose(0, 1)

        model = Node2Vec(edge_index,
                         embedding_dim=num_features,
                         walk_length=20,
                         context_size=10,
                         walks_per_node=10,
                         num_negative_samples=1,
                         sparse=True).to('cuda')

        loader = model.loader(batch_size=128, shuffle=True, num_workers=4)
        optimizer = torch.optim.SparseAdam(model.parameters(), lr=0.01)

        def train():
            model.train()
            total_loss = 0
            for pos_rw, neg_rw in loader:
                optimizer.zero_grad()
                loss = model.loss(pos_rw.to('cuda'), neg_rw.to('cuda'))
                loss.backward()
                optimizer.step()
                total_loss += loss.item()
            return total_loss / len(loader)

        for epoch in range(1, epochs + 1):
            loss = train()
            print(f'Epoch: {epoch:02d}, Loss: {loss:.4f}')

        return pd.concat(
            [data['fea_table'],
             pd.DataFrame(model().detach().cpu().numpy())],
            axis=1)
Example #5
    def train_nn(self):
        self.model = Node2Vec(self.data.num_nodes,
                              embedding_dim=128,
                              walk_length=20,
                              context_size=10,
                              walks_per_node=10)
        self.model = self.model.to(self.device)
        self.data = self.data.to(self.device)
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.lr)
        self.loader = DataLoader(torch.arange(self.data.num_nodes),
                                 batch_size=128,
                                 shuffle=True)

        for epoch in range(1, self.epochs + 1):
            t1 = time.time()
            self.model.train()
            total_loss = 0
            for subset in self.loader:
                self.optimizer.zero_grad()
                loss = self.model.loss(self.data.edge_index,
                                       subset.to(self.device))
                loss.backward()
                self.optimizer.step()
                total_loss += loss.item()
            total_loss = total_loss / len(self.loader)
            print("epoch: %d, time elapsed: %.2f, loss: %.5f" %
                  (epoch, time.time() - t1, total_loss))

        self.model.eval()
        with torch.no_grad():
            z = self.model(
                torch.arange(self.data.num_nodes, device=self.device))

        return z
Example #6
    def __init__(self,
                 n2v_dim,
                 attention_dim,
                 feature_dim,
                 embedding_dim,
                 num_heads,
                 output_dimension,
                 windowsz=3):
        super(ConstGat, self).__init__()
        open_file = open("edge_index.pkl", "rb")
        edge_index = pickle.load(open_file)
        open_file.close()
        self.n2v = Node2Vec(edge_index,
                            embedding_dim=32,
                            walk_length=20,
                            context_size=10,
                            walks_per_node=10,
                            num_negative_samples=1,
                            p=1,
                            q=1,
                            sparse=True)

        #self.n2v = N2V('node2vec.mdl')
        self.linearContextual = nn.Linear(n2v_dim, attention_dim)
        self.embedding_dim = embedding_dim
        self.feature_dim = feature_dim
        self.num_heads = num_heads
        self.output_dimension = output_dimension
        self.background_dim = embedding_dim[1] + embedding_dim[2] + 1
        self.linearQ = nn.Linear(self.background_dim + n2v_dim, attention_dim)
        # embedding layers for 7 categorical features
        # "road_type", "time_stage", "week_day", "lanes", "bridge", "endpoint_u", "endpoint_v", "trip_id"
        # 0 represents Unknown
        # 0-21
        self.embedding_road_type = nn.Embedding(22, self.embedding_dim[0])
        # 0-6
        self.embedding_time_stage = nn.Embedding(7, self.embedding_dim[1])
        # 0-7
        self.embedding_week_day = nn.Embedding(8, self.embedding_dim[2])
        # 0-8
        self.embedding_lanes = nn.Embedding(9, self.embedding_dim[3])
        # 0-1
        self.embedding_bridge = nn.Embedding(2, self.embedding_dim[4])
        # 0-16
        self.embedding_endpoint_u = nn.Embedding(17, self.embedding_dim[5])
        self.embedding_endpoint_v = nn.Embedding(17, self.embedding_dim[6])
        self.selfattn = nn.MultiheadAttention(embed_dim=attention_dim,
                                              num_heads=self.num_heads,
                                              batch_first=True)
        self.traffic_dim = embedding_dim[0] + sum(
            embedding_dim[3:]) + feature_dim - 1
        self.linearTraffic = nn.Linear(self.traffic_dim, attention_dim)
        #self.norm = LayerNorm(self.total_embed_dim)
        self.feed_forward = PositionwiseFeedForward(
            (2 * attention_dim + self.background_dim) * windowsz,
            self.output_dimension)
        self.activate = nn.ReLU()
Example #7
def main():
    parser = argparse.ArgumentParser(description='OGB (Node2Vec)')
    parser.add_argument('--device', type=int, default=0)
    parser.add_argument('--task', type=str, default='ogbn')
    parser.add_argument('--dataset', type=str, default='arxiv')
    parser.add_argument('--embedding_dim', type=int, default=128)
    parser.add_argument('--walk_length', type=int, default=80)
    parser.add_argument('--context_size', type=int, default=20)
    parser.add_argument('--walks_per_node', type=int, default=10)
    parser.add_argument('--batch_size', type=int, default=256)
    parser.add_argument('--lr', type=float, default=0.01)
    parser.add_argument('--epochs', type=int, default=5)
    parser.add_argument('--log_steps', type=int, default=1)
    parser.add_argument('--dropedge_rate', type=float, default=0.4)
    parser.add_argument('--dump_adj_only', dest="dump_adj_only", action="store_true", help="dump adj matrix for proX")
    parser.set_defaults(dump_adj_only=False)
    args = parser.parse_args()

    device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device)

    dataset = create_dataset(name=f'{args.task}-{args.dataset}')
    data = dataset[0]
    if args.dataset == 'arxiv':
        data.edge_index = to_undirected(data.edge_index, data.num_nodes)
    elif args.dataset == 'papers100M':
        data.edge_index, _ = dropout_adj(data.edge_index, p = args.dropedge_rate, num_nodes= data.num_nodes)
        data.edge_index = to_undirected(data.edge_index, data.num_nodes)

    if args.dump_adj_only:
        adj = to_scipy_sparse_matrix(data.edge_index)
        sp.save_npz(f'data/{args.task}-{args.dataset}-adj.npz', adj)
        return

    model = Node2Vec(data.edge_index, args.embedding_dim, args.walk_length,
                     args.context_size, args.walks_per_node,
                     sparse=True).to(device)

    loader = model.loader(batch_size=args.batch_size, shuffle=True,
                          num_workers=4)
    optimizer = torch.optim.SparseAdam(model.parameters(), lr=args.lr)

    model.train()
    for epoch in range(1, args.epochs + 1):
        for i, (pos_rw, neg_rw) in enumerate(loader):
            optimizer.zero_grad()
            loss = model.loss(pos_rw.to(device), neg_rw.to(device))
            loss.backward()
            optimizer.step()

            if (i + 1) % args.log_steps == 0:
                print(f'Epoch: {epoch:02d}, Step: {i+1:03d}/{len(loader)}, '
                      f'Loss: {loss:.4f}')

            if (i + 1) % 100 == 0:  # Save model every 100 steps.
                save_embedding(model, args.embedding_dim, args.dataset, args.context_size)
        save_embedding(model, args.embedding_dim, args.dataset, args.context_size)
Example #8
def main():
    dataset = 'Cora'
    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', dataset)
    dataset = Planetoid(path, dataset)
    data = dataset[0]

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = Node2Vec(data.edge_index, embedding_dim=128, walk_length=20,
                     context_size=10, walks_per_node=10,
                     num_negative_samples=1, p=1, q=1, sparse=True).to(device)

    loader = model.loader(batch_size=128, shuffle=True, num_workers=4)
    optimizer = torch.optim.SparseAdam(list(model.parameters()), lr=0.01)

    def train():
        model.train()
        total_loss = 0
        for pos_rw, neg_rw in loader:
            optimizer.zero_grad()
            loss = model.loss(pos_rw.to(device), neg_rw.to(device))
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        return total_loss / len(loader)

    @torch.no_grad()
    def test():
        model.eval()
        z = model()
        acc = model.test(z[data.train_mask], data.y[data.train_mask],
                         z[data.test_mask], data.y[data.test_mask],
                         max_iter=150)
        return acc

    for epoch in range(1, 101):
        loss = train()
        acc = test()
        print(f'Epoch: {epoch:02d}, Loss: {loss:.4f}, Acc: {acc:.4f}')

    @torch.no_grad()
    def plot_points(colors):
        model.eval()
        z = model(torch.arange(data.num_nodes, device=device))
        z = TSNE(n_components=2).fit_transform(z.cpu().numpy())
        y = data.y.cpu().numpy()

        plt.figure(figsize=(8, 8))
        for i in range(dataset.num_classes):
            plt.scatter(z[y == i, 0], z[y == i, 1], s=20, color=colors[i])
        plt.axis('off')
        plt.show()

    colors = [
        '#ffc0cb', '#bada55', '#008080', '#420420', '#7fe5f0', '#065535',
        '#ffd700'
    ]
    plot_points(colors)
Example #9
def n2v(edge_list, node2id, round_id, init_dict=None, embedding_dim=128, walk_length=10,
        context_size=5, walks_per_node=10, tol=1e-4, verbose=False, epochs=100):
    edge_index = torch.tensor(np.array(edge_list).T, dtype=torch.long)
    data = Data(edge_index=edge_index)
    model = Node2Vec(data.edge_index, embedding_dim=embedding_dim, walk_length=walk_length,
                     context_size=context_size, walks_per_node=walks_per_node, sparse=True)
    if init_dict is not None:
        miss_nodes = []
        X = np.random.randn(len(node2id), embedding_dim)
        for node, idx in node2id.items():
            try:
                X[idx] = init_dict[node]
            except KeyError:
                miss_nodes.append(node)
        print("Missing {} nodes: {}".format(len(miss_nodes), miss_nodes))
        # Initialize the embedding weights (not the Embedding module itself).
        model.embedding.weight.data = torch.tensor(X, dtype=torch.float)

    model = model.to(device)
    loader = model.loader(batch_size=32, shuffle=True)
    optimizer = torch.optim.SparseAdam(model.parameters(), lr=0.01/(int(round_id)+1))
    best_loss = 10e8
    n_step_without_progress = 0
    for epoch in range(epochs):
        model.train()
        total_loss = 0
        if verbose:
            for pos_rw, neg_rw in tqdm(loader, desc="Train epoch {}".format(epoch+1)):
                optimizer.zero_grad()
                loss = model.loss(pos_rw.to(device), neg_rw.to(device))
                loss.backward()
                total_loss += loss.item()
                optimizer.step()
        else:
            for pos_rw, neg_rw in loader:
                optimizer.zero_grad()
                loss = model.loss(pos_rw.to(device), neg_rw.to(device))
                loss.backward()
                total_loss += loss.item()
                optimizer.step()
        if (best_loss - total_loss)/best_loss < tol:
            n_step_without_progress += 1
            if n_step_without_progress == 3:
                break
        else:
            best_loss = total_loss
            n_step_without_progress = 0
        if verbose:
            print("Epoch {}: loss {} best loss {} #step without progress {}".format(epoch, total_loss, best_loss, n_step_without_progress))

    model.eval()
    out = model().cpu().detach().numpy()

    return out
Example #10
def main():
    parser = argparse.ArgumentParser(description='OGBN-citeseer (Node2Vec)')
    parser.add_argument('--device', type=int, default=0)
    parser.add_argument('--embedding_dim', type=int, default=256)
    parser.add_argument('--walk_length', type=int, default=80)
    parser.add_argument('--context_size', type=int, default=20)
    parser.add_argument('--walks_per_node', type=int, default=10)
    parser.add_argument('--batch_size', type=int, default=256)
    parser.add_argument('--lr', type=float, default=0.01)
    parser.add_argument('--epochs', type=int, default=20)
    parser.add_argument('--log_steps', type=int, default=1)
    args = parser.parse_args()

    device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device)
    # root = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', 'arxiv')
    data_dir = 'planetoid'
    dataset = pyg.datasets.Planetoid(name='Citeseer', root=data_dir)

    data = dataset[0]
    data.edge_index = to_undirected(data.edge_index, data.num_nodes)

    model = Node2Vec(data.edge_index,
                     args.embedding_dim,
                     args.walk_length,
                     args.context_size,
                     args.walks_per_node,
                     sparse=True).to(device)

    loader = model.loader(batch_size=args.batch_size,
                          shuffle=True,
                          num_workers=4)
    optimizer = torch.optim.SparseAdam(model.parameters(), lr=args.lr)

    model.train()
    for epoch in range(1, args.epochs + 1):
        for i, (pos_rw, neg_rw) in enumerate(loader):
            optimizer.zero_grad()
            loss = model.loss(pos_rw.to(device), neg_rw.to(device))
            loss.backward()
            optimizer.step()

            if (i + 1) % args.log_steps == 0:
                print(f'Epoch: {epoch:02d}, Step: {i+1:03d}/{len(loader)}, '
                      f'Loss: {loss:.4f}')

            if (i + 1) % 100 == 0:  # Save model every 100 steps.
                save_embedding(model)
        save_embedding(model)
Example #11
def main():
    parser = argparse.ArgumentParser(description="OGBL-Citation2 (Node2Vec)")
    parser.add_argument("--device", type=int, default=0)
    parser.add_argument("--embedding_dim", type=int, default=128)
    parser.add_argument("--walk_length", type=int, default=40)
    parser.add_argument("--context_size", type=int, default=20)
    parser.add_argument("--walks_per_node", type=int, default=10)
    parser.add_argument("--batch_size", type=int, default=256)
    parser.add_argument("--lr", type=float, default=0.01)
    parser.add_argument("--epochs", type=int, default=1)
    parser.add_argument("--log_steps", type=int, default=1)
    args = parser.parse_args()

    device = f"cuda:{args.device}" if torch.cuda.is_available() else "cpu"
    device = torch.device(device)

    dataset = PygLinkPropPredDataset(name="ogbl-citation2")
    data = dataset[0]
    data.edge_index = to_undirected(data.edge_index, data.num_nodes)

    model = Node2Vec(
        data.edge_index,
        args.embedding_dim,
        args.walk_length,
        args.context_size,
        args.walks_per_node,
        sparse=True,
    ).to(device)

    loader = model.loader(batch_size=args.batch_size,
                          shuffle=True,
                          num_workers=4)
    optimizer = torch.optim.SparseAdam(list(model.parameters()), lr=args.lr)

    model.train()
    for epoch in range(1, args.epochs + 1):
        for i, (pos_rw, neg_rw) in enumerate(loader):
            optimizer.zero_grad()
            loss = model.loss(pos_rw.to(device), neg_rw.to(device))
            loss.backward()
            optimizer.step()

            if (i + 1) % args.log_steps == 0:
                print(f"Epoch: {epoch:02d}, Step: {i+1:03d}/{len(loader)}, "
                      f"Loss: {loss:.4f}")

            if (i + 1) % 100 == 0:  # Save model every 100 steps.
                save_embedding(model)
        save_embedding(model)
Example #12
def main():
    parser = argparse.ArgumentParser(description='OGBN-Arxiv (Node2Vec)')
    parser.add_argument('--device', type=int, default=0)
    parser.add_argument('--embedding_dim', type=int, default=128)
    parser.add_argument('--walk_length', type=int, default=80)
    parser.add_argument('--context_size', type=int, default=20)
    parser.add_argument('--walks_per_node', type=int, default=10)
    parser.add_argument('--batch_size', type=int, default=256)
    parser.add_argument('--lr', type=float, default=0.01)
    parser.add_argument('--epochs', type=int, default=5)
    parser.add_argument('--log_steps', type=int, default=1)
    args = parser.parse_args()

    device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device)

    dataset = PygNodePropPredDataset(
        name='ogbn-arxiv', root='/srv/scratch/ogb/datasets/nodeproppred')
    data = dataset[0]
    data.edge_index = to_undirected(data.edge_index, data.num_nodes)

    model = Node2Vec(data.edge_index,
                     args.embedding_dim,
                     args.walk_length,
                     args.context_size,
                     args.walks_per_node,
                     sparse=True).to(device)

    loader = model.loader(batch_size=args.batch_size,
                          shuffle=True,
                          num_workers=4)
    optimizer = torch.optim.SparseAdam(list(model.parameters()), lr=args.lr)

    model.train()
    for epoch in range(1, args.epochs + 1):
        for i, (pos_rw, neg_rw) in enumerate(loader):
            optimizer.zero_grad()
            loss = model.loss(pos_rw.to(device), neg_rw.to(device))
            loss.backward()
            optimizer.step()

            if (i + 1) % args.log_steps == 0:
                print(f'Epoch: {epoch:02d}, Step: {i+1:03d}/{len(loader)}, '
                      f'Loss: {loss:.4f}')

            if (i + 1) % 100 == 0:  # Save model every 100 steps.
                save_embedding(model)
        save_embedding(model)
Example #13
def main():
    parser = argparse.ArgumentParser(description='OGBN-Proteins (Node2Vec)')
    parser.add_argument('--device', type=int, default=0)
    parser.add_argument('--embedding_dim', type=int, default=128)
    parser.add_argument('--walk_length', type=int, default=80)
    parser.add_argument('--context_size', type=int, default=20)
    parser.add_argument('--walks_per_node', type=int, default=10)
    parser.add_argument('--batch_size', type=int, default=256)
    parser.add_argument('--lr', type=float, default=0.01)
    parser.add_argument('--epochs', type=int, default=1)
    parser.add_argument('--log_steps', type=int, default=1)
    args = parser.parse_args()

    device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device)

    dataset = PygNodePropPredDataset(name='ogbn-proteins')
    data = dataset[0]

    edge_index = data.edge_index.to(device)
    perm = torch.argsort(edge_index[0] * data.num_nodes + edge_index[1])
    edge_index = edge_index[:, perm]

    model = Node2Vec(data.num_nodes, args.embedding_dim, args.walk_length,
                     args.context_size, args.walks_per_node).to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    loader = DataLoader(torch.arange(data.num_nodes),
                        batch_size=args.batch_size,
                        shuffle=True)

    model.train()
    for epoch in range(1, args.epochs + 1):
        for i, subset in enumerate(loader):
            optimizer.zero_grad()
            loss = model.loss(edge_index, subset.to(edge_index.device))
            loss.backward()
            optimizer.step()

            if (i + 1) % args.log_steps == 0:
                print(f'Epoch: {epoch:02d}, Step: {i+1:03d}/{len(loader)}, '
                      f'Loss: {loss:.4f}')

            if (i + 1) % 100 == 0:  # Save model every 100 steps.
                save_embedding(model)
        save_embedding(model)
Example #14
    def __init__(self,
                 num_nodes,
                 embedding_dim=16,
                 walk_length=5,
                 context_size=5,
                 walks_per_node=1,
                 num_layers=2,
                 hidden=32,
                 features_num=16,
                 num_class=2):
        super().__init__(num_layers, hidden, features_num + embedding_dim,
                         num_class)

        self.n2v = Node2Vec(num_nodes,
                            embedding_dim,
                            walk_length,
                            context_size,
                            walks_per_node=walks_per_node)
Example #15
def test_node2vec():
    edge_index = torch.tensor([[0, 1, 1, 2], [1, 0, 2, 1]])

    model = Node2Vec(edge_index, embedding_dim=16, walk_length=2,
                     context_size=2)
    assert model.__repr__() == 'Node2Vec(3, 16)'

    z = model(torch.arange(3))
    assert z.size() == (3, 16)

    pos_rw, neg_rw = model.sample(torch.arange(3))

    loss = model.loss(pos_rw, neg_rw)
    assert 0 <= loss.item()

    acc = model.test(torch.ones(20, 16), torch.randint(10, (20, )),
                     torch.ones(20, 16), torch.randint(10, (20, )))
    assert 0 <= acc and acc <= 1
Example #16
    def __init__(self, feature_dim, embedding_dim, num_heads, output_dimension,
                 n2v_dim, window_size, attention_dim=64):
        super(Pigat, self).__init__()
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        #self.n2v = N2V('node2vec.mdl')
        open_file = open("edge_index.pkl", "rb")
        edge_index = pickle.load(open_file)
        open_file.close()
        self.n2v = Node2Vec(edge_index, embedding_dim=32, walk_length=20,
                              context_size=10, walks_per_node=10,
                              num_negative_samples=1, p=1, q=1, sparse=True)

        self.attention_dim = attention_dim
        self.embedding_dim = embedding_dim
        self.feature_dim = feature_dim
        self.total_embed_dim = self.feature_dim + sum(self.embedding_dim) + n2v_dim
        self.output_dimension = output_dimension
        self.num_heads = num_heads
        # embedding layers for 7 categorical features
        # "road_type", "time_stage", "week_day", "lanes", "bridge", "endpoint_u", "endpoint_v", "trip_id"
        # 0 represents Unknown
        # 0-21
        self.embedding_road_type = nn.Embedding(22, self.embedding_dim[0])
        # 0-6
        self.embedding_time_stage = nn.Embedding(7, self.embedding_dim[1])
        # 0-7
        self.embedding_week_day = nn.Embedding(8, self.embedding_dim[2])
        # 0-8
        self.embedding_lanes = nn.Embedding(9, self.embedding_dim[3])
        # 0-1
        self.embedding_bridge = nn.Embedding(2, self.embedding_dim[4])
        # 0-16
        self.embedding_endpoint_u = nn.Embedding(17, self.embedding_dim[5])
        self.embedding_endpoint_v = nn.Embedding(17, self.embedding_dim[6])
        # self.linearq = nn.Linear(self.total_embed_dim, self.attention_dim)
        # self.linearx = nn.Linear(self.total_embed_dim, self.attention_dim)
        self.attention_dim = self.total_embed_dim
        self.selfattn = nn.MultiheadAttention(embed_dim=self.attention_dim,
                                              num_heads=self.num_heads)
        self.norm = LayerNorm(self.attention_dim)
        self.feed_forward = PositionwiseFeedForward(self.attention_dim)
        self.linear = nn.Linear(self.attention_dim, self.output_dimension)
        self.activate = nn.Softplus()
        self.middleOfTheWindow = window_size//2
Example #17
def main_node2vec(data):
    torch.backends.cudnn.deterministic = True
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    num_nodes = data.x.size(0)
    model = Node2Vec(num_nodes,
                     embedding_dim=64,
                     walk_length=10,
                     context_size=10,
                     walks_per_node=10)
    model = model.to(device)
    data = data.to(device)
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=0.005,
                                 weight_decay=5e-4)

    for i in range(100):
        model.train()
        optimizer.zero_grad()
        loss = model.loss(data.edge_index)
        loss.backward()
        optimizer.step()
    node_index = torch.arange(num_nodes, device=device)
    return model.forward(node_index).cpu()
Example #18
    def train(self, data):
        if not data.has_features:
            embedding_dim = 128

            embedder = Node2Vec(
                data.x.size()[0],  # Num nodes
                embedding_dim,  # Embedding dimension
                7,  # Walk length
                3,  # Context size
            )

            # First train embedder
            # Use a higher learning rate, bc this part is
            # meant to be kind of "quick and dirty"
            embedder = n2v_trainer(data, embedder, self.device, lr=0.1)

            # Then use n2v embeddings as features
            data.x = embedder.embedding.weight

        model = BenSAGE(features_num=data.x.size()[1],
                        num_class=int(max(data.y)) + 1,
                        num_layers=2)

        return generic_training_loop(data, model, self.device)
Example #19
import os.path as osp

import torch
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
from torch_geometric.datasets import Planetoid
from torch_geometric.nn import Node2Vec

dataset = 'Cora'
path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', dataset)
dataset = Planetoid(path, dataset)
data = dataset[0]
loader = DataLoader(torch.arange(data.num_nodes), batch_size=128, shuffle=True)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = Node2Vec(data.num_nodes,
                 embedding_dim=128,
                 walk_length=20,
                 context_size=10,
                 walks_per_node=10)
model, data = model.to(device), data.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)


def train():
    model.train()
    total_loss = 0
    for subset in loader:
        optimizer.zero_grad()
        loss = model.loss(data.edge_index, subset.to(device))
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(loader)
Example #20
import os.path as osp

import torch
import onnxruntime
from sklearn.manifold import TSNE
from torch_geometric.datasets import Planetoid
from torch_geometric.nn import Node2Vec

dataset = 'Cora'
path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', dataset)
dataset = Planetoid(path, dataset)
data = dataset[0]

device = 'cuda' if torch.cuda.is_available() else 'cpu'
device = 'cpu'
model = Node2Vec(data.edge_index,
                 embedding_dim=128,
                 walk_length=20,
                 context_size=10,
                 walks_per_node=10,
                 num_negative_samples=1,
                 sparse=True).to(device)

loader = model.loader(batch_size=128, shuffle=True, num_workers=4)
optimizer = torch.optim.SparseAdam(model.parameters(), lr=0.01)


def export_to_onnx_pt(model, data, use_dynamic=True):
    input_names = ["input_1"]
    output_names = ["output1"]
    batch = torch.arange(data.num_nodes)
    if use_dynamic:
        torch_out = torch.onnx.export(
            model,  # model being run
Example #21
def node2vec(fp, PARAMS):
    """[generate node2vec embedding]

    Args:
        fp ([string]): [the file path of the root of the data]
        PARAMS ([dict]): [the parameters of the node2vec model,
                        KEYS: {
                                GRAPH_NAME: the name of the graph file
                                EMBEDDING_DIM: dimension of embedding, 
                                WALK_LENGTH: random walk length, 
                                CONTEXT_SIZE: context size, 
                                WALKS_PER_NODE: number of walks per node, 
                                P: P parameter of node2vec, 
                                Q: Q parameter of node2vec, 
                                LEARNING_RATE: learning rate, 
                                BATCH_SIZE: batch size of each batch, 
                                NUM_EPOCH: number of epoch to be trained,
                                CUDA: use GPU
                                }]
    Returns:
        [np.array]: [the numpy array format of embedding]
    """
    N = io.loadmat(osp.join(fp, 'interim', 'graph', PARAMS['GRAPH_NAME']))['N']
    edge_idx, x = from_scipy_sparse_matrix(N)
    post_indx = io.loadmat(
        osp.join(fp, 'interim', 'graph', PARAMS['GRAPH_NAME']))['post_indx']
    post_indx = post_indx.reshape(-1, )
    data = Data(x=x, edge_index=edge_idx)
    if PARAMS['CUDA']:
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
    else:
        device = 'cpu'
    model = Node2Vec(data.edge_index,
                     embedding_dim=PARAMS['EMBEDDING_DIM'],
                     walk_length=PARAMS['WALK_LENGTH'],
                     context_size=PARAMS['CONTEXT_SIZE'],
                     walks_per_node=PARAMS['WALKS_PER_NODE'],
                     p=PARAMS['P'],
                     q=PARAMS['Q'],
                     sparse=True).to(device)
    loader = model.loader(batch_size=PARAMS['BATCH_SIZE'],
                          shuffle=True,
                          num_workers=8)
    optimizer = torch.optim.SparseAdam(model.parameters(),
                                       lr=PARAMS['LEARNING_RATE'])

    def train():
        model.train()
        total_loss = 0
        for pos_rw, neg_rw in tqdm(loader):
            optimizer.zero_grad()
            loss = model.loss(pos_rw.to(device), neg_rw.to(device))
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        return total_loss / len(loader)

    print('number of nodes to be embedded {}'.format(len(post_indx)))
    print('Start Node2vec Embedding Process with Following Parameters:')
    print(PARAMS)
    losses = []
    for epoch in range(1, PARAMS['NUM_EPOCH'] + 1):
        loss = train()
        losses.append(loss)
        print('Epoch: {:02d}, Node2vec Loss: {:.4f}'.format(epoch, loss))
    model.eval()
    with torch.no_grad():
        z = model()
    if not os.path.exists(os.path.join(fp, 'processed', 'node2vec')):
        os.makedirs(os.path.join(fp, 'processed', 'node2vec'), exist_ok=True)
    with open(
            osp.join(fp, 'processed', 'node2vec',
                     PARAMS['EMBEDDING_NAME'] + 'log.json'), 'w') as f:
        json.dump({'loss': losses}, f)
    z = z.detach().cpu().numpy()[post_indx, :]
    np.save(osp.join(fp, 'processed', 'node2vec', PARAMS['EMBEDDING_NAME']), z)
    print('successfully saved embedding')
    return z
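A hypothetical `PARAMS` dict for the function above, following the keys listed in the docstring (all values are illustrative, not taken from the original project):

    PARAMS = {
        'GRAPH_NAME': 'graph.mat',       # illustrative .mat file produced upstream
        'EMBEDDING_NAME': 'n2v_128',     # name under which the embedding is saved
        'EMBEDDING_DIM': 128,
        'WALK_LENGTH': 20,
        'CONTEXT_SIZE': 10,
        'WALKS_PER_NODE': 10,
        'P': 1,
        'Q': 1,
        'LEARNING_RATE': 0.01,
        'BATCH_SIZE': 128,
        'NUM_EPOCH': 5,
        'CUDA': True,
    }
    z = node2vec('data', PARAMS)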
Example #22
def main():
    parser = argparse.ArgumentParser(description='OGBN-Papers100M (Node2Vec)')
    parser.add_argument('--device', type=int, default=0)
    parser.add_argument('--embedding_dim', type=int, default=128)
    parser.add_argument('--walk_length', type=int, default=20)
    parser.add_argument('--context_size', type=int, default=10)
    parser.add_argument('--walks_per_node', type=int, default=10)
    parser.add_argument('--batch_size', type=int, default=256)
    parser.add_argument('--lr', type=float, default=0.01)
    parser.add_argument('--epochs', type=int, default=2)
    parser.add_argument('--log_steps', type=int, default=10)
    parser.add_argument('--dropedge_rate', type=float, default=0.4)
    args = parser.parse_args()

    device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device)

    dataset = PygNodePropPredDataset(name='ogbn-papers100M')

    split_idx = dataset.get_idx_split()

    data = dataset[0]

    # if args.add_inverse:

    print('Making the graph undirected.')
    ### Randomly drop some edges to avoid segmentation fault
    data.edge_index, _ = dropout_adj(data.edge_index,
                                     p=args.dropedge_rate,
                                     num_nodes=data.num_nodes)
    data.edge_index = to_undirected(data.edge_index, data.num_nodes)
    filename = 'data_dict.pt'

    print(data)

    model = Node2Vec(data.edge_index,
                     args.embedding_dim,
                     args.walk_length,
                     args.context_size,
                     args.walks_per_node,
                     sparse=True).to(device)

    loader = model.loader(batch_size=args.batch_size,
                          shuffle=True,
                          num_workers=4)
    optimizer = torch.optim.SparseAdam(list(model.parameters()), lr=args.lr)

    print('Saving data_dict before training...')
    save_data_dict(model, data, split_idx, save_file=filename)

    model.train()
    for epoch in range(1, args.epochs + 1):
        for i, (pos_rw, neg_rw) in tqdm(enumerate(loader)):
            optimizer.zero_grad()
            loss = model.loss(pos_rw.to(device), neg_rw.to(device))
            loss.backward()
            optimizer.step()

            if (i + 1) % args.log_steps == 0:
                print(f'Epoch: {epoch:02d}, Step: {i+1:03d}/{len(loader)}, '
                      f'Loss: {loss:.4f}')

            if (i + 1) % 1000 == 0:  # Save model every 1000 steps.
                print('Saving data dict...')
                save_data_dict(model, data, split_idx, save_file=filename)

        print('Saving data dict...')
        save_data_dict(model, data, split_idx, save_file=filename)
Example #23
def main(_):
    if not os.path.exists(FLAGS.output_dir):
        os.makedirs(FLAGS.output_dir)
    start = time.time()
    outfile = os.path.join(FLAGS.output_dir,
                           '%s_%i_%i' % (FLAGS.dataset, FLAGS.dim, FLAGS.C))
    if FLAGS.run:
        outfile += '_' + FLAGS.run
    device = 'cuda'

    main_directory = FLAGS.datasets_dir
    main_directory = os.path.expanduser(main_directory)
    dataset_dir = os.path.join(main_directory, FLAGS.dataset)
    if not os.path.exists(dataset_dir):
        print('Dataset not found: ' + FLAGS.dataset)
        # List the datasets that do exist instead of the missing directory.
        print(', '.join(os.listdir(main_directory)))
        exit(-1)
    graph_file = os.path.join(dataset_dir, 'train.txt.npy')
    edges = np.load(graph_file)
    pyg_edges = np.concatenate([edges, edges[:, ::-1]], axis=0).T
    pyg_edges = torch.from_numpy(np.array(pyg_edges, dtype='int64'))
    test_neg_file = os.path.join(dataset_dir, 'test.neg.txt.npy')
    test_neg_arr = np.load(open(test_neg_file, 'rb'))
    test_pos_file = os.path.join(dataset_dir, 'test.txt.npy')
    test_pos_arr = np.load(open(test_pos_file, 'rb'))

    model = Node2Vec(pyg_edges,
                     embedding_dim=FLAGS.dim,
                     walk_length=FLAGS.C,
                     context_size=FLAGS.C,
                     walks_per_node=20,
                     num_negative_samples=1,
                     sparse=True).to(device)

    loader = model.loader(batch_size=128, shuffle=True, num_workers=4)
    optimizer = torch.optim.SparseAdam(list(model.parameters()), lr=0.01)

    def train():
        model.train()
        total_loss = 0
        for pos_rw, neg_rw in loader:
            optimizer.zero_grad()
            loss = model.loss(pos_rw.to(device), neg_rw.to(device))
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        return total_loss / len(loader)

    def test():
        model.eval()
        embeds = model()
        npe = embeds.detach().cpu().numpy()
        test_scores = (npe[test_pos_arr[:, 0]] *
                       npe[test_pos_arr[:, 1]]).sum(-1)
        test_neg_scores = (npe[test_neg_arr[:, 0]] *
                           npe[test_neg_arr[:, 1]]).sum(-1)

        test_y = [0] * len(test_neg_scores) + [1] * len(test_scores)
        test_y_pred = np.concatenate([test_neg_scores, test_scores], 0)
        test_accuracy = metrics.roc_auc_score(test_y, test_y_pred)
        return test_accuracy

    header = 'epoch,time,accuracy'
    with open(outfile, 'w') as fout:
        print('writing to ' + outfile)
        fout.write(header + '\n')
        print(header)
        for epoch in range(1, 100):  # Over 100, it starts overfitting.
            loss = train()
            acc = test()
            line = '%i,%f,%f' % (epoch, time.time() - start, acc)
            print(line)
            fout.write(line + '\n')
Example #24
# We need to add loop edges for vertices with no outgoing edges.
# https://github.com/rusty1s/pytorch_cluster/issues/45
index, counts = np.unique(es.src, return_counts=True)
degree = np.zeros(num_nodes)
degree[index] = counts
deadends = (degree == 0).nonzero()[0]
# Also convert from uint32 to int64 for PyTorch.
srcs = np.concatenate((es.src, deadends)).astype('int64')
dsts = np.concatenate((es.dst, deadends)).astype('int64')

# Configure Node2Vec.
edges = torch.tensor([srcs, dsts]).to(device)
model = Node2Vec(edges,
                 num_nodes=num_nodes,
                 embedding_dim=op.params['dimensions'],
                 walk_length=op.params['walkLength'],
                 context_size=op.params['contextSize'],
                 walks_per_node=op.params['walksPerNode']).to(device)
loader = model.loader(batch_size=128, shuffle=True)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Train model.
for epoch in range(op.params['iterations']):
    model.train()
    total_loss = 0
    for pos_rw, neg_rw in loader:
        optimizer.zero_grad()
        loss = model.loss(pos_rw, neg_rw)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
Example #25
        try:
            id2 = node2id[node2]
        except:
            id2 = len(node2id)
            node2id[node2] = id2
        
        edge_list.add((id1, id2))
        # edge_list.add((id2, id1))
    except:
        pass

edge_index = torch.tensor(np.array(list(edge_list)).T, dtype=torch.long)  # edge_list is a set; convert to a list first

data = Data(edge_index=edge_index)

model = Node2Vec(data.edge_index, embedding_dim=128, walk_length=4,
            context_size=2, walks_per_node=2, sparse=True).to(device)

loader = model.loader(batch_size=2000, shuffle=True, num_workers=12)
optimizer = torch.optim.SparseAdam(model.parameters(), lr=0.01)

for epoch in range(EPOCHS):
    model.train()

    # total_loss = 0
    for pos_rw, neg_rw in loader:
        optimizer.zero_grad()
        loss = model.loss(pos_rw.to(device), neg_rw.to(device))
        loss.backward()
        optimizer.step()
        # total_loss += loss.item()
    # total_loss = total_loss / len(loader)
Example #26
 def __init__(self, A):
     self.n2v = Node2Vec(len(A), **params)
     self.svm = SVC()
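`params` is defined outside this snippet; a plausible sketch of keyword arguments for the node-count-based Node2Vec constructor used here (assumed values, not the original configuration):

    # Assumed example of the `params` dict consumed above.
    params = dict(embedding_dim=64, walk_length=10, context_size=5, walks_per_node=10)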
Example #27
def run_model(dataset, conf):
    # ## 1) Build Table graph
    # ### Tables tokenization
    tokenized_tables, vocabulary, cell_dict, reversed_dictionary = corpus_tuple = create_corpus(
        dataset, include_attr=conf["add_attr"])
    if conf["shuffle_vocab"] == True:
        shuffled_vocab = shuffle_vocabulary(vocabulary)
    else:
        shuffled_vocab = None

    nodes = build_node_features(vocabulary)
    row_edges_index, row_edges_weights = build_graph_edges(
        tokenized_tables,
        s_vocab=shuffled_vocab,
        sample_frac=conf["row_edges_sample"],
        columns=False)
    col_edges_index, col_edges_weights = build_graph_edges(
        tokenized_tables,
        s_vocab=shuffled_vocab,
        sample_frac=conf["column_edges_sample"],
        columns=True)

    edges = torch.cat((row_edges_index, col_edges_index), dim=1)
    weights = torch.cat((row_edges_weights, col_edges_weights), dim=0)
    graph_data = Data(x=nodes, edge_index=edges, edge_attr=weights)

    # ## 2 ) Run Table Auto-Encoder Model:
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    loader = DataLoader(torch.arange(graph_data.num_nodes),
                        batch_size=128,
                        shuffle=True)
    graph_data = graph_data.to(device)

    def train():
        model.train()
        total_loss = 0
        for subset in loader:
            optimizer.zero_grad()
            loss = model.loss(graph_data.edge_index, subset.to(device))
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        return total_loss / len(loader)

    model = Node2Vec(graph_data.num_nodes,
                     embedding_dim=conf["vector_size"],
                     walk_length=conf["n2v_walk_length"],
                     context_size=conf["n2v_context_size"],
                     walks_per_node=conf["n2v_walks_per_node"])
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    losses = []
    for epoch in range(conf["epoch_num"]):
        loss = train()
        print('Epoch: {:02d}, Loss: {:.4f}'.format(epoch, loss))
        losses.append(float(loss))
    # ### 3) Extract the latent cell vectors, generate table vectors:
    model.eval()
    with torch.no_grad():
        z = model(torch.arange(graph_data.num_nodes, device=device))
        cell_vectors = z.cpu().numpy()
    vec_list = generate_table_vectors(cell_vectors,
                                      tokenized_tables,
                                      s_vocab=shuffled_vocab)

    # ## 3) Evaluate the model
    result_score = evaluate_model(dataset, vec_list, k=5)
    return cell_vectors, vec_list, losses, result_score
Example #28
    def train(self, data, start_time, time_budget):
        ADD_N2V = False
        ADD_GRAPH_FEATS = True
        MIN_TIME = 3  # Stop training loop early if less than this many seconds remain

        # Graph data
        avg_degree = degree(data.edge_index[0], data.x.size()[0]).mean()

        # Add n2v embeddings to features if there are an order of magnitude more
        # edges than there are features
        if int(log(data.x.size()[0], 10)) < int(
                log(data.edge_index.size()[1], 10)):
            ADD_N2V = True

        # Hyperparameters
        train_epochs = 1000
        num_layers = 2  # gcn layers

        # Different algorithms for the number of hidden dims for each
        if data.has_features:
            hidden = min([int(max(data.y) + 1)**2, 128])
            attn_heads = 'N/a'
        else:
            attn_heads = min([int(log(max(data.y) + 1)) + 2, 4])
            hidden = (min([int(max(data.y) + 1)**2, 32]) // attn_heads) + 1

        early_stopping = True
        val_patience = 100  # how long validation loss can increase before we stop

        # Use heuristic-based hyperparams if too many edges to handle
        simplified = True if data.edge_index.size()[1] > 1e6 else False

        print('Hidden dimensions: %d' % hidden)
        print('Attention heads: %s' % str(attn_heads))
        if not data.has_features or ADD_N2V:
            # Requires at least len(class) dimensions, but give it a little more
            embedding_dim = 128 + int(avg_degree**(1 / 2))

            # The larger the avg degree, the less distant walks matter
            # Of course, a minimum is still important
            context_size = int(log(data.edge_index.size()[1]) / avg_degree)
            context_size = context_size if context_size >= 3 else 3

            # We should look at at least 1 context per walk
            walk_len = context_size + 1

            print('Embedding dim: %d\tWalk Len: %d\tContext size: %d' %
                  (embedding_dim, walk_len, context_size))

            embedder = Node2Vec(
                data.x.size()[0],  # Num nodes
                embedding_dim,  # Embedding dimension
                walk_len,  # Walk length
                context_size,  # Context size
                num_negative_samples=context_size**2)

            # First, train embedder
            # Use a higher learning rate, bc this part is
            # meant to be kind of "quick and dirty"
            embedder = self.n2v_trainer(
                data,
                embedder,
                lr=0.05,
                patience=50  # lower patience when time is important
            )

            # Training moves data to GPU. Have to put it back before manipulating
            # it further.
            data = data.to('cpu')
            embedder = embedder.to('cpu')

            if data.has_features and ADD_N2V:
                data.x = torch.cat(
                    (self.var_thresh(data.x), embedder.embedding.weight),
                    axis=1)
            else:
                # Then use n2v embeddings as features
                data.x = embedder.embedding.weight

            # Remove reference to embedder to free up memory on GPU
            del embedder
            gc.collect()
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

        else:
            print('Num feature before: %d' % data.x.size()[1])
            data.x = self.var_thresh(data.x)
            print('Num features after: %d' % data.x.size()[1])

        if ADD_GRAPH_FEATS:
            print('Num feature before: %d' % data.x.size()[1])
            data.x = torch.cat((data.x, data.graph_data), axis=1)
            print('Num features after: %d' % data.x.size()[1])

        if data.has_features:
            print("Using GCN")

            # Make sure we actually need this.. Only if it crashes on test data
            # Just use heuristics-based
            #if simplified:
            #    params = {
            #        'features_num': data.x.size()[1],
            #        'num_class': int(max(data.y)) + 1,
            #        'hidden': hidden
            #    }

            # Grid search to find best
            #else:
            params = self.grid_search(data,
                                      hidden,
                                      h_dist=10,
                                      epochs=50,
                                      h_step=1)
            model = GCN(**params)

        else:
            print("Using GAT")

            # Just use heuristics based
            if simplified:
                params = {
                    'features_num': data.x.size()[1],
                    'num_class': int(max(data.y)) + 1,
                    'hidden': hidden,
                    'heads': attn_heads,
                    'dropout': 0.7  # Increase dropout if complex graph
                }

            # Do grid search for best params
            else:
                params = self.grid_search(data,
                                          hidden,
                                          attn_heads=attn_heads,
                                          h_dist=10,
                                          h_step=1,
                                          a_dist=1,
                                          a_step=1,
                                          epochs=25)

            model = GAT(**params)

        # Move data to compute device
        model = model.to(self.device)
        data = data.to(self.device)

        # Configure optimizer
        lr = 0.005
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=lr,
                                     weight_decay=1e-5)

        # Main training loop
        min_loss = float('inf')
        val_loss_min = 1000
        train_loss_min = 1000
        val_increase = 0
        stopped_early = False
        state_dict_save = 'checkpoint.model'

        # Fraction of patience before restarting with lower lr from best model
        FRUSTRATION = 1 if data.has_features else 25
        # Number of times to retry training from prev best with lower lr
        NUM_REDOS = 15
        # Smallest value we allow loss to be, usually 5e-6
        MIN_LOSS = 5e-6
        # How much we decrease loss when frustrated
        LOSS_DEC_INC = 1.25

        # What percent validation loss increase we're willing to accept if training loss
        # goes down by more than that much. The hope is that this balances against overfitting.
        # E.g., an epoch that increases val loss from 1 to 1.01 but decreases train loss
        # from 1 to 0.98 is considered the best model.
        GOOD_ENOUGH = 1.00025
        BECOME_FRUSTRATED = False
        lr_decays = 0

        # LR must decay at least this many times before GOOD_ENOUGH training
        # is activated. This way we're sure it's in a local minimum before we
        # allow it to stray
        GOOD_ENOUGH_THRESH = 3

        epoch = 0
        while (True):
            train_start = time.time()

            model.train()
            optimizer.zero_grad()
            loss = F.nll_loss(model(data)[data.train_mask],
                              data.y[data.train_mask],
                              weight=data.weighted_loss)
            loss.backward()
            optimizer.step()
            train_loss = loss.item()

            # calculate loss on validation set
            model.eval()
            loss = F.nll_loss(model(data)[data.val_mask],
                              data.y[data.val_mask],
                              weight=data.weighted_loss)

            val_loss = loss.item()

            if ((val_loss > val_loss_min and early_stopping)
                    and not (BECOME_FRUSTRATED
                             and val_loss <= GOOD_ENOUGH * val_loss_min
                             and train_loss * GOOD_ENOUGH <= train_loss_min)):
                val_increase += 1
            else:
                print("===New Minimum validation loss===")
                print('[%d] Train loss: %.3f   Val Loss: %.3f' %
                      (epoch, train_loss, val_loss))
                val_loss_min = val_loss
                train_loss_min = train_loss
                val_increase = 0
                redos = 1
                torch.save(model.state_dict(), state_dict_save)

            # Want to make sure we have the amount of time it takes
            # to loop and however much extra we need later
            time_cutoff = MIN_TIME + (time.time() - train_start)

            if (val_increase > val_patience
                    or time_budget - (time.time() - start_time) < time_cutoff):

                print("Early stopping!")
                stopped_early = True
                break

            # Lower learning rate and start from prev best after model becomes
            # frustrated with poor progress
            if val_increase > val_patience // FRUSTRATION and lr > MIN_LOSS:
                if redos % NUM_REDOS == 0:
                    lr /= LOSS_DEC_INC
                    lr = lr if lr > MIN_LOSS else MIN_LOSS  # make sure not less than 5e-6
                    print('LR decay: New lr: %.6f' % lr)
                    for g in optimizer.param_groups:
                        g['lr'] = lr

                    lr_decays += 1
                    if lr_decays >= GOOD_ENOUGH_THRESH:
                        BECOME_FRUSTRATED = True

                    model.load_state_dict(torch.load(state_dict_save))

                redos += 1

            # Use simple LR decay for data w features
            elif data.has_features and epoch > 0 and epoch % 10 == 0 and lr > 5e-6:
                lr -= 0.00025
                lr = lr if lr > 5e-6 else 5e-6  # make sure not less than 5e-6
                print('LR decay: New lr: %.6f' % lr)
                for g in optimizer.param_groups:
                    g['lr'] = lr

            epoch += 1

        if stopped_early:
            print("Reloading best parameters!")

            # State dict saved to CPU so have to load from there(?)
            model.load_state_dict(torch.load(state_dict_save))

        return model