Exemplo n.º 1
0
    def process(self):
        data_list = []

        indices_train = []
        indices_val = []
        indices_test = []

        infile = open("train.index.txt", "r")
        for line in infile:
            indices_train = line.split(",")
            indices_train = [int(i) for i in indices_train]

        infile = open("val.index.txt", "r")
        for line in infile:
            indices_val = line.split(",")
            indices_val = [int(i) for i in indices_val]

        infile = open("test.index.txt", "r")
        for line in infile:
            indices_test = line.split(",")
            indices_test = [int(i) for i in indices_test]

        dp.get_dataset("ZINC_train", regression=True)
        dp.get_dataset("ZINC_test", regression=True)
        dp.get_dataset("ZINC_val", regression=True)

        node_labels = pre.get_all_node_labels_ZINC(True, True, indices_train,
                                                   indices_val, indices_test)

        targets = pre.read_targets("ZINC_train", indices_train)
        targets.extend(pre.read_targets("ZINC_val", indices_val))
        targets.extend(pre.read_targets("ZINC_test", indices_test))

        matrices = pre.get_all_matrices_dwl("ZINC_train", indices_train)
        matrices.extend(pre.get_all_matrices_dwl("ZINC_val", indices_val))
        matrices.extend(pre.get_all_matrices_dwl("ZINC_test", indices_test))

        for i, m in enumerate(matrices):
            edge_index_1_l = torch.tensor(matrices[i][0]).t().contiguous()
            edge_index_1_g = torch.tensor(matrices[i][1]).t().contiguous()
            edge_index_2_l = torch.tensor(matrices[i][2]).t().contiguous()
            edge_index_2_g = torch.tensor(matrices[i][3]).t().contiguous()

            data = Data()
            data.edge_index_1_l = edge_index_1_l
            data.edge_index_1_g = edge_index_1_g
            data.edge_index_2_l = edge_index_2_l
            data.edge_index_2_g = edge_index_2_g

            one_hot = np.eye(445)[node_labels[i]]
            data.x = torch.from_numpy(one_hot).to(torch.float)
            data.y = data.y = torch.from_numpy(np.array([targets[i]
                                                         ])).to(torch.float)

            data_list.append(data)

        data, slices = self.collate(data_list)
        torch.save((data, slices), self.processed_paths[0])
Exemplo n.º 2
0
    def process(self):
        data_list = []

        indices_train = []
        indices_val = []
        indices_test = []

        indices_train = []
        indices_val = []

        infile = open("train_50.index.txt", "r")
        for line in infile:
            indices_train = line.split(",")
            indices_train = [int(i) for i in indices_train]

        infile = open("val_50.index.txt", "r")
        for line in infile:
            indices_val = line.split(",")
            indices_val = [int(i) for i in indices_val]

        indices_test = list(range(0, 5000))

        dp.get_dataset("ZINC_train")
        dp.get_dataset("ZINC_test")
        dp.get_dataset("ZINC_val")
        node_labels = pre.get_all_node_labels_ZINC_connected(
            True, True, indices_train, indices_val, indices_test)

        targets = pre.read_targets("ZINC_train", indices_train)
        targets.extend(pre.read_targets("ZINC_val", indices_val))
        targets.extend(pre.read_targets("ZINC_test", indices_test))

        matrices = pre.get_all_matrices_connected("ZINC_train", indices_train)
        matrices.extend(pre.get_all_matrices_connected("ZINC_val",
                                                       indices_val))
        matrices.extend(
            pre.get_all_matrices_connected("ZINC_test", indices_test))

        for i, m in enumerate(matrices):
            edge_index_1 = torch.tensor(matrices[i][0]).t().contiguous().long()
            edge_index_2 = torch.tensor(matrices[i][1]).t().contiguous().long()

            data = Data()
            data.edge_index_1 = edge_index_1
            data.edge_index_2 = edge_index_2

            one_hot = np.eye(445)[node_labels[i]]
            data.x = torch.from_numpy(one_hot).to(torch.float)
            data.y = data.y = torch.from_numpy(np.array([targets[i]
                                                         ])).to(torch.float)

            data_list.append(data)

        data, slices = self.collate(data_list)
        torch.save((data, slices), self.processed_paths[0])
Exemplo n.º 3
0
    def process(self):
        data_list = []

        dp.get_dataset("ZINC_test", regression=True)

        # TODO Change this
        node_labels = pre.get_all_node_labels("ZINC_full", True, True)
        targets = pre.read_targets("ZINC_test", list(range(0, 5000)))

        node_labels_1 = node_labels[220011:225011]
        matrices = pre.get_all_matrices_wl("ZINC_test", list(range(0, 5000)))
        targets_1 = targets
        for i, m in enumerate(matrices):
            edge_index_1 = torch.tensor(matrices[i][0]).t().contiguous()
            edge_index_2 = torch.tensor(matrices[i][1]).t().contiguous()

            data = Data()
            data.edge_index_1 = edge_index_1
            data.edge_index_2 = edge_index_2

            # one_hot = np.eye(492)[node_labels[i]]
            data.x = torch.from_numpy(np.array(node_labels_1[i])).to(
                torch.float)
            data.y = data.y = torch.from_numpy(np.array([targets_1[i]
                                                         ])).to(torch.float)

            data_list.append(data)

        data, slices = self.collate(data_list)
        torch.save((data, slices), self.processed_paths[0])
Exemplo n.º 4
0
    def process(self):
        data_list = []

        indices_val = list(range(225011, 249456))

        dp.get_dataset("ZINC_val", regression=True)
        node_labels = pre.get_all_node_labels("ZINC_full", True, True)
        targets = pre.read_targets("ZINC_val", list(range(0, 24445)))

        node_labels_1 = node_labels[225011:249456]
        matrices = pre.get_all_matrices_wl("ZINC_val", list(range(0, 24445)))
        targets_1 = targets
        for i, m in enumerate(matrices):
            edge_index_1 = torch.tensor(matrices[i][0]).t().contiguous()
            edge_index_2 = torch.tensor(matrices[i][1]).t().contiguous()

            data = Data()
            data.edge_index_1 = edge_index_1
            data.edge_index_2 = edge_index_2

            data.x = torch.from_numpy(np.array(node_labels_1[i])).to(
                torch.float)
            data.y = data.y = torch.from_numpy(np.array([targets_1[i]
                                                         ])).to(torch.float)

            data_list.append(data)

        data, slices = self.collate(data_list)
        torch.save((data, slices), self.processed_paths[0])
Exemplo n.º 5
0
    def process(self):
        data_list = []

        indices_train = list(range(0, 220011))
        indices_val = list(range(0, 24445))
        indices_test = list(range(0, 5000))

        dp.get_dataset("ZINC_train", regression=True)
        dp.get_dataset("ZINC_test", regression=True)
        dp.get_dataset("ZINC_val", regression=True)
        node_labels = pre.get_all_node_labels_ZINC(True, True, indices_train,
                                                   indices_val, indices_test)

        targets = pre.read_targets("ZINC_train", indices_train)
        targets.extend(pre.read_targets("ZINC_val", indices_val))
        targets.extend(pre.read_targets("ZINC_test", indices_test))

        node_labels = node_labels[0:50000]
        matrices = pre.get_all_matrices_dwl("ZINC_train", list(range(0,
                                                                     50000)))
        targets = targets[0:50000]

        for i, m in enumerate(matrices):
            edge_index_1_l = torch.tensor(matrices[i][0]).t().contiguous()
            edge_index_1_g = torch.tensor(matrices[i][1]).t().contiguous()
            edge_index_2_l = torch.tensor(matrices[i][2]).t().contiguous()
            edge_index_2_g = torch.tensor(matrices[i][3]).t().contiguous()

            data = Data()
            data.edge_index_1_l = edge_index_1_l
            data.edge_index_1_g = edge_index_1_g
            data.edge_index_2_l = edge_index_2_l
            data.edge_index_2_g = edge_index_2_g

            data.x = torch.from_numpy(np.array(node_labels[i])).to(torch.float)
            data.y = data.y = torch.from_numpy(np.array([targets[i]
                                                         ])).to(torch.float)

            data_list.append(data)

        data, slices = self.collate(data_list)
        torch.save((data, slices), self.processed_paths[0])