def process(self):
    """Build the processed alchemy_full dataset and save it to disk.

    Reads the train/val/test split indices from the ``*_al_50.index``
    files, fetches regression targets and graph data via ``dp`` / ``pre``,
    assembles one ``Data`` object per graph (ordered train -> val -> test),
    then collates and saves to ``self.processed_paths[0]``.
    """

    def _read_index(path):
        # Index files hold comma-separated integers.  If a file has more
        # than one line, the last line wins (preserves original behavior).
        indices = []
        with open(path, "r") as infile:  # was leaked: file never closed
            for line in infile:
                indices = [int(tok) for tok in line.split(",")]
        return indices

    indices_test = _read_index("test_al_50.index")
    indices_val = _read_index("val_al_50.index")
    indices_train = _read_index("train_al_50.index")

    targets = dp.get_dataset("alchemy_full", multigregression=True)
    # Reorder targets to train -> val -> test so they line up with matrices.
    targets = (
        targets[indices_train].tolist()
        + targets[indices_val].tolist()
        + targets[indices_test].tolist()
    )

    node_labels = pre.get_all_node_labels_alchem_1(
        True, True, indices_train, indices_val, indices_test
    )
    edge_labels = pre.get_all_edge_labels_alchem_1(
        True, True, indices_train, indices_val, indices_test
    )

    matrices = pre.get_all_matrices_1("alchemy_full", indices_train)
    matrices.extend(pre.get_all_matrices_1("alchemy_full", indices_val))
    matrices.extend(pre.get_all_matrices_1("alchemy_full", indices_test))

    data_list = []
    for i, matrix in enumerate(matrices):
        data = Data()
        data.edge_index = torch.tensor(matrix).t().contiguous()
        # One-hot encode node labels (6 classes) and edge labels (4 classes).
        data.x = torch.from_numpy(np.eye(6)[node_labels[i]]).to(torch.float)
        data.edge_attr = torch.from_numpy(np.eye(4)[edge_labels[i]]).to(torch.float)
        data.y = torch.from_numpy(np.array([targets[i]])).to(torch.float)
        # NOTE(review): removed leftover debug print of data.y.size().
        data_list.append(data)

    data, slices = self.collate(data_list)
    torch.save((data, slices), self.processed_paths[0])
def process(self):
    """Build the processed ZINC train/val/test dataset and save it to disk.

    Reads the split indices from ``train.index.txt`` / ``val.index.txt`` /
    ``test.index.txt``, materializes the three ZINC splits via ``dp``,
    fetches labels, targets, and adjacency matrices via ``pre``, assembles
    one ``Data`` object per graph (ordered train -> val -> test), then
    collates and saves to ``self.processed_paths[0]``.
    """

    def _read_index(path):
        # Index files hold comma-separated integers.  If a file has more
        # than one line, the last line wins (preserves original behavior).
        indices = []
        with open(path, "r") as infile:  # was leaked: file never closed
            for line in infile:
                indices = [int(tok) for tok in line.split(",")]
        return indices

    indices_test = _read_index("test.index.txt")
    indices_val = _read_index("val.index.txt")
    indices_train = _read_index("train.index.txt")

    # Return values unused; presumably called for their side effect of
    # materializing/downloading the splits — TODO confirm against dp.
    dp.get_dataset("ZINC_train")
    dp.get_dataset("ZINC_test")
    dp.get_dataset("ZINC_val")

    node_labels = pre.get_all_node_labels_ZINC_1(
        True, True, indices_train, indices_val, indices_test
    )
    edge_labels = pre.get_all_edge_labels_ZINC_1(
        True, True, indices_train, indices_val, indices_test
    )

    targets = pre.read_targets("ZINC_train", indices_train)
    targets.extend(pre.read_targets("ZINC_val", indices_val))
    targets.extend(pre.read_targets("ZINC_test", indices_test))

    matrices = pre.get_all_matrices_1("ZINC_train", indices_train)
    matrices.extend(pre.get_all_matrices_1("ZINC_val", indices_val))
    matrices.extend(pre.get_all_matrices_1("ZINC_test", indices_test))

    data_list = []
    for i, matrix in enumerate(matrices):
        data = Data()
        data.edge_index = torch.tensor(matrix).t().contiguous()
        # One-hot encode node and edge labels (21 classes each).
        data.x = torch.from_numpy(np.eye(21)[node_labels[i]]).to(torch.float)
        data.edge_attr = torch.from_numpy(np.eye(21)[edge_labels[i]]).to(torch.float)
        # Fixed duplicated assignment: was ``data.y = data.y = ...``.
        data.y = torch.from_numpy(np.array([targets[i]])).to(torch.float)
        data_list.append(data)

    data, slices = self.collate(data_list)
    torch.save((data, slices), self.processed_paths[0])
def process(self):
    """Build the processed ZINC_full dataset and save it to disk.

    Fetches the full-set regression targets via ``dp``, the node/edge
    labels and adjacency matrices via ``pre``, assembles one ``Data``
    object per graph, then collates and saves to
    ``self.processed_paths[0]``.
    """
    targets = dp.get_dataset("ZINC_full", regression=True).tolist()
    node_labels = pre.get_all_node_labels_1("ZINC_full", True)
    edge_labels = pre.get_all_edge_labels_1("ZINC_full")
    matrices = pre.get_all_matrices_1("ZINC_full", list(range(0, 249456)))

    data_list = []
    for idx, edge_list in enumerate(matrices):
        graph = Data()
        graph.edge_index = torch.tensor(edge_list).t().contiguous()
        # One-hot encode node labels (28 classes) and edge labels (3 classes).
        node_one_hot = np.eye(28)[node_labels[idx]]
        edge_one_hot = np.eye(3)[edge_labels[idx]]
        graph.x = torch.from_numpy(node_one_hot).to(torch.float)
        graph.edge_attr = torch.from_numpy(edge_one_hot).to(torch.float)
        graph.y = torch.from_numpy(np.array([targets[idx]])).to(torch.float)
        data_list.append(graph)

    data, slices = self.collate(data_list)
    torch.save((data, slices), self.processed_paths[0])