# Requires module-level: import os, os.path as osp, pickle, numpy as np, torch;
# Data comes from the library's data module.
def __init__(self, args=None):
    dataset = "jknet_cora"
    path = osp.join(osp.dirname(osp.realpath(__file__)), "../..", "data", dataset)
    if not osp.exists(path):
        os.makedirs(path)
    super(CoraDataset, self).__init__(path)

    with open(self.processed_paths[0], "rb") as fin:
        load_data = pickle.load(fin)
    self.num_nodes = load_data["node_num"]

    data = Data()
    data.x = load_data["xs"]
    data.y = load_data["ys"]

    # Random 80/20 train/test split; the validation mask deliberately
    # reuses the test split (val and test are the same set of nodes).
    train_size = int(self.num_nodes * 0.8)
    train_mask = np.zeros((self.num_nodes,), dtype=bool)
    train_idx = np.random.choice(np.arange(self.num_nodes), size=train_size, replace=False)
    train_mask[train_idx] = True
    test_mask = np.ones((self.num_nodes,), dtype=bool)
    test_mask[train_idx] = False
    val_mask = test_mask

    # Edges are stored as a list of (src, dst) pairs; transpose to the
    # 2 x num_edges layout expected by edge_index.
    edges = np.array(load_data["edges"], dtype=int).transpose((1, 0))
    data.edge_index = torch.from_numpy(edges)
    data.train_mask = torch.from_numpy(train_mask)
    data.test_mask = torch.from_numpy(test_mask)
    data.val_mask = torch.from_numpy(val_mask)
    data.x = torch.Tensor(data.x)
    data.y = torch.LongTensor(data.y)
    self.data = data
    self.num_classes = torch.max(self.data.y).item() + 1
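# A minimal usage sketch (hypothetical setup): the pickled jknet_cora file
# is assumed to already exist under data/jknet_cora.
dataset = CoraDataset()
data = dataset.data
# The random masks partition all nodes: 80% train, 20% test (== val here).
assert int(data.train_mask.sum() + data.test_mask.sum()) == dataset.num_nodes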
import os.path as osp

import torch

def read_triplet_data(folder):
    """Read train/valid/test triples and pack them into a single Data
    object whose edge order (train, then valid, then test) defines the
    split masks."""
    filenames = ["train2id.txt", "valid2id.txt", "test2id.txt"]
    count = 0
    edge_index = []
    edge_attr = []
    count_list = []
    for filename in filenames:
        with open(osp.join(folder, filename), "r") as f:
            _ = int(f.readline().strip())  # first line is the triple count
            for line in f:
                items = line.strip().split()
                edge_index.append([int(items[0]), int(items[1])])  # (head, tail)
                edge_attr.append(int(items[2]))  # relation id
                count += 1
            count_list.append(count)

    edge_index = torch.LongTensor(edge_index).t()
    edge_attr = torch.LongTensor(edge_attr)
    data = Data()
    data.edge_index = edge_index
    data.edge_attr = edge_attr

    def generate_mask(start, end):
        mask = torch.zeros(count, dtype=torch.bool)
        mask[start:end] = True
        return mask

    data.train_mask = generate_mask(0, count_list[0])
    data.val_mask = generate_mask(count_list[0], count_list[1])
    data.test_mask = generate_mask(count_list[1], count_list[2])
    return data
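# Usage sketch (dataset path assumed): each *2id.txt file starts with a
# count line followed by "head tail relation" lines, as in the
# OpenKE-style *2id format.
data = read_triplet_data("./data/FB15k")
# The three masks tile the edge list in file order: train, valid, test.
assert int(data.train_mask.sum() + data.val_mask.sum() + data.test_mask.sum()) == data.edge_index.size(1)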
# Requires module-level: import numpy as np, scipy.sparse as sp, torch;
# Data comes from the library's data module.
def get_subgraph(self, phase, require_norm=True):
    """Generate one minibatch for the model.

    In the 'train' phase, one minibatch corresponds to one sampled
    subgraph of the training graph. In the 'val' or 'test' phase, one
    batch corresponds to the full graph (i.e., full-batch rather than
    minibatch evaluation for the validation / test sets).

    Inputs:
        phase           str, one of 'train', 'val', 'test'
        require_norm    bool, whether to attach normalization terms

    Outputs:
        data            Data object modeling the sampled subgraph
        data.norm_aggr  aggregation normalization
        data.norm_loss  loss normalization
    """
    if phase in ["val", "test"]:
        # Full-batch evaluation: the "subgraph" is the whole graph.
        node_subgraph = np.arange(self.data.num_nodes)
        data = self.data
        if require_norm:
            data.norm_aggr = torch.ones(self.num_edges)
            data.norm_loss = self.norm_loss_test
    else:
        # Refill the pool of pre-sampled subgraphs if it is empty.
        if len(self.subgraphs_nodes) == 0:
            self.gen_subgraph()

        node_subgraph = self.subgraphs_nodes.pop()
        edge_subgraph = self.subgraphs_edge_index.pop()
        num_nodes_subgraph = node_subgraph.size
        adj = sp.csr_matrix(
            (self.subgraphs_data.pop(), self.subgraphs_indices.pop(), self.subgraphs_indptr.pop()),
            shape=(num_nodes_subgraph, num_nodes_subgraph),
        )

        if require_norm:
            adj.data[:] = self.norm_aggr_train[edge_subgraph][:]
            # Row-normalize the subgraph adjacency: A <- D^{-1} A.
            D = adj.sum(1).flatten()
            norm_diag = sp.dia_matrix((1 / D, 0), shape=adj.shape)
            adj = norm_diag.dot(adj)
            adj.sort_indices()

        adj = adj.tocoo()
        data = Data(
            self.data.x[node_subgraph],
            torch.LongTensor(np.vstack((adj.row, adj.col))),
            None if self.data.edge_attr is None else self.data.edge_attr[edge_subgraph],
            self.data.y[node_subgraph],
            None if self.data.pos is None else self.data.pos[node_subgraph],
        )
        if require_norm:
            data.norm_aggr = torch.FloatTensor(adj.data)
            data.norm_loss = self.norm_loss_train[node_subgraph]
        data.train_mask = self.data.train_mask[node_subgraph]
        data.val_mask = self.data.val_mask[node_subgraph]
        data.test_mask = self.data.test_mask[node_subgraph]
    return data
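# Hedged training-loop sketch (GraphSAINT-style): `model`, `optimizer`,
# `num_batches`, and the sampler object exposing get_subgraph are all
# assumed to exist; norm_loss reweights the per-node loss to correct the
# bias introduced by subgraph sampling.
import torch.nn.functional as F

for _ in range(num_batches):
    batch = sampler.get_subgraph("train")
    out = model(batch.x, batch.edge_index)
    loss_per_node = F.nll_loss(out, batch.y, reduction="none")
    loss = (loss_per_node * batch.norm_loss)[batch.train_mask].sum()
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()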
import pickle as pkl

import torch

def read_planetoid_data(folder, prefix):
    prefix = prefix.lower()
    names = ["x", "tx", "allx", "y", "ty", "ally", "graph", "test.index"]
    objects = []
    for item in names[:-1]:
        with open(f"{folder}/ind.{prefix}.{item}", "rb") as f:
            # latin1 is needed to unpickle files written under Python 2.
            # (The f-strings above already require Python 3, so the old
            # sys.version_info branch was dead code.)
            objects.append(pkl.load(f, encoding="latin1"))
    test_index = parse_index_file(f"{folder}/ind.{prefix}.{names[-1]}")
    test_index = torch.tensor(test_index, dtype=torch.long)
    test_index_reorder = test_index.sort()[0]

    x, tx, allx, y, ty, ally, graph = tuple(objects)
    x, tx, allx = tuple(torch.from_numpy(item.todense()).float() for item in [x, tx, allx])
    y, ty, ally = tuple(torch.from_numpy(item).float() for item in [y, ty, ally])

    train_index = torch.arange(y.size(0), dtype=torch.long)
    val_index = torch.arange(y.size(0), y.size(0) + 500, dtype=torch.long)

    if prefix == "citeseer":
        # There are some isolated nodes in the Citeseer graph, resulting in
        # non-consecutive test indices. We need to identify them and add them
        # as zero vectors to `tx` and `ty`.
        len_test_indices = (test_index.max() - test_index.min()).item() + 1
        tx_ext = torch.zeros(len_test_indices, tx.size(1))
        tx_ext[test_index_reorder - test_index.min(), :] = tx
        ty_ext = torch.zeros(len_test_indices, ty.size(1))
        ty_ext[test_index_reorder - test_index.min(), :] = ty
        tx, ty = tx_ext, ty_ext

    x = torch.cat([allx, tx], dim=0).float()
    y = torch.cat([ally, ty], dim=0).max(dim=1)[1].long()

    # The test features/labels are stored in shuffled order; put them back
    # at their original node positions.
    x[test_index] = x[test_index_reorder]
    y[test_index] = y[test_index_reorder]

    train_mask = index_to_mask(train_index, size=y.size(0))
    val_mask = index_to_mask(val_index, size=y.size(0))
    test_mask = index_to_mask(test_index, size=y.size(0))

    edge_index = edge_index_from_dict(graph, num_nodes=y.size(0))

    data = Data(x=x, edge_index=edge_index, y=y)
    data.train_mask = train_mask
    data.val_mask = val_mask
    data.test_mask = test_mask
    return data
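# `parse_index_file` and `index_to_mask` are referenced above but not shown
# here; minimal sketches consistent with their call sites:
def parse_index_file(filename):
    # One integer node index per line.
    with open(filename, "r") as f:
        return [int(line.strip()) for line in f]

def index_to_mask(index, size):
    mask = torch.zeros(size, dtype=torch.bool)
    mask[index] = True
    return mask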
import os.path as osp

import torch

def read_triplet_data(folder):
    """Extended variant of the triplet reader: besides the Data object
    with split masks, also returns the raw (head, relation, tail) triples,
    the start offset of each split within that list, and the number of
    distinct entities and relations."""
    filenames = ["train2id.txt", "valid2id.txt", "test2id.txt"]
    count = 0
    edge_index = []
    edge_attr = []
    count_list = []
    triples = []
    num_entities = 0
    num_relations = 0
    entity_dic = {}
    relation_dic = {}
    for filename in filenames:
        with open(osp.join(folder, filename), "r") as f:
            _ = int(f.readline().strip())  # first line is the triple count
            # Record where this split begins in the flat `triples` list.
            if "train" in filename:
                train_start_idx = len(triples)
            elif "valid" in filename:
                valid_start_idx = len(triples)
            elif "test" in filename:
                test_start_idx = len(triples)
            for line in f:
                items = line.strip().split()
                edge_index.append([int(items[0]), int(items[1])])  # (head, tail)
                edge_attr.append(int(items[2]))  # relation id
                triples.append((int(items[0]), int(items[2]), int(items[1])))
                # Assign a fresh index to each previously unseen entity/relation.
                if items[0] not in entity_dic:
                    entity_dic[items[0]] = num_entities
                    num_entities += 1
                if items[1] not in entity_dic:
                    entity_dic[items[1]] = num_entities
                    num_entities += 1
                if items[2] not in relation_dic:
                    relation_dic[items[2]] = num_relations
                    num_relations += 1
                count += 1
            count_list.append(count)

    edge_index = torch.LongTensor(edge_index).t()
    edge_attr = torch.LongTensor(edge_attr)
    data = Data()
    data.edge_index = edge_index
    data.edge_attr = edge_attr

    def generate_mask(start, end):
        mask = torch.zeros(count, dtype=torch.bool)
        mask[start:end] = True
        return mask

    data.train_mask = generate_mask(0, count_list[0])
    data.val_mask = generate_mask(count_list[0], count_list[1])
    data.test_mask = generate_mask(count_list[1], count_list[2])
    return data, triples, train_start_idx, valid_start_idx, test_start_idx, num_entities, num_relations
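# Usage sketch (dataset path assumed): the extended reader also returns the
# raw (head, relation, tail) triples plus the start offset of each split.
data, triples, train_start, valid_start, test_start, n_ent, n_rel = read_triplet_data("./data/FB15k-237")
train_triples = triples[train_start:valid_start]
valid_triples = triples[valid_start:test_start]
test_triples = triples[test_start:]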
def process(self):
    data = np.load(osp.join(self.raw_dir, "reddit_data.npz"))
    x = torch.from_numpy(data["feature"]).to(torch.float)
    y = torch.from_numpy(data["label"]).to(torch.long)
    split = torch.from_numpy(data["node_types"])

    adj = sp.load_npz(osp.join(self.raw_dir, "reddit_graph.npz"))
    row = torch.from_numpy(adj.row).to(torch.long)
    col = torch.from_numpy(adj.col).to(torch.long)
    edge_index = torch.stack([row, col], dim=0)
    # Deduplicate and sort the edge list.
    edge_index, _ = coalesce(edge_index, None, x.size(0), x.size(0))

    data = Data(x=x, edge_index=edge_index, y=y)
    # node_types encodes the split: 1 = train, 2 = val, 3 = test.
    data.train_mask = split == 1
    data.val_mask = split == 2
    data.test_mask = split == 3

    torch.save(self.collate([data]), self.processed_paths[0])
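# Hedged sanity check (a loaded `dataset` wrapper built on the class above
# is assumed): the masks derived from node_types should be mutually disjoint.
data = dataset[0]
assert not bool((data.train_mask & data.val_mask).any())
assert not bool((data.train_mask & data.test_mask).any())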
def read_planetoid_data(folder, prefix):
    names = ['x', 'tx', 'allx', 'y', 'ty', 'ally', 'graph', 'test.index']
    items = [read_file(folder, prefix, name) for name in names]
    x, tx, allx, y, ty, ally, graph, test_index = items
    train_index = torch.arange(y.size(0), dtype=torch.long)
    val_index = torch.arange(y.size(0), y.size(0) + 500, dtype=torch.long)
    sorted_test_index = test_index.sort()[0]

    if prefix.lower() == 'citeseer':
        # There are some isolated nodes in the Citeseer graph, resulting in
        # non-consecutive test indices. We need to identify them and add them
        # as zero vectors to `tx` and `ty`.
        len_test_indices = (test_index.max() - test_index.min()).item() + 1
        tx_ext = torch.zeros(len_test_indices, tx.size(1))
        tx_ext[sorted_test_index - test_index.min(), :] = tx
        ty_ext = torch.zeros(len_test_indices, ty.size(1))
        ty_ext[sorted_test_index - test_index.min(), :] = ty
        tx, ty = tx_ext, ty_ext

    x = torch.cat([allx, tx], dim=0)
    y = torch.cat([ally, ty], dim=0).max(dim=1)[1]

    # Test features/labels are stored in shuffled order; restore them to
    # their original node positions.
    x[test_index] = x[sorted_test_index]
    y[test_index] = y[sorted_test_index]

    train_mask = sample_mask(train_index, num_nodes=y.size(0))
    val_mask = sample_mask(val_index, num_nodes=y.size(0))
    test_mask = sample_mask(test_index, num_nodes=y.size(0))

    edge_index = edge_index_from_dict(graph, num_nodes=y.size(0))

    data = Data(x=x, edge_index=edge_index, y=y)
    data.train_mask = train_mask
    data.val_mask = val_mask
    data.test_mask = test_mask
    return data
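# `read_file`, `sample_mask`, and `edge_index_from_dict` are helpers not
# shown here; a minimal sketch of `sample_mask` consistent with its calls:
def sample_mask(index, num_nodes):
    mask = torch.zeros(num_nodes, dtype=torch.bool)
    mask[index] = True
    return mask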