Exemplo n.º 1
0
 def base_model_fit(self):
     """
     Fit the base embedding with a DeepWalker built on ``self.graph``.

     The walker is stored on ``self.base_walker`` while it works, its
     ``learn_base_embedding()`` result is kept in ``self.base_node_embedding``,
     and the walker attribute is removed again before returning.
     """
     walker = DeepWalker(self.graph, self.args)
     # Expose the walker on the instance for the duration of the fit.
     self.base_walker = walker
     print("\nDoing base random walks.\n")
     walker.create_features()
     print("\nLearning the base model.\n")
     self.base_node_embedding = walker.learn_base_embedding()
     print("\nDeleting the base walker.\n")
     # Only the learned embedding is kept; drop the walker attribute.
     delattr(self, "base_walker")
Exemplo n.º 2
0
 def create_split(self):
     """
     Build the EgoNetSplitter and the walker over its persona graph.

     Stores the splitter in ``self.egonet_splitter`` and the walker in
     ``self.persona_walker``, runs the walker's ``create_features()`` and
     finally calls ``self.create_noises()``.
     """
     splitter = EgoNetSplitter(self.graph)
     self.egonet_splitter = splitter
     walker = DeepWalker(splitter.persona_graph, self.args)
     self.persona_walker = walker
     print("\nDoing persona random walks.\n")
     walker.create_features()
     # create_noises() reads self.egonet_splitter, so it has to run last.
     self.create_noises()
Exemplo n.º 3
0
    def __init__(self, args, graph):
        """
        Run the walker, then assemble the data pipeline and skip-gram model.

        :param args: Arguments object; the attributes read here are
            input_path, output_path, idx_metapath, idx_embed, number_of_walks,
            walk_length, min_count, care_type, window_size, batch_size,
            num_workers, dim, initial_lr and iterations.
        :param graph: Graph object handed to the DeepWalker.
        """
        print("\nPerforming Node2vec...\n")
        # Step 1: build the walker and let it create its features.
        # NOTE(review): presumably create_features() writes the walks file
        # that DataReader opens below -- confirm against DeepWalker.
        deep_walker = DeepWalker(args, graph)
        print("\nDoing deepwalks...\n")
        deep_walker.create_features()

        walks_name_template = "{}{}-deepwalk_{}-num_walks_{}-len_metapath.txt"
        self.inputFileName = walks_name_template.format(
            args.input_path,
            args.idx_metapath,
            args.number_of_walks,
            args.walk_length,
        )

        # Step 2: read the walks data.
        self.data = DataReader(args.min_count, args.care_type,
                               self.inputFileName)

        # Step 3: wrap the data as a training dataset.
        training_set = DatasetLoader(self.data, args.window_size)

        # Step 4: batch it; the dataset supplies its own collate function.
        self.dataloader = DataLoader(
            training_set,
            batch_size=args.batch_size,
            shuffle=True,
            num_workers=args.num_workers,
            collate_fn=training_set.collate,
        )

        embedding_name_template = "{}{}-embedding_{}-deepwalk_{}-dim_{}-initial_lr_{}-window_size_{}-iterations_{}-min_count.pickle"
        self.output_file_name = embedding_name_template.format(
            args.output_path,
            args.idx_embed,
            args.idx_metapath,
            args.dim,
            args.initial_lr,
            args.window_size,
            args.iterations,
            args.min_count,
        )

        # Hyper-parameters kept on the instance.
        self.emb_size = len(self.data.word2id)
        self.emb_dimension = args.dim
        self.batch_size = args.batch_size
        self.iterations = args.iterations
        self.initial_lr = args.initial_lr
        self.skip_gram_model = SkipGramModel(self.emb_size, self.emb_dimension)

        # Use the GPU whenever one is available.
        self.use_cuda = torch.cuda.is_available()
        device_name = "cuda" if self.use_cuda else "cpu"
        self.device = torch.device(device_name)
        if self.use_cuda:
            self.skip_gram_model.cuda()
Exemplo n.º 4
0
class SplitterTrainer(object):
    """
    Trainer that fits a Splitter model on a graph.
    """
    def __init__(self, graph, args):
        """
        :param graph: NetworkX graph object.
        :param args: Arguments object.
        """
        self.args = args
        self.graph = graph
        gpu_available = torch.cuda.is_available()
        self.device = torch.device('cuda' if gpu_available else 'cpu')

    def create_noises(self):
        """
        Build the node noise list used for negative sampling.

        Each persona node appears ``int(1 + degree**0.75)`` times in ``self.noises``.
        """
        persona_graph = self.egonet_splitter.persona_graph
        repeat_counts = {}
        for node in persona_graph.nodes():
            repeat_counts[node] = int(1 + persona_graph.degree(node) ** 0.75)
        self.downsampled_degrees = repeat_counts

        noise_pool = []
        for node, copies in repeat_counts.items():
            noise_pool.extend([node] * copies)
        self.noises = noise_pool

    def base_model_fit(self):
        """
        Fit the base embedding with a DeepWalker on the original graph.
        """
        walker = DeepWalker(self.graph, self.args)
        self.base_walker = walker
        print("\nDoing base random walks.\n")
        walker.create_features()
        print("\nLearning the base model.\n")
        self.base_node_embedding = walker.learn_base_embedding()
        print("\nDeleting the base walker.\n")
        delattr(self, "base_walker")

    def create_split(self):
        """
        Build the EgoNetSplitter, walk its persona graph and create the noise list.
        """
        splitter = EgoNetSplitter(self.graph)
        self.egonet_splitter = splitter
        walker = DeepWalker(splitter.persona_graph, self.args)
        self.persona_walker = walker
        print("\nDoing persona random walks.\n")
        walker.create_features()
        self.create_noises()

    def setup_model(self):
        """
        Create the Splitter model, initialize it and move it to the device.
        """
        base_total = self.graph.number_of_nodes()
        persona_total = self.egonet_splitter.persona_graph.number_of_nodes()
        model = Splitter(self.args, base_total, persona_total)
        self.model = model
        model.create_weights()
        model.initialize_weights(self.base_node_embedding, self.egonet_splitter.personality_map)
        self.model = model.to(self.device)

    def transfer_batch(self, source_nodes, context_nodes, targets, persona_nodes, pure_source_nodes):
        """
        Turn the batch arrays into tensors on the training device.

        The tensors are stored as ``sources``, ``contexts``, ``targets``,
        ``personas`` and ``pure_sources`` on the instance.
        """
        batch_fields = (
            ("sources", torch.LongTensor, source_nodes),
            ("contexts", torch.LongTensor, context_nodes),
            ("targets", torch.FloatTensor, targets),
            ("personas", torch.LongTensor, persona_nodes),
            ("pure_sources", torch.LongTensor, pure_source_nodes),
        )
        for attribute, tensor_type, values in batch_fields:
            setattr(self, attribute, tensor_type(values).to(self.device))

    def optimize(self):
        """
        Run one optimizer step on the current batch.

        :return: The batch loss as a Python number.
        """
        batch_loss = self.model(self.sources, self.contexts, self.targets, self.personas, self.pure_sources)
        batch_loss.backward()
        self.optimizer.step()
        self.optimizer.zero_grad()
        return batch_loss.item()

    def process_walk(self, walk):
        """
        Turn one random walk into a training batch and transfer it.

        Builds (source, context) pairs in both directions inside the window,
        appends sampled negative contexts and looks up the persona of every
        positive source.
        :param walk: Random walk sequence.
        """
        window = self.args.window_size
        negative_count = self.args.negative_samples

        left_nodes = []
        right_nodes = []
        for start in range(len(walk) - window):
            for offset in range(1, window + 1):
                left_nodes.append(walk[start])
                right_nodes.append(walk[start + offset])

        # Every pair is used twice: left -> right and right -> left.
        positive_total = 2 * len(left_nodes)
        positive_sources = left_nodes + right_nodes
        positive_contexts = right_nodes + left_nodes

        personality_map = self.egonet_splitter.personality_map
        persona_nodes = np.array([personality_map[node] for node in positive_sources])
        pure_source_nodes = np.array(positive_sources)

        # Sources are repeated once for the positives and once per negative round.
        source_nodes = np.array(positive_sources * (negative_count + 1))
        context_array = np.array(positive_contexts)
        noise_contexts = np.random.choice(self.noises, positive_total * negative_count)
        context_nodes = np.concatenate((context_array, noise_contexts))

        targets = np.array([1.0] * positive_total + [0.0] * (positive_total * negative_count))
        self.transfer_batch(source_nodes, context_nodes, targets, persona_nodes, pure_source_nodes)

    def update_average_loss(self, loss_score):
        """
        Add a loss to the running average and refresh the progress bar text.
        :param loss_score: Loss on the sample.
        """
        self.cummulative_loss += loss_score
        self.steps += 1
        running_mean = self.cummulative_loss / self.steps
        self.walk_steps.set_description("Splitter (Loss=%g)" % round(running_mean, 4))

    def reset_average_loss(self, step):
        """
        Restart the running average on every 100th walk.
        :param step: Current number of walks processed.
        """
        if step % 100 != 0:
            return
        self.cummulative_loss = 0
        self.steps = 0

    def fit(self):
        """
        Train the base model, split the graph and fit the joint model.
        """
        self.base_model_fit()
        self.create_split()
        self.setup_model()
        self.model.train()
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.args.learning_rate)
        self.optimizer.zero_grad()
        print("\nLearning the joint model.\n")
        walks = self.persona_walker.paths
        random.shuffle(walks)
        self.walk_steps = trange(len(walks), desc="Loss")
        for step in self.walk_steps:
            self.reset_average_loss(step)
            self.process_walk(self.persona_walker.paths[step])
            self.update_average_loss(self.optimize())

    def save_embedding(self):
        """
        Write the persona node embedding to ``args.embedding_output_path`` as CSV.
        """
        print("\n\nSaving the model.\n")
        ordered_nodes = sorted(self.egonet_splitter.persona_graph.nodes())
        node_tensor = torch.LongTensor(ordered_nodes).to(self.device)
        self.embedding = self.model.node_embedding(node_tensor).cpu().detach().numpy()
        embedding_header = ["id"]
        embedding_header.extend("x_" + str(x) for x in range(self.args.dimensions))
        # The first column is the row index of the sorted node list.
        row_ids = np.array(range(self.embedding.shape[0])).reshape(-1, 1)
        self.embedding = np.concatenate([row_ids, self.embedding], axis=1)
        self.embedding = pd.DataFrame(self.embedding, columns=embedding_header)
        self.embedding.to_csv(self.args.embedding_output_path, index=None)

    def save_persona_graph_mapping(self):
        """
        Write the persona map to ``args.persona_output_path`` as JSON.
        """
        with open(self.args.persona_output_path, "w") as output_file:
            json.dump(self.egonet_splitter.personality_map, output_file)
Exemplo n.º 5
0
class SplitterTrainer(object):
    """
    Class for training a Splitter.

    Batches are accumulated as plain Python lists (``pure_sources``,
    ``personas``, ``sources``, ``contexts``, ``targets``), converted to
    tensors by ``transfer_batch`` and cleared again by ``optimize``.
    """
    def __init__(self, graph, args):
        """
        :param graph: NetworkX graph object.
        :param args: Arguments object.
        """
        self.graph = graph
        self.args = args
        self.device = torch.device(
            'cuda' if torch.cuda.is_available() else 'cpu')
        # List copy of the persona nodes used for negative sampling;
        # filled lazily by _negative_sampling_pool().
        self._negative_pool = None

    def base_model_fit(self):
        """
        Fitting DeepWalk on base model.

        Stores the learned embedding in ``self.base_node_embedding`` and
        removes the temporary ``self.base_walker`` attribute afterwards.
        """
        self.base_walker = DeepWalker(self.graph, self.args)
        print("\nDoing base random walks.\n")
        self.base_walker.create_features()
        print("\nLearning the base model.\n")
        self.base_node_embedding = self.base_walker.learn_base_embedding()
        print("\nDeleting the base walker.\n")
        del self.base_walker

    def create_split(self):
        """
        Creating an EgoNetSplitter and walking its persona graph.
        """
        self.egonet_splitter = EgoNetSplitter(self.graph)
        # A new persona graph invalidates any cached sampling pool.
        self._negative_pool = None
        self.persona_walker = DeepWalker(self.egonet_splitter.persona_graph,
                                         self.args)
        print("\nDoing persona random walks.\n")
        self.persona_walker.create_features()

    def setup_model(self):
        """
        Creating a model and doing a transfer to GPU.
        """
        base_node_count = self.graph.number_of_nodes()
        persona_node_count = (
            self.egonet_splitter.persona_graph.number_of_nodes())
        self.model = Splitter(self.args, base_node_count, persona_node_count)
        self.model.create_weights()
        self.model.initialize_weights(self.base_node_embedding,
                                      self.egonet_splitter.personality_map)
        self.model = self.model.to(self.device)

    def reset_node_sets(self):
        """
        Resetting the node sets (the per-batch accumulator lists).
        """
        self.pure_sources = []
        self.personas = []
        self.sources = []
        self.contexts = []
        self.targets = []

    def _negative_sampling_pool(self):
        """
        Return the persona nodes as a list, building it on first use.

        ``random.sample`` needs a real sequence: a graph node view is not
        one, so it is copied into a list once and reused for every pair.
        """
        pool = getattr(self, "_negative_pool", None)
        if pool is None:
            pool = list(self.egonet_splitter.persona_graph.nodes())
            self._negative_pool = pool
        return pool

    def create_batch(self, source_node, context_node):
        """
        Augmenting a batch of data.

        Adds one positive (source, context) pair followed by
        ``args.negative_samples`` pairs whose contexts are sampled without
        replacement from the persona nodes.
        :param source_node: A source node.
        :param context_node: A target to predict.
        """
        negative_count = self.args.negative_samples
        # Do every step that can fail before touching the batch lists.
        persona = self.egonet_splitter.personality_map[source_node]
        negatives = random.sample(self._negative_sampling_pool(),
                                  negative_count)
        # Grow the lists in place; rebuilding them with "+" for every pair
        # is quadratic in the batch size.
        self.pure_sources.append(source_node)
        self.personas.append(persona)
        self.sources.extend([source_node] * (negative_count + 1))
        self.contexts.append(context_node)
        self.contexts.extend(negatives)
        self.targets.append(1.0)
        self.targets.extend([0.0] * negative_count)

    def transfer_batch(self):
        """
        Transfering the batch to GPU (or whichever device is in use).
        """
        self.sources = torch.LongTensor(self.sources).to(self.device)
        self.contexts = torch.LongTensor(self.contexts).to(self.device)
        self.targets = torch.FloatTensor(self.targets).to(self.device)
        self.personas = torch.LongTensor(self.personas).to(self.device)
        self.pure_sources = torch.LongTensor(self.pure_sources).to(self.device)

    def optimize(self):
        """
        Doing a weight update and clearing the batch lists.

        :return: The loss of the batch as a Python number.
        """
        loss = self.model(self.sources, self.contexts, self.targets,
                          self.personas, self.pure_sources)
        loss.backward()
        self.optimizer.step()
        self.optimizer.zero_grad()
        self.reset_node_sets()
        return loss.item()

    def _add_walk_pairs(self, walk):
        """
        Add every forward and backward window pair of one walk to the batch.

        :param walk: Random walk sequence.
        """
        window = self.args.window_size
        # A walk may be shorter than the configured length; never index
        # past its end, and never past the configured walk length either.
        length = min(len(walk), self.args.walk_length)
        for i in range(length - window):
            for j in range(1, window + 1):
                self.create_batch(walk[i], walk[i + j])
        for i in range(window, length):
            for j in range(1, window + 1):
                self.create_batch(walk[i], walk[i - j])

    def fit(self):
        """
        Fitting a model.

        Fits the base model, creates the persona split, then trains the
        joint model with one optimizer step per persona walk.
        """
        self.reset_node_sets()
        self.base_model_fit()
        self.create_split()
        self.setup_model()
        self.model.train()
        self.optimizer = torch.optim.Adam(self.model.parameters(),
                                          lr=self.args.learning_rate)
        self.optimizer.zero_grad()
        print("\nLearning the joint model.\n")
        random.shuffle(self.persona_walker.paths)
        self.steps = 0
        self.losses = 0
        self.walk_steps = trange(len(self.persona_walker.paths), desc="Loss")
        for step in self.walk_steps:
            # The displayed average restarts every 1000 walks.
            if step % 1000 == 0:
                self.steps = 0
                self.losses = 0
            self._add_walk_pairs(self.persona_walker.paths[step])
            if not self.sources:
                # Walk too short to yield any pair: nothing to optimize.
                continue
            self.transfer_batch()
            self.losses = self.losses + self.optimize()
            self.steps = self.steps + 1
            average_loss = self.losses / self.steps
            self.walk_steps.set_description("Splitter (Loss=%g)" %
                                            round(average_loss, 4))

    def save_embedding(self):
        """
        Saving the node embedding.

        Rows are written in ascending node order and the ``id`` column holds
        the node whose vector is stored in that row.
        """
        print("\n\nSaving the model.\n")
        # Sort first: the graph's iteration order is not guaranteed to be
        # ascending, and the id column must match the row's vector.
        node_ids = sorted(self.egonet_splitter.persona_graph.nodes())
        nodes = torch.LongTensor(node_ids).to(self.device)
        self.embedding = self.model.node_embedding(
            nodes).cpu().detach().numpy()
        embedding_header = ["id"] + [
            "x_" + str(x) for x in range(self.args.dimensions)
        ]
        id_column = np.array(node_ids).reshape(-1, 1)
        self.embedding = np.concatenate([id_column, self.embedding], axis=1)
        self.embedding = pd.DataFrame(self.embedding, columns=embedding_header)
        self.embedding.to_csv(self.args.embedding_output_path, index=None)

    def save_persona_graph_mapping(self):
        """
        Saving the persona map as JSON to ``args.persona_output_path``.
        """
        with open(self.args.persona_output_path, "w") as f:
            json.dump(self.egonet_splitter.personality_map, f)