def load_node_labels(self, elliptic_args, tar_archive):
    """Load (node id, label, time step) triples for labeled nodes.

    Reads the classes and timestep files from the tar archive, keeps only
    the nodes whose label is non-negative (``replace_unknow=True`` maps
    unknown labels to a negative sentinel), and pairs each kept node with
    its time step.

    Returns:
        LongTensor of shape [num_labeled_nodes, 3] with columns
        (nid, label, time).
    """
    labels = u.load_data_from_tar(elliptic_args.classes_file,
                                  tar_archive,
                                  replace_unknow=True).long()
    times = u.load_data_from_tar(elliptic_args.times_file,
                                 tar_archive,
                                 replace_unknow=True).long()
    lcols = u.Namespace({'nid': 0, 'label': 1})
    tcols = u.Namespace({'nid': 0, 'time': 1})

    # Vectorized filter: keep rows with a known (>= 0) label instead of
    # iterating row by row in Python.
    known = labels[:, lcols.label] >= 0
    nids = labels[known, lcols.nid]
    node_labels = labels[known, lcols.label]
    # NOTE(review): the original indexed times by nid - 1, presumably
    # because node ids in the raw file are 1-based — confirm against data.
    node_times = times[nids - 1, tcols.time]

    nodes_labels_times = torch.stack([nids, node_labels, node_times], dim=1)
    return nodes_labels_times
def load_edges(self, args, tar_archive):
    """Load the UC Irvine IRC edge list as a symmetric, time-stamped edge set.

    Sets self.num_nodes, self.num_non_existing, self.min_time and
    self.max_time as side effects.

    Returns:
        dict with 'idx' ((source, target, time) rows) and 'vals'
        (a ones vector, one entry per row).
    """
    cols = u.Namespace({'source': 0, 'target': 1, 'weight': 2, 'time': 3})
    data = u.load_data_from_tar(args.uc_irc_args.edges_file,
                                tar_archive,
                                starting_line=2,
                                sep=' ').long()

    endpoints = [cols.source, cols.target]
    # Ids are already contiguous; shift them so the first id is 0.
    # The pre-shift max id doubles as the node count.
    self.num_nodes = int(data[:, endpoints].max())
    data[:, endpoints] -= 1

    # Make the graph symmetric by appending every edge reversed.
    reversed_edges = data[:, [cols.target, cols.source, cols.weight, cols.time]]
    data = torch.cat([data, reversed_edges], dim=0)

    data[:, cols.time] = u.aggregate_by_time(data[:, cols.time],
                                             args.uc_irc_args.aggr_time)

    # Count ordered node pairs that never occur as an edge.
    pair_ids = data[:, cols.source] * self.num_nodes + data[:, cols.target]
    self.num_non_existing = float(self.num_nodes ** 2 -
                                  pair_ids.unique().size(0))

    self.max_time = data[:, cols.time].max()
    self.min_time = data[:, cols.time].min()

    idx = data[:, [cols.source, cols.target, cols.time]]
    return {'idx': idx, 'vals': torch.ones(idx.size(0))}
def load_node_feats(self, elliptic_args, tar_archive):
    """Load the per-node feature matrix from the tar archive.

    Sets self.num_nodes and self.feats_per_node as side effects.

    Returns:
        (nodes, nodes_feats): the raw tensor including the leading column,
        and the float feature matrix with that first column dropped
        (presumably the node id — confirm against the data file).
    """
    nodes = u.load_data_from_tar(elliptic_args.feats_file,
                                 tar_archive,
                                 starting_line=0)
    self.num_nodes = len(nodes)
    self.feats_per_node = nodes.size(1) - 1
    return nodes, nodes[:, 1:].float()
def load_transactions(self, elliptic_args, tar_archive):
    """Load the transaction edge list and make it symmetric.

    Sets self.min_time and self.max_time as side effects.

    Returns:
        dict with 'idx' ((source, target, time) rows, both directions of
        every edge) and 'vals' (a ones vector, one entry per row).
    """
    tcols = u.Namespace({'source': 0, 'target': 1, 'time': 2})
    edges = u.load_data_from_tar(elliptic_args.edges_file,
                                 tar_archive,
                                 type_fn=float,
                                 tensor_const=torch.LongTensor)
    # Append every edge with source/target swapped (undirected graph).
    flipped = edges[:, [tcols.target, tcols.source, tcols.time]]
    edges = torch.cat([edges, flipped])
    self.max_time = edges[:, tcols.time].max()
    self.min_time = edges[:, tcols.time].min()
    return {'idx': edges, 'vals': torch.ones(edges.size(0))}
def load_edges(self, args, tar_archive):
    """Load autonomous-systems snapshot files into one symmetric edge set.

    Each file in the archive is one time step (mapped by
    self.times_from_names). Edges are symmetrized, node ids remapped to a
    contiguous 0-based range, rows truncated to the first
    steps_accounted time steps, and times aggregated. Sets
    self.num_nodes, self.num_non_existing, self.min_time and
    self.max_time as side effects.

    Returns:
        dict with 'idx' ((source, target, time) rows) and 'vals'
        (a ones vector, one entry per row).
    """
    cols = u.Namespace({'source': 0, 'target': 1, 'time': 2})
    files = tar_archive.getnames()
    file2time = self.times_from_names(files)

    per_file = []
    for name in files:
        snapshot = u.load_data_from_tar(name,
                                        tar_archive,
                                        starting_line=4,
                                        sep='\t',
                                        type_fn=int,
                                        tensor_const=torch.LongTensor)
        # Tag every edge of this snapshot with its time step.
        stamp = torch.zeros(snapshot.size(0), 1,
                            dtype=torch.long) + file2time[name]
        snapshot = torch.cat([snapshot, stamp], dim=1)
        # Symmetrize: also add the reversed direction of every edge.
        snapshot = torch.cat(
            [snapshot, snapshot[:, [cols.target, cols.source, cols.time]]])
        per_file.append(snapshot)
    edges = torch.cat(per_file)

    # Remap node ids to a contiguous 0-based range.
    endpoints = [cols.source, cols.target]
    _, edges[:, endpoints] = edges[:, endpoints].unique(return_inverse=True)

    # Use only the first X time steps.
    keep = edges[:, cols.time] < args.aut_sys_args.steps_accounted
    edges = edges[keep, :]

    # Time aggregation.
    edges[:, cols.time] = u.aggregate_by_time(edges[:, cols.time],
                                              args.aut_sys_args.aggr_time)

    self.num_nodes = int(edges[:, endpoints].max() + 1)
    # Count ordered node pairs that never occur as an edge.
    pair_ids = edges[:, cols.source] * self.num_nodes + edges[:, cols.target]
    self.num_non_existing = float(self.num_nodes ** 2 -
                                  pair_ids.unique().size(0))
    self.max_time = edges[:, cols.time].max()
    self.min_time = edges[:, cols.time].min()

    return {'idx': edges, 'vals': torch.ones(edges.size(0))}