# DatasetInfo(name="simple", neigh=1000, relevant_neigh=4), # DatasetInfo(name="simple", neigh=3000, relevant_neigh=4) ] if __name__ == "__main__": args = __pars_args__() # device = torch.device("cuda:{}".format(args.device) if args.use_cuda else "cpu") for dataset in DATASETS: print("\n\n---------------") print("{}".format(dataset)) # prefix = "{}_neigh-{}_rel-{}".format(dataset.name, dataset.neigh, dataset.relevant_neigh) prefix = "{}".format(dataset.name) args.dataset_prefix = prefix args.max_neighbors = dataset.neigh input_embeddings, target_embeddings, neighbor_embeddings, edge_types, mask_neighbor = get_embeddings( path.join("data", args.data_dir), prefix=prefix) train_dataset = CDataset(path.join("data", args.data_dir), "{}_{}".format(prefix, args.train_file_name)) eval_dataset = CDataset(path.join("data", args.data_dir), "{}_{}".format(prefix, args.eval_file_name)) test_dataset = CDataset(path.join("data", args.data_dir), "{}_{}".format(prefix, args.test_file_name)) train_dataloader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True, num_workers=2, drop_last=True) eval_dataloader = DataLoader(eval_dataset, batch_size=args.eval_batch_size, shuffle=False,
help="Iteration number.") parser.add_argument("--use_cuda", "-cuda", type=bool, default=False, help="Use cuda computation") return parser.parse_args() if __name__ == "__main__": args = __pars_args__() risk_tsfm = RiskToTensor(args.data_dir) attribute_tsfm = AttributeToTensor(args.data_dir) input_embeddings, target_embeddings, neighbor_embeddings, seq_len = get_embeddings( args.data_dir, args.customer_file_name, args.neighbors_file_name, args.embedding_dim, risk_tsfm, attribute_tsfm) # customer_id_2_customer_idx = pickle.load(open("../data/customers/customerid_to_idx.bin", "rb")) # customer_idx_2_neighbors_idx = pickle.load(open("../data/customers/customeridx_to_neighborsidx.bin", "rb")) train_dataset = CustomerDataset(args.data_dir, args.train_file_name) train_dataloader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True, num_workers=1, drop_last=True) eval_dataset = CustomerDataset(args.data_dir, args.eval_file_name) eval_dataloader = DataLoader(eval_dataset, batch_size=args.batch_size,
def main(settings, dataset_number=5, image_size=200, padding=2, n_clusters=None, out_file='../img.png'):
    """Lay out a set of images on a 2D canvas by embedding, clustering and packing.

    Pipeline: NN embeddings -> 2D projection (MDS, PCA-initialised) -> k-means
    clustering -> per-cluster representative selection -> iterative expansion until
    no images overlap -> a sequence of optional "shrink" passes that pull images
    back together without reintroducing overlap -> save the rendered canvas.

    NOTE(review): this block was reconstructed from whitespace-mangled source; the
    nesting of the trailing `if` blocks (in particular the `no_inter` pass inside
    `no_final`) is the most plausible reading — confirm against the original file.
    The function may also continue past the last visible line.

    :param settings: namespace of boolean flags (shuffle, no_mds, no_intra,
        no_inter, no_xy, no_shake, no_final) toggling pipeline stages.
    :param dataset_number: which image dataset to load via `helper`.
    :param image_size: base size (pixels, presumably) used for image scaling and
        initial placement — TODO confirm units.
    :param padding: minimum spacing enforced between images during overlap checks.
    :param n_clusters: fixed cluster count for k-means, or None to let
        `compute.k_means` pick a default.
    :param out_file: path where the final canvas image is saved.
    """
    # Load images and get embeddings from NN
    imgs = helper.get_images(dataset_number)
    embeddings = helper.get_embeddings(dataset_number, imgs)
    print('loaded {} images'.format(len(imgs)))

    if settings.shuffle:
        random.shuffle(imgs)

    # Compute 2D embeddings with MDS; no_mds falls back to random positions
    # (useful as an ablation baseline).
    if settings.no_mds:
        em_2d = np.random.random((len(imgs), 2))
    else:
        em_2d = compute.mds(embeddings, init=compute.pca(embeddings))

    # Perform clustering
    cluster_centers, labels = compute.k_means(em_2d, k_default=n_clusters)
    print('clusters:', len(cluster_centers))
    print('sizes of clusters: ', end='')
    for l in range(max(labels) + 1):
        # labels is array-like: `sum(labels == l)` counts members of cluster l.
        print(sum(labels == l), end=', ')
    print()

    # Representative images: one per cluster, chosen via silhouette scores.
    silhouettes = compute.get_silhouettes(em_2d, labels)
    representative = compute.get_representative(em_2d, cluster_centers, labels, silhouettes)

    # Sizes and positions of the images
    ratios = helper.get_image_size_ratios(imgs)
    sizes = compute.get_sizes(image_size, em_2d, ratios, cluster_centers, labels, representative)
    positions = compute.get_positions(em_2d, image_size)

    # Expand as long as overlaps occur - gradually increase space between images
    # by scaling all positions by 5% per iteration.
    iters = 0
    while compute.overlap(positions, sizes, padding):
        positions *= 1.05
        iters += 1
    print('overlap resolved in {} iterations'.format(iters))

    # dists[0] is the reference distance matrix; each later pass appends its own
    # so layout quality can be compared at the end.
    dists = [compute.get_distances(positions)]

    # Overlapping resolved, now "shrink" towards representative images
    if not settings.no_intra:
        positions = compute.shrink_intra(positions, sizes, representative, labels, padding)
        dists.append(compute.get_distances(positions))

    if not settings.no_inter:
        # Move clusters closer together by same factor
        positions = compute.shrink_inter1(positions, sizes, representative, labels, padding)
        dists.append(compute.get_distances(positions))

        # Move clusters closer together separately by different factors
        positions = compute.shrink_inter2(positions, sizes, representative, labels, padding)
        dists.append(compute.get_distances(positions))

    if not settings.no_xy and not settings.no_intra:
        # Shrink by x and y separately
        positions = compute.shrink_xy(positions, sizes, representative, labels, padding)
        dists.append(compute.get_distances(positions))

    if not settings.no_shake:
        # "Shake" images with small offsets
        for _ in range(10):
            positions = compute.shrink_with_shaking(positions, sizes, padding)
        dists.append(compute.get_distances(positions))

    if not settings.no_final and not settings.no_intra:
        # Shrink to finalize positions
        positions = compute.shrink_xy(positions, sizes, representative, labels, padding)
        dists.append(compute.get_distances(positions))
        positions = compute.shrink_xy(positions, sizes, representative, labels, padding, smaller=True)
        dists.append(compute.get_distances(positions))

        if not settings.no_inter:
            positions = compute.shrink_inter2(positions, sizes, representative, labels, padding)
            dists.append(compute.get_distances(positions))

    im = helper.plot(imgs, positions, sizes)
    im.save(out_file)

    # helper.plot_clusters(em_2d, cluster_centers, labels, representative)

    # Score each pass's layout against the reference distances (scores[0] is the
    # reference compared with itself and is skipped when printing).
    scores = list(map(lambda d: compute.compare_distances(dists[0], d), dists))
    print('\nscores:')
    for i, s in enumerate(scores[1:]):
        print('{:.3f},'.format(s), end=' ')
import torch
import pickle
import random

from helper import get_embeddings
from os import path

# Root directory holding the PEMS traffic-sensor embedding files.
BASE_DIR = path.join("..", "..", "data", "pems")

if __name__ == "__main__":
    # Quick manual sanity check: load the embeddings and print one random example.
    # Fix: reuse BASE_DIR instead of duplicating the same path.join(...) literal,
    # so the data location is defined in exactly one place.
    input_embeddings, target_embeddings, neighbor_embeddings, edge_types, mask_neighbor = get_embeddings(BASE_DIR)

    # NOTE(review): assumes the dataset has at least 10001 examples — an index out
    # of that range would raise; confirm against the PEMS data size.
    idx = random.randint(0, 10000)
    print(input_embeddings[idx, :, 0])          # first feature channel of the input sequence
    print(target_embeddings[idx])               # target for the same example
    print(neighbor_embeddings[idx, :, :, 0].t())  # neighbors' first channel, transposed for readability
    # pickle.dump(neighbor_embeddings/10, open(path.join(BASE_DIR, "simple_neighbor_embeddings.bin"), "wb"))
default=102, help="Iteration number.") parser.add_argument('--eval_step', type=int, default=10, help='How often do an eval step') parser.add_argument('--save_rate', type=float, default=0.9, help='How often do save an eval example') return parser.parse_args() if __name__ == "__main__": args = __pars_args__() input_embeddings, target_embeddings, neighbor_embeddings, edge_types, mask_neighbor = get_embeddings( path_join("..", "data", args.data_dir), prefix=args.dataset_prefix) model = RNNJointAttention(args.input_dim, args.hidden_dim, args.output_dim, args.n_head, args.time_windows, dropout_prob=args.drop_prob, temperature=args.temp) # model = StructuralRNN(args.input_dim, args.hidden_size, args.output_size, args.num_layers, args.max_neighbors, input_embeddings.size(1), # dropout_prob=args.drop_prob) n_params = get_param_numbers(model) print(n_params)