def preliminaries(dataset_path, output_path, params_path, n_fold, n_run):
    """
    Runs a single instance of EEL. If an exception occurs during this execution,
    the program will output the exception message to a file in the output_path directory.

    :type dataset_path: str
    :param dataset_path: a full path (including file extension, e.g. dataset.arff) to the dataset to be run.
    :type output_path: str
    :param output_path: path to output metadata regarding the evolutionary process.
    :type params_path: str
    :param params_path: path to the parameters file.
    :type n_fold: int
    :param n_fold: number of the fold to run in this execution. Must be smaller than the value in the params file.
    :type n_run: int
    :param n_run: number of the current run.
    """

    dataset_name = get_dataset_name(dataset_path)

    # try:
    eelem(
        dataset_path=dataset_path,
        output_path=output_path,
        params_path=params_path,
        n_fold=n_fold,
        n_run=n_run
    )
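# Hedged sketch (not from the source): the docstring above promises that an
# exception raised during the run is written to a file under output_path, but
# the try/except wrapper is currently commented out. One way such a wrapper
# could look is sketched below; the helper name and the log-file naming scheme
# are hypothetical choices, not the repo's convention.
import os
import traceback


def run_with_exception_log(dataset_path, output_path, params_path, n_fold, n_run):
    dataset_name = get_dataset_name(dataset_path)
    try:
        eelem(dataset_path=dataset_path, output_path=output_path,
              params_path=params_path, n_fold=n_fold, n_run=n_run)
    except Exception:
        # write the full traceback to a file inside output_path
        log_name = 'exception_%s_fold_%d_run_%d.txt' % (dataset_name, n_fold, n_run)
        with open(os.path.join(output_path, log_name), 'w') as log:
            log.write(traceback.format_exc())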
def verify_speed(traj, sequences):
    dataset_name = get_dataset_name(TEST_DATASET_PATH)
    max_speed = get_max_speed(dataset_name)
    traj = get_traj(traj, sequences)
    dist = get_distance(traj)
    speed = get_speed_from_distance(dist)

    # We calculate inverse sigmoid to verify the speed
    inverse_sigmoid(speed, max_speed)
def get_dataset_location(dataset):
    "Return EOS location of the dataset."
    return XROOTD_DIR_BASE + \
        EXPERIMENT + '/' + \
        MCDIR_BASE + '/' + \
        get_dataset_runperiod(dataset) + '/' + \
        get_dataset_name(dataset) + '/' + \
        get_dataset_format(dataset) + '/' + \
        get_dataset_version(dataset)
def get_dataset_index_file_base(dataset):
    "Return index file base for given dataset."
    filebase = EXPERIMENT.upper() + '_' + \
        MCDIR_BASE + '_' + \
        get_dataset_runperiod(dataset) + '_' + \
        get_dataset_name(dataset) + '_' + \
        get_dataset_format(dataset) + '_' + \
        get_dataset_version(dataset)
    return filebase
def eelem(dataset_path, output_path, params_path, n_fold, n_run, verbose=True):
    """
    Runs a single instance of EEL.

    :type dataset_path: str
    :param dataset_path: a full path (including file extension, e.g. dataset.arff) to the dataset to be run.
    :type output_path: str
    :param output_path: path to output metadata regarding the evolutionary process.
    :type params_path: str
    :param params_path: path to the parameters file.
    :type n_fold: int
    :param n_fold: number of the fold to run in this execution. Must be smaller than the value in the params file.
    :type n_run: int
    :param n_run: number of the current run.
    :type verbose: bool
    :param verbose: whether to output metadata to the console. Defaults to True.
    """

    params = json.load(open(params_path))
    dataset_name = get_dataset_name(dataset_path)

    X_train, X_test, y_train, y_test = __get_fold__(params=params, dataset_path=dataset_path, n_fold=n_fold)
    n_classes = len(np.unique(y_train))

    reporter = EDAReporter(
        Xs=[X_train, X_test],
        ys=[y_train, y_test],
        set_names=['train', 'test'],
        output_path=output_path,
        dataset_name=dataset_name,
        n_fold=n_fold,
        n_run=n_run,
        n_classifiers=params['n_base_classifiers'],
        n_classes=n_classes,
    )

    ensemble = Ensemble.from_adaboost(
        X_train=X_train, y_train=y_train,
        data_normalizer_class=DataNormalizer,
        n_classifiers=params['n_base_classifiers'],
    )  # type: Ensemble

    ensemble = integrate(
        ensemble=ensemble,
        n_individuals=params['n_individuals'],
        n_generations=params['n_generations'],
        reporter=reporter,
        verbose=verbose
    )

    return ensemble
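# Hedged usage sketch (not from the source): how eelem might be driven over
# several runs and folds. The file paths and the 'n_runs'/'n_folds' keys read
# from the JSON parameters file are assumptions made for illustration only.
import json

params = json.load(open('config/params.json'))  # hypothetical path
for n_run in range(params.get('n_runs', 1)):
    for n_fold in range(params.get('n_folds', 10)):
        eelem(
            dataset_path='datasets/dataset.arff',  # hypothetical path
            output_path='metadata/',               # hypothetical path
            params_path='config/params.json',
            n_fold=n_fold,
            n_run=n_run,
            verbose=True,
        )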
def verify_speed(traj, sequences, labels=None):
    if MULTI_CONDITIONAL_MODEL:
        traj, label = get_traj(traj, sequences, labels=labels)
    else:
        dataset_name = get_dataset_name(SINGLE_TEST_DATASET_PATH)
        traj = get_traj(traj, sequences, labels=None)

    dist = get_distance(traj)
    speed = get_speed_from_distance(dist)

    # We calculate inverse sigmoid to verify the speed
    if MULTI_CONDITIONAL_MODEL:
        inverse_sigmoid(speed, labels=label)
    else:
        max_speed = SINGLE_AGENT_MAX_SPEED
        inverse_sigmoid(speed, max_speed=max_speed)
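# Hedged sketch (not from the source): the repo's inverse_sigmoid() is not shown
# in this excerpt, so the following is only an assumption of what a logit-based
# speed check could look like: scale the speed into (0, 1) by max_speed and
# apply the logit. The actual implementation may differ.
import numpy as np


def inverse_sigmoid_check(speed, max_speed, eps=1e-7):
    # Clip into the open interval (0, 1) so the logit stays finite.
    ratio = np.clip(np.asarray(speed, dtype=float) / max_speed, eps, 1.0 - eps)
    return np.log(ratio / (1.0 - ratio))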
def main():
    checkpoint = torch.load(CHECKPOINT_NAME)
    generator = TrajectoryGenerator()
    generator.load_state_dict(checkpoint['g_state'])
    if USE_GPU:
        generator.cuda()
    generator.train()

    dataset_name = get_dataset_name(TEST_DATASET_PATH)
    _, loader = data_loader(TEST_DATASET_PATH, TEST_METRIC)

    if TEST_METRIC == 1:
        num_samples = 1
    else:
        num_samples = NUM_SAMPLES

    ade, fde = evaluate(loader, generator, num_samples)
    print('Dataset: {}, Pred Len: {}, ADE: {:.2f}, FDE: {:.2f}'.format(
        dataset_name, PRED_LEN, ade, fde))
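# Hedged sketch (not from the source): the standard way ADE/FDE are computed for
# trajectory prediction, shown for reference. The repo's evaluate() may differ,
# e.g. by taking a best-of-num_samples minimum over sampled trajectories.
import torch


def ade_fde(pred_traj, gt_traj):
    # pred_traj, gt_traj: tensors of shape (pred_len, batch, 2)
    dist = torch.norm(pred_traj - gt_traj, dim=2)  # (pred_len, batch)
    ade = dist.mean().item()                       # average over all steps and agents
    fde = dist[-1].mean().item()                   # displacement at the final step
    return ade, fde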
def main():
    args = parser.parse_args()
    print(args)

    config = configure(args.config)
    os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    # define model name
    setup_list = [
        args.src,
        args.tgt,
        args.network,
        f"contrast_dim_{args.contrast_dim}",
        f"temperature_{args.temperature}",
        f"alpha_{args.alpha}",
        f"cw_{args.cw}",
        f"thresh_{args.thresh}",
        f"max_key_size_{args.max_key_size}",
        f"min_conf_samples_{args.min_conf_samples}",
        f"gpu_{args.gpu}"
    ]
    model_name = "_".join(setup_list)
    print(colored(f"Model name: {model_name}", 'green'))

    model_dir = os.path.join(args.log_dir, model_name)
    if os.path.isdir(model_dir):
        shutil.rmtree(model_dir)
    os.mkdir(model_dir)
    summary_writer = SummaryWriter(model_dir)

    # save parsed arguments
    with open(os.path.join(model_dir, 'parsed_args.txt'), 'w') as f:
        json.dump(args.__dict__, f, indent=2)

    dataset_name = get_dataset_name(args.src, args.tgt)
    dataset_config = config.data.dataset[dataset_name]
    src_file = os.path.join(args.dataset_root, dataset_name, args.src + '_list.txt')
    tgt_file = os.path.join(args.dataset_root, dataset_name, args.tgt + '_list.txt')

    model = Model(base_net=args.network,
                  num_classes=dataset_config.num_classes,
                  contrast_dim=args.contrast_dim,
                  frozen_layer=args.frozen_layer)
    model_ema = Model(base_net=args.network,
                      num_classes=dataset_config.num_classes,
                      contrast_dim=args.contrast_dim,
                      frozen_layer=args.frozen_layer)
    moment_update(model, model_ema, 0)

    model = model.cuda()
    model_ema = model_ema.cuda()

    contrast_loss = InfoNCELoss(temperature=args.temperature).cuda()
    src_memory = KeyMemory(args.max_key_size, args.contrast_dim).cuda()
    tgt_memory = KeyMemory(args.max_key_size, args.contrast_dim).cuda()
    tgt_pseudo_labeler = KMeansPseudoLabeler(num_classes=dataset_config.num_classes,
                                             batch_size=args.pseudo_batch_size)

    parameters = model.get_parameter_list()
    group_ratios = [parameter['lr'] for parameter in parameters]

    optimizer = torch.optim.SGD(parameters,
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay,
                                nesterov=args.nesterov)

    assert args.lr_scheduler == 'inv'
    lr_scheduler = InvScheduler(gamma=args.gamma,
                                decay_rate=args.decay_rate,
                                group_ratios=group_ratios,
                                init_lr=args.lr)

    trainer = Train(model, model_ema, optimizer, lr_scheduler, model_dir, summary_writer,
                    src_file, tgt_file, contrast_loss, src_memory, tgt_memory, tgt_pseudo_labeler,
                    cw=args.cw,
                    thresh=args.thresh,
                    min_conf_samples=args.min_conf_samples,
                    num_classes=dataset_config.num_classes,
                    batch_size=args.batch_size,
                    eval_batch_size=args.eval_batch_size,
                    num_workers=args.num_workers,
                    max_iter=args.max_iterations,
                    iters_per_epoch=dataset_config.iterations_per_epoch,
                    log_summary_interval=args.log_summary_interval,
                    log_image_interval=args.log_image_interval,
                    num_proj_samples=args.num_project_samples,
                    acc_metric=dataset_config.acc_metric,
                    alpha=args.alpha)
    tgt_best_acc = trainer.train()

    # write to text file
    # with open(args.acc_file, 'a') as f:
    #     f.write(model_name + ' ' + str(tgt_best_acc) + '\n')
    #     f.close()

    # write results row to csv file
    write_list = [
        args.src,
        args.tgt,
        args.network,
        args.contrast_dim,
        args.temperature,
        args.alpha,
        args.cw,
        args.thresh,
        args.max_key_size,
        args.min_conf_samples,
        args.gpu,
        tgt_best_acc
    ]
    with open(args.acc_file, 'a') as f:
        csv_writer = csv.writer(f)
        csv_writer.writerow(write_list)
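# Hedged sketch (not from the source): moment_update(model, model_ema, 0) above
# suggests a MoCo-style momentum (EMA) encoder update; a common implementation
# is shown below, where m = 0 simply copies the weights at initialization. The
# repo's helper may differ in detail.
import torch


def momentum_update_sketch(model, model_ema, m):
    # model_ema <- m * model_ema + (1 - m) * model, parameter by parameter
    with torch.no_grad():
        for p, p_ema in zip(model.parameters(), model_ema.parameters()):
            p_ema.data.mul_(m).add_(p.data, alpha=1.0 - m)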
training_group = parser.add_argument_group('Training parameters')
training_group.add_argument('-ts', '--train_set_size', default=70000, type=int,
                            help='training set size')
training_group.add_argument('-vs', '--valid_set_size', default=30000, type=int,
                            help='validation set size')
training_group.add_argument('-vp', '--valid_partitions', default=100, type=int,
                            help='number of validation set partitions')
training_group.add_argument('-tp', '--test_partitions', default=100, type=int,
                            help='number of test set partitions')
training_group.add_argument('-b', '--batch_size', default=70, type=int,
                            help='batch size for SGD')
training_group.add_argument('-l', '--learning_rate', default=1e-4, type=float,
                            help='learning rate for SGD')
training_group.add_argument('-dr', '--decay_rate', default=0.1, type=float,
                            help='learning rate decay rate')
training_group.add_argument('-ds', '--decay-steps', default=1000, type=int,
                            help='learning rate decay steps')
training_group.add_argument('-c', '--gradient_clip', default=40.0, type=float,
                            help='clip at this max norm of gradient')
training_group.add_argument('-m', '--max_steps', default=10000, type=int,
                            help='max number of iterations for training')
training_group.add_argument('-s', '--save', action='store_true',
                            help='save the model every epoch')
training_group.add_argument('-ens', '--ensemble', default=1, type=int,
                            help='number of models in the ensemble')

args = parser.parse_args()

pickle_filename = utils.get_dataset_name(args.time_window, args.time_aggregation,
                                         args.forecast_window, args.forecast_aggregation,
                                         args.train_set_size, args.valid_set_size)
dataset = utils.get_dataset(pickle_filename, args, parser)

train_set = dataset[0]
train_labels = dataset[1]
valid_set = dataset[2]
valid_labels = dataset[3]
valid_set2 = dataset[4]
valid_labels2 = dataset[5]
test_set = dataset[6]
test_labels = dataset[7]
mean = dataset[8]
stddev = dataset[9]
del dataset

print('Training set', train_set.shape, train_labels.shape)
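# Hedged sketch (not from the source): assuming `mean` and `stddev` above are the
# per-feature training-set statistics used to standardize the sets, these two
# helpers illustrate the mapping between original and normalized space (e.g. to
# map model predictions back to the original scale). This is an assumption, not
# a description of utils.get_dataset().
def standardize(values, mean, stddev):
    return (values - mean) / stddev


def denormalize(values, mean, stddev):
    return values * stddev + mean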
Xs.append(X)
ys.append(y)

k = 30
cv = model_selection.StratifiedKFold(n_splits=k)

id3_measures = []
cart_measures = []

for dataset_index in range(len(datasets)):
    fold = 0
    id3_fold_measures = []
    cart_fold_measures = []
    for train_indexes, test_indexes in cv.split(Xs[dataset_index], ys[dataset_index]):
        print('processing fold {} of dataset {}...'.format(
            fold, utils.get_dataset_name(dataset_index)))

        fold_sets = [[], [], [], []]
        fold_sets[SplitPartNames['X_train']] = Xs[dataset_index][train_indexes]
        fold_sets[SplitPartNames['y_train']] = ys[dataset_index][train_indexes]
        # use the held-out indices for the test parts (the original used train_indexes here)
        fold_sets[SplitPartNames['X_test']] = Xs[dataset_index][test_indexes]
        fold_sets[SplitPartNames['y_test']] = ys[dataset_index][test_indexes]

        id3_fold_measures.append(trees_algorithms.measures_of_id3(fold_sets))
        cart_fold_measures.append(trees_algorithms.measures_of_cart(fold_sets))
        fold += 1

    id3_measures.append(id3_fold_measures)
    cart_measures.append(cart_fold_measures)
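# Hedged follow-up sketch (not from the source): one way to summarize the
# collected per-fold results, assuming each measure entry is numeric (or an
# array of numeric scores) so that a mean over the k folds is meaningful.
import numpy as np

for dataset_index in range(len(datasets)):
    id3_mean = np.mean(id3_measures[dataset_index], axis=0)
    cart_mean = np.mean(cart_measures[dataset_index], axis=0)
    print('{}: ID3 {} | CART {}'.format(
        utils.get_dataset_name(dataset_index), id3_mean, cart_mean))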
def main():
    args = parser.parse_args()
    print(args)

    config = configure(args.config)
    os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    print(colored(f"Model directory: {args.model_dir}", 'green'))
    assert os.path.isfile(args.model_dir)

    dataset_name = get_dataset_name(args.src, args.tgt)
    dataset_config = config.data.dataset[dataset_name]
    src_file = os.path.join(args.dataset_root, dataset_name, args.src + '_list.txt')
    tgt_file = os.path.join(args.dataset_root, dataset_name, args.tgt + '_list.txt')

    model = Model(base_net=args.network,
                  num_classes=dataset_config.num_classes,
                  frozen_layer='')
    del model.classifier_layer
    del model.contrast_layer

    model_state_dict = model.state_dict()
    trained_state_dict = torch.load(args.model_dir)['weights']
    keys = set(model_state_dict.keys())
    trained_keys = set(trained_state_dict.keys())
    shared_keys = keys.intersection(trained_keys)
    to_load_state_dict = {key: trained_state_dict[key] for key in shared_keys}
    model.load_state_dict(to_load_state_dict)
    model = model.cuda()

    # source classifier and domain classifier
    src_classifier = nn.Sequential(
        nn.Dropout(0.5),
        nn.Linear(model.base_network.out_dim, dataset_config.num_classes))
    initialize_layer(src_classifier)
    parameter_list = [{"params": src_classifier.parameters(), "lr": 1}]
    src_classifier = src_classifier.cuda()

    domain_classifier = nn.Sequential(
        nn.Dropout(0.5),
        nn.Linear(model.base_network.out_dim, 2))
    initialize_layer(domain_classifier)
    parameter_list += [{"params": domain_classifier.parameters(), "lr": 1}]
    domain_classifier = domain_classifier.cuda()

    group_ratios = [parameter['lr'] for parameter in parameter_list]
    optimizer = torch.optim.SGD(parameter_list,
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay,
                                nesterov=args.nesterov)

    assert args.lr_scheduler == 'inv'
    lr_scheduler = InvScheduler(gamma=args.gamma,
                                decay_rate=args.decay_rate,
                                group_ratios=group_ratios,
                                init_lr=args.lr)

    # split into train and validation sets
    src_size = len(open(src_file).readlines())
    src_train_size = int(args.train_portion * src_size)
    src_train_indices, src_test_indices = np.split(np.random.permutation(src_size), [src_train_size])

    tgt_size = len(open(tgt_file).readlines())
    tgt_train_size = int(args.train_portion * tgt_size)
    tgt_train_indices, tgt_test_indices = np.split(np.random.permutation(tgt_size), [tgt_train_size])

    # define data loaders
    train_data_loader_kwargs = {
        'shuffle': True,
        'drop_last': True,
        'batch_size': args.batch_size,
        'num_workers': args.num_workers
    }
    test_data_loader_kwargs = {
        'shuffle': False,
        'drop_last': False,
        'batch_size': args.eval_batch_size,
        'num_workers': args.num_workers
    }
    train_transformer = get_transform(training=True)
    test_transformer = get_transform(training=False)

    data_loader = {}
    data_iterator = {}

    src_train_dataset = IndicesDataset(src_file, list(src_train_indices), transform=train_transformer)
    data_loader['src_train'] = DataLoader(src_train_dataset, **train_data_loader_kwargs)
    src_test_dataset = IndicesDataset(src_file, list(src_test_indices), transform=test_transformer)
    data_loader['src_test'] = DataLoader(src_test_dataset, **test_data_loader_kwargs)
    tgt_train_dataset = IndicesDataset(tgt_file, list(tgt_train_indices), transform=train_transformer)
    data_loader['tgt_train'] = DataLoader(tgt_train_dataset, **train_data_loader_kwargs)
    tgt_test_dataset = IndicesDataset(tgt_file, list(tgt_test_indices), transform=test_transformer)
    data_loader['tgt_test'] = DataLoader(tgt_test_dataset, **test_data_loader_kwargs)

    for key in data_loader:
        data_iterator[key] = iter(data_loader[key])

    # start training
    total_progress_bar = tqdm.tqdm(desc='Iterations', total=args.max_iterations, ascii=True, smoothing=0.01)
    class_criterion = nn.CrossEntropyLoss()

    model.base_network.eval()
    src_classifier.train()
    domain_classifier.train()

    iteration = 0
    while iteration < args.max_iterations:
        lr_scheduler.adjust_learning_rate(optimizer, iteration)
        optimizer.zero_grad()

        src_data = get_sample(data_loader, data_iterator, 'src_train')
        src_inputs, src_labels = src_data['image_1'].cuda(), src_data['true_label'].cuda()
        tgt_data = get_sample(data_loader, data_iterator, 'tgt_train')
        tgt_inputs = tgt_data['image_1'].cuda()

        model.set_bn_domain(domain=0)
        with torch.no_grad():
            src_features = model.base_network(src_inputs)
            src_features = F.normalize(src_features, p=2, dim=1)
        src_class_logits = src_classifier(src_features)
        src_domain_logits = domain_classifier(src_features)

        model.set_bn_domain(domain=1)
        with torch.no_grad():
            tgt_features = model.base_network(tgt_inputs)
            tgt_features = F.normalize(tgt_features, p=2, dim=1)
        tgt_domain_logits = domain_classifier(tgt_features)

        src_classification_loss = class_criterion(src_class_logits, src_labels)

        domain_logits = torch.cat([src_domain_logits, tgt_domain_logits], dim=0)
        domain_labels = torch.tensor([0] * src_inputs.size(0) + [1] * tgt_inputs.size(0)).cuda()
        domain_classification_loss = class_criterion(domain_logits, domain_labels)

        if iteration % args.print_acc_interval == 0:
            compute_accuracy(src_class_logits, src_labels,
                             acc_metric=dataset_config.acc_metric, print_result=True)
            compute_accuracy(domain_logits, domain_labels,
                             acc_metric='total_mean', print_result=True)

        total_loss = src_classification_loss + domain_classification_loss
        total_loss.backward()
        optimizer.step()

        iteration += 1
        total_progress_bar.update(1)

    # test
    model.base_network.eval()
    src_classifier.eval()
    domain_classifier.eval()

    with torch.no_grad():
        src_all_class_logits = []
        src_all_labels = []
        src_all_domain_logits = []
        model.set_bn_domain(domain=0)
        for src_test_data in tqdm.tqdm(data_loader['src_test'], desc='src_test', leave=False, ascii=True):
            src_test_inputs, src_test_labels = src_test_data['image_1'].cuda(), src_test_data['true_label'].cuda()
            src_test_features = model.base_network(src_test_inputs)
            src_test_features = F.normalize(src_test_features, p=2, dim=1)
            src_test_class_logits = src_classifier(src_test_features)
            src_test_domain_logits = domain_classifier(src_test_features)
            src_all_class_logits += [src_test_class_logits]
            src_all_labels += [src_test_labels]
            src_all_domain_logits += [src_test_domain_logits]

        src_all_class_logits = torch.cat(src_all_class_logits, dim=0)
        src_all_labels = torch.cat(src_all_labels, dim=0)
        src_all_domain_logits = torch.cat(src_all_domain_logits, dim=0)

        src_test_class_acc = compute_accuracy(src_all_class_logits, src_all_labels,
                                              acc_metric=dataset_config.acc_metric, print_result=True)
        src_test_domain_acc = compute_accuracy(src_all_domain_logits,
                                               torch.zeros(src_all_domain_logits.size(0)).cuda(),
                                               acc_metric='total_mean', print_result=True)

        tgt_all_domain_logits = []
        model.set_bn_domain(domain=1)
        for tgt_test_data in tqdm.tqdm(data_loader['tgt_test'], desc='tgt_test', leave=False, ascii=True):
            tgt_test_inputs = tgt_test_data['image_1'].cuda()
            tgt_test_features = model.base_network(tgt_test_inputs)
            tgt_test_features = F.normalize(tgt_test_features, p=2, dim=1)
            tgt_test_domain_logits = domain_classifier(tgt_test_features)
            tgt_all_domain_logits += [tgt_test_domain_logits]

        tgt_all_domain_logits = torch.cat(tgt_all_domain_logits, dim=0)
        tgt_test_domain_acc = compute_accuracy(tgt_all_domain_logits,
                                               torch.ones(tgt_all_domain_logits.size(0)).cuda(),
                                               acc_metric='total_mean', print_result=True)

    write_list = [
        args.model_dir,
        src_test_class_acc,
        src_test_domain_acc,
        tgt_test_domain_acc
    ]
    # with open('hyper_search_office_home.csv', 'a') as f:
    with open(args.output_file, 'a') as f:
        csv_writer = csv.writer(f)
        csv_writer.writerow(write_list)
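# Hedged sketch (not from the source): get_sample(data_loader, data_iterator, key)
# used above is not shown in this excerpt; a typical implementation restarts the
# iterator when the underlying DataLoader is exhausted, as sketched below. The
# repo's helper may differ.
def get_sample_sketch(data_loader, data_iterator, key):
    try:
        sample = next(data_iterator[key])
    except StopIteration:
        # re-create the iterator and draw again once the epoch is exhausted
        data_iterator[key] = iter(data_loader[key])
        sample = next(data_iterator[key])
    return sample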
def main():
    args = parser.parse_args()
    print(args)

    config = configure(args.config)
    os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    print(colored(f"Model directory: {args.model_dir}", 'green'))
    assert os.path.isfile(args.model_dir)

    dataset_name = get_dataset_name(args.src, args.tgt)
    dataset_config = config.data.dataset[dataset_name]
    tgt_file = os.path.join(args.dataset_root, dataset_name, args.tgt + '_list.txt')

    # tgt classification
    model = Model(base_net=args.network,
                  num_classes=dataset_config.num_classes,
                  frozen_layer='')
    del model.classifier_layer
    del model.contrast_layer

    model_state_dict = model.state_dict()
    trained_state_dict = torch.load(args.model_dir)['weights']
    keys = set(model_state_dict.keys())
    trained_keys = set(trained_state_dict.keys())
    shared_keys = keys.intersection(trained_keys)
    to_load_state_dict = {key: trained_state_dict[key] for key in shared_keys}
    model.load_state_dict(to_load_state_dict)
    model = model.cuda()

    # define data loader
    test_data_loader_kwargs = {
        'shuffle': False,
        'drop_last': False,
        'batch_size': args.batch_size,
        'num_workers': args.num_workers
    }
    test_transformer = get_transform(training=False)

    data_loader = {}
    data_iterator = {}
    tgt_test_dataset = DefaultDataset(tgt_file, transform=test_transformer)
    data_loader['tgt_test'] = DataLoader(tgt_test_dataset, **test_data_loader_kwargs)
    for key in data_loader:
        data_iterator[key] = iter(data_loader[key])

    # test
    model.base_network.eval()
    with torch.no_grad():
        tgt_all_features = []
        tgt_all_labels = []
        model.set_bn_domain(domain=1)
        for tgt_test_data in tqdm.tqdm(data_loader['tgt_test'], desc='tgt_test', leave=False, ascii=True):
            tgt_test_inputs, tgt_test_labels = tgt_test_data['image_1'].cuda(), tgt_test_data['true_label'].cuda()
            tgt_test_features = model.base_network(tgt_test_inputs)
            # tgt_test_features = F.normalize(tgt_test_features, p=2, dim=1)
            tgt_all_features += [tgt_test_features]
            tgt_all_labels += [tgt_test_labels]

        tgt_all_features = torch.cat(tgt_all_features, dim=0)
        tgt_all_labels = torch.cat(tgt_all_labels, dim=0)
        tgt_all_features = tgt_all_features.cpu().numpy()
        tgt_all_labels = tgt_all_labels.cpu().numpy()

    # the 'features/<src>_<tgt>' directory is assumed to exist before dumping
    features_pickle_file = os.path.join('features', args.src + '_' + args.tgt, 'tgt_features.pkl')
    labels_pickle_file = os.path.join('features', args.src + '_' + args.tgt, 'tgt_labels.pkl')
    pickle.dump(tgt_all_features, open(features_pickle_file, 'wb'))
    pickle.dump(tgt_all_labels, open(labels_pickle_file, 'wb'))
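# Hedged follow-up sketch (not from the source): loading the dumped target
# features and labels back, e.g. for a t-SNE or nearest-neighbour analysis.
# The source/target names are hypothetical; the paths mirror the ones written above.
import os
import pickle

src, tgt = 'amazon', 'webcam'  # hypothetical source/target names
feature_dir = os.path.join('features', src + '_' + tgt)
with open(os.path.join(feature_dir, 'tgt_features.pkl'), 'rb') as f:
    tgt_features = pickle.load(f)
with open(os.path.join(feature_dir, 'tgt_labels.pkl'), 'rb') as f:
    tgt_labels = pickle.load(f)
print(tgt_features.shape, tgt_labels.shape)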