def main(argv):
  del argv

  confusions = [float(t) for t in FLAGS.confusions.split(" ")]
  mixtures = [float(t) for t in FLAGS.active_sampling_percentage.split(" ")]
  max_dataset_size = None if FLAGS.max_dataset_size == 0 else FLAGS.max_dataset_size
  starting_seed = 42

  for c in confusions:
    for m in mixtures:
      for seed in range(starting_seed, starting_seed + FLAGS.trials):
        sampler = get_AL_sampler(FLAGS.sampling_method)
        score_model = utils.get_model(seed)
        results, sampler_state = generate_one_curve(
            sampler, score_model, seed, FLAGS.warmstart_size,
            FLAGS.batch_size, c, m, FLAGS.train_horizon)
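The nested loops above sweep every (confusion, mixture, seed) combination, and each combination produces one learning curve. A minimal, self-contained sketch of that grid expansion; parse_grid is an illustrative helper, not part of the codebase:

import itertools

def parse_grid(confusions_str, mixtures_str, trials, starting_seed=42):
    """Expand space-separated flag strings into the full experiment grid."""
    confusions = [float(t) for t in confusions_str.split(" ")]
    mixtures = [float(t) for t in mixtures_str.split(" ")]
    seeds = range(starting_seed, starting_seed + trials)
    return list(itertools.product(confusions, mixtures, seeds))

# Example: two confusion levels, one mixture weight, three trials -> 6 runs.
grid = parse_grid("0.1 0.3", "1.0", trials=3)
for c, m, seed in grid:
    print("confusion=%.2f mixture=%.2f seed=%d" % (c, m, seed))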
def main(args):
    # Make the export folder structure.
    # This is done here because the Logger uses the filename.
    if args.do_save:
        # Make a base save directory.
        utils.make_dir(args.save_dir)
        # Make a directory inside the base save directory for the
        # specific sampling method.
        save_subdir = os.path.join(args.save_dir,
                                   args.dataset + "_" + args.sampling_method)
        utils.make_dir(save_subdir)
        filename = os.path.join(
            save_subdir,
            "log-" + strftime("%Y-%m-%d-%H-%M-%S", gmtime()) + ".txt")
        sys.stdout = utils.Logger(filename)

    # The confusion and mixture arguments can each hold multiple values.
    confusions = [float(t) for t in args.confusions.split(" ")]
    mixtures = [float(t) for t in args.active_sampling_percentage.split(" ")]
    max_dataset_size = None if args.max_dataset_size == 0 else args.max_dataset_size
    starting_seed = args.seed

    # Load the dataset from file based on the data directory and dataset name.
    X, y = utils.get_mldata(args.data_dir, args.dataset)

    # Object to store the results in.
    all_results = {}

    # Percentage of labels to randomize.
    for c in confusions:
        # Mixture weights on active sampling.
        for m in mixtures:
            # The number of curves created during multiple trials.
            for seed in range(starting_seed, starting_seed + args.trials):
                # Get the sampler (also called the query strategy) by name.
                sampler = get_AL_sampler(args.sampling_method)
                # Get the scoring model.
                score_model = utils.get_model(args.score_method, seed)
                # Use a separate selection model only if it differs from the
                # score model.
                if (args.select_method == "None" or
                        args.select_method == args.score_method):
                    select_model = None
                else:
                    select_model = utils.get_model(args.select_method, seed)
                # Create the learning curve.
                results, sampler_state = generate_one_curve(
                    X, y, sampler, score_model, seed,
                    args.warmstart_size, args.batch_size, select_model,
                    confusion=c, active_p=m, max_points=max_dataset_size,
                    standardize_data=args.standardize_data,
                    norm_data=args.normalize_data,
                    train_horizon=args.train_horizon)
                key = (args.dataset, args.sampling_method, args.score_method,
                       args.select_method, m, args.warmstart_size,
                       args.batch_size, c, args.standardize_data,
                       args.normalize_data, seed)
                sampler_output = sampler_state.to_dict()
                results["sampler_output"] = sampler_output
                all_results[key] = results

    # Store the field names corresponding to the positions in the key tuples.
    fields = [
        "dataset", "sampler", "score_method", "select_method",
        "active percentage", "warmstart size", "batch size", "confusion",
        "standardize", "normalize", "seed"
    ]
    all_results["tuple_keys"] = fields

    # Write the results to a file.
    if args.do_save:
        # Format the filename.
        filename = "results_score_{}_select_{}_norm_{}_stand_{}".format(
            args.score_method, args.select_method, args.normalize_data,
            args.standardize_data)
        existing_files = gfile.Glob(
            os.path.join(save_subdir, "{}*.pkl".format(filename)))
        # Zero-pad the run index to three digits: str(1000 + n)[1:].
        filepath = os.path.join(
            save_subdir,
            "{}_{}.pkl".format(filename, str(1000 + len(existing_files))[1:]))
        # Dump the dict to a pickle file.
        pickle.dump(all_results, gfile.GFile(filepath, "w"))
        # Flush the stdout logger so the log file is complete.
        sys.stdout.flush_file()
def main(argv):
  del argv

  if not gfile.Exists(FLAGS.save_dir):
    try:
      gfile.MkDir(FLAGS.save_dir)
    except:
      print(('WARNING: error creating save directory, '
             'directory most likely already created.'))

  save_dir = os.path.join(
      FLAGS.save_dir, FLAGS.dataset + '_' + FLAGS.sampling_method)

  if FLAGS.do_save == "True":
    if not gfile.Exists(save_dir):
      try:
        gfile.MkDir(save_dir)
      except:
        print(('WARNING: error creating save directory, '
               'directory most likely already created.'))
    # Set up logging
    filename = os.path.join(
        save_dir, "log-" + strftime("%Y-%m-%d-%H-%M-%S", gmtime()) + ".txt")
    sys.stdout = utils.Logger(filename)

  X, y = utils.get_mldata(FLAGS.data_dir, FLAGS.dataset)  # Load the dataset.
  starting_seed = FLAGS.seed
  all_results = {}

  for seed in range(starting_seed, starting_seed + FLAGS.trials):
    sampler = get_AL_sampler(FLAGS.sampling_method)  # Load the sampler.
    score_model = utils.get_model(FLAGS.score_method, seed)  # Load the score model.
    # Load the select model, unless it matches the score model.
    if (FLAGS.select_method == "None" or
        FLAGS.select_method == FLAGS.score_method):
      select_model = None
    else:
      select_model = utils.get_model(FLAGS.select_method, seed)
    results, sampler_state = generate_one_curve(
        X=X, y=y, sampler=sampler, score_model=score_model, seed=seed,
        warmstart_size=FLAGS.warmstart_size, batch_size=FLAGS.batch_size,
        select_model=select_model, max_points=FLAGS.max_dataset_size)
    key = (FLAGS.dataset, FLAGS.sampling_method, FLAGS.score_method,
           FLAGS.select_method, FLAGS.warmstart_size, FLAGS.batch_size, seed)
    # sampler_output = sampler_state.to_dict()
    # results['sampler_output'] = sampler_output
    results['sampler_output'] = None
    all_results[key] = results

  fields = [
      'dataset', 'sampling_methods', 'score_method', 'select_method',
      'warmstart size', 'batch size', 'seed'
  ]
  all_results['tuple_keys'] = fields

  if FLAGS.do_save == "True":
    filename = ("results_score_" + FLAGS.score_method +
                "_select_" + FLAGS.select_method)
    existing_files = gfile.Glob(os.path.join(save_dir, filename + "*.pkl"))
    filename = os.path.join(
        save_dir,
        filename + "_" + str(1000 + len(existing_files))[1:] + ".pkl")
    pickle.dump(all_results, gfile.GFile(filename, "w"))
    sys.stdout.flush_file()
def main(argv):
  del argv

  if not gfile.Exists(FLAGS.save_dir):
    try:
      gfile.MkDir(FLAGS.save_dir)
    except:
      print(('WARNING: error creating save directory, '
             'directory most likely already created.'))

  save_dir = os.path.join(
      FLAGS.save_dir, FLAGS.dataset + "_" + FLAGS.sampling_method)
  do_save = FLAGS.do_save == "True"

  if do_save:
    if not gfile.Exists(save_dir):
      try:
        gfile.MkDir(save_dir)
      except:
        print(('WARNING: error creating save directory, '
               'directory most likely already created.'))
    # Set up logging
    filename = os.path.join(
        save_dir, "log-" + strftime("%Y-%m-%d-%H-%M-%S", gmtime()) + ".txt")
    sys.stdout = utils.Logger(filename)

  confusions = [float(t) for t in FLAGS.confusions.split(" ")]
  mixtures = [float(t) for t in FLAGS.active_sampling_percentage.split(" ")]
  all_results = {}
  max_dataset_size = None if FLAGS.max_dataset_size == "0" else int(
      FLAGS.max_dataset_size)
  normalize_data = FLAGS.normalize_data == "True"
  standardize_data = FLAGS.standardize_data == "True"
  X, y = utils.get_mldata(FLAGS.data_dir, FLAGS.dataset)
  starting_seed = FLAGS.seed

  for c in confusions:
    for m in mixtures:
      for seed in range(starting_seed, starting_seed + FLAGS.trials):
        sampler = get_AL_sampler(FLAGS.sampling_method)
        score_model = utils.get_model(FLAGS.score_method, seed)
        if (FLAGS.select_method == "None" or
            FLAGS.select_method == FLAGS.score_method):
          select_model = None
        else:
          select_model = utils.get_model(FLAGS.select_method, seed)
        results, sampler_state = generate_one_curve(
            X, y, sampler, score_model, seed, FLAGS.warmstart_size,
            FLAGS.batch_size, select_model, c, m, max_dataset_size,
            standardize_data, normalize_data, FLAGS.train_horizon)
        key = (FLAGS.dataset, FLAGS.sampling_method, FLAGS.score_method,
               FLAGS.select_method, m, FLAGS.warmstart_size,
               FLAGS.batch_size, c, standardize_data, normalize_data, seed)
        sampler_output = sampler_state.to_dict()
        results["sampler_output"] = sampler_output
        all_results[key] = results

  fields = [
      "dataset", "sampler", "score_method", "select_method",
      "active percentage", "warmstart size", "batch size", "confusion",
      "standardize", "normalize", "seed"
  ]
  all_results["tuple_keys"] = fields

  if do_save:
    filename = ("results_score_" + FLAGS.score_method + "_select_" +
                FLAGS.select_method + "_norm_" + str(normalize_data) +
                "_stand_" + str(standardize_data))
    existing_files = gfile.Glob(os.path.join(save_dir, filename + "*.pkl"))
    filename = os.path.join(
        save_dir,
        filename + "_" + str(1000 + len(existing_files))[1:] + ".pkl")
    pickle.dump(all_results, gfile.GFile(filename, "w"))
    sys.stdout.flush_file()
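The str(1000 + len(existing_files))[1:] idiom above zero-pads the run index to three digits ("000", "001", ...) so repeated runs never overwrite earlier result files and sort lexicographically. A standalone sketch of the same scheme, with glob standing in for gfile.Glob (next_result_path and the paths are illustrative):

import glob
import os

def next_result_path(results_dir, stem):
    existing = glob.glob(os.path.join(results_dir, stem + "*.pkl"))
    index = str(1000 + len(existing))[1:]  # 0 -> "000", 1 -> "001", ...
    return os.path.join(results_dir, stem + "_" + index + ".pkl")

print(next_result_path("/tmp", "results_score_logistic_select_None"))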
def train_classifier():
    global dataset
    global kwargs
    global sampler
    global classifier_trained
    global X_pred
    global y_pred

    data = bottle.request.json

    # Train on samples that have been labeled so far.
    # dataset.set_kind(DetectionKind.UserDetection.value)
    dataset.set_kind(4)
    print(dataset.current_set)
    print(type(dataset.current_set))
    X_train = dataset.em[dataset.current_set]
    y_train = np.asarray(dataset.getlabels())
    # print(y_train)

    timer = time.time()
    kwargs["model"].fit(X_train, y_train)
    print('Training took %0.2f seconds' % (time.time() - timer))

    timer = time.time()
    joblib.dump(
        kwargs["model"],
        "%s/%s_%04d.skmodel" % (args.checkpoint_dir, 'classifier',
                                len(dataset.current_set)))
    print('Saving classifier checkpoint took %0.2f seconds' %
          (time.time() - timer))

    # Predict on the samples that have not been labeled.
    timer = time.time()
    dataset.set_kind(DetectionKind.ModelDetection.value)
    X_pred = dataset.em[dataset.current_set]
    y_pred = kwargs["model"].predict(X_pred)
    print('Predicting on unlabeled samples took %0.2f seconds' %
          (time.time() - timer))
    # print(y_pred)

    # Update the model-predicted class in the PostgreSQL database, row by row.
    # timer = time.time()
    # for pos in range(len(y_pred)):
    #     idx = dataset.current_set[pos]
    #     det_id = dataset.samples[idx][0]
    #     matching_detection_entries = (Detection
    #                                   .select(Detection.id, Detection.category_id)
    #                                   .where((Detection.id == det_id)))
    #     mde = matching_detection_entries.get()
    #     command = Detection.update(category_id=y_pred[pos]).where(Detection.id == mde.id)
    #     command.execute()
    # print('Updating the database took %0.2f seconds' % (time.time() - timer))

    # Alternative: batch-update the PostgreSQL database with a CASE statement.
    # timer = time.time()
    # det_ids = [dataset.samples[dataset.current_set[pos]][0] for pos in range(len(y_pred))]
    # y_pred = [int(y) for y in y_pred]
    # det_id_pred_pairs = list(zip(det_ids, y_pred))
    # case_statement = Case(Detection.id, det_id_pred_pairs)
    # command = Detection.update(category_id=case_statement).where(Detection.id.in_(det_ids))
    # command.execute()
    # print('Updating the database the other way took %0.2f seconds' % (time.time() - timer))

    # Update the dataset dataloader.
    timer = time.time()
    for pos in range(len(y_pred)):
        idx = dataset.current_set[pos]
        sample_data = list(dataset.samples[idx])
        sample_data[1] = y_pred[pos]
        dataset.samples[idx] = tuple(sample_data)
    print('Updating the dataset dataloader took %0.2f seconds' %
          (time.time() - timer))

    # Once the classifier has been trained the first time, switch to AL sampling.
    if not classifier_trained:
        classifier_trained = True
        sampler = get_AL_sampler('confidence')(dataset.em,
                                               dataset.getalllabels(), 1234)

    bottle.response.content_type = 'application/json'
    bottle.response.status = 200
    return json.dumps(data)
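The dataloader update above rewrites each cached sample tuple with its new predicted class; because tuples are immutable, the code round-trips through a list. A tiny standalone illustration, assuming field 1 of each sample tuple holds the label (the sample values are synthetic):

samples = [(101, 0, "img_a.jpg"), (102, 0, "img_b.jpg")]
y_pred = [3, 7]
for pos, label in enumerate(y_pred):
    sample_data = list(samples[pos])
    sample_data[1] = label
    samples[pos] = tuple(sample_data)
print(samples)  # [(101, 3, 'img_a.jpg'), (102, 7, 'img_b.jpg')]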
color_indices = list(
    set(range(len(dataset.samples))) -
    set(grayscale_indices))  # records with color images
detection_conf_values = [rec[4] for rec in dataset.samples]

dataset.updateEmbedding(model)
dataset.embedding_mode()
dataset.train()

kwargs = {}
kwargs["N"] = 25
kwargs["already_selected"] = set()

if args.classifier_checkpoint != '':
    print('loading pre-trained classifier')
    kwargs["model"] = joblib.load(args.classifier_checkpoint)
    classifier_trained = True
    sampler = get_AL_sampler('confidence')(dataset.em,
                                           dataset.getalllabels(), 1234)

    # Use the classifier to generate predictions.
    dataset.set_kind(DetectionKind.ModelDetection.value)
    X_pred = dataset.em[dataset.current_set]
    y_pred = kwargs["model"].predict(X_pred)

    # # Update the model-predicted class in the PostgreSQL database.
    # for pos in range(len(y_pred)):
    #     idx = dataset.current_set[pos]
    #     det_id = dataset.samples[idx][0]
    #     matching_detection_entries = (Detection
    #                                   .select(Detection.id, Detection.category_id)
    #                                   .where((Detection.id == det_id)))
    #     mde = matching_detection_entries.get()
    #     command = Detection.update(category_id=y_pred[pos]).where(Detection.id == mde.id)
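The checkpoint branch above resumes from a classifier previously saved with joblib. A minimal round-trip sketch of that persistence pattern (the model, data, and filename are illustrative):

import os
import tempfile
import joblib
from sklearn.linear_model import LogisticRegression

clf = LogisticRegression().fit([[0], [1]], [0, 1])
path = os.path.join(tempfile.gettempdir(), "classifier_0001.skmodel")
joblib.dump(clf, path)          # save a fitted classifier checkpoint
restored = joblib.load(path)    # resume from the checkpoint later
print(restored.predict([[0.8]]))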
def main():
    args = parser.parse_args()

    # Initialize the database.
    ## Database connection credentials.
    DB_NAME = args.db_name
    USER = args.db_user
    PASSWORD = args.db_password

    print("DB Connect")
    ## Try to connect as USER to database DB_NAME through peewee.
    target_db = PostgresqlDatabase(DB_NAME, user=USER, password=PASSWORD,
                                   host='localhost')
    target_db.connect(reuse_if_open=True)
    db_proxy.initialize(target_db)
    print("connected")

    # Load the saved embedding model.
    checkpoint = load_checkpoint(args.base_model)
    if args.experiment_name == '':
        args.experiment_name = "experiment_%s_%s" % (checkpoint['loss_type'],
                                                     args.strategy)
    if not os.path.exists(args.experiment_name):
        os.mkdir(args.experiment_name)
    if (checkpoint['loss_type'].lower() == 'center' or
            checkpoint['loss_type'].lower() == 'softmax'):
        embedding_net = SoftmaxNet(checkpoint['arch'], checkpoint['feat_dim'],
                                   checkpoint['num_classes'], False)
    else:
        embedding_net = NormalizedEmbeddingNet(checkpoint['arch'],
                                               checkpoint['feat_dim'], False)
    model = torch.nn.DataParallel(embedding_net).cuda()
    model.load_state_dict(checkpoint['state_dict'])

    # dataset_query = Detection.select().limit(5)
    dataset_query = Detection.select(
        Detection.image_id, Oracle.label,
        Detection.kind).join(Oracle).order_by(fn.random()).limit(
            args.db_query_limit)  ## TODO: should this really be order_by random?
    dataset = SQLDataLoader(args.crop_dir, query=dataset_query,
                            is_training=False,
                            kind=DetectionKind.ModelDetection.value,
                            num_workers=8, limit=args.db_query_limit)
    dataset.updateEmbedding(model)
    # plot_embedding_images(dataset.em[:], np.asarray(dataset.getlabels()), dataset.getpaths(), {})
    # plot_embedding_images(dataset.em[:], np.asarray(dataset.getalllabels()), dataset.getallpaths(), {})

    # Random examples to start.
    # random_ids = np.random.choice(dataset.current_set, 1000, replace=False).tolist()
    # random_ids = selectSamples(dataset.em[dataset.current_set], dataset.current_set, 2000)
    # print(random_ids)
    # Move records.
    # moveRecords(dataset, DetectionKind.ModelDetection.value, DetectionKind.UserDetection.value, random_ids)
    # print([len(x) for x in dataset.set_indices])

    # # Finetune the embedding model.
    # dataset.set_kind(DetectionKind.UserDetection.value)
    # dataset.train()
    # train_dataset = SQLDataLoader(trainset_query, os.path.join(args.run_data, 'crops'), is_training=True)
    # finetune_embedding(model, checkpoint['loss_type'], dataset, 32, 4, 100)
    # save_checkpoint({
    #     'arch': model.arch,
    #     'state_dict': model.state_dict(),
    #     'optimizer': optimizer.state_dict(),
    #     'loss_type': loss_type,
    # }, False, "%s%s_%s_%04d.tar" % ('finetuned', loss_type, model.arch,
    #                                 len(dataset.set_indices[DetectionKind.UserDetection.value])))

    dataset.embedding_mode()
    dataset.train()
    sampler = get_AL_sampler(args.strategy)(dataset.em,
                                            dataset.getalllabels(), 12)
    kwargs = {}
    kwargs["N"] = args.active_batch
    kwargs["already_selected"] = dataset.set_indices[
        DetectionKind.UserDetection.value]
    kwargs["model"] = MLPClassifier(alpha=0.0001)

    print("Start the active learning loop")
    sys.stdout.flush()
    numLabeled = len(dataset.set_indices[DetectionKind.UserDetection.value])
    while numLabeled <= args.active_budget:
        print([len(x) for x in dataset.set_indices])
        sys.stdout.flush()
        # Get indices of samples for the user to label.
        if numLabeled == 0:
            indices = np.random.choice(dataset.current_set, kwargs["N"],
                                       replace=False).tolist()
        else:
            indices = sampler.select_batch(**kwargs)
        # numLabeled = len(dataset.set_indices[DetectionKind.UserDetection.value])
        # kwargs["already_selected"].extend(indices)
        moveRecords(dataset, DetectionKind.ModelDetection.value,
                    DetectionKind.UserDetection.value, indices)
        numLabeled = len(
            dataset.set_indices[DetectionKind.UserDetection.value])

        # Train on samples that have been labeled so far.
        dataset.set_kind(DetectionKind.UserDetection.value)
        X_train = dataset.em[dataset.current_set]
        y_train = np.asarray(dataset.getlabels())
        kwargs["model"].fit(X_train, y_train)
        joblib.dump(
            kwargs["model"],
            "%s/%s_%04d.skmodel" % (args.experiment_name, 'classifier',
                                    numLabeled))

        # Test on the samples that have not been labeled.
        dataset.set_kind(DetectionKind.ModelDetection.value)
        dataset.embedding_mode()
        X_test = dataset.em[dataset.current_set]
        y_test = np.asarray(dataset.getlabels())
        print("Accuracy", kwargs["model"].score(X_test, y_test))
        sys.stdout.flush()

        # Periodically fine-tune the embedding model on the labeled pool;
        # this condition fires at 1000, 3000, 5000, ... labeled samples.
        if numLabeled % 2000 == 1000:
            dataset.set_kind(DetectionKind.UserDetection.value)
            finetune_embedding(model, checkpoint['loss_type'], dataset, 10, 4,
                               100 if numLabeled == 1000 else 50)
            save_checkpoint(
                {
                    'arch': checkpoint['arch'],
                    'state_dict': model.state_dict(),
                    # 'optimizer': optimizer.state_dict(),
                    'loss_type': checkpoint['loss_type'],
                    'feat_dim': checkpoint['feat_dim'],
                    'num_classes': args.num_classes
                }, False,
                "%s/%s%s_%s_%04d.tar" % (args.experiment_name, 'finetuned',
                                         checkpoint['loss_type'],
                                         checkpoint['arch'], numLabeled))
            dataset.set_kind(DetectionKind.ModelDetection.value)
            dataset.updateEmbedding(model)
            dataset.embedding_mode()
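Taken together, the two blocks above implement the standard pool-based active-learning cycle: select a batch, move it to the labeled pool, refit the classifier, and score the remaining pool, repeating until the labeling budget is spent. A self-contained sklearn sketch of that cycle, with random selection standing in for sampler.select_batch (all data and names here are synthetic):

import numpy as np
from sklearn.neural_network import MLPClassifier

rng = np.random.RandomState(12)
X = rng.randn(500, 16)
y = (X[:, 0] > 0).astype(int)
labeled = list(rng.choice(500, 25, replace=False))
model = MLPClassifier(alpha=0.0001, max_iter=500)

budget, batch = 100, 25
while len(labeled) <= budget:
    model.fit(X[labeled], y[labeled])          # refit on the labeled pool
    unlabeled = np.setdiff1d(np.arange(500), labeled)
    print("labeled=%d acc=%.3f" % (len(labeled),
                                   model.score(X[unlabeled], y[unlabeled])))
    # An AL sampler would rank `unlabeled` by informativeness here.
    labeled.extend(rng.choice(unlabeled, batch, replace=False).tolist())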
def main(): args = parser.parse_args() print("DB Connect") db_path = os.path.join(args.run_data, os.path.basename( args.run_data)) + ".db" print(db_path) db = SqliteDatabase(db_path) proxy.initialize(db) db.connect() print("connected") print("CompleteLoop") checkpoint = load_checkpoint(args.base_model) embedding_net = EmbeddingNet(checkpoint['arch'], checkpoint['feat_dim'], False) #embedding_net = EmbeddingNet('resnet50', 256, True) model = torch.nn.DataParallel(embedding_net).cuda() model.load_state_dict(checkpoint['state_dict']) #unlabeledset_query= Detection.select(Detection.id,Oracle.label).join(Oracle).where(Detection.kind==DetectionKind.ModelDetection.value).order_by(fn.random()).limit(150000) #unlabeled_dataset = SQLDataLoader(unlabeledset_query, os.path.join(args.run_data, "crops"), is_training= False, num_workers= 8) dataset = SQLDataLoader(os.path.join(args.run_data, "crops"), is_training=False, kind=DetectionKind.ModelDetection.value, num_workers=8) dataset.updateEmbedding(model) #print('Embedding Done') #sys.stdout.flush() #plot_embedding(dataset.em[dataset.current_set], np.asarray(dataset.getlabels()) , dataset.getpaths(), {}) # Random examples to start random_ids = np.random.choice(dataset.current_set, 5000, replace=False).tolist() #random_ids = selectSamples(dataset.em[dataset.current_set], dataset.current_set, 2000) #print(random_ids) # Move Records moveRecords(dataset, DetectionKind.ModelDetection.value, DetectionKind.UserDetection.value, random_ids) print([len(x) for x in dataset.set_indices]) # Finetune the embedding model dataset.setKind(DetectionKind.UserDetection.value) dataset.train() #train_dataset = SQLDataLoader(trainset_query, os.path.join(args.run_data, 'crops'), is_training= True) finetune_embedding(model, dataset, 32, 4, 0) #unlabeled_dataset.updateEmbedding(model) dataset.updateEmbedding(model) dataset.setKind(DetectionKind.UserDetection.value) #print(dataset.em[dataset.current_set].shape, np.asarray(dataset.getlabels()).shape, len(dataset.getpaths())) #plot_embedding( dataset.em[dataset.current_set], np.asarray(dataset.getlabels()) , dataset.getpaths(), {}) #plot_embedding( unlabeled_dataset.em, np.asarray(unlabeled_dataset.getlabels()) , unlabeled_dataset.getIDs(), {}) dataset.embedding_mode() dataset.train() clf_model = ClassificationNet(256, 48).cuda() #train_eval_classifier() #clf_model = ClassificationNet(checkpoint['feat_dim'], 48).cuda() clf_criterion = FocalLoss(gamma=2) #nn.CrossEntropyLoss() clf_optimizer = torch.optim.Adam(clf_model.parameters(), lr=0.001, weight_decay=0.0005) clf_e = Engine(clf_model, clf_criterion, clf_optimizer, verbose=True, print_freq=10) #names = ["Linear SVM", "RBF SVM", "Random Forest", "Neural Net", "Naive Bayes"] #classifiers = [SVC(kernel="linear", C=0.025, probability= True, class_weight='balanced'), # SVC(gamma=2, C=1, probability= True, class_weight='balanced'), # RandomForestClassifier(max_depth=None, n_estimators=100, class_weight='balanced'), # MLPClassifier(alpha=1), # GaussianNB()] #estimators= [] #for name, clf in zip(names, classifiers): # estimators.append((name, clf)) #eclf1 = VotingClassifier(estimators= estimators, voting='hard') #eclf2 = VotingClassifier(estimators= estimators, voting='soft') #names.append("ensemble hard") #classifiers.append(eclf1) #names.append("ensemble soft") #classifiers.append(eclf2) names = ["Neural Net"] classifiers = [MLPClassifier(alpha=1)] """dataset.setKind(DetectionKind.UserDetection.value) learner = ActiveLearner( estimator=MLPClassifier(), 
        query_strategy=uncertainty_sampling,
        X_training=dataset.em[dataset.current_set],
        y_training=np.asarray(dataset.getlabels()))
    for step in range(91):
        dataset.setKind(DetectionKind.ModelDetection.value)
        query_idx, query_inst = learner.query(dataset.em[dataset.current_set],
                                              n_instances=100)
        moveRecords(dataset, DetectionKind.ModelDetection.value,
                    DetectionKind.UserDetection.value,
                    [dataset.current_set[i] for i in query_idx])
        dataset.setKind(DetectionKind.UserDetection.value)
        learner.teach(dataset.em[dataset.current_set],
                      np.asarray(dataset.getlabels()))
        if step in [11, 31, 51, 71, 91, 101]:
            finetune_embedding(model, dataset, 32, 4, 100)
            dataset.updateEmbedding(model)
            dataset.embedding_mode()
        dataset.setKind(DetectionKind.ModelDetection.value)
        print(learner.score(dataset.em[dataset.current_set],
                            np.asarray(dataset.getlabels())))
        print([len(x) for x in dataset.set_indices])
        sys.stdout.flush()"""

    sampler = get_AL_sampler('uniform')(dataset.em[dataset.current_set],
                                        dataset.getlabels(), 12)
    print(sampler, type(sampler), dir(sampler))
    kwargs = {}
    kwargs["N"] = 100
    kwargs["already_selected"] = []
    kwargs["model"] = SVC(kernel="linear", C=0.025, probability=True,
                          class_weight='balanced')
    kwargs["model"].fit(dataset.em[dataset.current_set], dataset.getlabels())
    batch_AL = sampler.select_batch(**kwargs)
    print(batch_AL)

    for step in range(101):
        dataset.setKind(DetectionKind.UserDetection.value)
        clf_model.train()
        clf_train_loader = dataset.getSingleLoader(batch_size=64)
        for i in range(15):
            clf_e.train_one_epoch(clf_train_loader, i, True)
        clf_model.eval()
        X_train = dataset.em[dataset.current_set]
        y_train = np.asarray(dataset.getlabels())
        for name, clf in zip(names, classifiers):
            clf.fit(X_train, y_train)
            print(name)

        dataset.setKind(DetectionKind.ModelDetection.value)
        # dataset.image_mode()
        # dataset.updateEmbedding(model)
        dataset.embedding_mode()
        dataset.eval()
        eval_loader = dataset.getSingleLoader(batch_size=1024)
        clf_e.validate(eval_loader, True)
        X_test = dataset.em[dataset.current_set]
        y_test = np.asarray(dataset.getlabels())

        prob_list = []
        for name, clf in zip(names, classifiers):
            # y_pred = clf.predict(X_test)
            # print(confusion_matrix(y_test, y_pred))
            # paths = dataset.getpaths()
            # for i, (yp, yt) in enumerate(zip(y_pred, y_test)):
            #     if yp != yt:
            #         copy(paths[i], "mistakes")
            #         print(yt, yp, paths[i], i)
            if not name.startswith("ensemble"):
                prob_list.append(clf.predict_proba(X_test))
            score = clf.score(X_test, y_test)
            print(name, score)
        # clf_output = clf_e.embedding(eval_loader, dim=48)

        if step % 10 == 1 and step > 10:
            dataset.setKind(DetectionKind.UserDetection.value)
            finetune_embedding(model, dataset, 32, 4, 50)
            dataset.setKind(DetectionKind.ModelDetection.value)
            dataset.updateEmbedding(model)
            dataset.embedding_mode()

        indices = activeLearning(prob_list, dataset)
        moveRecords(dataset, DetectionKind.ModelDetection.value,
                    DetectionKind.UserDetection.value,
                    [dataset.current_set[i] for i in indices])
        print([len(x) for x in dataset.set_indices])
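The activeLearning(prob_list, dataset) helper called at the end of the loop is not shown in this listing. One plausible reading, offered only as a hedged sketch rather than the repo's actual implementation, is least-confidence selection over the classifiers' averaged class probabilities:

import numpy as np

def least_confidence_batch(prob_list, n=100):
    """Average class probabilities across classifiers, then pick the n
    samples whose top predicted probability is lowest."""
    probs = np.mean(np.stack(prob_list), axis=0)  # (n_samples, n_classes)
    confidence = probs.max(axis=1)
    return np.argsort(confidence)[:n].tolist()

# Example with two mock classifiers over 5 samples and 3 classes:
rng = np.random.RandomState(0)
mock = [rng.dirichlet(np.ones(3), size=5) for _ in range(2)]
print(least_confidence_batch(mock, n=2))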