def main(argv):
  del argv

  confusions = [float(t) for t in FLAGS.confusions.split(" ")]
  mixtures = [float(t) for t in FLAGS.active_sampling_percentage.split(" ")]
  max_dataset_size = None if FLAGS.max_dataset_size == 0 else FLAGS.max_dataset_size
  starting_seed = 42

  for c in confusions:
    for m in mixtures:
      for seed in range(starting_seed, starting_seed + FLAGS.trials):
        sampler = get_AL_sampler(FLAGS.sampling_method)
        score_model = utils.get_model(seed)
        results, sampler_state = generate_one_curve(
            sampler, score_model, seed, FLAGS.warmstart_size,
            FLAGS.batch_size, c, m, FLAGS.train_horizon)
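The nested loops above sweep every (confusion, mixture, seed) combination, and each combination produces one learning curve. A minimal, self-contained sketch of that grid expansion; parse_grid is an illustrative helper, not part of the codebase:

import itertools

def parse_grid(confusions_str, mixtures_str, trials, starting_seed=42):
    """Expand space-separated flag strings into the full experiment grid."""
    confusions = [float(t) for t in confusions_str.split(" ")]
    mixtures = [float(t) for t in mixtures_str.split(" ")]
    seeds = range(starting_seed, starting_seed + trials)
    return list(itertools.product(confusions, mixtures, seeds))

# Example: two confusion levels, one mixture weight, three trials -> 6 runs.
grid = parse_grid("0.1 0.3", "1.0", trials=3)
for c, m, seed in grid:
    print("confusion=%.2f mixture=%.2f seed=%d" % (c, m, seed))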
def main(args):
    # Make the export folder structure.
    # This is done here because the Logger uses the filename.
    if args.do_save:
        # Make a base save directory.
        utils.make_dir(args.save_dir)
        # Make a directory inside the base save directory for the
        # specific sampling method.
        save_subdir = os.path.join(args.save_dir,
                                   args.dataset + "_" + args.sampling_method)
        utils.make_dir(save_subdir)
        filename = os.path.join(
            save_subdir,
            "log-" + strftime("%Y-%m-%d-%H-%M-%S", gmtime()) + ".txt")
        sys.stdout = utils.Logger(filename)

    # The confusion and mixture arguments can each hold multiple values.
    confusions = [float(t) for t in args.confusions.split(" ")]
    mixtures = [float(t) for t in args.active_sampling_percentage.split(" ")]
    max_dataset_size = None if args.max_dataset_size == 0 else args.max_dataset_size
    starting_seed = args.seed

    # Load the dataset from file based on the data directory and dataset name.
    X, y = utils.get_mldata(args.data_dir, args.dataset)

    # Object to store the results in.
    all_results = {}

    # Percentage of labels to randomize.
    for c in confusions:
        # Mixture weights on active sampling.
        for m in mixtures:
            # The number of curves created during multiple trials.
            for seed in range(starting_seed, starting_seed + args.trials):
                # Get the sampler (also called the query strategy) by name.
                sampler = get_AL_sampler(args.sampling_method)
                # Get the scoring model.
                score_model = utils.get_model(args.score_method, seed)
                # Use a separate selection model only if it differs from the
                # score model.
                if (args.select_method == "None" or
                        args.select_method == args.score_method):
                    select_model = None
                else:
                    select_model = utils.get_model(args.select_method, seed)
                # Create the learning curve.
                results, sampler_state = generate_one_curve(
                    X, y, sampler, score_model, seed,
                    args.warmstart_size, args.batch_size, select_model,
                    confusion=c, active_p=m, max_points=max_dataset_size,
                    standardize_data=args.standardize_data,
                    norm_data=args.normalize_data,
                    train_horizon=args.train_horizon)
                key = (args.dataset, args.sampling_method, args.score_method,
                       args.select_method, m, args.warmstart_size,
                       args.batch_size, c, args.standardize_data,
                       args.normalize_data, seed)
                sampler_output = sampler_state.to_dict()
                results["sampler_output"] = sampler_output
                all_results[key] = results

    # Store the field names corresponding to the positions in the key tuples.
    fields = [
        "dataset", "sampler", "score_method", "select_method",
        "active percentage", "warmstart size", "batch size", "confusion",
        "standardize", "normalize", "seed"
    ]
    all_results["tuple_keys"] = fields

    # Write the results to a file.
    if args.do_save:
        # Format the filename.
        filename = "results_score_{}_select_{}_norm_{}_stand_{}".format(
            args.score_method, args.select_method, args.normalize_data,
            args.standardize_data)
        existing_files = gfile.Glob(
            os.path.join(save_subdir, "{}*.pkl".format(filename)))
        # Zero-pad the run index to three digits: str(1000 + n)[1:].
        filepath = os.path.join(
            save_subdir,
            "{}_{}.pkl".format(filename, str(1000 + len(existing_files))[1:]))
        # Dump the dict to a pickle file.
        pickle.dump(all_results, gfile.GFile(filepath, "w"))
        # Flush the stdout logger so the log file is complete.
        sys.stdout.flush_file()
def main(argv):
  del argv

  if not gfile.Exists(FLAGS.save_dir):
    try:
      gfile.MkDir(FLAGS.save_dir)
    except:
      print(('WARNING: error creating save directory, '
             'directory most likely already created.'))

  save_dir = os.path.join(
      FLAGS.save_dir, FLAGS.dataset + '_' + FLAGS.sampling_method)

  if FLAGS.do_save == "True":
    if not gfile.Exists(save_dir):
      try:
        gfile.MkDir(save_dir)
      except:
        print(('WARNING: error creating save directory, '
               'directory most likely already created.'))
    # Set up logging
    filename = os.path.join(
        save_dir, "log-" + strftime("%Y-%m-%d-%H-%M-%S", gmtime()) + ".txt")
    sys.stdout = utils.Logger(filename)

  X, y = utils.get_mldata(FLAGS.data_dir, FLAGS.dataset)  # Load the dataset.
  starting_seed = FLAGS.seed
  all_results = {}

  for seed in range(starting_seed, starting_seed + FLAGS.trials):
    sampler = get_AL_sampler(FLAGS.sampling_method)  # Load the sampler.
    score_model = utils.get_model(FLAGS.score_method, seed)  # Load the score model.
    # Load the select model, unless it matches the score model.
    if (FLAGS.select_method == "None" or
        FLAGS.select_method == FLAGS.score_method):
      select_model = None
    else:
      select_model = utils.get_model(FLAGS.select_method, seed)
    results, sampler_state = generate_one_curve(
        X=X, y=y, sampler=sampler, score_model=score_model, seed=seed,
        warmstart_size=FLAGS.warmstart_size, batch_size=FLAGS.batch_size,
        select_model=select_model, max_points=FLAGS.max_dataset_size)
    key = (FLAGS.dataset, FLAGS.sampling_method, FLAGS.score_method,
           FLAGS.select_method, FLAGS.warmstart_size, FLAGS.batch_size, seed)
    # sampler_output = sampler_state.to_dict()
    # results['sampler_output'] = sampler_output
    results['sampler_output'] = None
    all_results[key] = results

  fields = [
      'dataset', 'sampling_methods', 'score_method', 'select_method',
      'warmstart size', 'batch size', 'seed'
  ]
  all_results['tuple_keys'] = fields

  if FLAGS.do_save == "True":
    filename = ("results_score_" + FLAGS.score_method +
                "_select_" + FLAGS.select_method)
    existing_files = gfile.Glob(os.path.join(save_dir, filename + "*.pkl"))
    filename = os.path.join(
        save_dir,
        filename + "_" + str(1000 + len(existing_files))[1:] + ".pkl")
    pickle.dump(all_results, gfile.GFile(filename, "w"))
    sys.stdout.flush_file()
def main(argv):
  del argv

  if not gfile.Exists(FLAGS.save_dir):
    try:
      gfile.MkDir(FLAGS.save_dir)
    except:
      print(('WARNING: error creating save directory, '
             'directory most likely already created.'))

  save_dir = os.path.join(
      FLAGS.save_dir, FLAGS.dataset + "_" + FLAGS.sampling_method)
  do_save = FLAGS.do_save == "True"

  if do_save:
    if not gfile.Exists(save_dir):
      try:
        gfile.MkDir(save_dir)
      except:
        print(('WARNING: error creating save directory, '
               'directory most likely already created.'))
    # Set up logging
    filename = os.path.join(
        save_dir, "log-" + strftime("%Y-%m-%d-%H-%M-%S", gmtime()) + ".txt")
    sys.stdout = utils.Logger(filename)

  confusions = [float(t) for t in FLAGS.confusions.split(" ")]
  mixtures = [float(t) for t in FLAGS.active_sampling_percentage.split(" ")]
  all_results = {}
  max_dataset_size = None if FLAGS.max_dataset_size == "0" else int(
      FLAGS.max_dataset_size)
  normalize_data = FLAGS.normalize_data == "True"
  standardize_data = FLAGS.standardize_data == "True"
  X, y = utils.get_mldata(FLAGS.data_dir, FLAGS.dataset)
  starting_seed = FLAGS.seed

  for c in confusions:
    for m in mixtures:
      for seed in range(starting_seed, starting_seed + FLAGS.trials):
        sampler = get_AL_sampler(FLAGS.sampling_method)
        score_model = utils.get_model(FLAGS.score_method, seed)
        if (FLAGS.select_method == "None" or
            FLAGS.select_method == FLAGS.score_method):
          select_model = None
        else:
          select_model = utils.get_model(FLAGS.select_method, seed)
        results, sampler_state = generate_one_curve(
            X, y, sampler, score_model, seed, FLAGS.warmstart_size,
            FLAGS.batch_size, select_model, c, m, max_dataset_size,
            standardize_data, normalize_data, FLAGS.train_horizon)
        key = (FLAGS.dataset, FLAGS.sampling_method, FLAGS.score_method,
               FLAGS.select_method, m, FLAGS.warmstart_size,
               FLAGS.batch_size, c, standardize_data, normalize_data, seed)
        sampler_output = sampler_state.to_dict()
        results["sampler_output"] = sampler_output
        all_results[key] = results

  fields = [
      "dataset", "sampler", "score_method", "select_method",
      "active percentage", "warmstart size", "batch size", "confusion",
      "standardize", "normalize", "seed"
  ]
  all_results["tuple_keys"] = fields

  if do_save:
    filename = ("results_score_" + FLAGS.score_method + "_select_" +
                FLAGS.select_method + "_norm_" + str(normalize_data) +
                "_stand_" + str(standardize_data))
    existing_files = gfile.Glob(os.path.join(save_dir, filename + "*.pkl"))
    filename = os.path.join(
        save_dir,
        filename + "_" + str(1000 + len(existing_files))[1:] + ".pkl")
    pickle.dump(all_results, gfile.GFile(filename, "w"))
    sys.stdout.flush_file()
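The str(1000 + len(existing_files))[1:] idiom above zero-pads the run index to three digits ("000", "001", ...) so repeated runs never overwrite earlier result files and sort lexicographically. A standalone sketch of the same scheme, with glob standing in for gfile.Glob (next_result_path and the paths are illustrative):

import glob
import os

def next_result_path(results_dir, stem):
    existing = glob.glob(os.path.join(results_dir, stem + "*.pkl"))
    index = str(1000 + len(existing))[1:]  # 0 -> "000", 1 -> "001", ...
    return os.path.join(results_dir, stem + "_" + index + ".pkl")

print(next_result_path("/tmp", "results_score_logistic_select_None"))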
def train_classifier():
    global dataset
    global kwargs
    global sampler
    global classifier_trained
    global X_pred
    global y_pred

    data = bottle.request.json

    # Train on samples that have been labeled so far.
    # dataset.set_kind(DetectionKind.UserDetection.value)
    dataset.set_kind(4)
    print(dataset.current_set)
    print(type(dataset.current_set))
    X_train = dataset.em[dataset.current_set]
    y_train = np.asarray(dataset.getlabels())
    # print(y_train)

    timer = time.time()
    kwargs["model"].fit(X_train, y_train)
    print('Training took %0.2f seconds' % (time.time() - timer))

    timer = time.time()
    joblib.dump(
        kwargs["model"],
        "%s/%s_%04d.skmodel" % (args.checkpoint_dir, 'classifier',
                                len(dataset.current_set)))
    print('Saving classifier checkpoint took %0.2f seconds' %
          (time.time() - timer))

    # Predict on the samples that have not been labeled.
    timer = time.time()
    dataset.set_kind(DetectionKind.ModelDetection.value)
    X_pred = dataset.em[dataset.current_set]
    y_pred = kwargs["model"].predict(X_pred)
    print('Predicting on unlabeled samples took %0.2f seconds' %
          (time.time() - timer))
    # print(y_pred)

    # Update the model-predicted class in the PostgreSQL database, row by row.
    # timer = time.time()
    # for pos in range(len(y_pred)):
    #     idx = dataset.current_set[pos]
    #     det_id = dataset.samples[idx][0]
    #     matching_detection_entries = (Detection
    #                                   .select(Detection.id, Detection.category_id)
    #                                   .where((Detection.id == det_id)))
    #     mde = matching_detection_entries.get()
    #     command = Detection.update(category_id=y_pred[pos]).where(Detection.id == mde.id)
    #     command.execute()
    # print('Updating the database took %0.2f seconds' % (time.time() - timer))

    # Alternative: batch-update the PostgreSQL database with a CASE statement.
    # timer = time.time()
    # det_ids = [dataset.samples[dataset.current_set[pos]][0] for pos in range(len(y_pred))]
    # y_pred = [int(y) for y in y_pred]
    # det_id_pred_pairs = list(zip(det_ids, y_pred))
    # case_statement = Case(Detection.id, det_id_pred_pairs)
    # command = Detection.update(category_id=case_statement).where(Detection.id.in_(det_ids))
    # command.execute()
    # print('Updating the database the other way took %0.2f seconds' % (time.time() - timer))

    # Update the dataset dataloader.
    timer = time.time()
    for pos in range(len(y_pred)):
        idx = dataset.current_set[pos]
        sample_data = list(dataset.samples[idx])
        sample_data[1] = y_pred[pos]
        dataset.samples[idx] = tuple(sample_data)
    print('Updating the dataset dataloader took %0.2f seconds' %
          (time.time() - timer))

    # Once the classifier has been trained the first time, switch to AL sampling.
    if not classifier_trained:
        classifier_trained = True
        sampler = get_AL_sampler('confidence')(dataset.em,
                                               dataset.getalllabels(), 1234)

    bottle.response.content_type = 'application/json'
    bottle.response.status = 200
    return json.dumps(data)
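The dataloader update above rewrites each cached sample tuple with its new predicted class; because tuples are immutable, the code round-trips through a list. A tiny standalone illustration, assuming field 1 of each sample tuple holds the label (the sample values are synthetic):

samples = [(101, 0, "img_a.jpg"), (102, 0, "img_b.jpg")]
y_pred = [3, 7]
for pos, label in enumerate(y_pred):
    sample_data = list(samples[pos])
    sample_data[1] = label
    samples[pos] = tuple(sample_data)
print(samples)  # [(101, 3, 'img_a.jpg'), (102, 7, 'img_b.jpg')]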
color_indices = list(
    set(range(len(dataset.samples))) -
    set(grayscale_indices))  # records with color images
detection_conf_values = [rec[4] for rec in dataset.samples]

dataset.updateEmbedding(model)
dataset.embedding_mode()
dataset.train()

kwargs = {}
kwargs["N"] = 25
kwargs["already_selected"] = set()

if args.classifier_checkpoint != '':
    print('loading pre-trained classifier')
    kwargs["model"] = joblib.load(args.classifier_checkpoint)
    classifier_trained = True
    sampler = get_AL_sampler('confidence')(dataset.em,
                                           dataset.getalllabels(), 1234)

    # Use the classifier to generate predictions.
    dataset.set_kind(DetectionKind.ModelDetection.value)
    X_pred = dataset.em[dataset.current_set]
    y_pred = kwargs["model"].predict(X_pred)

    # # Update the model-predicted class in the PostgreSQL database.
    # for pos in range(len(y_pred)):
    #     idx = dataset.current_set[pos]
    #     det_id = dataset.samples[idx][0]
    #     matching_detection_entries = (Detection
    #                                   .select(Detection.id, Detection.category_id)
    #                                   .where((Detection.id == det_id)))
    #     mde = matching_detection_entries.get()
    #     command = Detection.update(category_id=y_pred[pos]).where(Detection.id == mde.id)
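The checkpoint branch above resumes from a classifier previously saved with joblib. A minimal round-trip sketch of that persistence pattern (the model, data, and filename are illustrative):

import os
import tempfile
import joblib
from sklearn.linear_model import LogisticRegression

clf = LogisticRegression().fit([[0], [1]], [0, 1])
path = os.path.join(tempfile.gettempdir(), "classifier_0001.skmodel")
joblib.dump(clf, path)          # save a fitted classifier checkpoint
restored = joblib.load(path)    # resume from the checkpoint later
print(restored.predict([[0.8]]))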
def main():
    args = parser.parse_args()

    # Initialize the database.
    ## Database connection credentials.
    DB_NAME = args.db_name
    USER = args.db_user
    PASSWORD = args.db_password

    print("DB Connect")
    ## Try to connect as USER to database DB_NAME through peewee.
    target_db = PostgresqlDatabase(DB_NAME, user=USER, password=PASSWORD,
                                   host='localhost')
    target_db.connect(reuse_if_open=True)
    db_proxy.initialize(target_db)
    print("connected")

    # Load the saved embedding model.
    checkpoint = load_checkpoint(args.base_model)
    if args.experiment_name == '':
        args.experiment_name = "experiment_%s_%s" % (checkpoint['loss_type'],
                                                     args.strategy)
    if not os.path.exists(args.experiment_name):
        os.mkdir(args.experiment_name)
    if (checkpoint['loss_type'].lower() == 'center' or
            checkpoint['loss_type'].lower() == 'softmax'):
        embedding_net = SoftmaxNet(checkpoint['arch'], checkpoint['feat_dim'],
                                   checkpoint['num_classes'], False)
    else:
        embedding_net = NormalizedEmbeddingNet(checkpoint['arch'],
                                               checkpoint['feat_dim'], False)
    model = torch.nn.DataParallel(embedding_net).cuda()
    model.load_state_dict(checkpoint['state_dict'])

    # dataset_query = Detection.select().limit(5)
    dataset_query = Detection.select(
        Detection.image_id, Oracle.label,
        Detection.kind).join(Oracle).order_by(fn.random()).limit(
            args.db_query_limit)  ## TODO: should this really be order_by random?
    dataset = SQLDataLoader(args.crop_dir, query=dataset_query,
                            is_training=False,
                            kind=DetectionKind.ModelDetection.value,
                            num_workers=8, limit=args.db_query_limit)
    dataset.updateEmbedding(model)
    # plot_embedding_images(dataset.em[:], np.asarray(dataset.getlabels()), dataset.getpaths(), {})
    # plot_embedding_images(dataset.em[:], np.asarray(dataset.getalllabels()), dataset.getallpaths(), {})

    # Random examples to start.
    # random_ids = np.random.choice(dataset.current_set, 1000, replace=False).tolist()
    # random_ids = selectSamples(dataset.em[dataset.current_set], dataset.current_set, 2000)
    # print(random_ids)
    # Move records.
    # moveRecords(dataset, DetectionKind.ModelDetection.value, DetectionKind.UserDetection.value, random_ids)
    # print([len(x) for x in dataset.set_indices])

    # # Finetune the embedding model.
    # dataset.set_kind(DetectionKind.UserDetection.value)
    # dataset.train()
    # train_dataset = SQLDataLoader(trainset_query, os.path.join(args.run_data, 'crops'), is_training=True)
    # finetune_embedding(model, checkpoint['loss_type'], dataset, 32, 4, 100)
    # save_checkpoint({
    #     'arch': model.arch,
    #     'state_dict': model.state_dict(),
    #     'optimizer': optimizer.state_dict(),
    #     'loss_type': loss_type,
    # }, False, "%s%s_%s_%04d.tar" % ('finetuned', loss_type, model.arch,
    #                                 len(dataset.set_indices[DetectionKind.UserDetection.value])))

    dataset.embedding_mode()
    dataset.train()
    sampler = get_AL_sampler(args.strategy)(dataset.em,
                                            dataset.getalllabels(), 12)
    kwargs = {}
    kwargs["N"] = args.active_batch
    kwargs["already_selected"] = dataset.set_indices[
        DetectionKind.UserDetection.value]
    kwargs["model"] = MLPClassifier(alpha=0.0001)

    print("Start the active learning loop")
    sys.stdout.flush()
    numLabeled = len(dataset.set_indices[DetectionKind.UserDetection.value])
    while numLabeled <= args.active_budget:
        print([len(x) for x in dataset.set_indices])
        sys.stdout.flush()
        # Get indices of samples for the user to label.
        if numLabeled == 0:
            indices = np.random.choice(dataset.current_set, kwargs["N"],
                                       replace=False).tolist()
        else:
            indices = sampler.select_batch(**kwargs)
        # numLabeled = len(dataset.set_indices[DetectionKind.UserDetection.value])
        # kwargs["already_selected"].extend(indices)
        moveRecords(dataset, DetectionKind.ModelDetection.value,
                    DetectionKind.UserDetection.value, indices)
        numLabeled = len(
            dataset.set_indices[DetectionKind.UserDetection.value])

        # Train on samples that have been labeled so far.
        dataset.set_kind(DetectionKind.UserDetection.value)
        X_train = dataset.em[dataset.current_set]
        y_train = np.asarray(dataset.getlabels())
        kwargs["model"].fit(X_train, y_train)
        joblib.dump(
            kwargs["model"],
            "%s/%s_%04d.skmodel" % (args.experiment_name, 'classifier',
                                    numLabeled))

        # Test on the samples that have not been labeled.
        dataset.set_kind(DetectionKind.ModelDetection.value)
        dataset.embedding_mode()
        X_test = dataset.em[dataset.current_set]
        y_test = np.asarray(dataset.getlabels())
        print("Accuracy", kwargs["model"].score(X_test, y_test))
        sys.stdout.flush()

        # Periodically fine-tune the embedding model on the labeled pool;
        # this condition fires at 1000, 3000, 5000, ... labeled samples.
        if numLabeled % 2000 == 1000:
            dataset.set_kind(DetectionKind.UserDetection.value)
            finetune_embedding(model, checkpoint['loss_type'], dataset, 10, 4,
                               100 if numLabeled == 1000 else 50)
            save_checkpoint(
                {
                    'arch': checkpoint['arch'],
                    'state_dict': model.state_dict(),
                    # 'optimizer': optimizer.state_dict(),
                    'loss_type': checkpoint['loss_type'],
                    'feat_dim': checkpoint['feat_dim'],
                    'num_classes': args.num_classes
                }, False,
                "%s/%s%s_%s_%04d.tar" % (args.experiment_name, 'finetuned',
                                         checkpoint['loss_type'],
                                         checkpoint['arch'], numLabeled))
            dataset.set_kind(DetectionKind.ModelDetection.value)
            dataset.updateEmbedding(model)
            dataset.embedding_mode()
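Taken together, the two blocks above implement the standard pool-based active-learning cycle: select a batch, move it to the labeled pool, refit the classifier, and score the remaining pool, repeating until the labeling budget is spent. A self-contained sklearn sketch of that cycle, with random selection standing in for sampler.select_batch (all data and names here are synthetic):

import numpy as np
from sklearn.neural_network import MLPClassifier

rng = np.random.RandomState(12)
X = rng.randn(500, 16)
y = (X[:, 0] > 0).astype(int)
labeled = list(rng.choice(500, 25, replace=False))
model = MLPClassifier(alpha=0.0001, max_iter=500)

budget, batch = 100, 25
while len(labeled) <= budget:
    model.fit(X[labeled], y[labeled])          # refit on the labeled pool
    unlabeled = np.setdiff1d(np.arange(500), labeled)
    print("labeled=%d acc=%.3f" % (len(labeled),
                                   model.score(X[unlabeled], y[unlabeled])))
    # An AL sampler would rank `unlabeled` by informativeness here.
    labeled.extend(rng.choice(unlabeled, batch, replace=False).tolist())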
def main(): args = parser.parse_args() print("DB Connect") db_path = os.path.join(args.run_data, os.path.basename( args.run_data)) + ".db" print(db_path) db = SqliteDatabase(db_path) proxy.initialize(db) db.connect() print("connected") print("CompleteLoop") checkpoint = load_checkpoint(args.base_model) embedding_net = EmbeddingNet(checkpoint['arch'], checkpoint['feat_dim'], False) #embedding_net = EmbeddingNet('resnet50', 256, True) model = torch.nn.DataParallel(embedding_net).cuda() model.load_state_dict(checkpoint['state_dict']) #unlabeledset_query= Detection.select(Detection.id,Oracle.label).join(Oracle).where(Detection.kind==DetectionKind.ModelDetection.value).order_by(fn.random()).limit(150000) #unlabeled_dataset = SQLDataLoader(unlabeledset_query, os.path.join(args.run_data, "crops"), is_training= False, num_workers= 8) dataset = SQLDataLoader(os.path.join(args.run_data, "crops"), is_training=False, kind=DetectionKind.ModelDetection.value, num_workers=8) dataset.updateEmbedding(model) #print('Embedding Done') #sys.stdout.flush() #plot_embedding(dataset.em[dataset.current_set], np.asarray(dataset.getlabels()) , dataset.getpaths(), {}) # Random examples to start random_ids = np.random.choice(dataset.current_set, 5000, replace=False).tolist() #random_ids = selectSamples(dataset.em[dataset.current_set], dataset.current_set, 2000) #print(random_ids) # Move Records moveRecords(dataset, DetectionKind.ModelDetection.value, DetectionKind.UserDetection.value, random_ids) print([len(x) for x in dataset.set_indices]) # Finetune the embedding model dataset.setKind(DetectionKind.UserDetection.value) dataset.train() #train_dataset = SQLDataLoader(trainset_query, os.path.join(args.run_data, 'crops'), is_training= True) finetune_embedding(model, dataset, 32, 4, 0) #unlabeled_dataset.updateEmbedding(model) dataset.updateEmbedding(model) dataset.setKind(DetectionKind.UserDetection.value) #print(dataset.em[dataset.current_set].shape, np.asarray(dataset.getlabels()).shape, len(dataset.getpaths())) #plot_embedding( dataset.em[dataset.current_set], np.asarray(dataset.getlabels()) , dataset.getpaths(), {}) #plot_embedding( unlabeled_dataset.em, np.asarray(unlabeled_dataset.getlabels()) , unlabeled_dataset.getIDs(), {}) dataset.embedding_mode() dataset.train() clf_model = ClassificationNet(256, 48).cuda() #train_eval_classifier() #clf_model = ClassificationNet(checkpoint['feat_dim'], 48).cuda() clf_criterion = FocalLoss(gamma=2) #nn.CrossEntropyLoss() clf_optimizer = torch.optim.Adam(clf_model.parameters(), lr=0.001, weight_decay=0.0005) clf_e = Engine(clf_model, clf_criterion, clf_optimizer, verbose=True, print_freq=10) #names = ["Linear SVM", "RBF SVM", "Random Forest", "Neural Net", "Naive Bayes"] #classifiers = [SVC(kernel="linear", C=0.025, probability= True, class_weight='balanced'), # SVC(gamma=2, C=1, probability= True, class_weight='balanced'), # RandomForestClassifier(max_depth=None, n_estimators=100, class_weight='balanced'), # MLPClassifier(alpha=1), # GaussianNB()] #estimators= [] #for name, clf in zip(names, classifiers): # estimators.append((name, clf)) #eclf1 = VotingClassifier(estimators= estimators, voting='hard') #eclf2 = VotingClassifier(estimators= estimators, voting='soft') #names.append("ensemble hard") #classifiers.append(eclf1) #names.append("ensemble soft") #classifiers.append(eclf2) names = ["Neural Net"] classifiers = [MLPClassifier(alpha=1)] """dataset.setKind(DetectionKind.UserDetection.value) learner = ActiveLearner( estimator=MLPClassifier(), 
        query_strategy=uncertainty_sampling,
        X_training=dataset.em[dataset.current_set],
        y_training=np.asarray(dataset.getlabels()))
    for step in range(91):
        dataset.setKind(DetectionKind.ModelDetection.value)
        query_idx, query_inst = learner.query(dataset.em[dataset.current_set],
                                              n_instances=100)
        moveRecords(dataset, DetectionKind.ModelDetection.value,
                    DetectionKind.UserDetection.value,
                    [dataset.current_set[i] for i in query_idx])
        dataset.setKind(DetectionKind.UserDetection.value)
        learner.teach(dataset.em[dataset.current_set],
                      np.asarray(dataset.getlabels()))
        if step in [11, 31, 51, 71, 91, 101]:
            finetune_embedding(model, dataset, 32, 4, 100)
            dataset.updateEmbedding(model)
            dataset.embedding_mode()
        dataset.setKind(DetectionKind.ModelDetection.value)
        print(learner.score(dataset.em[dataset.current_set],
                            np.asarray(dataset.getlabels())))
        print([len(x) for x in dataset.set_indices])
        sys.stdout.flush()"""

    sampler = get_AL_sampler('uniform')(dataset.em[dataset.current_set],
                                        dataset.getlabels(), 12)
    print(sampler, type(sampler), dir(sampler))
    kwargs = {}
    kwargs["N"] = 100
    kwargs["already_selected"] = []
    kwargs["model"] = SVC(kernel="linear", C=0.025, probability=True,
                          class_weight='balanced')
    kwargs["model"].fit(dataset.em[dataset.current_set], dataset.getlabels())
    batch_AL = sampler.select_batch(**kwargs)
    print(batch_AL)

    for step in range(101):
        dataset.setKind(DetectionKind.UserDetection.value)
        clf_model.train()
        clf_train_loader = dataset.getSingleLoader(batch_size=64)
        for i in range(15):
            clf_e.train_one_epoch(clf_train_loader, i, True)
        clf_model.eval()
        X_train = dataset.em[dataset.current_set]
        y_train = np.asarray(dataset.getlabels())
        for name, clf in zip(names, classifiers):
            clf.fit(X_train, y_train)
            print(name)

        dataset.setKind(DetectionKind.ModelDetection.value)
        # dataset.image_mode()
        # dataset.updateEmbedding(model)
        dataset.embedding_mode()
        dataset.eval()
        eval_loader = dataset.getSingleLoader(batch_size=1024)
        clf_e.validate(eval_loader, True)
        X_test = dataset.em[dataset.current_set]
        y_test = np.asarray(dataset.getlabels())

        prob_list = []
        for name, clf in zip(names, classifiers):
            # y_pred = clf.predict(X_test)
            # print(confusion_matrix(y_test, y_pred))
            # paths = dataset.getpaths()
            # for i, (yp, yt) in enumerate(zip(y_pred, y_test)):
            #     if yp != yt:
            #         copy(paths[i], "mistakes")
            #         print(yt, yp, paths[i], i)
            if not name.startswith("ensemble"):
                prob_list.append(clf.predict_proba(X_test))
            score = clf.score(X_test, y_test)
            print(name, score)
        # clf_output = clf_e.embedding(eval_loader, dim=48)

        if step % 10 == 1 and step > 10:
            dataset.setKind(DetectionKind.UserDetection.value)
            finetune_embedding(model, dataset, 32, 4, 50)
            dataset.setKind(DetectionKind.ModelDetection.value)
            dataset.updateEmbedding(model)
            dataset.embedding_mode()

        indices = activeLearning(prob_list, dataset)
        moveRecords(dataset, DetectionKind.ModelDetection.value,
                    DetectionKind.UserDetection.value,
                    [dataset.current_set[i] for i in indices])
        print([len(x) for x in dataset.set_indices])
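The activeLearning(prob_list, dataset) helper called at the end of the loop is not shown in this listing. One plausible reading, offered only as a hedged sketch rather than the repo's actual implementation, is least-confidence selection over the classifiers' averaged class probabilities:

import numpy as np

def least_confidence_batch(prob_list, n=100):
    """Average class probabilities across classifiers, then pick the n
    samples whose top predicted probability is lowest."""
    probs = np.mean(np.stack(prob_list), axis=0)  # (n_samples, n_classes)
    confidence = probs.max(axis=1)
    return np.argsort(confidence)[:n].tolist()

# Example with two mock classifiers over 5 samples and 3 classes:
rng = np.random.RandomState(0)
mock = [rng.dirichlet(np.ones(3), size=5) for _ in range(2)]
print(least_confidence_batch(mock, n=2))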