def active_train(label_budget=500):
    """Run the active-learning loop and average results over several repeats.

    For each of ``params["N_AVERAGE"]`` repetitions the labeled pool is reset,
    a fresh model is initialized, and samples are acquired in rounds of
    ``params["SELECTION_SIZE"]`` (the last round is shrunk so the total number
    of labeled samples equals ``label_budget``). After every acquisition round
    the model is retrained from scratch on all labels so far and evaluated on
    the test split; per-round accuracies/losses are averaged across repeats
    and optionally pushed to the logger.

    Args:
        label_budget: total number of samples to label per repetition
            (default 500, matching the original hard-coded budget).

    Returns:
        dict: currently always empty — best-model tracking is not implemented.

    Raises:
        ValueError: if ``params["SCORE_FN"]`` is not one of
            "all", "entropy", "egl", "random".

    NOTE(review): relies on module-level globals ``params``, ``data``,
    ``models``, ``global_logger`` and the ``select_*``/``train``/``evaluate``
    helpers — presumably defined elsewhere in this module.
    """
    # Snapshot mutable config/data so each repetition starts identically;
    # the loop below mutates params["SELECTION_SIZE"] and data["train_*"].
    init_learning_rate = params["LEARNING_RATE"]
    init_selection_size = params["SELECTION_SIZE"]
    init_data = {
        "train_x": copy.deepcopy(data["train_x"]),
        "train_y": copy.deepcopy(data["train_y"]),
    }
    average_accs = {}    # round index -> list of accuracies across repeats
    average_losses = {}  # round index -> list of losses across repeats

    # Build the classifier once; it is re-initialized (init_model) per repeat.
    if params["MODEL"] == "cnn":
        model = CNN()
    elif params["MODEL"] == "rnn":
        model = RNN(params, data)
    else:
        # NOTE(review): fallback constructor signature differs from the
        # "cnn" branch above — confirm which one CNN actually expects.
        model = CNN(data, params)
    if params["CUDA"]:
        model.cuda()
    models["CLASSIFIER"] = model

    for j in range(params["N_AVERAGE"]):
        # Restore pristine config and training pool for this repetition.
        params["LEARNING_RATE"] = init_learning_rate
        params["SELECTION_SIZE"] = init_selection_size
        data["train_x"] = copy.deepcopy(init_data["train_x"])
        data["train_y"] = copy.deepcopy(init_data["train_y"])

        lg = None
        if params["LOG"]:
            lg = global_logger["lg"]
            # Chance-level accuracy anchors the curves at step 0.
            start_accuracy = 100 / params["CLASS_SIZE"]
            lg.scalar_summary("test-acc", start_accuracy, 0)
            lg.scalar_summary("test-acc-avg", start_accuracy, 0)

        print("-" * 20, "Round {}".format(j + 1), "-" * 20)
        model.init_model()
        train_features = []
        train_targets = []
        # Per-class fraction of the labeled pool, tracked after every round.
        distribution = {key: [] for key in range(len(data["classes"]))}
        data["train_x"], data["train_y"] = shuffle(data["train_x"], data["train_y"])

        # Split the budget into acquisition rounds; a non-divisible budget
        # gets one extra, smaller, final round.
        full_rounds, remainder = divmod(label_budget, params["SELECTION_SIZE"])
        if remainder == 0:
            n_rounds = full_rounds
            last_selection_size = params["SELECTION_SIZE"]
        else:
            n_rounds = full_rounds + 1
            last_selection_size = remainder

        for i in range(n_rounds):
            if n_rounds - 1 == i:
                params["SELECTION_SIZE"] = last_selection_size

            score_fn = params["SCORE_FN"]
            if score_fn == "all":
                t1, t2 = select_all(model, lg, i)
            elif score_fn == "entropy":
                t1, t2 = select_entropy(model, lg, i)
            elif score_fn == "egl":
                t1, t2 = select_egl(model, lg, i)
            elif score_fn == "random":
                t1, t2 = select_random(model, lg, i)
            else:
                # Fail loudly instead of dying later with a NameError on t1/t2.
                raise ValueError("unknown SCORE_FN: {!r}".format(score_fn))
            train_features.extend(t1)
            train_targets.extend(t2)

            print("\n")
            # Retrain from scratch on the full labeled pool each round.
            model.init_model()
            model = train(model, train_features, train_targets)
            accuracy, loss, corrects, size = evaluate(model, i, mode="test")
            print("{:10s} loss: {:10.6f} acc: {:10.4f}%({}/{}) \n".format("test", loss, accuracy, corrects, size))

            average_accs.setdefault(i, []).append(accuracy)
            average_losses.setdefault(i, []).append(loss)

            if params["LOG"]:
                lg.scalar_summary("test-acc", accuracy, len(train_features))
                lg.scalar_summary(
                    "test-acc-avg",
                    sum(average_accs[i]) / len(average_accs[i]),
                    len(train_features))
                lg.scalar_summary("test-loss", loss, len(train_features))
                lg.scalar_summary(
                    "test-loss-avg",
                    sum(average_losses[i]) / len(average_losses[i]),
                    len(train_features))

            for each in range(len(data["classes"])):
                val = train_targets.count(each) / len(train_targets)
                distribution[each].append(val)
            # count number of positive and negative added to labeled pool.
            # nameOfFile = '{}/distribution{}.html'.format(lg.log_dir, j)

    best_model = {}
    return best_model
def create_model(args, num_classes, embedding_vector):
    """Build the text-classification model described by ``args``.

    Args:
        args: parsed CLI namespace; fields used here are ``nonlin``,
            ``tp_rule``, ``ds``, ``arch``, ``gpus`` and ``cuda``.
        num_classes: number of output classes.
        embedding_vector: pretrained embedding matrix handed to the model
            constructor.

    Returns:
        The constructed ``nn.Module``, wrapped in ``nn.DataParallel`` when
        more than one GPU is configured and moved to CUDA when requested.

    Raises:
        NotImplementedError: for an unsupported non-linearity, dataset,
            or architecture.
    """
    # Resolve the non-linearity to a (possibly partially-applied) class.
    nl_str = args.nonlin.lower()
    if nl_str == 'relu':
        nonlin = nn.ReLU
    elif nl_str == 'threshrelu':
        nonlin = ThresholdReLU
    elif nl_str == 'sign11':
        nonlin = partial(Sign11, targetprop_rule=args.tp_rule)
    elif nl_str == 'qrelu':
        nonlin = partial(qReLU, targetprop_rule=args.tp_rule, nsteps=3)
    else:
        raise NotImplementedError(
            'no other non-linearities currently supported')

    # Input (and, for semeval, target) tensor shapes per dataset.
    if args.ds in ('sentiment140', 'tsad'):
        input_shape, target_shape = (1, 60, 50), None
    elif args.ds == 'semeval':
        input_shape, target_shape = (1, 60, 100), (1, 6, 100)
    else:
        raise NotImplementedError('no other datasets currently supported')

    # Create a model with the specified architecture.
    # NOTE: LSTM takes no nonlin, and only BiLSTM consumes target_shape.
    if args.arch == 'cnn':
        model = CNN(input_shape, num_classes, embedding_vector, nonlin=nonlin)
    elif args.arch == 'lstm':
        model = LSTM(input_shape, num_classes, embedding_vector)
    elif args.arch == 'cnn-lstm':
        model = CNN_LSTM(input_shape, num_classes, embedding_vector,
                         nonlin=nonlin)
    elif args.arch == 'lstm-cnn':
        model = LSTM_CNN(input_shape, num_classes, embedding_vector,
                         nonlin=nonlin)
    elif args.arch == 'textcnn':
        model = TextCNN(input_shape, num_classes, embedding_vector,
                        nonlin=nonlin)
    elif args.arch == 'bilstm':
        model = BiLSTM(input_shape, target_shape, num_classes,
                       embedding_vector, nonlin=nonlin)
    else:
        raise NotImplementedError('other models not yet supported')

    logging.info("{} model has {} parameters and non-linearity={} ({})".format(
        args.arch, sum(p.data.nelement() for p in model.parameters()),
        nl_str, args.tp_rule.name))

    if len(args.gpus) > 1:
        model = nn.DataParallel(model)
    if args.cuda:
        model.cuda()

    return model