def hill_climbing(model, choose_list=None):
    """Greedy forward feature selection over ``model.feature_list``.

    Starting from the features already named in *choose_list*, each round
    tries every not-yet-selected feature as the next addition: the model is
    retrained with ``train_nlpcc``, scored with ``dev_nlpcc`` and
    ``test_nlpcc``, and the attempt is logged to ``./data/records.csv``.
    The candidate with the highest dev score is kept. Rounds continue until
    every feature has been selected; there is no early stop.

    Args:
        model: Object exposing a ``feature_list`` attribute. The attribute is
            overwritten with each candidate subset and is not restored to
            its original value.
        choose_list: Indices into ``model.feature_list`` that count as
            already selected. A list passed here is extended in place with
            the indices picked by the search. Omitting it starts from an
            empty selection.

    Returns:
        The list of selected indices in the order they were chosen (the same
        object as *choose_list* when one was supplied).
    """
    # Build a fresh list per call: a literal ``[]`` default is created once
    # and would carry the picks of one call over into the next.
    chooses = [] if choose_list is None else choose_list
    feature_list = model.feature_list
    preselected = set(chooses)
    visited = [x in preselected for x in range(len(feature_list))]

    for idx in range(len(chooses), len(feature_list)):
        choose_index = -1
        best_score = 0.0
        best_test_score = 0.0
        chooses.append(-1)  # placeholder slot, filled with each candidate in turn
        for i, seen in enumerate(visited):
            if seen:
                continue
            chooses[idx] = i
            model.feature_list = [feature_list[s] for s in chooses]
            train_nlpcc(model)
            cur_score = dev_nlpcc(model)
            test_score = test_nlpcc(model)
            stst.record('./data/records.csv', cur_score, test_score, model)
            # Always accept the first candidate of a round. Comparing only
            # against the 0.0 baseline left the index at -1 whenever no dev
            # score was positive, which then selected feature_list[-1].
            if choose_index == -1 or best_score < cur_score:
                choose_index = i
                best_score = cur_score
                best_test_score = test_score
        chooses[idx] = choose_index
        visited[choose_index] = True
        print('Best Score: %.2f %%, %.2f%%,choose Feature %s' % (best_score * 100, best_test_score * 100, feature_list[choose_index].feature_name))
    return chooses
model.add(stst.NegativeFeature())

# Paths of the three benchmark splits and of the results log.
train_file, dev_file, test_file = (
    './data/stsbenchmark/sts-train.csv',
    './data/stsbenchmark/sts-dev.csv',
    './data/stsbenchmark/sts-test.csv',
)
recod_file = './data/records.csv'

# Handle on the parsing server at the given address.
nlp = stst.StanfordNLP('http://localhost:9000')

# Parse the train and dev splits.
train_instances = stst.load_parse_data(train_file, nlp)
dev_instances = stst.load_parse_data(dev_file, nlp)

# Fit on the train split, then run the model on the dev split.
model.train(train_instances, train_file)
model.test(dev_instances, dev_file)

# Evaluate the dev run. model.output_file is read here, before the next
# model.test() call (presumably each test() rewrites it; verify in stst).
dev_pearsonr = stst.eval_output_file(model.output_file)
print('Dev:', dev_pearsonr)

# Same procedure for the test split.
test_instances = stst.load_parse_data(test_file, nlp)
model.test(test_instances, test_file)
test_pearsonr = stst.eval_output_file(model.output_file)
print('Test:', test_pearsonr)

# Log both results together with the model.
stst.record(recod_file, dev_pearsonr, test_pearsonr, model)